def run_analysis_max_univariate_prediction(cname, cause, ename, effect, theiler_window):
    """Choose (E, tau) by univariate predictability of `effect`, then analyze.

    For each embedding dimension E in SWEEP_EMBEDDING_DIMENSIONS and delay tau
    in SWEEP_DELAYS, `effect[:-1]` is embedded with delays
    (0, tau, ..., (E - 1) * tau) and that embedding is used to predict
    `effect[1:]` through `simplex_predict_using_embedding`. The correlation
    between actual and predicted values is written to the
    `univariate_predictions` table of the module-level `db`, and the (E, tau)
    with the highest correlation is used to embed the full `effect` series,
    which is handed to `run_analysis_for_embedding`.

    Parameters:
        cname, cause: name and time series of the putative cause.
        ename, effect: name and time series of the putative effect.
        theiler_window: forwarded unchanged to the prediction and analysis
            calls.

    Raises:
        ValueError: if no (E, tau) combination produced a usable correlation
            (every embedding was too short, or every correlation was NaN).
    """
    # The table definition does not depend on E or tau; issue it once.
    db.execute(
        'CREATE TABLE IF NOT EXISTS univariate_predictions (variable, delays, correlation)'
    )

    max_corr = float('-inf')
    max_corr_Etau = None
    for E in SWEEP_EMBEDDING_DIMENSIONS:
        # With E == 1 the delays are always (0,), so every tau would give the
        # same embedding; sweep a single tau, as the sibling sweeps do.
        for tau in (SWEEP_DELAYS if E > 1 else [1]):
            delays = tuple(range(0, E * tau, tau))
            embedding = pyembedding.Embedding(effect[:-1], delays)
            if embedding.delay_vector_count < embedding.embedding_dimension + 2:
                sys.stderr.write(
                    ' Lmax < Lmin; skipping E={}, tau={}\n'.format(E, tau))
                continue

            eff_off, eff_off_pred = embedding.simplex_predict_using_embedding(
                embedding, effect[1:], theiler_window=theiler_window)
            corr = numpy.corrcoef(eff_off, eff_off_pred)[0, 1]

            db.execute('INSERT INTO univariate_predictions VALUES (?,?,?)',
                       [ename, str(delays), corr])
            sys.stderr.write(' corr for E={}, tau={} : {}\n'.format(
                E, tau, corr))

            # A NaN correlation compares False here and is never selected.
            if corr > max_corr:
                max_corr = corr
                max_corr_Etau = (E, tau)

    if max_corr_Etau is None:
        raise ValueError(
            'No usable (E, tau) found for effect {!r}: every embedding was '
            'skipped or produced a NaN correlation.'.format(ename))

    E, tau = max_corr_Etau
    delays = tuple(range(0, E * tau, tau))
    max_corr_emb = pyembedding.Embedding(effect, delays)
    sys.stderr.write(' Using E = {}, tau = {}\n'.format(*max_corr_Etau))
    run_analysis_for_embedding(cname, cause, ename, effect, max_corr_emb,
                               theiler_window)
def run_analysis_max_univariate_prediction(cname, cause, ename, effect,
                                           theiler_window, db, rng,
                                           ccm_settings):
    """Choose (E, tau) by univariate predictability of `effect`, then analyze.

    For each E in ccm_settings['sweep_embedding_dimensions'] and tau in
    ccm_settings['sweep_delays'] (a single tau of 1 when E <= 1),
    `effect[:-1]` is embedded with delays (0, tau, ..., (E - 1) * tau) and
    used to predict `effect[1:]` through `simplex_predict_using_embedding`.
    Each correlation between actual and predicted values is inserted into the
    `univariate_predictions` table. The best (E, tau) is then used to embed
    the full `effect` series for `run_analysis_for_embedding`.

    Parameters:
        cname, cause: name and time series of the putative cause.
        ename, effect: name and time series of the putative effect.
        theiler_window: forwarded unchanged to the prediction and analysis
            calls.
        db: object with an `execute(sql, params)` method (e.g. a sqlite3
            connection).
        rng: random number generator forwarded to
            `run_analysis_for_embedding`.
        ccm_settings: mapping with 'sweep_embedding_dimensions',
            'sweep_delays', 'n_ccm_bootstraps' and, optionally,
            'delta_tau_termination'. When the latter is set, the tau sweep
            for a given E stops once
            E * (tau - best tau so far for this E) >= delta_tau_termination.

    Raises:
        ValueError: if no (E, tau) combination produced a usable correlation.
    """
    delta_tau_termination = ccm_settings.get('delta_tau_termination')

    # The table definition does not depend on E or tau; issue it once.
    db.execute(
        'CREATE TABLE IF NOT EXISTS univariate_predictions (variable, delays, correlation)'
    )

    max_corr = float('-inf')
    max_corr_Etau = None
    for E in ccm_settings['sweep_embedding_dimensions']:
        max_corr_this_E = float('-inf')
        max_corr_tau_this_E = None

        for tau in (ccm_settings['sweep_delays'] if E > 1 else [1]):
            delays = tuple(range(0, E * tau, tau))
            embedding = pyembedding.Embedding(effect[:-1], delays)
            if embedding.delay_vector_count < embedding.embedding_dimension + 2:
                sys.stderr.write(
                    ' Lmax < Lmin; skipping E={}, tau={}\n'.format(E, tau))
                continue

            eff_off, eff_off_pred = embedding.simplex_predict_using_embedding(
                embedding, effect[1:], theiler_window=theiler_window)
            corr = numpy.corrcoef(eff_off, eff_off_pred)[0, 1]

            db.execute('INSERT INTO univariate_predictions VALUES (?,?,?)',
                       [ename, str(delays), corr])
            sys.stderr.write(' corr for E={}, tau={} : {}\n'.format(
                E, tau, corr))

            # A NaN correlation compares False here and is never selected.
            if corr > max_corr:
                max_corr = corr
                max_corr_Etau = (E, tau)

            if delta_tau_termination is not None:
                if corr > max_corr_this_E:
                    max_corr_this_E = corr
                    max_corr_tau_this_E = tau

                # max_corr_tau_this_E stays None while every correlation seen
                # for this E is NaN; there is no maximum to measure from yet,
                # so the termination test must be skipped rather than
                # evaluating `tau - None`.
                if (max_corr_tau_this_E is not None
                        and E * (tau - max_corr_tau_this_E) >= delta_tau_termination):
                    sys.stderr.write(
                        '{} taus since a maximum for this E; assuming found.\n'
                        .format(delta_tau_termination))
                    break

    if max_corr_Etau is None:
        raise ValueError(
            'No usable (E, tau) found for effect {!r}: every embedding was '
            'skipped or produced a NaN correlation.'.format(ename))

    E, tau = max_corr_Etau
    delays = tuple(range(0, E * tau, tau))
    max_corr_emb = pyembedding.Embedding(effect, delays)
    sys.stderr.write(' Using E = {}, tau = {}\n'.format(*max_corr_Etau))
    run_analysis_for_embedding(cname, cause, ename, effect, max_corr_emb,
                               theiler_window,
                               ccm_settings['n_ccm_bootstraps'], db, rng)
def run_analysis_surrogates(cname, cause, ename, effect, delays,
                            theiler_window, n_bootstraps, db, rng):
    """Compare the raw cross-map correlation against surrogate series.

    The correlation obtained from `ccm` on the embedding of `effect` (with
    the given `delays`) against `cause` is compared with two surrogate
    distributions of `n_bootstraps` correlations each:

    * use_effect_surrogates = 0: only `cause` is replaced by a surrogate;
    * use_effect_surrogates = 1: both `cause` and `effect` are replaced.

    Surrogates come from `randomize_surrogate` applied to the output of
    `calculate_mean_anomaly(series, 12)`. For each case one row goes into
    `surrogate_test` (raw correlation, 95th percentile of the surrogate
    correlations, and 1 - inverse quantile of the raw correlation) and one
    row into `surrogate_dist` (selected percentiles of the surrogate
    correlations).
    """
    percentile_levels = [0, 1, 2.5, 5, 25, 50, 75, 95, 97.5, 99, 100]

    db.execute(
        'CREATE TABLE IF NOT EXISTS surrogate_test (cause, effect, use_effect_surrogates, corr_raw, corr_surr_q95, pvalue_greater)'
    )
    db.execute(
        'CREATE TABLE IF NOT EXISTS surrogate_dist (cause, effect, use_effect_surrogates, q0, q1, q2_5, q5, q25, q50, q75, q95, q97_5, q99, q100)'
    )

    raw_embedding = pyembedding.Embedding(effect, delays)
    raw_result = raw_embedding.ccm(
        raw_embedding, cause, theiler_window=theiler_window)
    corr_raw = raw_result[0]['correlation']

    def store(use_effect_surrogates, surrogate_corrs):
        # One summary row and one distribution row per surrogate scheme.
        q95 = numpy.percentile(surrogate_corrs, 95)
        pvalue_greater = 1.0 - float(
            statutils.inverse_quantile(surrogate_corrs, corr_raw))
        db.execute(
            'INSERT INTO surrogate_test VALUES (?,?,?,?,?,?)',
            [cname, ename, use_effect_surrogates, corr_raw, q95,
             pvalue_greater])
        distribution = numpy.percentile(surrogate_corrs,
                                        percentile_levels).tolist()
        db.execute(
            'INSERT INTO surrogate_dist VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)',
            [cname, ename, use_effect_surrogates] + distribution)

    # Case 0: surrogate cause, real effect embedding.
    cause_mean, cause_anom = calculate_mean_anomaly(cause, 12)
    cause_only_corrs = []
    for _ in range(n_bootstraps):
        surrogate_cause = randomize_surrogate(cause_mean, cause_anom, rng)
        outcome, _actual, _predicted = raw_embedding.ccm(
            raw_embedding, surrogate_cause, theiler_window=theiler_window)
        cause_only_corrs.append(outcome['correlation'])
    store(0, cause_only_corrs)

    # Case 1: surrogate cause and surrogate effect. The cause surrogate is
    # drawn before the effect surrogate in every iteration.
    effect_mean, effect_anom = calculate_mean_anomaly(effect, 12)
    both_corrs = []
    for _ in range(n_bootstraps):
        surrogate_cause = randomize_surrogate(cause_mean, cause_anom, rng)
        surrogate_effect = randomize_surrogate(effect_mean, effect_anom, rng)
        surrogate_embedding = pyembedding.Embedding(surrogate_effect, delays)
        outcome, _actual, _predicted = surrogate_embedding.ccm(
            surrogate_embedding, surrogate_cause,
            theiler_window=theiler_window)
        both_corrs.append(outcome['correlation'])
    store(1, both_corrs)
def run_analysis_max_ccm_rho(cname, cause, ename, effect, theiler_window, db,
                             rng, ccm_settings):
    """Choose the embedding of `effect` with the highest `run_ccm` value.

    Sweeps E over ccm_settings['sweep_embedding_dimensions'] and tau over
    ccm_settings['sweep_delays'] (a single tau of 1 when E <= 1), embeds
    `effect` with delays (0, tau, ..., (E - 1) * tau), and calls `run_ccm`
    for each embedding. The embedding with the largest returned correlation
    is passed to `run_analysis_for_embedding`.

    Parameters:
        cname, cause: name and time series of the putative cause.
        ename, effect: name and time series of the putative effect.
        theiler_window: forwarded unchanged to `run_ccm` and the analysis.
        db, rng: database handle and random number generator, forwarded.
        ccm_settings: mapping with 'sweep_embedding_dimensions',
            'sweep_delays' and 'n_ccm_bootstraps'.

    Raises:
        ValueError: if no (E, tau) combination produced a usable correlation
            (every embedding was too short, or every correlation was NaN).
    """
    max_corr = float('-inf')
    max_corr_emb = None
    max_corr_Etau = None

    for E in ccm_settings['sweep_embedding_dimensions']:
        for tau in (ccm_settings['sweep_delays'] if E > 1 else [1]):
            delays = tuple(range(0, E * tau, tau))
            embedding = pyembedding.Embedding(effect, delays=delays)
            if embedding.delay_vector_count < embedding.embedding_dimension + 2:
                sys.stderr.write(
                    ' Lmax < Lmin; skipping E={}, tau={}\n'.format(E, tau))
                continue

            corr = run_ccm(cname, ename, embedding, cause, theiler_window, db)
            sys.stderr.write(' corr for E={}, tau={} : {}\n'.format(
                E, tau, corr))

            # A NaN correlation compares False here and is never selected.
            if corr > max_corr:
                max_corr = corr
                max_corr_emb = embedding
                max_corr_Etau = (E, tau)

    if max_corr_Etau is None:
        raise ValueError(
            'No usable (E, tau) found for {!r} -> {!r}: every embedding was '
            'skipped or produced a NaN correlation.'.format(cname, ename))

    sys.stderr.write(' Using E={}, tau = {}\n'.format(*max_corr_Etau))
    run_analysis_for_embedding(cname, cause, ename, effect, max_corr_emb,
                               theiler_window,
                               ccm_settings['n_ccm_bootstraps'], db, rng)
def run_analysis_uniform_sweep(cname, cause, ename, effect, theiler_window,
                               db, rng, ccm_settings):
    """Run `run_analysis_for_embedding` for every swept (E, tau) embedding.

    E ranges over ccm_settings['sweep_embedding_dimensions'] and tau over
    ccm_settings['sweep_delays']; when E is not greater than 1 only tau = 1
    is tried. Embeddings whose delay-vector count is below
    embedding_dimension + 2 are reported on stderr and skipped.
    """
    for dim in ccm_settings['sweep_embedding_dimensions']:
        if dim > 1:
            lag_choices = ccm_settings['sweep_delays']
        else:
            lag_choices = [1]

        for lag in lag_choices:
            sys.stderr.write(' Running for E={}, tau={}\n'.format(dim, lag))

            candidate = pyembedding.Embedding(
                effect, delays=tuple(range(0, dim * lag, lag)))

            long_enough = not (
                candidate.delay_vector_count
                < candidate.embedding_dimension + 2)
            if long_enough:
                run_analysis_for_embedding(
                    cname, cause, ename, effect, candidate, theiler_window,
                    ccm_settings['n_ccm_bootstraps'], db, rng)
            else:
                sys.stderr.write(
                    ' Lmax < Lmin; skipping E={}, tau={}\n'.format(dim, lag))
def run_analysis_uniform_sweep(cname, cause, ename, effect, theiler_window):
    """Run `run_analysis_for_embedding` for every swept (E, tau) embedding.

    E ranges over SWEEP_EMBEDDING_DIMENSIONS and tau over SWEEP_DELAYS; when
    E is not greater than 1 only tau = 1 is tried. Embeddings whose
    delay-vector count is below embedding_dimension + 2 are reported on
    stderr and skipped.
    """
    for dim in SWEEP_EMBEDDING_DIMENSIONS:
        if dim > 1:
            lag_choices = SWEEP_DELAYS
        else:
            lag_choices = [1]

        for lag in lag_choices:
            sys.stderr.write(' Running for E={}, tau={}\n'.format(dim, lag))

            candidate = pyembedding.Embedding(
                effect, delays=tuple(range(0, dim * lag, lag)))

            long_enough = not (
                candidate.delay_vector_count
                < candidate.embedding_dimension + 2)
            if long_enough:
                run_analysis_for_embedding(cname, cause, ename, effect,
                                           candidate, theiler_window)
            else:
                sys.stderr.write(
                    ' Lmax < Lmin; skipping E={}, tau={}\n'.format(dim, lag))
def run_analysis_max_ccm_rho(cname, cause, ename, effect, theiler_window, db,
                             rng, ccm_settings):
    """Choose the embedding of `effect` with the highest `run_ccm` value.

    Sweeps E over ccm_settings['sweep_embedding_dimensions'] and tau over
    ccm_settings['sweep_delays'] (a single tau of 1 when E <= 1), embeds
    `effect` with delays (0, tau, ..., (E - 1) * tau), and calls `run_ccm`
    for each embedding. The embedding with the largest returned correlation
    is passed to `run_analysis_for_embedding`.

    Parameters:
        cname, cause: name and time series of the putative cause.
        ename, effect: name and time series of the putative effect.
        theiler_window: forwarded unchanged to `run_ccm` and the analysis.
        db, rng: database handle and random number generator, forwarded.
        ccm_settings: mapping with 'sweep_embedding_dimensions',
            'sweep_delays', 'n_ccm_bootstraps' and, optionally,
            'delta_tau_termination'. When the latter is set, the tau sweep
            for a given E stops once
            E * (tau - best tau so far for this E) >= delta_tau_termination.

    Raises:
        ValueError: if no (E, tau) combination produced a usable correlation
            (every embedding was too short, or every correlation was NaN).
    """
    delta_tau_termination = ccm_settings.get('delta_tau_termination')

    max_corr = float('-inf')
    max_corr_emb = None
    max_corr_Etau = None

    for E in ccm_settings['sweep_embedding_dimensions']:
        max_corr_this_E = float('-inf')
        max_corr_tau_this_E = None

        for tau in (ccm_settings['sweep_delays'] if E > 1 else [1]):
            delays = tuple(range(0, E * tau, tau))
            embedding = pyembedding.Embedding(effect, delays=delays)
            if embedding.delay_vector_count < embedding.embedding_dimension + 2:
                sys.stderr.write(
                    ' Lmax < Lmin; skipping E={}, tau={}\n'.format(E, tau))
                continue

            corr = run_ccm(cname, ename, embedding, cause, theiler_window, db)
            sys.stderr.write(' corr for E={}, tau={} : {}\n'.format(
                E, tau, corr))

            # A NaN correlation compares False here and is never selected.
            if corr > max_corr:
                max_corr = corr
                max_corr_emb = embedding
                max_corr_Etau = (E, tau)

            if delta_tau_termination is not None:
                if corr > max_corr_this_E:
                    max_corr_this_E = corr
                    max_corr_tau_this_E = tau

                # max_corr_tau_this_E stays None while every correlation seen
                # for this E is NaN; there is no maximum to measure from yet,
                # so the termination test must be skipped rather than
                # evaluating `tau - None`.
                if (max_corr_tau_this_E is not None
                        and E * (tau - max_corr_tau_this_E) >= delta_tau_termination):
                    sys.stderr.write(
                        '{} taus since a maximum for this E; assuming found.\n'
                        .format(delta_tau_termination))
                    break

    if max_corr_Etau is None:
        raise ValueError(
            'No usable (E, tau) found for {!r} -> {!r}: every embedding was '
            'skipped or produced a NaN correlation.'.format(cname, ename))

    sys.stderr.write(' Using E={}, tau = {}\n'.format(*max_corr_Etau))
    run_analysis_for_embedding(cname, cause, ename, effect, max_corr_emb,
                               theiler_window,
                               ccm_settings['n_ccm_bootstraps'], db, rng)
def run_analysis_max_ccm_rho(cname, cause, ename, effect, theiler_window):
    """Choose the embedding of `effect` with the highest `run_ccm` value.

    Sweeps E over SWEEP_EMBEDDING_DIMENSIONS and tau over SWEEP_DELAYS (a
    single tau of 1 when E <= 1), embeds `effect` with delays
    (0, tau, ..., (E - 1) * tau), and calls `run_ccm` for each embedding. The
    embedding with the largest returned correlation is passed to
    `run_analysis_for_embedding`.

    Parameters:
        cname, cause: name and time series of the putative cause.
        ename, effect: name and time series of the putative effect.
        theiler_window: forwarded unchanged to `run_ccm` and the analysis.

    Raises:
        ValueError: if no (E, tau) combination produced a usable correlation
            (every embedding was too short, or every correlation was NaN).
    """
    max_corr = float('-inf')
    max_corr_emb = None
    max_corr_Etau = None

    for E in SWEEP_EMBEDDING_DIMENSIONS:
        for tau in (SWEEP_DELAYS if E > 1 else [1]):
            delays = tuple(range(0, E * tau, tau))
            embedding = pyembedding.Embedding(effect, delays=delays)
            if embedding.delay_vector_count < embedding.embedding_dimension + 2:
                sys.stderr.write(
                    ' Lmax < Lmin; skipping E={}, tau={}\n'.format(E, tau))
                continue

            corr = run_ccm(cname, ename, embedding, cause, theiler_window)
            sys.stderr.write(' corr for E={}, tau={} : {}\n'.format(
                E, tau, corr))

            # A NaN correlation compares False here and is never selected.
            if corr > max_corr:
                max_corr = corr
                max_corr_emb = embedding
                max_corr_Etau = (E, tau)

    if max_corr_Etau is None:
        raise ValueError(
            'No usable (E, tau) found for {!r} -> {!r}: every embedding was '
            'skipped or produced a NaN correlation.'.format(cname, ename))

    sys.stderr.write(' Using E={}, tau = {}\n'.format(*max_corr_Etau))
    run_analysis_for_embedding(cname, cause, ename, effect, max_corr_emb,
                               theiler_window)
def run_analysis_max_univariate_prediction_plus_lags(cname, cause, ename,
                                                     effect, theiler_window,
                                                     db, rng, ccm_settings):
    """Choose (E, tau) by univariate prediction, then compare cross-map lags.

    Step 1: for each E in ccm_settings['sweep_embedding_dimensions'] and tau
    in ccm_settings['sweep_delays'] (a single tau of 1 when E <= 1),
    `effect[:-1]` is embedded with delays (0, tau, ..., (E - 1) * tau) and
    used to predict `effect[1:]`. Each correlation is inserted into
    `univariate_predictions`, and the (E, tau) with the highest correlation
    is kept.

    Step 2: for every lag in [-max_ccm_lag, max_ccm_lag], `effect` is embedded
    with delays (lag, lag + tau, ...) and `run_analysis_for_embedding` is
    called; it is expected to return (corrs, pvalue_increase,
    pvalue_positive). The lag with the highest median correlation is tracked
    separately for negative lags, positive lags and lag 0, and one summary row
    is inserted into `ccm_lag_tests`.

    Parameters:
        cname, cause: name and time series of the putative cause.
        ename, effect: name and time series of the putative effect.
        theiler_window: forwarded unchanged to the prediction and analysis
            calls.
        db: object with an `execute(sql, params)` method.
        rng: random number generator forwarded to
            `run_analysis_for_embedding`.
        ccm_settings: mapping with 'sweep_embedding_dimensions',
            'sweep_delays', 'n_ccm_bootstraps', 'max_ccm_lag' and, optionally,
            'delta_tau_termination' (early stop of the tau sweep once
            E * (tau - best tau so far for this E) reaches that value).

    Raises:
        ValueError: if no (E, tau) combination produced a usable correlation.
    """
    delta_tau_termination = ccm_settings.get('delta_tau_termination')

    # The table definition does not depend on E or tau; issue it once.
    db.execute(
        'CREATE TABLE IF NOT EXISTS univariate_predictions (variable, delays, correlation)'
    )

    # ---- Step 1: pick (E, tau) by univariate prediction skill ----
    max_corr = float('-inf')
    max_corr_Etau = None
    for E in ccm_settings['sweep_embedding_dimensions']:
        max_corr_this_E = float('-inf')
        max_corr_tau_this_E = None

        for tau in (ccm_settings['sweep_delays'] if E > 1 else [1]):
            delays = tuple(range(0, E * tau, tau))
            embedding = pyembedding.Embedding(effect[:-1], delays)
            if embedding.delay_vector_count < embedding.embedding_dimension + 2:
                sys.stderr.write(
                    ' Lmax < Lmin; skipping E={}, tau={}\n'.format(E, tau))
                continue

            eff_off, eff_off_pred = embedding.simplex_predict_using_embedding(
                embedding, effect[1:], theiler_window=theiler_window)
            corr = numpy.corrcoef(eff_off, eff_off_pred)[0, 1]

            db.execute('INSERT INTO univariate_predictions VALUES (?,?,?)',
                       [ename, str(delays), corr])
            sys.stderr.write(' corr for E={}, tau={} : {}\n'.format(
                E, tau, corr))

            # A NaN correlation compares False here and is never selected.
            if corr > max_corr:
                max_corr = corr
                max_corr_Etau = (E, tau)

            if delta_tau_termination is not None:
                if corr > max_corr_this_E:
                    max_corr_this_E = corr
                    max_corr_tau_this_E = tau

                # max_corr_tau_this_E stays None while every correlation seen
                # for this E is NaN; skip the test rather than evaluating
                # `tau - None`.
                if (max_corr_tau_this_E is not None
                        and E * (tau - max_corr_tau_this_E) >= delta_tau_termination):
                    sys.stderr.write(
                        '{} taus since a maximum for this E; assuming found.\n'
                        .format(delta_tau_termination))
                    break

    if max_corr_Etau is None:
        raise ValueError(
            'No usable (E, tau) found for effect {!r}: every embedding was '
            'skipped or produced a NaN correlation.'.format(ename))

    E, tau = max_corr_Etau

    # ---- Step 2: sweep cross-map lags with the chosen (E, tau) ----
    def lag_record(lag, corrs=None, pvalue_increase=None,
                   pvalue_positive=None):
        # Bundle everything recorded about one lag; unset fields stay None.
        return {
            'lag': lag,
            'corrs': corrs,
            'corr_med': None if corrs is None else numpy.median(corrs),
            'pvalue_increase': pvalue_increase,
            'pvalue_positive': pvalue_positive,
        }

    max_ccm_lag = ccm_settings['max_ccm_lag']
    zero = lag_record(0)
    best_neg = lag_record(None)
    best_pos = lag_record(None)

    for lag in range(-max_ccm_lag, max_ccm_lag + 1):
        sys.stderr.write(' Using E = {}, tau = {}, lag = {}\n'.format(
            E, tau, lag))
        delays = tuple(range(lag, lag + E * tau, tau))
        max_corr_emb = pyembedding.Embedding(effect, delays)
        corrs, pvalue_increase, pvalue_positive = run_analysis_for_embedding(
            cname, cause, ename, effect, max_corr_emb, theiler_window,
            ccm_settings['n_ccm_bootstraps'], db, rng)
        current = lag_record(lag, corrs, pvalue_increase, pvalue_positive)

        if lag == 0:
            zero = current
        elif lag < 0:
            if (best_neg['lag'] is None
                    or current['corr_med'] > best_neg['corr_med']):
                best_neg = current
        elif (best_pos['lag'] is None
              or current['corr_med'] > best_pos['corr_med']):
            best_pos = current

    def beats_zero(candidate):
        # An empty side (no lag of that sign was swept) never beats lag 0;
        # the explicit check avoids comparing None with a float.
        return (candidate['lag'] is not None
                and candidate['corr_med'] > zero['corr_med'])

    # Best negative-or-zero lag and best positive-or-zero lag.
    best_nonpos = best_neg if beats_zero(best_neg) else zero
    best_nonneg = best_pos if beats_zero(best_pos) else zero

    # NOTE(review): with max_ccm_lag < 1 one side has no lags and
    # inverse_quantile below receives None; what happens then depends on
    # statutils -- confirm max_ccm_lag >= 1 is a precondition.

    # Test if negative is better than nonnegative
    pvalue_neg_best = 1.0 - numpy.mean(
        statutils.inverse_quantile(best_nonneg['corrs'], best_neg['corrs']))

    # Test if nonpositive is better than positive
    pvalue_nonpos_best = 1.0 - numpy.mean(
        statutils.inverse_quantile(best_pos['corrs'], best_nonpos['corrs']))

    db.execute('''CREATE TABLE IF NOT EXISTS ccm_lag_tests (
        cause, effect,
        neg_lag, neg_corr_med, neg_pvalue_positive, neg_pvalue_increase, neg_pvalue_best,
        nonpos_lag, nonpos_corr_med, nonpos_pvalue_positive, nonpos_pvalue_increase, nonpos_pvalue_best,
        pos_lag, pos_corr_med, pos_pvalue_positive, pos_pvalue_increase,
        nonneg_lag, nonneg_corr_med, nonneg_pvalue_positive, nonneg_pvalue_increase
    )''')

    def summary(record):
        # Column order shared by every lag group in ccm_lag_tests.
        return [record['lag'], record['corr_med'],
                record['pvalue_positive'], record['pvalue_increase']]

    db.execute(
        'INSERT INTO ccm_lag_tests VALUES (?,?, ?,?,?,?,?, ?,?,?,?,?, ?,?,?,?, ?,?,?,?)',
        [cname, ename]
        + summary(best_neg) + [pvalue_neg_best]
        + summary(best_nonpos) + [pvalue_nonpos_best]
        + summary(best_pos)
        + summary(best_nonneg))
def main():
    """Command-line entry point: run the analysis for every ordered pair.

    Reads the parsed arguments, refuses to clobber the input or an existing
    output file (unless --overwrite-output was given), loads the data, and
    opens the output sqlite database. For every ordered pair of distinct
    variables (cause, effect) it builds an embedding of the effect -- either
    from a per-variable (E, tau) ('uniform' method) or via
    `identify_projection_cross` ('projection' method) -- and calls
    `run_analysis`.
    """
    args = parse_arguments()

    E = args.embedding_dimension
    Emin = args.min_embedding_dimension
    Emax = args.max_embedding_dimension
    tau = args.embedding_lag
    taumin = args.min_embedding_lag
    taumax = args.max_embedding_lag
    dt = args.temporal_separation
    max_lag = args.max_cross_map_lag
    lag_skip = args.cross_map_lag_skip
    n_bootstraps = args.bootstraps
    cores = args.cores

    # Check input & output files.
    # `parser` is not defined in this function, and ArgumentParser.exit takes
    # the exit status (not the message) as its first argument; report the
    # error the same way as the check below instead.
    if args.input_filename == args.output_filename:
        sys.stderr.write(
            'Input filename and output filename cannot be the same.\n')
        sys.exit(1)
    if os.path.exists(args.output_filename):
        if args.overwrite_output:
            os.remove(args.output_filename)
        else:
            sys.stderr.write(
                'Output filename {0} exists. Delete it or use --overwrite-output.\n'
                .format(args.output_filename))
            sys.exit(1)

    # Load input data
    data = load_data(args.input_filename, args.variable, args.table)

    # Set up output data
    db = sqlite3.connect(args.output_filename)
    try:
        if args.identification_target == 'self':
            assert args.method == 'uniform'
            if E is None:
                Etau_dict = identify_Etau(db, data, Emin, Emax, taumin,
                                          taumax, dt, cores)
            else:
                Etau_dict = OrderedDict([(var_name, (E, tau))
                                         for var_name in data.keys()])
        else:
            assert args.identification_target == 'cross'
            assert args.method == 'projection'

        for cname, cause in data.iteritems():
            for ename, effect in data.iteritems():
                if cname == ename:
                    continue

                if args.method == 'uniform':
                    effect_E, effect_tau = Etau_dict[ename]
                    # The skip check has to come before the embedding is
                    # built: `effect_E * effect_tau` fails when effect_E is
                    # None.
                    if effect_E is None:
                        sys.stderr.write(
                            'Skipping {} as effect\n'.format(ename))
                        continue
                    emb = pyembedding.Embedding(
                        effect,
                        range(0, effect_E * effect_tau, effect_tau))
                elif args.method == 'projection':
                    emb = identify_projection_cross(db, cause, effect, dt)

                sys.stderr.write('Running {}, {}\n'.format(cname, ename))
                run_analysis(db, cname, cause, ename, effect, emb, dt,
                             max_lag, lag_skip, n_bootstraps, cores)

        # Harmless if the helpers already committed their own writes.
        db.commit()
    finally:
        db.close()