def run_analysis_for_embedding(cname, cause, ename, effect, embedding, theiler_window, n_bootstraps, db, rng):
    # min library size: embedding_dimension + 2,
    # so vectors should usually have embedding_dimension + 1 neighbors available
    Lmin = embedding.embedding_dimension + 2

    # max library size: just the number of available delay vectors
    Lmax = embedding.delay_vector_count

    assert Lmax > Lmin
    sys.stderr.write(' Using Lmin = {}, Lmax = {}\n'.format(Lmin, Lmax))

    corrs_Lmin = run_ccm_bootstraps(cname, ename, embedding, cause, Lmin, theiler_window, n_bootstraps, db, rng)
    corrs_Lmax = run_ccm_bootstraps(cname, ename, embedding, cause, Lmax, theiler_window, n_bootstraps, db, rng)

    pvalue_increase = 1.0 - numpy.mean(statutils.inverse_quantile(corrs_Lmin, corrs_Lmax))
    pvalue_positive = statutils.inverse_quantile(corrs_Lmax, 0.0).tolist()

    db.execute('CREATE TABLE IF NOT EXISTS ccm_increase (cause, effect, Lmin, Lmax, delays, pvalue_increase)')
    db.execute(
        'INSERT INTO ccm_increase VALUES (?,?,?,?,?,?)',
        [cname, ename, Lmin, Lmax, str(embedding.delays), pvalue_increase]
    )
    db.commit()

    return corrs_Lmax, pvalue_increase, pvalue_positive
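# The increase test above hinges on statutils.inverse_quantile. The sketch
# below is an illustrative stand-in for the semantics assumed here (the
# empirical CDF of `sample` evaluated at each entry of `values`); it is not
# the actual statutils implementation.
import numpy

def inverse_quantile_sketch(sample, values):
    # Fraction of bootstrap values in `sample` lying below each reference value.
    sample = numpy.asarray(sample)
    values = numpy.atleast_1d(values)
    return numpy.array([numpy.mean(sample < v) for v in values])

# Under this reading, pvalue_increase is the average probability that a
# bootstrap correlation at Lmax fails to exceed the Lmin distribution:
# pvalue_increase = 1.0 - inverse_quantile_sketch(corrs_Lmin, corrs_Lmax).mean()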
def run_analysis_surrogates(cname, cause, ename, effect, delays, theiler_window, n_bootstraps, db, rng):
    #db.execute('CREATE TABLE IF NOT EXISTS surrogate_corrs (cause, effect, use_effect_surrogates, correlation)');
    db.execute('CREATE TABLE IF NOT EXISTS surrogate_test (cause, effect, use_effect_surrogates, corr_raw, corr_surr_q95, pvalue_greater)')
    db.execute('CREATE TABLE IF NOT EXISTS surrogate_dist (cause, effect, use_effect_surrogates, q0, q1, q2_5, q5, q25, q50, q75, q95, q97_5, q99, q100)')

    effect_raw_embedding = pyembedding.Embedding(effect, delays)
    corr_raw = effect_raw_embedding.ccm(effect_raw_embedding, cause, theiler_window=theiler_window)[0]['correlation']

    # Run with cause surrogates only
    cause_mean, cause_anom = calculate_mean_anomaly(cause, 12)
    corrs_cause_only = []
    for i in range(n_bootstraps):
        cause_surr = randomize_surrogate(cause_mean, cause_anom, rng)
        ccm_result, y_actual, y_pred = effect_raw_embedding.ccm(effect_raw_embedding, cause_surr, theiler_window=theiler_window)
        corrs_cause_only.append(ccm_result['correlation'])
        #db.execute('INSERT INTO surrogate_corrs VALUES (?,?,?,?)', [cname, ename, 0, ccm_result['correlation']])
    db.execute(
        'INSERT INTO surrogate_test VALUES (?,?,?,?,?,?)',
        [
            cname, ename, 0, corr_raw,
            numpy.percentile(corrs_cause_only, 95),
            1.0 - float(statutils.inverse_quantile(corrs_cause_only, corr_raw))
        ]
    )
    db.execute(
        'INSERT INTO surrogate_dist VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)',
        [cname, ename, 0] + numpy.percentile(corrs_cause_only, [0, 1, 2.5, 5, 25, 50, 75, 95, 97.5, 99, 100]).tolist()
    )

    # Run with cause and effect surrogates
    effect_mean, effect_anom = calculate_mean_anomaly(effect, 12)
    corrs_cause_and_effect = []
    for i in range(n_bootstraps):
        cause_surr = randomize_surrogate(cause_mean, cause_anom, rng)
        effect_surr = randomize_surrogate(effect_mean, effect_anom, rng)
        effect_surr_embedding = pyembedding.Embedding(effect_surr, delays)
        ccm_result, y_actual, y_pred = effect_surr_embedding.ccm(effect_surr_embedding, cause_surr, theiler_window=theiler_window)
        corrs_cause_and_effect.append(ccm_result['correlation'])
        #db.execute('INSERT INTO surrogate_corrs VALUES (?,?,?,?)', [cname, ename, 1, ccm_result['correlation']])
    db.execute(
        'INSERT INTO surrogate_test VALUES (?,?,?,?,?,?)',
        [
            cname, ename, 1, corr_raw,
            numpy.percentile(corrs_cause_and_effect, 95),
            1.0 - float(statutils.inverse_quantile(corrs_cause_and_effect, corr_raw))
        ]
    )
    db.execute(
        'INSERT INTO surrogate_dist VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)',
        [cname, ename, 1] + numpy.percentile(corrs_cause_and_effect, [0, 1, 2.5, 5, 25, 50, 75, 95, 97.5, 99, 100]).tolist()
    )
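# The surrogate test relies on two helpers defined elsewhere,
# calculate_mean_anomaly and randomize_surrogate. The sketches below are
# hypothetical: they show the seasonal (period-12) surrogate scheme that the
# helper names and arguments suggest, i.e. split the series into a repeating
# monthly mean plus anomalies, then permute the anomalies while keeping the
# seasonal cycle fixed. The actual helpers may differ in detail.
import numpy

def calculate_mean_anomaly_sketch(x, period):
    # Monthly (period-wise) means and the residual anomalies.
    x = numpy.asarray(x, dtype=float)
    mean = numpy.array([x[i::period].mean() for i in range(period)])
    seasonal = numpy.resize(mean, len(x))
    return mean, x - seasonal

def randomize_surrogate_sketch(mean, anom, rng):
    # Seasonal cycle plus randomly permuted anomalies.
    seasonal = numpy.resize(mean, len(anom))
    return seasonal + rng.permutation(anom)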
def run_ccm_bootstraps(cname, ename, embedding, cause, L, theiler_window, n_bootstraps, db, rng):
    assert isinstance(embedding, pyembedding.Embedding)

    corrs = []
    for i in range(n_bootstraps):
        sampled_embedding = embedding.sample_embedding(L, replace=True, rng=rng)
        ccm_result, y_actual, y_pred = sampled_embedding.ccm(embedding, cause, theiler_window=theiler_window)
        corrs.append(ccm_result['correlation'])
    corrs = numpy.array(corrs)

    pvalue_positive = statutils.inverse_quantile(corrs, 0.0).tolist()

    db.execute('CREATE TABLE IF NOT EXISTS ccm_correlation_dist (cause, effect, L, delays, mean, sd, pvalue_positive, q0, q1, q2_5, q5, q25, q50, q75, q95, q97_5, q99, q100)')
    db.execute(
        'INSERT INTO ccm_correlation_dist VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)',
        [cname, ename, L, str(embedding.delays), corrs.mean(), corrs.std(), pvalue_positive] +
        [x for x in numpy.percentile(corrs, [0, 1, 2.5, 5, 25, 50, 75, 95, 97.5, 99, 100])]
    )

    db.execute('CREATE TABLE IF NOT EXISTS ccm_correlations (cause, effect, L, delays, correlation)')
    for corr in corrs:
        db.execute(
            'INSERT INTO ccm_correlations VALUES (?,?,?,?,?)',
            [cname, ename, L, str(embedding.delays), corr]
        )

    return numpy.array(corrs)
def run_ccm_bootstraps(cname, ename, embedding, cause, L, theiler_window, n_bootstraps, db, rng):
    assert isinstance(embedding, projection.ProjectionEmbedding)

    corrs = []
    for i in range(n_bootstraps):
        sampled_embedding = embedding.sample_embedding(L, replace=True, rng=rng)
        ccm_result, y_actual, y_pred = sampled_embedding.ccm(embedding, cause, theiler_window=theiler_window)
        corrs.append(ccm_result['correlation'])
    corrs = numpy.array(corrs)

    db.execute('CREATE TABLE IF NOT EXISTS ccm_correlation_dist (cause, effect, L, d, dmax, mean, sd, pvalue_positive, q0, q1, q2_5, q5, q25, q50, q75, q95, q97_5, q99, q100)')
    db.execute(
        'INSERT INTO ccm_correlation_dist VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)',
        [cname, ename, L, embedding.d, embedding.dmax, corrs.mean(), corrs.std(), statutils.inverse_quantile(corrs, 0.0).tolist()] +
        [x for x in numpy.percentile(corrs, [0, 1, 2.5, 5, 25, 50, 75, 95, 97.5, 99, 100])]
    )

    db.execute('CREATE TABLE IF NOT EXISTS ccm_correlations (cause, effect, L, d, dmax, correlation)')
    for corr in corrs:
        db.execute(
            'INSERT INTO ccm_correlations VALUES (?,?,?,?,?,?)',
            [cname, ename, L, embedding.d, embedding.dmax, corr]
        )

    return numpy.array(corrs)
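# Hypothetical call sequence for either run_ccm_bootstraps variant above.
# The series, delays, database path, and seed are placeholders; the Embedding
# constructor signature is taken from how it is used elsewhere in this module.
import sqlite3
import numpy

# db = sqlite3.connect('ccm_results.sqlite')
# rng = numpy.random.RandomState(42)
# emb = pyembedding.Embedding(effect_series, (0, 1, 2))
# corrs = run_ccm_bootstraps('precip', 'cases', emb, cause_series,
#                            L=emb.delay_vector_count, theiler_window=12,
#                            n_bootstraps=100, db=db, rng=rng)
# db.commit()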
def run_analysis_max_univariate_prediction_plus_lags(cname, cause, ename, effect, theiler_window, db, rng, ccm_settings):
    if 'delta_tau_termination' in ccm_settings:
        delta_tau_termination = ccm_settings['delta_tau_termination']
    else:
        delta_tau_termination = None

    max_corr = float('-inf')
    max_corr_Etau = None
    for E in ccm_settings['sweep_embedding_dimensions']:
        if delta_tau_termination is not None:
            max_corr_this_E = float('-inf')
            max_corr_tau_this_E = None
        for tau in (ccm_settings['sweep_delays'] if E > 1 else [1]):
            delays = tuple(range(0, E * tau, tau))
            embedding = pyembedding.Embedding(effect[:-1], delays)
            if embedding.delay_vector_count < embedding.embedding_dimension + 2:
                sys.stderr.write(' Lmax < Lmin; skipping E={}, tau={}\n'.format(E, tau))
                continue
            eff_off, eff_off_pred = embedding.simplex_predict_using_embedding(embedding, effect[1:], theiler_window=theiler_window)
            corr = numpy.corrcoef(eff_off, eff_off_pred)[0, 1]

            db.execute('CREATE TABLE IF NOT EXISTS univariate_predictions (variable, delays, correlation)')
            db.execute('INSERT INTO univariate_predictions VALUES (?,?,?)', [ename, str(delays), corr])
            sys.stderr.write(' corr for E={}, tau={} : {}\n'.format(E, tau, corr))

            if corr > max_corr:
                max_corr = corr
                max_corr_Etau = (E, tau)

            if delta_tau_termination is not None:
                if corr > max_corr_this_E:
                    max_corr_this_E = corr
                    max_corr_tau_this_E = tau
                if E * (tau - max_corr_tau_this_E) >= delta_tau_termination:
                    sys.stderr.write('{} taus since a maximum for this E; assuming found.\n'.format(delta_tau_termination))
                    break

    E, tau = max_corr_Etau
    max_ccm_lag = ccm_settings['max_ccm_lag']

    best_lag_neg = None
    best_lag_neg_corr_med = None
    best_lag_neg_corrs = None
    best_lag_neg_pvalue_increase = None
    best_lag_neg_pvalue_positive = None

    best_lag_pos = None
    best_lag_pos_corr_med = None
    best_lag_pos_corrs = None
    best_lag_pos_pvalue_increase = None
    best_lag_pos_pvalue_positive = None

    zero_corrs = None
    zero_corr_med = None
    zero_pvalue_increase = None
    zero_pvalue_positive = None

    for lag in range(-max_ccm_lag, max_ccm_lag + 1):
        sys.stderr.write(' Using E = {}, tau = {}, lag = {}\n'.format(E, tau, lag))
        delays = tuple(range(lag, lag + E * tau, tau))
        max_corr_emb = pyembedding.Embedding(effect, delays)
        corrs, pvalue_increase, pvalue_positive = run_analysis_for_embedding(
            cname, cause, ename, effect, max_corr_emb, theiler_window,
            ccm_settings['n_ccm_bootstraps'], db, rng
        )
        corr_med = numpy.median(corrs)

        if lag == 0:
            zero_corr_med = corr_med
            zero_corrs = corrs
            zero_pvalue_increase = pvalue_increase
            zero_pvalue_positive = pvalue_positive
        elif lag < 0 and (best_lag_neg is None or corr_med > best_lag_neg_corr_med):
            best_lag_neg = lag
            best_lag_neg_corr_med = corr_med
            best_lag_neg_corrs = corrs
            best_lag_neg_pvalue_increase = pvalue_increase
            best_lag_neg_pvalue_positive = pvalue_positive
        elif lag > 0 and (best_lag_pos is None or corr_med > best_lag_pos_corr_med):
            best_lag_pos = lag
            best_lag_pos_corr_med = corr_med
            best_lag_pos_corrs = corrs
            best_lag_pos_pvalue_increase = pvalue_increase
            best_lag_pos_pvalue_positive = pvalue_positive

    # Get the best negative-or-zero lag
    if best_lag_neg_corr_med > zero_corr_med:
        best_lag_nonpos = best_lag_neg
        best_lag_nonpos_corrs = best_lag_neg_corrs
        best_lag_nonpos_corr_med = best_lag_neg_corr_med
        best_lag_nonpos_pvalue_increase = best_lag_neg_pvalue_increase
        best_lag_nonpos_pvalue_positive = best_lag_neg_pvalue_positive
    else:
        best_lag_nonpos = 0
        best_lag_nonpos_corrs = zero_corrs
        best_lag_nonpos_corr_med = zero_corr_med
        best_lag_nonpos_pvalue_increase = zero_pvalue_increase
        best_lag_nonpos_pvalue_positive = zero_pvalue_positive

    # Get the best positive-or-zero lag
    if best_lag_pos_corr_med > zero_corr_med:
        best_lag_nonneg = best_lag_pos
        best_lag_nonneg_corrs = best_lag_pos_corrs
        best_lag_nonneg_corr_med = best_lag_pos_corr_med
        best_lag_nonneg_pvalue_increase = best_lag_pos_pvalue_increase
        best_lag_nonneg_pvalue_positive = best_lag_pos_pvalue_positive
    else:
        best_lag_nonneg = 0
        best_lag_nonneg_corrs = zero_corrs
        best_lag_nonneg_corr_med = zero_corr_med
        best_lag_nonneg_pvalue_increase = zero_pvalue_increase
        best_lag_nonneg_pvalue_positive = zero_pvalue_positive

    # Test if negative is better than nonnegative
    pvalue_neg_best = 1.0 - numpy.mean(statutils.inverse_quantile(best_lag_nonneg_corrs, best_lag_neg_corrs))

    # Test if nonpositive is better than positive
    pvalue_nonpos_best = 1.0 - numpy.mean(statutils.inverse_quantile(best_lag_pos_corrs, best_lag_nonpos_corrs))

    db.execute('''CREATE TABLE IF NOT EXISTS ccm_lag_tests (
        cause, effect,
        neg_lag, neg_corr_med, neg_pvalue_positive, neg_pvalue_increase, neg_pvalue_best,
        nonpos_lag, nonpos_corr_med, nonpos_pvalue_positive, nonpos_pvalue_increase, nonpos_pvalue_best,
        pos_lag, pos_corr_med, pos_pvalue_positive, pos_pvalue_increase,
        nonneg_lag, nonneg_corr_med, nonneg_pvalue_positive, nonneg_pvalue_increase
    )''')
    db.execute(
        'INSERT INTO ccm_lag_tests VALUES (?,?, ?,?,?,?,?, ?,?,?,?,?, ?,?,?,?, ?,?,?,?)',
        [
            cname, ename,
            best_lag_neg, best_lag_neg_corr_med, best_lag_neg_pvalue_positive, best_lag_neg_pvalue_increase, pvalue_neg_best,
            best_lag_nonpos, best_lag_nonpos_corr_med, best_lag_nonpos_pvalue_positive, best_lag_nonpos_pvalue_increase, pvalue_nonpos_best,
            best_lag_pos, best_lag_pos_corr_med, best_lag_pos_pvalue_positive, best_lag_pos_pvalue_increase,
            best_lag_nonneg, best_lag_nonneg_corr_med, best_lag_nonneg_pvalue_positive, best_lag_nonneg_pvalue_increase
        ]
    )
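# Example ccm_settings dictionary covering every key read by the function
# above. The values are illustrative only and mirror the hard-coded variant
# that follows; the dictionary name is hypothetical.
example_ccm_settings = {
    'sweep_embedding_dimensions': range(1, 11),  # E values to sweep
    'sweep_delays': range(1, 61),                # tau values to sweep when E > 1
    'delta_tau_termination': None,               # optional early-stop window; key may be omitted
    'max_ccm_lag': 60,                           # cross-map lags from -max to +max
    'n_ccm_bootstraps': 100,                     # bootstrap replicates per library size
}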
def run_analysis_max_univariate_prediction_plus_lags(cname, cause, ename, effect, theiler_window, db, rng):
    delta_tau_termination = None

    max_corr = float('-inf')
    max_corr_Etau = None
    for E in range(1, 11):
        if delta_tau_termination is not None:
            max_corr_this_E = float('-inf')
            max_corr_tau_this_E = None
        for tau in (range(1, 61) if E > 1 else [1]):
            delays = tuple(range(0, E * tau, tau))
            embedding = pyembedding.Embedding(effect[:-1], delays)
            if embedding.delay_vector_count < embedding.embedding_dimension + 2:
                sys.stderr.write(' Lmax < Lmin; skipping E={}, tau={}\n'.format(E, tau))
                continue
            eff_off, eff_off_pred = embedding.simplex_predict_using_embedding(embedding, effect[1:], theiler_window=theiler_window)
            corr = numpy.corrcoef(eff_off, eff_off_pred)[0, 1]

            db.execute('CREATE TABLE IF NOT EXISTS univariate_predictions (variable, delays, correlation)')
            db.execute('INSERT INTO univariate_predictions VALUES (?,?,?)', [ename, str(delays), corr])
            sys.stderr.write(' corr for E={}, tau={} : {}\n'.format(E, tau, corr))

            if corr > max_corr:
                max_corr = corr
                max_corr_Etau = (E, tau)

            if delta_tau_termination is not None:
                if corr > max_corr_this_E:
                    max_corr_this_E = corr
                    max_corr_tau_this_E = tau
                if E * (tau - max_corr_tau_this_E) >= delta_tau_termination:
                    sys.stderr.write('{} taus since a maximum for this E; assuming found.\n'.format(delta_tau_termination))
                    break

    E, tau = max_corr_Etau
    max_ccm_lag = 60

    best_lag_neg = None
    best_lag_neg_corr_med = None
    best_lag_neg_corrs = None
    best_lag_neg_pvalue_increase = None
    best_lag_neg_pvalue_positive = None

    best_lag_pos = None
    best_lag_pos_corr_med = None
    best_lag_pos_corrs = None
    best_lag_pos_pvalue_increase = None
    best_lag_pos_pvalue_positive = None

    zero_corrs = None
    zero_corr_med = None
    zero_pvalue_increase = None
    zero_pvalue_positive = None

    for lag in range(-max_ccm_lag, max_ccm_lag + 1):
        sys.stderr.write(' Using E = {}, tau = {}, lag = {}\n'.format(E, tau, lag))
        delays = tuple(range(lag, lag + E * tau, tau))
        max_corr_emb = pyembedding.Embedding(effect, delays)
        corrs, pvalue_increase, pvalue_positive = run_analysis_for_embedding(
            cname, cause, ename, effect, max_corr_emb, theiler_window, 100, db, rng
        )
        corr_med = numpy.median(corrs)

        if lag == 0:
            zero_corr_med = corr_med
            zero_corrs = corrs
            zero_pvalue_increase = pvalue_increase
            zero_pvalue_positive = pvalue_positive
        elif lag < 0 and (best_lag_neg is None or corr_med > best_lag_neg_corr_med):
            best_lag_neg = lag
            best_lag_neg_corr_med = corr_med
            best_lag_neg_corrs = corrs
            best_lag_neg_pvalue_increase = pvalue_increase
            best_lag_neg_pvalue_positive = pvalue_positive
        elif lag > 0 and (best_lag_pos is None or corr_med > best_lag_pos_corr_med):
            best_lag_pos = lag
            best_lag_pos_corr_med = corr_med
            best_lag_pos_corrs = corrs
            best_lag_pos_pvalue_increase = pvalue_increase
            best_lag_pos_pvalue_positive = pvalue_positive

    # Get the best negative-or-zero lag
    if best_lag_neg_corr_med > zero_corr_med:
        best_lag_nonpos = best_lag_neg
        best_lag_nonpos_corrs = best_lag_neg_corrs
        best_lag_nonpos_corr_med = best_lag_neg_corr_med
        best_lag_nonpos_pvalue_increase = best_lag_neg_pvalue_increase
        best_lag_nonpos_pvalue_positive = best_lag_neg_pvalue_positive
    else:
        best_lag_nonpos = 0
        best_lag_nonpos_corrs = zero_corrs
        best_lag_nonpos_corr_med = zero_corr_med
        best_lag_nonpos_pvalue_increase = zero_pvalue_increase
        best_lag_nonpos_pvalue_positive = zero_pvalue_positive

    # Get the best positive-or-zero lag
    if best_lag_pos_corr_med > zero_corr_med:
        best_lag_nonneg = best_lag_pos
        best_lag_nonneg_corrs = best_lag_pos_corrs
        best_lag_nonneg_corr_med = best_lag_pos_corr_med
        best_lag_nonneg_pvalue_increase = best_lag_pos_pvalue_increase
        best_lag_nonneg_pvalue_positive = best_lag_pos_pvalue_positive
    else:
        best_lag_nonneg = 0
        best_lag_nonneg_corrs = zero_corrs
        best_lag_nonneg_corr_med = zero_corr_med
        best_lag_nonneg_pvalue_increase = zero_pvalue_increase
        best_lag_nonneg_pvalue_positive = zero_pvalue_positive

    # Test if negative is better than nonnegative
    pvalue_neg_best = 1.0 - numpy.mean(statutils.inverse_quantile(best_lag_nonneg_corrs, best_lag_neg_corrs))

    # Test if nonpositive is better than positive
    pvalue_nonpos_best = 1.0 - numpy.mean(statutils.inverse_quantile(best_lag_pos_corrs, best_lag_nonpos_corrs))

    db.execute('''CREATE TABLE IF NOT EXISTS ccm_lag_tests (
        cause, effect,
        neg_lag, neg_corr_med, neg_pvalue_positive, neg_pvalue_increase, neg_pvalue_best,
        nonpos_lag, nonpos_corr_med, nonpos_pvalue_positive, nonpos_pvalue_increase, nonpos_pvalue_best,
        pos_lag, pos_corr_med, pos_pvalue_positive, pos_pvalue_increase,
        nonneg_lag, nonneg_corr_med, nonneg_pvalue_positive, nonneg_pvalue_increase
    )''')
    db.execute(
        'INSERT INTO ccm_lag_tests VALUES (?,?, ?,?,?,?,?, ?,?,?,?,?, ?,?,?,?, ?,?,?,?)',
        [
            cname, ename,
            best_lag_neg, best_lag_neg_corr_med, best_lag_neg_pvalue_positive, best_lag_neg_pvalue_increase, pvalue_neg_best,
            best_lag_nonpos, best_lag_nonpos_corr_med, best_lag_nonpos_pvalue_positive, best_lag_nonpos_pvalue_increase, pvalue_nonpos_best,
            best_lag_pos, best_lag_pos_corr_med, best_lag_pos_pvalue_positive, best_lag_pos_pvalue_increase,
            best_lag_nonneg, best_lag_nonneg_corr_med, best_lag_nonneg_pvalue_positive, best_lag_nonneg_pvalue_increase
        ]
    )
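# Illustrative helper (name is hypothetical) for reading back the univariate
# sweep written by the functions above; ordering by correlation shows the
# (E, tau) combinations that drove the choice of embedding.
def example_top_univariate_predictions(db, limit=5):
    # Returns the highest-correlation rows from the univariate_predictions table.
    return db.execute(
        'SELECT variable, delays, correlation FROM univariate_predictions '
        'ORDER BY correlation DESC LIMIT ?', [limit]
    ).fetchall()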
def run_analysis(db, cname, cause, ename, effect, emb, dt, max_lag, lag_skip, n_bootstraps, cores):
    #
    # L = args.library_size
    # assert L is None

    pool = multiprocessing.Pool(cores)

    lag_range = range(-max_lag, -lag_skip + 1, lag_skip) + range(0, max_lag + 1, lag_skip)
    args = [(cause, effect, emb, dt, lag, n_bootstraps) for lag in lag_range]
    results = pyembedding.pool_map(run_analysis_mappable, args, cores)
    # results = map(run_analysis_mappable, args)

    db.execute('CREATE TABLE IF NOT EXISTS correlations (cause, effect, lag, L, correlation)')
    db.execute('CREATE TABLE IF NOT EXISTS tests (cause, effect, lag, Lmin, Lmax, pval_positive, pval_increase)')
    db.execute('CREATE TABLE IF NOT EXISTS lagtests (cause, effect, best_lag, pval_positive, pval_increase, pval_neg_best, pval_nonpos_best)')

    best_lag_neg = None
    best_lag_neg_corr_med = None
    best_lag_neg_corrs = None
    best_lag_neg_pval_increase = None
    best_lag_neg_pval_positive = None

    best_lag_pos = None
    best_lag_pos_corr_med = None
    best_lag_pos_corrs = None
    best_lag_pos_pval_increase = None
    best_lag_pos_pval_positive = None

    zero_corrs = None
    zero_corr_med = None
    zero_pval_increase = None
    zero_pval_positive = None

    for lag, results_dict in results:
        Ls = results_dict.keys()
        Lmin = min(Ls)
        Lmax = max(Ls)

        corrs_Lmin = results_dict[Lmin]
        corrs_Lmax = results_dict[Lmax]
        if corrs_Lmin is None or corrs_Lmax is None:
            continue

        pval_positive = statutils.inverse_quantile(corrs_Lmax, 0.0).tolist()
        pval_increase = 1.0 - numpy.mean(statutils.inverse_quantile(corrs_Lmin, corrs_Lmax))
        db.execute(
            'INSERT INTO tests VALUES (?,?,?,?,?,?,?)',
            [cname, ename, lag, Lmin, Lmax, pval_positive, pval_increase]
        )

        corrs = corrs_Lmax
        corr_med = numpy.median(corrs)
        if lag == 0:
            zero_corr_med = corr_med
            zero_corrs = corrs
            zero_pval_increase = pval_increase
            zero_pval_positive = pval_positive
        elif lag < 0 and (best_lag_neg is None or corr_med > best_lag_neg_corr_med):
            best_lag_neg = lag
            best_lag_neg_corr_med = corr_med
            best_lag_neg_corrs = corrs
            best_lag_neg_pval_increase = pval_increase
            best_lag_neg_pval_positive = pval_positive
        elif lag > 0 and (best_lag_pos is None or corr_med > best_lag_pos_corr_med):
            best_lag_pos = lag
            best_lag_pos_corr_med = corr_med
            best_lag_pos_corrs = corrs
            best_lag_pos_pval_increase = pval_increase
            best_lag_pos_pval_positive = pval_positive

        for L in Ls:
            corrs = results_dict[L]
            for corr in corrs:
                db.execute(
                    'INSERT INTO correlations VALUES (?,?,?,?,?)',
                    [cname, ename, lag, L, corr]
                )

    # Get the best negative-or-zero lag
    if best_lag_neg_corr_med > zero_corr_med:
        best_lag_nonpos = best_lag_neg
        best_lag_nonpos_corrs = best_lag_neg_corrs
        best_lag_nonpos_corr_med = best_lag_neg_corr_med
        best_lag_nonpos_pval_increase = best_lag_neg_pval_increase
        best_lag_nonpos_pval_positive = best_lag_neg_pval_positive
    else:
        best_lag_nonpos = 0
        best_lag_nonpos_corrs = zero_corrs
        best_lag_nonpos_corr_med = zero_corr_med
        best_lag_nonpos_pval_increase = zero_pval_increase
        best_lag_nonpos_pval_positive = zero_pval_positive

    # Get the best positive-or-zero lag
    if best_lag_pos_corr_med > zero_corr_med:
        best_lag_nonneg = best_lag_pos
        best_lag_nonneg_corrs = best_lag_pos_corrs
        best_lag_nonneg_corr_med = best_lag_pos_corr_med
        best_lag_nonneg_pval_increase = best_lag_pos_pval_increase
        best_lag_nonneg_pval_positive = best_lag_pos_pval_positive
    else:
        best_lag_nonneg = 0
        best_lag_nonneg_corrs = zero_corrs
        best_lag_nonneg_corr_med = zero_corr_med
        best_lag_nonneg_pval_increase = zero_pval_increase
        best_lag_nonneg_pval_positive = zero_pval_positive

    # Test if negative is better than nonnegative
    pval_neg_best = 1.0 - numpy.mean(statutils.inverse_quantile(best_lag_nonneg_corrs, best_lag_neg_corrs))

    # Test if nonpositive is better than positive
    pval_nonpos_best = 1.0 - numpy.mean(statutils.inverse_quantile(best_lag_pos_corrs, best_lag_nonpos_corrs))

    if best_lag_neg_corr_med > best_lag_pos_corr_med and best_lag_neg_corr_med > zero_corr_med:
        best_lag = best_lag_neg
        pval_increase = best_lag_neg_pval_increase
        pval_positive = best_lag_neg_pval_positive
    elif best_lag_pos_corr_med > best_lag_neg_corr_med and best_lag_pos_corr_med > zero_corr_med:
        best_lag = best_lag_pos
        pval_increase = best_lag_pos_pval_increase
        pval_positive = best_lag_pos_pval_positive
    else:
        best_lag = 0
        pval_increase = zero_pval_increase
        pval_positive = zero_pval_positive

    db.execute(
        'INSERT INTO lagtests VALUES (?,?,?,?,?,?,?)',
        [cname, ename, best_lag, pval_positive, pval_increase, pval_neg_best, pval_nonpos_best]
    )
    db.commit()
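# Illustrative helper (name is hypothetical) for reading back the per-pair
# summary written by run_analysis. The 0.05 thresholds are examples only,
# not part of the pipeline.
def example_significant_lagtests(db, alpha=0.05):
    # Returns (cause, effect) pairs whose cross-map skill is both positive
    # and increasing with library size at the chosen threshold.
    return db.execute(
        'SELECT cause, effect, best_lag, pval_positive, pval_increase '
        'FROM lagtests WHERE pval_positive < ? AND pval_increase < ?',
        [alpha, alpha]
    ).fetchall()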