예제 #1
0
def run_analysis_for_embedding(cname, cause, ename, effect, embedding,
                               theiler_window, n_bootstraps, db, rng):
    """Bootstrap CCM at the smallest and largest library sizes and compare them.

    Calls ``run_ccm_bootstraps`` first with the minimum library size and then
    with the maximum one, derives two summary statistics from the resulting
    correlation samples, writes one row to the ``ccm_increase`` table
    (created on demand) and commits.

    Parameters:
        cname, ename: labels stored in the ``cause`` / ``effect`` columns.
        cause: series forwarded to ``run_ccm_bootstraps``.
        effect: not used by this function.
        embedding: object exposing ``embedding_dimension``,
            ``delay_vector_count`` and ``delays``.
        theiler_window, n_bootstraps, rng: forwarded to ``run_ccm_bootstraps``.
        db: connection-like object providing ``execute`` and ``commit``.

    Returns:
        A tuple ``(corrs_Lmax, pvalue_increase, pvalue_positive)`` where
        ``corrs_Lmax`` is the bootstrap result for the largest library,
        ``pvalue_increase`` is ``1 - mean(inverse_quantile(corrs_Lmin,
        corrs_Lmax))`` and ``pvalue_positive`` is
        ``inverse_quantile(corrs_Lmax, 0.0).tolist()``.

    Raises:
        AssertionError: if the maximum library size is not strictly larger
            than the minimum one.
    """
    # Smallest library: embedding_dimension + 2, so vectors should usually
    # have embedding_dimension + 1 neighbors available.
    smallest_library = embedding.embedding_dimension + 2
    # Largest library: every available delay vector.
    largest_library = embedding.delay_vector_count

    assert largest_library > smallest_library
    sys.stderr.write('  Using Lmin = {}, Lmax = {}\n'.format(
        smallest_library, largest_library))

    def bootstrap_at(library_size):
        # Same arguments for both runs; only the library size varies.
        return run_ccm_bootstraps(cname, ename, embedding, cause,
                                  library_size, theiler_window, n_bootstraps,
                                  db, rng)

    # Order matters: both runs draw from the same rng.
    corrs_small = bootstrap_at(smallest_library)
    corrs_large = bootstrap_at(largest_library)

    quantiles_of_large = statutils.inverse_quantile(corrs_small, corrs_large)
    pvalue_increase = 1.0 - numpy.mean(quantiles_of_large)
    pvalue_positive = statutils.inverse_quantile(corrs_large, 0.0).tolist()

    db.execute(
        'CREATE TABLE IF NOT EXISTS ccm_increase (cause, effect, Lmin, Lmax, delays, pvalue_increase)'
    )
    row = [
        cname, ename, smallest_library, largest_library,
        str(embedding.delays), pvalue_increase
    ]
    db.execute('INSERT INTO ccm_increase VALUES (?,?,?,?,?,?)', row)
    db.commit()

    return corrs_large, pvalue_increase, pvalue_positive
예제 #2
0
def run_analysis_for_embedding(cname, cause, ename, effect, embedding, theiler_window, n_bootstraps, db, rng):
    """Run CCM bootstraps at the minimum and maximum library size and record the comparison.

    ``run_ccm_bootstraps`` is invoked twice (minimum library size first, then
    maximum). From the two correlation samples this function computes
    ``pvalue_increase`` and ``pvalue_positive``, inserts a row into the
    ``ccm_increase`` table (creating it if needed) and commits ``db``.

    Parameters:
        cname, ename: labels stored in the ``cause`` / ``effect`` columns.
        cause: series forwarded to ``run_ccm_bootstraps``.
        effect: not used by this function.
        embedding: object exposing ``embedding_dimension``,
            ``delay_vector_count`` and ``delays``.
        theiler_window, n_bootstraps, rng: forwarded to ``run_ccm_bootstraps``.
        db: connection-like object providing ``execute`` and ``commit``.

    Returns:
        ``(corrs_Lmax, pvalue_increase, pvalue_positive)``: the bootstrap
        result at the maximum library size,
        ``1 - mean(inverse_quantile(corrs_Lmin, corrs_Lmax))``, and
        ``inverse_quantile(corrs_Lmax, 0.0).tolist()``.

    Raises:
        AssertionError: if the maximum library size does not exceed the
            minimum one.
    """
    # embedding_dimension + 2 points, so vectors should usually have
    # embedding_dimension + 1 neighbors available.
    lib_lo = embedding.embedding_dimension + 2
    # Upper bound is simply the number of available delay vectors.
    lib_hi = embedding.delay_vector_count

    assert lib_hi > lib_lo
    sys.stderr.write('  Using Lmin = {}, Lmax = {}\n'.format(lib_lo, lib_hi))

    # Arguments shared by both bootstrap runs, split around the library size.
    leading_args = (cname, ename, embedding, cause)
    trailing_args = (theiler_window, n_bootstraps, db, rng)

    # The low-library run must come first: both runs consume the same rng.
    corrs_lo = run_ccm_bootstraps(*(leading_args + (lib_lo,) + trailing_args))
    corrs_hi = run_ccm_bootstraps(*(leading_args + (lib_hi,) + trailing_args))

    increase_quantiles = statutils.inverse_quantile(corrs_lo, corrs_hi)
    pvalue_increase = 1.0 - numpy.mean(increase_quantiles)
    pvalue_positive = statutils.inverse_quantile(corrs_hi, 0.0).tolist()

    create_sql = 'CREATE TABLE IF NOT EXISTS ccm_increase (cause, effect, Lmin, Lmax, delays, pvalue_increase)'
    insert_sql = 'INSERT INTO ccm_increase VALUES (?,?,?,?,?,?)'
    db.execute(create_sql)
    db.execute(insert_sql, [cname, ename, lib_lo, lib_hi, str(embedding.delays), pvalue_increase])
    db.commit()

    return corrs_hi, pvalue_increase, pvalue_positive
예제 #3
0
def run_analysis_surrogates(cname, cause, ename, effect, delays,
                            theiler_window, n_bootstraps, db, rng):
    """Compare the raw CCM correlation against surrogate-based correlations.

    Two surrogate ensembles of ``n_bootstraps`` members each are generated:
    one that randomizes only the cause series, and one that randomizes both
    cause and effect. For each ensemble a row is written to
    ``surrogate_test`` (raw correlation, 95th percentile of the surrogate
    correlations, and ``1 - inverse_quantile(surrogates, raw)``) and a row of
    percentiles is written to ``surrogate_dist``. Both tables are created on
    demand. Individual surrogate correlations are not stored, and this
    function does not commit.

    Parameters:
        cname, ename: labels stored in the ``cause`` / ``effect`` columns.
        cause, effect: series passed to ``calculate_mean_anomaly`` and used
            for the embedding / CCM calls.
        delays: delays passed to ``pyembedding.Embedding``.
        theiler_window: forwarded to every ``ccm`` call.
        n_bootstraps: number of surrogates per ensemble.
        db: object providing ``execute``.
        rng: random generator forwarded to ``randomize_surrogate``.
    """
    percentile_levels = [0, 1, 2.5, 5, 25, 50, 75, 95, 97.5, 99, 100]

    def ccm_correlation(embedding, series):
        # The embedding is cross-mapped against itself; only the reported
        # correlation of the first returned element is needed.
        outcome = embedding.ccm(embedding, series,
                                theiler_window=theiler_window)
        return outcome[0]['correlation']

    def store_ensemble(effect_flag, surrogate_corrs):
        # effect_flag fills the use_effect_surrogates column (0 or 1).
        db.execute('INSERT INTO surrogate_test VALUES (?,?,?,?,?,?)', [
            cname, ename, effect_flag, corr_raw,
            numpy.percentile(surrogate_corrs, 95),
            1.0 - float(statutils.inverse_quantile(surrogate_corrs, corr_raw))
        ])
        db.execute(
            'INSERT INTO surrogate_dist VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)',
            [cname, ename, effect_flag] +
            numpy.percentile(surrogate_corrs, percentile_levels).tolist())

    db.execute(
        'CREATE TABLE IF NOT EXISTS surrogate_test (cause, effect, use_effect_surrogates, corr_raw, corr_surr_q95, pvalue_greater)'
    )
    db.execute(
        'CREATE TABLE IF NOT EXISTS surrogate_dist (cause, effect, use_effect_surrogates, q0, q1, q2_5, q5, q25, q50, q75, q95, q97_5, q99, q100)'
    )

    effect_raw_embedding = pyembedding.Embedding(effect, delays)
    corr_raw = ccm_correlation(effect_raw_embedding, cause)

    # Ensemble 1: surrogate cause, real effect.
    # NOTE(review): the literal 12 is passed to calculate_mean_anomaly as-is;
    # presumably a period length -- confirm against that helper.
    cause_mean, cause_anom = calculate_mean_anomaly(cause, 12)
    corrs_cause_only = [
        ccm_correlation(effect_raw_embedding,
                        randomize_surrogate(cause_mean, cause_anom, rng))
        for _ in range(n_bootstraps)
    ]
    store_ensemble(0, corrs_cause_only)

    # Ensemble 2: surrogate cause and surrogate effect.
    effect_mean, effect_anom = calculate_mean_anomaly(effect, 12)
    corrs_cause_and_effect = []
    for _ in range(n_bootstraps):
        # Cause is drawn before effect so the rng stream is consumed in a
        # fixed order.
        cause_surr = randomize_surrogate(cause_mean, cause_anom, rng)
        effect_surr = randomize_surrogate(effect_mean, effect_anom, rng)
        surrogate_embedding = pyembedding.Embedding(effect_surr, delays)
        corrs_cause_and_effect.append(
            ccm_correlation(surrogate_embedding, cause_surr))
    store_ensemble(1, corrs_cause_and_effect)
예제 #4
0
def run_ccm_bootstraps(cname, ename, embedding, cause, L, theiler_window, n_bootstraps, db, rng):
    """Run bootstrapped CCM at library size ``L`` and store the correlations.

    Each bootstrap draws a sample of ``L`` vectors (with replacement) from
    ``embedding`` and cross-maps it against the full embedding. A summary row
    (mean, standard deviation, ``inverse_quantile(corrs, 0.0)`` and a set of
    percentiles) goes to ``ccm_correlation_dist``; every individual
    correlation goes to ``ccm_correlations``. Both tables are created on
    demand; this function does not commit.

    Parameters:
        cname, ename: labels stored in the ``cause`` / ``effect`` columns.
        embedding: a ``pyembedding.Embedding``.
        cause: series passed to ``ccm``.
        L: library size passed to ``sample_embedding``.
        theiler_window: forwarded to ``ccm``.
        n_bootstraps: number of bootstrap replicates.
        db: object providing ``execute``.
        rng: random generator forwarded to ``sample_embedding``.

    Returns:
        A numpy array holding one correlation per bootstrap.

    Raises:
        AssertionError: if ``embedding`` is not a ``pyembedding.Embedding``.
    """
    assert isinstance(embedding, pyembedding.Embedding)

    def bootstrap_correlation():
        resampled = embedding.sample_embedding(L, replace=True, rng=rng)
        ccm_result, _, _ = resampled.ccm(embedding, cause, theiler_window=theiler_window)
        return ccm_result['correlation']

    corrs = numpy.array([bootstrap_correlation() for _ in range(n_bootstraps)])
    pvalue_positive = statutils.inverse_quantile(corrs, 0.0).tolist()

    # Summary of the bootstrap distribution.
    db.execute('CREATE TABLE IF NOT EXISTS ccm_correlation_dist (cause, effect, L, delays, mean, sd, pvalue_positive, q0, q1, q2_5, q5, q25, q50, q75, q95, q97_5, q99, q100)')
    summary_row = [cname, ename, L, str(embedding.delays), corrs.mean(), corrs.std(), pvalue_positive]
    summary_row.extend(numpy.percentile(corrs, [0, 1, 2.5, 5, 25, 50, 75, 95, 97.5, 99, 100]))
    db.execute(
        'INSERT INTO ccm_correlation_dist VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)',
        summary_row
    )

    # One row per bootstrap replicate.
    db.execute('CREATE TABLE IF NOT EXISTS ccm_correlations (cause, effect, L, delays, correlation)')
    insert_sql = 'INSERT INTO ccm_correlations VALUES (?,?,?,?,?)'
    for single_corr in corrs:
        db.execute(insert_sql, [cname, ename, L, str(embedding.delays), single_corr])

    # Hand back a fresh array, not the one used above.
    return numpy.array(corrs)
예제 #5
0
def run_ccm_bootstraps(cname, ename, embedding, cause, L, theiler_window, n_bootstraps, db, rng):
    """Run bootstrapped CCM at library size ``L`` for a projection embedding.

    Each bootstrap draws a sample of ``L`` vectors (with replacement) from
    ``embedding`` and cross-maps it against the full embedding. A summary row
    (mean, standard deviation, ``inverse_quantile(corrs, 0.0)`` and a set of
    percentiles) goes to ``ccm_correlation_dist``; every individual
    correlation goes to ``ccm_correlations``. Rows are keyed by
    ``embedding.d`` and ``embedding.dmax``. Both tables are created on
    demand; this function does not commit.

    Parameters:
        cname, ename: labels stored in the ``cause`` / ``effect`` columns.
        embedding: a ``projection.ProjectionEmbedding``.
        cause: series passed to ``ccm``.
        L: library size passed to ``sample_embedding``.
        theiler_window: forwarded to ``ccm``.
        n_bootstraps: number of bootstrap replicates.
        db: object providing ``execute``.
        rng: random generator forwarded to ``sample_embedding``.

    Returns:
        A numpy array holding one correlation per bootstrap.

    Raises:
        AssertionError: if ``embedding`` is not a
            ``projection.ProjectionEmbedding``.
    """
    assert isinstance(embedding, projection.ProjectionEmbedding)

    def bootstrap_correlation():
        resampled = embedding.sample_embedding(L, replace=True, rng=rng)
        ccm_result, _, _ = resampled.ccm(embedding, cause, theiler_window=theiler_window)
        return ccm_result['correlation']

    corrs = numpy.array([bootstrap_correlation() for _ in range(n_bootstraps)])

    # Summary of the bootstrap distribution.
    db.execute('CREATE TABLE IF NOT EXISTS ccm_correlation_dist (cause, effect, L, d, dmax, mean, sd, pvalue_positive, q0, q1, q2_5, q5, q25, q50, q75, q95, q97_5, q99, q100)')
    summary_row = [
        cname, ename, L, embedding.d, embedding.dmax,
        corrs.mean(), corrs.std(),
        statutils.inverse_quantile(corrs, 0.0).tolist(),
    ]
    summary_row.extend(numpy.percentile(corrs, [0, 1, 2.5, 5, 25, 50, 75, 95, 97.5, 99, 100]))
    db.execute(
        'INSERT INTO ccm_correlation_dist VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)',
        summary_row
    )

    # One row per bootstrap replicate.
    db.execute('CREATE TABLE IF NOT EXISTS ccm_correlations (cause, effect, L, d, dmax, correlation)')
    insert_sql = 'INSERT INTO ccm_correlations VALUES (?,?,?,?,?,?)'
    for single_corr in corrs:
        db.execute(insert_sql, [cname, ename, L, embedding.d, embedding.dmax, single_corr])

    # Hand back a fresh array, not the one used above.
    return numpy.array(corrs)
예제 #6
0
def run_analysis_surrogates(cname, cause, ename, effect, delays, theiler_window, n_bootstraps, db, rng):
    """Test the raw CCM correlation against two surrogate ensembles.

    The first ensemble randomizes only the cause series; the second
    randomizes both cause and effect. Each ensemble has ``n_bootstraps``
    members. Per ensemble, one row is inserted into ``surrogate_test`` (raw
    correlation, 95th percentile of the surrogate correlations, and
    ``1 - inverse_quantile(surrogates, raw)``) and one row of percentiles is
    inserted into ``surrogate_dist``. Both tables are created on demand.
    Individual surrogate correlations are not stored, and this function does
    not commit.

    Parameters:
        cname, ename: labels stored in the ``cause`` / ``effect`` columns.
        cause, effect: series passed to ``calculate_mean_anomaly`` and used
            for the embedding / CCM calls.
        delays: delays passed to ``pyembedding.Embedding``.
        theiler_window: forwarded to every ``ccm`` call.
        n_bootstraps: number of surrogates per ensemble.
        db: object providing ``execute``.
        rng: random generator forwarded to ``randomize_surrogate``.
    """
    quantile_points = [0, 1, 2.5, 5, 25, 50, 75, 95, 97.5, 99, 100]

    def self_ccm_correlation(embedding, series):
        # Cross-map the embedding against itself and keep only the
        # correlation reported in the first returned element.
        return embedding.ccm(embedding, series, theiler_window=theiler_window)[0]['correlation']

    def write_results(uses_effect_surrogates, surrogate_corrs):
        # uses_effect_surrogates fills the use_effect_surrogates column.
        test_row = [
            cname, ename, uses_effect_surrogates,
            corr_raw,
            numpy.percentile(surrogate_corrs, 95),
            1.0 - float(statutils.inverse_quantile(surrogate_corrs, corr_raw)),
        ]
        db.execute('INSERT INTO surrogate_test VALUES (?,?,?,?,?,?)', test_row)

        dist_row = [cname, ename, uses_effect_surrogates]
        dist_row = dist_row + numpy.percentile(surrogate_corrs, quantile_points).tolist()
        db.execute('INSERT INTO surrogate_dist VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)', dist_row)

    db.execute('CREATE TABLE IF NOT EXISTS surrogate_test (cause, effect, use_effect_surrogates, corr_raw, corr_surr_q95, pvalue_greater)')
    db.execute('CREATE TABLE IF NOT EXISTS surrogate_dist (cause, effect, use_effect_surrogates, q0, q1, q2_5, q5, q25, q50, q75, q95, q97_5, q99, q100)')

    effect_raw_embedding = pyembedding.Embedding(effect, delays)
    corr_raw = self_ccm_correlation(effect_raw_embedding, cause)

    # Ensemble 1: surrogate cause, real effect.
    # NOTE(review): the literal 12 is passed to calculate_mean_anomaly as-is;
    # presumably a period length -- confirm against that helper.
    cause_mean, cause_anom = calculate_mean_anomaly(cause, 12)
    corrs_cause_only = []
    for _ in range(n_bootstraps):
        surrogate_cause = randomize_surrogate(cause_mean, cause_anom, rng)
        corrs_cause_only.append(self_ccm_correlation(effect_raw_embedding, surrogate_cause))
    write_results(0, corrs_cause_only)

    # Ensemble 2: surrogate cause and surrogate effect.
    effect_mean, effect_anom = calculate_mean_anomaly(effect, 12)
    corrs_cause_and_effect = []
    for _ in range(n_bootstraps):
        # Cause is drawn before effect so the rng stream is consumed in a
        # fixed order.
        surrogate_cause = randomize_surrogate(cause_mean, cause_anom, rng)
        surrogate_effect = randomize_surrogate(effect_mean, effect_anom, rng)
        surrogate_embedding = pyembedding.Embedding(surrogate_effect, delays)
        corrs_cause_and_effect.append(self_ccm_correlation(surrogate_embedding, surrogate_cause))
    write_results(1, corrs_cause_and_effect)
예제 #7
0
def run_analysis_max_univariate_prediction_plus_lags(cname, cause, ename,
                                                     effect, theiler_window,
                                                     db, rng, ccm_settings):
    """Pick the embedding that best self-predicts ``effect``, then scan CCM lags.

    Phase 1 sweeps embedding dimension ``E`` and delay ``tau`` (taken from
    ``ccm_settings``), predicting ``effect[1:]`` from an embedding of
    ``effect[:-1]`` and keeping the ``(E, tau)`` with the highest
    correlation. Every evaluated combination is logged to stderr and stored
    in ``univariate_predictions``.

    Phase 2 runs ``run_analysis_for_embedding`` for every lag in
    ``[-max_ccm_lag, max_ccm_lag]``, tracks the lag with the highest median
    correlation among negative lags, among positive lags, and at zero, and
    writes a single row to ``ccm_lag_tests``.

    Parameters:
        cname, ename: labels stored in the ``cause`` / ``effect`` columns.
        cause, effect: series under analysis.
        theiler_window: forwarded to prediction and CCM calls.
        db: object providing ``execute``.
        rng: random generator forwarded to ``run_analysis_for_embedding``.
        ccm_settings: mapping with keys ``sweep_embedding_dimensions``,
            ``sweep_delays``, ``max_ccm_lag``, ``n_ccm_bootstraps`` and,
            optionally, ``delta_tau_termination``.
    """
    delta_tau_termination = (ccm_settings['delta_tau_termination']
                             if 'delta_tau_termination' in ccm_settings
                             else None)

    # ---- Phase 1: sweep (E, tau) for the best univariate prediction ----
    max_corr = float('-inf')
    max_corr_Etau = None
    for E in ccm_settings['sweep_embedding_dimensions']:
        # Per-E bookkeeping; only consulted when early termination is on.
        best_corr_this_E = float('-inf')
        best_tau_this_E = None

        # With a single dimension the delay is irrelevant, so try only one.
        tau_candidates = ccm_settings['sweep_delays'] if E > 1 else [1]
        for tau in tau_candidates:
            delays = tuple(range(0, E * tau, tau))
            embedding = pyembedding.Embedding(effect[:-1], delays)
            if embedding.delay_vector_count < embedding.embedding_dimension + 2:
                sys.stderr.write(
                    '  Lmax < Lmin; skipping E={}, tau={}\n'.format(E, tau))
                continue

            eff_off, eff_off_pred = embedding.simplex_predict_using_embedding(
                embedding, effect[1:], theiler_window=theiler_window)
            corr = numpy.corrcoef(eff_off, eff_off_pred)[0, 1]

            db.execute(
                'CREATE TABLE IF NOT EXISTS univariate_predictions (variable, delays, correlation)'
            )
            db.execute('INSERT INTO univariate_predictions VALUES (?,?,?)',
                       [ename, str(delays), corr])

            sys.stderr.write('  corr for E={}, tau={} : {}\n'.format(
                E, tau, corr))
            if corr > max_corr:
                max_corr = corr
                max_corr_Etau = (E, tau)

            if delta_tau_termination is None:
                continue

            if corr > best_corr_this_E:
                best_corr_this_E = corr
                best_tau_this_E = tau

            # Stop scanning this E once we have moved far enough past its
            # best tau without finding a better one.
            if E * (tau - best_tau_this_E) >= delta_tau_termination:
                sys.stderr.write(
                    '{} taus since a maximum for this E; assuming found.\n'
                    .format(delta_tau_termination))
                break

    best_E, best_tau = max_corr_Etau

    # ---- Phase 2: scan lags using the selected (E, tau) ----
    max_ccm_lag = ccm_settings['max_ccm_lag']

    def empty_summary(lag=None):
        # None entries mark "nothing recorded yet" for a lag category.
        return {
            'lag': lag,
            'corr_med': None,
            'corrs': None,
            'pvalue_increase': None,
            'pvalue_positive': None,
        }

    best_neg = empty_summary()
    best_pos = empty_summary()
    at_zero = empty_summary(0)

    for lag in range(-max_ccm_lag, max_ccm_lag + 1):
        sys.stderr.write('  Using E = {}, tau = {}, lag = {}\n'.format(
            best_E, best_tau, lag))

        lagged_delays = tuple(range(lag, lag + best_E * best_tau, best_tau))
        max_corr_emb = pyembedding.Embedding(effect, lagged_delays)
        corrs, pvalue_increase, pvalue_positive = run_analysis_for_embedding(
            cname, cause, ename, effect, max_corr_emb, theiler_window,
            ccm_settings['n_ccm_bootstraps'], db, rng)
        corr_med = numpy.median(corrs)

        candidate = {
            'lag': lag,
            'corr_med': corr_med,
            'corrs': corrs,
            'pvalue_increase': pvalue_increase,
            'pvalue_positive': pvalue_positive,
        }

        if lag == 0:
            at_zero = candidate
        elif lag < 0:
            if best_neg['lag'] is None or corr_med > best_neg['corr_med']:
                best_neg = candidate
        elif best_pos['lag'] is None or corr_med > best_pos['corr_med']:
            best_pos = candidate

    # Best negative-or-zero lag (zero wins ties).
    best_nonpos = (best_neg
                   if best_neg['corr_med'] > at_zero['corr_med'] else at_zero)

    # Best positive-or-zero lag (zero wins ties).
    best_nonneg = (best_pos
                   if best_pos['corr_med'] > at_zero['corr_med'] else at_zero)

    # Test if negative is better than nonnegative
    pvalue_neg_best = 1.0 - numpy.mean(
        statutils.inverse_quantile(best_nonneg['corrs'], best_neg['corrs']))

    # Test if nonpositive is better than positive
    pvalue_nonpos_best = 1.0 - numpy.mean(
        statutils.inverse_quantile(best_pos['corrs'], best_nonpos['corrs']))

    db.execute('''CREATE TABLE IF NOT EXISTS ccm_lag_tests (
        cause, effect,
        
        neg_lag,
        neg_corr_med,
        neg_pvalue_positive,
        neg_pvalue_increase,
        neg_pvalue_best,
        
        nonpos_lag,
        nonpos_corr_med,
        nonpos_pvalue_positive,
        nonpos_pvalue_increase,
        nonpos_pvalue_best,
        
        pos_lag,
        pos_corr_med,
        pos_pvalue_positive,
        pos_pvalue_increase,
        
        nonneg_lag,
        nonneg_corr_med,
        nonneg_pvalue_positive,
        nonneg_pvalue_increase
    )''')

    def lag_columns(summary):
        # Column order shared by every lag category in ccm_lag_tests.
        return [
            summary['lag'], summary['corr_med'],
            summary['pvalue_positive'], summary['pvalue_increase']
        ]

    lag_test_row = ([cname, ename] +
                    lag_columns(best_neg) + [pvalue_neg_best] +
                    lag_columns(best_nonpos) + [pvalue_nonpos_best] +
                    lag_columns(best_pos) +
                    lag_columns(best_nonneg))
    db.execute(
        'INSERT INTO ccm_lag_tests VALUES (?,?, ?,?,?,?,?, ?,?,?,?,?, ?,?,?,?, ?,?,?,?)',
        lag_test_row)
예제 #8
0
def run_analysis_max_univariate_prediction_plus_lags(cname, cause, ename, effect, theiler_window, db, rng):
    """Pick the embedding that best self-predicts ``effect``, then scan CCM lags.

    Phase 1 sweeps embedding dimension ``E`` over 1..10 and delay ``tau``
    over 1..60 (only ``tau = 1`` when ``E == 1``), predicting ``effect[1:]``
    from an embedding of ``effect[:-1]`` and keeping the ``(E, tau)`` with
    the highest correlation. Every evaluated combination is logged to stderr
    and stored in ``univariate_predictions``.

    Phase 2 runs ``run_analysis_for_embedding`` with 100 bootstraps for every
    lag in ``[-60, 60]``, tracks the lag with the highest median correlation
    among negative lags, among positive lags, and at zero, and writes a
    single row to ``ccm_lag_tests``.

    Parameters:
        cname, ename: labels stored in the ``cause`` / ``effect`` columns.
        cause, effect: series under analysis.
        theiler_window: forwarded to prediction and CCM calls.
        db: object providing ``execute``.
        rng: random generator forwarded to ``run_analysis_for_embedding``.
    """
    # Early termination of the tau sweep is switched off here; set a number
    # to enable it.
    delta_tau_termination = None

    # ---- Phase 1: sweep (E, tau) for the best univariate prediction ----
    max_corr = float('-inf')
    max_corr_Etau = None
    for E in range(1, 11):
        # Per-E bookkeeping; only consulted when early termination is on.
        best_corr_this_E = float('-inf')
        best_tau_this_E = None

        # With a single dimension the delay is irrelevant, so try only one.
        tau_candidates = range(1, 61) if E > 1 else [1]
        for tau in tau_candidates:
            delays = tuple(range(0, E*tau, tau))
            embedding = pyembedding.Embedding(effect[:-1], delays)
            if embedding.delay_vector_count < embedding.embedding_dimension + 2:
                sys.stderr.write('  Lmax < Lmin; skipping E={}, tau={}\n'.format(E, tau))
                continue

            eff_off, eff_off_pred = embedding.simplex_predict_using_embedding(embedding, effect[1:], theiler_window=theiler_window)
            corr = numpy.corrcoef(eff_off, eff_off_pred)[0,1]

            db.execute('CREATE TABLE IF NOT EXISTS univariate_predictions (variable, delays, correlation)')
            db.execute('INSERT INTO univariate_predictions VALUES (?,?,?)', [ename, str(delays), corr])

            sys.stderr.write('  corr for E={}, tau={} : {}\n'.format(E, tau, corr))
            if corr > max_corr:
                max_corr = corr
                max_corr_Etau = (E, tau)

            if delta_tau_termination is None:
                continue

            if corr > best_corr_this_E:
                best_corr_this_E = corr
                best_tau_this_E = tau

            # Stop scanning this E once we have moved far enough past its
            # best tau without finding a better one.
            if E * (tau - best_tau_this_E) >= delta_tau_termination:
                sys.stderr.write('{} taus since a maximum for this E; assuming found.\n'.format(delta_tau_termination))
                break

    best_E, best_tau = max_corr_Etau

    # ---- Phase 2: scan lags using the selected (E, tau) ----
    max_ccm_lag = 60

    def empty_summary(lag=None):
        # None entries mark "nothing recorded yet" for a lag category.
        return {
            'lag': lag,
            'corr_med': None,
            'corrs': None,
            'pvalue_increase': None,
            'pvalue_positive': None,
        }

    best_neg = empty_summary()
    best_pos = empty_summary()
    at_zero = empty_summary(0)

    for lag in range(-max_ccm_lag, max_ccm_lag + 1):
        sys.stderr.write('  Using E = {}, tau = {}, lag = {}\n'.format(best_E, best_tau, lag))

        lagged_delays = tuple(range(lag, lag + best_E*best_tau, best_tau))
        max_corr_emb = pyembedding.Embedding(effect, lagged_delays)
        corrs, pvalue_increase, pvalue_positive = run_analysis_for_embedding(
            cname, cause, ename, effect, max_corr_emb, theiler_window, 100, db, rng
        )
        corr_med = numpy.median(corrs)

        candidate = {
            'lag': lag,
            'corr_med': corr_med,
            'corrs': corrs,
            'pvalue_increase': pvalue_increase,
            'pvalue_positive': pvalue_positive,
        }

        if lag == 0:
            at_zero = candidate
        elif lag < 0:
            if best_neg['lag'] is None or corr_med > best_neg['corr_med']:
                best_neg = candidate
        elif best_pos['lag'] is None or corr_med > best_pos['corr_med']:
            best_pos = candidate

    # Best negative-or-zero lag (zero wins ties).
    best_nonpos = best_neg if best_neg['corr_med'] > at_zero['corr_med'] else at_zero

    # Best positive-or-zero lag (zero wins ties).
    best_nonneg = best_pos if best_pos['corr_med'] > at_zero['corr_med'] else at_zero

    # Test if negative is better than nonnegative
    pvalue_neg_best = 1.0 - numpy.mean(statutils.inverse_quantile(best_nonneg['corrs'], best_neg['corrs']))

    # Test if nonpositive is better than positive
    pvalue_nonpos_best = 1.0 - numpy.mean(statutils.inverse_quantile(best_pos['corrs'], best_nonpos['corrs']))

    db.execute('''CREATE TABLE IF NOT EXISTS ccm_lag_tests (
        cause, effect,
        
        neg_lag,
        neg_corr_med,
        neg_pvalue_positive,
        neg_pvalue_increase,
        neg_pvalue_best,
        
        nonpos_lag,
        nonpos_corr_med,
        nonpos_pvalue_positive,
        nonpos_pvalue_increase,
        nonpos_pvalue_best,
        
        pos_lag,
        pos_corr_med,
        pos_pvalue_positive,
        pos_pvalue_increase,
        
        nonneg_lag,
        nonneg_corr_med,
        nonneg_pvalue_positive,
        nonneg_pvalue_increase
    )''')

    def lag_columns(summary):
        # Column order shared by every lag category in ccm_lag_tests.
        return [
            summary['lag'],
            summary['corr_med'],
            summary['pvalue_positive'],
            summary['pvalue_increase'],
        ]

    lag_test_row = (
        [cname, ename]
        + lag_columns(best_neg) + [pvalue_neg_best]
        + lag_columns(best_nonpos) + [pvalue_nonpos_best]
        + lag_columns(best_pos)
        + lag_columns(best_nonneg)
    )
    db.execute(
        'INSERT INTO ccm_lag_tests VALUES (?,?, ?,?,?,?,?, ?,?,?,?,?, ?,?,?,?, ?,?,?,?)',
        lag_test_row
    )
예제 #9
0
def run_analysis(db, cname, cause, ename, effect, emb, dt, max_lag, lag_skip,
                 n_bootstraps, cores):
    """Run the lagged CCM analysis in parallel and store all results.

    One task per lag is dispatched through ``pyembedding.pool_map`` to
    ``run_analysis_mappable``. Each result is a ``(lag, results_dict)`` pair
    where ``results_dict`` maps a library size ``L`` to the correlations
    obtained at that size (or ``None``). For every lag whose smallest and
    largest library both produced correlations, a row is written to
    ``tests`` and every individual correlation to ``correlations``. Finally
    the best negative, zero and positive lags (by median correlation at the
    largest library) are compared and summarised in one ``lagtests`` row,
    and ``db`` is committed.

    Parameters:
        db: connection-like object providing ``execute`` and ``commit``.
        cname, ename: labels stored in the ``cause`` / ``effect`` columns.
        cause, effect, emb, dt: forwarded unchanged to
            ``run_analysis_mappable``.
        max_lag: largest absolute lag to evaluate.
        lag_skip: step between consecutive lags.
        n_bootstraps: forwarded to ``run_analysis_mappable``.
        cores: worker count handed to ``pyembedding.pool_map``.
    """
    # NOTE: an unused multiprocessing.Pool(cores) used to be created here and
    # never closed, leaking idle worker processes; pool_map receives `cores`
    # and is the only consumer of parallelism.

    # list(...) keeps the concatenation valid on Python 3, where range()
    # objects cannot be added together.
    lag_range = list(range(-max_lag, -lag_skip + 1, lag_skip)) + list(
        range(0, max_lag + 1, lag_skip))
    args = [(cause, effect, emb, dt, lag, n_bootstraps) for lag in lag_range]

    # For single-process debugging, map(run_analysis_mappable, args) yields
    # the same shape of result.
    results = pyembedding.pool_map(run_analysis_mappable, args, cores)

    db.execute(
        'CREATE TABLE IF NOT EXISTS correlations (cause, effect, lag, L, correlation)'
    )
    db.execute(
        'CREATE TABLE IF NOT EXISTS tests (cause, effect, lag, Lmin, Lmax, pval_positive, pval_increase)'
    )
    db.execute(
        'CREATE TABLE IF NOT EXISTS lagtests (cause, effect, best_lag, pval_positive, pval_increase, pval_neg_best, pval_nonpos_best)'
    )

    # Running best among strictly negative lags.
    best_lag_neg = None
    best_lag_neg_corr_med = None
    best_lag_neg_corrs = None
    best_lag_neg_pval_increase = None
    best_lag_neg_pval_positive = None

    # Running best among strictly positive lags.
    best_lag_pos = None
    best_lag_pos_corr_med = None
    best_lag_pos_corrs = None
    best_lag_pos_pval_increase = None
    best_lag_pos_pval_positive = None

    # Values observed at lag 0.
    zero_corrs = None
    zero_corr_med = None
    zero_pval_increase = None
    zero_pval_positive = None

    for lag, results_dict in results:
        Ls = results_dict.keys()
        Lmin = min(Ls)
        Lmax = max(Ls)

        corrs_Lmin = results_dict[Lmin]
        corrs_Lmax = results_dict[Lmax]

        # Skip lags for which either extreme library size has no result.
        if corrs_Lmin is None or corrs_Lmax is None:
            continue

        pval_positive = statutils.inverse_quantile(corrs_Lmax, 0.0).tolist()
        pval_increase = 1.0 - numpy.mean(
            statutils.inverse_quantile(corrs_Lmin, corrs_Lmax))

        db.execute(
            'INSERT INTO tests VALUES (?,?,?,?,?,?,?)',
            [cname, ename, lag, Lmin, Lmax, pval_positive, pval_increase])

        # Lags are ranked by the median correlation at the largest library.
        corrs = corrs_Lmax
        corr_med = numpy.median(corrs)

        if lag == 0:
            zero_corr_med = corr_med
            zero_corrs = corrs
            zero_pval_increase = pval_increase
            zero_pval_positive = pval_positive
        elif lag < 0 and (best_lag_neg is None
                          or corr_med > best_lag_neg_corr_med):
            best_lag_neg = lag
            best_lag_neg_corr_med = corr_med
            best_lag_neg_corrs = corrs
            best_lag_neg_pval_increase = pval_increase
            best_lag_neg_pval_positive = pval_positive
        elif lag > 0 and (best_lag_pos is None
                          or corr_med > best_lag_pos_corr_med):
            best_lag_pos = lag
            best_lag_pos_corr_med = corr_med
            best_lag_pos_corrs = corrs
            best_lag_pos_pval_increase = pval_increase
            best_lag_pos_pval_positive = pval_positive

        # Persist every individual correlation for every library size.
        for L in Ls:
            for corr in results_dict[L]:
                db.execute('INSERT INTO correlations VALUES (?,?,?,?,?)',
                           [cname, ename, lag, L, corr])

    # Get the best negative-or-zero lag
    if best_lag_neg_corr_med > zero_corr_med:
        best_lag_nonpos_corrs = best_lag_neg_corrs
    else:
        best_lag_nonpos_corrs = zero_corrs

    # Get the best positive-or-zero lag
    if best_lag_pos_corr_med > zero_corr_med:
        best_lag_nonneg_corrs = best_lag_pos_corrs
    else:
        best_lag_nonneg_corrs = zero_corrs

    # Test if negative is better than nonnegative
    pval_neg_best = 1.0 - numpy.mean(
        statutils.inverse_quantile(best_lag_nonneg_corrs, best_lag_neg_corrs))

    # Test if nonpositive is better than positive
    pval_nonpos_best = 1.0 - numpy.mean(
        statutils.inverse_quantile(best_lag_pos_corrs, best_lag_nonpos_corrs))

    # Overall best lag: a nonzero lag wins only if it strictly beats both the
    # opposite sign and lag 0; otherwise lag 0 is reported.
    if best_lag_neg_corr_med > best_lag_pos_corr_med and best_lag_neg_corr_med > zero_corr_med:
        best_lag = best_lag_neg
        pval_increase = best_lag_neg_pval_increase
        pval_positive = best_lag_neg_pval_positive
    elif best_lag_pos_corr_med > best_lag_neg_corr_med and best_lag_pos_corr_med > zero_corr_med:
        best_lag = best_lag_pos
        pval_increase = best_lag_pos_pval_increase
        pval_positive = best_lag_pos_pval_positive
    else:
        best_lag = 0
        pval_increase = zero_pval_increase
        pval_positive = zero_pval_positive

    db.execute('INSERT INTO lagtests VALUES (?,?,?,?,?,?,?)', [
        cname, ename, best_lag, pval_positive, pval_increase, pval_neg_best,
        pval_nonpos_best
    ])

    db.commit()
예제 #10
0
파일: ccm.py 프로젝트: cobeylab/pyembedding
def run_analysis(db, cname, cause, ename, effect, emb, dt, max_lag, lag_skip, n_bootstraps, cores):
    """Run the lagged CCM analysis in parallel and store all results.

    One task per lag is dispatched through ``pyembedding.pool_map`` to
    ``run_analysis_mappable``. Each result is a ``(lag, results_dict)`` pair
    where ``results_dict`` maps a library size ``L`` to the correlations
    obtained at that size (or ``None``). For every lag whose smallest and
    largest library both produced correlations, a row is written to
    ``tests`` and every individual correlation to ``correlations``. Finally
    the best negative, zero and positive lags (by median correlation at the
    largest library) are compared and summarised in one ``lagtests`` row,
    and ``db`` is committed.

    Parameters:
        db: connection-like object providing ``execute`` and ``commit``.
        cname, ename: labels stored in the ``cause`` / ``effect`` columns.
        cause, effect, emb, dt: forwarded unchanged to
            ``run_analysis_mappable``.
        max_lag: largest absolute lag to evaluate.
        lag_skip: step between consecutive lags.
        n_bootstraps: forwarded to ``run_analysis_mappable``.
        cores: worker count handed to ``pyembedding.pool_map``.
    """
    # NOTE: an unused multiprocessing.Pool(cores) used to be created here and
    # never closed, leaking idle worker processes; pool_map receives `cores`
    # and is the only consumer of parallelism.

    # list(...) keeps the concatenation valid on Python 3, where range()
    # objects cannot be added together.
    lag_range = list(range(-max_lag, -lag_skip + 1, lag_skip)) + list(range(0, max_lag + 1, lag_skip))
    args = [(cause, effect, emb, dt, lag, n_bootstraps) for lag in lag_range]

    # For single-process debugging, map(run_analysis_mappable, args) yields
    # the same shape of result.
    results = pyembedding.pool_map(run_analysis_mappable, args, cores)

    db.execute('CREATE TABLE IF NOT EXISTS correlations (cause, effect, lag, L, correlation)')
    db.execute('CREATE TABLE IF NOT EXISTS tests (cause, effect, lag, Lmin, Lmax, pval_positive, pval_increase)')
    db.execute('CREATE TABLE IF NOT EXISTS lagtests (cause, effect, best_lag, pval_positive, pval_increase, pval_neg_best, pval_nonpos_best)')

    # Running best among strictly negative lags.
    best_lag_neg = None
    best_lag_neg_corr_med = None
    best_lag_neg_corrs = None
    best_lag_neg_pval_increase = None
    best_lag_neg_pval_positive = None

    # Running best among strictly positive lags.
    best_lag_pos = None
    best_lag_pos_corr_med = None
    best_lag_pos_corrs = None
    best_lag_pos_pval_increase = None
    best_lag_pos_pval_positive = None

    # Values observed at lag 0.
    zero_corrs = None
    zero_corr_med = None
    zero_pval_increase = None
    zero_pval_positive = None

    for lag, results_dict in results:
        Ls = results_dict.keys()
        Lmin = min(Ls)
        Lmax = max(Ls)

        corrs_Lmin = results_dict[Lmin]
        corrs_Lmax = results_dict[Lmax]

        # Skip lags for which either extreme library size has no result.
        if corrs_Lmin is None or corrs_Lmax is None:
            continue

        pval_positive = statutils.inverse_quantile(corrs_Lmax, 0.0).tolist()
        pval_increase = 1.0 - numpy.mean(statutils.inverse_quantile(corrs_Lmin, corrs_Lmax))

        db.execute(
            'INSERT INTO tests VALUES (?,?,?,?,?,?,?)',
            [cname, ename, lag, Lmin, Lmax, pval_positive, pval_increase]
        )

        # Lags are ranked by the median correlation at the largest library.
        corrs = corrs_Lmax
        corr_med = numpy.median(corrs)

        if lag == 0:
            zero_corr_med = corr_med
            zero_corrs = corrs
            zero_pval_increase = pval_increase
            zero_pval_positive = pval_positive
        elif lag < 0 and (best_lag_neg is None or corr_med > best_lag_neg_corr_med):
            best_lag_neg = lag
            best_lag_neg_corr_med = corr_med
            best_lag_neg_corrs = corrs
            best_lag_neg_pval_increase = pval_increase
            best_lag_neg_pval_positive = pval_positive
        elif lag > 0 and (best_lag_pos is None or corr_med > best_lag_pos_corr_med):
            best_lag_pos = lag
            best_lag_pos_corr_med = corr_med
            best_lag_pos_corrs = corrs
            best_lag_pos_pval_increase = pval_increase
            best_lag_pos_pval_positive = pval_positive

        # Persist every individual correlation for every library size.
        for L in Ls:
            for corr in results_dict[L]:
                db.execute(
                    'INSERT INTO correlations VALUES (?,?,?,?,?)',
                    [cname, ename, lag, L, corr]
                )

    # Get the best negative-or-zero lag
    if best_lag_neg_corr_med > zero_corr_med:
        best_lag_nonpos_corrs = best_lag_neg_corrs
    else:
        best_lag_nonpos_corrs = zero_corrs

    # Get the best positive-or-zero lag
    if best_lag_pos_corr_med > zero_corr_med:
        best_lag_nonneg_corrs = best_lag_pos_corrs
    else:
        best_lag_nonneg_corrs = zero_corrs

    # Test if negative is better than nonnegative
    pval_neg_best = 1.0 - numpy.mean(statutils.inverse_quantile(best_lag_nonneg_corrs, best_lag_neg_corrs))

    # Test if nonpositive is better than positive
    pval_nonpos_best = 1.0 - numpy.mean(statutils.inverse_quantile(best_lag_pos_corrs, best_lag_nonpos_corrs))

    # Overall best lag: a nonzero lag wins only if it strictly beats both the
    # opposite sign and lag 0; otherwise lag 0 is reported.
    if best_lag_neg_corr_med > best_lag_pos_corr_med and best_lag_neg_corr_med > zero_corr_med:
        best_lag = best_lag_neg
        pval_increase = best_lag_neg_pval_increase
        pval_positive = best_lag_neg_pval_positive
    elif best_lag_pos_corr_med > best_lag_neg_corr_med and best_lag_pos_corr_med > zero_corr_med:
        best_lag = best_lag_pos
        pval_increase = best_lag_pos_pval_increase
        pval_positive = best_lag_pos_pval_positive
    else:
        best_lag = 0
        pval_increase = zero_pval_increase
        pval_positive = zero_pval_positive

    db.execute(
        'INSERT INTO lagtests VALUES (?,?,?,?,?,?,?)',
        [cname, ename, best_lag, pval_positive, pval_increase, pval_neg_best, pval_nonpos_best]
    )

    db.commit()