コード例 #1
0
def run_analysis_max_univariate_prediction(cname, cause, ename, effect,
                                           theiler_window):
    """Choose the embedding that best self-predicts `effect`, then analyze it.

    For every (E, tau) in the sweep, an embedding of ``effect[:-1]`` is used
    to predict ``effect[1:]`` via ``simplex_predict_using_embedding``. The
    correlation between the two returned series is logged to stderr and
    stored in the ``univariate_predictions`` table of the module-level
    ``db``. The (E, tau) with the highest correlation is then rebuilt over
    the full ``effect`` series and handed to ``run_analysis_for_embedding``.

    If no sweep point produces a usable correlation (all skipped, or all
    NaN), a message is written to stderr and no further analysis is run.
    """
    max_corr = float('-inf')
    max_corr_Etau = None
    for E in SWEEP_EMBEDDING_DIMENSIONS:
        # With E == 1 the delay tuple is (0,) for every tau, so sweeping tau
        # would only repeat the same embedding and insert duplicate rows.
        for tau in (SWEEP_DELAYS if E > 1 else [1]):
            delays = tuple(range(0, E * tau, tau))
            embedding = pyembedding.Embedding(effect[:-1], delays)
            if embedding.delay_vector_count < embedding.embedding_dimension + 2:
                sys.stderr.write(
                    '  Lmax < Lmin; skipping E={}, tau={}\n'.format(E, tau))
                continue

            eff_off, eff_off_pred = embedding.simplex_predict_using_embedding(
                embedding, effect[1:], theiler_window=theiler_window)
            corr = numpy.corrcoef(eff_off, eff_off_pred)[0, 1]

            # IF NOT EXISTS makes this a no-op after the first sweep point.
            db.execute(
                'CREATE TABLE IF NOT EXISTS univariate_predictions (variable, delays, correlation)'
            )
            db.execute('INSERT INTO univariate_predictions VALUES (?,?,?)',
                       [ename, str(delays), corr])

            sys.stderr.write('  corr for E={}, tau={} : {}\n'.format(
                E, tau, corr))
            # A NaN correlation compares False here and is never selected.
            if corr > max_corr:
                max_corr = corr
                max_corr_Etau = (E, tau)

    if max_corr_Etau is None:
        # Every candidate was skipped or produced NaN; nothing to analyze.
        sys.stderr.write(
            '  No usable embedding found for {}; skipping\n'.format(ename))
        return

    E, tau = max_corr_Etau
    delays = tuple(range(0, E * tau, tau))
    max_corr_emb = pyembedding.Embedding(effect, delays)
    sys.stderr.write('  Using E = {}, tau = {}\n'.format(*max_corr_Etau))
    run_analysis_for_embedding(cname, cause, ename, effect, max_corr_emb,
                               theiler_window)
コード例 #2
0
def run_analysis_max_univariate_prediction(cname, cause, ename, effect,
                                           theiler_window, db, rng,
                                           ccm_settings):
    """Choose the embedding that best self-predicts `effect`, then analyze it.

    Sweeps ``ccm_settings['sweep_embedding_dimensions']`` and (for E > 1)
    ``ccm_settings['sweep_delays']``. For each (E, tau), an embedding of
    ``effect[:-1]`` is used to predict ``effect[1:]``; the correlation of the
    two returned series is logged to stderr and inserted into the
    ``univariate_predictions`` table of ``db``. The (E, tau) with the highest
    correlation is rebuilt over the full ``effect`` series and passed to
    ``run_analysis_for_embedding`` with ``ccm_settings['n_ccm_bootstraps']``.

    If ``ccm_settings`` has a non-None ``'delta_tau_termination'``, the tau
    sweep for a given E stops once ``E * (tau - best_tau_for_this_E)``
    reaches that value.

    If no sweep point produces a usable correlation (all skipped, or all
    NaN), a message is written to stderr and no further analysis is run.
    """
    delta_tau_termination = ccm_settings.get('delta_tau_termination')

    max_corr = float('-inf')
    max_corr_Etau = None
    for E in ccm_settings['sweep_embedding_dimensions']:
        max_corr_this_E = float('-inf')
        max_corr_tau_this_E = None

        for tau in (ccm_settings['sweep_delays'] if E > 1 else [1]):
            delays = tuple(range(0, E * tau, tau))
            embedding = pyembedding.Embedding(effect[:-1], delays)
            if embedding.delay_vector_count < embedding.embedding_dimension + 2:
                sys.stderr.write(
                    '  Lmax < Lmin; skipping E={}, tau={}\n'.format(E, tau))
                continue

            eff_off, eff_off_pred = embedding.simplex_predict_using_embedding(
                embedding, effect[1:], theiler_window=theiler_window)
            corr = numpy.corrcoef(eff_off, eff_off_pred)[0, 1]

            # IF NOT EXISTS makes this a no-op after the first sweep point.
            db.execute(
                'CREATE TABLE IF NOT EXISTS univariate_predictions (variable, delays, correlation)'
            )
            db.execute('INSERT INTO univariate_predictions VALUES (?,?,?)',
                       [ename, str(delays), corr])

            sys.stderr.write('  corr for E={}, tau={} : {}\n'.format(
                E, tau, corr))
            # A NaN correlation compares False here and is never selected.
            if corr > max_corr:
                max_corr = corr
                max_corr_Etau = (E, tau)

            if delta_tau_termination is None:
                continue

            if corr > max_corr_this_E:
                max_corr_this_E = corr
                max_corr_tau_this_E = tau

            # The per-E best tau is still None if every correlation so far
            # was NaN; subtracting from None would raise, so keep sweeping.
            if (max_corr_tau_this_E is not None and
                    E * (tau - max_corr_tau_this_E) >= delta_tau_termination):
                sys.stderr.write(
                    '{} taus since a maximum for this E; assuming found.\n'
                    .format(delta_tau_termination))
                break

    if max_corr_Etau is None:
        # Every candidate was skipped or produced NaN; nothing to analyze.
        sys.stderr.write(
            '  No usable embedding found for {}; skipping\n'.format(ename))
        return

    E, tau = max_corr_Etau
    delays = tuple(range(0, E * tau, tau))
    max_corr_emb = pyembedding.Embedding(effect, delays)
    sys.stderr.write('  Using E = {}, tau = {}\n'.format(*max_corr_Etau))
    run_analysis_for_embedding(cname, cause, ename, effect, max_corr_emb,
                               theiler_window,
                               ccm_settings['n_ccm_bootstraps'], db, rng)
コード例 #3
0
def run_analysis_surrogates(cname, cause, ename, effect, delays,
                            theiler_window, n_bootstraps, db, rng):
    """Compare the raw CCM correlation against surrogate distributions.

    The raw correlation comes from cross-mapping ``cause`` from an embedding
    of ``effect`` built with ``delays``. Two surrogate distributions of
    ``n_bootstraps`` correlations each are then generated:

    * flag 0: only ``cause`` is replaced by a surrogate;
    * flag 1: both ``cause`` and ``effect`` are replaced by surrogates.

    For each distribution one row is written to ``surrogate_test`` (raw
    correlation, 95th percentile, and one minus the inverse quantile of the
    raw correlation) and one row to ``surrogate_dist`` (a fixed set of
    percentiles). Both tables are created in ``db`` if missing.
    """
    percentile_levels = [0, 1, 2.5, 5, 25, 50, 75, 95, 97.5, 99, 100]

    db.execute(
        'CREATE TABLE IF NOT EXISTS surrogate_test (cause, effect, use_effect_surrogates, corr_raw, corr_surr_q95, pvalue_greater)'
    )
    db.execute(
        'CREATE TABLE IF NOT EXISTS surrogate_dist (cause, effect, use_effect_surrogates, q0, q1, q2_5, q5, q25, q50, q75, q95, q97_5, q99, q100)'
    )

    raw_embedding = pyembedding.Embedding(effect, delays)
    raw_result = raw_embedding.ccm(
        raw_embedding, cause, theiler_window=theiler_window)
    corr_raw = raw_result[0]['correlation']

    def store_distribution(surrogate_flag, surrogate_corrs):
        # Write the summary test row first, then the percentile row.
        q95 = numpy.percentile(surrogate_corrs, 95)
        pvalue_greater = 1.0 - float(
            statutils.inverse_quantile(surrogate_corrs, corr_raw))
        db.execute(
            'INSERT INTO surrogate_test VALUES (?,?,?,?,?,?)',
            [cname, ename, surrogate_flag, corr_raw, q95, pvalue_greater])

        dist_row = [cname, ename, surrogate_flag]
        dist_row += numpy.percentile(
            surrogate_corrs, percentile_levels).tolist()
        db.execute(
            'INSERT INTO surrogate_dist VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)',
            dist_row)

    # Pass 1: surrogate cause against the real effect embedding.
    cause_mean, cause_anom = calculate_mean_anomaly(cause, 12)
    cause_only_corrs = []
    for _ in range(n_bootstraps):
        surrogate_cause = randomize_surrogate(cause_mean, cause_anom, rng)
        outcome, _actual, _predicted = raw_embedding.ccm(
            raw_embedding, surrogate_cause, theiler_window=theiler_window)
        cause_only_corrs.append(outcome['correlation'])
    store_distribution(0, cause_only_corrs)

    # Pass 2: surrogate cause against an embedding of a surrogate effect.
    effect_mean, effect_anom = calculate_mean_anomaly(effect, 12)
    both_corrs = []
    for _ in range(n_bootstraps):
        # Draw the cause surrogate before the effect surrogate so the rng is
        # consumed in the same order on every run.
        surrogate_cause = randomize_surrogate(cause_mean, cause_anom, rng)
        surrogate_effect = randomize_surrogate(effect_mean, effect_anom, rng)
        surrogate_embedding = pyembedding.Embedding(surrogate_effect, delays)
        outcome, _actual, _predicted = surrogate_embedding.ccm(
            surrogate_embedding, surrogate_cause,
            theiler_window=theiler_window)
        both_corrs.append(outcome['correlation'])
    store_distribution(1, both_corrs)
コード例 #4
0
def run_analysis_max_ccm_rho(cname, cause, ename, effect, theiler_window, db,
                             rng, ccm_settings):
    """Analyze the embedding of `effect` with the highest CCM correlation.

    Sweeps ``ccm_settings['sweep_embedding_dimensions']`` and (for E > 1)
    ``ccm_settings['sweep_delays']``, builds an embedding of ``effect`` for
    each (E, tau), and scores it with ``run_ccm``. The best-scoring embedding
    is passed to ``run_analysis_for_embedding`` together with
    ``ccm_settings['n_ccm_bootstraps']``.

    If no sweep point produces a usable correlation (all skipped, or all
    NaN), a message is written to stderr and no further analysis is run.
    """
    max_corr = float('-inf')
    max_corr_emb = None
    max_corr_Etau = None
    for E in ccm_settings['sweep_embedding_dimensions']:
        for tau in (ccm_settings['sweep_delays'] if E > 1 else [1]):
            delays = tuple(range(0, E * tau, tau))
            embedding = pyembedding.Embedding(effect, delays=delays)
            if embedding.delay_vector_count < embedding.embedding_dimension + 2:
                sys.stderr.write(
                    '  Lmax < Lmin; skipping E={}, tau={}\n'.format(E, tau))
                continue

            corr = run_ccm(cname, ename, embedding, cause, theiler_window, db)
            sys.stderr.write('  corr for E={}, tau={} : {}\n'.format(
                E, tau, corr))
            # A NaN correlation compares False here and is never selected.
            if corr > max_corr:
                max_corr = corr
                max_corr_emb = embedding
                max_corr_Etau = (E, tau)

    if max_corr_Etau is None:
        # Every candidate was skipped or produced NaN; nothing to analyze.
        sys.stderr.write(
            '  No usable embedding found for {}; skipping\n'.format(ename))
        return

    sys.stderr.write('  Using E={}, tau = {}\n'.format(*max_corr_Etau))
    run_analysis_for_embedding(cname, cause, ename, effect, max_corr_emb,
                               theiler_window,
                               ccm_settings['n_ccm_bootstraps'], db, rng)
コード例 #5
0
def run_analysis_uniform_sweep(cname, cause, ename, effect, theiler_window, db, rng, ccm_settings):
    """Run the embedding analysis for every (E, tau) combination in the sweep.

    E is taken from ``ccm_settings['sweep_embedding_dimensions']``; tau is
    taken from ``ccm_settings['sweep_delays']`` when E > 1 and is fixed to 1
    otherwise. Combinations whose embedding has fewer than E + 2 delay
    vectors are reported on stderr and skipped.
    """
    for dim in ccm_settings['sweep_embedding_dimensions']:
        if dim > 1:
            lag_choices = ccm_settings['sweep_delays']
        else:
            lag_choices = [1]

        for lag in lag_choices:
            sys.stderr.write('  Running for E={}, tau={}\n'.format(dim, lag))
            candidate = pyembedding.Embedding(
                effect, delays=tuple(range(0, dim*lag, lag)))

            if candidate.delay_vector_count < candidate.embedding_dimension + 2:
                sys.stderr.write('  Lmax < Lmin; skipping E={}, tau={}\n'.format(dim, lag))
            else:
                run_analysis_for_embedding(
                    cname, cause, ename, effect, candidate, theiler_window,
                    ccm_settings['n_ccm_bootstraps'], db, rng)
コード例 #6
0
def run_analysis_uniform_sweep(cname, cause, ename, effect, theiler_window):
    """Run the embedding analysis for every (E, tau) combination in the sweep.

    E is taken from ``SWEEP_EMBEDDING_DIMENSIONS``; tau is taken from
    ``SWEEP_DELAYS`` when E > 1 and is fixed to 1 otherwise. Combinations
    whose embedding has fewer than E + 2 delay vectors are reported on
    stderr and skipped.
    """
    for dim in SWEEP_EMBEDDING_DIMENSIONS:
        if dim > 1:
            lag_choices = SWEEP_DELAYS
        else:
            lag_choices = [1]

        for lag in lag_choices:
            sys.stderr.write('  Running for E={}, tau={}\n'.format(dim, lag))
            candidate = pyembedding.Embedding(
                effect, delays=tuple(range(0, dim * lag, lag)))

            if candidate.delay_vector_count < candidate.embedding_dimension + 2:
                sys.stderr.write(
                    '  Lmax < Lmin; skipping E={}, tau={}\n'.format(dim, lag))
            else:
                run_analysis_for_embedding(cname, cause, ename, effect,
                                           candidate, theiler_window)
コード例 #7
0
def run_analysis_max_ccm_rho(cname, cause, ename, effect, theiler_window, db,
                             rng, ccm_settings):
    """Analyze the embedding of `effect` with the highest CCM correlation.

    Sweeps ``ccm_settings['sweep_embedding_dimensions']`` and (for E > 1)
    ``ccm_settings['sweep_delays']``, builds an embedding of ``effect`` for
    each (E, tau), and scores it with ``run_ccm``. The best-scoring embedding
    is passed to ``run_analysis_for_embedding`` together with
    ``ccm_settings['n_ccm_bootstraps']``.

    If ``ccm_settings`` has a non-None ``'delta_tau_termination'``, the tau
    sweep for a given E stops once ``E * (tau - best_tau_for_this_E)``
    reaches that value.

    If no sweep point produces a usable correlation (all skipped, or all
    NaN), a message is written to stderr and no further analysis is run.
    """
    delta_tau_termination = ccm_settings.get('delta_tau_termination')

    max_corr = float('-inf')
    max_corr_emb = None
    max_corr_Etau = None
    for E in ccm_settings['sweep_embedding_dimensions']:
        max_corr_this_E = float('-inf')
        max_corr_tau_this_E = None

        for tau in (ccm_settings['sweep_delays'] if E > 1 else [1]):
            delays = tuple(range(0, E * tau, tau))
            embedding = pyembedding.Embedding(effect, delays=delays)
            if embedding.delay_vector_count < embedding.embedding_dimension + 2:
                sys.stderr.write(
                    '  Lmax < Lmin; skipping E={}, tau={}\n'.format(E, tau))
                continue

            corr = run_ccm(cname, ename, embedding, cause, theiler_window, db)
            sys.stderr.write('  corr for E={}, tau={} : {}\n'.format(
                E, tau, corr))
            # A NaN correlation compares False here and is never selected.
            if corr > max_corr:
                max_corr = corr
                max_corr_emb = embedding
                max_corr_Etau = (E, tau)

            if delta_tau_termination is None:
                continue

            if corr > max_corr_this_E:
                max_corr_this_E = corr
                max_corr_tau_this_E = tau

            # The per-E best tau is still None if every correlation so far
            # was NaN; subtracting from None would raise, so keep sweeping.
            if (max_corr_tau_this_E is not None and
                    E * (tau - max_corr_tau_this_E) >= delta_tau_termination):
                sys.stderr.write(
                    '{} taus since a maximum for this E; assuming found.\n'
                    .format(delta_tau_termination))
                break

    if max_corr_Etau is None:
        # Every candidate was skipped or produced NaN; nothing to analyze.
        sys.stderr.write(
            '  No usable embedding found for {}; skipping\n'.format(ename))
        return

    sys.stderr.write('  Using E={}, tau = {}\n'.format(*max_corr_Etau))
    run_analysis_for_embedding(cname, cause, ename, effect, max_corr_emb,
                               theiler_window,
                               ccm_settings['n_ccm_bootstraps'], db, rng)
コード例 #8
0
def run_analysis_max_ccm_rho(cname, cause, ename, effect, theiler_window):
    """Analyze the embedding of `effect` with the highest CCM correlation.

    Sweeps ``SWEEP_EMBEDDING_DIMENSIONS`` and (for E > 1) ``SWEEP_DELAYS``,
    builds an embedding of ``effect`` for each (E, tau), and scores it with
    ``run_ccm``. The best-scoring embedding is passed to
    ``run_analysis_for_embedding``.

    If no sweep point produces a usable correlation (all skipped, or all
    NaN), a message is written to stderr and no further analysis is run.
    """
    max_corr = float('-inf')
    max_corr_emb = None
    max_corr_Etau = None
    for E in SWEEP_EMBEDDING_DIMENSIONS:
        for tau in (SWEEP_DELAYS if E > 1 else [1]):
            delays = tuple(range(0, E * tau, tau))
            embedding = pyembedding.Embedding(effect, delays=delays)
            if embedding.delay_vector_count < embedding.embedding_dimension + 2:
                sys.stderr.write(
                    '  Lmax < Lmin; skipping E={}, tau={}\n'.format(E, tau))
                continue

            corr = run_ccm(cname, ename, embedding, cause, theiler_window)
            sys.stderr.write('  corr for E={}, tau={} : {}\n'.format(
                E, tau, corr))
            # A NaN correlation compares False here and is never selected.
            if corr > max_corr:
                max_corr = corr
                max_corr_emb = embedding
                max_corr_Etau = (E, tau)

    if max_corr_Etau is None:
        # Every candidate was skipped or produced NaN; nothing to analyze.
        sys.stderr.write(
            '  No usable embedding found for {}; skipping\n'.format(ename))
        return

    sys.stderr.write('  Using E={}, tau = {}\n'.format(*max_corr_Etau))
    run_analysis_for_embedding(cname, cause, ename, effect, max_corr_emb,
                               theiler_window)
コード例 #9
0
def run_analysis_max_univariate_prediction_plus_lags(cname, cause, ename,
                                                     effect, theiler_window,
                                                     db, rng, ccm_settings):
    """Pick (E, tau) by self-prediction, then compare CCM across lags.

    Stage 1 sweeps ``ccm_settings['sweep_embedding_dimensions']`` and (for
    E > 1) ``ccm_settings['sweep_delays']``. For each (E, tau), an embedding
    of ``effect[:-1]`` is used to predict ``effect[1:]``; the correlation is
    logged and inserted into ``univariate_predictions``. A non-None
    ``ccm_settings['delta_tau_termination']`` stops the tau sweep for an E
    once ``E * (tau - best_tau_for_this_E)`` reaches that value.

    Stage 2 runs ``run_analysis_for_embedding`` for every lag in
    ``[-max_ccm_lag, max_ccm_lag]`` using delays shifted by that lag, and
    keeps the lag with the highest median correlation among the negative
    lags, among the positive lags, and at lag zero. From these it derives the
    best non-positive and best non-negative lag (zero wins ties), computes
    two comparison p-values with ``statutils.inverse_quantile``, and inserts
    one row into ``ccm_lag_tests``.

    If stage 1 finds no usable embedding, a message is written to stderr and
    stage 2 is skipped. If one side has no lags (``max_ccm_lag == 0``), that
    side's columns and the p-value that depends on it are stored as NULL.
    """
    delta_tau_termination = ccm_settings.get('delta_tau_termination')

    # ---- Stage 1: choose (E, tau) by univariate prediction skill ----------
    max_corr = float('-inf')
    max_corr_Etau = None
    for E in ccm_settings['sweep_embedding_dimensions']:
        max_corr_this_E = float('-inf')
        max_corr_tau_this_E = None

        for tau in (ccm_settings['sweep_delays'] if E > 1 else [1]):
            delays = tuple(range(0, E * tau, tau))
            embedding = pyembedding.Embedding(effect[:-1], delays)
            if embedding.delay_vector_count < embedding.embedding_dimension + 2:
                sys.stderr.write(
                    '  Lmax < Lmin; skipping E={}, tau={}\n'.format(E, tau))
                continue

            eff_off, eff_off_pred = embedding.simplex_predict_using_embedding(
                embedding, effect[1:], theiler_window=theiler_window)
            corr = numpy.corrcoef(eff_off, eff_off_pred)[0, 1]

            # IF NOT EXISTS makes this a no-op after the first sweep point.
            db.execute(
                'CREATE TABLE IF NOT EXISTS univariate_predictions (variable, delays, correlation)'
            )
            db.execute('INSERT INTO univariate_predictions VALUES (?,?,?)',
                       [ename, str(delays), corr])

            sys.stderr.write('  corr for E={}, tau={} : {}\n'.format(
                E, tau, corr))
            # A NaN correlation compares False here and is never selected.
            if corr > max_corr:
                max_corr = corr
                max_corr_Etau = (E, tau)

            if delta_tau_termination is None:
                continue

            if corr > max_corr_this_E:
                max_corr_this_E = corr
                max_corr_tau_this_E = tau

            # The per-E best tau is still None if every correlation so far
            # was NaN; subtracting from None would raise, so keep sweeping.
            if (max_corr_tau_this_E is not None and
                    E * (tau - max_corr_tau_this_E) >= delta_tau_termination):
                sys.stderr.write(
                    '{} taus since a maximum for this E; assuming found.\n'
                    .format(delta_tau_termination))
                break

    if max_corr_Etau is None:
        # Every candidate was skipped or produced NaN; nothing to analyze.
        sys.stderr.write(
            '  No usable embedding found for {}; skipping\n'.format(ename))
        return

    E, tau = max_corr_Etau

    # ---- Stage 2: run the analysis at every lag and keep the best ---------
    max_ccm_lag = ccm_settings['max_ccm_lag']

    # Each result is (lag, corr_med, corrs, pvalue_increase, pvalue_positive).
    no_result = (None, None, None, None, None)
    best_neg = no_result
    best_pos = no_result
    zero = no_result

    for lag in range(-max_ccm_lag, max_ccm_lag + 1):
        sys.stderr.write('  Using E = {}, tau = {}, lag = {}\n'.format(
            E, tau, lag))

        delays = tuple(range(lag, lag + E * tau, tau))
        max_corr_emb = pyembedding.Embedding(effect, delays)
        corrs, pvalue_increase, pvalue_positive = run_analysis_for_embedding(
            cname, cause, ename, effect, max_corr_emb, theiler_window,
            ccm_settings['n_ccm_bootstraps'], db, rng)
        corr_med = numpy.median(corrs)
        result = (lag, corr_med, corrs, pvalue_increase, pvalue_positive)

        if lag == 0:
            zero = result
        elif lag < 0:
            if best_neg[0] is None or corr_med > best_neg[1]:
                best_neg = result
        elif best_pos[0] is None or corr_med > best_pos[1]:
            best_pos = result

    def better_of(side, baseline):
        # Prefer `side` only when it exists and strictly beats the zero lag.
        if side[0] is not None and side[1] > baseline[1]:
            return side
        return baseline

    best_nonpos = better_of(best_neg, zero)
    best_nonneg = better_of(best_pos, zero)

    # Test if negative is better than nonnegative
    if best_neg[2] is not None and best_nonneg[2] is not None:
        pvalue_neg_best = 1.0 - numpy.mean(
            statutils.inverse_quantile(best_nonneg[2], best_neg[2]))
    else:
        pvalue_neg_best = None

    # Test if nonpositive is better than positive
    if best_pos[2] is not None and best_nonpos[2] is not None:
        pvalue_nonpos_best = 1.0 - numpy.mean(
            statutils.inverse_quantile(best_pos[2], best_nonpos[2]))
    else:
        pvalue_nonpos_best = None

    neg_lag, neg_corr_med, _, neg_p_increase, neg_p_positive = best_neg
    pos_lag, pos_corr_med, _, pos_p_increase, pos_p_positive = best_pos
    (nonpos_lag, nonpos_corr_med, _, nonpos_p_increase,
     nonpos_p_positive) = best_nonpos
    (nonneg_lag, nonneg_corr_med, _, nonneg_p_increase,
     nonneg_p_positive) = best_nonneg

    db.execute('''CREATE TABLE IF NOT EXISTS ccm_lag_tests (
        cause, effect,
        
        neg_lag,
        neg_corr_med,
        neg_pvalue_positive,
        neg_pvalue_increase,
        neg_pvalue_best,
        
        nonpos_lag,
        nonpos_corr_med,
        nonpos_pvalue_positive,
        nonpos_pvalue_increase,
        nonpos_pvalue_best,
        
        pos_lag,
        pos_corr_med,
        pos_pvalue_positive,
        pos_pvalue_increase,
        
        nonneg_lag,
        nonneg_corr_med,
        nonneg_pvalue_positive,
        nonneg_pvalue_increase
    )''')
    # Value order must match the column order of ccm_lag_tests above.
    db.execute(
        'INSERT INTO ccm_lag_tests VALUES (?,?, ?,?,?,?,?, ?,?,?,?,?, ?,?,?,?, ?,?,?,?)',
        [
            cname, ename,
            neg_lag, neg_corr_med, neg_p_positive, neg_p_increase,
            pvalue_neg_best,
            nonpos_lag, nonpos_corr_med, nonpos_p_positive,
            nonpos_p_increase, pvalue_nonpos_best,
            pos_lag, pos_corr_med, pos_p_positive, pos_p_increase,
            nonneg_lag, nonneg_corr_med, nonneg_p_positive,
            nonneg_p_increase
        ])
コード例 #10
0
def main():
    """Command-line entry point: run the analysis for every variable pair.

    Parses arguments, validates the input/output filenames, loads the input
    data, opens the output SQLite database, and then, for every ordered pair
    of distinct variables (cause, effect), builds an embedding of the effect
    and calls ``run_analysis``.

    The embedding is chosen according to ``args.method``:

    * ``'uniform'``: (E, tau) per variable comes from ``identify_Etau`` when
      no embedding dimension was given, otherwise the given E and tau are
      used for every variable. Variables whose E is None are skipped.
    * ``'projection'``: the embedding comes from
      ``identify_projection_cross``.

    Exits with status 1 if the input and output filenames are identical, or
    if the output file exists and ``--overwrite-output`` was not given.
    """
    args = parse_arguments()

    E = args.embedding_dimension
    Emin = args.min_embedding_dimension
    Emax = args.max_embedding_dimension

    tau = args.embedding_lag
    taumin = args.min_embedding_lag
    taumax = args.max_embedding_lag

    dt = args.temporal_separation

    max_lag = args.max_cross_map_lag
    lag_skip = args.cross_map_lag_skip

    n_bootstraps = args.bootstraps

    cores = args.cores

    # Check input & output files
    if args.input_filename == args.output_filename:
        # Report the same way as the check below; no parser object is
        # available in this scope.
        sys.stderr.write(
            'Input filename and output filename cannot be the same.\n')
        sys.exit(1)
    if os.path.exists(args.output_filename):
        if args.overwrite_output:
            os.remove(args.output_filename)
        else:
            sys.stderr.write(
                'Output filename {0} exists. Delete it or use --overwrite-output.\n'
                .format(args.output_filename))
            sys.exit(1)

    # Load input data
    data = load_data(args.input_filename, args.variable, args.table)

    # Set up output data
    db = sqlite3.connect(args.output_filename)
    try:
        if args.identification_target == 'self':
            assert args.method == 'uniform'

            if E is None:
                Etau_dict = identify_Etau(db, data, Emin, Emax, taumin,
                                          taumax, dt, cores)
            else:
                Etau_dict = OrderedDict([(var_name, (E, tau))
                                         for var_name in data.keys()])
        else:
            assert args.identification_target == 'cross'
            assert args.method == 'projection'

        # items() rather than iteritems() so this runs on Python 2 and 3.
        for cname, cause in data.items():
            for ename, effect in data.items():
                if cname == ename:
                    continue

                if args.method == 'uniform':
                    effect_E, effect_tau = Etau_dict[ename]

                    # Check before building the embedding: E * tau would
                    # raise on None and the skip would never be reached.
                    if effect_E is None:
                        sys.stderr.write(
                            'Skipping {} as effect\n'.format(ename))
                        continue

                    emb = pyembedding.Embedding(
                        effect, range(0, effect_E * effect_tau, effect_tau))
                elif args.method == 'projection':
                    emb = identify_projection_cross(db, cause, effect, dt)

                sys.stderr.write('Running {}, {}\n'.format(cname, ename))
                run_analysis(db, cname, cause, ename, effect, emb, dt,
                             max_lag, lag_skip, n_bootstraps, cores)

        # Persist anything the analysis left in an open transaction.
        db.commit()
    finally:
        db.close()