Example 1
 def test_bca_errorbar_output_simple(self):
     np.random.seed(1234567890)
     results_default = boot.ci(self.data, np.average)
     np.random.seed(1234567890)
     results_errorbar = boot.ci(self.data, np.average, output='errorbar')
     np.testing.assert_array_almost_equal(
         results_errorbar.T,
         abs(np.average(self.data) - results_default)[np.newaxis])
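These test fragments assume a class fixture such as self.data; a minimal self-contained sketch of the same calls (assuming scikits.bootstrap is installed and imported as boot, with a hypothetical stand-in for self.data) might look like:

import numpy as np
import scikits.bootstrap as boot

np.random.seed(1234567890)
data = np.random.rand(100)  # hypothetical stand-in for self.data

ci_default = boot.ci(data, np.average)                      # BCa interval: array([low, high])
ci_errorbar = boot.ci(data, np.average, output='errorbar')  # distances from the mean, suitable for plt.errorbar
print(ci_default)
print(ci_errorbar)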
Example 2
 def test_pi_multi_2dout_multialpha(self):
     np.random.seed(1234567890)
     results1 = boot.ci((self.x,self.y), stats.linregress, alpha=(0.1,0.2,0.8,0.9),n_samples=2000,method='pi')
     np.random.seed(1234567890)
     results2 = boot.ci(np.vstack((self.x,self.y)).T, lambda a: stats.linregress(a)[0], alpha=(0.1,0.2,0.8,0.9),n_samples=2000,method='pi')
     np.random.seed(1234567890)
     results3 = boot.ci(np.vstack((self.x,self.y)).T, lambda a: stats.linregress(a)[1], alpha=(0.1,0.2,0.8,0.9),n_samples=2000,method='pi')
     np.testing.assert_array_almost_equal(results1[:,0],results2)
     np.testing.assert_array_almost_equal(results1[:,1],results3)
Example 3
 def test_bca_multi_multialpha(self):
     np.random.seed(1234567890)
     results1 = boot.ci((self.x, self.y),
                        lambda a, b: stats.linregress(a, b)[1],
                        alpha=(0.1, 0.2, 0.8, 0.9),
                        n_samples=1000)
     np.random.seed(1234567890)
     results2 = boot.ci(np.vstack((self.x, self.y)).T,
                        lambda a: stats.linregress(a)[1],
                        alpha=(0.1, 0.2, 0.8, 0.9),
                        n_samples=1000)
     np.testing.assert_array_almost_equal(results1, results2)
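The regression tests above rely on paired fixtures self.x and self.y; a hypothetical standalone version of the tuple-input call could be:

import numpy as np
from scipy import stats
import scikits.bootstrap as boot

np.random.seed(1234567890)
x = np.linspace(0, 1, 100)                        # hypothetical stand-in for self.x
y = 2.0 * x + np.random.normal(0, 0.2, size=100)  # hypothetical stand-in for self.y

# Passing a tuple resamples (x, y) pairs jointly; the statistic here is the intercept.
ci_intercept = boot.ci((x, y), lambda a, b: stats.linregress(a, b)[1],
                       alpha=(0.1, 0.2, 0.8, 0.9), n_samples=1000)
print(ci_intercept)  # one endpoint per requested alpha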
Example 4
def main(presentation=False):
    #
    # Recreate STDP curve
    #
    theta = 51

    curveparams = {"tau_pre": 0.01, "tau_post": 0.01, "pulse_rate": 5.0}

    # Iterate over some pulse_delay values
    pulse_delays = np.linspace(-0.1, 0.1, 100)

    omegas = simulate(theta, curveparams, pulse_delays)[::-1]
    omegas /= np.amax(omegas)  # normalize

    sim_curve_x, sim_curve_y = BCMSim.fit_stdp_curve(pulse_delays, omegas)

    sim_stdp = {"x": pulse_delays, "y": omegas, "fit_x": sim_curve_x, "fit_y": sim_curve_y}

    plot_stdp_curves(sim_stdp, exp_stdp, presentation)

    #
    # Recreate frequency effects
    #
    freqparams = {"tau_pre": 0.36, "tau_post": 0.0022, "pulse_delay": 0.00135, "num_pairings": 5}

    theta_low = 197
    theta_high = 216
    pulse_rates = [1.0, 2.0, 10.0, 20.0, 100.0]

    np.random.seed(6)  # 1, 5

    low_omegas = simulate(theta_low, freqparams, pulse_rates, trials=25, random_start=True)
    low_scale = 0.2 / np.mean(low_omegas[-1])
    low_omegas *= low_scale
    low_conf = ci(low_omegas, axis=1)

    high_omegas = simulate(theta_high, freqparams, pulse_rates, trials=25, random_start=True)
    high_scale = 0.2 / np.mean(high_omegas[-1])
    high_omegas *= high_scale
    high_conf = ci(high_omegas, axis=1)

    sim_freq_data = {
        "pulse_rates": pulse_rates,
        "low_l": low_conf[0],
        "low_m": np.mean(low_omegas, axis=1),
        "low_h": low_conf[1],
        "high_l": high_conf[0],
        "high_m": np.mean(high_omegas, axis=1),
        "high_h": high_conf[1],
    }

    plot_frequencies(sim_freq_data, exp_freq_data, presentation)
Example 5
 def test_bca_n_samples(self):
     np.random.seed(1234567890)
     results = boot.ci(self.data,
                       np.average,
                       alpha=(0.1, 0.2, 0.8, 0.9),
                       n_samples=500)
     np.testing.assert_array_almost_equal(
         results, np.array([0.40027628, 0.5063184, 0.94082515, 1.05653929]))
Example 7
 def get_ci(self, vals):
     if len(set(vals)) == 1:
         return (vals[0], vals[0])
     # In case bootstrap.py is missing or not working:
     # loc = np.mean(vals)
     # scale = np.std(vals) / np.sqrt(len(vals))
     # return stats.t.interval(0.95, len(vals)-1, loc=loc, scale=scale)
     return bootstrap.ci(vals, method='bca')
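A rough, self-contained comparison of the bootstrap interval with the parametric fallback sketched in the comments above (the sample data below is hypothetical):

import numpy as np
from scipy import stats
import scikits.bootstrap as bootstrap

np.random.seed(0)
vals = np.random.exponential(scale=2.0, size=50)  # hypothetical sample

# BCa bootstrap interval for the mean (what get_ci returns)
bca_low, bca_high = bootstrap.ci(vals, method='bca')

# The commented-out fallback: a 95% t interval around the sample mean
loc = np.mean(vals)
scale = np.std(vals) / np.sqrt(len(vals))
t_low, t_high = stats.t.interval(0.95, len(vals) - 1, loc=loc, scale=scale)

print(bca_low, bca_high)
print(t_low, t_high)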
Example 8
 def test_abc_multialpha_unified(self):
     results = boot.ci(self.data,
                       lambda x, weights: np.average(x, weights=weights),
                       alpha=(0.1, 0.2, 0.8, 0.9),
                       method='abc')
     np.testing.assert_array_almost_equal(
         results, np.array([0.39472915, 0.51161304, 0.93789723,
                            1.04407254]))
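Unlike 'bca' or 'pi', the 'abc' method evaluates the statistic on reweighted data, so the statfunction has to accept a weights argument; a minimal sketch of the same call with hypothetical data:

import numpy as np
import scikits.bootstrap as boot

np.random.seed(1234567890)
data = np.random.rand(100)  # hypothetical stand-in for self.data

# The lambda takes a weights keyword because method='abc' reweights the sample.
results = boot.ci(data,
                  lambda x, weights: np.average(x, weights=weights),
                  alpha=(0.1, 0.2, 0.8, 0.9),
                  method='abc')
print(results)  # four endpoints, one per requested alpha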
Example 9
 def test_pi_multialpha(self):
     np.random.seed(1234567890)
     results = boot.ci(self.data,
                       np.average,
                       method='pi',
                       alpha=(0.1, 0.2, 0.8, 0.9))
     np.testing.assert_array_almost_equal(
         results, np.array([0.40351601, 0.51723236, 0.94547054,
                            1.05749207]))
Example 10
def get_sparsity(bcm_files):
    tr = []
    for bcm_f in bcm_files:
        t, _, transform = get_data(bcm_f)
        tr.append(sparsity_v(transform))
    tr = np.vstack(tr)
    tr_m = np.mean(tr, axis=0)
    tr_lh = ci(tr, axis=0)
    return t, (tr_lh[0], tr_m, tr_lh[1])
Example 11
def get_mse(control_files, other_files):
    se = []
    for c_f, o_f in zip(control_files, other_files):
        time, control, _ = get_data(c_f)
        time, other, _ = get_data(o_f)
        se.append(np.sum((control - other) ** 2, axis=0))
    se = np.vstack(se)
    mean = np.mean(se, axis=0)
    conf = ci(se, axis=0)
    return time, conf[0], mean, conf[1]
Example 12
def calc_bootstrap(data):
    # --- >>> START stats <<< ---
    # Calculate the bootstrap
    CIs = bootstrap.ci(data=data, statfunction=sp.mean)
    # --- >>> STOP stats <<< ---
    
    # Print the result; the "*" unpacks the CIs array into separate format arguments
    print('The confidence intervals for the mean are: {0} - {1}'.format(*CIs))
    
    return CIs
Example 13
def calc_bootstrap(data):
    # --- >>> START stats <<< ---
    # Calculate the bootstrap
    CIs = bootstrap.ci(data=data, statfunction=sp.mean)
    # --- >>> STOP stats <<< ---

    # Print the result; the "*" unpacks the CIs array into separate format arguments
    print('The confidence intervals for the mean are: {0} - {1}'.format(*CIs))

    return CIs
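A hypothetical way to exercise calc_bootstrap, assuming the imports behind the fragment above (scipy as sp, where sp.mean is a legacy alias for np.mean, and scikits.bootstrap as bootstrap):

import numpy as np
import scipy as sp
import scikits.bootstrap as bootstrap

np.random.seed(42)
data = np.random.normal(loc=5.0, scale=2.0, size=200)  # hypothetical sample
CIs = calc_bootstrap(data)  # prints the interval and returns it as an array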
Example 14
 def test_pi_multi_2dout_multialpha(self):
     np.random.seed(1234567890)
     results1 = boot.ci((self.x, self.y),
                        stats.linregress,
                        alpha=(0.1, 0.2, 0.8, 0.9),
                        n_samples=2000,
                        method='pi')
     np.random.seed(1234567890)
     results2 = boot.ci(np.vstack((self.x, self.y)).T,
                        lambda a: stats.linregress(a)[0],
                        alpha=(0.1, 0.2, 0.8, 0.9),
                        n_samples=2000,
                        method='pi')
     np.random.seed(1234567890)
     results3 = boot.ci(np.vstack((self.x, self.y)).T,
                        lambda a: stats.linregress(a)[1],
                        alpha=(0.1, 0.2, 0.8, 0.9),
                        n_samples=2000,
                        method='pi')
     np.testing.assert_array_almost_equal(results1[:, 0], results2)
     np.testing.assert_array_almost_equal(results1[:, 1], results3)
Example 15
 def test_bca_errorbar_output_simple(self):
     np.random.seed(1234567890)
     results_default = boot.ci(self.data,np.average)
     np.random.seed(1234567890)
     results_errorbar = boot.ci(self.data,np.average,output='errorbar')
     np.testing.assert_array_almost_equal(results_errorbar.T,abs(np.average(self.data)-results_default)[np.newaxis])
Example 16
 def test_bca_simple(self):
     np.random.seed(1234567890)
     results = boot.ci(self.data,np.average)
     np.testing.assert_array_almost_equal(results,np.array([ 0.20907826,  1.19877862]))
Example 17
 def test_pi_multialpha(self):
     np.random.seed(1234567890)
     results = boot.ci(self.data,np.average,method='pi',alpha=(0.1,0.2,0.8,0.9))
     np.testing.assert_array_almost_equal(results,np.array([ 0.40351601,  0.51723236,  0.94547054,  1.05749207]))
Example 18
 def test_abc_multialpha_unified(self):
     results = boot.ci(self.data,lambda x,weights: np.average(x,weights=weights),alpha=(0.1,0.2,0.8,0.9),method='abc')
     np.testing.assert_array_almost_equal(results,np.array([ 0.39472915,  0.51161304,  0.93789723,  1.04407254]))
Example 19
 def test_pi_pandas_series(self):
     np.random.seed(1234567890)
     results = boot.ci(self.pds,np.average,method='pi')
     np.testing.assert_array_almost_equal(results,np.array([ 0.2288689 ,  1.21259752]))
Example 20
 def test_bca_pandas_series(self):
     np.random.seed(1234567890)
     results = boot.ci(self.pds, np.average)
     np.testing.assert_array_almost_equal(
         results, np.array([0.20907826, 1.19877862]))
Example 21
def run(std=True, domain=''):
    entities = os.listdir(properties.evaluation_dir)

    _random, bayes_random, bayes_no_variation, bayes_variation, siddharthan, deemter = get_values(entities, domain)

    general_random = {'accuracy':[], 'string':[], 'jaccard':[]}
    general_bayes_random = {'accuracy':[], 'string':[], 'jaccard':[]}
    general_bayes_no_variation = {'accuracy':[], 'string':[], 'jaccard':[]}
    general_bayes_variation = {'accuracy':[], 'string':[], 'jaccard':[]}
    general_siddharthan = {'accuracy':[], 'string':[], 'jaccard':[]}
    general_deemter = {'accuracy':[], 'string':[], 'jaccard':[]}
    number_samples, number_samples1 = 0, 0

    for fold in bayes_random:
        general_random['accuracy'].append(accuracy_score(_random[fold]['y_real'], _random[fold]['y_pred']))
        general_random['string'].append(np.mean(_random[fold]['string']))
        general_random['jaccard'].append(np.mean(_random[fold]['jaccard']))

        general_siddharthan['accuracy'].append(accuracy_score(siddharthan[fold]['y_real'], siddharthan[fold]['y_pred']))
        general_siddharthan['string'].append(np.mean(siddharthan[fold]['string']))
        general_siddharthan['jaccard'].append(np.mean(siddharthan[fold]['jaccard']))

        general_deemter['accuracy'].append(accuracy_score(deemter[fold]['y_real'], deemter[fold]['y_pred']))
        general_deemter['string'].append(np.mean(deemter[fold]['string']))
        general_deemter['jaccard'].append(np.mean(deemter[fold]['jaccard']))

        general_bayes_random['accuracy'].append(accuracy_score(bayes_random[fold]['y_real'], bayes_random[fold]['y_pred']))
        general_bayes_random['string'].append(np.mean(bayes_random[fold]['string']))
        general_bayes_random['jaccard'].append(np.mean(bayes_random[fold]['jaccard']))

        general_bayes_no_variation['accuracy'].append(accuracy_score(bayes_no_variation[fold]['y_real'], bayes_no_variation[fold]['y_pred']))
        general_bayes_no_variation['string'].append(np.mean(bayes_no_variation[fold]['string']))
        general_bayes_no_variation['jaccard'].append(np.mean(bayes_no_variation[fold]['jaccard']))

        general_bayes_variation['accuracy'].append(accuracy_score(bayes_variation[fold]['y_real'], bayes_variation[fold]['y_pred']))
        general_bayes_variation['string'].append(np.mean(bayes_variation[fold]['string']))
        general_bayes_variation['jaccard'].append(np.mean(bayes_variation[fold]['jaccard']))
        number_samples += len(bayes_no_variation[fold]['string'])
        number_samples1 += len(siddharthan[fold]['string'])

        if std and domain == '':
            print 'Fold', fold
            print 'Labels: '
            print 'Random: ', accuracy_score(_random[fold]['y_real'], _random[fold]['y_pred'])
            print 'Siddharthan: ', accuracy_score(siddharthan[fold]['y_real'], siddharthan[fold]['y_pred'])
            print 'Deemter: ', accuracy_score(deemter[fold]['y_real'], deemter[fold]['y_pred'])
            print 'Bayes Random: ', accuracy_score(bayes_random[fold]['y_real'], bayes_random[fold]['y_pred'])
            print 'Bayes No Variation: ', accuracy_score(bayes_no_variation[fold]['y_real'], bayes_no_variation[fold]['y_pred'])
            print 'Bayes Variation: ', accuracy_score(bayes_variation[fold]['y_real'], bayes_variation[fold]['y_pred'])
            print 20 * '-'
            print 'String Distance: '
            print 'Random: ', np.mean(_random[fold]['string'])
            print 'Siddharthan: ', np.mean(siddharthan[fold]['string'])
            print 'Deemter: ', np.mean(deemter[fold]['string'])
            print 'Bayes Random: ', np.mean(bayes_random[fold]['string'])
            print 'Bayes No Variation: ', np.mean(bayes_no_variation[fold]['string'])
            print 'Bayes Variation: ', np.mean(bayes_variation[fold]['string'])
            print 20 * '-'
            print 'Jaccard Distance: '
            print 'Random: ', np.mean(_random[fold]['jaccard'])
            print 'Siddharthan: ', np.mean(siddharthan[fold]['jaccard'])
            print 'Deemter: ', np.mean(deemter[fold]['jaccard'])
            print 'Bayes Random: ', np.mean(bayes_random[fold]['jaccard'])
            print 'Bayes No Variation: ', np.mean(bayes_no_variation[fold]['jaccard'])
            print 'Bayes Variation: ', np.mean(bayes_variation[fold]['jaccard'])
            print 20 * '-'
            print '\n'

    if std:
        print 'GENERAL', domain
        print 'Labels: '
        print 'Random: ', np.mean(general_random['accuracy'])
        print 'Siddharthan: ', np.mean(general_siddharthan['accuracy'])
        print 'Deemter: ', np.mean(general_deemter['accuracy'])
        print 'Bayes Random: ', np.mean(general_bayes_random['accuracy'])
        print 'Bayes No Variation: ', np.mean(general_bayes_no_variation['accuracy'])
        print 'Bayes Variation: ', np.mean(general_bayes_variation['accuracy'])
        print 20 * '-'
        print 'String Distance: '
        print 'Random: ', mean_confidence_interval(general_random['string']), bootstrap.ci(general_random['string'])
        print 'Siddharthan: ', mean_confidence_interval(general_siddharthan['string']), bootstrap.ci(general_siddharthan['string'])
        print 'Deemter: ', mean_confidence_interval(general_deemter['string']), bootstrap.ci(general_deemter['string'])
        print 'Bayes Random: ', mean_confidence_interval(general_bayes_random['string']), bootstrap.ci(general_bayes_random['string'])
        print 'Bayes No Variation: ', mean_confidence_interval(general_bayes_no_variation['string']), bootstrap.ci(general_bayes_no_variation['string'])
        print 'Bayes Variation: ', mean_confidence_interval(general_bayes_variation['string']), bootstrap.ci(general_bayes_variation['string'])
        print 20 * '-'
        print 'Jaccard Distance: '
        print 'Random: ', mean_confidence_interval(general_random['jaccard']), bootstrap.ci(general_random['jaccard'])
        print 'Siddharthan: ', mean_confidence_interval(general_siddharthan['jaccard']), bootstrap.ci(general_siddharthan['jaccard'])
        print 'Deemter: ', mean_confidence_interval(general_deemter['jaccard']), bootstrap.ci(general_deemter['jaccard'])
        print 'Bayes Random: ', mean_confidence_interval(general_bayes_random['jaccard']), bootstrap.ci(general_bayes_random['jaccard'])
        print 'Bayes No Variation: ', mean_confidence_interval(general_bayes_no_variation['jaccard']), bootstrap.ci(general_bayes_no_variation['jaccard'])
        print 'Bayes Variation: ', mean_confidence_interval(general_bayes_variation['jaccard']), bootstrap.ci(general_bayes_variation['jaccard'])
        print 20 * '-'
        print '\n'

        print 'String -> T-test: Random X PN-Variation'
        t, p = stats.ttest_ind(general_random['string'], general_bayes_no_variation['string'])
        print round(t, 6), p
        t, p = stats.ttest_rel(general_random['string'], general_bayes_no_variation['string'])
        print round(t, 6), p
        print 10 * '-'
        print 'String -> T-test: Random X PN+Variation'
        t, p = stats.ttest_ind(general_random['string'], general_bayes_variation['string'])
        print round(t, 6), p
        t, p = stats.ttest_rel(general_random['string'], general_bayes_variation['string'])
        print round(t, 6), p
        print 10 * '-'

        print 'String -> T-test: Siddarthan X PN-Variation'
        t, p = stats.ttest_ind(general_siddharthan['string'], general_bayes_no_variation['string'])
        print round(t, 6), p
        t, p = stats.ttest_rel(general_siddharthan['string'], general_bayes_no_variation['string'])
        print round(t, 6), p
        print 10 * '-'
        print 'String -> T-test: Siddarthan X PN+Variation'
        t, p = stats.ttest_ind(general_siddharthan['string'], general_bayes_variation['string'])
        print round(t, 6), p
        t, p = stats.ttest_rel(general_siddharthan['string'], general_bayes_variation['string'])
        print round(t, 6), p
        print 10 * '-'

        print 'String -> T-test: Deemter X PN-Variation'
        t, p = stats.ttest_ind(general_deemter['string'], general_bayes_no_variation['string'])
        print round(t, 6), p
        t, p = stats.ttest_rel(general_deemter['string'], general_bayes_no_variation['string'])
        print round(t, 6), p
        print 10 * '-'
        print 'String -> T-test: Deemter X PN+Variation'
        t, p = stats.ttest_ind(general_deemter['string'], general_bayes_variation['string'])
        print round(t, 6), p
        t, p = stats.ttest_rel(general_deemter['string'], general_bayes_variation['string'])
        print round(t, 6), p
        print 10 * '-'

        print 'String -> T-test: PN-Variation X PN+Variation'
        t, p = stats.ttest_ind(general_bayes_no_variation['string'], general_bayes_variation['string'])
        print round(t, 6), p
        t, p = stats.ttest_rel(general_bayes_no_variation['string'], general_bayes_variation['string'])
        print round(t, 6), p
        print 10 * '-'

        print 'String -> One way ANOVA'
        t, p = stats.f_oneway(general_random['string'], general_siddharthan['string'], general_deemter['string'], general_bayes_no_variation['string'], general_bayes_variation['string'])
        print round(t, 6), p
        print 10 * '-'

        print 'Number of samples: ', number_samples, number_samples1

    write_dir = '/roaming/tcastrof/names/eacl/evaluation'
    write_csv(general_random, general_siddharthan, general_deemter, general_bayes_no_variation, general_bayes_variation, write_dir, domain)
Example 22
    def calculate_similarity(self, quartile=False):
        """
        Mode is either ibm or Word2Vec
        """
        print "calculating similarities"
        PATH = os.path.dirname(os.path.abspath(__file__))
        benchmarks = os.listdir(PATH+"/word-sim-data/")
        mode = self.mode
        vocab = self.vocab
        word_vectors = self.word_vectors
        rhos = []
        p_values = []
        num_pairs = []
        reports = ''
        cis = []
        if quartile != False:
            conc = self.conc
        counter = 0
        print "Using benchmarks:", benchmarks
        print "Number of benchmarks:", len(benchmarks)

        for name in benchmarks:
            counter += 1
            print "At benchmark:", name
            print "Remaining:", len(benchmarks)-counter
            benchmark = pd.read_csv(PATH+"/word-sim-data/"+name, sep='\t', header=None)
            a = zip(benchmark[benchmark.columns[0]], benchmark[benchmark.columns[1]])
            benchmark =  dict(zip(a, benchmark[benchmark.columns[2]]))
            wordpairs = [x for x in benchmark.keys() if x[0] in vocab and x[1] in vocab] 

            if quartile == 'lower':
                sorted_wordpairs = sorted([(x,y,conc[x]*conc[y]) for x, y in wordpairs if x in conc 
                                          and y in conc and x != y], key=lambda x: x[2])
                wordpairs = [(x, y) for x,y,z in sorted_wordpairs[:int(len(sorted_wordpairs)*0.5)]]

            elif quartile == 'upper':
                sorted_wordpairs= sorted([(x,y,conc[x]*conc[y]) for x, y in wordpairs if x in conc 
                                          and y in conc and x != y], key=lambda x: x[2])
                wordpairs = [(x, y) for x,y,z in sorted_wordpairs[int(len(sorted_wordpairs)*0.5):]]



            overlap = 0
            orig_sim = []
            predicted_sim = []
            count = 0
            for i in wordpairs:
                count+=1
                word1 = i[0]
                word2 = i[1]
                if word1 in vocab and word2 in vocab:
                    orig_sim.append(benchmark[i])
                    sim = self.similarity(word_vectors[word1], word_vectors[word2])
                    predicted_sim.append(sim)
                if quartile != False:
                    reports += ' '.join([word1, word2, str(conc[word1]), str(conc[word2]), 
                                   str(benchmark[i]), str(sim), name])+'\n'


            num_pairs.append(len(wordpairs))
            corr = spearmanr(orig_sim, predicted_sim)
            CIs = bootstrap.ci(data=(orig_sim, predicted_sim), statfunction=spearmanr, method='pi')  
            performance_record = dict(zip(wordpairs, zip(orig_sim, predicted_sim)))
            print "Bootstrapped 95% confidence intervals\n, ", CIs[:, 0] 
            
            try:
                rhos.append(round(corr[0], 3))
                p_values.append(round(corr[1], 3))
                cis.append(CIs[:, 0])
            except:
                rhos.append('-')
                p_values.append('-')
                cis.append('-')

        benchmarks = map(lambda x: x.replace('.txt', '').replace('EN-', ''), benchmarks)
        return benchmarks, p_values, rhos, num_pairs, cis
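A stripped-down sketch of the bootstrap call used above; the input arrays here are hypothetical, and because spearmanr returns (rho, p-value) the code indexes column 0 of the result:

import numpy as np
from scipy.stats import spearmanr
import scikits.bootstrap as bootstrap

np.random.seed(0)
orig_sim = np.random.rand(50)                            # hypothetical gold similarities
predicted_sim = orig_sim + np.random.normal(0, 0.1, 50)  # hypothetical predicted similarities

# With tuple data the pairs are resampled jointly; since spearmanr returns
# (rho, p-value), ci() gives a 2x2 array: rows are low/high, columns the two outputs.
CIs = bootstrap.ci(data=(orig_sim, predicted_sim), statfunction=spearmanr, method='pi')
print(CIs[:, 0])  # bootstrapped 95% CI for Spearman's rho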
Example 23
def get_ci(vals):
    """Bootstrapped 95% confidence intervals."""
    return bootstrap.ci(vals, method='bca')
Example 24
def run(std=True, domain=''):
    entities = os.listdir(properties.evaluation_dir)

    _random, bayes_random, bayes_no_variation, bayes_variation, siddharthan, deemter = get_values(
        entities, domain)

    general_random = {'accuracy': [], 'string': [], 'jaccard': []}
    general_bayes_random = {'accuracy': [], 'string': [], 'jaccard': []}
    general_bayes_no_variation = {'accuracy': [], 'string': [], 'jaccard': []}
    general_bayes_variation = {'accuracy': [], 'string': [], 'jaccard': []}
    general_siddharthan = {'accuracy': [], 'string': [], 'jaccard': []}
    general_deemter = {'accuracy': [], 'string': [], 'jaccard': []}
    number_samples, number_samples1 = 0, 0

    for fold in bayes_random:
        general_random['accuracy'].append(
            accuracy_score(_random[fold]['y_real'], _random[fold]['y_pred']))
        general_random['string'].append(np.mean(_random[fold]['string']))
        general_random['jaccard'].append(np.mean(_random[fold]['jaccard']))

        general_siddharthan['accuracy'].append(
            accuracy_score(siddharthan[fold]['y_real'],
                           siddharthan[fold]['y_pred']))
        general_siddharthan['string'].append(
            np.mean(siddharthan[fold]['string']))
        general_siddharthan['jaccard'].append(
            np.mean(siddharthan[fold]['jaccard']))

        general_deemter['accuracy'].append(
            accuracy_score(deemter[fold]['y_real'], deemter[fold]['y_pred']))
        general_deemter['string'].append(np.mean(deemter[fold]['string']))
        general_deemter['jaccard'].append(np.mean(deemter[fold]['jaccard']))

        general_bayes_random['accuracy'].append(
            accuracy_score(bayes_random[fold]['y_real'],
                           bayes_random[fold]['y_pred']))
        general_bayes_random['string'].append(
            np.mean(bayes_random[fold]['string']))
        general_bayes_random['jaccard'].append(
            np.mean(bayes_random[fold]['jaccard']))

        general_bayes_no_variation['accuracy'].append(
            accuracy_score(bayes_no_variation[fold]['y_real'],
                           bayes_no_variation[fold]['y_pred']))
        general_bayes_no_variation['string'].append(
            np.mean(bayes_no_variation[fold]['string']))
        general_bayes_no_variation['jaccard'].append(
            np.mean(bayes_no_variation[fold]['jaccard']))

        general_bayes_variation['accuracy'].append(
            accuracy_score(bayes_variation[fold]['y_real'],
                           bayes_variation[fold]['y_pred']))
        general_bayes_variation['string'].append(
            np.mean(bayes_variation[fold]['string']))
        general_bayes_variation['jaccard'].append(
            np.mean(bayes_variation[fold]['jaccard']))
        number_samples += len(bayes_no_variation[fold]['string'])
        number_samples1 += len(siddharthan[fold]['string'])

        if std and domain == '':
            print 'Fold', fold
            print 'Labels: '
            print 'Random: ', accuracy_score(_random[fold]['y_real'],
                                             _random[fold]['y_pred'])
            print 'Siddharthan: ', accuracy_score(siddharthan[fold]['y_real'],
                                                  siddharthan[fold]['y_pred'])
            print 'Deemter: ', accuracy_score(deemter[fold]['y_real'],
                                              deemter[fold]['y_pred'])
            print 'Bayes Random: ', accuracy_score(
                bayes_random[fold]['y_real'], bayes_random[fold]['y_pred'])
            print 'Bayes No Variation: ', accuracy_score(
                bayes_no_variation[fold]['y_real'],
                bayes_no_variation[fold]['y_pred'])
            print 'Bayes Variation: ', accuracy_score(
                bayes_variation[fold]['y_real'],
                bayes_variation[fold]['y_pred'])
            print 20 * '-'
            print 'String Distance: '
            print 'Random: ', np.mean(_random[fold]['string'])
            print 'Siddharthan: ', np.mean(siddharthan[fold]['string'])
            print 'Deemter: ', np.mean(deemter[fold]['string'])
            print 'Bayes Random: ', np.mean(bayes_random[fold]['string'])
            print 'Bayes No Variation: ', np.mean(
                bayes_no_variation[fold]['string'])
            print 'Bayes Variation: ', np.mean(bayes_variation[fold]['string'])
            print 20 * '-'
            print 'Jaccard Distance: '
            print 'Random: ', np.mean(_random[fold]['jaccard'])
            print 'Siddharthan: ', np.mean(siddharthan[fold]['jaccard'])
            print 'Deemter: ', np.mean(deemter[fold]['jaccard'])
            print 'Bayes Random: ', np.mean(bayes_random[fold]['jaccard'])
            print 'Bayes No Variation: ', np.mean(
                bayes_no_variation[fold]['jaccard'])
            print 'Bayes Variation: ', np.mean(
                bayes_variation[fold]['jaccard'])
            print 20 * '-'
            print '\n'

    if std:
        print 'GENERAL', domain
        print 'Labels: '
        print 'Random: ', np.mean(general_random['accuracy'])
        print 'Siddharthan: ', np.mean(general_siddharthan['accuracy'])
        print 'Deemter: ', np.mean(general_deemter['accuracy'])
        print 'Bayes Random: ', np.mean(general_bayes_random['accuracy'])
        print 'Bayes No Variation: ', np.mean(
            general_bayes_no_variation['accuracy'])
        print 'Bayes Variation: ', np.mean(general_bayes_variation['accuracy'])
        print 20 * '-'
        print 'String Distance: '
        print 'Random: ', mean_confidence_interval(
            general_random['string']), bootstrap.ci(general_random['string'])
        print 'Siddharthan: ', mean_confidence_interval(
            general_siddharthan['string']), bootstrap.ci(
                general_siddharthan['string'])
        print 'Deemter: ', mean_confidence_interval(
            general_deemter['string']), bootstrap.ci(general_deemter['string'])
        print 'Bayes Random: ', mean_confidence_interval(
            general_bayes_random['string']), bootstrap.ci(
                general_bayes_random['string'])
        print 'Bayes No Variation: ', mean_confidence_interval(
            general_bayes_no_variation['string']), bootstrap.ci(
                general_bayes_no_variation['string'])
        print 'Bayes Variation: ', mean_confidence_interval(
            general_bayes_variation['string']), bootstrap.ci(
                general_bayes_variation['string'])
        print 20 * '-'
        print 'Jaccard Distance: '
        print 'Random: ', mean_confidence_interval(
            general_random['jaccard']), bootstrap.ci(general_random['jaccard'])
        print 'Siddharthan: ', mean_confidence_interval(
            general_siddharthan['jaccard']), bootstrap.ci(
                general_siddharthan['jaccard'])
        print 'Deemter: ', mean_confidence_interval(
            general_deemter['jaccard']), bootstrap.ci(
                general_deemter['jaccard'])
        print 'Bayes Random: ', mean_confidence_interval(
            general_bayes_random['jaccard']), bootstrap.ci(
                general_bayes_random['jaccard'])
        print 'Bayes No Variation: ', mean_confidence_interval(
            general_bayes_no_variation['jaccard']), bootstrap.ci(
                general_bayes_no_variation['jaccard'])
        print 'Bayes Variation: ', mean_confidence_interval(
            general_bayes_variation['jaccard']), bootstrap.ci(
                general_bayes_variation['jaccard'])
        print 20 * '-'
        print '\n'

        print 'String -> T-test: Random X PN-Variation'
        t, p = stats.ttest_ind(general_random['string'],
                               general_bayes_no_variation['string'])
        print round(t, 6), p
        t, p = stats.ttest_rel(general_random['string'],
                               general_bayes_no_variation['string'])
        print round(t, 6), p
        print 10 * '-'
        print 'String -> T-test: Random X PN+Variation'
        t, p = stats.ttest_ind(general_random['string'],
                               general_bayes_variation['string'])
        print round(t, 6), p
        t, p = stats.ttest_rel(general_random['string'],
                               general_bayes_variation['string'])
        print round(t, 6), p
        print 10 * '-'

        print 'String -> T-test: Siddarthan X PN-Variation'
        t, p = stats.ttest_ind(general_siddharthan['string'],
                               general_bayes_no_variation['string'])
        print round(t, 6), p
        t, p = stats.ttest_rel(general_siddharthan['string'],
                               general_bayes_no_variation['string'])
        print round(t, 6), p
        print 10 * '-'
        print 'String -> T-test: Siddarthan X PN+Variation'
        t, p = stats.ttest_ind(general_siddharthan['string'],
                               general_bayes_variation['string'])
        print round(t, 6), p
        t, p = stats.ttest_rel(general_siddharthan['string'],
                               general_bayes_variation['string'])
        print round(t, 6), p
        print 10 * '-'

        print 'String -> T-test: Deemter X PN-Variation'
        t, p = stats.ttest_ind(general_deemter['string'],
                               general_bayes_no_variation['string'])
        print round(t, 6), p
        t, p = stats.ttest_rel(general_deemter['string'],
                               general_bayes_no_variation['string'])
        print round(t, 6), p
        print 10 * '-'
        print 'String -> T-test: Deemter X PN+Variation'
        t, p = stats.ttest_ind(general_deemter['string'],
                               general_bayes_variation['string'])
        print round(t, 6), p
        t, p = stats.ttest_rel(general_deemter['string'],
                               general_bayes_variation['string'])
        print round(t, 6), p
        print 10 * '-'

        print 'String -> T-test: PN-Variation X PN+Variation'
        t, p = stats.ttest_ind(general_bayes_no_variation['string'],
                               general_bayes_variation['string'])
        print round(t, 6), p
        t, p = stats.ttest_rel(general_bayes_no_variation['string'],
                               general_bayes_variation['string'])
        print round(t, 6), p
        print 10 * '-'

        print 'String -> One way ANOVA'
        t, p = stats.f_oneway(general_random['string'],
                              general_siddharthan['string'],
                              general_deemter['string'],
                              general_bayes_no_variation['string'],
                              general_bayes_variation['string'])
        print round(t, 6), p
        print 10 * '-'

        print 'Number of samples: ', number_samples, number_samples1

    write_dir = '/roaming/tcastrof/names/eacl/evaluation'
    write_csv(general_random, general_siddharthan, general_deemter,
              general_bayes_no_variation, general_bayes_variation, write_dir,
              domain)
Example 25
 def test_bca_multialpha(self):
     np.random.seed(1234567890)
     results = boot.ci(self.data, np.average, alpha=(0.1, 0.2, 0.8, 0.9))
     np.testing.assert_array_almost_equal(
         results, np.array([0.39210727, 0.50775386, 0.93673299, 1.0476729]))
Example 26
 def test_bca_multialpha(self):
     np.random.seed(1234567890)
     results = boot.ci(self.data,np.average,alpha=(0.1,0.2,0.8,0.9))
     np.testing.assert_array_almost_equal(results,np.array([ 0.39210727,  0.50775386,  0.93673299,  1.0476729 ]))
Example 27
def plot_learn_curves(channel_zips, conv_zips, rules=("PES", "hPES"), presentation=False):
    group_by = "learn_type"

    figsize = (8, 6) if presentation else (5, 4)
    # Define the output extension up front; it is needed for savefig in both branches
    ext = "svg" if presentation else "pdf"
    if presentation:
        matplotlib.rc("font", size=18)
    else:
        plt.figure(figsize=figsize)

    for func, zips in zip(("channel", "conv"), (channel_zips, conv_zips)):
        if presentation:
            plt.figure(figsize=figsize)
        filenames = {"PES": [], "hPES": [], "control": []}

        # Group files by group_by
        for zfn in zips:
            zfn_b = os.path.basename("%s" % zfn)
            with zipfile.ZipFile("%s.zip" % zfn) as zf:
                for l in zf.open("%s.txt" % zfn_b):
                    if group_by in l:
                        group = l.split("=")[1].strip()
                        filenames[group].append(zfn)

        # Get the control results
        control = []
        for zfn in filenames["control"]:
            with zipfile.ZipFile("%s.zip" % zfn) as zf:
                zfn_b = os.path.basename("%s" % zfn)
                with zf.open("%s.csv" % zfn_b) as fp:
                    time, error = get_data(fp, func, "full")
                control.append(error)
        control = np.vstack(control)

        # Just take the mean for the control
        control = np.mean(control, axis=0)

        # Get the non-control results
        colors = ("b", "g")
        for rule, color in zip(rules, colors):
            err = []
            for zfn in filenames[rule]:
                zfn_b = os.path.basename("%s" % zfn)
                with zipfile.ZipFile("%s.zip" % zfn) as zf:
                    with zf.open("%s.csv" % zfn_b) as fp:
                        time, error = get_data(fp, func, "full")
                    err.append(error)
            err = np.vstack(err) / control
            mean = np.mean(err, axis=0)
            conf = ci(err, axis=0)

            if func == "channel":
                if not presentation:
                    plt.subplot(122)
                plt.title("Learning transmission")
                plt.ylim(-0.4, 9)
                plt.gca().yaxis.tick_right()
            elif func == "conv":
                if not presentation:
                    plt.subplot(121)
                plt.title("Learning binding")
                plt.ylabel("Error relative to control mean")
                if not presentation:
                    plt.xticks(np.arange(0, 80, 20))
                    plt.xlim((0, 80))
                plt.ylim(0.9, 1.6)
                plt.gca().yaxis.set_ticks_position("left")
                plt.gca().spines["right"].set_visible(False)

            plt.gca().spines["top"].set_visible(False)
            plt.gca().xaxis.set_ticks_position("bottom")

            plt.xlabel("Learning time (seconds)")
            plt.axhline(1.0, lw=1, color="0.3")
            plt.fill_between(time, y1=conf[1], y2=conf[0], color=color, alpha=0.3)
            if presentation:
                rule = "Combined, $S$=0.73" if rule == "hPES" else "Supervised, $S$=1"
            plt.plot(time, mean, color=color, lw=1, label=rule)
            if len(rules) > 1:
                if presentation:
                    plt.legend(prop={"size": 16})
                else:
                    plt.legend(prop={"size": 12})

        if presentation:
            plt.tight_layout()
            name = func + "-learncurve" if len(rules) == 2 else func + "-learncurve-pes"
            plt.savefig("%s/%s.%s" % (figuredir, name, ext), transparent=True)
            print "Saved %s.%s" % (name, ext)

    if not presentation:
        plt.tight_layout()
        plt.subplots_adjust(wspace=0)
        name = "fig4-learn-curves" if len(rules) == 2 else "learncurve-pes"
        plt.savefig("%s/%s.%s" % (figuredir, name, ext), transparent=True)
        print "Saved fig4-learn-curves.%s" % ext
        plt.close()
Example 28
 def test_bca_multi_multialpha(self):
     np.random.seed(1234567890)
     results1 = boot.ci((self.x,self.y), lambda a,b: stats.linregress(a,b)[1], alpha=(0.1,0.2,0.8,0.9),n_samples=1000)
     np.random.seed(1234567890)
     results2 = boot.ci(np.vstack((self.x,self.y)).T, lambda a: stats.linregress(a)[1], alpha=(0.1,0.2,0.8,0.9),n_samples=1000)
     np.testing.assert_array_almost_equal(results1,results2)
Example 29
 def test_bca_n_samples(self):
     np.random.seed(1234567890)
     results = boot.ci(self.data,np.average,alpha=(0.1,0.2,0.8,0.9),n_samples=500)
     np.testing.assert_array_almost_equal(results,np.array([ 0.40027628,  0.5063184 ,  0.94082515,  1.05653929]))
Example 30
'''''''''
tr_COV_C = norm(C_mat_hat,'fro')**2/n
tr_COV_C_neg = norm(C_mat_neg_hat,'fro')**2/n

tr_COV_C # |C|_F^2/n for (X_1, X_2)
tr_COV_C_neg # |C|_F^2/n for (X_1, -X_2)

# Note: specify the global variables r_1, r_2, r_12 and P_mat
# used by the function compute_tr_COV_C(Y_1_t, Y_2_t) defined at lines 15-21
r_1 = r_1_hat
r_2 = r_2_hat
r_12 = r_12_hat
P_mat = P_mat_hat

np.random.seed(0) # seed for the random number generator of the bootstrap
tr_COV_C_interval = boot.ci((Y_1.T, Y_2.T), statfunction=compute_tr_COV_C, alpha=0.05, n_samples=5000, method='bca')

tr_COV_C_interval[0,0],tr_COV_C_interval[1,0] # the 95% bootstrap CI for tr_COV_C
tr_COV_C_interval[0,1],tr_COV_C_interval[1,1] # the 95% bootstrap CI for tr_COV_C_neg


'''''''''
Choose the C of (X_1, X_2) or that of (X_1, -X_2) according to whichever of tr_COV_C and tr_COV_C_neg is larger.
See Remark 2 in the paper for details.
'''''''''
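# A hedged sketch (not from the original script) of the selection just described:
# keep whichever sign convention for X_2 gives the larger trace; the name
# C_mat_chosen is hypothetical.
if tr_COV_C >= tr_COV_C_neg:
    C_mat_chosen = C_mat_hat      # use the C estimated from (X_1, X_2)
else:
    C_mat_chosen = C_mat_neg_hat  # use the C estimated from (X_1, -X_2)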

#The common-pattern matrix C rescaled with the magnitude of X_k
C_scaled_1_hat = C_mat_hat * norm(X_1_hat,'fro')/np.sqrt(n)
C_scaled_2_hat = C_mat_hat * norm(X_2_hat,'fro')/np.sqrt(n)

#The distinctive-pattern matrix Delta_k
Example 31
 def test_pi_pandas_series(self):
     np.random.seed(1234567890)
     results = boot.ci(self.pds, np.average, method='pi')
     np.testing.assert_array_almost_equal(results,
                                          np.array([0.2288689, 1.21259752]))
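The pandas tests assume a Series fixture self.pds; a hypothetical standalone equivalent:

import numpy as np
import pandas as pd
import scikits.bootstrap as boot

np.random.seed(1234567890)
pds = pd.Series(np.random.rand(100))  # hypothetical stand-in for self.pds

print(boot.ci(pds, np.average, method='pi'))  # percentile interval
print(boot.ci(pds, np.average))               # BCa interval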