def test_bca_errorbar_output_simple(self):
    """Check that ``output='errorbar'`` matches distances to the statistic.

    Both ``boot.ci`` calls start from the same RNG seed.  The error-bar form,
    transposed, must equal the absolute difference between
    ``np.average(self.data)`` and the default-form interval.
    """
    seed = 1234567890

    np.random.seed(seed)
    interval = boot.ci(self.data, np.average)

    np.random.seed(seed)
    errorbar = boot.ci(self.data, np.average, output='errorbar')

    # Distance of each interval bound from the point estimate, as a row.
    expected = abs(np.average(self.data) - interval)[np.newaxis]
    np.testing.assert_array_almost_equal(errorbar.T, expected)
def test_pi_multi_2dout_multialpha(self):
    """Check 'pi' intervals of a multi-valued statistic column by column.

    ``stats.linregress`` applied to the ``(x, y)`` tuple yields several
    outputs at once.  Columns 0 and 1 of that result must match intervals
    computed separately for ``linregress(a)[0]`` and ``linregress(a)[1]`` on
    the stacked two-column data.
    """
    def seeded_pi(data, statfunction):
        # Reseed before every call so all three start from the same RNG state.
        np.random.seed(1234567890)
        return boot.ci(data, statfunction, alpha=(0.1, 0.2, 0.8, 0.9),
                       n_samples=2000, method='pi')

    joint = seeded_pi((self.x, self.y), stats.linregress)
    first_output = seeded_pi(np.vstack((self.x, self.y)).T,
                             lambda a: stats.linregress(a)[0])
    second_output = seeded_pi(np.vstack((self.x, self.y)).T,
                              lambda a: stats.linregress(a)[1])

    np.testing.assert_array_almost_equal(joint[:, 0], first_output)
    np.testing.assert_array_almost_equal(joint[:, 1], second_output)
def test_bca_multi_multialpha(self):
    """Check that tuple input and stacked input give the same interval.

    The statistic is element 1 of ``stats.linregress``; it is evaluated once
    on the ``(x, y)`` tuple and once on the two-column stacked array, both
    with ``boot.ci``'s default method and the same seed.
    """
    seed = 1234567890
    alphas = (0.1, 0.2, 0.8, 0.9)

    np.random.seed(seed)
    from_tuple = boot.ci((self.x, self.y),
                         lambda a, b: stats.linregress(a, b)[1],
                         alpha=alphas, n_samples=1000)

    np.random.seed(seed)
    from_stacked = boot.ci(np.vstack((self.x, self.y)).T,
                           lambda a: stats.linregress(a)[1],
                           alpha=alphas, n_samples=1000)

    np.testing.assert_array_almost_equal(from_tuple, from_stacked)
def main(presentation=False):
    """Simulate the STDP curve and the frequency effects, then plot both.

    ``presentation`` is passed straight through to ``plot_stdp_curves`` and
    ``plot_frequencies``.  ``exp_stdp`` and ``exp_freq_data`` are not defined
    here; they are read from the enclosing (module) scope.
    """
    #
    # Recreate STDP curve
    #
    theta = 51
    curveparams = {"tau_pre": 0.01, "tau_post": 0.01, "pulse_rate": 5.0}

    # Iterate over some pulse_delay values
    pulse_delays = np.linspace(-0.1, 0.1, 100)
    omegas = simulate(theta, curveparams, pulse_delays)[::-1]
    omegas /= np.amax(omegas)  # normalize
    fit_x, fit_y = BCMSim.fit_stdp_curve(pulse_delays, omegas)
    plot_stdp_curves(
        {"x": pulse_delays, "y": omegas, "fit_x": fit_x, "fit_y": fit_y},
        exp_stdp,
        presentation,
    )

    #
    # Recreate frequency effects
    #
    freqparams = {"tau_pre": 0.36, "tau_post": 0.0022,
                  "pulse_delay": 0.00135, "num_pairings": 5}
    pulse_rates = [1.0, 2.0, 10.0, 20.0, 100.0]

    def scaled_trials(theta_value):
        # Simulate 25 randomly started trials, rescale in place so the mean of
        # the last row becomes 0.2, then compute the interval along axis 1.
        trial_omegas = simulate(theta_value, freqparams, pulse_rates,
                                trials=25, random_start=True)
        trial_omegas *= 0.2 / np.mean(trial_omegas[-1])
        return trial_omegas, ci(trial_omegas, axis=1)

    np.random.seed(6)  # 1, 5
    # Low threshold first, then high: same call order as the seeded original.
    low_omegas, low_conf = scaled_trials(197)
    high_omegas, high_conf = scaled_trials(216)

    sim_freq_data = {
        "pulse_rates": pulse_rates,
        "low_l": low_conf[0],
        "low_m": np.mean(low_omegas, axis=1),
        "low_h": low_conf[1],
        "high_l": high_conf[0],
        "high_m": np.mean(high_omegas, axis=1),
        "high_h": high_conf[1],
    }
    plot_frequencies(sim_freq_data, exp_freq_data, presentation)
def test_bca_n_samples(self):
    """Pin the interval obtained with ``n_samples=500`` under a fixed seed."""
    expected = np.array([0.40027628, 0.5063184, 0.94082515, 1.05653929])

    np.random.seed(1234567890)
    observed = boot.ci(self.data, np.average,
                       n_samples=500, alpha=(0.1, 0.2, 0.8, 0.9))

    np.testing.assert_array_almost_equal(observed, expected)
def get_ci(self, vals):
    """Return a confidence interval for ``vals``.

    When every entry of ``vals`` is the same value, the interval is the pair
    ``(vals[0], vals[0])`` and no bootstrap is run.  Otherwise the result of
    ``bootstrap.ci(vals, method='bca')`` is returned.
    """
    distinct_values = set(vals)
    if len(distinct_values) != 1:
        # In case bootstrap.py is missing or not working:
        # loc = np.mean(vals)
        # scale = np.std(vals) / np.sqrt(len(vals))
        # return stats.t.interval(0.95, len(vals)-1, loc=loc, scale=scale)
        return bootstrap.ci(vals, method='bca')

    constant = vals[0]
    return (constant, constant)
def test_abc_multialpha_unified(self):
    """Pin the 'abc' interval of a weighted average at four alpha levels.

    No seed is set before the call.
    """
    def weighted_average(x, weights):
        # Parameter names are kept: the statistic receives a ``weights``
        # argument, which it forwards to ``np.average``.
        return np.average(x, weights=weights)

    expected = np.array([0.39472915, 0.51161304, 0.93789723, 1.04407254])
    observed = boot.ci(self.data, weighted_average,
                       alpha=(0.1, 0.2, 0.8, 0.9), method='abc')

    np.testing.assert_array_almost_equal(observed, expected)
def test_pi_multialpha(self):
    """Pin the seeded 'pi' interval of the average at four alpha levels."""
    alphas = (0.1, 0.2, 0.8, 0.9)
    expected = np.array([0.40351601, 0.51723236, 0.94547054, 1.05749207])

    np.random.seed(1234567890)
    observed = boot.ci(self.data, np.average, method='pi', alpha=alphas)

    np.testing.assert_array_almost_equal(observed, expected)
def get_sparsity(bcm_files):
    """Summarise ``sparsity_v`` of the transform across ``bcm_files``.

    Each file is loaded with ``get_data``; the third item it returns is passed
    to ``sparsity_v`` and the results are stacked row-wise.

    Returns ``(t, (low, mean, high))`` where ``t`` is the first item returned
    by ``get_data`` for the last file, ``mean`` is the mean over axis 0, and
    ``low`` / ``high`` are elements 0 and 1 of ``ci(..., axis=0)``.
    """
    per_file = []
    for path in bcm_files:
        t, _, transform = get_data(path)
        per_file.append(sparsity_v(transform))

    stacked = np.vstack(per_file)
    average = np.mean(stacked, axis=0)
    bounds = ci(stacked, axis=0)
    summary = (bounds[0], average, bounds[1])
    return t, summary
def get_mse(control_files, other_files):
    """Summarise the squared error between paired control and other runs.

    Files are paired with ``zip``.  For each pair the squared difference of
    the second items returned by ``get_data`` is summed over axis 0, and the
    per-pair results are stacked row-wise.

    Returns ``(time, low, mean, high)`` where ``time`` comes from the last
    "other" file loaded, ``mean`` is the mean over axis 0, and ``low`` /
    ``high`` are elements 0 and 1 of ``ci(..., axis=0)``.
    """
    squared_errors = []
    for control_path, other_path in zip(control_files, other_files):
        time, control, _ = get_data(control_path)
        # The second load overwrites ``time``, exactly as before.
        time, other, _ = get_data(other_path)
        difference = control - other
        squared_errors.append(np.sum(difference ** 2, axis=0))

    stacked = np.vstack(squared_errors)
    average = np.mean(stacked, axis=0)
    bounds = ci(stacked, axis=0)
    return time, bounds[0], average, bounds[1]
def calc_bootstrap(data):
    """Bootstrap a confidence interval for the mean of ``data`` and print it.

    ``data`` is handed unchanged to ``bootstrap.ci`` with the mean as the
    statistic.  The first two entries of the result are printed as the lower
    and upper bound, and the result is returned as-is.
    """
    # ``scipy.mean`` was only a deprecated re-export of ``numpy.mean`` and is
    # gone from newer SciPy releases, so use NumPy directly.  The import is
    # local so this function does not rely on a module-level ``np``.
    import numpy as np

    # --- >>> START stats <<< ---
    # Calculate the bootstrap
    CIs = bootstrap.ci(data=data, statfunction=np.mean)
    # --- >>> STOP stats <<< ---

    # Print the data: the "*" unpacks the array CIs into format arguments
    print('The confidence intervals for the mean are: {0} - {1}'.format(*CIs))
    return CIs
def calc_bootstrap(data):
    """Print and return a bootstrap confidence interval for the mean.

    The interval is computed by ``bootstrap.ci`` with ``data`` passed through
    unchanged and the arithmetic mean as the statistic.  Its first two entries
    are printed as "lower - upper"; the full result is returned.
    """
    # Use NumPy's mean directly: ``scipy.mean`` was a deprecated alias of it
    # and has been dropped from newer SciPy releases.  Imported locally so the
    # function does not depend on a module-level ``np`` being present.
    import numpy as np

    # --- >>> START stats <<< ---
    # Calculate the bootstrap
    CIs = bootstrap.ci(data=data, statfunction=np.mean)
    # --- >>> STOP stats <<< ---

    # Print the data: the "*" unpacks the array CIs into format arguments
    message = 'The confidence intervals for the mean are: {0} - {1}'
    print(message.format(*CIs))
    return CIs
def test_pi_multi_2dout_multialpha(self):
    """Compare a joint 'pi' interval with per-output 'pi' intervals.

    The joint call uses ``stats.linregress`` on the ``(x, y)`` tuple.  Its
    columns 0 and 1 are compared with intervals for ``linregress(a)[0]`` and
    ``linregress(a)[1]`` on the stacked two-column array.  The RNG is reseeded
    with the same value before each of the three calls.
    """
    seed = 1234567890
    options = {'alpha': (0.1, 0.2, 0.8, 0.9),
               'n_samples': 2000,
               'method': 'pi'}

    np.random.seed(seed)
    joint = boot.ci((self.x, self.y), stats.linregress, **options)

    np.random.seed(seed)
    column0 = boot.ci(np.vstack((self.x, self.y)).T,
                      lambda a: stats.linregress(a)[0], **options)

    np.random.seed(seed)
    column1 = boot.ci(np.vstack((self.x, self.y)).T,
                      lambda a: stats.linregress(a)[1], **options)

    np.testing.assert_array_almost_equal(joint[:, 0], column0)
    np.testing.assert_array_almost_equal(joint[:, 1], column1)
def test_bca_errorbar_output_simple(self):
    """Check the ``output='errorbar'`` form against the default interval.

    With identical seeds, the transposed error-bar result must equal the
    absolute gap between ``np.average(self.data)`` and the default result,
    expanded to a single row.
    """
    def seeded_ci(**extra):
        # Same seed before each call so both start from the same RNG state.
        np.random.seed(1234567890)
        return boot.ci(self.data, np.average, **extra)

    default_interval = seeded_ci()
    errorbar = seeded_ci(output='errorbar')

    gaps = abs(np.average(self.data) - default_interval)
    np.testing.assert_array_almost_equal(errorbar.T, gaps[np.newaxis])
def test_bca_simple(self):
    """Pin the seeded default-method interval of the average of the data."""
    expected = np.array([0.20907826, 1.19877862])

    np.random.seed(1234567890)
    observed = boot.ci(self.data, np.average)

    np.testing.assert_array_almost_equal(observed, expected)
def test_pi_multialpha(self):
    """Pin the seeded 'pi' interval of the average for four alpha values."""
    np.random.seed(1234567890)
    observed = boot.ci(self.data, np.average,
                       alpha=(0.1, 0.2, 0.8, 0.9), method='pi')

    reference = np.array([0.40351601, 0.51723236, 0.94547054, 1.05749207])
    np.testing.assert_array_almost_equal(observed, reference)
def test_abc_multialpha_unified(self):
    """Pin the 'abc' interval of a weighted average for four alpha values.

    The statistic accepts ``weights`` and forwards it to ``np.average``.  No
    seed is set before the call.
    """
    alphas = (0.1, 0.2, 0.8, 0.9)
    reference = np.array([0.39472915, 0.51161304, 0.93789723, 1.04407254])

    observed = boot.ci(
        self.data,
        lambda x, weights: np.average(x, weights=weights),
        method='abc',
        alpha=alphas,
    )

    np.testing.assert_array_almost_equal(observed, reference)
def test_pi_pandas_series(self):
    """Pin the seeded 'pi' interval of the average of ``self.pds``."""
    expected = np.array([0.2288689, 1.21259752])

    np.random.seed(1234567890)
    observed = boot.ci(self.pds, np.average, method='pi')

    np.testing.assert_array_almost_equal(observed, expected)
def test_bca_pandas_series(self):
    """Pin the seeded default-method interval of the average of ``self.pds``."""
    expected = np.array([0.20907826, 1.19877862])

    np.random.seed(1234567890)
    observed = boot.ci(self.pds, np.average)

    np.testing.assert_array_almost_equal(observed, expected)
def run(std=True, domain=''): entities = os.listdir(properties.evaluation_dir) _random, bayes_random, bayes_no_variation, bayes_variation, siddharthan, deemter = get_values(entities, domain) general_random = {'accuracy':[], 'string':[], 'jaccard':[]} general_bayes_random = {'accuracy':[], 'string':[], 'jaccard':[]} general_bayes_no_variation = {'accuracy':[], 'string':[], 'jaccard':[]} general_bayes_variation = {'accuracy':[], 'string':[], 'jaccard':[]} general_siddharthan = {'accuracy':[], 'string':[], 'jaccard':[]} general_deemter = {'accuracy':[], 'string':[], 'jaccard':[]} number_samples, number_samples1 = 0, 0 for fold in bayes_random: general_random['accuracy'].append(accuracy_score(_random[fold]['y_real'], _random[fold]['y_pred'])) general_random['string'].append(np.mean(_random[fold]['string'])) general_random['jaccard'].append(np.mean(_random[fold]['jaccard'])) general_siddharthan['accuracy'].append(accuracy_score(siddharthan[fold]['y_real'], siddharthan[fold]['y_pred'])) general_siddharthan['string'].append(np.mean(siddharthan[fold]['string'])) general_siddharthan['jaccard'].append(np.mean(siddharthan[fold]['jaccard'])) general_deemter['accuracy'].append(accuracy_score(deemter[fold]['y_real'], deemter[fold]['y_pred'])) general_deemter['string'].append(np.mean(deemter[fold]['string'])) general_deemter['jaccard'].append(np.mean(deemter[fold]['jaccard'])) general_bayes_random['accuracy'].append(accuracy_score(bayes_random[fold]['y_real'], bayes_random[fold]['y_pred'])) general_bayes_random['string'].append(np.mean(bayes_random[fold]['string'])) general_bayes_random['jaccard'].append(np.mean(bayes_random[fold]['jaccard'])) general_bayes_no_variation['accuracy'].append(accuracy_score(bayes_no_variation[fold]['y_real'], bayes_no_variation[fold]['y_pred'])) general_bayes_no_variation['string'].append(np.mean(bayes_no_variation[fold]['string'])) general_bayes_no_variation['jaccard'].append(np.mean(bayes_no_variation[fold]['jaccard'])) 
general_bayes_variation['accuracy'].append(accuracy_score(bayes_variation[fold]['y_real'], bayes_variation[fold]['y_pred'])) general_bayes_variation['string'].append(np.mean(bayes_variation[fold]['string'])) general_bayes_variation['jaccard'].append(np.mean(bayes_variation[fold]['jaccard'])) number_samples += len(bayes_no_variation[fold]['string']) number_samples1 += len(siddharthan[fold]['string']) if std and domain == '': print 'Fold', fold print 'Labels: ' print 'Random: ', accuracy_score(_random[fold]['y_real'], _random[fold]['y_pred']) print 'Siddharthan: ', accuracy_score(siddharthan[fold]['y_real'], siddharthan[fold]['y_pred']) print 'Deemter: ', accuracy_score(deemter[fold]['y_real'], deemter[fold]['y_pred']) print 'Bayes Random: ', accuracy_score(bayes_random[fold]['y_real'], bayes_random[fold]['y_pred']) print 'Bayes No Variation: ', accuracy_score(bayes_no_variation[fold]['y_real'], bayes_no_variation[fold]['y_pred']) print 'Bayes Variation: ', accuracy_score(bayes_variation[fold]['y_real'], bayes_variation[fold]['y_pred']) print 20 * '-' print 'String Distance: ' print 'Random: ', np.mean(_random[fold]['string']) print 'Siddharthan: ', np.mean(siddharthan[fold]['string']) print 'Deemter: ', np.mean(deemter[fold]['string']) print 'Bayes Random: ', np.mean(bayes_random[fold]['string']) print 'Bayes No Variation: ', np.mean(bayes_no_variation[fold]['string']) print 'Bayes Variation: ', np.mean(bayes_variation[fold]['string']) print 20 * '-' print 'Jaccard Distance: ' print 'Random: ', np.mean(_random[fold]['jaccard']) print 'Siddharthan: ', np.mean(siddharthan[fold]['jaccard']) print 'Deemter: ', np.mean(deemter[fold]['jaccard']) print 'Bayes Random: ', np.mean(bayes_random[fold]['jaccard']) print 'Bayes No Variation: ', np.mean(bayes_no_variation[fold]['jaccard']) print 'Bayes Variation: ', np.mean(bayes_variation[fold]['jaccard']) print 20 * '-' print '\n' if std: print 'GENERAL', domain print 'Labels: ' print 'Random: ', 
np.mean(general_random['accuracy']) print 'Siddharthan: ', np.mean(general_siddharthan['accuracy']) print 'Deemter: ', np.mean(general_deemter['accuracy']) print 'Bayes Random: ', np.mean(general_bayes_random['accuracy']) print 'Bayes No Variation: ', np.mean(general_bayes_no_variation['accuracy']) print 'Bayes Variation: ', np.mean(general_bayes_variation['accuracy']) print 20 * '-' print 'String Distance: ' print 'Random: ', mean_confidence_interval(general_random['string']), bootstrap.ci(general_random['string']) print 'Siddharthan: ', mean_confidence_interval(general_siddharthan['string']), bootstrap.ci(general_siddharthan['string']) print 'Deemter: ', mean_confidence_interval(general_deemter['string']), bootstrap.ci(general_deemter['string']) print 'Bayes Random: ', mean_confidence_interval(general_bayes_random['string']), bootstrap.ci(general_bayes_random['string']) print 'Bayes No Variation: ', mean_confidence_interval(general_bayes_no_variation['string']), bootstrap.ci(general_bayes_no_variation['string']) print 'Bayes Variation: ', mean_confidence_interval(general_bayes_variation['string']), bootstrap.ci(general_bayes_variation['string']) print 20 * '-' print 'Jaccard Distance: ' print 'Random: ', mean_confidence_interval(general_random['jaccard']), bootstrap.ci(general_random['jaccard']) print 'Siddharthan: ', mean_confidence_interval(general_siddharthan['jaccard']), bootstrap.ci(general_siddharthan['jaccard']) print 'Deemter: ', mean_confidence_interval(general_deemter['jaccard']), bootstrap.ci(general_deemter['jaccard']) print 'Bayes Random: ', mean_confidence_interval(general_bayes_random['jaccard']), bootstrap.ci(general_bayes_random['jaccard']) print 'Bayes No Variation: ', mean_confidence_interval(general_bayes_no_variation['jaccard']), bootstrap.ci(general_bayes_no_variation['jaccard']) print 'Bayes Variation: ', mean_confidence_interval(general_bayes_variation['jaccard']), bootstrap.ci(general_bayes_variation['jaccard']) print 20 * '-' print '\n' 
print 'String -> T-test: Random X PN-Variation' t, p = stats.ttest_ind(general_random['string'], general_bayes_no_variation['string']) print round(t, 6), p t, p = stats.ttest_rel(general_random['string'], general_bayes_no_variation['string']) print round(t, 6), p print 10 * '-' print 'String -> T-test: Random X PN+Variation' t, p = stats.ttest_ind(general_random['string'], general_bayes_variation['string']) print round(t, 6), p t, p = stats.ttest_rel(general_random['string'], general_bayes_variation['string']) print round(t, 6), p print 10 * '-' print 'String -> T-test: Siddarthan X PN-Variation' t, p = stats.ttest_ind(general_siddharthan['string'], general_bayes_no_variation['string']) print round(t, 6), p t, p = stats.ttest_rel(general_siddharthan['string'], general_bayes_no_variation['string']) print round(t, 6), p print 10 * '-' print 'String -> T-test: Siddarthan X PN+Variation' t, p = stats.ttest_ind(general_siddharthan['string'], general_bayes_variation['string']) print round(t, 6), p t, p = stats.ttest_rel(general_siddharthan['string'], general_bayes_variation['string']) print round(t, 6), p print 10 * '-' print 'String -> T-test: Deemter X PN-Variation' t, p = stats.ttest_ind(general_deemter['string'], general_bayes_no_variation['string']) print round(t, 6), p t, p = stats.ttest_rel(general_deemter['string'], general_bayes_no_variation['string']) print round(t, 6), p print 10 * '-' print 'String -> T-test: Deemter X PN+Variation' t, p = stats.ttest_ind(general_deemter['string'], general_bayes_variation['string']) print round(t, 6), p t, p = stats.ttest_rel(general_deemter['string'], general_bayes_variation['string']) print round(t, 6), p print 10 * '-' print 'String -> T-test: PN-Variation X PN+Variation' t, p = stats.ttest_ind(general_bayes_no_variation['string'], general_bayes_variation['string']) print round(t, 6), p t, p = stats.ttest_rel(general_bayes_no_variation['string'], general_bayes_variation['string']) print round(t, 6), p print 10 * '-' print 
'String -> One way ANOVA' t, p = stats.f_oneway(general_random['string'], general_siddharthan['string'], general_deemter['string'], general_bayes_no_variation['string'], general_bayes_variation['string']) print round(t, 6), p print 10 * '-' print 'Number of samples: ', number_samples, number_samples1 write_dir = '/roaming/tcastrof/names/eacl/evaluation' write_csv(general_random, general_siddharthan, general_deemter, general_bayes_no_variation, general_bayes_variation, write_dir, domain)
def calculate_similarity(self, quartile=False): """ Mode is either ibm or Word2Vec """ print "calculating similarities" PATH = os.path.dirname(os.path.abspath(__file__)) benchmarks = os.listdir(PATH+"/word-sim-data/") mode = self.mode vocab = self.vocab word_vectors = self.word_vectors rhos = [] p_values = [] num_pairs = [] reports = '' cis = [] if quartile != False: conc = self.conc counter = 0 print "Using benchmarks:", benchmarks print "Number of benchmarks:", len(benchmarks) for name in benchmarks: counter += 1 print "At benchmark:", name print "Remaining:", len(benchmarks)-counter benchmark = pd.read_csv(PATH+"/word-sim-data/"+name, sep='\t', header=None) a = zip(benchmark[benchmark.columns[0]], benchmark[benchmark.columns[1]]) benchmark = dict(zip(a, benchmark[benchmark.columns[2]])) wordpairs = [x for x in benchmark.keys() if x[0] in vocab and x[1] in vocab] if quartile == 'lower': sorted_wordpairs = sorted([(x,y,conc[x]*conc[y]) for x, y in wordpairs if x in conc and y in conc and x != y], key=lambda x: x[2]) wordpairs = [(x, y) for x,y,z in sorted_wordpairs[:int(len(sorted_wordpairs)*0.5)]] elif quartile == 'upper': sorted_wordpairs= sorted([(x,y,conc[x]*conc[y]) for x, y in wordpairs if x in conc and y in conc and x != y], key=lambda x: x[2]) wordpairs = [(x, y) for x,y,z in sorted_wordpairs[int(len(sorted_wordpairs)*0.5):]] overlap = 0 orig_sim = [] predicted_sim = [] count = 0 for i in wordpairs: count+=1 word1 = i[0] word2 = i[1] if word1 in vocab and word2 in vocab: orig_sim.append(benchmark[i]) sim = self.similarity(word_vectors[word1], word_vectors[word2]) predicted_sim.append(sim) if quartile != False: reports += ' '.join([word1, word2, str(conc[word1]), str(conc[word2]), str(benchmark[i]), str(sim), name])+'\n' num_pairs.append(len(wordpairs)) corr = spearmanr(orig_sim, predicted_sim) CIs = bootstrap.ci(data=(orig_sim, predicted_sim), statfunction=spearmanr, method='pi') performance_record = dict(zip(wordpairs, zip(orig_sim, predicted_sim))) print 
"Bootstrapped 95% confidence intervals\n, ", CIs[:, 0] try: rhos.append(round(corr[0], 3)) p_values.append(round(corr[1], 3)) cis.append(CIs[:, 0]) except: rhos.append('-') p_values.append('-') cis.append('-') benchmarks = map(lambda x: x.replace('.txt', '').replace('EN-', ''), benchmarks) return benchmarks, p_values, rhos, num_pairs, cis
def get_ci(vals):
    """Return the bootstrapped 95% confidence interval of ``vals``.

    Delegates to ``bootstrap.ci`` with ``method='bca'``.
    """
    interval = bootstrap.ci(vals, method='bca')
    return interval
def run(std=True, domain=''): entities = os.listdir(properties.evaluation_dir) _random, bayes_random, bayes_no_variation, bayes_variation, siddharthan, deemter = get_values( entities, domain) general_random = {'accuracy': [], 'string': [], 'jaccard': []} general_bayes_random = {'accuracy': [], 'string': [], 'jaccard': []} general_bayes_no_variation = {'accuracy': [], 'string': [], 'jaccard': []} general_bayes_variation = {'accuracy': [], 'string': [], 'jaccard': []} general_siddharthan = {'accuracy': [], 'string': [], 'jaccard': []} general_deemter = {'accuracy': [], 'string': [], 'jaccard': []} number_samples, number_samples1 = 0, 0 for fold in bayes_random: general_random['accuracy'].append( accuracy_score(_random[fold]['y_real'], _random[fold]['y_pred'])) general_random['string'].append(np.mean(_random[fold]['string'])) general_random['jaccard'].append(np.mean(_random[fold]['jaccard'])) general_siddharthan['accuracy'].append( accuracy_score(siddharthan[fold]['y_real'], siddharthan[fold]['y_pred'])) general_siddharthan['string'].append( np.mean(siddharthan[fold]['string'])) general_siddharthan['jaccard'].append( np.mean(siddharthan[fold]['jaccard'])) general_deemter['accuracy'].append( accuracy_score(deemter[fold]['y_real'], deemter[fold]['y_pred'])) general_deemter['string'].append(np.mean(deemter[fold]['string'])) general_deemter['jaccard'].append(np.mean(deemter[fold]['jaccard'])) general_bayes_random['accuracy'].append( accuracy_score(bayes_random[fold]['y_real'], bayes_random[fold]['y_pred'])) general_bayes_random['string'].append( np.mean(bayes_random[fold]['string'])) general_bayes_random['jaccard'].append( np.mean(bayes_random[fold]['jaccard'])) general_bayes_no_variation['accuracy'].append( accuracy_score(bayes_no_variation[fold]['y_real'], bayes_no_variation[fold]['y_pred'])) general_bayes_no_variation['string'].append( np.mean(bayes_no_variation[fold]['string'])) general_bayes_no_variation['jaccard'].append( 
np.mean(bayes_no_variation[fold]['jaccard'])) general_bayes_variation['accuracy'].append( accuracy_score(bayes_variation[fold]['y_real'], bayes_variation[fold]['y_pred'])) general_bayes_variation['string'].append( np.mean(bayes_variation[fold]['string'])) general_bayes_variation['jaccard'].append( np.mean(bayes_variation[fold]['jaccard'])) number_samples += len(bayes_no_variation[fold]['string']) number_samples1 += len(siddharthan[fold]['string']) if std and domain == '': print 'Fold', fold print 'Labels: ' print 'Random: ', accuracy_score(_random[fold]['y_real'], _random[fold]['y_pred']) print 'Siddharthan: ', accuracy_score(siddharthan[fold]['y_real'], siddharthan[fold]['y_pred']) print 'Deemter: ', accuracy_score(deemter[fold]['y_real'], deemter[fold]['y_pred']) print 'Bayes Random: ', accuracy_score( bayes_random[fold]['y_real'], bayes_random[fold]['y_pred']) print 'Bayes No Variation: ', accuracy_score( bayes_no_variation[fold]['y_real'], bayes_no_variation[fold]['y_pred']) print 'Bayes Variation: ', accuracy_score( bayes_variation[fold]['y_real'], bayes_variation[fold]['y_pred']) print 20 * '-' print 'String Distance: ' print 'Random: ', np.mean(_random[fold]['string']) print 'Siddharthan: ', np.mean(siddharthan[fold]['string']) print 'Deemter: ', np.mean(deemter[fold]['string']) print 'Bayes Random: ', np.mean(bayes_random[fold]['string']) print 'Bayes No Variation: ', np.mean( bayes_no_variation[fold]['string']) print 'Bayes Variation: ', np.mean(bayes_variation[fold]['string']) print 20 * '-' print 'Jaccard Distance: ' print 'Random: ', np.mean(_random[fold]['jaccard']) print 'Siddharthan: ', np.mean(siddharthan[fold]['jaccard']) print 'Deemter: ', np.mean(deemter[fold]['jaccard']) print 'Bayes Random: ', np.mean(bayes_random[fold]['jaccard']) print 'Bayes No Variation: ', np.mean( bayes_no_variation[fold]['jaccard']) print 'Bayes Variation: ', np.mean( bayes_variation[fold]['jaccard']) print 20 * '-' print '\n' if std: print 'GENERAL', domain print 
'Labels: ' print 'Random: ', np.mean(general_random['accuracy']) print 'Siddharthan: ', np.mean(general_siddharthan['accuracy']) print 'Deemter: ', np.mean(general_deemter['accuracy']) print 'Bayes Random: ', np.mean(general_bayes_random['accuracy']) print 'Bayes No Variation: ', np.mean( general_bayes_no_variation['accuracy']) print 'Bayes Variation: ', np.mean(general_bayes_variation['accuracy']) print 20 * '-' print 'String Distance: ' print 'Random: ', mean_confidence_interval( general_random['string']), bootstrap.ci(general_random['string']) print 'Siddharthan: ', mean_confidence_interval( general_siddharthan['string']), bootstrap.ci( general_siddharthan['string']) print 'Deemter: ', mean_confidence_interval( general_deemter['string']), bootstrap.ci(general_deemter['string']) print 'Bayes Random: ', mean_confidence_interval( general_bayes_random['string']), bootstrap.ci( general_bayes_random['string']) print 'Bayes No Variation: ', mean_confidence_interval( general_bayes_no_variation['string']), bootstrap.ci( general_bayes_no_variation['string']) print 'Bayes Variation: ', mean_confidence_interval( general_bayes_variation['string']), bootstrap.ci( general_bayes_variation['string']) print 20 * '-' print 'Jaccard Distance: ' print 'Random: ', mean_confidence_interval( general_random['jaccard']), bootstrap.ci(general_random['jaccard']) print 'Siddharthan: ', mean_confidence_interval( general_siddharthan['jaccard']), bootstrap.ci( general_siddharthan['jaccard']) print 'Deemter: ', mean_confidence_interval( general_deemter['jaccard']), bootstrap.ci( general_deemter['jaccard']) print 'Bayes Random: ', mean_confidence_interval( general_bayes_random['jaccard']), bootstrap.ci( general_bayes_random['jaccard']) print 'Bayes No Variation: ', mean_confidence_interval( general_bayes_no_variation['jaccard']), bootstrap.ci( general_bayes_no_variation['jaccard']) print 'Bayes Variation: ', mean_confidence_interval( general_bayes_variation['jaccard']), bootstrap.ci( 
general_bayes_variation['jaccard']) print 20 * '-' print '\n' print 'String -> T-test: Random X PN-Variation' t, p = stats.ttest_ind(general_random['string'], general_bayes_no_variation['string']) print round(t, 6), p t, p = stats.ttest_rel(general_random['string'], general_bayes_no_variation['string']) print round(t, 6), p print 10 * '-' print 'String -> T-test: Random X PN+Variation' t, p = stats.ttest_ind(general_random['string'], general_bayes_variation['string']) print round(t, 6), p t, p = stats.ttest_rel(general_random['string'], general_bayes_variation['string']) print round(t, 6), p print 10 * '-' print 'String -> T-test: Siddarthan X PN-Variation' t, p = stats.ttest_ind(general_siddharthan['string'], general_bayes_no_variation['string']) print round(t, 6), p t, p = stats.ttest_rel(general_siddharthan['string'], general_bayes_no_variation['string']) print round(t, 6), p print 10 * '-' print 'String -> T-test: Siddarthan X PN+Variation' t, p = stats.ttest_ind(general_siddharthan['string'], general_bayes_variation['string']) print round(t, 6), p t, p = stats.ttest_rel(general_siddharthan['string'], general_bayes_variation['string']) print round(t, 6), p print 10 * '-' print 'String -> T-test: Deemter X PN-Variation' t, p = stats.ttest_ind(general_deemter['string'], general_bayes_no_variation['string']) print round(t, 6), p t, p = stats.ttest_rel(general_deemter['string'], general_bayes_no_variation['string']) print round(t, 6), p print 10 * '-' print 'String -> T-test: Deemter X PN+Variation' t, p = stats.ttest_ind(general_deemter['string'], general_bayes_variation['string']) print round(t, 6), p t, p = stats.ttest_rel(general_deemter['string'], general_bayes_variation['string']) print round(t, 6), p print 10 * '-' print 'String -> T-test: PN-Variation X PN+Variation' t, p = stats.ttest_ind(general_bayes_no_variation['string'], general_bayes_variation['string']) print round(t, 6), p t, p = stats.ttest_rel(general_bayes_no_variation['string'], 
general_bayes_variation['string']) print round(t, 6), p print 10 * '-' print 'String -> One way ANOVA' t, p = stats.f_oneway(general_random['string'], general_siddharthan['string'], general_deemter['string'], general_bayes_no_variation['string'], general_bayes_variation['string']) print round(t, 6), p print 10 * '-' print 'Number of samples: ', number_samples, number_samples1 write_dir = '/roaming/tcastrof/names/eacl/evaluation' write_csv(general_random, general_siddharthan, general_deemter, general_bayes_no_variation, general_bayes_variation, write_dir, domain)
def test_bca_multialpha(self):
    """Pin the seeded default-method interval at four alpha levels."""
    alphas = (0.1, 0.2, 0.8, 0.9)
    expected = np.array([0.39210727, 0.50775386, 0.93673299, 1.0476729])

    np.random.seed(1234567890)
    observed = boot.ci(self.data, np.average, alpha=alphas)

    np.testing.assert_array_almost_equal(observed, expected)
def test_bca_multialpha(self):
    """Pin the seeded default-method interval for four alpha values."""
    np.random.seed(1234567890)
    observed = boot.ci(self.data, np.average, alpha=(0.1, 0.2, 0.8, 0.9))

    reference = np.array([0.39210727, 0.50775386, 0.93673299, 1.0476729])
    np.testing.assert_array_almost_equal(observed, reference)
def plot_learn_curves(channel_zips, conv_zips, rules=("PES", "hPES"), presentation=False): group_by = "learn_type" figsize = (8, 6) if presentation else (5, 4) if presentation: matplotlib.rc("font", size=18) ext = "svg" if presentation else "pdf" else: plt.figure(figsize=figsize) for func, zips in zip(("channel", "conv"), (channel_zips, conv_zips)): if presentation: plt.figure(figsize=figsize) filenames = {"PES": [], "hPES": [], "control": []} # Group files by group_by for zfn in zips: zfn_b = os.path.basename("%s" % zfn) with zipfile.ZipFile("%s.zip" % zfn) as zf: for l in zf.open("%s.txt" % zfn_b): if group_by in l: group = l.split("=")[1].strip() filenames[group].append(zfn) # Get the control results control = [] for zfn in filenames["control"]: with zipfile.ZipFile("%s.zip" % zfn) as zf: zfn_b = os.path.basename("%s" % zfn) with zf.open("%s.csv" % zfn_b) as fp: time, error = get_data(fp, func, "full") control.append(error) control = np.vstack(control) # Just take the mean for the control control = np.mean(control, axis=0) # Get the non-control results colors = ("b", "g") for rule, color in zip(rules, colors): err = [] for zfn in filenames[rule]: zfn_b = os.path.basename("%s" % zfn) with zipfile.ZipFile("%s.zip" % zfn) as zf: with zf.open("%s.csv" % zfn_b) as fp: time, error = get_data(fp, func, "full") err.append(error) err = np.vstack(err) / control mean = np.mean(err, axis=0) conf = ci(err, axis=0) if func == "channel": if not presentation: plt.subplot(122) plt.title("Learning transmission") plt.ylim(-0.4, 9) plt.gca().yaxis.tick_right() elif func == "conv": if not presentation: plt.subplot(121) plt.title("Learning binding") plt.ylabel("Error relative to control mean") if not presentation: plt.xticks(np.arange(0, 80, 20)) plt.xlim((0, 80)) plt.ylim(0.9, 1.6) plt.gca().yaxis.set_ticks_position("left") plt.gca().spines["right"].set_visible(False) plt.gca().spines["top"].set_visible(False) plt.gca().xaxis.set_ticks_position("bottom") plt.xlabel("Learning time 
(seconds)") plt.axhline(1.0, lw=1, color="0.3") plt.fill_between(time, y1=conf[1], y2=conf[0], color=color, alpha=0.3) if presentation: rule = "Combined, $S$=0.73" if rule == "hPES" else "Supervised, $S$=1" plt.plot(time, mean, color=color, lw=1, label=rule) if len(rules) > 1: if presentation: plt.legend(prop={"size": 16}) else: plt.legend(prop={"size": 12}) if presentation: plt.tight_layout() name = func + "-learncurve" if len(rules) == 2 else func + "-learncurve-pes" plt.savefig("%s/%s.%s" % (figuredir, name, ext), transparent=True) print "Saved %s.%s" % (name, ext) if not presentation: plt.tight_layout() plt.subplots_adjust(wspace=0) name = "fig4-learn-curves" if len(rules) == 2 else "learncurve-pes" plt.savefig("%s/%s.%s" % (figuredir, name, ext), transparent=True) print "Saved fig4-learn-curves.%s" % ext plt.close()
def test_bca_multi_multialpha(self):
    """Check tuple input against stacked input for a two-sample statistic.

    Element 1 of ``stats.linregress`` is bootstrapped twice with the same
    seed and ``boot.ci``'s default method: once from the ``(x, y)`` tuple and
    once from the two-column stacked array.  The results must agree.
    """
    def seeded_ci(data, statfunction):
        # Same seed before each call so both start from the same RNG state.
        np.random.seed(1234567890)
        return boot.ci(data, statfunction,
                       alpha=(0.1, 0.2, 0.8, 0.9), n_samples=1000)

    from_tuple = seeded_ci((self.x, self.y),
                           lambda a, b: stats.linregress(a, b)[1])
    from_stacked = seeded_ci(np.vstack((self.x, self.y)).T,
                             lambda a: stats.linregress(a)[1])

    np.testing.assert_array_almost_equal(from_tuple, from_stacked)
def test_bca_n_samples(self):
    """Pin the seeded interval obtained when ``n_samples`` is set to 500."""
    settings = {'alpha': (0.1, 0.2, 0.8, 0.9), 'n_samples': 500}
    reference = np.array([0.40027628, 0.5063184, 0.94082515, 1.05653929])

    np.random.seed(1234567890)
    observed = boot.ci(self.data, np.average, **settings)

    np.testing.assert_array_almost_equal(observed, reference)
'''''''''
# Squared Frobenius norm of each candidate C matrix, divided by n.
tr_COV_C = norm(C_mat_hat,'fro')**2/n
tr_COV_C_neg = norm(C_mat_neg_hat,'fro')**2/n

# Bare expressions: they only show a value when run interactively and have no
# effect when the file is executed as a script.
tr_COV_C # |C|_F^2/n for (X_1, X_2)
tr_COV_C_neg # |C|_F^2/n for (X_1, -X_2)

#Note: specify the global variables r_1, r_2, r_12 and P_mat
#for the function compute_tr_COV_C(Y_1_t, Y_2_t) stated at the lines 15-21
r_1 = r_1_hat
r_2 = r_2_hat
r_12 = r_12_hat
P_mat = P_mat_hat

np.random.seed(0) # seed for the random number generator of bootstrap
# BCa bootstrap with 5000 resamples at alpha=0.05; the transposes of Y_1 and
# Y_2 are passed as the two data arrays.
tr_COV_C_interval = boot.ci((Y_1.T, Y_2.T), statfunction=compute_tr_COV_C, alpha=0.05, n_samples=5000, method='bca')

# Row 0 / row 1 are read as the two interval bounds, one column per statistic.
tr_COV_C_interval[0,0],tr_COV_C_interval[1,0] # the 95% bootstrap CI for tr_COV_C
tr_COV_C_interval[0,1],tr_COV_C_interval[1,1] # the 95% bootstrap CI for tr_COV_C_neg

''''''''' Choose the C of (X_1, X_2) or that of (X_1, -X_2) by the larger one of tr_COV_C and tr_COV_C_neg. See Remark 2 in the paper for details '''''''''

#The common-pattern matrix C rescaled with the magnitude of X_k
C_scaled_1_hat = C_mat_hat * norm(X_1_hat,'fro')/np.sqrt(n)
C_scaled_2_hat = C_mat_hat * norm(X_2_hat,'fro')/np.sqrt(n)

#The distinctive-pattern matrix Delta_k
def test_pi_pandas_series(self):
    """Pin the seeded 'pi' interval computed from ``self.pds``."""
    np.random.seed(1234567890)
    observed = boot.ci(self.pds, np.average, method='pi')

    reference = np.array([0.2288689, 1.21259752])
    np.testing.assert_array_almost_equal(observed, reference)