def prep_data_tests(data_tests, values, index, columns, reset_index=None,
                    prefix='Unknown_', impute_grouping=['Wafer_ID'],
                    cont_func=['median'], missing=True, Threshold=20):
    data_pivot_tests = pivot_data(data_tests, values, index, columns, reset_index, prefix)
    data_tests_pivot_imputed, columns_drop, wafers_drop = drop_missing_values(
        data_pivot_tests, Threshold)
    data_tests_pivot_imputed = replace_missing_values(
        data_tests_pivot_imputed, impute_grouping,
        data_tests_pivot_imputed.columns, missing)
    data_tests_wafer_medians = stats(data_tests_pivot_imputed, ['Wafer_ID'],
                                     cont_func, '_wafer_')
    data_tests_fablot_medians = stats(data_tests_pivot_imputed, ['FabLot', 'Wafer_ID'],
                                      cont_func, '_fablot_')
    data_tests_stats = pd.merge(data_tests_wafer_medians.reset_index(),
                                data_tests_fablot_medians.reset_index(),
                                on=['Wafer_ID'],
                                how='inner').set_index(['FabLot', 'Wafer_ID'])
    data_tests.to_csv('data_model_tests.csv', index=True)
    data_tests_stats.to_csv('data_model_tests_stats.csv', index=True)
    return data_tests_pivot_imputed, columns_drop, wafers_drop, data_tests_stats
def main():
    # initializing
    # stls is the stop-loss limit: the maximum number of long positions we can hold.
    # Without such a constraint you would go long an indefinite number of times
    # as long as the market condition keeps triggering the signal;
    # in a whipsaw condition, that is suicidal.
    stls = 3
    ticker = 'NVDA'
    stdate = '2015-04-01'
    eddate = '2018-02-15'

    # slicer is used for plotting;
    # a three-year dataset with 750 data points would be too much to show
    slicer = 700

    # downloading data
    df = yf.download(ticker, start=stdate, end=eddate)
    trading_signals = signal_generation(df, heikin_ashi, stls)

    viz = trading_signals[slicer:]
    plot(viz, ticker)

    portfolio_details = portfolio(viz)
    profit(portfolio_details)

    stats(portfolio_details, trading_signals, stdate, eddate)
def analyzeTweets(keyword, option):
    # once in analyze mode, determine which analysis / visualization to do
    df = readSQL(keyword)
    df['datetime'] = pd.to_datetime(df['datetime'])
    n = len(df)
    if option == 'stats':
        stats(df, stripHash(keyword))
        return
    elif option == 'interval':
        interval(df, keyword, n)
        return
    elif option == 'line':
        line(df, keyword, n)
        return
    elif option == 'dist':
        dist(df, keyword, n)
        return
    elif option == 'scatter':
        scatter(df, keyword, n)
        return
    elif option == 'pie':
        pie(keyword)
        return
    elif option == 'map':
        sentMap(df, keyword)
        return
def eval_bias_variance_learner_cont(env, agent, sup, T, num_samples=1):
    s = env.reset()
    biases = []
    variances = []
    for i in range(num_samples):
        bias, variance, t = 0, 0, 0
        while t < T:
            a = agent.sample_action(s)      # \E_D(\pi^D_\theta(s))
            a_sup = sup.intended_action(s)  # \pi^*(s)
            # For variance, at each state sample an action from a random model
            # and compare it to that from the expected model
            a_ensemble_list = agent.intended_actions(s)
            ensemble_idx = np.random.randint(len(a_ensemble_list))
            a_ensemble = a_ensemble_list[ensemble_idx]  # \pi^D_\theta(s)
            # Need to evaluate bias/variance on the learner's distribution
            next_s, r, done, _ = env.step(a)
            s = next_s
            bias += np.sum((a - a_sup) ** 2)
            variance += np.sum((a - a_ensemble) ** 2)
            t += 1
            if done:
                break
        bias /= float(t)
        variance /= float(t)
        biases.append(bias)
        variances.append(variance)
    return stats(biases), stats(variances)
def strstats(v):
    if v.dtype == float:
        return "mean %f std %f min %f 10th %f 50th %f 90th %f max %f" % stats(v)
    elif v.dtype == int:
        return "mean %d std %d min %d 10th %d 50th %d 90th %d max %d" % stats(v)
    else:
        raise Exception("weird dtype %s" % v.dtype)
def main():
    # parse_ftp()
    pop = 'uganda'
    coverage = '4x'
    dn = 'DNAse-seq'
    dn = sys.argv[-1]
    print(pop, coverage, dn,)

    l_vcfs = glob.glob('../pipeline/%s%s/out_UnifiedGenotyper/*.vcf' % (pop, coverage,))
    l_vcfs_sorted = sort_nicely(l_vcfs)
    l_vcfs_sorted = ['../pipeline/uganda4x/out_UnifiedGenotyper/UnifiedGenotyper.2.2.vcf']
    #### l_vcfs_sorted = l_vcfs_sorted[:10]
    sep = '/'

    # l_vcfs_exome = glob.glob('/nfs/t149_1kg/phase1_v3/ALL.chr*.exome.1000GApr12.vcf')
    # l_vcfs_exome_sorted = sort_nicely(l_vcfs_exome)
    # l_vcfs = glob.glob('/nfs/t149_1kg/phase1_v3/ALL.chr*.phase1_release_v3.20101123.snps_indels_svs.genotypes.vcf.gz')
    # l_vcfs_sorted = sort_nicely(l_vcfs)
    # sep = '|'

    ## gnuplot.contour_plot(
    ##     path_dat='INDEL_%s.dat' % (dn),
    ##     bool_remove=False,
    ##     xlabel='dist_m_i_n (1000bp)', ylabel='INDEL length',
    ##     bool_log=False,
    ##     x1=0, x2=50, y1=1, y2=20,
    ## )
    ## stop

    d_lengths = loop_UG_out(l_vcfs_sorted, dn, sep)

    plot_contour(dn, d_lengths)

    s = ''
    for k1 in d_lengths.keys():
        for k2, v in d_lengths.items():
            s += '%s %s %s\n' % (k1, k2, v)
    fd = open('%s.dict', 'w')
    fd.write(s)
    fd.close()

    stats(d_lengths, dn,)

    ########
    plot_length_distribution(pop, coverage, d_lengths,)

    return
def print_entry(key, value):
    def stats(s, units=""):
        conf = "{:0.1f}({:0.2f}%)".format(s['ci']['abs'], s['ci']['perc'])
        return "{:8.1f}{} +/- {:15s}".format(s['average'], units, conf)
    print "{:>50s} {} {}".format(key,
                                 stats(value['time_stat'], units="ms"),
                                 stats(value['count_stat']))
def printrestart(restart):
    '''Print quick summary info about the current state of the sampler.'''
    print "restart info: "
    print "  current shape of chain: (nwalkers x niterations x ndim) ", np.shape(restart['chain'])
    print "  autocorrelation lengths for each parameter: ", restart['acor']
    stats(restart['acor'])
    print "  acceptance rate for each walker: ", restart['accept']
    stats(restart['accept'])
def probsplots(allprobs, fn, chain, burnin=0):
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt

    nwalker = np.shape(allprobs)[0]
    iters = np.shape(allprobs)[1]
    ndim = np.shape(chain)[2]

    # plot the trace of the probabilities for every walker.
    fig, ax = plt.subplots()
    for walker in range(nwalker):
        ax.plot(allprobs[walker, burnin:], alpha=.3, ls='--')
    plt.savefig(fn + '_probs.png')
    plt.close(fig)
    print "saved " + fn + '_probs.png'

    fig, ax = plt.subplots()
    kss = []
    kssthetas = np.zeros((ndim, iters - 1))
    # chain ~ walker x iter x dim
    for iter in range(iters - 1):
        kss.append(ksprob(allprobs[:, iter], allprobs[:, iters - 1]))
        for k in range(ndim):
            kssthetas[k, iter] = ksprob(chain[:, iter, k], chain[:, iters - 1, k])
    ax.scatter(range(iters - 1)[-25:], kss[-25:], s=30, marker='+')
    colors = ['k', 'r', 'green', 'orange', 'lightblue', 'grey', 'purple',
              'pink', 'yellow', 'blue', 'lightgreen', 'darkgreen'] * 5
    for k in range(ndim):
        ax.plot(range(iters - 1)[-25:], kssthetas[k, -25:], color=colors[k])
    ax.set_xlabel('iter')
    ax.set_ylabel('ks prob vs last iteration')
    ax.set_ylim(-0.01, 1.02)
    plt.savefig(fn + '_ks.png')
    plt.close(fig)

    changes = np.zeros(nwalker)
    for walker in range(nwalker):
        for iter in range(iters - 1):
            if allprobs[walker, iter] != allprobs[walker, iter + 1]:
                changes[walker] += 1.0
    changes = changes / float(iters - 1.0)
    print "long-term acceptance fraction stats: "
    stats(changes)

    acor = np.zeros(nwalker)
    for walker in range(nwalker):
        acor[walker] = emcee.autocorr.integrated_time(
            allprobs[walker, burnin:], window=min([50, iters / 2]))
    print "acor stats: "
    stats(acor)
def main(argv=None):
    if argv is None:
        argv = sys.argv
    parser = ArgumentParser(
        'Report mean and standard deviation for the stream of numbers read from stdin',
        formatter_class=ArgumentDefaultsHelpFormatter)
    args = parser.parse_args(argv[1:])
    stats = Stats()
    for line in sys.stdin:
        stats(float(line))
    print(stats)
def analyze_results(config, test_method, answer_path, list_of_req, quality_bonus):
    full_data, accepted_sessions = data_cleaning(answer_path, test_method)
    n_workers = number_of_uniqe_workers(full_data)
    print(f"{n_workers} workers participated in this batch.")
    stats(answer_path)
    # votes_per_file, votes_per_condition = transform(accepted_sessions)
    if len(accepted_sessions) > 1:
        print("Transforming data (the ones with 'accepted_and_use' == 1) --> group per clip")
        use_condition_level = config.has_option('general', 'condition_pattern')
        votes_per_file, vote_per_condition = transform(
            test_method, accepted_sessions,
            config.has_option('general', 'condition_pattern'))
        votes_per_file_path = os.path.splitext(answer_path)[0] + '_votes_per_clip.csv'
        votes_per_cond_path = os.path.splitext(answer_path)[0] + '_votes_per_cond.csv'

        condition_keys = []
        if config.has_option('general', 'condition_pattern'):
            condition_keys = config['general']['condition_keys'].split(',')
            condition_keys.append('Unknown')
        headers = create_headers_for_per_file_report(test_method, condition_keys)
        write_dict_as_csv(votes_per_file, votes_per_file_path, headers=headers)
        print(f'   Votes per file are saved in: {votes_per_file_path}')
        if use_condition_level:
            write_dict_as_csv(vote_per_condition, votes_per_cond_path)
            print(f'   Votes per condition are saved in: {votes_per_cond_path}')

        bonus_file = os.path.splitext(answer_path)[0] + '_quantity_bonus_report.csv'
        quantity_bonus_df = calc_quantity_bonuses(full_data, list_of_req, bonus_file)

        if quality_bonus:
            quality_bonus_path = os.path.splitext(answer_path)[0] + '_quality_bonus_report.csv'
            if 'all' not in list_of_req:
                quantity_bonus_df = calc_quantity_bonuses(full_data, ['all'], None)
            if use_condition_level:
                votes_to_use = vote_per_condition
            else:
                votes_to_use = votes_per_file
            calc_quality_bonuses(quantity_bonus_df, accepted_sessions, votes_to_use,
                                 config, quality_bonus_path, n_workers, test_method,
                                 use_condition_level)
def writer(information, outfile, features, end_structure):
    dist = stats([int(item[1]) for item in information])
    length_stem = stats([item[2] for item in information])
    terminal_structure = stats([item[3] for item in information])
    ordered_ends = sorted(end_structure.items(), key=operator.itemgetter(1), reverse=True)
    with open(outfile, 'w') as outp:
        outp.write("distance of nearest structure (discounting base pairing in 5' terminal 50 nt, due to dubious nature of this interaction)\n")
        outp.write("mean,min,max,median,std_dev\n")
        for item in dist[0:-2]:
            outp.write(str(item) + ",")
        outp.write(str(dist[-1]))
        outp.write("\n")
        outp.write("\n")
        outp.write("length of base-pairing for nearest structure (ignores one nucleotide mismatch)\n")
        outp.write("mean,min,max,median,std_dev\n")
        for item in length_stem[0:-2]:
            outp.write(str(item) + ",")
        outp.write(str(length_stem[-1]))
        outp.write("\n")
        outp.write("\n")
        outp.write("how structured is the 3' terminal 50 nt (percent base paired, will discount base pairing in 5' terminal 50 nt, due to dubious nature of this interaction)\n")
        outp.write("mean,min,max,median,std_dev\n")
        for item in terminal_structure[0:-2]:
            outp.write(str(item) + ",")
        outp.write(str(terminal_structure[-1]))
        outp.write("\n")
        outp.write("\n")
        q = 1
        outp.write("3' terminal 50 nt ordered as a function of structure (top = most structured, bottom = least structured)\n")
        outp.write("rank,percent_stranded,coordinate,gene_feature\n")
        for item in ordered_ends:
            outp.write(str(q) + ',' + str(item[1]) + ',' + str(item[0]) + ',' +
                       str(features[item[0]]) + "\n")
            q += 1
def get_lda_individual(self, topic_num, alpha, eta, text):
    """get individual lda model"""
    s = stats(360)
    post = s.get_data_by_day()
    nonOrg = post.loc[post['label'] == 1]
    lyrics = post.loc[post['label'] == 2]
    topic = LDATopicModel()
    c = Count_Vect()
    # change here to define lyrics or quote
    if text in 'lyrics':
        text = lyrics[['text', 'userid']]
        text['text'] = text['text'].apply(lambda x: c.remove_noise(str(x)))
        text = c.get_precocessed_text(text)
        text['text'] = text['text'].apply(lambda x: x.split())
        dictionary = gensim.corpora.Dictionary(text['text'])  # generate dictionary
        bow_corpus = [dictionary.doc2bow(doc) for doc in text['text']]
        model, coherence = topic.get_lda_score_eval(dictionary, bow_corpus, topic_num, alpha, eta)
    elif text in 'quotes':
        text = nonOrg[['text', 'userid']]
        text['text'] = text['text'].apply(lambda x: c.remove_noise(str(x)))
        text = c.get_precocessed_text(text)
        text['text'] = text['text'].apply(lambda x: x.split())
        dictionary = gensim.corpora.Dictionary(text['text'])  # generate dictionary
        bow_corpus = [dictionary.doc2bow(doc) for doc in text['text']]
        model, coherence = topic.get_lda_score_eval(dictionary, bow_corpus, topic_num, alpha, eta)
    # model evaluation
    return model, coherence
def regression(self, pre_var, data):
    s = stats(self.days)
    # data = s.get_count_quote()
    X = data[pre_var]
    y = data["cesd_sum"]
    model = sm.OLS(y, X).fit()
    print(model.summary())
def answer(series, field, p, q, r=0, file="test.png"):
    df1 = series[field]
    plot_autocorrelations(df1)
    # %%
    df1_model = create_model(df1, p, q, r)
    stats(df1_model)
    # %%
    analise_model(df1_model)
    # %%
    plot_autocorrelations(df1_model.resid, "Residuals")
    # %%
    extract_resid_stats(df1_model)
    # %%
    plot_predictions(df1, df1_model, file)
    return df1_model
def eval_sim_err_statistics_cont(env, sup, T, num_samples=1):
    losses = []
    for i in range(num_samples):
        tmp_states, int_actions, taken_actions, _ = collect_traj(env, sup, T)
        int_actions = np.array(int_actions)
        taken_actions = np.array(taken_actions)
        errors = (int_actions - taken_actions) ** 2.0
        errors = np.sum(errors, axis=1)
        losses.append(np.mean(errors))
    return stats(losses)
def mmm():
    mmmArray = re.split(',', modmedian.get())
    mmmdesired_array = [int(numeric_string) for numeric_string in mmmArray]
    df = pd.DataFrame({
        " rating1": mmmdesired_array,
        "dummy": range(len(mmmdesired_array))
    })
    f, (ax_box, ax_hist) = plt.subplots(2, sharex=True,
                                        gridspec_kw={"height_ratios": (0.2, 1)})
    mean = df[' rating1'].mean()
    median = df[' rating1'].median()
    mode = df[' rating1'].mode().get_values()[0]

    sns.boxplot(df[" rating1"], ax=ax_box)
    ax_box.axvline(mean, color='r', linestyle='--')
    ax_box.axvline(median, color='g', linestyle='-')
    ax_box.axvline(mode, color='b', linestyle='-')

    sns.distplot(df[" rating1"], ax=ax_hist)
    ax_hist.axvline(mean, color='r', linestyle='--')
    ax_hist.axvline(median, color='g', linestyle='-')
    ax_hist.axvline(mode, color='b', linestyle='-')

    plt.legend({'Mean': mean, 'Median': median, 'Mode': mode})
    label = 'Mode=' + str(mode) + ', Median=' + str(median) + ', Mean=' + str(mean)
    ax_box.set(xlabel=label)
    if stats(mmmdesired_array) == 0.0:
        ax_hist.set(xlabel="Symmetric (Zero Skewness)")
    elif stats(mmmdesired_array) > 1:
        ax_hist.set(xlabel="Skewed to the Right (Positive Skewness)")
    else:
        ax_hist.set(xlabel="Skewed to the Left (Negative Skewness)")
    plt.show()
def F(env, pi1, pi2, sup, T, num_samples=1):
    losses = []
    for i in range(num_samples):
        # collect trajectory with states visited and actions taken by agent
        tmp_states, _, _, _ = collect_traj(env, pi1, T)
        tmp_actions = np.array([pi2.intended_action(s) for s in tmp_states])
        sup_actions = np.array([sup.intended_action(s) for s in tmp_states])
        errors = 1.0 - np.mean(sup_actions == tmp_actions)
        # compute the mean error on that trajectory
        # (may not be T samples since the game ends early on failures)
        losses.append(np.mean(errors))
    # compute the mean and sem on averaged losses.
    return stats(losses)
def main():
    file = sys.argv[1]
    id = sys.argv[2]
    df = pd.read_csv(file)
    if id == 'average':
        df = df.drop(['student'], 1)
        df = df.reindex(sorted(df.columns), axis=1)
        dates = list(df)
        means = df[dates].mean()
        df = pd.DataFrame(means).transpose()
        stats(df)
    else:
        id = int(id)
        frame = df.loc[df['student'] == id]
        if frame.empty:
            print("Unknown student ID")
            return
        frame = frame.drop(['student'], 1)
        stats(frame)
def eval_agent_statistics_disc(env, agent, sup, T, num_samples=1):
    """
    Evaluate in the given environment along the agent's distribution
    for T timesteps on num_samples.
    """
    losses = []
    for i in range(num_samples):
        tmp_states, _, tmp_actions, _ = collect_traj(env, agent, T)
        sup_actions = np.array([sup.intended_action(s) for s in tmp_states])
        errors = (-(sup_actions == tmp_actions)).astype(int)
        losses.append(np.mean(errors))
    return stats(losses)
def eval_sup_statistics_disc(env, agent, sup, T, num_samples=1):
    """
    Evaluate on the supervisor's trajectory in the given env for T timesteps.
    """
    losses = []
    for i in range(num_samples):
        tmp_states, _, _, _ = collect_traj(env, sup, T)
        tmp_actions = np.array([agent.intended_action(s) for s in tmp_states])
        sup_actions = np.array([sup.intended_action(s) for s in tmp_states])
        errors = (-(sup_actions == tmp_actions)).astype(int)
        losses.append(np.mean(errors))
    return stats(losses)
def get_lda(self):
    """get lda topics"""
    s = stats(180)
    post = s.get_data_by_day()
    nonOrg = post.loc[post['label'] == 1]
    lyrics = nonOrg.loc[nonOrg['tag'] == 2]
    topic = LDATopicModel()
    c = Count_Vect()
    text = lyrics[['text', 'userid']]
    text['text'] = text['text'].apply(lambda x: c.remove_noise(str(x)))
    text = c.get_precocessed_text(text)
    topics, model = topic.get_lda_score(text, 10)
    return topics, model
def eval_agent_statistics_discrete(env, lnr, sup, T, num_samples=1):
    """
    Evaluate loss in the given environment along the agent's distribution
    for T timesteps on num_samples.
    """
    losses = []
    for i in range(num_samples):
        # collect trajectory with states visited and actions taken by agent
        tmp_states, _, tmp_actions, _ = collect_traj(env, lnr, T)
        sup_actions = np.array([sup.intended_action(s) for s in tmp_states])
        errors = 1.0 - np.mean(sup_actions == tmp_actions)
        # compute the mean error on that trajectory
        # (may not be T samples since the game ends early on failures)
        losses.append(np.mean(errors))
    # compute the mean and sem on averaged losses.
    return stats(losses)
def hierarchical(pause=False, **kwargs):
    """Binary networks with overlapping nodes and hierarchies

    This program is an implementation of the algorithm described in the paper
    'Directed, weighted and overlapping benchmark graphs for community detection
    algorithms', written by Andrea Lancichinetti and Santo Fortunato. In particular,
    this program is to produce binary networks with overlapping nodes and hierarchies.

    -N     [number of nodes]
    -k     [average degree]
    -maxk  [maximum degree]
    -t1    [minus exponent for the degree sequence]
    -t2    [minus exponent for the community size distribution]
    -minc  [minimum for the micro community sizes]
    -maxc  [maximum for the micro community sizes]
    -on    [number of overlapping nodes]
    -om    [number of memberships of the overlapping nodes]
    -minC  [minimum for the macro community size]
    -maxC  [maximum for the macro community size]
    -mu1   [mixing parameter for the macro communities (see Readme file)]
    -mu2   [mixing parameter for the micro communities (see Readme file)]

    Example2:
    ./hbenchmark -f flags.dat
    ./hbenchmark -N 10000 -k 20 -maxk 50 -mu2 0.3 -minc 20 -maxc 50 -minC 100 -maxC 1000 -mu1 0.1
    """
    prog = _get_file('lfr_benchmarks/new/hierarchical_bench2_2/hbenchmark')
    args = [prog] + makeargs(kwargs)
    print "Arguments are: ", " ".join(args)
    with tmpdir_context(chdir=True, prefix="tmp-lfrbenchmark", dir=tmpbase):
        retcode = subprocess.call(args)
        assert retcode == 0
        g = networkx.Graph()
        for n, c in read_file('community_first_level.dat'):
            g.add_node(n - 1, microC=c - 1)
        for n, c in read_file('community_second_level.dat'):
            g.add_node(n - 1, macroC=c - 1)
        for n1, n2 in read_file('network.dat'):
            g.add_edge(n1 - 1, n2 - 1)
    g.graph['stats'] = stats(g)
    if pause:
        import fitz.interact
        fitz.interact.interact()
    return g
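# Usage sketch (an assumption, not from the source): the docstring above maps the
# hbenchmark CLI flags to keyword arguments, so a call of the following shape would
# generate a small hierarchical benchmark graph. The specific flag values here are
# illustrative only and mirror the docstring's example invocation.
if __name__ == "__main__":
    g = hierarchical(N=1000, k=20, maxk=50, mu2=0.3,
                     minc=20, maxc=50, minC=100, maxC=1000, mu1=0.1)
    print "Generated graph with", g.number_of_nodes(), "nodes and", g.number_of_edges(), "edges"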
def toHTML(logDirName):
    logDir = os.getcwd() + '/' + 'table' + '-' + logDirName
    out = open(logDirName + '.html', 'w')
    out.write('<html><body>')
    prefs = []
    for root, dirs, files in os.walk(logDir):
        for name in files:
            if re.compile(".*-.*.out$").match(name):
                prefs.append(name.split('-')[0])
    prefs = set(prefs)
    out.write('<table border=1>')
    out.write("<tr><td>Experiment</td><td>xml</td><td>log</td><td>status</td><td>errors</td><td>warnings</td><td>yikes</td><td>previous</td></tr>\n")
    for p in prefs:
        for root, dirs, files in os.walk(logDir):
            for name in files:
                if re.compile(p + "-.*.out$").match(name):
                    print name
                    out.write('<tr>')
                    out.write('<td><b>%s - %s</b></td>' % (p, name.split('-')[1]))
                    outFile = ("%s/%s" % (root, name))
                    out.write('<td><a href=\"%s\">%s</a></td>' % (outFile, outFile.split('/')[-1]))
                    out.write('<td><a href=\"%s/%s\">log</a></td>' % (root, name.replace('.out', '.log')))
                    out.write(' %s ' % stats(root + '/' + name))
                    i = 0
                    out.write('<td>')
                    while '%s.%d' % (name, i) in files:
                        out.write(' :: <a href=\"%s/%s.%d\">%d</a>' % (root, name, i, i))
                        i = i + 1
                    out.write('</td>')
                    out.write('</tr>\n')
    out.write('</table>')
    out.write('<br>\n')
    out.write('</body></html>\n\n\n')
    out.close()
def get_elo(results):
    """ "results" is an array of length 2*n+1 with aggregated frequencies for n games."""
    results = LLRcalc.regularize(results)
    games, mu, var = stats(results)
    stdev = math.sqrt(var)

    # 95% confidence interval for mu
    mu_min = mu + Phi_inv(0.025) * stdev / math.sqrt(games)
    mu_max = mu + Phi_inv(0.975) * stdev / math.sqrt(games)

    el = elo(mu)
    elo95 = (elo(mu_max) - elo(mu_min)) / 2.0
    los = Phi((mu - 0.5) / (stdev / math.sqrt(games)))

    return el, elo95, los
def computeClippedImageStats(im, low=3, high=3, ignore=None):
    import collections
    im = im[~(np.isnan(im) | np.isinf(im))]
    if ignore is not None:
        for i in ignore:
            im = im[im != i]
    tmp = im
    if low != 0 and high != 0 and tmp.min() != tmp.max():
        _, low, upp = scipy.stats.sigmaclip(tmp, low=low, high=high)
        if not np.isnan(low) and not np.isnan(upp) and low != upp:
            tmp = im[(im > low) & (im < upp)]
    mean1 = np.nanmean(tmp)
    sig1 = np.nanstd(tmp)
    stats = collections.namedtuple('stats', 'mean stdev min max')
    return stats(mean=mean1, stdev=sig1, min=np.nanmin(im), max=np.nanmax(im))
def F2(env, pi1, pi2, sup, T, num_samples=1):
    losses = []
    for i in range(num_samples):
        # collect trajectory with states visited and actions taken by agent
        tmp_states, _, _, _ = collect_traj(env, pi1, T)
        tmp_actions = np.array([pi2.intended_action(s) for s in tmp_states])
        tmp_scores = np.array([pi2.decision_function(s) for s in tmp_states])
        sup_actions = np.array([sup.intended_action(s) for s in tmp_states])
        n = len(sup_actions)
        hinge = hinge_loss(sup_actions, tmp_scores)
        penalty = pi2.est.alpha * .5 * np.square(np.linalg.norm(pi2.est.coef_))
        print("hinge: " + str(hinge))
        print("penalty: " + str(penalty))
        errors = hinge / n + penalty
        # compute the mean error on that trajectory
        # (may not be T samples since the game ends early on failures)
        losses.append(np.mean(errors))
    # compute the mean and sem on averaged losses.
    return stats(losses)
def eval_sup_statistics_cont(env, agent, sup, T, num_samples=1):
    """
    Evaluate loss on the supervisor's trajectory in the given env for T timesteps.
    """
    losses = []
    for i in range(num_samples):
        # collect states visited by the supervisor (actions are sampled so not collected)
        tmp_states, _, _, _ = collect_traj(env, sup, T)
        # get intended actions from the agent and supervisor
        tmp_actions = np.array([agent.intended_action(s) for s in tmp_states])
        sup_actions = np.array([sup.intended_action(s) for s in tmp_states])
        errors = (sup_actions - tmp_actions) ** 2.0
        # compute the mean error on that traj
        errors = np.sum(errors, axis=1)
        losses.append(np.mean(errors))
    # generate statistics, same as above
    return stats(losses)
def rows(infile, outfile, minstep, maxstep):
    out = open(outfile, 'w')
    data = np.loadtxt(infile, skiprows=1)
    for x in range(1, data.shape[1]):
        out.write(stats(infile, x, minstep, maxstep) + ' ' + repr(x) + '-' + repr(x + 1) + '\n')
p5 = portfolio_return(data, marketcapweights)[pd.datetime(1994, 1, 1):]

drag1 = p3 - p1
drag2 = p4 - p5


def stats(x):
    ann_mean = x.mean() * 12
    ann_std = x.std() * (12 ** .5)
    geo_mean = ann_mean - (ann_std ** 2) / 2.0
    sharpe = geo_mean / ann_std
    return (ann_mean, ann_std, geo_mean, sharpe)


print stats(p1)
print stats(p3)
print stats(p4)
print stats(p5)

toplot = pd.concat([p1, p3, p4, p5], axis=1)
toplot.columns = ["Optimised", "Handcraft", "Equal", "Market Cap"]
toplot.cumsum().plot()
show()

p1.cumsum().plot(color="black", ls="solid")
p3.cumsum().plot(color="gray", ls="solid")
p4.cumsum().plot(color="black", ls="dashed")
p5.cumsum().plot(color="gray", ls="dashed")
    meso_array = scipy.array(meso_list)
    thermo_array = scipy.array(thermo_list)
    meso_mean = scipy.mean(meso_array)
    meso_std = scipy.std(meso_array)
    thermo_mean = scipy.mean(thermo_array)
    thermo_std = scipy.std(thermo_array)
    p_val = scipy.stats.ttest_ind(thermo_array, meso_array)[1]
    s = ('\nMm: ' + str(meso_mean) + '\nSTDm: ' + str(meso_std) +
         '\nMt: ' + str(thermo_mean) + '\nSTDt: ' + str(thermo_std) +
         '\nP: ' + str(p_val))
    return s


###### get the stats and write to outfile
ofile = open('results5A-10k-P5050-thesisnorm-wnq.txt', 'w')

nip_stats = stats(meso_nips, thermo_nips)
s1 = 'Normalized N_ip: ' + nip_stats + '\n'
ofile.write(s1)

ratio_stats = stats(meso_ratios, thermo_ratios)
s2 = '\nRatio rep atr: ' + ratio_stats + '\n'
ofile.write(s2)

iso_stats = stats(meso_isos, thermo_isos)
s3 = '\nFraction isolated: ' + iso_stats + '\n'
ofile.write(s3)

branched_stats = stats(meso_branches, thermo_branches)
s4 = '\nFraction branched: ' + branched_stats + '\n'
ofile.write(s4)

meso_nip_data = str(meso_nips)
thermo_nip_data = str(thermo_nips)
meso_ratio_data = str(meso_ratios)
thermo_ratio_data = str(thermo_ratios)
        sum=s,
        lines=l,
        mean=m,
        variance=v,
        std_dev=std_dev,
        min=min_value,
        max=max_value,
        median=median_value,
        confidence=confidence,
        low_limit=ci[0],
        high_limit=ci[1])
    return result


if __name__ == "__main__":
    parse_args()
    result = stats(sys.stdin, field, delimiter, skip, confidence, navalue)
    print_st("Field", result.field)
    print_st("Lines", result.lines)
    print_st("Mean", result.mean)
    print_st("Variance", result.variance)
    print_st("StdDev", result.std_dev)
    print_st("Sum", result.sum)
    print_st("Min", result.min)
    print_st("Max", result.max)
    print_st("Median", result.median)
    print_st("Confidence", result.confidence)
    print_st("Cnf.Itv.L", result.low_limit)
    print_st("Cnf.Itv.U", result.high_limit)
# plt.show()


def stats(binding_data, proximity_data):
    n_bins = 50
    hist_b, bins_b = np.histogram(binding_data,
                                  bins=np.linspace(np.min(binding_data), np.max(binding_data), n_bins))
    hist_p, bins_p = np.histogram(proximity_data,
                                  bins=np.linspace(np.min(binding_data), np.max(binding_data), n_bins))
    # print hist_b, hist_p
    # print "Binding data> mean:%s, median:%s, std:%s" % (np.mean(binding_data), np.median(binding_data), np.std(binding_data))
    # print "Proximity data> mean:%s, median:%s, std:%s" % (np.mean(proximity_data), np.median(proximity_data), np.std(proximity_data))
    # print scipy.stats.spearmanr(hist_b, hist_p)
    # print bins_b, bins_p
    print ks_2samp(hist_b, hist_b)
    print ks_2samp(hist_b, hist_p)


mob_name = 'crystals_mob.txt'
aln_name = 'crystals_aln.csv'
# mob_name = 'models_mob.txt'
# aln_name = 'models_aln.csv'

mob_dict, aln_dict = get_data(mob_name, aln_name)
data = combine_data(mob_dict, aln_dict)
binding_data, proximity_data = mobility_stats(data)
print len(binding_data), len(proximity_data)
# plots(binding_data, proximity_data)
stats(binding_data, proximity_data)
    L = L.ravel()

    # obtain the indices to sort and unsort the flattened array
    i_sort = np.argsort(L)[::-1]
    i_unsort = np.argsort(i_sort)

    L_cumsum = L[i_sort].cumsum()
    L_cumsum /= L_cumsum[-1]

    xbins = 0.5 * (xbins[1:] + xbins[:-1])
    ybins = 0.5 * (ybins[1:] + ybins[:-1])

    return xbins, ybins, L_cumsum[i_unsort].reshape(shape)


Msin_median, Msin_std, Msin_range = stats(Msin_samples)
print('Msini med,std,range', Msin_median / (1.898 * 10 ** 30), Msin_std, Msin_range / (1.898 * 10 ** 30))
Period_median, Period_std, Period_range = stats(Period_samples)
print('Period med,std,range', Period_median, Period_std, Period_range)
Tzero_median, Tzero_std, Period_range = stats(Tzero_samples)
print('Tzero med,std,range', Tzero_median, Tzero_std, Period_range)

# Msini_pdf = mlab.normpdf(Msin_samples, Msin_median, Msin_std)
# Period_pdf = mlab.normpdf(Period_samples, Period_median, Period_std)
# Tzero_pdf = mlab.normpdf(Tzero_samples, Tzero_median, Tzero_std)

Msin, Period, sig_MP = compute_sigma_level(Msin_samples, Period_samples)
Msin, Tzero, sig_MT = compute_sigma_level(Msin_samples, Tzero_samples)
Tzero, Period, sig_TP = compute_sigma_level(Tzero_samples, Period_samples)

Abest = 77.47156475290217
        data.append(float(item))

    s = sum(data)
    l = len(data)
    m = mean(data, s)
    v = variance(data, m)
    min_value = 0 if len(data) == 0 else min(data)
    max_value = 0 if len(data) == 0 else max(data)
    std_dev = v ** 0.5
    median_value = median(data)
    ci = mean_confidence_interval(data, confidence)
    return (field, l, m, v, std_dev, s, min_value, max_value, median_value, ci)


if __name__ == "__main__":
    parse_args()
    field, l, m, v, std_dev, s, min_value, max_value, median_value, ci = stats(
        sys.stdin, field, delimiter, skip, confidence, navalue)
    print_st("Field", field)
    print_st("Lines", l)
    print_st("Mean", m)
    print_st("Variance", v)
    print_st("StdDev", std_dev)
    print_st("Sum", s)
    print_st("Min", min_value)
    print_st("Max", max_value)
    print_st("Median", median_value)
    print_st("Confidence", confidence)
    print_st("Cnf.Itv.L", ci[0])
    print_st("Cnf.Itv.U", ci[1])
root = '/nfs/christoq_ls/nkern/C4/MassRich/TRUTH'

# Load Data
truth = fits.open(data_root + '/c4_cluster_truth_revH100_rev5.fits')[1].data
truth['m200mean'] *= 1e10
truth['m200crit'] *= 1e10

Rich = truth['n10virm19']
Mass = truth['m200crit']

rich_low, rich_high, rich_step = 3.3, 4.8, 0.3
richbins = np.arange(rich_low, rich_high, rich_step)
mass_low, mass_high, mass_step = 14.25, 15.15, 0.2
massbins = np.arange(mass_low, mass_high, mass_step)

d = stats(Rich, Mass, rich_low, rich_high, rich_step, log10=False, fitline=True)
globals().update(d)

plot = True
if plot == True:
    fig, ax = mp.subplots(1, 2, figsize=(13, 7))

    ## ax[0] is MassRich
    p1, = ax[0].plot(X, Y, 'ko', alpha=.2)
    ax[0].set_xlim(X.min() - .1, X.max() + .1)
    ax[0].set_ylim(Y.min() - .1, Y.max() + .5)
    ax[0].set_xlabel('log( N200 )', fontsize=15)
    ax[0].set_ylabel('log( Mass )', fontsize=15)
    ax[0].fill_between(richbins, Y.min() - .1, Y.max() + .5, alpha=.1)
    ax[0].plot(X, X * model.coef_ + model.intercept_, 'r')

    ## ax[1] is Scatter
def summaryStats(df, filterColumns, returnColumns, regress=None, debug=False):
    if regress is None:
        regressionColumns = [None]
    else:
        regressionColumns = [None] + regress

    groupby = {}
    data = df
    key = 'ALL'
    val = 'ALL'

    statColumns = ['return', 'total', 'win_ct', 'lose_ct', 'win_ratio',
                   'lose_ratio', 'return_med', 'return_avg', 'return_stddev',
                   'return_min', 'return_max']
    if regress is not None:
        # prepend 'variable', then append the regression statistics
        statColumns.reverse()
        statColumns.append('variable')
        statColumns.reverse()
        statColumns.append('slope')
        statColumns.append('intercept')
        statColumns.append('r')
        statColumns.append('r_low')
        statColumns.append('r_high')
        statColumns.append('2_tail_prob')
        statColumns.append('std_err')
    # prepend the grouping columns
    statColumns.reverse()
    statColumns.append('columnValue')
    statColumns.append('columnKey')
    statColumns.reverse()

    rows = []
    for returns in returnColumns:
        returnsData = data.dropna(subset=[returns])
        for var in regressionColumns:
            if var is not None and len(returnsData) > 0:
                if debug:
                    print key, val, var, returns
                regressionData = returnsData.dropna(subset=[var])
                tmpFilters = regressionData[var]
            else:
                regressionData = returnsData
                tmpFilters = []
            tmpReturns = regressionData[returns]
            results = stats(var, tmpFilters, tmpReturns)
            row = {'columnKey': key, 'columnValue': val,
                   'variable': var, 'return': returns}
            row.update(results)
            rows.append(row)

    for col in filterColumns:
        g = data[col].unique()
        groupby[col] = filter(None, [v if pd.notnull(v) else None for v in g])
    keys = groupby.keys()
    for key in keys:
        for val in groupby[key]:
            filteredData = data[data[key] == val]
            for returns in returnColumns:
                returnsData = filteredData.dropna(subset=[returns])
                for var in regressionColumns:
                    if var is not None:
                        regressionData = returnsData.dropna(subset=[var])
                        tmpFilters = regressionData[var]
                    else:
                        regressionData = returnsData
                        tmpFilters = []
                    tmpReturns = regressionData[returns]
                    results = stats(var, tmpFilters, tmpReturns)
                    row = {'columnKey': key, 'columnValue': val,
                           'variable': var, 'return': returns}
                    row.update(results)
                    rows.append(row)

    data = pd.DataFrame(rows, columns=statColumns)
    try:
        data['r'] = data['r'].real
        data['r_low'] = data['r_low'].real
        data['r_high'] = data['r_high'].real
    except:
        pass
    return data