def run_collapse(self, hosts, cmd):
    progress = None
    if self.progressbar:
        from progressbar import ProgressBar, Percentage, Bar, ETA, FileTransferSpeed
        progress = ProgressBar(widgets=[
            "Running: ", Percentage(), ' ',
            Bar(marker='.'), ' ',
            ETA(), ' ',
            FileTransferSpeed()
        ], maxval=len(hosts))

    codes = {"total": 0, "error": 0, "success": 0}
    outputs = defaultdict(list)

    def worker(host, cmd):
        p = Popen(self.get_parallel_ssh_options(host, cmd), stdout=PIPE, stderr=PIPE)
        o = ""
        while True:
            outs, _, _ = select([p.stdout, p.stderr], [], [])
            outline = errline = ""
            if p.stdout in outs:
                outline = p.stdout.readline()
            if p.stderr in outs:
                errline = p.stderr.readline()
            o += outline + errline
            if outline == "" and errline == "" and p.poll() is not None:
                break

        if o == "":
            o = colored("[ No Output ]\n", "yellow")
        outputs[o].append(host)

        if p.poll() == 0:
            codes["success"] += 1
        else:
            codes["error"] += 1
        codes["total"] += 1

        if self.progressbar:
            progress.update(codes["total"])

    pool = Pool(self.ssh_threads)
    if self.progressbar:
        progress.start()
    for host in hosts:
        pool.start(Greenlet(worker, host, cmd))
    try:
        pool.join()
    except KeyboardInterrupt:
        pass
    if self.progressbar:
        progress.finish()

    self.print_exec_results(codes)
    print()
    for output, hosts in outputs.items():
        msg = " %s " % ','.join(hosts)
        table_width = min([len(msg) + 2, terminal_size()[0]])
        cprint("=" * table_width, "blue", attrs=["bold"])
        cprint(msg, "blue", attrs=["bold"])
        cprint("=" * table_width, "blue", attrs=["bold"])
        print(output)
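# --- Minimal, self-contained sketch of the pattern above: one shared ProgressBar
# advanced from a gevent greenlet pool. The host list and the simulated work are
# placeholders (the real method runs ssh via Popen); only the `progressbar` and
# `gevent` packages are assumed.
from gevent import sleep
from gevent.pool import Pool
from progressbar import ProgressBar, Percentage, Bar, ETA

hosts = ["web01", "web02", "db01"]          # illustrative host list
progress = ProgressBar(widgets=[Percentage(), ' ', Bar(marker='.'), ' ', ETA()],
                       maxval=len(hosts))
done = {"total": 0}

def worker(host):
    sleep(0.1)                              # stands in for the per-host ssh call
    done["total"] += 1
    progress.update(done["total"])          # greenlets run cooperatively, so this is safe

pool = Pool(10)
progress.start()
for host in hosts:
    pool.spawn(worker, host)
pool.join()
progress.finish()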
def NETSGraph(results, NETS_edges, node_labeler, node_type, edge_labeler):
    '''
    Function takes a json file of query results, a list of NETS edges, node and edge metadata dictionaries, and a
    dictionary containing NETS edge information by BIO node. Using these items the function creates the directed
    OWL-NETS abstraction network. Node metadata includes: labels (a list of human readable labels); id (the endpoint
    database identifiers); and bio (the NETS node type). Edge metadata includes: labels (human readable label for the
    edge between two NETS nodes) and id (the ontology concept term used to link the NETS nodes).
    :param results: json file containing the query results from endpoint
    :param NETS_edges: list of lists, where each list is a NETS edge and the order specifies a directional relationship
    :param node_labeler: node metadata nested lists (list[0] contains the NETS nodes label triples, list[1] contains
    the NETS nodes identifier triples)
    :param node_type: dictionary with BIO node as key and set of NETS node types as value
    :param edge_labeler: dictionary where the keys are the NETS edges and the values are the edge labels
    :return: OWL-NETS directed graph
    '''
    print 'Started building OWL-NETS graph'

    # initialize progress bar
    widgets = [Percentage(), Bar(), FormatLabel('(elapsed: %(elapsed)s)')]
    pbar = ProgressBar(widgets=widgets, maxval=len(results['results']['bindings']))

    NETS_graph = nx.DiGraph()

    for res in pbar(results['results']['bindings']):
        for edge in NETS_edges:
            i = res[str(edge[0].strip('?').encode('utf8'))]['value'].encode('utf8')
            j = res[str(edge[1].strip('?').encode('utf8'))]['value'].encode('utf8')

            # set first node in edge
            NETS_graph.add_node(min(node_labeler[edge[0].strip('?')][i]['label'], key=len),
                                labels=node_labeler[edge[0].strip('?')][i]['label'],
                                id=node_labeler[edge[0].strip('?')][i]['id'],
                                bio=i,
                                type='-'.join(list(node_type[i])))

            # set second node in edge
            NETS_graph.add_node(min(node_labeler[edge[1].strip('?')][j]['label'], key=len),
                                labels=node_labeler[edge[1].strip('?')][j]['label'],
                                id=node_labeler[edge[1].strip('?')][j]['id'],
                                bio=j,
                                type='-'.join(list(node_type[j])))

            # add edge
            NETS_graph.add_edge(min(node_labeler[edge[0].strip('?')][i]['label'], key=len),
                                min(node_labeler[edge[1].strip('?')][j]['label'], key=len),
                                labels=res[(edge_labeler[tuple(edge)]['label']).strip('?')]['value'].encode('utf8'),
                                id=(edge_labeler[tuple(edge)]['id']).strip('?'),
                                edge='-'.join([edge[0].strip('?'), edge[1].strip('?')]))

    # close progress bar
    pbar.finish()

    print 'Finished building OWL-NETS graph'
    print '\n'

    # print information about graph
    print 'Directed OWL-NETS Graph has ' + str(len(NETS_graph.nodes())) + ' nodes, ' + str(
        len(NETS_graph.edges())) + ' edges, and ' + str(
        nx.number_connected_components(NETS_graph.to_undirected())) + ' connected component(s)'

    return NETS_graph
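# --- Minimal illustration of the node/edge attribute pattern used above: the
# shortest human-readable label becomes the node name and the full metadata is
# attached as attributes. The labels and identifiers below are made-up stand-ins
# for the SPARQL binding values; only networkx is assumed.
import networkx as nx

g = nx.DiGraph()
gene_labels = ['tumor protein p53', 'TP53']
drug_labels = ['acetylsalicylic acid', 'aspirin']

g.add_node(min(gene_labels, key=len), labels=gene_labels, id='NCBIGene:7157', bio='gene')
g.add_node(min(drug_labels, key=len), labels=drug_labels, id='CHEBI:15365', bio='drug')
g.add_edge(min(drug_labels, key=len), min(gene_labels, key=len),
           labels='molecularly interacts with', id='RO_0002436', edge='drug-gene')

print('%d nodes, %d edges' % (g.number_of_nodes(), g.number_of_edges()))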
def _do_distribution(self, data, spectrograms, name):
    pbar = None
    try:
        from progressbar import ProgressBar, Percentage, Bar
        pbar = ProgressBar(widgets=[Percentage(), Bar()], maxval=len(data.track_names))
        pbar.start()
    except Exception as e:
        pass

    if config.learn_phase:
        if not os.path.exists(self.analysisPath):
            os.mkdir(self.analysisPath)
        h5f_path = os.path.join(self.analysisPath, "distribution_ir_%s.hdf5" % name)
        h5file = h5py.File(h5f_path, "w")
        h5real = h5file.create_group("real")
        h5imag = h5file.create_group("imag")

        plt.figure(figsize=(15, 15))
        plt.suptitle(name)

        ax1 = plt.subplot(231)
        bins = self._do_distribution_plot(pbar, h5real, data, spectrograms, None, "upper", "real")
        plt.subplot(232, sharey=ax1, sharex=ax1)
        self._do_distribution_plot(pbar, h5real, data, spectrograms, bins, "center", "real")
        plt.subplot(233, sharey=ax1, sharex=ax1)
        self._do_distribution_plot(pbar, h5real, data, spectrograms, bins, "lower", "real")

        ax1 = plt.subplot(234)
        bins = self._do_distribution_plot(pbar, h5imag, data, spectrograms, None, "upper", "imag")
        plt.subplot(235, sharey=ax1, sharex=ax1)
        self._do_distribution_plot(pbar, h5imag, data, spectrograms, bins, "center", "imag")
        plt.subplot(236, sharey=ax1, sharex=ax1)
        self._do_distribution_plot(pbar, h5imag, data, spectrograms, bins, "lower", "imag")

        h5file.close()

        if not os.path.exists(self.analysisPath):
            os.mkdir(self.analysisPath)
        plt.savefig(os.path.join(self.analysisPath, "distribution_%s_ir.png" % name))
        plt.close()
    else:
        if not os.path.exists(self.analysisPath):
            os.mkdir(self.analysisPath)
        h5f_path = os.path.join(self.analysisPath, "distribution_amplitude_%s.hdf5" % name)
        h5file = h5py.File(h5f_path, "w")

        plt.figure(figsize=(15, 15))
        plt.suptitle(name)

        ax1 = plt.subplot(131)
        bins = self._do_distribution_plot(pbar, h5file, data, spectrograms, None, "upper")
        plt.subplot(132, sharey=ax1, sharex=ax1)
        self._do_distribution_plot(pbar, h5file, data, spectrograms, bins, "center")
        plt.subplot(133, sharey=ax1, sharex=ax1)
        self._do_distribution_plot(pbar, h5file, data, spectrograms, bins, "lower")

        h5file.close()

        if not os.path.exists(self.analysisPath):
            os.mkdir(self.analysisPath)
        plt.savefig(os.path.join(self.analysisPath, "distribution_%s_amplitude.png" % name))
        plt.close()
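# --- Sketch of the optional-progressbar pattern used above: fall back to no bar
# when the package is missing and guard every update against a None pbar. The
# item list and the per-item work are placeholders; only `progressbar` is assumed.
pbar = None
items = list(range(50))           # placeholder for data.track_names
try:
    from progressbar import ProgressBar, Percentage, Bar
    pbar = ProgressBar(widgets=[Percentage(), Bar()], maxval=len(items))
    pbar.start()
except ImportError:
    pass

for i, item in enumerate(items):
    _ = item * item               # placeholder for the real per-track work
    if pbar is not None:
        pbar.update(i + 1)

if pbar is not None:
    pbar.finish()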
    triggers = zapi.trigger.get(
        output=['description', 'triggerid'],
        hostids=[h[0]['hostid']],
        expandDescription=1,
        search={'description': ': {0}'.format(host['trigger'])})
    logger.info('Found {0} triggers for host {1}'.format(
        len(triggers), host['host']))
    logger.print_json(triggers)
    for t in triggers:
        maintenance_triggers_ids.append(t['triggerid'])

i = 0
logger.info('Found {0} triggers'.format(len(maintenance_triggers_ids)))
bar = ProgressBar(
    maxval=len(maintenance_triggers_ids),
    widgets=[Percentage(), ReverseBar(), ETA(), RotatingMarker(), Timer()]).start()
for t in maintenance_triggers_ids:
    if args.run:
        out = zapi.trigger.update(triggerid=t, status=args.status)
        i += 1
        bar.update(i)
    else:
        logger.warning('Should change triggerid {0} to status {1}'.format(
            t, args.status))
bar.finish()
zapi.user.logout()
tempid = np.zeros(n)
dist = np.zeros(n)

# construct the KDTree from the centroid nodes
print('Constructing KDTree object from centroid nodes ...')
source = np.column_stack((centroid_x, centroid_y))
tree = spatial.cKDTree(source)

# used for FEM shape function
ones = np.ones(3)

# the list that stores the triangle polygon for a particular TIN element
poly = list()

# for the progress bar
w = [Percentage(), Bar(), ETA()]
pbar = ProgressBar(widgets=w, maxval=n).start()

print('Searching using KDTree ...')
for i in range(len(x)):
    # just do for one node for now
    d, idx = tree.query((x[i], y[i]), k=neigh)

    # instead of specifying number of neighbours, specify search radius
    #idx = tree.query_ball_point( (m_x[i],m_y[i]), neigh)

    # reconstruct a poly out of the tin element for each idx
    not_found = 0
    for j in range(len(idx)):
        # find the area of each triangle in the search space
        x1 = t_x[t_ikle[idx[j], 0]]
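# --- Stand-alone sketch of the KDTree search above, with a progress bar over the
# query points. The random points stand in for the mesh centroids and TIN nodes;
# only numpy, scipy, and progressbar are assumed.
import numpy as np
from scipy import spatial
from progressbar import ProgressBar, Percentage, Bar, ETA

centroids = np.random.rand(1000, 2)        # stands in for (centroid_x, centroid_y)
queries = np.random.rand(200, 2)           # stands in for the TIN nodes (x, y)
neigh = 8

tree = spatial.cKDTree(centroids)
pbar = ProgressBar(widgets=[Percentage(), Bar(), ETA()], maxval=len(queries)).start()
for i, q in enumerate(queries):
    d, idx = tree.query(q, k=neigh)        # distances and indices of the k nearest centroids
    pbar.update(i + 1)
pbar.finish()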
predict_span = 50
grid_circ = 7

data_dir = "../data/h5_test/{}_{}".format(start_string, end_string)
if not os.path.exists(data_dir):
    os.makedirs(data_dir)

aq_count = 0
print("\nFetching data to export...")
for aq_name in aq_location.keys():
    aggregate = 0
    ti.sleep(0.1)
    bar = PB(initial_value=0, maxval=delta_time + 1,
             widgets=[aq_name, ' ', Bar('=', '[', ']'), ' ', Percentage()])
    valid_count = 0
    near_grids, grid_coor_array = get_grids(aq_name, grid_circ)

    # Validate the near grid matrix algorithm
    # plt.figure()
    # plt.title(aq_name)
    # plt.plot(aq_location[aq_name][0], aq_location[aq_name][1], '.')
    # plt.plot(grid_coor_array[:, 0], grid_coor_array[:, 1], '.')
    # plt.show()

    # Exporting data from start to end
    grid_matrix = []
    history_matrix = []
    predict_matrix = []
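# --- The fragment above sets up a per-station bar but is truncated before the
# update/finish calls. A minimal sketch of the full lifecycle of that pattern,
# with made-up station names and a sleep in place of the real export step; only
# `progressbar` is assumed.
import time
from progressbar import ProgressBar, Percentage, Bar

stations = ["aotizhongxin_aq", "dongsi_aq"]    # illustrative aq_location keys
delta_time = 48

for aq_name in stations:
    bar = ProgressBar(maxval=delta_time + 1,
                      widgets=[aq_name, ' ', Bar('=', '[', ']'), ' ', Percentage()]).start()
    for t in range(delta_time + 1):
        time.sleep(0.01)                       # placeholder for building one time step
        bar.update(t)
    bar.finish()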
def pleiopred_genomewide(data_file_D1, data_file_D2, alpha, Pi, init_betas_prefix, ld_radius=None, ld_dict=None, out_file_prefix=None, n1=None, n2=None, PRF=None, num_iter=60, burn_in=10, zero_jump_prob=0.05, user_h1=None, user_h2=None): """ Calculate LDpred for a genome """ prf_chr = PRF['chrom'] prf_sids = PRF['sids'] h2_D1 = PRF['h2_D1'] h2_D2 = PRF['h2_D2'] df1 = h5py.File(data_file_D1, 'r') df2 = h5py.File(data_file_D2, 'r') cord_data_g1 = df1['cord_data'] cord_data_g2 = df2['cord_data'] has_phenotypes1 = False if 'y' in df1.keys(): 'Validation phenotypes of disease 1 found.' y1 = df1['y'][...] # Phenotype num_individs1 = len(y1) prs_D1 = sp.zeros(num_individs1) has_phenotypes1 = True has_phenotypes2 = False if 'y' in df2.keys(): 'Validation phenotypes of disease 2 found.' y2 = df2['y'][...] # Phenotype num_individs2 = len(y2) prs_D2 = sp.zeros(num_individs2) has_phenotypes2 = True ld_scores_dict = ld_dict['ld_scores_dict'] chrom_ld_dict = ld_dict['chrom_ld_dict'] chrom_ref_ld_mats = ld_dict['chrom_ref_ld_mats'] chrom_snps = ld_dict['chrom_snps'] chrom_snpids = ld_dict['chrom_snpids'] chrom_betas1 = ld_dict['chrom_betas1'] chrom_betas2 = ld_dict['chrom_betas2'] num_snps1 = 0 sum_beta2s1 = 0 num_snps2 = 0 sum_beta2s2 = 0 chr_list = list(set(cord_data_g1.keys()) & set(cord_data_g2.keys())) for chrom_str in chromosomes_list: if chrom_str in chr_list: betas1 = chrom_betas1[chrom_str] n_snps1 = len(betas1) num_snps1 += n_snps1 sum_beta2s1 += sp.sum(betas1**2) betas2 = chrom_betas2[chrom_str] n_snps2 = len(betas2) num_snps2 += n_snps2 sum_beta2s2 += sp.sum(betas2**2) if user_h1 is None or user_h2 is None: L1 = ld_scores_dict['avg_gw_ld_score'] chi_square_lambda1 = sp.mean(n1 * sum_beta2s1 / float(num_snps1)) print 'Genome-wide lambda inflation of D1:', chi_square_lambda1 print 'Genome-wide mean LD score of D1:', L1 gw_h2_ld_score_est1 = max(0.0001, (max(1, chi_square_lambda1) - 1) / (n1 * (L1 / num_snps1))) print 'Estimated genome-wide heritability of D1:', gw_h2_ld_score_est1 assert chi_square_lambda1 > 1, 'Something is wrong with the GWAS summary statistics of D1. Perhaps there were issues parsing of them, or the given GWAS sample size (N) was too small. Either way, lambda (the mean Chi-square statistic) is too small. ' L2 = ld_scores_dict['avg_gw_ld_score'] chi_square_lambda2 = sp.mean(n2 * sum_beta2s2 / float(num_snps2)) print 'Genome-wide lambda inflation of D2:', chi_square_lambda2 print 'Genome-wide mean LD score of D2:', L2 gw_h2_ld_score_est2 = max(0.0001, (max(1, chi_square_lambda2) - 1) / (n2 * (L2 / num_snps2))) print 'Estimated genome-wide heritability of D2:', gw_h2_ld_score_est2 assert chi_square_lambda2 > 1, 'Something is wrong with the GWAS summary statistics of D2. Perhaps there were issues parsing of them, or the given GWAS sample size (N) was too small. Either way, lambda (the mean Chi-square statistic) is too small. 
' else: gw_h2_ld_score_est1 = user_h1 gw_h2_ld_score_est2 = user_h2 h2_new1 = sp.sum(h2_D1) sig_12_D1 = (1.0) / n1 pr_sig1 = {} h2_new2 = sp.sum(h2_D2) sig_12_D2 = (1.0) / n2 pr_sig2 = {} post_betas1 = {} post_betas2 = {} out1 = [] out1.append('Estimated Genome-wide heritability: ' + str(gw_h2_ld_score_est1) + '\n') out1.append('Posterior variance for each snp: ' + str(sig_12_D1) + '\n') out2 = [] out2.append('Estimated Genome-wide heritability: ' + str(gw_h2_ld_score_est2) + '\n') out2.append('Posterior variance for each snp: ' + str(sig_12_D2) + '\n') ## main calculation, chr by chr, posterior betas and prs ## beta1_current = chrom_betas1 beta2_current = chrom_betas2 for chrom_str in chromosomes_list: if chrom_str in chr_list: print 'Preparing annotation-based priors for Chromosome %s' % ( (chrom_str.split('_'))[1]) pval_derived_betas1 = chrom_betas1[chrom_str] pval_derived_betas2 = chrom_betas2[chrom_str] sids = chrom_snpids[chrom_str] n_snps_chrom = len(sids) chri = int(chrom_str.split('_')[1]) prf_sids_chri = prf_sids[prf_chr == chri] h2_D1_chri = h2_D1[prf_chr == chri] h2_D2_chri = h2_D2[prf_chr == chri] if len(prf_sids_chri) == len(sids): if sum(prf_sids_chri == sids) == len(prf_sids_chri): pr_sig1[chrom_str] = sp.copy(h2_D1_chri) pr_sig2[chrom_str] = sp.copy(h2_D2_chri) else: print 'sorting prior files' pr_sig1[chrom_str] = sp.zeros(len(sids)) pr_sig2[chrom_str] = sp.zeros(len(sids)) for i, sid in enumerate(sids): pr_sig1[chrom_str][i] = h2_D1_chri[prf_sids_chri == sid] pr_sig2[chrom_str][i] = h2_D2_chri[prf_sids_chri == sid] else: print 'extracting prior files' pr_sig1[chrom_str] = sp.zeros(len(sids)) pr_sig2[chrom_str] = sp.zeros(len(sids)) for i, sid in enumerate(sids): pr_sig1[chrom_str][i] = h2_D1_chri[prf_sids_chri == sid] pr_sig2[chrom_str][i] = h2_D2_chri[prf_sids_chri == sid] pr_sig1[ chrom_str] = gw_h2_ld_score_est1 * pr_sig1[chrom_str] / h2_new1 pr_sig2[ chrom_str] = gw_h2_ld_score_est2 * pr_sig2[chrom_str] / h2_new2 ########################### using AnnoPred-baseline as initial values ############################### init_betas_path = '%s.pickled.gz' % init_betas_prefix if not os.path.isfile(init_betas_path): print 'No initial values for mcmc found, generating ... 
' anno_post1 = {} anno_post2 = {} for chrom_str in chromosomes_list: if chrom_str in chr_list: pval_derived_betas1 = chrom_betas1[chrom_str] pval_derived_betas2 = chrom_betas2[chrom_str] annopred_betas1 = annopred_inf( pval_derived_betas1, pr_sigi=pr_sig1[chrom_str], reference_ld_mats=chrom_ref_ld_mats[chrom_str], n=n1, ld_window_size=2 * ld_radius) annopred_betas2 = annopred_inf( pval_derived_betas2, pr_sigi=pr_sig2[chrom_str], reference_ld_mats=chrom_ref_ld_mats[chrom_str], n=n2, ld_window_size=2 * ld_radius) anno_post1[chrom_str] = annopred_betas1 anno_post2[chrom_str] = annopred_betas2 init_betas = {'anno_post1': anno_post1, 'anno_post2': anno_post2} f = gzip.open(init_betas_path, 'wb') cPickle.dump(init_betas, f, protocol=2) f.close() print 'LD information is now pickled at %s' % init_betas_path else: print 'Loading initial values for mcmc from file: %s' % init_betas_path f = gzip.open(init_betas_path, 'r') init_betas = cPickle.load(f) f.close() #### initial values #### print 'Preparing initial values for MCMC' beta1_current = init_betas['anno_post1'] beta2_current = init_betas['anno_post2'] avg_betas1 = {} avg_betas2 = {} avg_PV = sp.zeros(4) for chrom_str in chromosomes_list: if chrom_str in chr_list: avg_betas1[chrom_str] = sp.zeros(len(chrom_betas1[chrom_str])) avg_betas2[chrom_str] = sp.zeros(len(chrom_betas2[chrom_str])) # Pi = sp.random.dirichlet((alpha,alpha,alpha,alpha),1).flatten() print 'Initial PV: (' + str(Pi[0]) + ', ' + str(Pi[1]) + ', ' + str( Pi[2]) + ', ' + str(Pi[3]) + ')' sp.savetxt('%s_Initial_PV' % (out_file_prefix) + '.txt', Pi) pb = 0 pbar = ProgressBar(widgets=[Percentage(), ' ', Bar(), " ", Timer()], maxval=num_iter * 22).start() for k in range(num_iter): #Big iteration A1 = 0 A2 = 0 A3 = 0 A4 = 0 for chrom_str in chromosomes_list: if chrom_str in chr_list: posterior_betas = post_betas.bi_mcmc_all_chr( chrom_betas1[chrom_str], chrom_betas2[chrom_str], Pi=Pi, pr_sig1=pr_sig1[chrom_str], pr_sig2=pr_sig2[chrom_str], start_betas1=beta1_current[chrom_str], start_betas2=beta2_current[chrom_str], h2_D1=gw_h2_ld_score_est1 * (n_snps_chrom / float(num_snps1)), n1=n1, h2_D2=gw_h2_ld_score_est2 * (n_snps_chrom / float(num_snps2)), n2=n2, ld_radius=ld_radius, zj_p=zero_jump_prob, ld_dict1=chrom_ld_dict[chrom_str], ld_dict2=chrom_ld_dict[chrom_str]) A1 += posterior_betas['A1'] A2 += posterior_betas['A2'] A3 += posterior_betas['A3'] A4 += posterior_betas['A4'] beta1_current[chrom_str] = posterior_betas['proposed_betas1'] beta2_current[chrom_str] = posterior_betas['proposed_betas2'] if k >= burn_in: avg_betas1[chrom_str] += posterior_betas[ 'curr_post_means1'] #Averaging over the posterior means instead of samples. 
avg_betas2[chrom_str] += posterior_betas[ 'curr_post_means2'] pb = pb + 1 pbar.update(pb) Pi = sp.random.dirichlet( (alpha[0] + A1, alpha[1] + A2, alpha[2] + A3, alpha[3] + A4), 1).flatten() if k >= burn_in: avg_PV += Pi pbar.finish() ## prs and auc ## avg_PV = avg_PV / float(num_iter - burn_in) print 'Posterior PV: (' + str(avg_PV[0]) + ', ' + str( avg_PV[1]) + ', ' + str(avg_PV[2]) + ', ' + str(avg_PV[3]) + ')' sp.savetxt('%s_Avg_PV' % (out_file_prefix) + '.txt', avg_PV) for chrom_str in chromosomes_list: if chrom_str in chr_list: avg_betas1[chrom_str] = avg_betas1[chrom_str] / float(num_iter - burn_in) avg_betas2[chrom_str] = avg_betas2[chrom_str] / float(num_iter - burn_in) if has_phenotypes1: prs_chr_D1 = sp.dot(avg_betas1[chrom_str], chrom_snps[chrom_str]) prs_D1 += prs_chr_D1 if has_phenotypes2: prs_chr_D2 = sp.dot(avg_betas2[chrom_str], chrom_snps[chrom_str]) prs_D2 += prs_chr_D2 ############ PleioPred results ############# corr_inf1 = sp.corrcoef(y1, prs_D1)[0, 1] r2_inf1 = corr_inf1**2 #results_dict[p_str]['r2_pd']=r2_inf print 'D1: the R2 prediction accuracy (observed scale) of PleioPred was: %0.4f (%0.6f)' % ( r2_inf1, ((1 - r2_inf1)**2) / num_individs1) out1.append( 'D1: the R2 prediction accuracy (observed scale) of PleioPred was: ' + str(r2_inf1) + ' (' + str(((1 - r2_inf1)**2) / num_individs1) + ')\n') if corr_inf1 < 0: prs_D1 = -1 * prs_D1 auc1 = pred_accuracy(y1, prs_D1) print 'D1: PleioPred AUC for the whole genome was: %0.4f' % auc1 out1.append('D1: PleioPred AUC for the whole genome was: ' + str(auc1) + '\n') out1.append('D1: PleioPred COR for the whole genome was: ' + str(corr_inf1) + '\n') sp.savetxt('%s_y_' % (out_file_prefix) + '_D1.txt', y1) sp.savetxt('%s_prs' % (out_file_prefix) + '_PleioPred_D1.txt', prs_D1) #Now calibration ff_inf = open('%s_auc_' % (out_file_prefix) + '_PleioPred_D1.txt', "w") ff_inf.writelines(out1) ff_inf.close() corr_inf2 = sp.corrcoef(y2, prs_D2)[0, 1] r2_inf2 = corr_inf2**2 #results_dict[p_str]['r2_pd']=r2_inf print 'D2: the R2 prediction accuracy (observed scale) of PleioPred was: %0.4f (%0.6f)' % ( r2_inf2, ((1 - r2_inf2)**2) / num_individs2) out2.append( 'D2: the R2 prediction accuracy (observed scale) of PleioPred was: ' + str(r2_inf2) + ' (' + str(((1 - r2_inf2)**2) / num_individs2) + ')\n') if corr_inf2 < 0: prs_D2 = -1 * prs_D2 auc2 = pred_accuracy(y2, prs_D2) print 'D2: PleioPred AUC for the whole genome was: %0.4f' % auc2 out2.append('D2: PleioPred AUC for the whole genome was: ' + str(auc2) + '\n') out2.append('D2: PleioPred COR for the whole genome was: ' + str(corr_inf2) + '\n') sp.savetxt('%s_y_' % (out_file_prefix) + '_D2.txt', y2) sp.savetxt('%s_prs' % (out_file_prefix) + '_PleioPred_D2.txt', prs_D2) #Now calibration ff_inf = open('%s_auc_' % (out_file_prefix) + '_PleioPred_D2.txt', "w") ff_inf.writelines(out2) ff_inf.close() f = gzip.open('%s_betas' % (out_file_prefix) + '_PleioPred_D1.pickled.gz', 'wb') cPickle.dump(avg_betas1, f, protocol=2) f.close() f = gzip.open('%s_betas' % (out_file_prefix) + '_PleioPred_D2.pickled.gz', 'wb') cPickle.dump(avg_betas2, f, protocol=2) f.close()
def progressUpdate():
    pb = ProgressBar(maxval=1, widgets=[Bar(), ' ', Percentage(), ' ', ETA()], fd=sys.stdout)
    while p.progress < 1:
        pb.update(p.progress)
        time.sleep(0.5)
    pb.finish()
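# --- Self-contained version of the polling loop above: a worker thread advances
# a shared fractional progress value and the main thread mirrors it into a
# ProgressBar. The Task object and the worker are placeholders for whatever `p`
# is in the original module.
import sys
import time
import threading
from progressbar import ProgressBar, Bar, Percentage, ETA

class Task(object):
    progress = 0.0

p = Task()

def work():
    for _ in range(100):
        time.sleep(0.02)
        p.progress += 0.01
    p.progress = 1

threading.Thread(target=work).start()

pb = ProgressBar(maxval=1, widgets=[Bar(), ' ', Percentage(), ' ', ETA()], fd=sys.stdout)
pb.start()
while p.progress < 1:
    pb.update(min(p.progress, 1))   # clamp: the worker may cross 1 between check and update
    time.sleep(0.5)
pb.finish()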
def simulation(params, progressbar=True): ntry = params["ntry"] maxEpk = params["maxEpok"] normalize_data = params["normalize"] scale_data = params["scale"] crossvalidation_pct = params["cross_validation_percentage"] learningRate = params["algorithm"]["params"]["learning_rate"] moment = params["algorithm"]["params"]["momentum"] bet_simulation = params["bet_simulation"] # dataXAll, dataYAll, cotationAll = buildDataset() dataXAll, dataYAll, cotationAll = buildDataset( params["dataset"]["src"], params["dataset"]["features"], bet_simulation, mongolab=True) ################ ! dataXAll = [[float(x) for x in row] for row in dataXAll] dataYAll = [[float(x) for x in row] for row in dataYAll] nfeatures = len(dataXAll[0]) ################################# # game_issue = [1, 0, -1] # for i in range(0, len(dataXAll)): # print ",".join(str(x) for x in [dataXAll[i] + cotationAll[i] + [game_issue[max(enumerate(dataYAll[i]), key=operator.itemgetter(1))[0]]]]) # exit (-1) ################################# # stat variable init winrate_history_train = list() winrate_history = list() money_post_crossval_history = list() money_during_crossval_history = list() odds_during_crossval_history = list() prediction_during_crossval_history = list() predict_interpret_during_crossval_history = list() expected_during_crossval_history = list() init_state = { "moneyBase": params["start_money"], "pct_bet": params["percentage_bet"], "simult_bet": params["simult_bet"] } # / stat variable init # Progress bar init if progressbar is True: widgets_pb = [Percentage(), ' ', Bar(), ' ', ETA()] pbar = ProgressBar(widgets=widgets_pb, maxval=ntry) pbar.start() # / Progress bar init for n in range(0, ntry): ds = SupervisedDataSet(nfeatures, 3) dataX = list(dataXAll) dataY = list(dataYAll) cotations = list(cotationAll) # crossvalidation data construction PICK LAST datapX = list() datapY = list() cotationpHDA = list() extracti = int(len(dataX) - (crossvalidation_pct * len(dataX))) datapX = dataX[extracti:len(dataX)] datapY = dataY[extracti:len(dataY)] cotationpHDA = cotations[extracti:len(cotations)] dataX = dataX[0:extracti] dataY = dataY[0:extracti] cotations = cotations[0:extracti] # / crossvalidation data construction # crossvalidation randomization if params["cross_validation_randomize"] is True: combined_crossval_data = zip(datapX, datapY, cotationpHDA) random.shuffle(combined_crossval_data) datapX, datapY, cotationpHDA = zip(*combined_crossval_data) # / crossvalidation randomization # scalarization && normalization --> # http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html && # http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.Normalizer.html scalizer = None normalizer = None if scale_data == True: scalizer = preprocessing.StandardScaler().fit(dataX) dataX = scalizer.transform(dataX) if normalize_data == True: normalizer = preprocessing.Normalizer().fit(dataX) dataX = normalizer.transform(dataX) # / scalarization && normalization # training dataset construction for i in range(0, len(dataX)): ds.addSample(dataX[i], dataY[i]) # / training dataset construction # nn && trainer construction net = buildNetwork(ds.indim, (ds.indim + ds.outdim) / 2, (ds.indim + ds.outdim) / 2, ds.outdim, bias=True, outclass=SoftmaxLayer) # building the n trainer = BackpropTrainer(net, ds, learningrate=learningRate, momentum=moment, verbose=False) # building the trainer # / nn && trainer construction # training trainer.trainUntilConvergence( maxEpochs=maxEpk) # Train, until convergence # for epoch in 
range(0,1000): # trainer.train() # / training # crossvalidation on training data post_crossval_state = crossvalidation(net, init_state, dataX, dataY, cotations, scalizer, normalizer, False) # / crossvalidation on training data # post crossvalidation training data data register winrate = post_crossval_state["win"] / float(len(dataX)) winrate_history_train.append(winrate) # / post crossvalidation training data data register # crossvalidation post_crossval_state = crossvalidation(net, init_state, datapX, datapY, cotationpHDA, scalizer, normalizer, bet_simulation) # / crossvalidation # post unit crossvalidation data register winrate = post_crossval_state["win"] / float(len(datapX)) winrate_history.append(winrate) money_post_crossval_history.append(post_crossval_state["money"]) money_during_crossval_history.append( post_crossval_state["money_during_crossval_history"]) odds_during_crossval_history.append( post_crossval_state["odds_during_crossval_history"]) prediction_during_crossval_history.append( post_crossval_state["prediction_during_crossval_history"]) predict_interpret_during_crossval_history.append( post_crossval_state["predict_interpret_during_crossval_history"]) expected_during_crossval_history.append( post_crossval_state["expected_during_crossval_history"]) # / post unit crossvalidation data register if progressbar is True: pbar.update(n + 1) # scipy.describe instantiation winrate_history_describe_train = scipy.stats.describe( winrate_history_train) winrate_history_describe = scipy.stats.describe(winrate_history) money_post_crossval_history_describe = scipy.stats.describe( money_post_crossval_history) # / scipy.describe instantiation if progressbar is True: pbar.finish() results = { "win_percentage_training": { "median": numpy.median(numpy.array(winrate_history_train)), "standard_deviation": numpy.std(numpy.array(winrate_history_train)), "variance": numpy.var(numpy.array(winrate_history_train)), "mode": scipy.stats.mstats.mode( [round(w, 2) for w in winrate_history_train]), "describe": { "nobs": winrate_history_describe_train[0], "min": winrate_history_describe_train[1][0], "max": winrate_history_describe_train[1][1], "mean": winrate_history_describe_train[2], "variance": winrate_history_describe_train[3], "skewness": winrate_history_describe_train[4], "kurtosis": winrate_history_describe_train[5] }, "normal_test": scipy.stats.normaltest(winrate_history_train), "histogram": scipy.stats.histogram(winrate_history_train), "lst": winrate_history_train }, "win_percentage": { "median": numpy.median(numpy.array(winrate_history)), "standard_deviation": numpy.std(numpy.array(winrate_history)), "variance": numpy.var(numpy.array(winrate_history)), "mode": scipy.stats.mstats.mode([round(w, 2) for w in winrate_history]), "describe": { "nobs": winrate_history_describe[0], "min": winrate_history_describe[1][0], "max": winrate_history_describe[1][1], "mean": winrate_history_describe[2], "variance": winrate_history_describe[3], "skewness": winrate_history_describe[4], "kurtosis": winrate_history_describe[5] }, "normal_test": scipy.stats.normaltest(winrate_history), "histogram": scipy.stats.histogram(winrate_history), "lst": winrate_history }, "money_during_cross_validation": { "min": min([ item for sublist in money_during_crossval_history for item in sublist ]), "max": max([ item for sublist in money_during_crossval_history for item in sublist ]), "lst": money_during_crossval_history }, "odds_during_crossval_history": { "lst": odds_during_crossval_history }, "prediction_during_crossval_history": { 
"lst": prediction_during_crossval_history }, "predict_interpret_during_crossval_history": { "lst": predict_interpret_during_crossval_history }, "expected_during_crossval_history": { "lst": expected_during_crossval_history }, "money_post_cross_validation": { "median": numpy.median(numpy.array(money_post_crossval_history)), "standard_deviation": numpy.std(numpy.array(money_post_crossval_history)), "variance": numpy.var(numpy.array(money_post_crossval_history)), "mode": scipy.stats.mstats.mode( [round(m, 1) for m in money_post_crossval_history]), "describe": { "nobs": money_post_crossval_history_describe[0], "min": money_post_crossval_history_describe[1][0], "max": money_post_crossval_history_describe[1][1], "mean": money_post_crossval_history_describe[2], "variance": money_post_crossval_history_describe[3], "skewness": money_post_crossval_history_describe[4], "kurtosis": money_post_crossval_history_describe[5] }, "normal_test": scipy.stats.normaltest(money_post_crossval_history), "histogram": scipy.stats.histogram(money_post_crossval_history), "lst": money_post_crossval_history } } return results
def train_model(sess, xtrain, ytrain, tf_seed, np_seed, xvalid=False, yvalid=False, n_epochs=False): max_overfitting = setup['patience'] * setup['hyperparam_eval_interval'] best_val_acc = float("inf") last_save = 0 overfitting = 0 N, dim = xtrain.shape iter_per_epoch = int(N / 100) input_shape = [None, dim] x = tf.placeholder(tf.float32, input_shape, name='x') y_ = tf.placeholder(tf.float32, [None, 1], name='y_') model = MNFMC(N, input_shape=input_shape, flows_q=FLAGS.fq, flows_r=FLAGS.fr, use_z=not FLAGS.no_z, learn_p=FLAGS.learn_p, thres_var=FLAGS.thres_var, flow_dim_h=FLAGS.flow_h) tf.set_random_seed(tf_seed) np.random.seed(np_seed) y = model.predict(x) yd = model.predict(x, sample=False) pyx = y with tf.name_scope('KL_prior'): regs = model.get_reg() tf.summary.scalar('KL prior', regs) with tf.name_scope('cross_entropy'): cross_entropy = tf.reduce_mean(tf.losses.mean_squared_error(y, y_, weights=0.5)) tf.summary.scalar('Loglike', cross_entropy) global_step = tf.Variable(0, trainable=False) if FLAGS.anneal: number_zero, original_zero = FLAGS.epzero, FLAGS.epochs / 2 with tf.name_scope('annealing_beta'): max_zero_step = number_zero * iter_per_epoch original_anneal = original_zero * iter_per_epoch beta_t_val = tf.cast((tf.cast(global_step, tf.float32) - max_zero_step) / original_anneal, tf.float32) beta_t = tf.maximum(beta_t_val, 0.) annealing = tf.minimum(1., tf.cond(global_step < max_zero_step, lambda: tf.zeros((1,))[0], lambda: beta_t)) tf.summary.scalar('annealing beta', annealing) else: annealing = 1. with tf.name_scope('lower_bound'): lowerbound = cross_entropy + annealing * regs tf.summary.scalar('Lower bound', lowerbound) train_step = tf.train.AdamOptimizer(learning_rate=FLAGS.lr).minimize(lowerbound, global_step=global_step) with tf.name_scope('accuracy'): accuracy = tf.reduce_mean(tf.square(yd - y_)) tf.summary.scalar('Accuracy', accuracy) merged = tf.summary.merge_all() train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', sess.graph) tf.add_to_collection('logits', y) tf.add_to_collection('logits_map', yd) tf.add_to_collection('accuracy', accuracy) tf.add_to_collection('x', x) tf.add_to_collection('y', y_) saver = tf.train.Saver(tf.global_variables()) tf.global_variables_initializer().run() idx = np.arange(N) steps = 0 model_dir = './models/mnf_lenet_mnist_fq{}_fr{}_usez{}_thres{}/model/'.format(FLAGS.fq, FLAGS.fr, not FLAGS.no_z, FLAGS.thres_var) if not os.path.exists(model_dir): os.makedirs(model_dir) print('Will save model as: {}'.format(model_dir + 'model')) # Train if n_epochs: max_epochs = n_epochs else: max_epochs = FLAGS.epochs for epoch in range(max_epochs): widgets = ["epoch {}/{}|".format(epoch + 1, FLAGS.epochs), Percentage(), Bar(), ETA()] pbar = ProgressBar(iter_per_epoch, widgets=widgets) pbar.start() np.random.shuffle(idx) t0 = time.time() for j in range(iter_per_epoch): steps += 1 pbar.update(j) batch = np.random.choice(idx, 100) if j == (iter_per_epoch - 1): summary, _ = sess.run([merged, train_step], feed_dict={x: xtrain[batch], y_: ytrain[batch]}) train_writer.add_summary(summary, steps) train_writer.flush() else: sess.run(train_step, feed_dict={x: xtrain[batch], y_: ytrain[batch]}) # If using validation data if not n_epochs: # the accuracy here is calculated by a crude MAP so as to have fast evaluation # it is much better if we properly integrate over the parameters by averaging across multiple samples tacc = sess.run(accuracy, feed_dict={x: xvalid, y_: yvalid}) string = 'Epoch {}/{}, valid_acc: {:0.3f}'.format(epoch + 1, FLAGS.epochs, 
np.sqrt(tacc)) string += ', dt: {:0.3f}'.format(time.time() - t0) print(string) sys.stdout.flush() if tacc < best_val_acc and epoch > last_save + setup['hyperparam_eval_interval']: print('saving best at epoch %d, rmse=%f' % (epoch, np.sqrt(tacc))) last_save = epoch best_val_acc = tacc string += ', model_save: True' saver.save(sess, model_dir + 'model') if tacc > best_val_acc: overfitting += 1 else: overfitting = 0 if overfitting > max_overfitting: break if n_epochs: last_save = n_epochs saver.save(sess, model_dir + 'model') return saver, model_dir, pyx, x, last_save
def train(): (xtrain, ytrain), (xvalid, yvalid), (_, _), y_std, y_mean = get_mc_data(FLAGS.dataset_name) min_tau = setup['tau_range'][FLAGS.dataset_name][0] max_tau = setup['tau_range'][FLAGS.dataset_name][1] # FIND BEST N_EPOCHS; TAU with tf.Graph().as_default() as g: with tf.Session() as sess: saver, model_dir, pyx, x, n_epochs = train_model(sess, xtrain, ytrain, FLAGS.seed, FLAGS.seed, xvalid, yvalid) # CRPS OPTIMIZE TO FIND STD DEV print "Finding optimal Tau" saver.restore(sess, model_dir + 'model') # Make optimization run take extra arguments optimize_fun = partial(run_tau_opt, sess, pyx, x, y_std, y_mean, xvalid, yvalid, False) tau_opt = gbrt_minimize(optimize_fun, [(min_tau, max_tau)], n_random_starts=100, n_calls=200 ) opt_tau = tau_opt.x[0] # CRPS OPTIMIZE TO FIND STD DEV print "Finding optimal CU Tau" # Make optimization run take extra arguments optimize_fun = partial(run_tau_opt, sess, pyx, x, y_std, y_mean, xvalid, yvalid, True) cutau_opt = gbrt_minimize(optimize_fun, [(min_tau, max_tau)], n_random_starts=100, n_calls=200 ) cu_opt_tau = cutau_opt.x[0] print "OPT TAU: {}. CRPS: {}".format(opt_tau, tau_opt.fun) print "CU OPT TAU: {}. CRPS: {}".format(cu_opt_tau, cutau_opt.fun) tf.reset_default_graph() # TRAIN AND EVALUATE FINAL MODEL 5 TIMES WITH DIFFERENT SEED: for final_seed in range(FLAGS.seed + 1, FLAGS.seed + 6): (xtrain, ytrain), (xtest, ytest), y_std, y_mean = get_mc_data(FLAGS.dataset_name, False) with tf.Graph().as_default() as g: with tf.Session() as sess: # Write csv file column headers if not yet written. plot_file_path = os.path.join(PLOT_RESULTS_PATH, FLAGS.dataset_name + '.txt') fid = open(plot_file_path, 'a') if sum(1 for line in open(plot_file_path)) == 0: fid.write(',MNF const std dev,MNF std dev,y,yHat,run_count,dataset_split_seed\n') pll_file_path = os.path.join(RESULTS_PATH, FLAGS.dataset_name + '-pll.txt') fid_pll = open(pll_file_path, 'a') if sum(1 for line in open(pll_file_path)) == 0: fid_pll.write('dataset_split,run_count,pll result,pll baseline,pll best,pll normalized\n') crps_file_path = os.path.join(RESULTS_PATH, FLAGS.dataset_name + '-crps.txt') fid_crps = open(crps_file_path, 'a') if sum(1 for line in open(crps_file_path)) == 0: fid_crps.write('dataset_split,run_count,crps result,crps baseline,crps best,crps normalized\n') rmse_file_path = os.path.join(RESULTS_PATH, FLAGS.dataset_name + '-rmse.txt') fid_rmse = open(rmse_file_path, 'a') if sum(1 for line in open(rmse_file_path)) == 0: fid_rmse.write('dataset_split,run_count,rmse\n') saver, model_dir, pyx, x, _ = train_model(sess, xtrain, ytrain, final_seed, final_seed, xvalid=False, yvalid=False, n_epochs=n_epochs) #EVALUATE TEST SET preds = np.zeros_like(ytest) all_preds = np.zeros([len(ytest), FLAGS.L]) widgets = ["Sampling |", Percentage(), Bar(), ETA()] pbar = ProgressBar(FLAGS.L, widgets=widgets) pbar.start() for i in range(FLAGS.L): pbar.update(i) for j in range(int(xtest.shape[0] / 100)): pyxi = sess.run(pyx, feed_dict={x: xtest[j * 100:(j + 1) * 100]}) preds[j * 100:(j + 1) * 100] += pyxi / FLAGS.L all_preds[j * 100:(j + 1) * 100, i] = np.squeeze(pyxi * y_std + y_mean) pyxi = sess.run(pyx, feed_dict={x: xtest[int(xtest.shape[0] / 100) * 100:]}) preds[int(xtest.shape[0] / 100) * 100:] += pyxi / FLAGS.L all_preds[int(xtest.shape[0] / 100) * 100:, i] = np.squeeze(pyxi * y_std + y_mean) # FIND PLL AND CRPS samples = all_preds[:, :, newaxis].T.reshape(FLAGS.L, len(all_preds), 1) mean, var = np.mean(samples, axis=0), np.var(samples, axis=0) + opt_tau ** (-1) pll_res = pll(samples, ytest * y_std + 
y_mean, FLAGS.L, opt_tau) crps_res = crps(ytest * y_std + y_mean, mean, var) # FIND BASELINE PLL AND CRPS pll_baseline = pll(np.array([mean]), ytest * y_std + y_mean, 1, cu_opt_tau) crps_baseline = crps(ytest * y_std + y_mean, mean, cu_opt_tau**(-1)) # FIND OPTIMAL PLL AND CRPS pll_best = pll_maximum(mean, ytest * y_std + y_mean) crps_best = crps_minimum(mean, ytest * y_std + y_mean) # GET NORMALIZED SCORES pll_norm = (pll_res - pll_baseline) / (pll_best - pll_baseline) crps_norm = (crps_res - crps_baseline) / (crps_best - crps_baseline) sample_accuracy = np.sqrt(np.mean((preds-ytest)*(preds-ytest))) print('Sample test accuracy: {}'.format(sample_accuracy)) ytest_u = (ytest * y_std + y_mean) preds_u = (preds * y_std + y_mean) unnormalized_rmse = np.sqrt(np.mean((preds_u - ytest_u) * (preds_u - ytest_u))) print('Sample test accuracy (unnormalized): {}'.format(unnormalized_rmse)) print('Test uncertainty quality metrics:') print "PLL: {}, PLL LOWER: {}, PLL UPPER: {}, NORM: {}".format(pll_res, pll_baseline, pll_best, pll_norm) print "CRPS: {}, CRPS LOWER: {}, CRPS UPPER: {}, NORM: {}".format(crps_res, crps_baseline, crps_best, crps_norm) all_preds_mean = all_preds.mean(axis=1) all_preds_std = all_preds.std(axis=1) # Write results to files for i in range(len(ytest)): fid.write('%d,%f,%f,%f,%f,%d,%d\n' % (i, np.sqrt(cu_opt_tau**(-1)), all_preds_std[i], ytest_u[i], all_preds_mean[i], final_seed, FLAGS.dataset_split_seed)) fid_rmse.write('%d,%d,%f\n' % (FLAGS.dataset_split_seed, final_seed, unnormalized_rmse)) fid_pll.write('%d,%d %f,%f,%f,%f\n' % (FLAGS.dataset_split_seed, final_seed, pll_res, pll_baseline, pll_best, pll_norm)) fid_crps.write('%d,%d,%f,%f,%f,%f\n' % (FLAGS.dataset_split_seed, final_seed, crps_res, crps_baseline, crps_best, crps_norm)) fid.close() fid_rmse.close() fid_pll.close() fid_crps.close() tf.reset_default_graph()
def main(): global args args = parser.parse_args() cuda = args.cuda if cuda == 'true': cuda = True else: cuda = False task_name = args.task_name epoch_size = args.epoch_size batch_size = args.batch_size to_load = args.load_iter > 0 load_iter = args.load_iter * to_load result_path = os.path.join(args.result_path, args.task_name) if args.style_A: result_path = os.path.join(result_path, args.style_A) result_path = os.path.join(result_path, args.model_arch + str(args.image_size)) model_path = os.path.join(args.model_path, args.task_name) if args.style_A: model_path = os.path.join(model_path, args.style_A) model_path = os.path.join(model_path, args.model_arch + str(args.image_size)) data_style_A, data_style_B, test_style_A, test_style_B = get_data(args) test_A = read_images(test_style_A, args.image_size) test_B = read_images(test_style_B, args.image_size) with torch.no_grad(): test_A = Variable(torch.FloatTensor(test_A)) with torch.no_grad(): test_B = Variable(torch.FloatTensor(test_B)) if not os.path.exists(result_path): os.makedirs(result_path) if not os.path.exists(model_path): os.makedirs(model_path) if to_load: ix = str(args.load_iter) generator_A = torch.load(os.path.join(model_path, 'model_gen_A-' + ix)) generator_B = torch.load(os.path.join(model_path, 'model_gen_B-' + ix)) discriminator_A = torch.load(os.path.join(model_path, 'model_dis_A-' + ix)) discriminator_B = torch.load(os.path.join(model_path, 'model_dis_B-' + ix)) else: generator_A = Generator() generator_B = Generator() discriminator_A = Discriminator() discriminator_B = Discriminator() if cuda: test_A = test_A.cuda() test_B = test_B.cuda() generator_A = generator_A.cuda() generator_B = generator_B.cuda() discriminator_A = discriminator_A.cuda() discriminator_B = discriminator_B.cuda() data_size = min(len(data_style_A), len(data_style_B)) n_batches = (data_size // batch_size) recon_criterion = nn.MSELoss() gan_criterion = nn.BCELoss() feat_criterion = nn.HingeEmbeddingLoss() gen_params = chain(generator_A.parameters(), generator_B.parameters()) dis_params = chain(discriminator_A.parameters(), discriminator_B.parameters()) optim_gen = optim.Adam(gen_params, lr=args.learning_rate, betas=(0.5, 0.999), weight_decay=0.00001) optim_dis = optim.Adam(dis_params, lr=args.learning_rate, betas=(0.5, 0.999), weight_decay=0.00001) iters = 0 gen_loss_total = [] dis_loss_total = [] for epoch in range(epoch_size): data_style_A, data_style_B = shuffle_data(data_style_A, data_style_B) widgets = ['epoch #%d|' % epoch, Percentage(), Bar(), ETA()] pbar = ProgressBar(maxval=n_batches, widgets=widgets) pbar.start() for i in range(n_batches): pbar.update(i) generator_A.zero_grad() generator_B.zero_grad() discriminator_A.zero_grad() discriminator_B.zero_grad() A_path = data_style_A[i * batch_size: (i+1) * batch_size] B_path = data_style_B[i * batch_size: (i+1) * batch_size] A = read_images(A_path, args.image_size) B = read_images(B_path, args.image_size) A = Variable(torch.FloatTensor(A)) B = Variable(torch.FloatTensor(B)) if cuda: A = A.cuda() B = B.cuda() AB = generator_B(A) BA = generator_A(B) ABA = generator_A(AB) BAB = generator_B(BA) # Reconstruction Loss recon_loss_A = recon_criterion(ABA, A) recon_loss_B = recon_criterion(BAB, B) # Real/Fake GAN Loss (A) A_dis_real, A_feats_real = discriminator_A(A) A_dis_fake, A_feats_fake = discriminator_A(BA) dis_loss_A, gen_loss_A = get_gan_loss(A_dis_real, A_dis_fake, gan_criterion, cuda) fm_loss_A = get_fm_loss(A_feats_real, A_feats_fake, feat_criterion) # Real/Fake GAN Loss (B) B_dis_real, B_feats_real 
= discriminator_B(B) B_dis_fake, B_feats_fake = discriminator_B(AB) dis_loss_B, gen_loss_B = get_gan_loss(B_dis_real, B_dis_fake, gan_criterion, cuda) fm_loss_B = get_fm_loss(B_feats_real, B_feats_fake, feat_criterion) # Total Loss if iters < args.gan_curriculum: rate = args.starting_rate else: rate = args.default_rate gen_loss_A_total = (gen_loss_B*0.1 + fm_loss_B*0.9) * (1.-rate) + recon_loss_A * rate gen_loss_B_total = (gen_loss_A*0.1 + fm_loss_A*0.9) * (1.-rate) + recon_loss_B * rate if args.model_arch == 'discogan': gen_loss = gen_loss_A_total + gen_loss_B_total dis_loss = dis_loss_A + dis_loss_B elif args.model_arch == 'recongan': gen_loss = gen_loss_A_total dis_loss = dis_loss_B elif args.model_arch == 'gan': gen_loss = (gen_loss_B*0.1 + fm_loss_B*0.9) dis_loss = dis_loss_B if iters % args.update_interval == 0: dis_loss.backward() optim_dis.step() else: gen_loss.backward() optim_gen.step() if iters % args.log_interval == 0: print("---------------------") print("GEN Loss:", as_np(gen_loss_A.mean()), as_np(gen_loss_B.mean())) print("Feature Matching Loss:", as_np(fm_loss_A.mean()), as_np(fm_loss_B.mean())) print("RECON Loss:", as_np(recon_loss_A.mean()), as_np(recon_loss_B.mean())) print("DIS Loss:", as_np(dis_loss_A.mean()), as_np(dis_loss_B.mean())) if iters % args.image_save_interval == 0: AB = generator_B(test_A) BA = generator_A(test_B) ABA = generator_A(AB) BAB = generator_B(BA) n_testset = min(test_A.size()[0], test_B.size()[0]) subdir_path = os.path.join(result_path, str(iters / args.image_save_interval)) if os.path.exists(subdir_path): pass else: os.makedirs(subdir_path) for im_idx in range(n_testset): #A_val = test_A[im_idx].cpu().data.numpy().transpose(1,2,0) * 255. #B_val = test_B[im_idx].cpu().data.numpy().transpose(1,2,0) * 255. #BA_val = BA[im_idx].cpu().data.numpy().transpose(1,2,0)* 255. #ABA_val = ABA[im_idx].cpu().data.numpy().transpose(1,2,0)* 255. #AB_val = AB[im_idx].cpu().data.numpy().transpose(1,2,0)* 255. #BAB_val = BAB[im_idx].cpu().data.numpy().transpose(1,2,0)* 255. filename_prefix = os.path.join (subdir_path, str(im_idx)) scipy.misc.imsave(filename_prefix + '.A.jpg', img4save(test_A[im_idx])) scipy.misc.imsave(filename_prefix + '.B.jpg', img4save(test_B[im_idx])) scipy.misc.imsave(filename_prefix + '.BA.jpg', img4save(BA[im_idx])) scipy.misc.imsave(filename_prefix + '.AB.jpg', img4save(AB[im_idx])) scipy.misc.imsave(filename_prefix + '.ABA.jpg', img4save(ABA[im_idx])) scipy.misc.imsave(filename_prefix + '.BAB.jpg', img4save(BAB[im_idx])) if iters % args.model_save_interval == 0: torch.save(generator_A, os.path.join(model_path, 'model_gen_A-' + str((iters / args.model_save_interval) + load_iter))) torch.save(generator_B, os.path.join(model_path, 'model_gen_B-' + str((iters / args.model_save_interval) + load_iter))) torch.save(discriminator_A, os.path.join(model_path, 'model_dis_A-' + str((iters / args.model_save_interval) + load_iter))) torch.save(discriminator_B, os.path.join(model_path, 'model_dis_B-' + str((iters / args.model_save_interval) + load_iter))) iters += 1
def __init__(self, maxval=0):
    widgets = [Percentage(), ' ',
               Bar(marker='=', left='[', right=']'), ' ',
               ETA()]
    super(ProgressBarContext, self).__init__(widgets=widgets, maxval=maxval, fd=sys.stdout)
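# --- The enclosing class is not shown above. Purely as a guess at its shape, a
# minimal ProgressBar subclass usable as a context manager with this __init__
# might look like the following; the __enter__/__exit__ methods are assumptions,
# not the original implementation.
import sys
from progressbar import ProgressBar, Percentage, Bar, ETA

class ProgressBarContext(ProgressBar):
    """ProgressBar that starts on enter and finishes on exit."""

    def __init__(self, maxval=0):
        widgets = [Percentage(), ' ',
                   Bar(marker='=', left='[', right=']'), ' ',
                   ETA()]
        super(ProgressBarContext, self).__init__(widgets=widgets, maxval=maxval, fd=sys.stdout)

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.finish()
        return False

# usage
with ProgressBarContext(maxval=10) as bar:
    for i in range(10):
        bar.update(i + 1)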
def do_distribute(self, args):
    """distribute:\n  copy local file to a group of servers into a specified directory"""
    args = args.split()
    if len(args) < 2:
        error(
            "Usage: distribute <conductor_expression> <local_filename> [remote_dir=%s]"
            % self.default_remote_dir)
        return

    expr, filename = args[:2]
    try:
        hosts = self.conductor.resolve(expr)
    except ParseException as e:
        error("Invalid conductor expression: %s" % str(e))
        return
    if len(hosts) == 0:
        error("Empty hostlist")
        return
    if not os.path.isfile(filename):
        error("%s is not a file or doesn't exist" % filename)
        return

    if len(args) > 2:
        remote_dir = args[2]
    else:
        remote_dir = self.default_remote_dir

    results = {"error": [], "success": [], "total": 0}
    errors = defaultdict(list)

    progress = None
    if self.progressbar:
        from progressbar import ProgressBar, Percentage, Bar, ETA, FileTransferSpeed
        progress = ProgressBar(widgets=[
            "Running: ", Percentage(), ' ',
            Bar(marker='.'), ' ',
            ETA(), ' ',
            FileTransferSpeed()
        ], maxval=len(hosts))

    def worker(host):
        p = Popen(
            [
                "scp",
                "-B",  # prevents asking for passwords
                filename,
                "%s@%s:%s" % (self.user, host, remote_dir)
            ],
            stdout=PIPE,
            stderr=PIPE)
        o, e = p.communicate()

        if p.poll() == 0:
            results["success"].append(host)
        else:
            results["error"].append(host)
            errors[e].append(host)
        results["total"] += 1

        if self.progressbar:
            progress.update(results["total"])

    if self.progressbar:
        progress.start()
    pool = Pool()
    for host in hosts:
        pool.start(Greenlet(worker, host))
    pool.join()
    if self.progressbar:
        progress.finish()

    if len(results["success"]) > 0:
        msg = "Successfully distributed to %d hosts" % len(results["success"])
        cprint(msg, "green")
    if len(results["error"]) > 0:
        cprint("There were errors distributing file", "red")
        for output, hosts in errors.items():
            msg = " %s " % ','.join(hosts)
            table_width = min([len(msg) + 2, terminal_size()[0]])
            cprint("=" * table_width, "blue", attrs=["bold"])
            cprint(msg, "blue", attrs=["bold"])
            cprint("=" * table_width, "blue", attrs=["bold"])
            print(output)
def populate(self, target_model_name, target_database, source_model_name, source_database, size, limit, offset, inchi_conversion, idx, display_offset): from django.db import connections from django.db import transaction from django.db.models import get_model from django.db.utils import DatabaseError, IntegrityError source_model = get_model(self.app_name, source_model_name) target_model = get_model(self.app_name, target_model_name) target_conn = connections[target_database] source_pk = source_model._meta.pk.name writer = Writer(self.term, (idx, 0)) pbar = ProgressBar(widgets=[ '{0} ({1}) [{2}-{3}]: '.format(target_model_name, idx - display_offset + 1, offset, offset + limit), Percentage(), ' (', Counter(), ') ', Bar(marker=RotatingMarker()), ' ', ETA() ], fd=writer, maxval=limit).start() inchi_kwargs = {} if inchi_conversion == 'indigo': indigo_obj = indigo.Indigo() indigo_inchi_obj = indigo_inchi.IndigoInchi(indigo_obj) inchi_kwargs = {"inchiObj": indigo_inchi_obj} elif inchi_conversion == 'rdkit' and self.verbosity < 1: from rdkit import rdBase rdBase.DisableLog('rdApp.error') from rdkit import RDLogger lg = RDLogger.logger() lg.setLevel(RDLogger.CRITICAL) inchi_converter = inchi_converters.get(inchi_conversion) last_pk = None for i in range(offset, offset + limit, size): success = 0 failure = 0 empty = 0 ignored = 0 transaction.commit_unless_managed(using=target_database) transaction.enter_transaction_management(using=target_database) transaction.managed(True, using=target_database) with target_conn.constraint_checks_disabled(): try: chunk_size = min(size, limit + offset - i) original_data = None if not last_pk: if i: last_pk = source_model.objects.using( source_database).order_by(source_pk).only( source_pk).values_list(source_pk)[i][0] else: original_data = source_model.objects.using( source_database).order_by( source_pk).values_list( 'pk', 'standardinchi')[:chunk_size] if not original_data: original_data = source_model.objects.using( source_database).order_by(source_pk).values_list( 'pk', 'standardinchi').filter( pk__gt=last_pk)[:chunk_size] last_pk = original_data[chunk_size - 1][0] target_data = [] for pk, inchi in original_data: if not inchi: empty += 1 continue if int(pk) in self.ignores: ignored += 1 continue ctab = self.convert_inchi(inchi_converter, pk, inchi, inchi_kwargs) if not ctab: failure += 1 continue target_data.append( target_model(pk=int(pk), molfile=ctab)) success += 1 target_model.objects.using(target_database).bulk_create( target_data) except IntegrityError as e: if self.verbosity >= 1: self.stderr.write( "ERROR: integrity error ({0}) occurred when processing chunk {1}-{2}" .format(e.message, i, i + size)) transaction.rollback(using=target_database) transaction.leave_transaction_management( using=target_database) continue except DatabaseError as e: if self.verbosity >= 1: self.stderr.write( "ERROR: database error ({0}) occurred when processing chunk {1}-{2}" .format(e.message, i, i + size)) transaction.rollback(using=target_database) transaction.leave_transaction_management( using=target_database) raise e pbar.update(i - offset + 1) transaction.commit(using=target_database) transaction.leave_transaction_management(using=target_database) self.total_success += success self.total_failure += failure self.total_empty += empty self.total_ignored += ignored pbar.update(limit) pbar.finish()
def fuzz_file(num_iterations, file_path, mcalls, validator=None):
    '''
    Call an external fuzzer (hardcoded with radamsa, for now) to fuzz/mutate the
    input file (file_path) for a number of times (num_iterations). Each time it
    makes a multicall on different engines.
    '''
    bar = ProgressBar(widgets=[
        'fuzzing ' + file_path + ' ',
        Percentage(), ' ',
        Bar(marker=RotatingMarker()), ' ',
        ETA(), ' '
    ])
    #pylint: disable=W0612
    num_it = 1
    while num_it <= num_iterations:
        # fuzz the file with radamsa
        fuzzed_file_path = os.path.join(tempfile.gettempdir(), 'temp_filefuzzed')
        cmd = "radamsa --output {} {}".format(fuzzed_file_path, file_path)
        args = shlex.split(cmd)
        try:
            p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            p.communicate()
        except FileNotFoundError as error:
            if 'radamsa' in str(error):
                raise Exception(
                    'Please check if radamsa is installed on your environment (see README.md file).'
                )
        except Exception as error:
            raise Exception('Error:', error)

        # check if file is valid
        if validator is not None:
            validation_error = validator(fuzzed_file_path)
            if validation_error:
                res = multicall.Results(fuzzed_file_path, validation_error)
                mcalls.notify(res)
                continue  # skip this file

        # check discrepancy
        try:
            res = multicall.callAll(fuzzed_file_path)
            res.path_name = os.path.join(
                constants.logs_dir, 'fuzzed_' + ntpath.basename(file_path))
            if mcalls.notify(res):
                # true if it is interesting and distinct. in this case, save the file
                ## get first name of file...
                shutil.copy(fuzzed_file_path, res.path_name)
        except UnicodeDecodeError as exc:
            # TODO: It is silly but we can't handle properly non-unicode outputs
            # just because the .decode('utf-8') to convert bytes into strings
            # raises this exception when the non-unicode char is mapped.
            continue

        bar.update(num_it)
        num_it += 1
    bar.finish()
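# --- Condensed sketch of the radamsa loop above: mutate one seed file N times
# and hand each mutant to a caller-supplied callback. The callback and paths are
# illustrative; the `radamsa --output OUT IN` invocation mirrors the code above
# and requires radamsa to be installed.
import os
import shlex
import subprocess
import tempfile
from progressbar import ProgressBar, Percentage, Bar, ETA

def fuzz_once(seed_path, out_path):
    cmd = "radamsa --output {} {}".format(out_path, seed_path)
    subprocess.Popen(shlex.split(cmd),
                     stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()

def fuzz_many(seed_path, iterations, handle_mutant):
    out_path = os.path.join(tempfile.gettempdir(), 'temp_filefuzzed')
    bar = ProgressBar(widgets=[Percentage(), ' ', Bar(), ' ', ETA()],
                      maxval=iterations).start()
    for n in range(1, iterations + 1):
        fuzz_once(seed_path, out_path)
        handle_mutant(out_path)       # e.g. run the engines and diff their results
        bar.update(n)
    bar.finish()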
import multiprocessing
import mmap
from atpbar import atpbar
from progressbar import ProgressBar, Percentage, Bar, ETA, AdaptiveETA
from concurrent.futures import ThreadPoolExecutor
import numpy as np
from randmac import RandMac
from scapy.layers.inet import IP, TCP, UDP, ICMP
from scapy.layers.l2 import Ether
from scapy.packet import Raw
from scapy.utils import wrpcap
from scapy.volatile import RandIP, RandString
from scapy.all import *

widgets = [Percentage(), ' ', Bar(), ' ', ETA(), ' ', AdaptiveETA()]
pbar_update_value = 0
total_tasks = 0


class RawPcapReaderFD(RawPcapReader):
    """A stateful pcap reader. Each packet is returned as a string"""

    def __init__(self, fd):
        self.filename = "dummy"
        try:
            self.f = fd
            magic = self.f.read(4)
        except IOError:
            self.f = fd
            magic = self.f.read(4)
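# --- Hypothetical sketch of how the module-level widgets / pbar_update_value /
# total_tasks globals above might be wired to a ThreadPoolExecutor: worker
# threads bump a shared counter under a lock and push it into one ProgressBar.
# The packet-building body is a placeholder, not the module's real logic.
import time
import threading
from concurrent.futures import ThreadPoolExecutor
from progressbar import ProgressBar, Percentage, Bar, ETA, AdaptiveETA

widgets = [Percentage(), ' ', Bar(), ' ', ETA(), ' ', AdaptiveETA()]
total_tasks = 40
pbar_update_value = 0
_lock = threading.Lock()
pbar = ProgressBar(widgets=widgets, maxval=total_tasks).start()

def build_one(i):
    global pbar_update_value
    time.sleep(0.01)                      # stands in for crafting/writing one packet
    with _lock:                           # serialize updates to the shared counter and bar
        pbar_update_value += 1
        pbar.update(pbar_update_value)

with ThreadPoolExecutor(max_workers=4) as ex:
    list(ex.map(build_one, range(total_tasks)))
pbar.finish()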
def device_29c0x0_read(ser, max_num_of_bytes_read, color):
    widgets = ['Reading EEPROM: ', Percentage(), ' ',
               Bar(marker=RotatingMarker()), ' ',
               ETA(), ' ', FileTransferSpeed()]

    # Initiate READ sequence.
    ser.write('READ;')
    time.sleep(0.1)

    while ser.inWaiting() == 0:
        pass

    in_buffer = ''
    while ser.inWaiting() > 0:
        in_buffer += ser.read(1)

    print('\nStart reading sequence...')
    if in_buffer == 'START':
        print('OK.')
    else:
        print(color.TEXT_RED + 'Start reading sequence - error.' + color.TEXT_GREEN)
        print(in_buffer + '\n')

    max_num_of_bytes_read_str = '%s;' % max_num_of_bytes_read

    # Initiate a number of bytes to be read (max address) - (for example: 5931;).
    ser.write(max_num_of_bytes_read_str)
    time.sleep(0.1)

    while ser.inWaiting() == 0:
        pass

    in_buffer = ''
    while ser.inWaiting() > 0:
        in_buffer += ser.read(1)

    print('\nThe number of bytes to be read accepted...')
    if in_buffer == 'DONE':
        print('OK.')
    else:
        print(color.TEXT_RED + 'Bytes-to-be-read receiving - error.' + color.TEXT_GREEN)
        print(in_buffer + '\n')

    # in_buffer = ''
    read_data_string = ''
    num_of_bytes_read = 0

    print(color.TEXT_CYAN + color.TEXT_BRIGHT)

    # maxval=max_num_of_bytes_read tested! :)
    pbar = ProgressBar(widgets=widgets, maxval=max_num_of_bytes_read).start()

    # Start reading data from EEPROM.
    while num_of_bytes_read < max_num_of_bytes_read:
        while ser.inWaiting() > 0:
            read_data_string += ser.read(1)
            # time.sleep(0.01)
            num_of_bytes_read += 1
            pbar.update(num_of_bytes_read)
    pbar.finish()

    print('%s' % num_of_bytes_read)
    print(color.TEXT_GREEN)

    memory_chksum = twos_complement_chksum(read_data_string)
    return memory_chksum
def classify(model_dir, n_inference_steps=20, n_inference_samples=20, dim_hs=[100], h_act='T.nnet.softplus', learning_rate=0.0001, learning_rate_schedule=None, dropout=0.1, batch_size=100, l2_decay=0.002, epochs=100, optimizer='rmsprop', optimizer_args=dict(), center_input=True, name='classifier'): out_path = model_dir inference_args = dict( inference_method='adaptive', inference_rate=0.1, ) # ======================================================================== print 'Loading model' model_file = glob(path.join(model_dir, '*best*npz'))[0] models, model_args = load_model(model_file, unpack_sbn, **inference_args) model = models['sbn'] model.set_tparams() dataset = model_args['dataset'] dataset_args = model_args['dataset_args'] if dataset == 'mnist': dataset_args['binarize'] = True dataset_args['source'] = '/export/mialab/users/dhjelm/data/mnist.pkl.gz' train, valid, test = load_data(dataset, batch_size, batch_size, batch_size, **dataset_args) mlp_args = dict( dim_hs=dim_hs, h_act=h_act, dropout=dropout, out_act=train.acts['label'] ) X = T.matrix('x', dtype=floatX) Y = T.matrix('y', dtype=floatX) trng = RandomStreams(random.randint(0, 1000000)) if center_input: print 'Centering input with train dataset mean image' X_mean = theano.shared(train.mean_image.astype(floatX), name='X_mean') X_i = X - X_mean else: X_i = X # ======================================================================== print 'Loading MLP and forming graph' (qs, i_costs), _, updates = model.infer_q( X_i, X, n_inference_steps, n_inference_samples=n_inference_samples) q0 = qs[0] qk = qs[-1] constants = [q0, qk] dim_in = model.dim_h dim_out = train.dims['label'] mlp0_args = deepcopy(mlp_args) mlp0 = MLP(dim_in, dim_out, name='classifier_0', **mlp0_args) mlpk_args = deepcopy(mlp_args) mlpk = MLP(dim_in, dim_out, name='classifier_k', **mlpk_args) mlpx_args = deepcopy(mlp_args) mlpx = MLP(train.dims[str(dataset)], dim_out, name='classifier_x', **mlpx_args) tparams = mlp0.set_tparams() tparams.update(**mlpk.set_tparams()) tparams.update(**mlpx.set_tparams()) print_profile(tparams) p0 = mlp0(q0) pk = mlpk(qk) px = mlpx(X_i) # ======================================================================== print 'Getting cost' cost0 = mlp0.neg_log_prob(Y, p0).sum(axis=0) costk = mlpk.neg_log_prob(Y, pk).sum(axis=0) costx = mlpx.neg_log_prob(Y, px).sum(axis=0) cost = cost0 + costk + costx extra_outs = [] extra_outs_names = ['cost'] if l2_decay > 0.: print 'Adding %.5f L2 weight decay' % l2_decay mlp0_l2_cost = mlp0.get_L2_weight_cost(l2_decay) mlpk_l2_cost = mlpk.get_L2_weight_cost(l2_decay) mlpx_l2_cost = mlpx.get_L2_weight_cost(l2_decay) cost += mlp0_l2_cost + mlpk_l2_cost + mlpx_l2_cost extra_outs += [mlp0_l2_cost, mlpk_l2_cost, mlpx_l2_cost] extra_outs_names += ['MLP0 L2 cost', 'MLPk L2 cost', 'MLPx L2 cost'] # ======================================================================== print 'Extra functions' error0 = (Y * (1 - p0)).sum(1).mean() errork = (Y * (1 - pk)).sum(1).mean() errorx = (Y * (1 - px)).sum(1).mean() f_test_keys = ['Error 0', 'Error k', 'Error x', 'Cost 0', 'Cost k', 'Cost x'] f_test = theano.function([X, Y], [error0, errork, errorx, cost0, costk, costx]) # ======================================================================== print 'Setting final tparams and save function' all_params = OrderedDict((k, v) for k, v in tparams.iteritems()) tparams = OrderedDict((k, v) for k, v in tparams.iteritems() if (v not in updates.keys() or v not in excludes)) print 'Learned model params: %s' % tparams.keys() print 'Saved 
params: %s' % all_params.keys() def save(tparams, outfile): d = dict((k, v.get_value()) for k, v in all_params.items()) d.update( dim_in=dim_in, dim_out=dim_out, dataset=dataset, dataset_args=dataset_args, **mlp_args ) np.savez(outfile, **d) # ======================================================================== print 'Getting gradients.' grads = T.grad(cost, wrt=itemlist(tparams), consider_constant=constants) # ======================================================================== print 'Building optimizer' lr = T.scalar(name='lr') f_grad_shared, f_grad_updates = eval('op.' + optimizer)( lr, tparams, grads, [X, Y], cost, extra_ups=updates, extra_outs=extra_outs, **optimizer_args) monitor = SimpleMonitor() try: epoch_t0 = time.time() s = 0 e = 0 widgets = ['Epoch {epoch} ({name}, '.format(epoch=e, name=name), Timer(), '): ', Bar()] epoch_pbar = ProgressBar(widgets=widgets, maxval=train.n).start() training_time = 0 while True: try: x, y = train.next() if train.pos == -1: epoch_pbar.update(train.n) else: epoch_pbar.update(train.pos) except StopIteration: print epoch_t1 = time.time() training_time += (epoch_t1 - epoch_t0) valid.reset() widgets = ['Validating: ', Percentage(), ' (', Timer(), ')'] pbar = ProgressBar(widgets=widgets, maxval=valid.n).start() results_train = OrderedDict() results_valid = OrderedDict() while True: try: x_valid, y_valid = valid.next() x_train, y_train = train.next() r_train = f_test(x_train, y_train) r_valid = f_test(x_valid, y_valid) results_i_train = dict((k, v) for k, v in zip(f_test_keys, r_train)) results_i_valid = dict((k, v) for k, v in zip(f_test_keys, r_valid)) update_dict_of_lists(results_train, **results_i_train) update_dict_of_lists(results_valid, **results_i_valid) if valid.pos == -1: pbar.update(valid.n) else: pbar.update(valid.pos) except StopIteration: print break def summarize(d): for k, v in d.iteritems(): d[k] = np.mean(v) summarize(results_train) summarize(results_valid) monitor.update(**results_train) monitor.update(dt_epoch=(epoch_t1-epoch_t0), training_time=training_time) monitor.update_valid(**results_valid) monitor.display() monitor.save(path.join( out_path, '{name}_monitor.png').format(name=name)) monitor.save_stats(path.join( out_path, '{name}_monitor.npz').format(name=name)) monitor.save_stats_valid(path.join( out_path, '{name}_monitor_valid.npz').format(name=name)) e += 1 epoch_t0 = time.time() valid.reset() train.reset() if learning_rate_schedule is not None: if e in learning_rate_schedule.keys(): lr = learning_rate_schedule[e] print 'Changing learning rate to %.5f' % lr learning_rate = lr widgets = ['Epoch {epoch} ({name}, '.format(epoch=e, name=name), Timer(), '): ', Bar()] epoch_pbar = ProgressBar(widgets=widgets, maxval=train.n).start() continue if e > epochs: break rval = f_grad_shared(x, y) if check_bad_nums(rval, extra_outs_names): print rval print np.any(np.isnan(mlpk.W0.get_value())) print np.any(np.isnan(mlpk.b0.get_value())) print np.any(np.isnan(mlpk.W1.get_value())) print np.any(np.isnan(mlpk.b1.get_value())) raise ValueError('Bad number!') f_grad_updates(learning_rate) s += 1 except KeyboardInterrupt: print 'Training interrupted' test.reset() widgets = ['Testing: ', Percentage(), ' (', Timer(), ')'] pbar = ProgressBar(widgets=widgets, maxval=test.n).start() results_test = OrderedDict() while True: try: x_test, y_test = test.next() r_test = f_test(x_test, y_test) results_i_test = dict((k, v) for k, v in zip(f_test_keys, r_test)) update_dict_of_lists(results_test, **results_i_test) if test.pos == -1: pbar.update(test.n) 
else: pbar.update(test.pos) except StopIteration: print break def summarize(d): for k, v in d.iteritems(): d[k] = np.mean(v) summarize(results_test) print 'Test results:' monitor.simple_display(results_test) if out_path is not None: outfile = path.join(out_path, '{name}_{t}.npz'.format(name=name, t=int(time.time()))) last_outfile = path.join(out_path, '{name}_last.npz'.format(name=name)) print 'Saving' save(tparams, outfile) save(tparams, last_outfile) print 'Done saving.' print 'Bye bye!'
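# Hedged sketch of the epoch/validation progress-bar skeleton used in classify():
# the Theano graphs, dataset iterators, and monitor are replaced by a DummyIterator
# stand-in so only the control flow (per-epoch Timer/Bar, StopIteration-driven epoch
# boundary, Percentage-based validation bar) is shown. All names here are illustrative.
from progressbar import ProgressBar, Percentage, Bar, Timer

class DummyIterator(object):
    def __init__(self, n, batch):
        self.n, self.batch, self.pos = n, batch, 0
    def reset(self):
        self.pos = 0
    def next(self):
        if self.pos >= self.n:
            raise StopIteration
        self.pos += self.batch
        return None, None

def train_loop(train, valid, epochs=3):
    for e in range(epochs):
        widgets = ['Epoch {0}: '.format(e), Timer(), ' ', Bar()]
        epoch_pbar = ProgressBar(widgets=widgets, maxval=train.n).start()
        while True:
            try:
                train.next()
                epoch_pbar.update(min(train.pos, train.n))
            except StopIteration:
                break
        epoch_pbar.finish()
        widgets = ['Validating: ', Percentage(), ' (', Timer(), ')']
        valid_pbar = ProgressBar(widgets=widgets, maxval=valid.n).start()
        while True:
            try:
                valid.next()
                valid_pbar.update(min(valid.pos, valid.n))
            except StopIteration:
                break
        valid_pbar.finish()
        train.reset()
        valid.reset()

if __name__ == '__main__':
    train_loop(DummyIterator(1000, 100), DummyIterator(200, 100))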
def build_progressbar(name, **kwargs):
    """Return configured :class:`ProgressBar` instance"""
    from progressbar import Counter, ProgressBar, Timer, Percentage, ETA
    widgets = [name, Percentage(), ' ', ETA(), ' ', Counter(), ' results(', Timer(), ')']
    return ProgressBar(widgets=widgets, **kwargs)
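# Hedged usage sketch for the build_progressbar helper above: the name prefix plus a
# maxval forwarded through **kwargs is all that is needed to drive a simple loop.
# The 'scan ' label and the count of 50 are arbitrary.
pbar = build_progressbar('scan ', maxval=50).start()
for i in range(50):
    pbar.update(i + 1)
pbar.finish()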
def fit(self, X, y, Xtest=None, ytest=None): """Fit.""" input_dim = X.shape[1] # set different data preparation schemes basing on what kind of NN is it layers = [i.keys()[0] for i in self.architecture] self.isCNN = 'Conv' in layers self.isRecurrent = 'GRU' in layers or 'LSTM' in layers if self.isCNN: self.addDelay = delay_preds self.training_params['num_strides'] = self.delay // self.skip elif self.isRecurrent: self.addDelay = delay_preds_2d else: input_dim *= self.delay / self.skip input_dim = int(input_dim) self.addDelay = delay_preds # create the model self.model = buildNN(self.architecture, self.training_params, input_dim) # print self.model.get_config() widgets = [ 'Training : ', Percentage(), ' ', Bar(marker=RotatingMarker()), ' ', ETA(), ' ' ] pbar = ProgressBar(widgets=widgets, maxval=self.majorEpochs) pbar.start() # train the model on a portion of training data; that portion is changed each majorEpoch for majorEpoch in range(self.majorEpochs): startingPoint = majorEpoch % self.partsTrain or self.mdlNr % self.partsTrain if self.jump is not None: trainData = self.addDelay(X, delay=self.delay, skip=self.skip, subsample=self.partsTrain, start=startingPoint, jump=self.jump) else: trainData = self.addDelay(X, delay=self.delay, skip=self.skip, subsample=self.partsTrain, start=startingPoint) if self.isCNN: trainData = trainData.reshape( (trainData.shape[0], 1, trainData.shape[1], 1)) targets = y[startingPoint::self.partsTrain] trainData = trainData[::self.subsample] targets = targets[::self.subsample] # print 'trainData:', trainData.shape # print 'targets:', targets.shape # print theano.printing.debugprint(self.model) self.model.fit(trainData, targets, epochs=self.smallEpochs, batch_size=512, verbose=0) trainData = None pbar.update(majorEpoch) if self.verbose and majorEpoch % self.checkEveryEpochs == 0: print("Total epochs: %d" % (self.smallEpochs * (majorEpoch + 1))) if Xtest is not None and ytest is not None: pred = self._predict_proba_train(Xtest) score = np.mean( roc_auc_score(ytest[0::self.partsTest], pred)) print("Test AUC : %.5f" % (score)) pred = None if self.verbose: print('Training finished after %d epochs' % (self.smallEpochs * (majorEpoch + 1)))
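# Minimal sketch of the "major epoch" scheme in fit() above: each outer pass trains
# on a different stride of the data (start::parts_train) while a single ProgressBar
# with a RotatingMarker tracks the outer loop. The real delay_preds windowing and
# network builder are not reproduced; train_in_parts and its no-op "fit" are stand-ins.
import numpy as np
from progressbar import ProgressBar, Percentage, Bar, RotatingMarker, ETA

def train_in_parts(X, y, major_epochs=8, parts_train=4):
    widgets = ['Training : ', Percentage(), ' ', Bar(marker=RotatingMarker()), ' ', ETA(), ' ']
    pbar = ProgressBar(widgets=widgets, maxval=major_epochs).start()
    for major_epoch in range(major_epochs):
        start = major_epoch % parts_train
        batch_X, batch_y = X[start::parts_train], y[start::parts_train]
        _ = batch_X.mean(), batch_y.mean()      # stand-in for self.model.fit(batch_X, batch_y, ...)
        pbar.update(major_epoch + 1)
    pbar.finish()

if __name__ == '__main__':
    rng = np.random.RandomState(0)
    train_in_parts(rng.randn(1000, 16), rng.randint(0, 2, 1000).astype(float))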
def start(self, name, count):
    self.pbar = ProgressBar(widgets=[name, Percentage(), Bar()], maxval=count)
    self.pbar.start()
def migrate2(self): session = Session() try: from progressbar import ProgressBar, Percentage, Bar, ETA except: print 'Critical: progressbar library not found, try running `bin/easy_install progressbar` ?' return class Seen(Base): __tablename__ = 'seen' id = Column(Integer, primary_key=True) field = Column(String) value = Column(String, index=True) task = Column('feed', String) added = Column(DateTime) def __init__(self, field, value, task): self.field = field self.value = value self.task = task self.added = datetime.now() def __str__(self): return '<Seen(%s=%s)>' % (self.field, self.value) print '' # REPAIR / REMOVE DUPLICATES index = 0 removed = 0 total = session.query(Seen).count() + 1 widgets = [ 'Repairing - ', ETA(), ' ', Percentage(), ' ', Bar(left='[', right=']') ] bar = ProgressBar(widgets=widgets, maxval=total).start() for seen in session.query(Seen).all(): index += 1 if index % 10 == 0: bar.update(index) amount = 0 for dupe in session.query(Seen).filter(Seen.value == seen.value): amount += 1 if amount > 1: removed += 1 session.delete(dupe) bar.finish() # MIGRATE total = session.query(Seen).count() + 1 widgets = [ 'Upgrading - ', ETA(), ' ', Percentage(), ' ', Bar(left='[', right=']') ] bar = ProgressBar(widgets=widgets, maxval=total).start() index = 0 for seen in session.query(Seen).all(): index += 1 if not index % 10: bar.update(index) se = SeenEntry(u'N/A', seen.task, u'migrated') se.added = seen.added se.fields.append(SeenField(seen.field, seen.value)) session.add(se) bar.finish() session.execute('drop table seen;') session.commit()
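# Hedged sketch of the repair/upgrade loop shape in migrate2(): an ETA/Percentage/Bar
# bar sized to count + 1, updated only on every 10th row to keep redraws cheap. A
# plain range stands in for the SQLAlchemy session.query(Seen).all() results.
from progressbar import ProgressBar, Percentage, Bar, ETA

rows = range(1, 2001)                      # stand-in for the seen-table rows
widgets = ['Repairing - ', ETA(), ' ', Percentage(), ' ', Bar(left='[', right=']')]
bar = ProgressBar(widgets=widgets, maxval=len(rows) + 1).start()
for index, _row in enumerate(rows, start=1):
    if index % 10 == 0:                    # throttle updates, as in the migration above
        bar.update(index)
bar.finish()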
def enumSMB(self):
    progBar = ProgressBar(widgets=['SMBConnection test: ', Percentage(), Bar(), ETA()],
                          maxval=len(self.smbShareCandidates)).start()
    prog = 0
    try:
        for dnsname in self.smbShareCandidates:
            try:
                # Changing default timeout as shares should respond within 5 seconds if there is a share
                # and ACLs make it available to self.user with self.passwd
                smbconn = smbconnection.SMBConnection('\\\\' + str(dnsname), str(dnsname), timeout=5)
                smbconn.login(self.domuser, self.passwd)
                dirs = smbconn.listShares()
                self.smbBrowseable[str(dnsname)] = {}
                for share in dirs:
                    self.smbBrowseable[str(dnsname)][str(share['shi1_netname']).rstrip('\0')] = ''
                    try:
                        _ = smbconn.listPath(str(share['shi1_netname']).rstrip('\0'), '*')
                        self.smbBrowseable[str(dnsname)][str(share['shi1_netname']).rstrip('\0')] = True
                    except (SessionError, UnicodeEncodeError, NetBIOSError):
                        # Didn't have permission, all good
                        # I'm second-guessing the addition below to the JSON file, as we're only
                        # interested in the listable directories really
                        #self.smbBrowseable[str(dnsname)][str(share['shi1_netname']).rstrip('\0')] = False
                        continue
                smbconn.logoff()
                progBar.update(prog + 1)
                prog += 1
            except (socket.error, NetBIOSTimeout, SessionError, NetBIOSError):
                # TODO: Examine why we sometimes get:
                # impacket.smbconnection.SessionError: SMB SessionError: STATUS_PIPE_NOT_AVAILABLE
                # on healthy shares. It seems to be reported with CIFS shares
                progBar.update(prog + 1)
                prog += 1
                continue
    except ValueError:
        # We reached the end of the progressbar; continue, since we finish below
        pass
    progBar.finish()
    print('')
    availDirs = []
    for key, value in self.smbBrowseable.items():
        for _, v in value.items():
            if v:
                availDirs.append(key)
    if len(self.smbShareCandidates) == 1:
        print('[ ' + colored('OK', 'green') +
              ' ] Searched {0} share and {1} with {2} subdirectories/files is browseable by {3}'
              .format(len(self.smbShareCandidates), len(self.smbBrowseable.keys()), len(availDirs), self.domuser))
    else:
        print('[ ' + colored('OK', 'green') +
              ' ] Searched {0} shares and {1} with {2} subdirectories/files are browseable by {3}'
              .format(len(self.smbShareCandidates), len(self.smbBrowseable.keys()), len(availDirs), self.domuser))
    if len(self.smbBrowseable.keys()) > 0:
        with open('{0}-open-smb.json'.format(self.server), 'w') as f:
            json.dump(self.smbBrowseable, f, indent=4, sort_keys=False)
        print('[ ' + colored('OK', 'green') +
              ' ] Wrote browseable shares to {0}-open-smb.json'.format(self.server))
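# Minimal sketch of the per-host progress pattern in enumSMB(): the bar advances even
# when a host raises, so unreachable hosts never stall the display. check_host is a
# hypothetical stand-in for the impacket SMBConnection/listShares logic above, and the
# srvN host names are made up.
from progressbar import ProgressBar, Percentage, Bar, ETA

def check_host(name):
    if name.endswith('3'):
        raise OSError('unreachable')       # simulate socket / NetBIOS errors
    return ['NETLOGON', 'SYSVOL']

candidates = ['srv%d' % i for i in range(20)]
browseable = {}
pbar = ProgressBar(widgets=['SMBConnection test: ', Percentage(), Bar(), ETA()],
                   maxval=len(candidates)).start()
for done, host in enumerate(candidates, start=1):
    try:
        browseable[host] = check_host(host)
    except OSError:
        pass                               # keep going; the bar still moves
    pbar.update(done)
pbar.finish()
print('%d of %d hosts browseable' % (len(browseable), len(candidates)))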
def NodeDic(results, edge_info, node_info): ''' Function takes the results of running a query, NETS edge label information, and a list of node information (list[0] contains the NETS nodes label triples, list[1] contains the contains the NETS nodes identifier triples). The function returns a list of dictionaries where list[0] contains a nested dictionary where keys are bio entity identifiers and the values are the the human readable labels and database identifiers; list[1] contains a dictionary where the bio node is the key and the value is a set of possible NETS node types for that node. :param results: json file containing the query results from endpoint :param edge_info: dictionary where the keys are the NETS edges and the values are the edge labels :param node_info: a list of node information (list[0] contains the NETS nodes label triples, list[1] contains the contains the NETS nodes identifier triples) :return: a list of dictionaries: list[0] contains a nested dictionary where keys are bio entity identifiers and the values are the the human readable labels and database identifiers; list[1] contains a dictionary where the bio node is the key and the value is a set of possible NETS node types for that node ''' print 'Start building OWL-NETs metadata dictionary' # creates a map to store NETS node type information node_type = {} # creates a map to identify which query variables represent the BIO world ID, label, and ICE ID node_labeler = {} # assign variables needed for node dictionary NETS = set([x.strip('?') for y in edge_info[0].keys() for x in y]) labels = [[re.sub('[?|"\n"]', '', x.split(' ')[0]), re.sub('[?|"\n"]', '', x.split(' ')[2])] for x in node_info[0]] ids = [[x.split(' ')[0].strip('?'), x.split(' ')[2].strip('?')] for x in node_info[1]] # initialize progress bar progress bar widgets = [Percentage(), Bar(), FormatLabel('(elapsed: %(elapsed)s)')] pbar = ProgressBar(widgets=widgets, maxval=len(NETS)) for node in pbar(NETS): node_labeler[node] = {} for res in results['results']['bindings']: node_key = str(res[node]['value']) label_value = str([x[1] for x in labels if x[0] == node][0].encode('utf8')) id_value = str([x[0] for x in ids if x[1] == node][0].encode('utf8')) # NODE TYPE: setting node type information if node_key in node_type.keys(): node_type[node_key].add(node) else: node_type[node_key] = set() node_type[node_key].add(node) # NODE METADATA: setting node attributes by NETS node type if node_key in node_labeler[node].keys(): # order matters - not using a set so that each ICE can be mapped to the label with the same index node_labeler[node][node_key]['label'].append(res[label_value]['value'].encode('utf8')) node_labeler[node][node_key]['id'].append(res[id_value]['value'].encode('utf8')) else: node_labeler[node][node_key] = {} node_labeler[node][node_key]['label'] = [res[label_value]['value'].encode('utf8')] node_labeler[node][node_key]['id'] = [res[id_value]['value'].encode('utf8')] # close progress bar pbar.finish() print 'Finished building OWL-NETs metadata dictionary' print '\n' # CHECK: verify that the counts are correct for node in NETS: res_count = set() for res in results['results']['bindings']: res_count.add(res[node]['value']) if len(node_labeler[node].keys()) != len(res_count): # verify the number of nodes in graph is correct raise ValueError('The count of results for the ' + str(node) + ' NETS node in the node dictionary differ ' 'from the query output') return node_labeler, node_type
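# Hedged sketch of the pbar(iterable) idiom used in NodeDic(): a ProgressBar built
# with Percentage/Bar/FormatLabel widgets can wrap a sized iterable directly and
# advance once per yielded item. The nodes list and the toy node_type dictionary are
# illustrative stand-ins for the query variables and metadata structures.
from progressbar import ProgressBar, Percentage, Bar, FormatLabel

nodes = ['gene', 'protein', 'drug', 'disease']
widgets = [Percentage(), Bar(), FormatLabel('(elapsed: %(elapsed)s)')]
pbar = ProgressBar(widgets=widgets, maxval=len(nodes))

node_type = {}
for node in pbar(nodes):                   # iterate and update the bar in one step
    node_type.setdefault(node, set()).add(node.upper())
print(node_type)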
def sample(self, nsample=1000, from_estimate=False, test=False): if (nsample == 1) and (from_estimate == False): raise ValueError('for nsample = 1, use .estimate() method!') elif (1 < nsample < 100) and (test == False): raise ValueError('need at least 100 samples!') elif (1 < nsample < 100) and (test == True): print 'not enough samples, remember to only ' + \ 'use this as a test-bed!' # increasing sample by 10% to ensure # robustness against rejected samples nsample = int(nsample) # set up a dictionary to store tables of relevant data for each spaxel res_d = {} tfcnames = [ k for k in self.flux.colnames if len(self.flux[k]) > np.isnan(self.flux[k]).sum() ] self.tfcnames = tfcnames #looping over nm measurements pbar = ProgressBar(widgets=[Percentage(), Bar(), ETA()], maxval=self.nm).start() for i in range(self.NM0, self.nm): blockPrint() galnum = self.flux['galnum'][i] fr = self.flux[i] er = self.err[i] fluxi = { k: np.random.normal(fr[k], er[k], nsample) for k in fr.colnames if ((k != 'galnum') and (~np.isnan(fr[k]))) } # set up a table for a given galnum res_d[galnum] = t.Table() # add a column for flux information for n in tfcnames: if (n != 'galnum'): if (np.isnan(self.flux[n]).sum() != len(self.flux[n])): res_d[galnum][n] = fluxi[n] res_d[galnum][n].unit = u.Unit('1e-17 erg cm^-2 s^-1') scales = ms.diagnostics(nsample, None, self.nps) with warnings.catch_warnings(): warnings.simplefilter('ignore') success = metallicity.calculation(scales, fluxi, self.nm, 'all', 1, self.logf, disp=self.verbose, dust_corr=self.dust_corr, verbose=self.verbose) if success == -1: raise ValueError('MINIMUM REQUIRED LINES: [OII]3727 ' + \ '& [OIII] + 5007, or [NII]6584, and Ha & Hb if ' + \ 'you want dereddening') for k, v in scales.mds.iteritems(): if type(v) == np.ndarray: if np.isnan(v).sum() != len(v): res_d[galnum][k] = v enablePrint() pbar.update(i) pbar.finish() self.res_d = res_d self.nsample = nsample self.Zdiags = res_d[galnum].colnames
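# Hedged sketch of the Monte Carlo resampling step inside sample(): each measured
# line flux is perturbed with Gaussian noise scaled by its error while a
# Percentage/Bar/ETA bar tracks progress. The toy flux/err values are made up and the
# metallicity diagnostics themselves are not reproduced.
import numpy as np
from progressbar import ProgressBar, Percentage, Bar, ETA

flux = {'OII3727': 2.1, 'OIII5007': 3.4, 'Ha': 8.2, 'Hb': 2.9}   # toy line fluxes
err = {k: 0.1 * v for k, v in flux.items()}
nsample = 1000

pbar = ProgressBar(widgets=[Percentage(), Bar(), ETA()], maxval=len(flux)).start()
samples = {}
for i, (line, value) in enumerate(flux.items(), start=1):
    samples[line] = np.random.normal(value, err[line], nsample)
    pbar.update(i)
pbar.finish()
print({k: round(float(v.mean()), 3) for k, v in samples.items()})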
semester_codes = {'0': 'INTERIM', '1': 'SPRING', '6': 'SUMMER', '9': 'FALL'}
default_pickup = {'GEN': 'MUSME', 'BUS': 'BUS', 'MM': 'MUSME', 'HEALTH': 'HLTH',
                  'OXF': 'OXFD', 'CHEM': 'CHEM', 'THE': 'THEO', 'LAW': 'LAW'}

# widget for progress bar
pbar_widget = [Percentage(), ' ', ETA(), Bar()]

def unnone(str):
    return str if str is not None else ''

# get notes by type for later reference so it will not take 2 hours to run 82,000 separate queries
def get_notes(type, sep='; '):
    query = '''
        SELECT n.target_id id, IFNULL(group_concat(n.note separator %s), '') notes
        FROM notes n
        WHERE n.type = %s
        GROUP BY n.target_id
    '''
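# Hedged sketch of how pbar_widget above is typically consumed: pass it to a
# ProgressBar sized to the record count and update as each row is processed. The
# records list and the per-row work are placeholders; only unnone and pbar_widget
# come from the definitions above.
from progressbar import ProgressBar

records = [{'id': i, 'notes': None} for i in range(500)]   # stand-in for query rows
pbar = ProgressBar(widgets=pbar_widget, maxval=len(records)).start()
for done, rec in enumerate(records, start=1):
    rec['notes'] = unnone(rec['notes'])                    # reuse the helper defined above
    pbar.update(done)
pbar.finish()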
def __init__(self): self.logger = logging.getLogger(__name__) if hasattr(sys, "frozen"): resp = request.urlopen( "https://api.github.com/repos/xKynn/PathOfExileRPC/releases/latest" ) data = json.load(resp) info = win32api.GetFileVersionInfo('launcher.exe', "\\") ms = info['FileVersionMS'] ls = info['FileVersionLS'] version = "%d.%d.%d.%d" % (win32api.HIWORD(ms), win32api.LOWORD( ms), win32api.HIWORD(ls), win32api.LOWORD(ls)) latest_ver = parse_version(data['tag_name']) current_ver = parse_version(version) download_url = data["assets"][0]["browser_download_url"] if latest_ver > current_ver: print("Found a newer release, would you like to update? (y/n)") reply = input() if reply.startswith("n"): sys.exit() print("Starting Update Process") print(f"Update Notes: {data['body']}") if not os.path.isdir( os.path.join(os.path.dirname(sys.executable), 'updates')): os.mkdir( os.path.join(os.path.dirname(sys.executable), 'updates')) widgets = [ f'{data["assets"][0]["name"]}: ', Percentage(), ' ', Bar(marker=RotatingMarker()), ' ', ETA(), ' ', FileTransferSpeed() ] pbar = ProgressBar(widgets=widgets) def dl_progress(count, blockSize, totalSize): if pbar.maxval is None: pbar.maxval = totalSize pbar.start() pbar.update(min(count * blockSize, totalSize)) request.urlretrieve(download_url, os.path.join( os.path.dirname(sys.executable), 'updates', data["assets"][0]["name"]), reporthook=dl_progress) atexit.register(os.execl, "updater.exe", "updater.exe") sys.exit() try: with open('config.json') as f: js = json.load(f) except: js = {"name": "", "private": False, "sessid": ""} if not js['name']: js['name'] = input( "Please enter your path of exile account name: ") while 1: reply = input( "Is your path of exile profile private or is character tab hidden? (y/n): " ) if reply in ["y", "n"]: break if reply == "y": while 1: sessid = input("Input your POESESSID here: ") confirm = input("Confirm? (y/n)") if confirm in ["y", "n"]: if confirm == "n": continue else: break js['sessid'] = sessid js['private'] = True else: js['private'] = False if (Path().cwd() / "launcher.exe").is_file(): while 1: rep = input( "Would you like to setup PathOfExileRPC to start on startup? " "It will start in the background without a window. (y/n)" ) if rep in ["y", "n"]: break if rep == "y": user = os.getlogin() with open('launcher.vbs', 'w') as f: f.write('Set oShell = CreateObject ("Wscript.Shell")\n' 'Dim strArgs\n' f'strArgs = "{Path().cwd()}\launcher.exe"\n' 'oShell.Run strArgs, 0, false') with open('poestartup.bat', 'w') as f: f.write( f"{Path().cwd().as_posix()}/launcher.vbs\nexit") found = False p = Path( shell.SHGetFolderPath(0, shellcon.CSIDL_STARTUP, 0, 0)) print(p) if p.is_dir(): found = True if not found: print( "The startup folder could not be located, you can set this up manually by:\n" "1. Copy the newly created poestartup.bat file in this directory\n" "2. Hold down the windows key and press R, in this window type in shell:startup\n" "3. In the opened folder paste the file you copied earlier" ) else: cp('poestartup.bat', f"{p.as_posix()}/poestartup.bat") print( "Done! PathOfExileRPC will now startup when you log into windows." ) print( "Setup is done and your settings will be saved, to go through " "setup again just delete the file called config.json") with open('config.json', 'w') as f: json.dump(js, f) self.loop = asyncio.ProactorEventLoop() cookies = None if js['private']: cookies = {'POESESSID': js['sessid']} self.cl = PoeRPC(self.loop, js['name'], cookies, self.logger)
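# Minimal sketch of the reporthook pattern used in the updater above: urlretrieve
# calls dl_progress(block_count, block_size, total_size) repeatedly, and the hook
# lazily sets maxval the first time it learns the download size. The URL below is a
# placeholder, so the actual download line is left commented out.
from urllib import request
from progressbar import ProgressBar, Percentage, Bar, RotatingMarker, ETA, FileTransferSpeed

widgets = ['download: ', Percentage(), ' ', Bar(marker=RotatingMarker()), ' ',
           ETA(), ' ', FileTransferSpeed()]
pbar = ProgressBar(widgets=widgets)

def dl_progress(count, block_size, total_size):
    if pbar.maxval is None:
        pbar.maxval = total_size
        pbar.start()
    pbar.update(min(count * block_size, total_size))

# request.urlretrieve('https://example.com/release.zip', 'release.zip', reporthook=dl_progress)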
def _do_percentile(self, data, spectrograms, name): pbar = None try: from progressbar import ProgressBar, Percentage, Bar pbar = ProgressBar(widgets=[Percentage(), Bar()], maxval=len(data.track_names)*101) pbar.start() except Exception as e: pass k = 0 if config.learn_phase: y_real = [[] for _ in range(101)] y_imag = [[] for _ in range(101)] for track in sorted(data.track_names): t = data.prepare_spectrogram(spectrograms[track]) median_real = np.median(t[:, :, 0]) median_imag = np.median(t[:, :, 1]) for i in range(101): if pbar is not None: pbar.update(k) k += 1 v = np.percentile(t[:, :, 0], i) y_real[i].append(v-median_real) v = np.percentile(t[:, :, 1], i) y_imag[i].append(v-median_imag) if not os.path.exists(self.analysisPath): os.mkdir(self.analysisPath) h5f_path = os.path.join(self.analysisPath, "ir_percentile_%s.hdf5" % name) h5f = h5py.File(h5f_path, "w") h5f.create_dataset(name="real", data=y_real) h5f.create_dataset(name="imag", data=y_imag) h5f.close() plt.figure(figsize=(15, 15)) plt.subplot(211) result = plt.boxplot(y_real, labels=range(101)) print([l.get_ydata()[0] for l in result["medians"]]) plt.xticks(rotation=90) plt.title("Real") plt.xlabel("percentile") plt.ylabel("difference from median") plt.subplot(212) result = plt.boxplot(y_imag, labels=range(101)) print([l.get_ydata()[0] for l in result["medians"]]) plt.xticks(rotation=90) plt.title("Imag") plt.xlabel("percentile") plt.ylabel("difference from median") if not os.path.exists(self.analysisPath): os.mkdir(self.analysisPath) plt.savefig(os.path.join(self.analysisPath, "percentile_%s_ir.png" % name)) plt.close() else: y = [[] for _ in range(101)] for track in data.track_names: t = data.prepare_spectrogram(spectrograms[track]) median = np.median(t) for i in range(101): if pbar is not None: pbar.update(k) k += 1 v = np.percentile(t, i) y[i].append(v-median) if not os.path.exists(self.analysisPath): os.mkdir(self.analysisPath) h5f_path = os.path.join(self.analysisPath, "amp_percentile_%s.hdf5" % name) h5f = h5py.File(h5f_path, "w") h5f.create_dataset(name="value", data=y) h5f.close() plt.figure(figsize=(15, 15)) result = plt.boxplot(y, labels=range(101)) print([l.get_ydata()[0] for l in result["medians"]]) plt.xticks(rotation=90) plt.title("Amplitude") plt.xlabel("percentile") plt.ylabel("difference from median") if not os.path.exists(self.analysisPath): os.mkdir(self.analysisPath) plt.savefig(os.path.join(self.analysisPath, "percentile_%s_amplitude.png" % name)) plt.close()
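# Hedged sketch of the percentile analysis in _do_percentile(): for each track-like
# array, record how every percentile differs from that array's median while a single
# Percentage/Bar bar covers tracks x 101 percentiles. Random arrays stand in for the
# prepared spectrograms; no HDF5 or plot output is produced here.
import numpy as np
from progressbar import ProgressBar, Percentage, Bar

tracks = {'track%d' % i: np.random.RandomState(i).randn(64, 64) for i in range(5)}
pbar = ProgressBar(widgets=[Percentage(), Bar()], maxval=len(tracks) * 101).start()

y = [[] for _ in range(101)]
k = 0
for name, t in sorted(tracks.items()):
    median = np.median(t)
    for i in range(101):
        y[i].append(np.percentile(t, i) - median)
        k += 1
        pbar.update(k)
pbar.finish()
print([round(float(v), 3) for v in np.median(y, axis=1)[:5]])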
def main(): transferLearning = False fineTuning = False cuda = True model_save_interval = 1000 image_save_interval = 1000 update_interval = 85 log_interval = 100 testingAccuracyList = [] trainingAccuracyList = [] trainingLossList = [] testingLossList = [] modelNumList = [] # epoch_size = 30 batch_size = 50 result_path = "transfer2_toxicity_classifier_results" model_path = "transfer2_toxicity_classifier" # saved_model_path = "toxicity_classifier_models" saved_dis_A = "model_dis-14.0" # unload the data files train, trainLabels, test, testLabels, feature, featureLabels = get_data() np.save("train", train) np.save("trainLabels", trainLabels) np.save("test", test) np.save("testLabels", testLabels) np.save("feature", feature) np.save("featureLabels", featureLabels) train = Variable(torch.FloatTensor(train)) trainLabels = Variable(torch.FloatTensor(trainLabels)) test = Variable(torch.FloatTensor(test)) testLabels = Variable(torch.FloatTensor(testLabels)) feature = Variable(torch.FloatTensor(feature)) featureLabels = Variable(torch.FloatTensor(featureLabels)) # Initialize Learning Network discriminator = Discriminator() if transferLearning or fineTuning: device = None # saved_dis_A_path = os.path.join(saved_model_path, saved_dis_A) if not cuda: device = torch.device('cpu') dis_A_state_dict = torch.load("transfer1_model_dis-14.0", map_location="cpu") else: device = torch.device('cuda') dis_A_state_dict = torch.load("transfer1_model_dis-14.0") # obtain the state dictionary of a previously trained model discriminator.load_state_dict(dis_A_state_dict, strict=False) # send dictionary to device discriminator.to(device) # Enable GPUs if cuda: train = train.cuda() test = test.cuda() feature = feature.cuda() discriminator = discriminator.cuda() data_size = len(train) n_batches = (data_size // batch_size) # Set up loss function dis_criterion = nn.BCELoss() # Obtain parameters to pass to optimiser dis_params = discriminator.parameters() # Setting up gradient descent (optimiser, using the Adam algorithm) optim_dis = optim.Adam(dis_params, lr=0.000005, betas=(0.5, 0.999), weight_decay=0.000007) iters = 0 for epoch in range(epoch_size): # Shuffle the order of all the data train, trainLabels = shuffle_data(train, trainLabels) # Progression bar widgets = ['epoch #%d|' % epoch, Percentage(), Bar(), ETA()] pbar = ProgressBar(maxval=n_batches, widgets=widgets) pbar.start() # for each batch for i in range(n_batches - 1): pbar.update(i) # Reset gradients discriminator.zero_grad() # Get the batches batch, batchLabels = getBatch( train, trainLabels, i, batch_size ) # This returns a batch of dimension batch_size, in_chanels, height, width (30,1,25,8) # Enable GPUs if cuda: batch = batch.cuda() batchLabels.cuda() trainingClassifications = discriminator( batch, epoch) # How well does the real A image fit the A domain? 
trainingLoss = get_dis_loss(trainingClassifications, batchLabels, dis_criterion, cuda) # UPDATE EDGES BASED ON LOSSES ***************************************************** trainingLoss.backward() optim_dis.step() if iters % log_interval == 0: if cuda: test = test.cuda() testLabels = testLabels.cuda() startIndex, stopIndex = getStartStop(testLabels) testingClassifications = discriminator( test[startIndex:stopIndex], 0) testingLoss = get_dis_loss(testingClassifications, testLabels[startIndex:stopIndex], dis_criterion, cuda) testingAccuracy = getAccuracy(testingClassifications, testLabels[startIndex:stopIndex]) modelNum = iters / model_save_interval print() print("---------------------") print("Model Number: " + str(modelNum)) modelNumList.append(modelNum) print("Training Loss:", as_np(trainingLoss.mean())) trainingLossList.append(as_np(trainingLoss.mean())) print("Training Accuracy:", getAccuracy(trainingClassifications, batchLabels)) trainingAccuracyList.append( getAccuracy(trainingClassifications, batchLabels)) print("Testing Loss:", as_np(testingLoss.mean())) testingLossList.append(as_np(testingLoss.mean())) print("Testing Accuracy: ", testingAccuracy) testingAccuracyList.append(testingAccuracy) # save models at the save interval if iters % model_save_interval == 0: # if os.path.exists(model_subdir_path): # pass # else: # os.makedirs(model_subdir_path) torch.save( discriminator.state_dict(), os.path.join('transfer2_model_dis-' + str(iters / model_save_interval))) iters += 1 print("assigningDictionary") dictionary = { "TrainingLoss": trainingLossList, "TrainingAccuracy": trainingAccuracyList, "TestingLoss": testingLossList, "TestingAccuracy": testingAccuracyList } print(dictionary) import pickle outFile = open("transferDataDict.pickle", "wb") pickle.dump(dictionary, outFile) df = pd.DataFrame(dictionary) print(df) df.plot.line()
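# Minimal sketch of the per-epoch batch progress pattern in main() above: one bar per
# epoch labelled with the epoch number, updated once per batch. The discriminator,
# loss, and optimiser are replaced by a no-op so the loop runs standalone; all sizes
# here are illustrative.
import numpy as np
from progressbar import ProgressBar, Percentage, Bar, ETA

def run_epochs(n_samples=5000, batch_size=50, epoch_size=3):
    data = np.random.randn(n_samples, 8)
    n_batches = n_samples // batch_size
    for epoch in range(epoch_size):
        widgets = ['epoch #%d|' % epoch, Percentage(), Bar(), ETA()]
        pbar = ProgressBar(maxval=n_batches, widgets=widgets)
        pbar.start()
        for i in range(n_batches - 1):
            pbar.update(i)
            batch = data[i * batch_size:(i + 1) * batch_size]
            _ = batch.mean()               # stand-in for the forward/backward pass
        pbar.finish()

if __name__ == '__main__':
    run_epochs()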