def run():
    """Execute the whole pipeline: download, aggregate, analyse, plot.

    A banner with the current timestamp is printed first, each stage is
    announced on stdout right before it starts, and ``Done!`` is printed
    once the last stage has returned.
    """

    def stage(banner, job):
        # Announce before running so the log shows the active stage if it raises.
        print(banner)
        job()

    print("Running @", datetime.now())
    # The lambdas defer every module lookup until the stage is reached.
    stage("Downloading data..", lambda: collection.run())
    stage("Aggregating data..", lambda: aggregation.run())
    stage("Analyzing data..", lambda: analysis.run())
    stage("Creating plots..", lambda: visualization.run())
    print("Done!")
def run(i, j):
    """Run the analysis for the slice ``[i:j]`` and report how long it took.

    When both bounds equal 9 the analysis is started without any
    pre-computed data; for every other pair the ``[i:j]`` slices of
    ``han.POS_TAGGED`` and ``han.RESULTS`` are forwarded as ``pos_tagged``
    and ``results``.

    Args:
        i: Lower slice bound.
        j: Upper slice bound.

    Returns:
        Whatever ``analysis.run`` returns.
    """
    began = time.time()

    # Only the non-(9, 9) case carries the pre-computed slices.
    precomputed = {}
    if not (i == 9 and j == 9):
        precomputed['pos_tagged'] = han.POS_TAGGED[i:j]
        precomputed['results'] = han.RESULTS[i:j]
    setcurrent = analysis.run([], [], [], None, **precomputed)

    elapsed = time.time() - began
    # %d truncates the elapsed wall time to whole seconds.
    print("Time: %d" % elapsed)
    return setcurrent
def run_commandline_program(sys_argv, current_version, latest_version):
    """Command-line front end: analyse one competition day or print usage.

    A notice is printed first when ``latest_version`` (with any leading
    ``v`` characters stripped) differs from ``current_version``.  After
    that, exactly one extra argument is expected: ``--help`` prints the
    usage text, anything else is treated as the competition URL.  A URL
    rejected by ``url_format_correct`` ends the call without running the
    analysis; any other argument count prints the usage text.

    Args:
        sys_argv: Argument vector, program name first.
        current_version: Version string of the running program.
        latest_version: Latest known version string; may be falsy.
    """

    def print_help():
        print(
            'There are two options for running PySoar from the commandline:\n'
            '1. `python main_python` for GUI\n'
            '2. `python main_pysoar [url]` - where [url] is the daily competition url'
        )

    def status_handle(message):
        print(message)

    def download_handle(new, total=None):
        # Show "done/total" only when the total is known.
        print(('Downloaded: %s' % new) if total is None
              else ('Downloaded: %s/%s' % (new, total)))

    def analysis_handle(new, total=None):
        print(('Analyzed: %s' % new) if total is None
              else ('Analyzed: %s/%s' % (new, total)))

    def on_success():
        print('Analysis complete')

    def on_failure(msg):
        print('Error: %s' % msg)

    outdated = latest_version and latest_version.lstrip('v') != current_version
    if outdated:
        print('Latest version is %s! Current: %s' % (latest_version, current_version))

    # Anything but "program + one argument" gets the usage text.
    if len(sys_argv) != 2:
        print_help()
        return

    argument = sys_argv[1]
    if argument == '--help':
        print_help()
        return

    # The format check reports problems through status_handle itself.
    if not url_format_correct(argument, status_handle):
        return

    source = get_url_source(argument)
    run(argument, source,
        download_progress=download_handle,
        analysis_progress=analysis_handle,
        on_success=on_success,
        on_failure=on_failure)
def hello_world(text):
    """Decode a JSON payload and run the analysis on its ``text`` entry.

    Args:
        text: JSON document (as a string) whose decoded form is indexed
            with the key ``'text'``.

    Returns:
        Whatever ``analysis.run`` returns for ``param['text']``.
    """
    param = json.loads(text)
    # Single-argument call form: prints the same under Python 2 and 3,
    # whereas the former ``print param`` statement is a SyntaxError on 3.
    print(param)
    fc = analysis.run(param['text'])
    return fc
#input
def hello_world(text):
    """Decode a JSON payload and run the analysis on its ``text`` entry.

    Args:
        text: JSON document (as a string) whose decoded form is indexed
            with the key ``'text'``.

    Returns:
        Whatever ``analysis.run`` returns for ``param['text']``.
    """
    param = json.loads(text)
    # Single-argument call form: prints the same under Python 2 and 3,
    # whereas the former ``print param`` statement is a SyntaxError on 3.
    print(param)
    fc = analysis.run(param['text'])
    return fc
#input
def compare_feature_selectors(p):
    """Benchmark single selectors against ensembles built from three of them.

    Three base selectors feed three ensemble methods; the benchmark then
    runs those base selectors, a Lasso selector, a random selector and the
    ensembles on four data sets.

    Args:
        p: Forwarded as ``percentage_features_to_select`` to ``SVM_RFE``
            and as ``jaccard_percentage`` to ``analysis.run``.
    """
    base_selectors = [
        SymmetricalUncertainty(),
        Relief(),
        SVM_RFE(percentage_features_to_select=p),
    ]

    # Every ensemble aggregates the same three base selectors.
    ensembles = [
        ensemble_methods.Mean(data_set_feature_selectors=base_selectors),
        ensemble_methods.MeanNormalizedSum(data_set_feature_selectors=base_selectors),
        ensemble_methods.MeanWithClassifier(
            data_set_feature_selectors=base_selectors,
            classifiers=analysis.default_classifiers,
        ),
    ]

    contenders = list(base_selectors)
    contenders.append(LassoFeatureSelector())
    contenders.append(Random())
    contenders.extend(ensembles)

    # analysis.artificial(fs, jaccard_percentage=p)
    analysis.run(["colon", "arcene", "dexter", "gisette"], contenders,
                 jaccard_percentage=p)
def do_analysis():
    """Run the analysis requested by the current form post and store the result.

    Reads ``datadir``, ``analysis``, ``id``, ``url``, ``domain`` and
    ``outfile`` from ``flask.request.form``, lists the files matching
    ``datadir + "*"``, runs ``analysis.run`` on the data directory while
    measuring wall and CPU time, and dumps the result record as JSON to the
    requested output file.

    Returns:
        str: ``'Finished analysis: '`` followed by the request's ``id``.
    """
    stamp_format = "%Y-%m-%d %H:%M:%S"
    form = flask.request.form

    # Report which data files are available.
    data_dir = form['datadir']
    print("found those datafiles:")
    for path in glob.glob(data_dir + "*"):
        print('- ' + path)

    print('perform analysis {} ...'.format(form['analysis']))
    # Insertion order below is the key order of the JSON document.
    result = {
        "id": form['id'],
        "time_start_wall": datetime.datetime.now().strftime(stamp_format),
        "URL": form['url'],
        "domain": form['domain'],
        "analysis": form['analysis'],
        "info": "Done",
    }

    # Time the analysis on both the wall clock and the process CPU clock.
    wall_start = time.time()
    cpu_start = time.process_time()
    result['result'] = analysis.run(data_dir)
    wall_end = time.time()
    cpu_end = time.process_time()

    result['time_duration_wall'] = wall_end - wall_start
    result['time_duration_cpu'] = cpu_end - cpu_start
    result['time_end_wall'] = datetime.datetime.now().strftime(stamp_format)

    # Persist the record.
    outfilepath = form['outfile']
    print('Storing results in ' + outfilepath)
    with open(outfilepath, 'w') as sink:
        json.dump(result, sink)

    return 'Finished analysis: ' + form['id']
def analyze(args):
    """Validate the request arguments and run the analysis for one company.

    Args:
        args: Mapping of request arguments; ``'company'`` is required.

    Returns:
        dict: ``respCode`` ``'0000'`` with the analysis output nested under
        ``data['data']`` on success, or ``respCode`` ``'9999'`` with an
        explanatory ``respMsg`` when a required key is reported missing by
        ``get_lost_keys`` or reading ``'company'`` raises.
    """
    required = ['company']
    missing = get_lost_keys(args, required)
    if missing:
        return {
            'respCode': '9999',
            'respMsg': '缺少参数: %s' % ' '.join(missing),
        }

    try:
        company = args['company']
    except Exception as e:
        # The backend passes every input argument as a string, so the data
        # types need to be checked here.
        return {
            'respCode': '9999',
            'respMsg': '数据类型错误: %s' % str(e),
            'sample_args': {'company': 'aitai'},
        }

    return {
        'respCode': '0000',
        'respMsg': 'success',
        'data': {'data': analysis.run(company)},
    }
def fig_Pcolor(outfile='fig_Pcolor.png'):
    """
    Joint plot of log10 stellar mass against u-r colour for the hosts of
    securely associated FRBs.

    An FRB is treated as secure when the largest ``P_Ox`` among its
    candidates exceeds ``associate_defs.POx_secure``.  Hosts without a
    ``z_spec`` redshift entry or without a derived ``u-r`` value are
    skipped.

    Args:
        outfile (str): Path of the figure to write.  Names containing
            ``png`` are saved at 700 dpi.

    Returns:
        None
    """
    set_mplrc()

    # Result is not needed for this figure; the call is kept in case the
    # loader has side effects -- TODO confirm and drop if it has none.
    analysis.load_f_mL()
    frb_pre = analysis.get_candidates()

    # Init prior
    prior = associate_defs.adopted.copy()

    # seaborn's jointplot (below) opens a figure of its own, so this one
    # never receives any artists.  Keep the handle so it can be closed at
    # the end instead of leaking one open figure per call.
    unused_fig = plt.figure(figsize=(7, 7))

    # Bayesian run; its return values are not used here.
    # NOTE(review): presumably this fills the P_Ox values read below -- confirm.
    analysis.run(prior, frb_pre=frb_pre)

    # Collect the secure hosts
    high_conf_Mstar, high_conf_ur = [], []
    for frbA in frb_pre.frbA:
        if np.max(frbA.candidates.P_Ox) > associate_defs.POx_secure:
            # This is klunky
            hg = frbA.frb.grab_host()
            if 'z_spec' not in hg.redshift.keys():
                continue
            if 'u-r' not in hg.derived.keys():
                continue
            high_conf_Mstar.append(np.log10(hg.derived['Mstar']))
            high_conf_ur.append(hg.derived['u-r'])
    high_conf_Mstar = np.array(high_conf_Mstar)
    high_conf_ur = np.array(high_conf_ur)

    # Plot u-r vs. log10 Mstar
    df = pandas.DataFrame(dict(Mstar=high_conf_Mstar, ur=high_conf_ur))
    jg = sns.jointplot(data=df, x='Mstar', y='ur')

    jg.ax_marg_x.set_xlim(8, 10.5)
    jg.ax_marg_y.set_ylim(0.5, 2.0)
    jg.ax_joint.set_xlabel(r'$\log_{10} \, (M*/M_\odot)$')
    jg.ax_joint.set_ylabel(r'$u-r$')
    jg.ax_joint.minorticks_on()
    jg.ax_joint.yaxis.set_major_locator(plt.MultipleLocator(0.5))
    jg.ax_joint.xaxis.set_major_locator(plt.MultipleLocator(1.0))

    # Font size
    set_fontsize(jg.ax_joint, 15.)

    # End
    plt.tight_layout(pad=0.2, h_pad=0., w_pad=0.1)
    print('Writing {:s}'.format(outfile))
    kwargs = {}
    if 'png' in outfile:
        kwargs['dpi'] = 700
    plt.savefig(outfile, **kwargs)
    plt.close()            # the joint-plot figure (current figure)
    plt.close(unused_fig)  # the figure opened before jointplot
def fig_PL(outfile='fig_PL.png'):
    """
    Two-panel figure describing the brightness of secure FRB hosts.

    Top panel: apparent magnitude against redshift, with the ``f_mL(z)``
    curve drawn dashed.  Bottom panel: histogram of
    ``log10_Lstar = (f_mL(z) - m) / 2.5`` with weights summing to one.
    An FRB is secure when its largest candidate ``P_Ox`` exceeds
    ``associate_defs.POx_secure``; hosts without a ``z_spec`` redshift
    entry are skipped.  The median and the standard deviation of the
    luminosity values are printed.

    Args:
        outfile (str): Path of the figure to write.  Names containing
            ``png`` are saved at 700 dpi.

    Returns:
        None
    """
    set_mplrc()
    sns.set_theme()
    sns.set_style('whitegrid')
    sns.set_context('paper')

    # Magnitude-of-L* interpolator and the candidate tables
    f_mL = analysis.load_f_mL()
    frb_pre = analysis.get_candidates()

    # Adopted prior
    prior = associate_defs.adopted.copy()

    # Canvas
    plt.figure(figsize=(4, 8))
    gs = gridspec.GridSpec(2, 1)

    # Bayesian run
    _, model_mags, model_theta, max_PMix = analysis.run(prior, frb_pre=frb_pre)

    bins_L = np.linspace(-2., 1, 15)

    # Gather the secure associations
    secure_L, secure_z, secure_m = [], [], []
    for frbA in frb_pre.frbA:
        if np.max(frbA.candidates.P_Ox) > associate_defs.POx_secure:
            # This is klunky
            hg = frbA.frb.grab_host()
            if 'z_spec' in hg.redshift.keys():
                imax = np.argmax(frbA.candidates.P_Ox)
                # Magnitude of the top candidate in this FRB's filter
                m = frbA.candidates.iloc[imax][frbA.filter]
                # m_r(L*) at the FRB redshift
                m_r_Lstar = float(f_mL(frbA.frb.z))
                # Magnitude difference -> log10 luminosity ratio
                log10_Lstar = (m_r_Lstar - m) / 2.5
                print('FRB, z, m, m_L*, log10_L: ', frbA.frb.frb_name,
                      frbA.frb.z, m, m_r_Lstar, log10_Lstar)
                secure_L.append(log10_Lstar)
                secure_z.append(frbA.frb.z)
                secure_m.append(m)
    high_conf_L = np.array(secure_L)
    high_conf_z = np.array(secure_z)
    high_conf_mr = np.array(secure_m)

    fsz = 15.

    # Top panel: m_r vs. z
    ax = plt.subplot(gs[0])
    sns.scatterplot(x=high_conf_z, y=high_conf_mr, ax=ax)
    ax.set_xlabel(r'$z$')
    ax.set_ylabel(r'$m_r$')
    # Overlay the f_mL(z) curve
    zs = np.linspace(0.02, 0.5, 100)
    ax.plot(zs, f_mL(zs), 'k--')
    set_fontsize(ax, fsz)

    # Bottom panel: luminosity histogram, weights summing to one
    ax = plt.subplot(gs[1])
    weights = np.ones_like(high_conf_L) / high_conf_L.size
    lbl = r'$P(O_i) > ' + '{}'.format(associate_defs.POx_secure) + '$ FRBs'
    ax.hist(high_conf_L, bins=bins_L, weights=weights, color='b',
            label=lbl, histtype='stepfilled')

    # Stats
    print("Median L/L* = {}".format(np.median(10**high_conf_L)))
    print("RMS log(L/L*) = {}".format(np.std(high_conf_L)))

    # Axes decoration
    ax.set_xlabel(r'$\log_{10} \, (L/L*)$')
    ax.set_ylabel('PDF')
    ax.xaxis.set_major_locator(plt.MultipleLocator(1.))
    ax.set_ylim(0., 0.3)

    # Legend
    legend = ax.legend(loc='upper right', scatterpoints=1, borderpad=0.2,
                       handletextpad=handletextpad, fontsize=13.)
    set_fontsize(ax, fsz)

    # Write out
    plt.tight_layout(pad=0.2, h_pad=0., w_pad=0.1)
    print('Writing {:s}'.format(outfile))
    kwargs = {'dpi': 700} if 'png' in outfile else {}
    plt.savefig(outfile, **kwargs)
    plt.close()
def fig_mag_vs_DM(outfile='fig_mag_vs_DM.png'):
    """
    mini Maquart relation

    Scatter plot of candidate magnitude against
    ``DM_FRB - DM_ISM - 100`` for every FRB in the table returned by
    ``analysis.run``, one colour per FRB, with marker area proportional to
    the candidate's ``P_Ox``.  Only candidates with ``P_Ox`` above 0.01 are
    drawn.  A fiducial curve for ``L = 0.270 L*`` is overlaid.

    Side effect: overwrites ``plt.rcParams["axes.prop_cycle"]`` and does
    not restore it.

    Args:
        outfile (str): Path of the figure to write.  Names containing
            ``png`` are saved at 700 dpi.

    Returns:
        None
    """
    set_mplrc()

    # Load f_mL
    f_mL = analysis.load_f_mL()

    # Init prior
    prior = associate_defs.adopted.copy()

    # Plot
    plt.figure(figsize=(8, 5))
    gs = gridspec.GridSpec(1, 1)
    cm = plt.get_cmap('jet')

    # Bayesian + parse (only the FRB table is used here)
    frbA_tbl, _, _, _ = analysis.run(prior)

    # Colors: one per FRB plus one spare
    N = len(frbA_tbl) + 1
    plt.rcParams["axes.prop_cycle"] = plt.cycler(
        "color", cm(np.linspace(0, 1, N)))

    # Plot -- must come after colors
    ax = plt.subplot(gs[0])

    Pmin = 0.01
    unit_size = 150.

    # Computed once and reused for both the ordering and the x values, so
    # the two can no longer drift apart.
    DMcosmic = [
        frb_row.frbA.frb.DM.value - frb_row.frbA.frb.DMISM.value - 100
        for _, frb_row in frbA_tbl.iterrows()
    ]

    # Draw in order of increasing DM_cosmic (this also fixes legend order)
    for ss in np.argsort(DMcosmic):
        frb_row = frbA_tbl.iloc[ss]
        candidates = frb_row.frbA.candidates
        # Restrict to candidates above the minimum probability
        ok_c = candidates.P_Ox > Pmin
        N_c = np.sum(ok_c)
        ax.scatter(
            [DMcosmic[ss]] * N_c,
            candidates[ok_c][frb_row.frbA.filter],
            label=frb_row.frb,
            edgecolor='darkslategrey',
            marker='o',
            s=unit_size * candidates[ok_c].P_Ox,
        )

    # Fiducial relation
    L_Lstar = 0.270  # Grabbed from PL
    numL = int(np.round(1. / L_Lstar))
    log10_L_Lstar = np.log10(L_Lstar)
    # (A scatter of 0.47 dex, grabbed from fig_PL, was recorded here but
    # never used.)

    # Load up DM_cosmic
    dm_cosmic, z = igm.average_DM(1., cumul=True)
    f_DMz = interp1d(z, dm_cosmic.value)

    zval = np.linspace(0.02, 1., 100)
    DMs = f_DMz(zval)
    m_FRB = f_mL(zval) - 2.5 * log10_L_Lstar
    ax.plot(DMs, m_FRB, 'k-', label=r'$L=L*/${}'.format(numL))

    # Label me
    ax.set_xlabel(
        r'DM$_{\rm cosmic} \equiv$ DM$_{\rm FRB}$ - DM$_{\rm ISM}$ - 100')
    ax.set_ylabel(r'$m_r$')
    ax.set_xlim(0., 800.)

    # Legend
    ax.legend(loc='lower right', scatterpoints=1, borderpad=0.0,
              handletextpad=handletextpad, fontsize=10.)

    # Font size
    set_fontsize(ax, 15.)

    # End
    plt.tight_layout(pad=0.2, h_pad=0., w_pad=0.1)
    print('Writing {:s}'.format(outfile))
    kwargs = {}
    if 'png' in outfile:
        kwargs['dpi'] = 700
    plt.savefig(outfile, **kwargs)
    plt.close()
def execfile():
    """Run the analysis for the channel id held by ``e1`` and call ``rest()`` on it.

    The id is read with ``e1.get()``; ``analysis.run`` is imported at call
    time, invoked with that id, and ``rest()`` is called on what it
    returns.
    """
    requested_channel = e1.get()
    # Imported lazily, only after the id has been read.
    from analysis import run as launch
    launch(requested_channel).rest()
def fig_Phalflight(outfile='fig_Phalflight.png'):
    """
    Joint plot of the top candidate's ``half_light`` value against FRB
    redshift for securely associated FRBs.

    An FRB is treated as secure when the largest ``P_Ox`` among its
    candidates exceeds ``associate_defs.POx_secure``.

    Args:
        outfile (str): Path of the figure to write.  Names containing
            ``png`` are saved at 700 dpi.

    Returns:
        None
    """
    set_mplrc()

    # Result is not needed for this figure; the call is kept in case the
    # loader has side effects -- TODO confirm and drop if it has none.
    analysis.load_f_mL()
    frb_pre = analysis.get_candidates()

    # Init prior
    prior = associate_defs.adopted.copy()

    # seaborn's jointplot (below) opens a figure of its own, so this one
    # never receives any artists.  Keep the handle so it can be closed at
    # the end instead of leaking one open figure per call.
    unused_fig = plt.figure(figsize=(7, 7))

    # Bayesian run; its return values are not used here.
    # NOTE(review): presumably this fills the P_Ox values read below -- confirm.
    analysis.run(prior, frb_pre=frb_pre)

    # Collect redshift and half-light value of each secure top candidate
    high_conf_z, high_conf_hl = [], []
    for frbA in frb_pre.frbA:
        if np.max(frbA.candidates.P_Ox) > associate_defs.POx_secure:
            imax = np.argmax(frbA.candidates.P_Ox)
            high_conf_z.append(frbA.frb.z)
            high_conf_hl.append(frbA.candidates.iloc[imax].half_light)
    high_conf_z = np.array(high_conf_z)
    high_conf_hl = np.array(high_conf_hl)

    # Plot half-light value vs. z
    df = pandas.DataFrame(dict(z=high_conf_z, hl=high_conf_hl))
    jg = sns.jointplot(data=df, x='z', y='hl')

    jg.ax_marg_x.set_xlim(0, 0.6)
    jg.ax_joint.set_xlabel(r'$z$')
    jg.ax_joint.set_ylabel(chalf + ' (arcsec)')

    # End
    plt.tight_layout(pad=0.2, h_pad=0., w_pad=0.1)
    print('Writing {:s}'.format(outfile))
    kwargs = {}
    if 'png' in outfile:
        kwargs['dpi'] = 700
    plt.savefig(outfile, **kwargs)
    plt.close()            # the joint-plot figure (current figure)
    plt.close(unused_fig)  # the figure opened before jointplot
def combinations():
    """Benchmark every ensemble method over all selector subsets of size 4, 3 and 2.

    Four base feature selectors are created.  The six ensemble methods are
    instantiated first for the full set, then for each 3-element subset and
    finally for each 2-element subset.  Base selectors plus all ensembles
    are handed to ``analysis.run`` on five data sets with the prefix
    ``"combinations"``.
    """
    fs = [
        SymmetricalUncertainty(),
        Relief(),
        SVM_RFE(),
        LassoFeatureSelector(),
    ]

    def ensembles_for(selectors):
        # One instance of each ensemble method over the given selector list.
        plain = dict(data_set_feature_selectors=selectors)
        with_classifiers = dict(plain, classifiers=analysis.default_classifiers)
        return [
            ensemble_methods.Mean(**plain),
            ensemble_methods.Influence(**plain),
            ensemble_methods.MeanNormalizedSum(**plain),
            ensemble_methods.MeanWithClassifier(**with_classifiers),
            ensemble_methods.InfluenceWithClassifier(**with_classifiers),
            ensemble_methods.MeanNormWithClassifier(**with_classifiers),
        ]

    # Full set first, then the triples, then the pairs.
    e_methods = ensembles_for(fs)
    for subset_size in (3, 2):
        for picked in itertools.combinations(list(range(4)), subset_size):
            e_methods.extend(ensembles_for([fs[k] for k in picked]))

    data_sets = ["artificial", "colon", "arcene", "dexter", "gisette"]
    analysis.run(data_sets, fs + e_methods, prefix="combinations")
#
# START
#
# Each stage runs when its keyword appears on the command line.  The
# interactive prompts are switched off: ``answer`` is pinned to "n" before
# every check, so only the command-line keywords can enable a stage.  Stage
# modules are imported lazily, right before they are needed.

# Stage 1: generate data ("test" selects the test generator)
answer = "n"  # input("Start Generate Data? (y/n)")
if answer.lower() == "y" or 'generate' in sys.argv:
    import generate
    (generate.test if 'test' in sys.argv else generate.run)()

# Stage 2: prepare data ("analyze" selects the analysis variant)
answer = "n"  # input("Start Prepare Data? (y/n)")
if answer.lower() == "y" or 'prepare' in sys.argv:
    import prepare
    (prepare.analyze if 'analyze' in sys.argv else prepare.run)()

# Stage 3: training
answer = "n"  # input("Start Training? (y/n)")
if answer.lower() == "y" or 'train' in sys.argv:
    import train
    train.run()

# Stage 4: visualisation
answer = "n"  # input("Start Visualize? (y/n)")
if answer.lower() == "y" or 'visualize' in sys.argv:
    import analysis
    analysis.run()
# main(analysis_type, command_variables, threshold) -- batch driver, written
# in Python 2 syntax (print statements, dict.has_key).
#
# NOTE(review): this definition arrived with its line structure destroyed
# (the whole body is folded onto a few physical lines), so block nesting
# cannot be recovered with certainty.  The code below is left untouched;
# only this header was added.  Restore the formatting from version control
# before changing any logic.
#
# What the visible statements do, in order:
#   * read the PDB directory from the first line returned by fopen(...) on a
#     hard-coded path;
#   * create a Workbook with seven sheets ("X-RAY", "X-RAY 2", "X-RAY 3",
#     "NMR averaged model", "NMR multiple models 1", "NMR multiple models 2",
#     "Other");
#   * choose write_path: a random number plus timestamp by default, or
#     disulfide_analysis_<date>.xls when command_variables has exactly two
#     entries and the second is '-A' (the flag is popped and inA is set);
#   * open an error log named after analysis_type and the date;
#   * build run_list: os.listdir(pdb_dir) without "dsspout" when fewer than
#     two command variables remain, otherwise from the command variables
#     that name existing .ent/.pdb files (a missing file is logged and the
#     program exits with status 1);
#   * for every run_list entry that has no matching name in the dsspout
#     directory, create that directory if needed and shell out to
#     create_dssp.py;
#   * call analysis.run(...) once per file, threading the histogram dict
#     hist_ through the calls and printing the percentage completed;
#   * make the first sheet whose row 0 holds strings the active sheet, save
#     the workbook, zip it with 7z when inA is set, close the error log,
#     print the start/processed/saved timestamps and return write_path.
#
# NOTE(review): in the check for sheet 4 the assignment is `firstactive = 3`,
# which breaks the 0, 1, 2, 3, _, 5 pattern of the neighbouring checks --
# looks like a copy-paste slip for 4; confirm before fixing.
# NOTE(review): the `else:` that follows the loop over command_variables
# could belong to the if/elif on len(command_variables) (making it
# unreachable) or to the inner .ent/.pdb test -- confirm.
# NOTE(review): `runlist = None` is never read (the code uses `run_list`),
# and the nested histcmp helper is only referenced from commented-out code.
def main(analysis_type, command_variables, threshold): pdb_dir = fopen( "D:\\xampp\\htdocs\\python\\SSP\\absolute_path_of_pdb_files")[0].strip( ) ## EDIT dssp_dir = None #dssp_dir = "..\\pdb_files\\dsspout\\" #pdb_dir = "..\\pdb_files\\" x = Workbook() x.add_sheet("X-RAY") x.add_sheet("X-RAY 2") x.add_sheet("X-RAY 3") x.add_sheet("NMR averaged model") x.add_sheet("NMR multiple models 1") x.add_sheet("NMR multiple models 2") x.add_sheet("Other") date_ = time.strftime("%d%b%Y_%H%M%S") write_path = "D:\\xampp\\htdocs\\python\\Analyses\\" + str( int(random.random() * 100000000)) + "_" + date_ + ".xls" inA = 0 if len(command_variables) == 2: if command_variables[1] == '-A': date_ = time.strftime("%d_%b_%Y") write_path = "D:\\xampp\\htdocs\\python\\Analyses\\disulfide_analysis_" + date_ + ".xls" command_variables.pop(1) inA = 1 ## EDIT #x.save(write_path) #x.save("..\\Analyses\\test.xls") # file pdb1a5n is missing, possibly because 1a5n does not contain ssbonds (correct) #"pdb1tdy.ent","pdb1a5n.ent","pdb1cb6.ent","pdb2ac5.ent" #for file in [pdb_dir+name for name in ["pdb1kdg.ent", "pdb1kdk.ent", "pdb1kdm.ent", "pdb1kdq.ent", "pdb1kdu.ent", "pdb1kdv.ent", "pdb1kdy.ent", "pdb1kdz.ent", "pdb1ke1.ent", "pdb1ke2.ent", "pdb1keb.ent", "pdb1keg.ent", "pdb1kek.ent", "pdb1kel.ent", "pdb1kem.ent", "pdb1ken.ent", "pdb1keo.ent", "pdb1kex.ent", "pdb1kf2.ent", "pdb1kf3.ent", "pdb1kf4.ent", "pdb1kf5.ent", "pdb1kf7.ent"]]: # run(file,x) #try: # open("..\\Analyses\\basic_analysis_"+time.strftime("%a_%d_%b_%Y")+".xls","r") #except IOError: #for file in os.listdir(pdb_dir): #for file in ["pdb1axi.ent"]: # run(pdb_dir+file,x) #n = len(os.listdir(pdb_dir)) erfile = open("D:\\xampp\\htdocs\\python\\Errors\\error_" + analysis_type + "_" + date_ + ".log", "w") ## EDIT runlist = None #check command parameters if len(command_variables) < 2: #command_variables.pop(0) #run_list = [] run_list = os.listdir(pdb_dir) run_list.remove("dsspout") elif len(command_variables) >= 2: command_variables.pop(0) 
run_list = [] for i in command_variables: if i.find("\\") == -1: i = ".\\" + i if i.lower().find('.ent') != -1 or i.lower().find('.pdb') != -1: if not os.path.isfile(i): erfile.write("The file " + i + " was not found.") sys.exit(1) run_list.append(i) pdb_dir = "" dssp_dir = pdb_dir + os.path.dirname(i) + "\\dsspout" else: run_list = [] run_list = command_variables[:] print "Checking DSSP files... " if dssp_dir == None: dssp_dir = pdb_dir + os.path.dirname(run_list[0]) + "\\dsspout" if not os.path.isdir(dssp_dir): dssplist = [] else: dssplist = os.listdir(dssp_dir) newlist = {} for j in dssplist: newlist[j.split(".")[0].split("_")[0]] = 1 print len(newlist) print len(run_list) for i in run_list: found = 0 print i[:-3].split("\\")[-1].rstrip(".") if newlist.has_key(i[:-3].split("\\")[-1].rstrip(".")): found = 1 if found == 0: if not os.path.isdir(pdb_dir + os.path.dirname(i) + "\\dsspout"): os.mkdir(pdb_dir + os.path.dirname(i) + "\\dsspout") outpath = pdb_dir + os.path.dirname(i) + "\\dsspout" + "\\" erfile.write("python create_dssp.py " + pdb_dir + os.path.dirname(i) + "\\" + os.path.basename(i) + " " + outpath) os.system("python D:\\xampp\htdocs\python\SSP\create_dssp.py " + pdb_dir + os.path.dirname(i) + "\\" + os.path.basename(i) + " " + outpath) print "CREATE " print "Done" # print "Checking DSSP files... 
" # for i in run_list: # dssp_dir = pdb_dir+os.path.dirname(i)+"\\dsspout" # if not os.path.isdir(dssp_dir): # dssplist = [] # else: # dssplist = os.listdir(dssp_dir) # found = 0 # for j in dssplist: # if j.find(i[:-3].split("\\")[-1]) != -1: # found = 1 # break; # if found == 0: # if not os.path.isdir(pdb_dir+os.path.dirname(i)+"\\dsspout"): # os.mkdir(pdb_dir+os.path.dirname(i)+"\\dsspout") # os.system("D:\\xampp\\htdocs\\python\\SSP\\dsspcmbi.exe "+pdb_dir+os.path.dirname(i)+"\\"+os.path.basename(i)+" > "+pdb_dir+os.path.dirname(i)+"\\dsspout\\"+i[:-3].split("\\")[-1]+"dssp") ## EDIT # print "DSSP -> "+pdb_dir+os.path.basename(i) # print "Done" #else: # try: # run_list = [z.strip() for z in fopen("new_files_downloaded_on_"+command_variables[1])] # except IOError: # tag = "new_files_downloaded_on_" # print "The file \""+tag+command_variables[1]+"\" does not exist. Possible options are:" # for line in [z for z in os.listdir(".") if z[:24] == tag]: # print line # sys.exit(1) ### ###temp # import string # run_list = [z.replace(".dssp",".ent") for z in open("dssplist","r").read().split("\n") if z != ""] # aer = run_list.index("pdb2gi7.ent") # run_list = run_list[aer:] # print run_list # run_list = ["pdb2gi7.ent"] ### #histogram - for analysing relationships between secondary structures and bond types hist_ = {} #run with specified command parameters in run list # run_list = 
["pdb1gpq.ent","pdb1gps.ent","pdb1gpt.ent","pdb1gpz.ent","pdb1gqb.ent","pdb1gqr.ent","pdb1gqs.ent","pdb1gqv.ent","pdb1gqz.ent","pdb1gr2.ent","pdb1gra.ent","pdb1grn.ent","pdb1grt.ent","pdb1gsk.ent","pdb1gsm.ent","pdb1gsn.ent","pdb1gsp.ent","pdb1gt6.ent","pdb1gtp.ent","pdb1gts.ent","pdb1gtt.ent","pdb1gu2.ent","pdb1gu3.ent","pdb1guj.ent","pdb1gur.ent","pdb1guv.ent","pdb1gv7.ent","pdb1gv8.ent","pdb1gv9.ent","pdb1gvc.ent","pdb1gvk.ent","pdb1gvl.ent","pdb1gvt.ent","pdb1gvu.ent","pdb1gvv.ent","pdb1gvw.ent","pdb1gvx.ent","pdb1gvz.ent","pdb1gw0.ent","pdb1gw2.ent","pdb1gwa.ent","pdb1gwb.ent","pdb1gwd.ent","pdb1gwn.ent","pdb1gwo.ent","pdb1gwt.ent","pdb1gwu.ent","pdb1gx2.ent","pdb1gx8.ent","pdb1gx9.ent","pdb1gxa.ent","pdb1gxd.ent","pdb1gxs.ent","pdb1gxv.ent","pdb1gxx.ent","pdb1gxy.ent","pdb1gxz.ent","pdb1gy0.ent","pdb1gyc.ent","pdb1gyd.ent","pdb1gye.ent","pdb1gyh.ent","pdb1gyo.ent","pdb1gz1.ent","pdb1gz2.ent","pdb1gz7.ent","pdb1gza.ent","pdb1gzb.ent","pdb1gzj.ent","pdb1gzm.ent","pdb1gzp.ent","pdb1gzq.ent","pdb1gzr.ent","pdb1gzy.ent","pdb1gzz.ent","pdb1h02.ent","pdb1h03.ent","pdb1h04.ent","pdb1h0b.ent","pdb1h0d.ent","pdb1h0g.ent","pdb1h0h.ent","pdb1h0i.ent","pdb1h0j.ent","pdb1h0l.ent","pdb1h0z.ent","pdb1h12.ent","pdb1h13.ent","pdb1h14.ent","pdb1h15.ent","pdb1h1b.ent","pdb1h1h.ent","pdb1h1n.ent","pdb1h20.ent","pdb1h22.ent","pdb1h23.ent","pdb1h2b.ent","pdb1h2p.ent","pdb1h2q.ent","pdb1h30.ent","pdb1h34.ent","pdb1h3j.ent","pdb1h3p.ent","pdb1h3t.ent","pdb1h3u.ent","pdb1h3v.ent","pdb1h3w.ent","pdb1h3x.ent","pdb1h3y.ent","pdb1h43.ent","pdb1h44.ent","pdb1h45.ent","pdb1h46.ent","pdb1h49.ent","pdb1h4i.ent","pdb1h4j.ent","pdb1h4p.ent","pdb1h4u.ent","pdb1h4w.ent","pdb1h52.ent","pdb1h53.ent","pdb1h55.ent","pdb1h57.ent","pdb1h58.ent","pdb1h59.ent","pdb1h5a.ent","pdb1h5b.ent","pdb1h5c.ent","pdb1h5d.ent","pdb1h5e.ent","pdb1h5f.ent","pdb1h5g.ent","pdb1h5h.ent","pdb1h5i.ent","pdb1h5j.ent","pdb1h5k.ent","pdb1h5l.ent","pdb1h5m.ent","pdb1h5o.ent","pdb1h5x.ent","pdb1h6m.ent","pdb1h6r.ent","pdb1h6v.en
t","pdb1h75.ent","pdb1h76.ent","pdb1h7l.ent","pdb1h7q.ent","pdb1h80.ent","pdb1h81.ent","pdb1h82.ent","pdb1h83.ent","pdb1h84.ent","pdb1h86.ent","pdb1h87.ent","pdb1h8d.ent","pdb1h8i.ent","pdb1h8l.ent","pdb1h8n.ent","pdb1h8o.ent","pdb1h8p.ent","pdb1h8s.ent","pdb1h8u.ent","pdb1h8v.ent","pdb1h8x.ent","pdb1h8y.ent","pdb1h8z.ent","pdb1h91.ent","pdb1h9h.ent","pdb1h9i.ent","pdb1h9l.ent","pdb1h9v.ent","pdb1h9z.ent","pdb1ha0.ent","pdb1ha2.ent","pdb1ha6.ent","pdb1ha8.ent","pdb1ha9.ent","pdb1haa.ent","pdb1hae.ent","pdb1haf.ent","pdb1hag.ent","pdb1hah.ent","pdb1hai.ent","pdb1haj.ent"] n = len(run_list) typedict = {} for type_ in ["lys_arg", "his", "asp_glu", "trp", "phe", "tyr"]: typedict[type_] = (analysis_type == type_) timestart = time.ctime() for i, file in enumerate(run_list): #for i,file in enumerate(["pdb1w4y.ent"]): # print type(hist_) print pdb_dir + file hist_ = analysis.run(pdb_dir + file, x, erfile, dist=threshold, histogram=hist_, lys_arg=typedict["lys_arg"], his=typedict["his"], asp_glu=typedict["asp_glu"], trp=typedict["trp"], phe=typedict["phe"], tyr=typedict["tyr"]) m = int((i + 1) / float(n) * 10000) / 100.0 print str(m) + "% complete" timefinish1 = time.ctime() print "\nResults file is being saved. 
Please do not close the program window.\n" ################################################################## ##The histogram bit def histcmp(a, b): return (hist_[a] < hist_[b]) * 2 - 1 #fffff = open("histogram_","w") #hkeys = hist_.keys()[:] #hkeys.sort(histcmp) #x.add_sheet("Histogram") #histosheet = x.get_sheet(4) #histosheet.write(0,0,"Secondary structure 1",style0) #histosheet.write(0,1,"Secondary structure 2",style0) #histosheet.write(0,2,"Disulfide Bond type",style0) #histosheet.write(0,3,"Count",style0) #for row,h in enumerate(hkeys): # fffff.write(str(h)+" "*(21-len(h))+str(hist_[h])+"\n") # histosheet.write(row+1,0,h[0]) # histosheet.write(row+1,1,h[1]) # histosheet.write(row+1,2,h[2:]) # histosheet.write(row+1,3,hist_[h]) #fffff.close() ################################################################## realactive = x.get_active_sheet() firstactive = 6 sheet = x.get_sheet(0) if sheet.row(0).get_str_count() > 0: firstactive = 0 sheet.set_first_visible_row(0) sheet = x.get_sheet(1) if sheet.row(0).get_str_count() > 0 and firstactive > 1: firstactive = 1 sheet.set_first_visible_row(0) sheet = x.get_sheet(2) if sheet.row(0).get_str_count() > 0 and firstactive > 2: firstactive = 2 sheet.set_first_visible_row(0) sheet = x.get_sheet(3) if sheet.row(0).get_str_count() > 0 and firstactive > 3: firstactive = 3 sheet.set_first_visible_row(0) sheet = x.get_sheet(4) if sheet.row(0).get_str_count() > 0 and firstactive > 4: firstactive = 3 sheet.set_first_visible_row(0) sheet = x.get_sheet(5) sheet.set_first_visible_row(0) if sheet.row(0).get_str_count() > 0 and firstactive > 5: firstactive = 5 sheet.set_first_visible_row(0) sheet = x.get_sheet(6) sheet.set_first_visible_row(0) x.set_active_sheet(firstactive) x.save(write_path) if inA: os.system( "7z a -tzip -mx=9 D:\\xampp\\htdocs\\python\\Analyses\\disulfide_analysis_" + date_ + ".zip " + write_path) print "\nAnalysis complete and results saved.\n" timefinish2 = time.ctime() erfile.close() print "start =", timestart 
print "prcessed =", timefinish1 print "saved =", timefinish2 #sorting and printing of histogram #gives descending sort #write histogram to spreadsheet #cys["SSBOND 7 CYS A 1348 CYS A 1380"] return write_path
# Choose the granularity of the analysis and derive one identifier per row.
# Barcode level wins when requested and is also the fallback when neither
# flag is set; previously that case left ``unique_id`` and ``label``
# undefined, so the column selection below failed.
# (A legacy ``args.v2`` variant keyed on ref/guide_status/MD used to be kept
# here inside a string literal; it was never executed.)
if args.feature_level and not args.barcode_level:
    counts['unique_id'] = [
        "_".join(str(part) for part in row)
        for row in zip(counts['sseqid'], counts['guide_status'],
                       counts['donor_status'], counts['library'])
    ]
    label = "feature_level"
else:
    # default is barcode level analysis
    counts['unique_id'] = list(counts.index)
    label = "barcode_level"

# Keep the sample columns plus the annotation columns, dropping rows in
# which every sample is missing.
annotation_columns = [
    "unique_id", "sseqid", "library", "subpool", "guide_status",
    "donor_status", "bsp_status"
]
counts = counts[samples + annotation_columns].dropna(subset=samples, how='all')

# Fraction of samples with no measurement; rows at 25% or more are discarded.
counts['percentNA'] = counts[samples].isnull().sum(axis=1) / float(
    len(samples))
counts = counts[counts['percentNA'] < 0.25]

# Write the filtered counts, then the analysis results and its log, all
# sharing the "<outfile>_<label>" prefix.
output_prefix = outfile + "_" + label
counts.to_csv(output_prefix + "_counts.csv")
results, log = analysis.run(counts, model, log=True)
results.to_csv(output_prefix + "_analysis.csv")
log.to_csv(output_prefix + "_log.csv")
def fig_Psizesep(outfile='fig_Psizesep.png'):
    """
    Joint plot of host size against FRB-host separation, both in kpc, for
    securely associated FRBs.

    An FRB is treated as secure when the largest ``P_Ox`` among its
    candidates exceeds ``associate_defs.POx_secure``.  The top candidate's
    ``separation`` and ``half_light`` (taken as arcsec) are converted with
    ``associate_defs.cosmo.kpc_proper_per_arcmin`` at the FRB redshift.
    The FRB with the largest separation is printed (``None`` when no FRB
    is secure).

    Args:
        outfile (str): Path of the figure to write.  Names containing
            ``png`` are saved at 700 dpi.

    Returns:
        None
    """
    set_mplrc()
    sns.set_theme()
    sns.set_style('whitegrid')
    sns.set_context('paper')

    # Result is not needed for this figure; the call is kept in case the
    # loader has side effects -- TODO confirm and drop if it has none.
    analysis.load_f_mL()
    frb_pre = analysis.get_candidates()

    # Init prior
    prior = associate_defs.adopted.copy()

    # seaborn's jointplot (below) opens a figure of its own, so this one
    # never receives any artists.  Keep the handle so it can be closed at
    # the end instead of leaking one open figure per call.
    unused_fig = plt.figure(figsize=(7, 7))

    # Bayesian run; its return values are not used here.
    # NOTE(review): presumably this fills the P_Ox values read below -- confirm.
    analysis.run(prior, frb_pre=frb_pre)

    # Collect size and separation of each secure top candidate
    high_conf_size, high_conf_sep = [], []
    max_sep = 0.
    # Bound up front so the report below cannot hit a NameError when no FRB
    # passes the threshold.
    max_FRB = None
    for frbA in frb_pre.frbA:
        if np.max(frbA.candidates.P_Ox) > associate_defs.POx_secure:
            best = frbA.candidates.iloc[np.argmax(frbA.candidates.P_Ox)]
            # Angular-to-physical scale at the FRB redshift
            ang_phys = associate_defs.cosmo.kpc_proper_per_arcmin(frbA.frb.z)
            high_conf_sep.append(
                (best.separation * units.arcsec * ang_phys).to('kpc').value)
            high_conf_size.append(
                ((best.half_light * units.arcsec) * ang_phys).to('kpc').value)
            # Track the widest separation seen so far
            if high_conf_sep[-1] > max_sep:
                max_sep = high_conf_sep[-1]
                max_FRB = frbA.frb
    high_conf_size = np.array(high_conf_size)
    high_conf_sep = np.array(high_conf_sep)
    print("Max sep: {}".format(max_FRB))

    # Plot size vs. separation
    df = pandas.DataFrame(dict(size=high_conf_size, sep=high_conf_sep))
    jg = sns.jointplot(data=df, x='sep', y='size')

    jg.ax_marg_y.set_ylim(0., 7)
    jg.ax_joint.set_xlabel('Physical Separation (kpc)')
    jg.ax_joint.set_ylabel('Galaxy size (kpc)')

    # One-to-one line
    jg.ax_joint.plot([0., 11.], [0., 11.], 'k--')

    # Font size
    set_fontsize(jg.ax_joint, 15.)

    # End
    plt.tight_layout(pad=0.2, h_pad=0., w_pad=0.1)
    print('Writing {:s}'.format(outfile))
    kwargs = {}
    if 'png' in outfile:
        kwargs['dpi'] = 700
    plt.savefig(outfile, **kwargs)
    plt.close()            # the joint-plot figure (current figure)
    plt.close(unused_fig)  # the figure opened before jointplot
def fig_prior_vs_posterior(outfile='fig_prior_vs_posterior.png',
                           POx_secure=associate_defs.POx_secure,
                           prior_mode='conservative'):
    """
    Compare offset priors with the offsets measured for secure associations.

    One panel is drawn per theta prior.  Each panel shows the prior profile
    (faint), the sum of its per-FRB convolutions with a Gaussian of width
    ``sqrt(sig_a * sig_b) / half_light`` divided by the number of secure
    FRBs, a histogram of ``separation / half_light`` for the secure
    associations, and a histogram of ``model_theta`` from ``analysis.run``.
    The KS-test p-value of the secure offsets against the convolved curve
    is quoted in the legend.

    The loop zips ``np.arange(3)`` with four colours and four priors, so
    only the first three entries are used and the ``else`` branch (refit of
    the exponential scale length) is not reached.

    Args:
        outfile (str): Path of the figure to write.  Names containing
            ``png`` are saved at 700 dpi.
        POx_secure (float): Threshold on the largest candidate ``P_Ox``
            above which an FRB counts as secure.
        prior_mode (str): Name of the ``associate_defs`` attribute whose
            copy is used as the prior.

    Returns:
        None
    """
    set_mplrc()

    frb_pre = analysis.get_candidates()

    # Starting prior, selected by name
    prior = getattr(associate_defs, prior_mode).copy()

    # Offset grid and histogram bins
    thetas = np.linspace(0., associate_defs.theta_max, 1000)
    dtheta = thetas[1] - thetas[0]
    bins_theta = np.linspace(0., 6., 20)

    # Canvas
    plt.figure(figsize=(6, 5))
    gs = gridspec.GridSpec(2, 2)

    panel_colors = ['k', 'b', 'g', 'gray']
    theta_priors = [
        associate_defs.theta_u,
        associate_defs.theta_c,
        associate_defs.theta_e,
        associate_defs.theta_e,
    ]

    for ss, clr, tprior in zip(np.arange(3), panel_colors, theta_priors):
        ax = plt.subplot(gs[ss])

        # Swap in this panel's theta prior
        prior['theta'] = tprior

        if ss < 3:
            # Bayesian run with this prior
            _, model_mags, model_theta, max_PMix = analysis.run(
                prior, frb_pre=frb_pre)

            # Prior profile and its integral over the grid
            ptheta = bayesian.pw_Oi(thetas, 1., prior['theta'])
            scl = np.sum(ptheta * dtheta)
            print('scl:', scl)

            convolved_ptheta = None

            # Secure associations
            high_conf_theta = []
            nsecure = 0
            for frbA in frb_pre.frbA:
                if np.max(frbA.candidates.P_Ox) > POx_secure:
                    imax = np.argmax(frbA.candidates.P_Ox)
                    best = frbA.candidates.iloc[imax]
                    high_conf_theta.append(best.separation / best.half_light)

                    # Convolve the prior with this FRB's Gaussian
                    sigR_phi = np.sqrt(
                        frbA.frb.sig_a * frbA.frb.sig_b) / best.half_light
                    conv_theta = convolve(ptheta,
                                          np.exp(-thetas**2 / 2 / sigR_phi**2),
                                          mode='full')
                    scl_c = np.sum(conv_theta * dtheta)
                    unit_area = conv_theta / scl_c
                    if convolved_ptheta is None:
                        convolved_ptheta = unit_area
                    else:
                        convolved_ptheta += unit_area

                    nsecure += 1

            # Divide the summed convolutions by the number of secure FRBs
            high_conf_theta = np.array(high_conf_theta)
            convolved_ptheta /= nsecure

            # Remember the secure offsets of the exponential prior
            if tprior == associate_defs.theta_e:
                save_high = high_conf_theta.copy()

            # KS test against the convolved CDF
            cumsum = np.cumsum(convolved_ptheta * dtheta)
            more_thetas = np.arange(convolved_ptheta.size) * dtheta
            f_CDF = interp1d(more_thetas, cumsum)
            res = kstest(high_conf_theta, f_CDF)
            pvalue = res.pvalue
        else:
            # Fit the secure offsets with a new exponential scale length
            scale_lengths = np.linspace(0.25, 2., 100)
            P_KS = []
            for scale_length in scale_lengths:
                ptheta = bayesian.pw_Oi(thetas, 1., associate_defs.theta_e,
                                        scale_half=scale_length)
                scl = np.sum(ptheta * dtheta)
                # KS test
                cumsum = np.cumsum(ptheta * dtheta) / scl
                f_CDF = interp1d(thetas, cumsum)
                res = kstest(high_conf_theta, f_CDF)
                P_KS.append(res.pvalue)
            imax = np.argmax(P_KS)
            best_sl = scale_lengths[imax]
            print("Best scale length = {}".format(best_sl))
            pvalue = P_KS[imax]

            # Profile once more at the best scale length
            ptheta = bayesian.pw_Oi(thetas, 1., associate_defs.theta_e,
                                    scale_half=best_sl)
            scl = np.sum(ptheta * dtheta)
            prior['theta']['method'] = 'exp ' + '{:0.1f}'.format(
                best_sl) + r'$ \, ' + cmhalf + '$'
            # NOTE(review): read as a live statement following an empty
            # comment; the folded source leaves this ambiguous.
            high_conf_theta = save_high

        # Prior profile (faint) and convolved profile
        ax.plot(thetas, ptheta / scl,
                label=prior['theta']['method'] + ' unconvolved',
                color=clr, alpha=0.3)
        more_thetas = np.arange(convolved_ptheta.size) * dtheta
        ax.plot(more_thetas, convolved_ptheta,
                label=prior['theta']['method'] + r' $P_{\rm KS} =' +
                '{:0.2f}'.format(pvalue) + '$',
                color=clr)

        # Secure offsets; labelled on the first panel only
        weights3 = np.ones_like(high_conf_theta) / high_conf_theta.size
        lbl = r'$P(O_i) > ' + '{}'.format(
            POx_secure) + '$ FRBs' if ss == 0 else None
        ax.hist(high_conf_theta, bins=bins_theta, weights=weights3,
                color='darkred', label=lbl, histtype='stepfilled')

        # model_theta values; labelled on the first panel only
        if ss < 3:
            weights2 = np.ones_like(model_theta) / model_theta.size
            lbl = 'All FRB candidates' if ss == 0 else None
            ax.hist(model_theta, weights=weights2, bins=bins_theta,
                    color='darkgrey', label=lbl, histtype='step')

        # Axes decoration
        ax.set_xlabel(r'$\theta/' + cmhalf + '$')
        ax.set_ylabel('PDF')
        ax.xaxis.set_major_locator(plt.MultipleLocator(1.))
        ax.set_xlim(0., 8)
        ax.set_ylim(0., 0.5)

        # Legend
        legend = ax.legend(loc='upper right', scatterpoints=1, borderpad=0.2,
                           handletextpad=handletextpad, fontsize=11.)

        # Font size
        set_fontsize(ax, 13.)

    # Write out
    plt.tight_layout(pad=0.2, h_pad=0., w_pad=0.1)
    print('Writing {:s}'.format(outfile))
    kwargs = {'dpi': 700} if 'png' in outfile else {}
    plt.savefig(outfile, **kwargs)
    plt.close()
def _resolve_run_list(command_variables, pdb_dir):
    """Return the PDB file names selected by the command-line arguments.

    Three modes, chosen from ``command_variables`` (a ``sys.argv``-style list):

    1. No extra argument: every entry of ``pdb_dir`` except ``"dsspout"``.
    2. Argument containing ``".ent"``: that single file, which must exist
       under ``pdb_dir``.
    3. Anything else: the argument is appended to
       ``"new_files_downloaded_on_"`` and that list file is read with the
       project's ``fopen`` helper, one name per line.

    Prints the mode number ("1", "2" or "3") as a progress marker.  Exits the
    process with status 1 when the requested file or list file is missing.
    """
    if len(command_variables) <= 1:
        # Filter instead of list.remove() so that a directory without a
        # "dsspout" entry does not raise ValueError.
        names = [name for name in os.listdir(pdb_dir) if name != "dsspout"]
        print("1")
        return names

    argument = command_variables[1]
    if ".ent" in argument:
        # The original wrapped a plain list assignment in try/except IOError,
        # which could never trigger; check for the file explicitly instead.
        if not os.path.isfile(os.path.join(pdb_dir, argument)):
            print("The file " + argument + " was not found.")
            sys.exit(1)
        print("2")
        return [argument]

    print("3")
    tag = "new_files_downloaded_on_"
    try:
        return [entry.strip() for entry in fopen(tag + argument)]
    except IOError:
        print("The file \"" + tag + argument +
              "\" does not exist. Possible options are:")
        for candidate in os.listdir("."):
            if candidate.startswith(tag):
                print(candidate)
        sys.exit(1)


def _write_histogram(workbook, hist_, sheet_index):
    """Write the histogram to a new "Histogram" sheet and to ``histogram_``.

    Keys of ``hist_`` are written in order of decreasing count.  Each key is
    split as ``key[0]``, ``key[1]`` and ``key[2:]`` into the first three
    columns; the count goes into the fourth.  ``sheet_index`` is the index
    the newly added sheet will have in ``workbook``.
    """
    # Key-based sort replaces the old cmp function, which never returned 0
    # for equal counts and therefore was not a consistent comparator.
    ordered_keys = sorted(hist_, key=hist_.get, reverse=True)

    workbook.add_sheet("Histogram")
    histosheet = workbook.get_sheet(sheet_index)
    headers = ("Secondary structure 1", "Secondary structure 2",
               "Disulfide Bond type", "Count")
    for column, title in enumerate(headers):
        histosheet.write(0, column, title, style0)

    with open("histogram_", "w") as text_out:
        for row, key in enumerate(ordered_keys):
            # Pad the key to 21 characters so the counts line up.
            text_out.write(str(key) + " " * (21 - len(key)) +
                           str(hist_[key]) + "\n")
            histosheet.write(row + 1, 0, key[0])
            histosheet.write(row + 1, 1, key[1])
            histosheet.write(row + 1, 2, key[2:])
            histosheet.write(row + 1, 3, hist_[key])


def main(analysis_type, command_variables, threshold):
    """Run ``analysis.run`` over a set of PDB files and save the results.

    Parameters:
        analysis_type: name used in the output file names; when it equals one
            of "lys_arg", "his", "asp_glu", "trp", "phe" or "tyr" the
            matching keyword flag passed to ``analysis.run`` is True.
        command_variables: ``sys.argv``-style list; the optional second
            element selects which files to process (see
            ``_resolve_run_list``).
        threshold: forwarded to ``analysis.run`` as ``dist``.

    Side effects: creates ``../Analyses/<analysis_type>_<date>.xls`` (saved
    once empty, then again with results), ``error_<analysis_type>_<date>.log``
    and ``histogram_`` in the current directory, and prints progress.
    """
    pdb_dir = "../pdb_files"
    date_ = time.strftime("%d_%b_%Y")
    # One path for both saves: the original used "../Analyses/" first and
    # "..\\Analyses\\" last, which only name the same file on Windows.
    results_path = os.path.join("..", "Analyses",
                                analysis_type + "_" + date_ + ".xls")

    x = Workbook()
    sheet_names = ("NMR averaged model", "NMR multiple models", "X-RAY",
                   "Other")
    for sheet_name in sheet_names:
        x.add_sheet(sheet_name)
    x.save(results_path)

    # NOTE: file pdb1a5n is missing, possibly because 1a5n does not contain
    # ssbonds (correct).

    with open("error_" + analysis_type + "_" + date_ + ".log", "w") as erfile:
        run_list = _resolve_run_list(command_variables, pdb_dir)
        n = len(run_list)

        # Exactly one flag is True when analysis_type names a known type.
        type_flags = dict(
            (type_, analysis_type == type_)
            for type_ in ("lys_arg", "his", "asp_glu", "trp", "phe", "tyr"))

        # Histogram relating secondary structures and bond types; each call
        # to analysis.run receives the current one and returns the next.
        hist_ = {}
        timestart = time.ctime()
        for i, name in enumerate(run_list):
            # os.path.join supplies the separator that plain concatenation
            # with "../pdb_files" was missing.
            hist_ = analysis.run(os.path.join(pdb_dir, name), x, erfile,
                                 dist=threshold, histogram=hist_,
                                 **type_flags)
            m = int((i + 1) / float(n) * 10000) / 100.0
            print(str(m) + "% complete")
        timefinish1 = time.ctime()

        print("\nResults file is being saved. "
              "Please do not close the program window.\n")
        # The histogram sheet is added after the fixed sheets above, so its
        # index equals their count.
        _write_histogram(x, hist_, len(sheet_names))
        x.save(results_path)
        print("\nAnalysis complete and results saved.\n")
        timefinish2 = time.ctime()

    print("start = " + timestart)
    print("prcessed = " + timefinish1)
    print("saved = " + timefinish2)