def MPCorrelation(self):
    """Correlate minimum free energy (MFE) with hairpin stem length for the
    negative sample set.

    Folds data/hsa_chr11_sample_2.fasta with RNALfold, keeps only structures
    that form a single perfect hairpin, writes one "<mfe>\t<stem length>" row
    per kept structure to results/mp_pairs_neg.tsv, and renders the pairs as
    a scatter plot.
    """
    # Fold every sequence; RNALfold emits one "<dot-bracket> ( <mfe>)" line per hit.
    call('progs/ViennaRNA-2.1.8/Progs/RNALfold -d2 --noLP -L 100 < data/hsa_chr11_sample_2.fasta > data/hsa_chr11_sample_2.folds', shell=True)
    MPpairs = []
    with open('data/hsa_chr11_sample_2.folds', 'r') as fileIn:
        with open('results/mp_pairs_neg.tsv', 'w') as fileOut:
            for line in fileIn:
                # Only structure lines start with a dot-bracket character;
                # headers and sequences are skipped.
                if line[0] not in '.()':
                    continue
                fold = line.split()[0]
                # Extract and parse the MFE once — the original re-split the
                # line four separate times for the same substring.
                mfeText = line.split('(')[-1].split(')')[0]
                try:
                    mfe = float(mfeText)
                except ValueError:
                    print("Invalid float: " + mfeText)
                    continue
                if '..' in mfeText:
                    print(line)
                # Restrictive must-be-a-perfect-hairpin filter: after the
                # first ')' no further '(' may appear in the structure.
                writeFold = not any('(' in s for s in fold.split(')')[1:])
                if writeFold:
                    # '(' count minus one: the MFE annotation "( -x.yz)"
                    # contributes one paren that is not part of the stem.
                    stemLen = line.count('(') - 1
                    MPpairs.append((mfe, stemLen))
                    fileOut.write(str(mfe) + '\t' + str(stemLen) + '\n')
    p = Plotter()
    p.plot_scatter(MPpairs, "mp_scatter_neg.png")
def make_metrics_plot(df, output):
    """
    Plot metrics table

    :param df: pandas df
    :param output: output path
    :return: None
    """
    Plotter(df, "metrics", output).plot_metrics_table()
def make_score(df_list, output):
    """
    Make global plot long short profit.

    Scores every prediction column (r2 and mse against the label column) of
    each prediction dataframe, collecting ensemble columns into a separate
    table, then plots both tables sorted by r2 descending.

    :param df_list: list of (pandas df, df name) pairs
    :param output: output path
    :return: None
    """
    logger.info("in score plots")
    for df, df_name in df_list:
        if pc['prediction'] not in df_name:
            continue
        single_rows = []
        ens_rows = []
        for col in df.columns.to_list():
            is_single = pc['prediction'] in col
            is_ens = 'ens' in col
            if not (is_single or is_ens):
                continue
            # Score each column exactly once; the original recomputed
            # r2/mse a second time for ensemble columns.
            scr_r2 = r2_score(df[mc["label"]], df[col])
            scr_mse = mean_squared_error(df[mc["label"]], df[col])
            row = {'model': col, 'r2': scr_r2, 'mse': scr_mse}
            if is_single:
                single_rows.append(row)
            if is_ens:
                ens_rows.append(row)
        # Build each frame in one shot: DataFrame.append was deprecated in
        # pandas 1.4 and removed in 2.0, and row-wise append is O(n^2).
        score_single_df = pd.DataFrame(single_rows, columns=['model', 'r2', 'mse'])
        score_ens_df = pd.DataFrame(ens_rows, columns=['model', 'r2', 'mse'])
        logger.info("df name: {}, single score df shape: {} ".format(df_name, score_single_df.shape))
        logger.info("df name: {}, ens score df shape: {} ".format(df_name, score_ens_df.shape))
        plotter_sng = Plotter(score_single_df.sort_values(by=['r2'], ascending=False), str(df_name) + "_single", output)
        plotter_sng.plot_score_table()
        plotter_ens = Plotter(score_ens_df.sort_values(by=['r2'], ascending=False), str(df_name) + "_ens", output)
        plotter_ens.plot_ens_score_table()
def single_model_profit(df, csv_str, output):
    """
    Plot daily profit and ls_profit by checking the columns

    :param df: pandas df
    :param csv_str: model name string
    :param output: output path
    :return: None
    """
    # The original branched on pc["rf_csv"] but both branches were
    # byte-identical, so the inner check is dropped; behavior is unchanged.
    if pc["profit_csv"] in csv_str:
        output_loc = create_folder(output, csv_str)
        pl = Plotter(df, csv_str, output_loc)
        pl.plot_profit_template()
# How many discriminator updates to run per generator update.
discriminatorIterationsRatio = 1
dataRoot = './Data'
#%%
# Lookup mode overrides: small fixed batch, no training, dedicated data dir.
# `lookup` itself is defined earlier in the file (outside this view).
if (lookup):
    batchSize = 9
    training_mode = False
    dataRoot = './Lookup'
#%%
# Evaluation-only runs need just a single epoch over the data.
if (not training_mode):
    epochs = 1
#%%
# Instantiate the GAN components and move both networks to the GPU.
generator = G(lookup=lookup)
discriminator = D(lookup=lookup, batch_size=batchSize)
plotter = Plotter(batchSize)
noiseGenerator = NGen()
generator.cuda()
discriminator.cuda()
#%%
# Restore previously saved weights when requested.
if (load_weights):
    generator.load_state_dict(load('./generator.pth'))
    discriminator.load_state_dict(load('./discriminator.pth'))
#%%
# NOTE(review): loading weights also switches both nets to eval mode here —
# presumably load_weights implies inference-only; confirm this is intended
# if weights can also be loaded to resume training.
if (load_weights):
    generator.eval()
    discriminator.eval()
def crossValidate(self, posFile, negFile, numFolds):
    # n-fold cross-validation of a libsvm classifier on real vs. pseudo
    # patterns. For each fold: SMOTE-balance the training split, shell out to
    # svm-train/svm-predict, compute sensitivity/specificity from the
    # prediction file, dump sorted per-instance results (*.sresults), and
    # write/plot ROC and precision-recall curves. Prints a per-fold and
    # averaged summary at the end.
    #
    # posFile/negFile: file names under data/ with positive ("real") and
    # negative ("pseudo") instances. numFolds: number of CV folds.

    # Load both classes into one feature set and scale it for libsvm.
    allData = FeatureSet()
    allData.load('data/'+posFile, patternClass='real')
    allData.add_instances('data/'+negFile, patternClass='pseudo')
    allData.libsvm_scale(paramOut = 'data/params')
    subsets = allData.get_cv_subsets(numFolds)
    resultList = []  # (sensitivity, specificity) per fold
    # Go through all n folds...
    for i in range(numFolds):
        # Build training and test sets
        testSet = subsets[i]
        trainSet = FeatureSet()
        for j in range(numFolds):
            if j != i:
                trainSet.add_instances_from_featureset(subsets[j])
        # Create svm files for train and test fold data. Train and test on these files.
        # weka_smote oversamples the minority class in the training fold only.
        trainSet.weka_smote()
        trainSet.export_svm('data/trainSet.libsvm')
        testSet.export_svm('data/testSet.libsvm')
        # SVM settings for HMP features
        call('svm-train -c 1 -d 1 -h 1 -e 0.001 -g 0.06 -b 1 data/trainSet.libsvm models/'+str(i)+'.model', shell=True)
        # SVM settings for MicroPred features
        # call('svm-train -c 10000000 -d 1 -h 1 -e 0.001 -g 0.0019531 -b 1 data/trainSet.libsvm models/'+str(i)+'.model', shell=True)
        call('svm-predict -b 1 data/testSet.libsvm models/'+str(i)+'.model data/'+str(i)+'.results', shell=True)
        # Calculate sensitivity and specificity for fold model
        with open('data/'+str(i)+'.results', 'r') as resultFile:
            with open("data/"+str(i)+".sresults", 'w') as resultOut:
                # resultLines = resultFile.readlines()
                # posLines = resultLines[1:testSet.get_numpos())].sorted( key=lambda l: float(l.split()[1]) )
                # negLines = resultLines[testSet.get_numpos():].sorted( key=lambda l: float(l.split()[1]) )
                trueNeg = 0.0
                truePos = 0.0
                falseNeg = 0.0
                falsePos = 0.0
                resultSet = []
                # Skip the svm-predict header line ("labels ...").
                resultFile.readline()
                # The .results file lists all positives first, then all
                # negatives, in the same order as testSet was exported.
                for j in range(testSet.get_numpos()):
                    line = resultFile.readline()
                    if line[0] == '1':
                        resultSet.append(Result(t='1', p='1', conf=line.split()[1]))
                        truePos += 1.0
                    else:
                        resultSet.append(Result(t='1', p='0', conf=line.split()[1]))
                        falseNeg += 1.0
                for j in range(testSet.get_numneg()):
                    line = resultFile.readline()
                    if line[0] == '1':
                        resultSet.append(Result(t='0', p='1', conf=line.split()[1]))
                        falsePos += 1.0
                    else:
                        resultSet.append(Result(t='0', p='0', conf=line.split()[1]))
                        trueNeg += 1.0
                # Rank by classifier confidence, most confident first, and
                # dump "<true>\t<predicted>\t<confidence>" rows.
                resultSet = sorted(resultSet, key=lambda l: float(l.conf), reverse=True)
                for r in resultSet:
                    resultOut.write(r.t + '\t' + r.p + '\t' + r.conf + '\n')
                resultList.append( (truePos/(truePos+falseNeg),trueNeg/(trueNeg+falsePos)) )
        # Sweep the ranked results to trace ROC and PR curves for this fold.
        with open("roc_"+str(i)+".tsv", 'w') as rocOut:
            with open("pr_"+str(i)+".tsv", 'w') as prOut:
                ssList = []
                prList = []
                sens = 0.0
                spec = 1.0
                for r in resultSet:
                    if r.t == '1':
                        sens += 1.0 / testSet.get_numpos()
                    if r.t == '0':
                        spec -= 1.0 / testSet.get_numneg()
                    # hpSens/hpSpec/ci rescale the curves — presumably
                    # priors/costs set on self; defined outside this view.
                    ssList.append((sens*self.hpSens, (1-spec)*self.hpSpec))
                    # Guard against 0/0 at the start of the sweep.
                    if (sens*self.hpSens+(1-spec)*self.ci*self.hpSpec) != 0:
                        prList.append((sens*self.hpSens/(sens*self.hpSens+(1-spec)*self.ci*self.hpSpec), sens*self.hpSens))
                    rocOut.write(str(sens)+'\t'+str(1-spec)+'\n')
                    prOut.write(str(sens/(sens+spec*self.ci))+'\t'+str(sens)+'\n')
                p = Plotter()
                p.plot_roc(ssList, "Test", "roc_"+str(i)+".png")
                p.plot_pr(prList, "Test", self.ci, "pr_"+str(i)+".png")
    ###################
    # Report Results
    ###################
    for i in range(len(resultList)):
        print "## SVM "+str(i)+" ##"
        print 'Sensitivity: '+str(resultList[i][0])
        print 'Specificity: '+str(resultList[i][1])
    print 'average Sensitivity: '+str(sum([result[0] for result in resultList])/numFolds)
    print 'average Specificity: '+str(sum([result[1] for result in resultList])/numFolds)
    print 'Geometric mean: '+str(pow(sum([result[0] for result in resultList])/numFolds*sum([result[1] for result in resultList])/numFolds, 0.5))