コード例 #1
0
	def MPCorrelation(self):
		"""Collect (free energy, pair count) points for perfect hairpins and plot them.

		Runs RNALfold through the shell on data/hsa_chr11_sample_2.fasta, writing
		its output to data/hsa_chr11_sample_2.folds, then scans that output.
		Every line starting with '.', '(' or ')' is treated as a fold line: the
		text inside the last parenthesised field is parsed as the energy, and
		the fold is kept only if no '(' follows the first ')' in the structure.
		Kept folds are written to results/mp_pairs_neg.tsv as
		"<energy>\t<pair count>" and the collected points are passed to
		Plotter.plot_scatter with the file name "mp_scatter_neg.png".

		:return: None
		"""
		call('progs/ViennaRNA-2.1.8/Progs/RNALfold -d2 --noLP -L 100 < data/hsa_chr11_sample_2.fasta > data/hsa_chr11_sample_2.folds', shell=True)
		MPpairs = []
		with open('data/hsa_chr11_sample_2.folds', 'r') as fileIn:
			with open('results/mp_pairs_neg.tsv', 'w') as fileOut:
				for line in fileIn:
					if line[0] not in '.()':
						continue
					fold = line.split()[0]
					# The energy is whatever sits inside the last "(...)" on the line.
					energyText = line.split('(')[-1].split(')')[0]
					try:
						energy = float(energyText)
					except ValueError:
						# Single-argument form prints the same text on Python 2 and 3.
						print("Invalid float: " + energyText)
						continue
					# Restrictive must-be-a-perfect-hairpin rule: once the first
					# ')' has been seen, no further '(' may open.
					if any('(' in segment for segment in fold.split(')')[1:]):
						continue
					# Every '(' on the line except the one opening the energy field.
					pairCount = line.count('(') - 1
					MPpairs.append((energy, pairCount))
					fileOut.write(str(energy) + '\t' + str(pairCount) + '\n')
		p = Plotter()
		p.plot_scatter(MPpairs, "mp_scatter_neg.png")
コード例 #2
0
def make_metrics_plot(df, output):
    """
    Render the metrics table for a dataframe.

    Builds a Plotter labelled "metrics" and asks it to draw the metrics table.

    :param df: pandas df handed to the Plotter
    :param output: output path handed to the Plotter
    :return: None
    """
    Plotter(df, "metrics", output).plot_metrics_table()
コード例 #3
0
def make_score(df_list, output):
    """
    Plot r2 / mse score tables for every prediction dataframe.

    Only items whose name contains pc['prediction'] are processed. For each
    such dataframe, every column is scored against the mc["label"] column:
    columns whose name contains pc['prediction'] go into the "single" table,
    columns whose name contains 'ens' go into the "ens" table (a column may
    land in both). Each table is sorted by r2, best first, and handed to a
    Plotter named "<df_name>_single" / "<df_name>_ens".

    :param df_list: list of (pandas df, name) items; only item[0] and item[1] are read
    :param output: output path passed to the Plotter
    :return: None
    """
    logger.info("in score plots")
    score_columns = ['model', 'r2', 'mse']
    for item in df_list:
        df = item[0]
        df_name = item[1]
        if pc['prediction'] not in df_name:
            continue

        # Rows are gathered in plain lists and each frame is built once:
        # DataFrame.append was removed in pandas 2.0.
        single_rows = []
        ens_rows = []
        for column in df.columns.to_list():
            is_single = pc['prediction'] in column
            is_ens = 'ens' in column
            if not (is_single or is_ens):
                continue
            # Score once even when the column belongs to both tables.
            row = {
                'model': column,
                'r2': r2_score(df[mc["label"]], df[column]),
                'mse': mean_squared_error(df[mc["label"]], df[column]),
            }
            if is_single:
                single_rows.append(row)
            if is_ens:
                ens_rows.append(dict(row))

        score_single_df = pd.DataFrame(single_rows, columns=score_columns)
        score_ens_df = pd.DataFrame(ens_rows, columns=score_columns)
        logger.info("df name: {}, single score df shape: {} ".format(df_name, score_single_df.shape))
        logger.info("df name: {}, ens score df shape: {} ".format(df_name, score_ens_df.shape))

        plotter_sng = Plotter(score_single_df.sort_values(by=['r2'], ascending=False),
                              str(df_name) + "_single",
                              output)
        plotter_sng.plot_score_table()
        plotter_ens = Plotter(score_ens_df.sort_values(by=['r2'], ascending=False),
                              str(df_name) + "_ens",
                              output)
        plotter_ens.plot_ens_score_table()
コード例 #4
0
def single_model_profit(df, csv_str, output):
    """
    Plot the profit template for a single model's profit csv.

    Nothing happens unless csv_str contains pc["profit_csv"]. Otherwise a
    folder for csv_str is created under output and the profit template is
    plotted into it. The earlier pc["rf_csv"] test selected between two
    identical branches, so it has been dropped.

    :param df: pandas df
    :param csv_str: model name string
    :param output: output path
    :return: None
    """
    if pc["profit_csv"] not in csv_str:
        return
    output_loc = create_folder(output, csv_str)
    Plotter(df, csv_str, output_loc).plot_profit_template()
コード例 #5
0
discriminatorIterationsRatio = 1
dataRoot = './Data'

#%%
# Lookup mode overrides the batch size, turns training off and reads from
# a separate data directory.
if lookup:
    batchSize = 9
    training_mode = False
    dataRoot = './Lookup'

#%%
# Outside training mode the epoch count is forced to 1.
if not training_mode:
    epochs = 1
#%%
# Build the models and helpers; batchSize must already be final here
# because both the discriminator and the plotter receive it.
generator = G(lookup=lookup)
discriminator = D(lookup=lookup, batch_size=batchSize)
plotter = Plotter(batchSize)
noiseGenerator = NGen()

# NOTE(review): presumably torch modules being moved to the GPU - confirm.
generator.cuda()
discriminator.cuda()

#%%
# Restore previously saved weights for both networks when requested.
if load_weights:
    generator.load_state_dict(load('./generator.pth'))
    discriminator.load_state_dict(load('./discriminator.pth'))

#%%
# With restored weights both networks are also put through .eval().
if load_weights:
    generator.eval()
    discriminator.eval()
コード例 #6
0
	def crossValidate(self, posFile, negFile, numFolds):
		"""Cross-validate a libsvm classifier over numFolds folds and print a summary.

		Instances are loaded from data/<posFile> (class 'real') and
		data/<negFile> (class 'pseudo'), scaled, and split into numFolds
		subsets. For each fold i the other subsets are merged into a training
		set, passed through weka_smote(), and used to train models/<i>.model
		with svm-train; the held-out subset is scored with svm-predict.

		Files written per fold: data/<i>.results (svm-predict output),
		data/<i>.sresults (true label, predicted label and confidence, sorted
		by confidence descending), roc_<i>.tsv, pr_<i>.tsv, roc_<i>.png and
		pr_<i>.png. Per-fold and average sensitivity/specificity and their
		geometric mean are printed at the end.

		Reads self.hpSens, self.hpSpec and self.ci when building the ROC and
		precision-recall points.

		:param posFile: file name under data/ holding the positive instances
		:param negFile: file name under data/ holding the negative instances
		:param numFolds: number of cross-validation folds
		:return: None
		"""
		allData = FeatureSet()
		allData.load('data/'+posFile, patternClass='real')
		allData.add_instances('data/'+negFile, patternClass='pseudo')
		allData.libsvm_scale(paramOut = 'data/params')
		subsets = allData.get_cv_subsets(numFolds)
		resultList = []
		for i in range(numFolds):
			fold = str(i)
			# Fold i is held out; every other subset goes into the training set.
			testSet = subsets[i]
			trainSet = FeatureSet()
			for j in range(numFolds):
				if j != i:
					trainSet.add_instances_from_featureset(subsets[j])
			trainSet.weka_smote()
			trainSet.export_svm('data/trainSet.libsvm')
			testSet.export_svm('data/testSet.libsvm')
			# SVM settings for HMP features. For MicroPred features the settings
			# previously noted here were -c 10000000 and -g 0.0019531.
			call('svm-train -c 1 -d 1 -h 1 -e 0.001 -g 0.06 -b 1 data/trainSet.libsvm models/'+fold+'.model', shell=True)
			call('svm-predict -b 1 data/testSet.libsvm models/'+fold+'.model data/'+fold+'.results', shell=True)

			# NOTE(review): assumes these are plain getters, so reading them once is
			# equivalent to the repeated calls made before - confirm.
			numPos = testSet.get_numpos()
			numNeg = testSet.get_numneg()

			# Tally keyed by (true label, predicted label).
			counts = {('1', '1'): 0.0, ('1', '0'): 0.0, ('0', '1'): 0.0, ('0', '0'): 0.0}
			resultSet = []
			with open('data/'+fold+'.results', 'r') as resultFile:
				with open("data/"+fold+".sresults", 'w') as resultOut:
					# The first line is skipped (presumably the "labels ..." header
					# that svm-predict -b 1 emits). The next numPos lines are read as
					# positives and the following numNeg lines as negatives, i.e. the
					# exported test set is assumed to list positives first.
					resultFile.readline()
					for trueLabel, count in (('1', numPos), ('0', numNeg)):
						for _ in range(count):
							line = resultFile.readline()
							predicted = '1' if line[0] == '1' else '0'
							resultSet.append(Result(t=trueLabel, p=predicted, conf=line.split()[1]))
							counts[(trueLabel, predicted)] += 1.0
					# Most confident predictions first; the sort is stable.
					resultSet.sort(key=lambda r: float(r.conf), reverse=True)
					for r in resultSet:
						resultOut.write(r.t + '\t' + r.p + '\t' + r.conf + '\n')

			truePos = counts[('1', '1')]
			falseNeg = counts[('1', '0')]
			falsePos = counts[('0', '1')]
			trueNeg = counts[('0', '0')]
			# (sensitivity, specificity) of this fold's model.
			resultList.append( (truePos/(truePos+falseNeg), trueNeg/(trueNeg+falsePos)) )

			# Sweep the ranked results to trace the ROC and precision-recall curves.
			ssList = []
			prList = []
			sens = 0.0
			spec = 1.0
			with open("roc_"+fold+".tsv", 'w') as rocOut:
				with open("pr_"+fold+".tsv", 'w') as prOut:
					for r in resultSet:
						if r.t == '1':
							sens += 1.0 / numPos
						if r.t == '0':
							spec -= 1.0 / numNeg
						fpr = 1 - spec
						ssList.append((sens*self.hpSens, fpr*self.hpSpec))
						scaledDenom = sens*self.hpSens + fpr*self.ci*self.hpSpec
						if scaledDenom != 0:
							prList.append((sens*self.hpSens/scaledDenom, sens*self.hpSens))
						rocOut.write(str(sens)+'\t'+str(fpr)+'\n')
						# Precision is sens / (sens + fpr*ci). The earlier code used spec
						# in place of fpr here, which disagreed with the prList formula
						# above and could divide by zero.
						rawDenom = sens + fpr*self.ci
						if rawDenom != 0:
							prOut.write(str(sens/rawDenom)+'\t'+str(sens)+'\n')

			p = Plotter()
			p.plot_roc(ssList, "Test", "roc_"+fold+".png")
			p.plot_pr(prList, "Test", self.ci, "pr_"+fold+".png")

		###################
		# Report Results
		###################
		# Single-argument print(...) emits the same text on Python 2 and 3.
		for i, (foldSens, foldSpec) in enumerate(resultList):
			print("## SVM "+str(i)+" ##")
			print('Sensitivity: '+str(foldSens))
			print('Specificity: '+str(foldSpec))
		avgSens = sum([result[0] for result in resultList])/numFolds
		avgSpec = sum([result[1] for result in resultList])/numFolds
		print('average Sensitivity: '+str(avgSens))
		print('average Specificity: '+str(avgSpec))
		print('Geometric mean: '+str(pow(avgSens*avgSpec, 0.5)))