def get_FA_data(anno_source, taxid, file="", datadir=""):
    functionalData = ""
    if anno_source == "GM":
        genemania = CS.Genemania(taxid)
        #genemania = CS.Genemania("6239")
        functionalData = genemania.getScoreCalc()
    elif anno_source == "STRING":
        string = CS.STRING(taxid, datadir)
        functionalData = string.getScoreCalc()
    elif anno_source == "FILE":
        if file == "":
            print "When using the FILE tag, please supply the path to the file containing functional annotation via -F file+path"
            sys.exit()
        # the supplied functional evidence data needs to have the correct header row...
        externaldata = CS.ExternalEvidence(file)
        #externaldata.readFile()
        functionalData = externaldata.getScoreCalc()
    else:
        print "EPIC only supports GeneMania, STRING, and flat-file input. Please use one of the following tags for anno_source: GM, STRING, FILE. Returning empty string object."
    return functionalData

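# A minimal usage sketch for get_FA_data (the taxid and file paths below are
# hypothetical examples, not values from this repo):
#
#   fa = get_FA_data("STRING", "6239", datadir="string_data/")
#   fa = get_FA_data("FILE", "", file="functional_annotations.txt")
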
def calc_feature_combination(args):
    feature_combination, se, input_dir, use_rf, num_folds, overlap, local, cutoff, num_cores, scoreF, mode, anno, faF, ref_complexes, output_dir = args
    # Create feature combination
    cutoff = float(cutoff) / 100
    num_folds = int(num_folds)
    if feature_combination == "00000000":
        sys.exit()
    this_scores = get_fs_comb(feature_combination)
    num_cores = int(num_cores)
    use_rf = use_rf == "True"
    overlap = overlap == "True"
    local = local == "True"
    clf_name = "SVM"
    if use_rf:
        clf_name = "RF"
    clf = CS.CLF_Wrapper(num_cores, use_rf)
    ref_gs = Goldstandard_from_cluster_File(ref_complexes)
    scoreCalc = CS.CalculateCoElutionScores(this_scores, "", scoreF, num_cores=num_cores, cutoff=cutoff)
    scoreCalc.readTable(scoreF, ref_gs)
    feature_comb = feature_selector([fs.name for fs in this_scores], scoreCalc)
    print feature_comb.scoreCalc.scores.shape
    print scoreCalc.scores.shape
    if mode == "comb":
        fa = utils.get_FA_data(anno, faF)
        feature_comb.add_fun_anno(fa)
    elif mode == "fa":
        feature_comb = utils.get_FA_data(anno, faF)
        print type(feature_comb)
    elif mode != "exp":
        print "Unsupported mode: %s" % mode
        sys.exit()
    scores, head = n_fold_cross_validation(num_folds, ref_gs, feature_comb, clf, output_dir, overlap, local)
    outFH = open(output_dir + ".eval.txt", "w")
    print "FS\tSE\tCLF\t" + head
    print "%s\t%s\t%s\t" % (feature_combination, se, clf_name) + scores
    print >> outFH, "FS\tSE\tCLF\t" + head
    print >> outFH, "%s\t%s\t%s\t" % (feature_combination, se, clf_name) + scores
    outFH.close()

def main():
    (scoreF, refF, elutionF, geneNameF, outF) = sys.argv[1:]
    geneNameFH = open(geneNameF)
    geneName = {}
    species = {}
    for line in geneNameFH:
        line = line.rstrip()
        ida, idb, spec = line.split("\t")
        if ida not in geneName:
            geneName[ida] = set([])
        geneName[ida].add(idb)
        species[ida] = spec
        species[idb] = spec
    geneNameFH.close()
    toLearn, toPred = calcS.loadScoreData(scoreF, refF)
    rfc = calcS.trainML(toLearn)
    print rfc.getValScores()
    ref, eluD, calc = calcS.loadData(refF, elutionF)
    calc.calculate2DScores(ref)
    outFH = open(outF + ".arff", "w")
    outFH.write(calc.toArffData())
    outFH.close()
    print "Calculated scores"
    rfc2 = calcS.trainML(calc)
    print rfc2.getValScores()
    data, targets = toPred.toSklearnData()
    dataL, targetsL = toLearn.toSklearnData()
    preds = rfc.predict(data)
    prots = []
    for protA, protB, label in toPred.scores:
        prots.append((protA, protB))
    outFH = open(outF, "w")
    for i in range(len(preds)):
        protA, protB = prots[i]
        # reset per pair so a missing mapping does not reuse the previous pair's gene names
        geneA = ""
        geneB = ""
        if protA in geneName:
            geneA = ",".join(geneName[protA])
        if protB in geneName:
            geneB = ",".join(geneName[protB])
        spec = species[protA]
        if preds[i][1] > 0.5:
            print >> outFH, "%s\t%s\t%s\t%s\t%s\t%f" % (protA, protB, geneA, geneB, spec, preds[i][1])
    outFH.close()

def main():
    (elutionF, refF, outD) = sys.argv[1:]
    reference, elutionData, scoreCalc = calcS.loadData(refF, elutionF)
    iexFractions = range(1, 49)
    out = np.array([[-1.00] * 48] * 48)
    for removeLeft in range(1, 49):
        tmpFracs = copy.copy(iexFractions)
        for i in range(1, removeLeft):
            if i in tmpFracs:
                tmpFracs.remove(i)
        for removeRight in reversed(range(removeLeft + 1, 50)):
            if removeRight in tmpFracs:
                tmpFracs.remove(removeRight)
            print tmpFracs
            fractions = getIEXFracs(tmpFracs)
            tmpElution = copy.copy(elutionData)
            tmpElution.getSubset(fractions)
            scoreCalc = calcS.CalculateCoElutionScores(tmpElution)
            scoreCalc.calculateAllScores([calcS.Euclidiean()], reference)
            data, targets = scoreCalc.toSklearnData()
            clf = calcS.RandomForest(data, targets)
            scores = clf.getValScores()
            out[removeLeft - 1][49 - removeRight] = scores[1]
            print "%i\t%i\t%.2f" % (removeLeft - 1, 49 - removeRight, scores[1])
    outFH = open(outD + ".iex.dat", "w")
    print >> outFH, "\t" + "\t".join(map(str, range(48)))
    for i in range(48):
        print >> outFH, "%i\t%s" % (i, "\t".join(map("{0:.2f}".format, out[i])))
    outFH.close()

def load_data(data, scores, orthmap="", fc=2, mfc=1):
    if type(data) is list:
        paths = data
    else:
        paths = [os.path.join(data, fn) for fn in next(os.walk(data))[2]]
    elutionDatas = []
    elutionProts = set([])
    for elutionFile in paths:
        if elutionFile.rsplit(os.sep, 1)[-1].startswith("."):
            continue
        elutionFile = elutionFile.rstrip()
        elutionData = CS.ElutionData(elutionFile, frac_count=fc, max_frac_count=mfc)
        if orthmap != "" and orthmap != False:
            mapper = GS.Inparanoid("", inparanoid_cutoff=1)
            mapper.readTable(orthmap, direction=0)
            elutionData.orthmap(mapper)
        elutionDatas.append(elutionData)
        elutionProts = elutionProts | set(elutionData.prot2Index.keys())
        for score in scores:
            score.init(elutionData)
    return elutionProts, elutionDatas

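# A minimal usage sketch for load_data (the directory name is a hypothetical
# example). Each score object is initialized on every loaded experiment, so
# the returned elution data are ready for co-elution scoring:
#
#   this_scores = get_fs_comb("11101001")
#   prots, edatas = load_data("elution_profiles/", this_scores, fc=2, mfc=1)
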
def main():
    (elutionF, geneNameF, outPrefix) = sys.argv[1:]
    geneNameFH = open(geneNameF)
    geneName = {}
    species = {}
    for line in geneNameFH:
        line = line.rstrip()
        ida, idb, spec = line.split("\t")
        if ida not in geneName:
            geneName[ida] = set([])
        geneName[ida].add(idb)
        species[ida] = spec
        species[idb] = spec
    geneNameFH.close()
    elutionData, scoreCalc = calcS.loadEData(elutionF)
    preds = scoreCalc.getAllPairs()
    out = {}
    for protA, protB, _ in preds:
        if protA not in species or protB not in species:
            continue
        if species[protA] != species[protB]:
            continue
        if species[protA] not in out:
            out[species[protA]] = set()
        out[species[protA]].add("\t".join(sorted([protA, protB])))
    for spec in out:
        outFH = open("%s.%s.topred.txt" % (outPrefix, spec), "w")
        print >> outFH, "ProtA\tProtB"
        print >> outFH, "\n".join(out[spec])
        outFH.close()

def ppi_fs(args):
    fsc, scoreF, use_rf, se, num_cores, refComplexesF, output_dir = args
    num_cores = int(num_cores)
    use_rf = use_rf == "True"
    clf_name = "SVM"
    if use_rf:
        clf_name = "RF"
    clf = CS.CLF_Wrapper(num_cores, use_rf)
    this_fs = get_fs_comb(fsc)
    all_gs = Goldstandard_from_cluster_File(refComplexesF)
    valprots = all_gs.get_proteins()
    scoreCalc = CS.CalculateCoElutionScores(this_fs, "", scoreF, num_cores=num_cores, cutoff=-1)
    scoreCalc.readTable(scoreF, all_gs)
    print scoreCalc.scores.shape
    test_scoreCalc = feature_selector([fs.name for fs in this_fs], scoreCalc)
    print "The size of chopped matrix for selected features"
    print np.shape(test_scoreCalc.get_scoreCalc().get_all_scores())
    print "training ppis: %i" % len(set(test_scoreCalc.ppiToIndex.keys()))
    train_gold_complexes = all_gs.return_gold_standard_complexes(set(test_scoreCalc.ppiToIndex.keys()))
    print "Train_gold comp: %i" % len(train_gold_complexes.complexes.complexes)
    print "Num valid ppis in pos: %i" % len(train_gold_complexes.positive)
    print "Num valid ppis in neg: %i" % len(train_gold_complexes.negative)
    # Evaluate classifier
    evaluation_results = utils.bench_by_PPI_clf(10, test_scoreCalc, train_gold_complexes, clf)
    print evaluation_results
    outFH = open("%s.ppi_eva.txt" % (output_dir), "w")
    print >> outFH, "FS\tSE\tCLF\tFM\tauPR\tauROC\n%s\t%s\t%s\t%s" % (fsc, se, clf_name, "\t".join(map(str, evaluation_results)))
    outFH.close()

def main():
    (elutionF, outF) = sys.argv[1:]
    elutionData, scoreCalc = calcS.loadEData(elutionF)
    scoreCalc.calculateAllPairs([calcS.MutualInformation(2)])
    outFH = open(outF, "w")
    outFH.write(scoreCalc.toTable(False))
    outFH.close()

def get_fs_comb(comb_string):
    # Create feature combination
    scores = [CS.MutualInformation(2), CS.Bayes(3), CS.Euclidiean(), CS.Wcc(),
              CS.Jaccard(), CS.Poisson(5), CS.Pearson(), CS.Apex()]
    this_scores = []
    for i, feature_selection in enumerate(comb_string):
        if feature_selection == "1":
            this_scores.append(scores[i])
    return this_scores

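# The bit string maps positionally onto the score list above, e.g. "11101001"
# selects MI, Bayes, Euclidean, Jaccard, and Apex:
#
#   selected = get_fs_comb("11101001")
#   print [fs.name for fs in selected]
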
def main():
    (elutionF, refF, windowSize, outF) = sys.argv[1:]
    windowSize = int(windowSize)
    outData = [''] * 3
    reference, elutionData, scoreCalc = calcS.loadData(refF, elutionF)
    j = 0
    name = elutionF.split("Ce_")[1].split(".")[0]
    for resultScore in getFracEvals(elutionData.elutionMat):
        data_lines = entropyVSprecision(elutionData, reference, resultScore, windowSize)
        for i in range(len(data_lines)):
            outData[j] += "\n%s\t%i\t%s" % (name, windowSize, data_lines[i])
        j += 1
    if len(outData[0]) != 0:
        printTable("%s_%s_Entropy_%i.dat" % (outF, name, windowSize), "Entropy", outData[0])
        printTable("%s_%s_Prot-prob_%i.dat" % (outF, name, windowSize), "Prot-prob", outData[1])
        printTable("%s_%s_Num-prots_%i.dat" % (outF, name, windowSize), "Num-prots", outData[2])

def rf_cutoff(args):
    pred_clust_F, ref_clust_F, ppiF, cutoff, outF = args
    num_ppis = CS.lineCount(ppiF)
    pred_clusters = GS.Clusters(False)
    pred_clusters.read_file(pred_clust_F)
    ref_clusters = GS.Clusters(False)
    ref_clusters.read_file(ref_clust_F)
    # utils.clustering_evaluation(train.complexes, pred_clusters, "Train", True)
    scores, head = utils.clustering_evaluation(ref_clusters, pred_clusters, "", True)
    outFH = open(outF, "w")
    outFH.write("%s\t%i\t%i\t%s\n" % (cutoff, num_ppis, len(pred_clusters.complexes), scores))
    outFH.close()

def cut(args):
    fc, scoreF, outF = args
    if fc == "00000000":
        sys.exit()
    this_scores = get_fs_comb(fc)
    scoreCalc = CS.CalculateCoElutionScores("", "", "", "", cutoff=0.5)
    empty_gs = GS.Goldstandard_from_Complexes()
    empty_gs.positive = set([])
    empty_gs.negative = set([])
    scoreCalc.readTable(scoreF, empty_gs)
    print scoreCalc.to_predict
    feature_comb = feature_selector([fs.name for fs in this_scores], scoreCalc)
    feature_comb.open()
    outFH = open(outF, "w")
    print >> outFH, "\t".join(feature_comb.scoreCalc.header)
    for i in range(feature_comb.to_predict):
        edge, edge_scores = feature_comb.get_next()
        if edge == "" or edge_scores == []:
            continue
        print >> outFH, "%s\t%s" % (edge, "\t".join(map(str, edge_scores)))
    outFH.close()
    feature_comb.close()

def calc_scores(args):
    topred = []
    if args[0] == "-ref":
        _, refF, fs, numcores, cutoff, e_dir, outF = args
        gs = Goldstandard_from_cluster_File(refF)
        topred = list(gs.positive | gs.negative)
        print len(topred)
    else:
        fs, numcores, cutoff, e_dir, outF = args
    numcores = int(numcores)
    cutoff = float(cutoff)
    this_fs = get_fs_comb(fs)
    prots, edatas = utils.load_data(e_dir, this_fs)
    scoreCalc = CS.CalculateCoElutionScores(this_fs, edatas, outF, num_cores=numcores, cutoff=cutoff)
    if topred == []:
        topred = scoreCalc.getAllPairs()
    scoreCalc.calculateScores(topred)

def filter_scoreCalc(self, scoreCalc):
    filtered_sc = CS.CalculateCoElutionScores("", "", "", 1)
    filtered_sc.scoreF = scoreCalc.scoreF
    filtered_sc.header = list(np.array(scoreCalc.header)[self.to_keep_header])
    filtered_sc.scores = np.zeros((len(scoreCalc.ppiToIndex.keys()), len(self.to_keep_score)))
    ppi_index = 0
    for i in range(scoreCalc.scores.shape[0]):
        ppi = scoreCalc.IndexToPpi[i]
        protA, protB = ppi.split("\t")
        if (protA not in self.valprots or protB not in self.valprots) and self.valprots != []:
            continue
        ppi_scores = self.filter_score(scoreCalc.scores[i, :])
        if ppi_scores == []:
            continue
        filtered_sc.ppiToIndex[ppi] = ppi_index
        filtered_sc.IndexToPpi[ppi_index] = ppi
        filtered_sc.scores[ppi_index, :] = ppi_scores
        ppi_index += 1
    filtered_sc.scores = filtered_sc.scores[0:ppi_index, :]
    return filtered_sc

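# A minimal usage sketch, assuming this method lives on the feature_selector
# class constructed elsewhere in this file as
# feature_selector(feature_names, scoreCalc). It keeps only PPIs between
# proteins in self.valprots (when set) and only the selected feature columns:
#
#   selector = feature_selector([fs.name for fs in this_scores], scoreCalc)
#   chopped = selector.filter_scoreCalc(scoreCalc)
#   print chopped.scores.shape
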
def read_scores(scoreF, cutoff):
    num_prots = CS.lineCount(scoreF)
    scoreFH = open(scoreF)
    header = scoreFH.readline().rstrip()
    header = header.split("\t")
    out = CS.CalculateCoElutionScores("", "", "", 4)
    out.scores = np.zeros((num_prots, len(header[2:])))
    out.header = header
    i = 0
    for line in scoreFH:
        line = line.rstrip()
        if line == "":
            continue
        line = line.split("\t")
        edge = "\t".join(line[:2])
        this_score = np.array(map(float, line[2:]))
        if len(list(set(np.where(this_score >= cutoff)[0]))) > 0:
            out.ppiToIndex[edge] = i
            out.IndexToPpi[i] = edge
            out.scores[i, :] = this_score
            i += 1
    out.scores = out.scores[0:i, :]
    print i
    return out

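# A minimal usage sketch for read_scores (the file name is a hypothetical
# example); a row is kept only if at least one feature reaches the cutoff:
#
#   sc = read_scores("Out.scores.txt", 0.5)
#   print sc.scores.shape
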
def main():
    (elutionFiles, refF, direction, outF) = sys.argv[1:]
    elutionFilesFH = open(elutionFiles)
    outData = {}
    maxSize = 0
    for line in elutionFilesFH:
        line = line.rstrip()
        reference, elutionData, scoreCalc = calcS.loadData(refF, line)
        scores = removeFracs(elutionData, reference, scoreCalc, direction)
        name = line.split("Ce_")[2].split(".")[0]
        outData[name] = scores
        maxSize = max(len(scores), maxSize)
    elutionFilesFH.close()
    outFH = open(outF, "w")
    print >> outFH, "Experiment_name\tFraction_%s" % ("\tFraction_".join(map(str, range(1, maxSize + 1))))
    for dataset in outData:
        scores = outData[dataset]
        numFracs = len(scores)
        outline = "%s\t%s" % (dataset, "\t".join(map(str, scores)))
        if maxSize - numFracs > 0:
            outline = "%s\t%s" % (outline, "\t".join(["NA"] * (maxSize - numFracs)))
        print >> outFH, outline
    outFH.close()

def exp_comb(args):
    FS, i, j, num_iter, input_dir, num_cores, ref_complexes, scoreF, mode, fun_anno_F, ppi, output_dir = args
    i, j, num_iter, num_cores = map(int, [i, j, num_iter, num_cores])
    ppi = ppi == "True"
    search_engine = input_dir.split(os.path.sep)[-2]

    def get_eData_comb(data_dir, num_iex, num_beads):
        all_exp = map(str, glob.glob(data_dir + "*.txt"))
        iex_exp = [f for f in all_exp if f.split(os.sep)[-1].startswith("all")]
        beads_exp = [f for f in all_exp if not f.split(os.sep)[-1].startswith("all")]
        if num_iex > len(iex_exp):
            print "i is too large"
            sys.exit()
        if num_beads > len(beads_exp):
            print "j is too large"
            sys.exit()
        sel_iex = rnd.sample(iex_exp, num_iex)
        sel_beads = rnd.sample(beads_exp, num_beads)
        return sel_iex + sel_beads

    # EPIC parameters
    if FS == "00000000":
        sys.exit()
    this_scores = get_fs_comb(FS)
    clf = CS.CLF_Wrapper(num_cores, True)
    ref_gs = Goldstandard_from_cluster_File(ref_complexes)
    scoreCalc = CS.CalculateCoElutionScores(this_scores, "", scoreF, num_cores=num_cores, cutoff=0.5)
    scoreCalc.readTable(scoreF, ref_gs)

    # the supplied functional evidence data needs to have the correct header row...
    functionalData = ""
    if mode == "comb":
        # pass the annotation file as the file argument (see get_FA_data's signature)
        functionalData = utils.get_FA_data("FILE", "", file=fun_anno_F)

    if i == 0 and j == 0:
        sys.exit()

    out_head = ""
    all_scores = []
    for iter in range(num_iter):
        rnd.seed()
        this_eprofiles = get_eData_comb(input_dir, i, j)
        this_eprofiles_fnames = [f.rsplit(os.sep, 1)[1] for f in this_eprofiles]
        rnd.seed(1)
        print this_eprofiles_fnames
        this_foundprots, _ = utils.load_data(this_eprofiles, [])
        print len(this_foundprots)
        feature_comb = feature_selector([fs.name for fs in this_scores], scoreCalc, valprots=this_foundprots, elution_file_names=this_eprofiles_fnames)
        if mode == "comb":
            feature_comb.add_fun_anno(functionalData)
        scores = ""
        head = ""
        if ppi:
            print "Running PPI cross fold"
            ppi_ref = ref_gs.return_gold_standard_complexes(set(feature_comb.scoreCalc.ppiToIndex.keys()))
            fmeasure, auc_pr, auc_roc = utils.bench_by_PPI_clf(10, feature_comb, ppi_ref, clf)
            scores = "\t".join(map(str, [fmeasure, auc_pr, auc_roc]))
            head = "\tFM\taucPR\taucROC"
        else:
            print "Running Cluster cross fold"
            scores, head = n_fold_cross_validation(2, ref_gs, feature_comb, clf, "%s_%i_%i" % (output_dir, i, j), overlap=True, local=False)
            # head, scores = run_epic_with_feature_combinations(this_scores, ref_gs, scoreCalc, clf, output_dir, valprots=this_foundprots)
        out_head = head
        all_scores.append("%s\t%s\t%i\t%i\t%s\t%i\t%s" % (FS, mode, i, j, search_engine, len(this_foundprots), scores))
        print head
        print scores

    outFH = open(output_dir + ".%i_%i.all.eval.txt" % (i, j), "w")
    print >> outFH, "FS\tMode\tNum_iex\tNum_beads\tSearch_engine\tNum_Prots\t%s" % out_head
    for score in all_scores:
        print >> outFH, score
    outFH.close()

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--feature_selection", type=str,
                        help="Select which features to use. This is an 8-position array of 0 and 1, where each position determines which co-elution feature to use. Features sorted by position are: MI, Bayes, Euclidean, WCC, Jaccard, PCCN, PCC, and Apex. Default: 11101001",
                        default="11101001")
    parser.add_argument("input_dir", type=str,
                        help="Directory containing the elution files for each experiment")
    parser.add_argument("-t", "--taxid", type=str,
                        help="TAXID to automatically download reference from GO, CORUM, and IntAct",
                        default="")
    parser.add_argument("-c", "--cluster", type=str,
                        help="Path to file containing protein cluster reference",
                        default="")
    parser.add_argument("-p", "--ppi", type=str,
                        help="Path to PPI file",
                        default="")
    parser.add_argument("output_dir", type=str,
                        help="Directory containing the output files")
    parser.add_argument("-o", "--output_prefix", type=str,
                        help="Prefix name for all output files",
                        default="Out")
    parser.add_argument("-M", "--classifier", type=str,
                        help="Select which classifier to use. Values: RF, SVM; default RF",
                        default="RF")
    parser.add_argument("-n", "--num_cores", type=int,
                        help="Number of cores to be used, default 1",
                        default=1)
    parser.add_argument("-m", "--mode", type=str,
                        help="Run EPIC with experimental, functional, or both evidences. Values: EXP, FA, COMB; default EXP",
                        default="EXP")
    parser.add_argument("-f", "--fun_anno_source", type=str,
                        help="Where to get functional annotation from. Values: STRING, GM, or FILE; default GM",
                        default="GM")
    parser.add_argument("-F", "--fun_anno_file", type=str,
                        help="Path to file containing functional annotation. This flag needs to be set when using FILE as fun_anno_source.")
    parser.add_argument("-r", "--co_elution_cutoff", type=float,
                        help="Co-elution score cutoff, default 0.5",
                        default=0.5)
    parser.add_argument("-R", "--classifier_cutoff", type=float,
                        help="Classifier confidence value cutoff, default 0.5",
                        default=0.5)
    parser.add_argument("-e", "--elution_max_count", type=int,
                        help="Removes proteins that have a maximal peptide count less than the given value, default 1",
                        default=1)
    parser.add_argument("-E", "--frac_count", type=int,
                        help="Number of fractions a protein needs to be measured in, default 2",
                        default=2)
    parser.add_argument("-P", "--precalculated_score_file", type=str,
                        help="Path to precalculated score file to read scores from for faster rerunning of EPIC, default None",
                        default="NONE")
    args = parser.parse_args()
    args.mode = args.mode.upper()
    args.fun_anno_source = args.fun_anno_source.upper()

    # Create feature combination
    if args.feature_selection == "00000000":
        print "Select at least one feature"
        sys.exit()
    this_scores = utils.get_fs_comb(args.feature_selection)
    print "\t".join([fs.name for fs in this_scores])

    # Initialize CLF
    use_rf = args.classifier == "RF"
    clf = CS.CLF_Wrapper(args.num_cores, use_rf)

    # Load elution data
    foundprots, elution_datas = utils.load_data(args.input_dir, this_scores, fc=args.frac_count, mfc=args.elution_max_count)

    # Generate reference data set
    gs = ""
    if (args.taxid != "" and args.ppi != "") or (args.cluster != "" and args.ppi != ""):
        print "References from cluster and PPI are not compatible. Please supply either a PPI or a complex reference, not both!"
        sys.exit()
    if args.taxid == "" and args.ppi == "" and args.cluster == "":
        print "Please supply a reference by setting the taxid, cluster, or ppi flag"
        sys.exit()
    gs_clusters = []
    if args.taxid != "" and args.cluster == "" and args.ppi == "":
        print "Loading clusters from GO, CORUM, and IntAct"
        gs_clusters.extend(utils.get_reference_from_net(args.taxid))
    if args.cluster != "":
        print "Loading complexes from file"
        if args.mode == "FA":
            gs_clusters.append(GS.FileClusters(args.cluster, "all"))
        else:
            gs_clusters.append(GS.FileClusters(args.cluster, foundprots))
    if args.ppi != "":
        print "Reading PPI file from %s" % args.ppi
        gs = Goldstandard_from_PPI_File(args.ppi, foundprots)
    print gs_clusters
    if len(gs_clusters) > 0:
        gs = utils.create_goldstandard(gs_clusters, args.taxid, foundprots)

    output_dir = args.output_dir + os.sep + args.output_prefix
    refFH = open(output_dir + ".ref_complexes.txt", "w")
    for comp in gs.complexes.complexes:
        print >> refFH, "%s\t%s" % (",".join(comp), ",".join(gs.complexes.complexes[comp]))
    refFH.close()

    scoreCalc = CS.CalculateCoElutionScores(this_scores, elution_datas, output_dir + ".scores.txt", num_cores=args.num_cores, cutoff=args.co_elution_cutoff)
    if args.precalculated_score_file == "NONE":
        scoreCalc.calculate_coelutionDatas(gs)
    else:
        scoreCalc.readTable(args.precalculated_score_file, gs)
    print scoreCalc.scores.shape

    functionalData = ""
    gs.positive = set(gs.positive & set(scoreCalc.ppiToIndex.keys()))
    gs.negative = set(gs.negative & set(scoreCalc.ppiToIndex.keys()))
    gs.rebalance()
    print len(gs.positive)
    print len(gs.negative)

    if args.mode != "EXP":
        print "Loading functional data"
        functionalData = utils.get_FA_data(args.fun_anno_source, args.taxid, args.fun_anno_file)
        print "Dimension of fun anno " + str(functionalData.scores.shape)

    print "Start benchmarking"
    if args.mode == "EXP":
        utils.cv_bench_clf(scoreCalc, clf, gs, output_dir, format="pdf", verbose=True, folds=5)
    if args.mode == "COMB":
        tmp_sc = copy.deepcopy(scoreCalc)
        tmp_sc.add_fun_anno(functionalData)
        utils.cv_bench_clf(tmp_sc, clf, gs, output_dir, format="pdf", verbose=True, folds=5)
    if args.mode == "FA":
        utils.cv_bench_clf(functionalData, clf, gs, output_dir, format="pdf", verbose=True, folds=5)

    # PPI evaluation
    print utils.cv_bench_clf(scoreCalc, clf, gs, args.output_dir, verbose=False, format="pdf", folds=5)

    # Predict protein interactions
    network = utils.make_predictions(scoreCalc, args.mode, clf, gs, fun_anno=functionalData)
    outFH = open("%s.pred.txt" % (output_dir), "w")
    final_network = []
    for PPI in network:
        items = PPI.split("\t")
        if float(items[2]) >= args.classifier_cutoff:
            final_network.append(PPI)
    print >> outFH, "\n".join(final_network)
    outFH.close()

    # Predicting clusters
    utils.predict_clusters("%s.pred.txt" % (output_dir), "%s.clust.txt" % (output_dir))

    # Evaluating predicted clusters
    pred_clusters = GS.Clusters(False)
    pred_clusters.read_file("%s.clust.txt" % (output_dir))
    overlapped_complexes_with_reference = gs.get_complexes().get_overlapped_complexes_set(pred_clusters)
    print "# of complexes in reference dataset: " + str(len(overlapped_complexes_with_reference))
    #clust_scores, header = utils.clustering_evaluation(gs.complexes, pred_clusters, "", False)
    clust_scores, header, composite_score = utils.clustering_evaluation(gs.complexes, pred_clusters, "", False)
    outFH = open("%s.eval.txt" % (output_dir), "w")
    header = header.split("\t")
    clust_scores = clust_scores.split("\t")
    for i, head in enumerate(header):
        print "%s\t%s" % (head, clust_scores[i])
        print >> outFH, "%s\t%s" % (head, clust_scores[i])
    outFH.close()
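
# An example invocation of this CLI (a sketch; the script name, paths, and
# reference file are hypothetical):
#
#   python main.py elution_profiles/ results/ -s 11101001 -c ref_complexes.txt \
#       -M RF -n 4 -m EXP -o Out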