def run(args):
    """Run comet on the configured mutation data and write a TSV summary.

    With ``args.num_initial > 1`` one chain is run per initial solution and
    all chains are repeatedly extended (the total iteration count grows by a
    factor of 1.5 each round) until the value returned by
    ``C.discrete_convergence`` drops below ``args.total_distance_cutoff`` or
    the next extension would exceed ``args.n_stop``.  Otherwise a single
    chain of ``args.num_iterations`` iterations is run from an empty initial
    solution.

    Args:
        args: Parsed options object.  Attributes read here:
            ``mutation_matrix``, ``gene_file``, ``patient_file``,
            ``min_freq``, ``gene_set_sizes``, ``num_iterations``,
            ``step_length``, ``n_stop``, ``accelerator``, ``nt``,
            ``binom_cut``, ``exact_cut``, ``subtype``, ``verbose``, ``seed``,
            ``precomputed_scores``, ``num_initial``, ``initial_soln``,
            ``total_distance_cutoff`` and ``output_prefix``.

    Returns:
        A list of ``(S, freq, total_weight)`` tuples, one per key ``S`` of
        the results, ordered by decreasing ``total_weight``.

    Side effects:
        Prints progress to stdout and writes the table to
        ``"%s.tsv" % iter_num(args.output_prefix + '.sum', ...)``.
    """
    # Parse the arguments into shorter variable handles
    mutationMatrix = args.mutation_matrix
    geneFile = args.gene_file
    patientFile = args.patient_file
    minFreq = args.min_freq
    ks = args.gene_set_sizes  # size of each pathway
    t = len(ks)  # number of pathways
    N = args.num_iterations  # number of iterations
    s = args.step_length  # step
    NStop = args.n_stop
    acc = args.accelerator
    nt = args.nt
    hybridCutoff = args.binom_cut
    NInc = 1.5  # growth factor applied to N while the chains have not converged

    # Load the mutation data
    mutations = C.load_mutation_data(mutationMatrix, patientFile, geneFile,
                                     minFreq)
    m, n, _genes, _patients, _geneToCases, _patientToGenes = mutations

    # One entry per line of the subtype file; passed through to the sampler.
    if args.subtype:
        with open(args.subtype) as f:
            subSet = [line.rstrip() for line in f]
    else:
        subSet = []

    if args.verbose:
        print('Mutation data: %s genes x %s patients' % (m, n))

    # Precompute factorials
    C.precompute_factorials(max(m, n))
    C.set_random_seed(args.seed)

    # Store the scores of pre-computed collections into C
    if args.precomputed_scores:
        load_precomputed_scores(args.precomputed_scores, mutations, subSet)

    # num_initial > 1: convergence pipeline; otherwise a single run
    if args.num_initial > 1:
        # Collect initial solutions from users, multidendrix and random.
        initialSolns, totalOut = initial_solns_generator(
            args.num_initial, mutations, ks, args.initial_soln, subSet)
        runN = N  # iterations to run in the current round
        while True:
            lastSolns = []
            for i, init in enumerate(initialSolns):
                outresults, lastSoln = comet(mutations, n, t, ks, runN, s,
                                             init, acc, subSet, nt,
                                             hybridCutoff, args.exact_cut,
                                             True)
                # Floor division keeps the integer value the Python 2
                # statement printed.
                print("Mem usage: ",
                      resource.getrusage(resource.RUSAGE_SELF).ru_maxrss // 1000)
                merge_runs(totalOut[i], outresults)
                lastSolns.append(lastSoln)

            finalTv = C.discrete_convergence(totalOut, int(N / s))
            print(finalTv, N)

            newN = int(N * NInc)
            if newN > NStop or finalTv < args.total_distance_cutoff:
                break
            # Next round only runs the additional iterations, restarting each
            # chain from where it stopped.
            runN = newN - N
            N = newN
            initialSolns = lastSolns

        runNum = len(totalOut)
        results = merge_results(totalOut)
        printParameters(args, ks, finalTv)  # store and output parameters into .json
    else:
        init = []
        outresults, lastSoln = comet(mutations, n, t, ks, N, s, init, acc,
                                     subSet, nt, hybridCutoff, args.exact_cut,
                                     True)
        results = outresults
        runNum = 1
        printParameters(args, ks, 1)

    C.free_factorials()

    # Output Comet results to TSV
    collections = sorted(results.keys(),
                         key=lambda S: results[S]["total_weight"],
                         reverse=True)
    header = "#Freq\tTotal Weight\tTarget Weight\t"
    header += "\t".join(
        "Gene set %s (k=%s)\tProb %s\tWeight function %s" % (i, ks[i - 1], i, i)
        for i in range(1, len(ks) + 1))
    tbl = [header]
    for S in collections:
        data = results[S]
        row = [data["freq"], data["total_weight"],
               format(data["target_weight"], 'g')]
        # Gene sets are emitted in increasing order of their "W" value.
        for d in sorted(data["sets"], key=lambda d: d["W"]):
            row += [", ".join(sorted(d["genes"])), d["prob"], d["num_tbls"]]
        tbl.append("\t".join(map(str, row)))

    outputFile = "%s.tsv" % iter_num(args.output_prefix + '.sum',
                                     N * runNum, ks, args.accelerator)
    with open(outputFile, "w") as outfile:
        outfile.write("\n".join(tbl))

    return [(S, results[S]["freq"], results[S]["total_weight"])
            for S in collections]
def run(args):
    """Drive a comet analysis from parsed options and return ranked results.

    The mutation data (including subtypes) is loaded through
    ``C.load_mutation_data``.  When ``args.num_initial`` is greater than one,
    one chain is run per initial solution and the chains are lengthened round
    by round until ``C.discrete_convergence`` reports a value below
    ``args.total_distance_cutoff`` or the next round would exceed
    ``args.n_stop``; otherwise a single chain is run.  The outcome is handed
    to ``C.output_comet``.

    Args:
        args: Parsed options object carrying the input paths, sampler
            settings and output options read below.

    Returns:
        A list of ``(S, freq, total_weight)`` tuples, one per result key
        ``S``, in order of decreasing ``total_weight``.
    """
    # --- pull the options into short local names -------------------------
    matrix_path = args.mutation_matrix
    gene_path = args.gene_file
    patient_path = args.patient_file
    min_freq = args.min_freq
    subtype_path = args.subtype
    num_chains = args.num_initial
    ks = args.gene_set_sizes  # size of each pathway
    t = len(ks)  # number of pathways
    N = args.num_iterations  # total iterations so far / to run
    s = args.step_length
    iteration_cap = args.n_stop
    acc = args.accelerator
    nt = args.nt
    hybridCutoff = args.binom_cut
    growth = 1.5  # factor by which N grows while chains have not converged

    # --- load the mutation data; subtypes are split off the tuple --------
    loaded = C.load_mutation_data(matrix_path, patient_path, gene_path,
                                  min_freq, subtype_path)
    m, n, genes, patients, geneToCases, patientToGenes, subtypes = loaded
    mutations = (m, n, genes, patients, geneToCases, patientToGenes)

    if args.verbose:
        print(f'Mutation data: {m} genes x {n} patients')

    # subSet = subtypes, plus one entry per line of the core-events file
    if args.core_events:
        with open(args.core_events) as handle:
            core_events = {line.rstrip() for line in handle}
        subSet = list(subtypes.union(core_events))
    else:
        subSet = list(subtypes)

    C.precompute_factorials(max(m, n))
    C.set_random_seed(args.seed)

    # Hand any pre-computed collection scores to C.
    if args.precomputed_scores:
        C.load_precomputed_scores(args.precomputed_scores, mutations, subSet)

    if num_chains > 1:
        # Convergence pipeline: initial solutions come from the user,
        # multidendrix and random draws.
        chain_starts, totalOut = C.initial_solns_generator(
            num_chains, mutations, ks, args.initial_soln, subSet,
            importMultidendrix, multi_dendrix)
        segment = N  # iterations to run in the current round
        while True:
            chain_ends = []
            for idx, start in enumerate(chain_starts):
                chunk, end_state = comet(mutations, n, t, ks, segment, s,
                                         start, acc, subSet, nt, hybridCutoff,
                                         args.exact_cut, args.verbose)
                C.merge_runs(totalOut[idx], chunk)
                chain_ends.append(end_state)

            finalTv = C.discrete_convergence(totalOut, int(N / s))
            print(finalTv, N)

            grown = int(N * growth)
            finished = (grown > iteration_cap
                        or finalTv < args.total_distance_cutoff)
            if finished:
                break
            # Run only the extra iterations next, resuming each chain.
            segment, N, chain_starts = grown - N, grown, chain_ends

        runNum = len(totalOut)
        results = C.merge_results(totalOut)
    else:
        # Single run from an empty initial solution.
        results, _last = comet(mutations, n, t, ks, N, s, list(), acc, subSet,
                               nt, hybridCutoff, args.exact_cut, args.verbose)
        runNum = 1

    C.free_factorials()

    # Rank the collections by total weight, heaviest first, and emit them.
    def total_weight_of(S):
        return results[S]["total_weight"]

    collections = sorted(results, key=total_weight_of, reverse=True)
    C.output_comet(args, mutations, results, collections, ks, N * runNum,
                   0, 0)

    ranked = []
    for S in collections:
        entry = results[S]
        ranked.append((S, entry["freq"], entry["total_weight"]))
    return ranked
def run(args):
    """Run comet on the configured mutation data and write a TSV summary.

    With ``args.num_initial > 1`` one chain is run per initial solution and
    all chains are repeatedly extended (the total iteration count grows by a
    factor of 1.5 each round) until the value returned by
    ``C.discrete_convergence`` drops below ``args.total_distance_cutoff`` or
    the next extension would exceed ``args.n_stop``.  Otherwise a single
    chain of ``args.num_iterations`` iterations is run from an empty initial
    solution.

    Args:
        args: Parsed options object.  Attributes read here:
            ``mutation_matrix``, ``gene_file``, ``patient_file``,
            ``min_freq``, ``gene_set_sizes``, ``num_iterations``,
            ``step_length``, ``n_stop``, ``accelerator``, ``nt``,
            ``binom_cut``, ``exact_cut``, ``subtype``, ``verbose``, ``seed``,
            ``precomputed_scores``, ``num_initial``, ``initial_soln``,
            ``total_distance_cutoff`` and ``output_prefix``.

    Returns:
        A list of ``(S, freq, total_weight)`` tuples, one per key ``S`` of
        the results, ordered by decreasing ``total_weight``.

    Raises:
        KeyError: If a gene set's ``"num_tbls"`` value is not one of the
            codes in the weight-function mapping (0-3).

    Side effects:
        Prints progress to stdout and writes the table to
        ``"%s.tsv" % iter_num(args.output_prefix + '.sum', ...)``.
    """
    # Parse the arguments into shorter variable handles
    mutationMatrix = args.mutation_matrix
    geneFile = args.gene_file
    patientFile = args.patient_file
    minFreq = args.min_freq
    ks = args.gene_set_sizes  # size of each pathway
    t = len(ks)  # number of pathways
    N = args.num_iterations  # number of iterations
    s = args.step_length  # step
    NStop = args.n_stop
    acc = args.accelerator
    nt = args.nt
    hybridCutoff = args.binom_cut
    NInc = 1.5  # growth factor applied to N while the chains have not converged

    # Load the mutation data
    mutations = C.load_mutation_data(mutationMatrix, patientFile, geneFile,
                                     minFreq)
    m, n, _genes, _patients, _geneToCases, _patientToGenes = mutations

    # One entry per line of the subtype file; passed through to the sampler.
    if args.subtype:
        with open(args.subtype) as f:
            subSet = [line.rstrip() for line in f]
    else:
        subSet = []

    if args.verbose:
        print('Mutation data: %s genes x %s patients' % (m, n))

    # Precompute factorials
    C.precompute_factorials(max(m, n))
    C.set_random_seed(args.seed)

    # Store the scores of pre-computed collections into C
    if args.precomputed_scores:
        load_precomputed_scores(args.precomputed_scores, mutations, subSet)

    # num_initial > 1: convergence pipeline; otherwise a single run
    if args.num_initial > 1:
        # Collect initial solutions from users, multidendrix and random.
        initialSolns, totalOut = initial_solns_generator(
            args.num_initial, mutations, ks, args.initial_soln, subSet)
        runN = N  # iterations to run in the current round
        while True:
            lastSolns = []
            for i, init in enumerate(initialSolns):
                outresults, lastSoln = comet(mutations, n, t, ks, runN, s,
                                             init, acc, subSet, nt,
                                             hybridCutoff, args.exact_cut,
                                             True)
                # Floor division keeps the integer value the Python 2
                # statement printed.
                print("Mem usage: ",
                      resource.getrusage(resource.RUSAGE_SELF).ru_maxrss // 1000)
                merge_runs(totalOut[i], outresults)
                lastSolns.append(lastSoln)

            finalTv = C.discrete_convergence(totalOut, int(N / s))
            print(finalTv, N)

            newN = int(N * NInc)
            if newN > NStop or finalTv < args.total_distance_cutoff:
                break
            # Next round only runs the additional iterations, restarting each
            # chain from where it stopped.
            runN = newN - N
            N = newN
            initialSolns = lastSolns

        runNum = len(totalOut)
        results = merge_results(totalOut)
        printParameters(args, ks, finalTv)  # store and output parameters into .json
    else:
        init = []
        outresults, lastSoln = comet(mutations, n, t, ks, N, s, init, acc,
                                     subSet, nt, hybridCutoff, args.exact_cut,
                                     True)
        results = outresults
        runNum = 1
        printParameters(args, ks, 1)

    C.free_factorials()

    # Output Comet results to TSV
    collections = sorted(results.keys(),
                         key=lambda S: results[S]["total_weight"],
                         reverse=True)
    # Translates each set's "num_tbls" code into a one-letter label for the
    # "Weight function" column.
    # NOTE(review): the meaning of E/B/P is not visible here; confirm
    # against the scoring code.
    weight_func_mapping = {0: 'E', 1: 'E', 2: 'B', 3: 'P'}
    header = "#Freq\tTotal Weight\tTarget Weight\t"
    header += "\t".join(
        "Gene set %s (k=%s)\tPhi %s\tWeight function %s" % (i, ks[i - 1], i, i)
        for i in range(1, len(ks) + 1))
    tbl = [header]
    for S in collections:
        data = results[S]
        row = [data["freq"], data["total_weight"],
               format(data["target_weight"], 'g')]
        # Gene sets are emitted in increasing order of their "W" value.
        for d in sorted(data["sets"], key=lambda d: d["W"]):
            row += [", ".join(sorted(d["genes"])), d["prob"],
                    weight_func_mapping[d["num_tbls"]]]
        tbl.append("\t".join(map(str, row)))

    outputFile = "%s.tsv" % iter_num(args.output_prefix + '.sum',
                                     N * runNum, ks, args.accelerator)
    with open(outputFile, "w") as outfile:
        outfile.write("\n".join(tbl))

    return [(S, results[S]["freq"], results[S]["total_weight"])
            for S in collections]
def run(args):
    """Execute comet as configured by ``args`` and return the ranked output.

    Mutation data and subtypes come from ``C.load_mutation_data``.  If
    ``args.num_initial`` exceeds one, a chain is started from every initial
    solution and all chains are extended in rounds until the statistic from
    ``C.discrete_convergence`` is below ``args.total_distance_cutoff`` or
    another round would pass ``args.n_stop`` iterations.  If not, exactly one
    chain is run.  Results are passed on to ``C.output_comet``.

    Args:
        args: Parsed options object providing the input file paths, sampler
            parameters and output settings accessed below.

    Returns:
        A list of ``(S, freq, total_weight)`` tuples for every result key
        ``S``, sorted by ``total_weight`` from largest to smallest.
    """
    # Local aliases for the options used repeatedly below.
    matrix_file = args.mutation_matrix
    gene_file = args.gene_file
    patient_file = args.patient_file
    freq_floor = args.min_freq
    subtype_file = args.subtype
    n_starts = args.num_initial
    ks = args.gene_set_sizes  # size of each pathway
    t = len(ks)  # number of pathways
    N = args.num_iterations  # running total of iterations
    s = args.step_length
    n_stop = args.n_stop
    acc = args.accelerator
    nt = args.nt
    hybridCutoff = args.binom_cut
    n_inc = 1.5  # multiplier for N when the chains have not converged

    # Load everything, then keep the six-field tuple the sampler expects.
    raw = C.load_mutation_data(matrix_file, patient_file, gene_file,
                               freq_floor, subtype_file)
    m, n, genes, patients, geneToCases, patientToGenes, subtypes = raw
    mutations = (m, n, genes, patients, geneToCases, patientToGenes)

    if args.verbose:
        print('Mutation data: %s genes x %s patients' % (m, n))

    # Subtypes, merged with the lines of the core-events file when given.
    if not args.core_events:
        subSet = list(subtypes)
    else:
        with open(args.core_events) as fh:
            listed = {row.rstrip() for row in fh}
        subSet = list(subtypes.union(listed))

    C.precompute_factorials(max(m, n))
    C.set_random_seed(args.seed)

    # Register scores of pre-computed collections with C.
    if args.precomputed_scores:
        C.load_precomputed_scores(args.precomputed_scores, mutations, subSet)

    if n_starts > 1:
        # Multi-chain run; initial solutions come from the user,
        # multidendrix and random draws.
        starts, totalOut = C.initial_solns_generator(
            n_starts, mutations, ks, args.initial_soln, subSet,
            importMultidendrix, multi_dendrix)
        round_len = N  # iterations each chain runs in the current round
        while True:
            ends = []
            for pos, start in enumerate(starts):
                partial, stopped_at = comet(
                    mutations, n, t, ks, round_len, s, start, acc, subSet,
                    nt, hybridCutoff, args.exact_cut, args.verbose)
                C.merge_runs(totalOut[pos], partial)
                ends.append(stopped_at)

            finalTv = C.discrete_convergence(totalOut, int(N / s))
            print(finalTv, N)

            bigger_n = int(N * n_inc)
            if bigger_n > n_stop or finalTv < args.total_distance_cutoff:
                break
            # Continue every chain from its last state for the extra
            # iterations only.
            round_len = bigger_n - N
            N = bigger_n
            starts = ends

        runNum = len(totalOut)
        results = C.merge_results(totalOut)
    else:
        # One chain, started from an empty solution.
        single = comet(mutations, n, t, ks, N, s, list(), acc, subSet, nt,
                       hybridCutoff, args.exact_cut, args.verbose)
        results, _final_state = single
        runNum = 1

    C.free_factorials()

    # Heaviest collections first, then hand off for output.
    collections = sorted(results.keys(),
                         key=lambda S: results[S]["total_weight"],
                         reverse=True)
    C.output_comet(args, mutations, results, collections, ks, N * runNum,
                   0, 0)

    return [(S, results[S]["freq"], results[S]["total_weight"])
            for S in collections]