def run(args):
    """Run CoMEt on the real data and on permuted matrices, then visualize.

    Steps, in order:
      1. Run CoMEt once on the real data (``--noviz``), writing to
         ``<output_directory>/comet-results``.
      2. Load the mutation data and build the bipartite mutation graph.
      3. Write ``args.num_permutations`` permuted matrices and run CoMEt on
         each of them (in a process pool when ``args.parallel`` is set).
      4. Take the maximum statistic found in the permuted results.
      5. Call ``C.output_comet_viz`` with the real mutation data, the real
         results table, and that maximum.

    Args:
        args: Parsed command-line namespace. Attributes read here:
            output_directory, mutation_matrix, patient_file, gene_file,
            min_freq, subtype, verbose, keep_temp_files, seed,
            num_permutations, Q, parallel.

    Raises:
        IOError: If the real run produced no ``.tsv`` results file.
    """
    import shutil
    import tempfile
    from itertools import islice

    # Set up the arguments for a general CoMEt run on real data.
    # sys.argv[i] is the token *preceding* ``arg`` (enumerate is 0-based over
    # argv[1:]), so each listed flag and the token right after it are dropped.
    # NOTE(review): this also drops the token after --parallel and
    # --keep_temp_files, which are used as booleans below -- confirm intended.
    realOutputDir = "{}/comet-results".format(args.output_directory)
    permuteFlags = ["-np", "--parallel", "--keep_temp_files", "-o"]
    realCometArgs = [
        arg for i, arg in enumerate(sys.argv[1:])
        if arg not in permuteFlags and sys.argv[i] not in permuteFlags
    ]
    realCometArgs += ["-o", realOutputDir, "--noviz"]

    # Perform a simple run without viz first.
    runComet(realCometArgs)

    # Load mutation data using Multi-Dendrix
    loaded = C.load_mutation_data(args.mutation_matrix, args.patient_file,
                                  args.gene_file, args.min_freq, args.subtype)
    m, n, genes, patients, geneToCases, patientToGenes, _ = loaded
    if args.verbose:
        print('* Mutation data: %s genes x %s patients' % (m, n))

    # Construct bipartite graph from mutation data
    if args.verbose:
        print("* Creating bipartite graph...")
    G = C.construct_mutation_graph(geneToCases, patientToGenes)
    if args.verbose:
        print('\t- Graph has', len(G.edges()), 'edges among',
              len(G.nodes()), 'nodes.')

    # Reset the arguments for a general CoMEt run on permuted matrices
    permuteFlags = ["-np", "--parallel", "--keep_temp_files", "-m", "-o"]
    cometArgs = [
        arg for i, arg in enumerate(sys.argv[1:])
        if arg not in permuteFlags and sys.argv[i] not in permuteFlags
    ]
    cometArgs.append('--noviz')

    # Permuted matrices go to the output directory when they are kept,
    # otherwise to a scratch directory that is removed at the end.
    if args.keep_temp_files:
        directory = args.output_directory
    else:
        directory = tempfile.mkdtemp(dir=".", prefix=".tmp")

    try:
        # Generate random seeds for each permutation
        random.seed(args.seed)
        seeds = [random.randint(0, 2**31 - 1)
                 for _ in range(args.num_permutations)]

        arguments = []
        for i, seed in enumerate(seeds):
            # Print simple progress bar
            sys.stdout.write("* Running CoMEt on permuted matrices... {}/{}\r"
                             .format(i + 1, args.num_permutations))
            sys.stdout.flush()

            # Create a permuted dataset and save it to a temporary file.
            # The permuted mapping gets its own name so the real
            # geneToCases/patientToGenes stay intact for the visualization.
            permuted = C.permute_mutation_data(G, genes, patients, seed,
                                               args.Q)
            permPatientToGenes = permuted[5]
            adj_list = [p + "\t" + "\t".join(sorted(permPatientToGenes[p]))
                        for p in patients]

            permutation_file = "{}/permuted-matrix-{}.m2".format(directory,
                                                                 i + 1)
            with open(permutation_file, 'w') as outfile:
                outfile.write('\n'.join(adj_list))

            # Add the new arguments
            permuteArgs = list(map(str, cometArgs))
            permuteArgs += ["-m", permutation_file]
            permuteArgs += ["-o", "{}/comet-results-on-permutation-{}"
                            .format(directory, i + 1)]
            arguments.append(permuteArgs)

        if args.parallel:
            pool = mp.Pool(25)
            try:
                pool.map(runComet, arguments)
            finally:
                # Always release the workers, even if a run failed.
                pool.close()
                pool.join()
        else:
            for permuteArgs in arguments:
                runComet(permuteArgs)

        # Find the maximum test statistic on the permuted datasets.
        # Only the second line of each .tsv is read and its second
        # tab-separated column parsed (presumably the first row after a
        # header -- confirm against the CoMEt output format).
        maxStat = 0
        for rf in os.listdir(directory):
            if not rf.startswith("comet-results-on-permutation"):
                continue
            permResultsDir = "{}/{}/results".format(directory, rf)
            for df in os.listdir(permResultsDir):
                if not df.endswith(".tsv"):
                    continue
                with open("{}/{}".format(permResultsDir, df)) as infile:
                    for line in islice(infile, 1, 2):
                        score = float(line.split("\t")[1])
                        if score > maxStat:
                            maxStat = score

        print("*" * 80)
        print("Number of permutations:", args.num_permutations)
        print("Max statistic:", maxStat)

        # Prepare comet results on real mutation data for viz. When several
        # .tsv files exist, the last one listed wins (as before).
        resultsTable = None
        for rf in os.listdir("{}/results/".format(realOutputDir)):
            if rf.startswith('.') or not rf.endswith(".tsv"):
                continue
            with open("{}/results/{}".format(realOutputDir, rf)) as infile:
                resultsTable = [l.rstrip() for l in infile]
        if resultsTable is None:
            raise IOError("No .tsv results found in {}/results/"
                          .format(realOutputDir))

        realMutations = (m, n, genes, patients, geneToCases, patientToGenes)
        outputDirViz = realOutputDir + "/viz/"
        C.ensure_dir(outputDirViz)

        # Perform visualization
        C.output_comet_viz(RC.get_parser().parse_args(realCometArgs),
                           realMutations, resultsTable, maxStat,
                           args.num_permutations)
    finally:
        # Destroy the temporary directory if necessary
        if not args.keep_temp_files:
            shutil.rmtree(directory)
def run(args):
    """Run CoMEt on permuted mutation matrices and record the max statistic.

    Loads the mutation data, builds the bipartite mutation graph, writes
    ``args.num_permutations`` permuted matrices, runs CoMEt on each of them
    (in a process pool when ``args.parallel`` is set), and writes a summary
    to ``<output_directory>/comet-stats.json``.

    Args:
        args: Parsed command-line namespace. Attributes read here:
            mutation_matrix, patient_file, gene_file, min_freq, verbose,
            keep_temp_files, output_directory, seed, num_permutations, Q,
            parallel.
    """
    import random
    import shutil
    import tempfile
    from itertools import islice

    # Load mutation data using Multi-Dendrix
    mutations = C.load_mutation_data(args.mutation_matrix, args.patient_file,
                                     args.gene_file, args.min_freq)
    m, n, genes, patients, geneToCases, patientToGenes = mutations
    if args.verbose:
        print('* Mutation data: %s genes x %s patients' % (m, n))

    # Construct bipartite graph from mutation data
    if args.verbose:
        print("* Creating bipartite graph...")
    G = C.construct_mutation_graph(geneToCases, patientToGenes)
    if args.verbose:
        print('\t- Graph has {} edges among {} nodes.'.format(
            len(G.edges()), len(G.nodes())))

    # Set up the arguments for a general CoMEt run.
    # sys.argv[i] is the token *preceding* ``arg`` (enumerate is 0-based over
    # argv[1:]), so each listed flag and the token right after it are dropped.
    # NOTE(review): this also drops the token after --parallel and
    # --keep_temp_files, which are used as booleans below -- confirm intended.
    permuteFlags = ["-np", "--parallel", "--keep_temp_files", "-m", "-o"]
    cometArgs = [
        arg for i, arg in enumerate(sys.argv[1:])
        if arg not in permuteFlags and sys.argv[i] not in permuteFlags
    ]

    # Permuted matrices go to the output directory when they are kept,
    # otherwise to a scratch directory that is removed at the end.
    if args.keep_temp_files:
        directory = args.output_directory
    else:
        directory = tempfile.mkdtemp(dir=".", prefix=".tmp")

    try:
        # Derive one seed per permutation from args.seed; passing args.seed
        # itself to every call would request the same permutation each time.
        random.seed(args.seed)
        seeds = [random.randint(0, 2**31 - 1)
                 for _ in range(args.num_permutations)]

        arguments = []
        for i, seed in enumerate(seeds):
            # Print simple progress bar (out of the number of permutations,
            # not the number of patients ``n``).
            sys.stdout.write("* Running CoMEt on permuted matrices... {}/{}\r"
                             .format(i + 1, args.num_permutations))
            sys.stdout.flush()

            # Create a permuted dataset and save it to a temporary file
            permuted = C.permute_mutation_data(G, genes, patients, seed,
                                               args.Q)
            permPatientToGenes = permuted[5]
            adj_list = [p + "\t" + "\t".join(sorted(permPatientToGenes[p]))
                        for p in patients]

            permutation_file = "{}/permuted-matrix-{}.m2".format(directory,
                                                                 i + 1)
            with open(permutation_file, 'w') as outfile:
                outfile.write('\n'.join(adj_list))

            # Add the new arguments. list(...) keeps ``+=`` working where
            # map() returns an iterator.
            permuteArgs = list(map(str, cometArgs))
            permuteArgs += ["-m", permutation_file]
            permuteArgs += ["-o", "{}/comet-results-on-permutation-{}"
                            .format(directory, i + 1)]
            arguments.append(permuteArgs)

        if args.parallel:
            pool = mp.Pool(25)
            try:
                pool.map(runComet, arguments)
            finally:
                # Always release the workers, even if a run failed.
                pool.close()
                pool.join()
        else:
            for permuteArgs in arguments:
                runComet(permuteArgs)

        # Find the maximum test statistic on the permuted datasets.
        # Only the second line of each results file is read and its second
        # tab-separated column parsed (presumably the first row after a
        # header -- confirm against the CoMEt output format).
        maxStat = 0
        for rf in os.listdir(directory):
            if not rf.startswith("comet-results"):
                continue
            with open("{}/{}".format(directory, rf)) as infile:
                for line in islice(infile, 1, 2):
                    score = float(line.split("\t")[1])
                    if score > maxStat:
                        maxStat = score

        print("*" * 80)
        print("Number of permutations: {}".format(args.num_permutations))
        print("Max statistic: {}".format(maxStat))

        # Output the results to files. The "mutationNatrix" key spelling is
        # kept as-is because readers of this file may depend on it.
        stats_path = "{}/comet-stats.json".format(args.output_directory)
        with open(stats_path, "w") as outfile:
            output = dict(maxPermutedWeight=maxStat,
                          numPermutations=args.num_permutations,
                          keepTempFiles=args.keep_temp_files,
                          mutationNatrix=args.mutation_matrix,
                          geneFile=args.gene_file,
                          patientFile=args.patient_file,
                          minFreq=args.min_freq,
                          Q=args.Q)
            json.dump(output, outfile, sort_keys=True, indent=4)
    finally:
        # Destroy the temporary directory if necessary
        if not args.keep_temp_files:
            shutil.rmtree(directory)
def run(args):
    """Run CoMEt on seeded permuted matrices and record the max statistic.

    Loads the mutation data, builds the bipartite mutation graph, derives one
    random seed per permutation from ``args.seed``, writes the permuted
    matrices, runs CoMEt on each of them (in a process pool when
    ``args.parallel`` is set), and writes a summary to
    ``<output_directory>/comet-stats.json``.

    Args:
        args: Parsed command-line namespace. Attributes read here:
            mutation_matrix, patient_file, gene_file, min_freq, verbose,
            keep_temp_files, output_directory, seed, num_permutations, Q,
            parallel.
    """
    import shutil
    import tempfile
    from itertools import islice

    # Load mutation data using Multi-Dendrix
    mutations = C.load_mutation_data(args.mutation_matrix, args.patient_file,
                                     args.gene_file, args.min_freq)
    m, n, genes, patients, geneToCases, patientToGenes = mutations
    if args.verbose:
        print('* Mutation data: %s genes x %s patients' % (m, n))

    # Construct bipartite graph from mutation data
    if args.verbose:
        print("* Creating bipartite graph...")
    G = C.construct_mutation_graph(geneToCases, patientToGenes)
    if args.verbose:
        # Single formatted string: prints the same text whether ``print`` is
        # the Python 2 statement or the Python 3 function.
        print('\t- Graph has {} edges among {} nodes.'.format(
            len(G.edges()), len(G.nodes())))

    # Set up the arguments for a general CoMEt run.
    # sys.argv[i] is the token *preceding* ``arg`` (enumerate is 0-based over
    # argv[1:]), so each listed flag and the token right after it are dropped.
    # NOTE(review): this also drops the token after --parallel and
    # --keep_temp_files, which are used as booleans below -- confirm intended.
    permuteFlags = ["-np", "--parallel", "--keep_temp_files", "-m", "-o"]
    cometArgs = [
        arg for i, arg in enumerate(sys.argv[1:])
        if arg not in permuteFlags and sys.argv[i] not in permuteFlags
    ]

    # Permuted matrices go to the output directory when they are kept,
    # otherwise to a scratch directory that is removed at the end.
    if args.keep_temp_files:
        directory = args.output_directory
    else:
        directory = tempfile.mkdtemp(dir=".", prefix=".tmp")

    try:
        # Generate random seeds for each permutation
        random.seed(args.seed)
        seeds = [random.randint(0, 2**31 - 1)
                 for _ in range(args.num_permutations)]

        arguments = []
        for i, seed in enumerate(seeds):
            # Print simple progress bar
            sys.stdout.write("* Running CoMEt on permuted matrices... {}/{}\r"
                             .format(i + 1, args.num_permutations))
            sys.stdout.flush()

            # Create a permuted dataset and save it to a temporary file
            permuted = C.permute_mutation_data(G, genes, patients, seed,
                                               args.Q)
            permPatientToGenes = permuted[5]
            adj_list = [p + "\t" + "\t".join(sorted(permPatientToGenes[p]))
                        for p in patients]

            permutation_file = "{}/permuted-matrix-{}.m2".format(directory,
                                                                 i + 1)
            with open(permutation_file, 'w') as outfile:
                outfile.write('\n'.join(adj_list))

            # Add the new arguments. list(...) keeps ``+=`` working where
            # map() returns an iterator.
            permuteArgs = list(map(str, cometArgs))
            permuteArgs += ["-m", permutation_file]
            permuteArgs += ["-o", "{}/comet-results-on-permutation-{}"
                            .format(directory, i + 1)]
            arguments.append(permuteArgs)

        if args.parallel:
            pool = mp.Pool(25)
            try:
                pool.map(runComet, arguments)
            finally:
                # Always release the workers, even if a run failed.
                pool.close()
                pool.join()
        else:
            for permuteArgs in arguments:
                runComet(permuteArgs)

        # Find the maximum test statistic on the permuted datasets.
        # Only the second line of each results file is read and its second
        # tab-separated column parsed (presumably the first row after a
        # header -- confirm against the CoMEt output format).
        maxStat = 0
        for rf in os.listdir(directory):
            if not rf.startswith("comet-results"):
                continue
            with open("{}/{}".format(directory, rf)) as infile:
                for line in islice(infile, 1, 2):
                    score = float(line.split("\t")[1])
                    if score > maxStat:
                        maxStat = score

        print("*" * 80)
        print("Number of permutations: {}".format(args.num_permutations))
        print("Max statistic: {}".format(maxStat))

        # Output the results to files. The "mutationNatrix" key spelling is
        # kept as-is because readers of this file may depend on it.
        stats_path = "{}/comet-stats.json".format(args.output_directory)
        with open(stats_path, "w") as outfile:
            output = dict(maxPermutedWeight=maxStat,
                          numPermutations=args.num_permutations,
                          keepTempFiles=args.keep_temp_files,
                          mutationNatrix=args.mutation_matrix,
                          geneFile=args.gene_file,
                          patientFile=args.patient_file,
                          minFreq=args.min_freq,
                          Q=args.Q)
            json.dump(output, outfile, sort_keys=True, indent=4)
    finally:
        # Destroy the temporary directory if necessary
        if not args.keep_temp_files:
            shutil.rmtree(directory)
def run(args):
    """Run CoMEt on the real data and on permuted matrices, then visualize.

    Steps, in order:
      1. Run CoMEt once on the real data (``--noviz``), writing to
         ``<output_directory>/comet-results``.
      2. Load the mutation data and build the bipartite mutation graph.
      3. Write ``args.num_permutations`` permuted matrices and run CoMEt on
         each of them (in a process pool when ``args.parallel`` is set).
      4. Take the maximum statistic found in the permuted results.
      5. Call ``C.output_comet_viz`` with the real mutation data, the real
         results table, and that maximum.

    Args:
        args: Parsed command-line namespace. Attributes read here:
            output_directory, mutation_matrix, patient_file, gene_file,
            min_freq, subtype, verbose, keep_temp_files, seed,
            num_permutations, Q, parallel.

    Raises:
        IOError: If the real run produced no visible ``.tsv`` results file.
    """
    import shutil
    import tempfile
    from itertools import islice

    # Set up the arguments for a general CoMEt run on real data.
    # sys.argv[i] is the token *preceding* ``arg`` (enumerate is 0-based over
    # argv[1:]), so each listed flag and the token right after it are dropped.
    # NOTE(review): this also drops the token after --parallel and
    # --keep_temp_files, which are used as booleans below -- confirm intended.
    realOutputDir = "{}/comet-results".format(args.output_directory)
    permuteFlags = ["-np", "--parallel", "--keep_temp_files", "-o"]
    realCometArgs = [
        arg for i, arg in enumerate(sys.argv[1:])
        if arg not in permuteFlags and sys.argv[i] not in permuteFlags
    ]
    realCometArgs += ["-o", realOutputDir, "--noviz"]

    # Perform a simple run without viz first.
    runComet(realCometArgs)

    # Load mutation data using Multi-Dendrix
    loaded = C.load_mutation_data(args.mutation_matrix, args.patient_file,
                                  args.gene_file, args.min_freq, args.subtype)
    m, n, genes, patients, geneToCases, patientToGenes, _ = loaded
    if args.verbose:
        print(f'* Mutation data: {m} genes x {n} patients')

    # Construct bipartite graph from mutation data
    if args.verbose:
        print('* Creating bipartite graph...')
    G = C.construct_mutation_graph(geneToCases, patientToGenes)
    if args.verbose:
        print('\t- Graph has', len(G.edges()), 'edges among',
              len(G.nodes()), 'nodes.')

    # Reset the arguments for a general CoMEt run on permuted matrices
    permuteFlags = ["-np", "--parallel", "--keep_temp_files", "-m", "-o"]
    cometArgs = [
        arg for i, arg in enumerate(sys.argv[1:])
        if arg not in permuteFlags and sys.argv[i] not in permuteFlags
    ]
    cometArgs.append('--noviz')

    # Permuted matrices go to the output directory when they are kept,
    # otherwise to a scratch directory that is removed at the end.
    if args.keep_temp_files:
        directory = args.output_directory
    else:
        directory = tempfile.mkdtemp(dir=".", prefix=".tmp")

    try:
        # Generate random seeds for each permutation
        random.seed(args.seed)
        seeds = [random.randint(0, 2**31 - 1)
                 for _ in range(args.num_permutations)]

        arguments = []
        for i, seed in enumerate(seeds):
            # Print simple progress bar
            sys.stdout.write("* Running CoMEt on permuted matrices... {}/{}\r"
                             .format(i + 1, args.num_permutations))
            sys.stdout.flush()

            # Create a permuted dataset and save it to a temporary file.
            # The permuted mapping gets its own name so the real
            # geneToCases/patientToGenes stay intact for the visualization.
            permuted = C.permute_mutation_data(G, genes, patients, seed,
                                               args.Q)
            permPatientToGenes = permuted[5]
            adj_list = [p + "\t" + "\t".join(sorted(permPatientToGenes[p]))
                        for p in patients]

            permutation_file = "{}/permuted-matrix-{}.m2".format(directory,
                                                                 i + 1)
            with open(permutation_file, 'w') as outfile:
                outfile.write('\n'.join(adj_list))

            # Add the new arguments
            permuteArgs = list(map(str, cometArgs))
            permuteArgs += ["-m", permutation_file]
            permuteArgs += ["-o", "{}/comet-results-on-permutation-{}"
                            .format(directory, i + 1)]
            arguments.append(permuteArgs)

        if args.parallel:
            pool = mp.Pool(25)
            try:
                pool.map(runComet, arguments)
            finally:
                # Always release the workers, even if a run failed.
                pool.close()
                pool.join()
        else:
            for permuteArgs in arguments:
                runComet(permuteArgs)

        # Find the maximum test statistic on the permuted datasets.
        # Only the second line of each .tsv is read and its second
        # tab-separated column parsed (presumably the first row after a
        # header -- confirm against the CoMEt output format).
        maxStat = 0
        for rf in os.listdir(directory):
            if not rf.startswith("comet-results-on-permutation"):
                continue
            permResultsDir = "{}/{}/results".format(directory, rf)
            for df in os.listdir(permResultsDir):
                if not df.endswith(".tsv"):
                    continue
                with open("{}/{}".format(permResultsDir, df)) as infile:
                    for line in islice(infile, 1, 2):
                        score = float(line.split("\t")[1])
                        if score > maxStat:
                            maxStat = score

        print("*" * 80)
        print("Number of permutations:", args.num_permutations)
        print("Max statistic:", maxStat)

        # Prepare comet results on real mutation data for viz, skipping
        # hidden files. When several .tsv files exist, the last one listed
        # wins (as before).
        resultsTable = None
        for rf in os.listdir("{}/results/".format(realOutputDir)):
            if rf.startswith('.') or not rf.endswith(".tsv"):
                continue
            with open("{}/results/{}".format(realOutputDir, rf)) as infile:
                resultsTable = [l.rstrip() for l in infile]
        if resultsTable is None:
            raise IOError("No .tsv results found in {}/results/"
                          .format(realOutputDir))

        realMutations = (m, n, genes, patients, geneToCases, patientToGenes)
        outputDirViz = realOutputDir + "/viz/"
        C.ensure_dir(outputDirViz)

        # Perform visualization
        C.output_comet_viz(RC.get_parser().parse_args(realCometArgs),
                           realMutations, resultsTable, maxStat,
                           args.num_permutations)
    finally:
        # Destroy the temporary directory if necessary
        if not args.keep_temp_files:
            shutil.rmtree(directory)