コード例 #1
0
ファイル: run_comet_full.py プロジェクト: mruffalo/comet
def _strip_flags(argv, value_flags, switch_flags):
    """Return a copy of *argv* with the listed command-line flags removed.

    Args:
        argv: Command-line tokens, without the program name.
        value_flags: Flags followed by one value; the flag and the token
            after it are both dropped.
        switch_flags: Flags that stand alone; only the flag itself is dropped.

    Returns:
        A new list holding the remaining tokens in their original order.
    """
    kept = []
    skip_value = False
    for token in argv:
        if skip_value:
            # This token is the value of the flag that was just dropped.
            skip_value = False
        elif token in value_flags:
            skip_value = True
        elif token not in switch_flags:
            kept.append(token)
    return kept


def run( args ):
    """Run CoMEt on the real data and on permuted matrices, then visualize.

    CoMEt is first run (with ``--noviz``) using the command-line arguments,
    then once per permuted mutation matrix. The largest value found in the
    second column of the second line of each permuted ``.tsv`` result is
    passed, together with the real results table and the real mutation data,
    to ``C.output_comet_viz``.

    Args:
        args: Parsed command-line arguments. Attributes read here:
            ``output_directory``, ``mutation_matrix``, ``patient_file``,
            ``gene_file``, ``min_freq``, ``subtype``, ``verbose``,
            ``keep_temp_files``, ``seed``, ``num_permutations``, ``Q`` and
            ``parallel``.

    Raises:
        RuntimeError: If the run on the real data left no ``.tsv`` file in
            its ``results`` directory.
    """
    import shutil
    import tempfile
    from itertools import islice

    # NOTE(review): --parallel and --keep_temp_files are only truth-tested in
    # this function, so they are treated as bare switches that do not consume
    # the following token; confirm against the argument parser.
    switchFlags = ("--parallel", "--keep_temp_files")

    # Set up the arguments for a general CoMEt run on real data
    realOutputDir = "{}/comet-results".format(args.output_directory)
    realCometArgs = _strip_flags(sys.argv[1:], ("-np", "-o"), switchFlags)
    realCometArgs += [ "-o", realOutputDir, "--noviz"]
    # Perform a simple run without viz first.
    runComet(realCometArgs)

    # Load the real mutation data
    realMutations = C.load_mutation_data(args.mutation_matrix, args.patient_file,
                                         args.gene_file, args.min_freq, args.subtype)
    m, n, genes, patients, geneToCases, patientToGenes, subtypes = realMutations

    if args.verbose:
        print('* Mutation data: %s genes x %s patients' % (m, n))

    # Construct bipartite graph from mutation data
    if args.verbose: print("* Creating bipartite graph...")
    G = C.construct_mutation_graph(geneToCases, patientToGenes)
    if args.verbose:
        print('\t- Graph has', len( G.edges() ), 'edges among', len( G.nodes() ), 'nodes.')

    # Arguments shared by every CoMEt run on a permuted matrix; -m and -o are
    # re-added per permutation below.
    cometArgs = _strip_flags(sys.argv[1:], ("-np", "-m", "-o"), switchFlags)
    cometArgs.append('--noviz')

    if args.keep_temp_files:
        directory = args.output_directory
    else:
        directory = tempfile.mkdtemp(dir=".", prefix=".tmp")

    try:
        # Generate random seeds for each permutation
        random.seed(args.seed)
        seeds = [ random.randint(0, 2**31-1) for _ in range(args.num_permutations) ]

        arguments = []
        for i, seed in enumerate(seeds):
            # Print simple progress bar
            sys.stdout.write("* Running CoMEt on permuted matrices... {}/{}\r".format(i+1, args.num_permutations))
            sys.stdout.flush()

            # Create a permuted dataset and save it to a temporary file. The
            # permuted mapping gets its own name so the real geneToCases and
            # patientToGenes stay intact for the visualization below.
            permuted = C.permute_mutation_data(G, genes, patients, seed, args.Q)
            _, _, _, _, _, permutedPatientToGenes = permuted
            adj_list = [ p + "\t" + "\t".join( sorted(permutedPatientToGenes[p]) ) for p in patients ]

            permutation_file = "{}/permuted-matrix-{}.m2".format(directory, i+1)
            with open(permutation_file, 'w') as outfile:
                outfile.write('\n'.join(adj_list))

            # Add the new arguments
            permuteArgs = [ str(arg) for arg in cometArgs ]
            permuteArgs += [ "-m", permutation_file ]
            permuteArgs += [ "-o", "{}/comet-results-on-permutation-{}".format(directory, i+1)]
            arguments.append( permuteArgs )

        if args.parallel:
            pool = mp.Pool(25)
            try:
                pool.map(runComet, arguments)
            finally:
                # Release the worker processes even when a run fails.
                pool.close()
                pool.join()
        else:
            for permuteArgs in arguments:
                runComet(permuteArgs)

        # Find the maximum test statistic on the permuted datasets
        maxStat = 0
        for rf in os.listdir(directory):
            if not rf.startswith("comet-results-on-permutation"):
                continue
            permutedResultsDir = "{}/{}/results".format(directory, rf)
            for df in os.listdir(permutedResultsDir):
                if not df.endswith(".tsv"):
                    continue
                with open("{}/{}".format(permutedResultsDir, df)) as infile:
                    # Only the second line of each file is inspected.
                    for line in islice(infile, 1, 2):
                        score = float(line.split("\t")[1])
                        if score > maxStat:
                            maxStat = score

        print("*" * 80)
        print("Number of permutations:", args.num_permutations)
        print("Max statistic:", maxStat)

        # Load the CoMEt results on the real data. When several .tsv files
        # exist, the last one listed wins.
        resultsTable = None
        realResultsDir = "{}/results".format(realOutputDir)
        for rf in os.listdir(realResultsDir):
            # Hidden files are skipped so that stray dot-files ending in .tsv
            # are not mistaken for results.
            if rf.startswith('.') or not rf.endswith(".tsv"):
                continue
            with open("{}/{}".format(realResultsDir, rf)) as infile:
                resultsTable = [ l.rstrip() for l in infile ]
        if resultsTable is None:
            raise RuntimeError("No .tsv results found in {}".format(realResultsDir))

        realMutations = (m, n, genes, patients, geneToCases, patientToGenes )
        outputDirViz = realOutputDir + "/viz/"
        C.ensure_dir(outputDirViz)

        # Perform visualization
        C.output_comet_viz(RC.get_parser().parse_args(realCometArgs), realMutations,
            resultsTable, maxStat, args.num_permutations)
    finally:
        # Destroy the temporary directory if necessary, even after a failure
        if not args.keep_temp_files:
            shutil.rmtree(directory)
コード例 #2
0
ファイル: run_comet_permutation.py プロジェクト: xtmgah/comet
def _strip_flags(argv, value_flags, switch_flags):
    """Return a copy of *argv* with the listed command-line flags removed.

    Args:
        argv: Command-line tokens, without the program name.
        value_flags: Flags followed by one value; the flag and the token
            after it are both dropped.
        switch_flags: Flags that stand alone; only the flag itself is dropped.

    Returns:
        A new list holding the remaining tokens in their original order.
    """
    kept = []
    skip_value = False
    for token in argv:
        if skip_value:
            # This token is the value of the flag that was just dropped.
            skip_value = False
        elif token in value_flags:
            skip_value = True
        elif token not in switch_flags:
            kept.append(token)
    return kept


def run( args ):
    """Run CoMEt on permuted mutation matrices and record the maximum score.

    One permuted matrix is written and run through CoMEt per permutation.
    The largest value found in the second column of the second line of each
    ``comet-results*`` file is printed and written, with the run parameters,
    to ``comet-stats.json`` in ``args.output_directory``.

    Args:
        args: Parsed command-line arguments. Attributes read here:
            ``mutation_matrix``, ``patient_file``, ``gene_file``,
            ``min_freq``, ``verbose``, ``keep_temp_files``,
            ``output_directory``, ``seed``, ``num_permutations``, ``Q`` and
            ``parallel``.
    """
    import random
    import shutil
    import tempfile
    from itertools import islice

    # Load the mutation data
    mutations = C.load_mutation_data(args.mutation_matrix, args.patient_file,
                                     args.gene_file, args.min_freq)
    m, n, genes, patients, geneToCases, patientToGenes = mutations

    # Single-argument print() calls behave the same on Python 2 and 3.
    if args.verbose:
        print('* Mutation data: %s genes x %s patients' % (m, n))

    # Construct bipartite graph from mutation data
    if args.verbose:
        print("* Creating bipartite graph...")
    G = C.construct_mutation_graph(geneToCases, patientToGenes)
    if args.verbose:
        print('\t- Graph has %s edges among %s nodes.' % (len( G.edges() ), len( G.nodes() )))

    # Set up the arguments for a general CoMEt run; -m and -o are re-added
    # per permutation below.
    # NOTE(review): --parallel and --keep_temp_files are only truth-tested in
    # this function, so they are treated as bare switches that do not consume
    # the following token; confirm against the argument parser.
    cometArgs = _strip_flags(sys.argv[1:], ("-np", "-m", "-o"),
                             ("--parallel", "--keep_temp_files"))

    if args.keep_temp_files:
        directory = args.output_directory
    else:
        directory = tempfile.mkdtemp(dir=".", prefix=".tmp")

    try:
        # Derive a distinct seed per permutation; passing args.seed itself
        # would hand the same seed to every permutation call.
        random.seed(args.seed)
        seeds = [ random.randint(0, 2**31-1) for _ in range(args.num_permutations) ]

        arguments = []
        for i, seed in enumerate(seeds):
            # Print simple progress bar (the total is the permutation count)
            sys.stdout.write("* Running CoMEt on permuted matrices... {}/{}\r".format(i+1, args.num_permutations))
            sys.stdout.flush()

            # Create a permuted dataset and save it to a temporary file
            permuted = C.permute_mutation_data(G, genes, patients, seed, args.Q)
            _, _, _, _, _, permutedPatientToGenes = permuted
            adj_list = [ p + "\t" + "\t".join( sorted(permutedPatientToGenes[p]) ) for p in patients ]

            permutation_file = "{}/permuted-matrix-{}.m2".format(directory, i+1)
            with open(permutation_file, 'w') as outfile:
                outfile.write('\n'.join(adj_list))

            # Add the new arguments (a real list, so += works on Python 3 too)
            permuteArgs = [ str(arg) for arg in cometArgs ]
            permuteArgs += [ "-m", permutation_file ]
            permuteArgs += [ "-o", "{}/comet-results-on-permutation-{}".format(directory, i+1)]
            arguments.append( permuteArgs )

        if args.parallel:
            pool = mp.Pool(25)
            try:
                pool.map(runComet, arguments)
            finally:
                # Release the worker processes even when a run fails.
                pool.close()
                pool.join()
        else:
            for permuteArgs in arguments:
                runComet(permuteArgs)

        # Find the maximum test statistic on the permuted datasets
        maxStat = 0
        for rf in os.listdir(directory):
            if not rf.startswith("comet-results"):
                continue
            with open("{}/{}".format(directory, rf)) as infile:
                # Only the second line of each file is inspected.
                for line in islice(infile, 1, 2):
                    score = float(line.split("\t")[1])
                    if score > maxStat:
                        maxStat = score

        print("*" * 80)
        print("Number of permutations: %s" % (args.num_permutations,))
        print("Max statistic: %s" % (maxStat,))

        # Output the results to files
        with open("{}/comet-stats.json".format(args.output_directory), "w") as outfile:
            # The "mutationNatrix" key spelling is kept unchanged in case
            # existing readers of this file depend on it.
            output = dict(maxPermutedWeight=maxStat,
                          numPermutations=args.num_permutations,
                          keepTempFiles=args.keep_temp_files,
                          mutationNatrix=args.mutation_matrix,
                          geneFile=args.gene_file, patientFile=args.patient_file,
                          minFreq=args.min_freq, Q=args.Q)
            json.dump( output, outfile, sort_keys=True, indent=4)
    finally:
        # Destroy the temporary directory if necessary, even after a failure
        if not args.keep_temp_files:
            shutil.rmtree(directory)
コード例 #3
0
def _strip_flags(argv, value_flags, switch_flags):
    """Return a copy of *argv* with the listed command-line flags removed.

    Args:
        argv: Command-line tokens, without the program name.
        value_flags: Flags followed by one value; the flag and the token
            after it are both dropped.
        switch_flags: Flags that stand alone; only the flag itself is dropped.

    Returns:
        A new list holding the remaining tokens in their original order.
    """
    kept = []
    skip_value = False
    for token in argv:
        if skip_value:
            # This token is the value of the flag that was just dropped.
            skip_value = False
        elif token in value_flags:
            skip_value = True
        elif token not in switch_flags:
            kept.append(token)
    return kept


def run(args):
    """Run CoMEt on permuted mutation matrices and record the maximum score.

    One permuted matrix is written and run through CoMEt per permutation,
    each with its own seed drawn from a generator seeded with ``args.seed``.
    The largest value found in the second column of the second line of each
    ``comet-results*`` file is printed and written, with the run parameters,
    to ``comet-stats.json`` in ``args.output_directory``.

    Args:
        args: Parsed command-line arguments. Attributes read here:
            ``mutation_matrix``, ``patient_file``, ``gene_file``,
            ``min_freq``, ``verbose``, ``keep_temp_files``,
            ``output_directory``, ``seed``, ``num_permutations``, ``Q`` and
            ``parallel``.
    """
    import shutil
    import tempfile
    from itertools import islice

    # Load the mutation data
    mutations = C.load_mutation_data(args.mutation_matrix, args.patient_file,
                                     args.gene_file, args.min_freq)
    m, n, genes, patients, geneToCases, patientToGenes = mutations

    # Single-argument print() calls behave the same on Python 2 and 3.
    if args.verbose:
        print('* Mutation data: %s genes x %s patients' % (m, n))

    # Construct bipartite graph from mutation data
    if args.verbose:
        print("* Creating bipartite graph...")
    G = C.construct_mutation_graph(geneToCases, patientToGenes)
    if args.verbose:
        print('\t- Graph has %s edges among %s nodes.' %
              (len(G.edges()), len(G.nodes())))

    # Set up the arguments for a general CoMEt run; -m and -o are re-added
    # per permutation below.
    # NOTE(review): --parallel and --keep_temp_files are only truth-tested in
    # this function, so they are treated as bare switches that do not consume
    # the following token; confirm against the argument parser.
    cometArgs = _strip_flags(sys.argv[1:], ("-np", "-m", "-o"),
                             ("--parallel", "--keep_temp_files"))

    if args.keep_temp_files:
        directory = args.output_directory
    else:
        directory = tempfile.mkdtemp(dir=".", prefix=".tmp")

    try:
        # Generate random seeds for each permutation
        random.seed(args.seed)
        seeds = [
            random.randint(0, 2**31 - 1) for _ in range(args.num_permutations)
        ]

        arguments = []
        for i, seed in enumerate(seeds):
            # Print simple progress bar
            sys.stdout.write(
                "* Running CoMEt on permuted matrices... {}/{}\r".format(
                    i + 1, args.num_permutations))
            sys.stdout.flush()

            # Create a permuted dataset and save it to a temporary file
            permuted = C.permute_mutation_data(G, genes, patients, seed,
                                               args.Q)
            _, _, _, _, _, permutedPatientToGenes = permuted
            adj_list = [
                p + "\t" + "\t".join(sorted(permutedPatientToGenes[p]))
                for p in patients
            ]

            permutation_file = "{}/permuted-matrix-{}.m2".format(
                directory, i + 1)
            with open(permutation_file, 'w') as outfile:
                outfile.write('\n'.join(adj_list))

            # Add the new arguments (a real list, so += works on Python 3 too)
            permuteArgs = [str(arg) for arg in cometArgs]
            permuteArgs += ["-m", permutation_file]
            permuteArgs += [
                "-o",
                "{}/comet-results-on-permutation-{}".format(directory, i + 1)
            ]
            arguments.append(permuteArgs)

        if args.parallel:
            pool = mp.Pool(25)
            try:
                pool.map(runComet, arguments)
            finally:
                # Release the worker processes even when a run fails.
                pool.close()
                pool.join()
        else:
            for permuteArgs in arguments:
                runComet(permuteArgs)

        # Find the maximum test statistic on the permuted datasets
        maxStat = 0
        for rf in os.listdir(directory):
            if not rf.startswith("comet-results"):
                continue
            with open("{}/{}".format(directory, rf)) as infile:
                # Only the second line of each file is inspected.
                for line in islice(infile, 1, 2):
                    score = float(line.split("\t")[1])
                    if score > maxStat:
                        maxStat = score

        print("*" * 80)
        print("Number of permutations: %s" % (args.num_permutations, ))
        print("Max statistic: %s" % (maxStat, ))

        # Output the results to files
        with open("{}/comet-stats.json".format(args.output_directory),
                  "w") as outfile:
            # The "mutationNatrix" key spelling is kept unchanged in case
            # existing readers of this file depend on it.
            output = dict(maxPermutedWeight=maxStat,
                          numPermutations=args.num_permutations,
                          keepTempFiles=args.keep_temp_files,
                          mutationNatrix=args.mutation_matrix,
                          geneFile=args.gene_file,
                          patientFile=args.patient_file,
                          minFreq=args.min_freq,
                          Q=args.Q)
            json.dump(output, outfile, sort_keys=True, indent=4)
    finally:
        # Destroy the temporary directory if necessary, even after a failure
        if not args.keep_temp_files:
            shutil.rmtree(directory)
コード例 #4
0
def _strip_flags(argv, value_flags, switch_flags):
    """Return a copy of *argv* with the listed command-line flags removed.

    Args:
        argv: Command-line tokens, without the program name.
        value_flags: Flags followed by one value; the flag and the token
            after it are both dropped.
        switch_flags: Flags that stand alone; only the flag itself is dropped.

    Returns:
        A new list holding the remaining tokens in their original order.
    """
    kept = []
    skip_value = False
    for token in argv:
        if skip_value:
            # This token is the value of the flag that was just dropped.
            skip_value = False
        elif token in value_flags:
            skip_value = True
        elif token not in switch_flags:
            kept.append(token)
    return kept


def run(args):
    """Run CoMEt on the real data and on permuted matrices, then visualize.

    CoMEt is first run (with ``--noviz``) using the command-line arguments,
    then once per permuted mutation matrix. The largest value found in the
    second column of the second line of each permuted ``.tsv`` result is
    passed, together with the real results table and the real mutation data,
    to ``C.output_comet_viz``.

    Args:
        args: Parsed command-line arguments. Attributes read here:
            ``output_directory``, ``mutation_matrix``, ``patient_file``,
            ``gene_file``, ``min_freq``, ``subtype``, ``verbose``,
            ``keep_temp_files``, ``seed``, ``num_permutations``, ``Q`` and
            ``parallel``.

    Raises:
        RuntimeError: If the run on the real data left no ``.tsv`` file in
            its ``results`` directory.
    """
    import shutil
    import tempfile
    from itertools import islice

    # NOTE(review): --parallel and --keep_temp_files are only truth-tested in
    # this function, so they are treated as bare switches that do not consume
    # the following token; confirm against the argument parser.
    switchFlags = ("--parallel", "--keep_temp_files")

    # Set up the arguments for a general CoMEt run on real data
    realOutputDir = "{}/comet-results".format(args.output_directory)
    realCometArgs = _strip_flags(sys.argv[1:], ("-np", "-o"), switchFlags)
    realCometArgs += ["-o", realOutputDir, "--noviz"]
    # Perform a simple run without viz first.
    runComet(realCometArgs)

    # Load the real mutation data
    realMutations = C.load_mutation_data(args.mutation_matrix,
                                         args.patient_file, args.gene_file,
                                         args.min_freq, args.subtype)
    m, n, genes, patients, geneToCases, patientToGenes, subtypes = realMutations

    if args.verbose:
        print(f'* Mutation data: {m} genes x {n} patients')

    # Construct bipartite graph from mutation data
    if args.verbose:
        print('* Creating bipartite graph...')
    G = C.construct_mutation_graph(geneToCases, patientToGenes)
    if args.verbose:
        print('\t- Graph has', len(G.edges()), 'edges among', len(G.nodes()),
              'nodes.')

    # Arguments shared by every CoMEt run on a permuted matrix; -m and -o are
    # re-added per permutation below.
    cometArgs = _strip_flags(sys.argv[1:], ("-np", "-m", "-o"), switchFlags)
    cometArgs.append('--noviz')

    if args.keep_temp_files:
        directory = args.output_directory
    else:
        directory = tempfile.mkdtemp(dir=".", prefix=".tmp")

    try:
        # Generate random seeds for each permutation
        random.seed(args.seed)
        seeds = [
            random.randint(0, 2**31 - 1) for _ in range(args.num_permutations)
        ]

        arguments = []
        for i, seed in enumerate(seeds):
            # Print simple progress bar
            sys.stdout.write(
                "* Running CoMEt on permuted matrices... {}/{}\r".format(
                    i + 1, args.num_permutations))
            sys.stdout.flush()

            # Create a permuted dataset and save it to a temporary file. The
            # permuted mapping gets its own name so the real geneToCases and
            # patientToGenes stay intact for the visualization below.
            permuted = C.permute_mutation_data(G, genes, patients, seed,
                                               args.Q)
            _, _, _, _, _, permutedPatientToGenes = permuted
            adj_list = [
                p + "\t" + "\t".join(sorted(permutedPatientToGenes[p]))
                for p in patients
            ]

            permutation_file = "{}/permuted-matrix-{}.m2".format(
                directory, i + 1)
            with open(permutation_file, 'w') as outfile:
                outfile.write('\n'.join(adj_list))

            # Add the new arguments
            permuteArgs = [str(arg) for arg in cometArgs]
            permuteArgs += ["-m", permutation_file]
            permuteArgs += [
                "-o",
                "{}/comet-results-on-permutation-{}".format(directory, i + 1)
            ]
            arguments.append(permuteArgs)

        if args.parallel:
            pool = mp.Pool(25)
            try:
                pool.map(runComet, arguments)
            finally:
                # Release the worker processes even when a run fails.
                pool.close()
                pool.join()
        else:
            for permuteArgs in arguments:
                runComet(permuteArgs)

        # Find the maximum test statistic on the permuted datasets
        maxStat = 0
        for rf in os.listdir(directory):
            if not rf.startswith("comet-results-on-permutation"):
                continue
            permutedResultsDir = "{}/{}/results".format(directory, rf)
            for df in os.listdir(permutedResultsDir):
                if not df.endswith(".tsv"):
                    continue
                with open("{}/{}".format(permutedResultsDir, df)) as infile:
                    # Only the second line of each file is inspected.
                    for line in islice(infile, 1, 2):
                        score = float(line.split("\t")[1])
                        if score > maxStat:
                            maxStat = score

        print("*" * 80)
        print("Number of permutations:", args.num_permutations)
        print("Max statistic:", maxStat)

        # Load the CoMEt results on the real data. When several .tsv files
        # exist, the last one listed wins.
        resultsTable = None
        realResultsDir = "{}/results".format(realOutputDir)
        for rf in os.listdir(realResultsDir):
            if rf.startswith('.') or not rf.endswith(".tsv"):
                continue
            with open("{}/{}".format(realResultsDir, rf)) as infile:
                resultsTable = [l.rstrip() for l in infile]
        if resultsTable is None:
            raise RuntimeError(
                "No .tsv results found in {}".format(realResultsDir))

        realMutations = (m, n, genes, patients, geneToCases, patientToGenes)
        outputDirViz = realOutputDir + "/viz/"
        C.ensure_dir(outputDirViz)

        # Perform visualization
        C.output_comet_viz(RC.get_parser().parse_args(realCometArgs),
                           realMutations, resultsTable, maxStat,
                           args.num_permutations)
    finally:
        # Destroy the temporary directory if necessary, even after a failure
        if not args.keep_temp_files:
            shutil.rmtree(directory)