コード例 #1
0
ファイル: bingenesbypairs.py プロジェクト: lujonathanh/mutex
def run(args):
    mdictfile = args.mdictfile
    cdictfile = args.cdictfile

    mprob = args.mprob
    cprob = args.cprob
    cooccur_distance_threshold = args.cooccur_distance_threshold
    bin_distance_threshold = args.bin_distance_threshold

    mutationmatrix = args.mutation_matrix
    newmutationmatrix = args.newmutationmatrix

    file_prefix = args.output_prefix
    if not file_prefix:
        file_prefix = newmutationmatrix

    geneFile = args.gene_file
    patientFile = args.patient_file
    gene_blacklist = args.gene_blacklist_file
    patient_blacklist = args.patient_blacklist_file
    minFreq = args.min_freq
    minCooccur = args.min_cooccur
    min_cooccurrence_ratio = args.min_cooccurrence_ratio
    top_percentile = args.top_percentile
    top_number = args.top_number

    parallel_compute_number = args.parallel_compute_number


    filter_cooccur_same_segment = args.filter_cooccur_same_segment
    fcss_cratiothresh = args.fcss_cratiothresh
    fcss_mutfreqdiffthresh = args.fcss_mutfreqdiffthresh
    fcss_mutfreqdiffratiothresh = args.fcss_mutfreqdiffratiothresh
    fcss_coveragethresh = args.fcss_coveragethresh
    fcss_probabilitythresh = args.fcss_probabilitythresh



    gene_segment_file = args.gene_segment_file
    load_gene_segments = args.load_gene_segments
    is_gene2seg = args.is_gene2seg
    gene_bin_entries_file = args.gene_bin_entries_file
    no_throw_out_extras = args.no_throw_out_extras
    segment_info_file = args.segment_info_file


    if not gene_bin_entries_file:
        gene_bin_entries_file = file_prefix + '_binnedgenes.tsv'

    if not segment_info_file:
        segment_info_file = file_prefix + '_SEGMENTINFO.tsv'

    #-----------------------------------------------------






    mutations = mex.remove_blacklists(gene_blacklist, patient_blacklist,
                                  *mex.load_mutation_data(mutationmatrix, patientFile, geneFile, minFreq))
    numGenes, numCases, genes, patients, geneToCases, patientToGenes = mutations
    print 'Filtered Mutation data: %s genes x %s patients' % (numGenes, numCases)


    # Load segment info
    if load_gene_segments:

        # extra_genes is the genes not found in the segment file.
        # If throw_out_extras is False, extra_genes will be empty.
        geneToBin, extra_genes = load_gene_to_bin(gene_segment_file, geneToCases, no_throw_out_extras=no_throw_out_extras, is_gene2seg=is_gene2seg)

        numGenes, numCases, genes, patients, geneToCases, patientToGenes = mex.remove_extra_genes(extra_genes, numGenes, numCases, genes, patients, geneToCases, patientToGenes)


    else:
        print "Beginning bin genes by co-occurring pairs. "
        genepairs = getgenepairs(geneToCases, genes, closer_than_distance=bin_distance_threshold)
        print "Pairs retrieved. Calculating cooccurring pairs to make bins."

        cpairsdict, cgenedict = met.complete_cooccurpairs(numCases, geneToCases, patientToGenes, genepairs, fcss_probabilitythresh, minCooccur,
                      cooccur_distance_threshold, fcss_cratiothresh, parallel_compute_number,
                      filter_cooccur_same_segment, fcss_cratiothresh, fcss_mutfreqdiffratiothresh,
                      fcss_coveragethresh, fcss_probabilitythresh)

        print "Cooccurring pairs calculated."
        geneToBin = get_gene_bins_cooccur_same_segment(cpairsdict, geneToCases, fcss_cratiothresh, fcss_mutfreqdiffthresh,
                           fcss_mutfreqdiffratiothresh, fcss_coveragethresh, fcss_probabilitythresh, bin_distance_threshold=bin_distance_threshold)
        # Write these new bins out
        new_bins = convert_genes_to_bins(genes, geneToBin)
        write_segment_infos(new_bins, filename=segment_info_file)
        print "New SEGMENTINFO written to ", segment_info_file

        write_gene_positions(new_bins)
        print "New segment positions appended to gene_positions.txt"


    # Update to the new mutation matrix.
    geneToBinSet, bin_setToBin = bin_sets_from_geneToBin(genes, geneToBin)

    newGeneToCases, newPatientToGenes = update_geneToCases_patientToGenes(geneToCases, patientToGenes, bin_setToBin, at_least_half=True)

    gene_bin_entries = get_gene_bin_entries(geneToCases, newGeneToCases, geneToBinSet, bin_setToBin)

    if gene_bin_entries_file:
        met.writeanydict(gene_bin_entries, gene_bin_entries_file)
        print "Gene bin entries written to ", gene_bin_entries_file


    # Write the new mutation matrix out.
    writemutationmatrix(newPatientToGenes, filename=newmutationmatrix)
コード例 #2
0
ファイル: bingenesbypairs.py プロジェクト: lujonathanh/mutex
def run(args):
    mdictfile = args.mdictfile
    cdictfile = args.cdictfile

    mprob = args.mprob
    cprob = args.cprob
    cooccur_distance_threshold = args.cooccur_distance_threshold
    bin_distance_threshold = args.bin_distance_threshold

    mutationmatrix = args.mutation_matrix
    newmutationmatrix = args.newmutationmatrix

    file_prefix = args.output_prefix
    if not file_prefix:
        file_prefix = newmutationmatrix

    geneFile = args.gene_file
    patientFile = args.patient_file
    gene_blacklist = args.gene_blacklist_file
    patient_blacklist = args.patient_blacklist_file
    minFreq = args.min_freq
    minCooccur = args.min_cooccur
    min_cooccurrence_ratio = args.min_cooccurrence_ratio
    top_percentile = args.top_percentile
    top_number = args.top_number

    parallel_compute_number = args.parallel_compute_number

    filter_cooccur_same_segment = args.filter_cooccur_same_segment
    fcss_cratiothresh = args.fcss_cratiothresh
    fcss_mutfreqdiffthresh = args.fcss_mutfreqdiffthresh
    fcss_mutfreqdiffratiothresh = args.fcss_mutfreqdiffratiothresh
    fcss_coveragethresh = args.fcss_coveragethresh
    fcss_probabilitythresh = args.fcss_probabilitythresh

    gene_segment_file = args.gene_segment_file
    load_gene_segments = args.load_gene_segments
    is_gene2seg = args.is_gene2seg
    gene_bin_entries_file = args.gene_bin_entries_file
    no_throw_out_extras = args.no_throw_out_extras
    segment_info_file = args.segment_info_file

    if not gene_bin_entries_file:
        gene_bin_entries_file = file_prefix + '_binnedgenes.tsv'

    if not segment_info_file:
        segment_info_file = file_prefix + '_SEGMENTINFO.tsv'

    #-----------------------------------------------------

    mutations = mex.remove_blacklists(
        gene_blacklist, patient_blacklist,
        *mex.load_mutation_data(mutationmatrix, patientFile, geneFile,
                                minFreq))
    numGenes, numCases, genes, patients, geneToCases, patientToGenes = mutations
    print 'Filtered Mutation data: %s genes x %s patients' % (numGenes,
                                                              numCases)

    # Load segment info
    if load_gene_segments:

        # extra_genes is the genes not found in the segment file.
        # If throw_out_extras is False, extra_genes will be empty.
        geneToBin, extra_genes = load_gene_to_bin(
            gene_segment_file,
            geneToCases,
            no_throw_out_extras=no_throw_out_extras,
            is_gene2seg=is_gene2seg)

        numGenes, numCases, genes, patients, geneToCases, patientToGenes = mex.remove_extra_genes(
            extra_genes, numGenes, numCases, genes, patients, geneToCases,
            patientToGenes)

    else:
        print "Beginning bin genes by co-occurring pairs. "
        genepairs = getgenepairs(geneToCases,
                                 genes,
                                 closer_than_distance=bin_distance_threshold)
        print "Pairs retrieved. Calculating cooccurring pairs to make bins."

        cpairsdict, cgenedict = met.complete_cooccurpairs(
            numCases, geneToCases, patientToGenes, genepairs,
            fcss_probabilitythresh, minCooccur, cooccur_distance_threshold,
            fcss_cratiothresh, parallel_compute_number,
            filter_cooccur_same_segment, fcss_cratiothresh,
            fcss_mutfreqdiffratiothresh, fcss_coveragethresh,
            fcss_probabilitythresh)

        print "Cooccurring pairs calculated."
        geneToBin = get_gene_bins_cooccur_same_segment(
            cpairsdict,
            geneToCases,
            fcss_cratiothresh,
            fcss_mutfreqdiffthresh,
            fcss_mutfreqdiffratiothresh,
            fcss_coveragethresh,
            fcss_probabilitythresh,
            bin_distance_threshold=bin_distance_threshold)
        # Write these new bins out
        new_bins = convert_genes_to_bins(genes, geneToBin)
        write_segment_infos(new_bins, filename=segment_info_file)
        print "New SEGMENTINFO written to ", segment_info_file

        write_gene_positions(new_bins)
        print "New segment positions appended to gene_positions.txt"

    # Update to the new mutation matrix.
    geneToBinSet, bin_setToBin = bin_sets_from_geneToBin(genes, geneToBin)

    newGeneToCases, newPatientToGenes = update_geneToCases_patientToGenes(
        geneToCases, patientToGenes, bin_setToBin, at_least_half=True)

    gene_bin_entries = get_gene_bin_entries(geneToCases, newGeneToCases,
                                            geneToBinSet, bin_setToBin)

    if gene_bin_entries_file:
        met.writeanydict(gene_bin_entries, gene_bin_entries_file)
        print "Gene bin entries written to ", gene_bin_entries_file

    # Write the new mutation matrix out.
    writemutationmatrix(newPatientToGenes, filename=newmutationmatrix)
コード例 #3
0
ファイル: chisquared.py プロジェクト: lujonathanh/coffdrop
def main():


    mutationmatrix = '/Users/jlu96/maf/new/OV_broad/OV_broad-cna-jl.m2'
    patientFile = '/Users/jlu96/maf/new/OV_broad/shared_patients.plst'
    cpairfile = '/Users/jlu96/conte/jlu/Analyses/CooccurImprovement/LorenzoModel/Binomial/OV_broad-cna-jl-cpairs-min_cohort.txt'
    partitionfile = '/Users/jlu96/maf/new/OV_broad/OV_broad-cna-jl.ppf'
    load_partitions = True
    do_min_cohort = True

    geneFile = None
    minFreq = 0
    test_minFreq = 100
    compute_mutex = True



    include_cohort_info = False
    num_cohorts_list = [1,3, 5, 7]


    numGenes, numCases, genes, patients, geneToCases, patientToGenes = mex.load_mutation_data(mutationmatrix, patientFile, geneFile, minFreq)

    print "number of genes is ", numGenes


    if do_min_cohort:
        cohort_dict, clusterToProp, min_cohort = partition.load_patient_cohorts(partitionfile, patientToGenes)
        min_cohort_genes = set.union(*(patientToGenes[p] for p in min_cohort))

        print "getting pairs"
        genepairs = met.getgenepairs(geneToCases, min_cohort_genes, test_minFreq=test_minFreq)

        print "Number of pairs ", len(genepairs)


        print "Normal cooccur test"
        t = time.time()
        cpairsdict, cgenedict = met.cooccurpairs(numCases, geneToCases, patientToGenes, genepairs, compute_mutex=compute_mutex)
        print "Normal cooccur done in ", time.time() - t

        print "Beginning cohorts"
        t = time.time()
        cpairsdict = add_BinomP_min_cohort_all_pairs(cpairsdict, geneToCases, patientToGenes, cohort_dict, min_cohort)
        print "Cohorts done in ", time.time() - t

    else:
        genepairs = met.getgenepairs(geneToCases, genes, test_minFreq=test_minFreq)
        print "Number of pairs ", len(genepairs)


        print "Normal cooccur test"
        cpairsdict, cgenedict = met.cooccurpairs(numCases, geneToCases, patientToGenes, genepairs, compute_mutex=compute_mutex)

        # print "Add binomial probability"
        # cpairsdict = add_BinomP_all_pairs(cpairsdict, geneToCases, patientToGenes)

        # undo
        print "Beginning cohorts"





        if load_partitions:
            cohort_dict = partition.load_patient_cohorts(partitionfile)
            cpairsdict = add_BinomP_cohorts_all_pairs(cpairsdict, geneToCases, patientToGenes, cohort_dict)

        else:
            for num_cohorts in num_cohorts_list:
                # get cohorts
                cohort_dict = generate_patient_cohorts(patientToGenes, num_cohorts)

                cpairsdict = add_BinomP_cohorts_all_pairs(cpairsdict, geneToCases, patientToGenes, cohort_dict)

                if include_cohort_info:
                    cpairsdict = add_cohorts_all_pairs(cpairsdict, geneToCases, patientToGenes, cohort_dict)

    print "Writing to file..."
    met.writeanydict(cpairsdict, cpairfile)
コード例 #4
0
ファイル: chisquared.py プロジェクト: lujonathanh/mutex
def main():

    mutationmatrix = '/Users/jlu96/maf/new/OV_broad/OV_broad-cna-jl.m2'
    patientFile = '/Users/jlu96/maf/new/OV_broad/shared_patients.plst'
    cpairfile = '/Users/jlu96/conte/jlu/Analyses/CooccurImprovement/LorenzoModel/Binomial/OV_broad-cna-jl-cpairs-min_cohort.txt'
    partitionfile = '/Users/jlu96/maf/new/OV_broad/OV_broad-cna-jl.ppf'
    load_partitions = True
    do_min_cohort = True

    geneFile = None
    minFreq = 0
    test_minFreq = 100
    compute_mutex = True

    include_cohort_info = False
    num_cohorts_list = [1, 3, 5, 7]

    numGenes, numCases, genes, patients, geneToCases, patientToGenes = mex.load_mutation_data(
        mutationmatrix, patientFile, geneFile, minFreq)

    print "number of genes is ", numGenes

    if do_min_cohort:
        cohort_dict, clusterToProp, min_cohort = partition.load_patient_cohorts(
            partitionfile, patientToGenes)
        min_cohort_genes = set.union(*(patientToGenes[p] for p in min_cohort))

        print "getting pairs"
        genepairs = met.getgenepairs(geneToCases,
                                     min_cohort_genes,
                                     test_minFreq=test_minFreq)

        print "Number of pairs ", len(genepairs)

        print "Normal cooccur test"
        t = time.time()
        cpairsdict, cgenedict = met.cooccurpairs(numCases,
                                                 geneToCases,
                                                 patientToGenes,
                                                 genepairs,
                                                 compute_mutex=compute_mutex)
        print "Normal cooccur done in ", time.time() - t

        print "Beginning cohorts"
        t = time.time()
        cpairsdict = add_BinomP_min_cohort_all_pairs(cpairsdict, geneToCases,
                                                     patientToGenes,
                                                     cohort_dict, min_cohort)
        print "Cohorts done in ", time.time() - t

    else:
        genepairs = met.getgenepairs(geneToCases,
                                     genes,
                                     test_minFreq=test_minFreq)
        print "Number of pairs ", len(genepairs)

        print "Normal cooccur test"
        cpairsdict, cgenedict = met.cooccurpairs(numCases,
                                                 geneToCases,
                                                 patientToGenes,
                                                 genepairs,
                                                 compute_mutex=compute_mutex)

        # print "Add binomial probability"
        # cpairsdict = add_BinomP_all_pairs(cpairsdict, geneToCases, patientToGenes)

        # undo
        print "Beginning cohorts"

        if load_partitions:
            cohort_dict = partition.load_patient_cohorts(partitionfile)
            cpairsdict = add_BinomP_cohorts_all_pairs(cpairsdict, geneToCases,
                                                      patientToGenes,
                                                      cohort_dict)

        else:
            for num_cohorts in num_cohorts_list:
                # get cohorts
                cohort_dict = generate_patient_cohorts(patientToGenes,
                                                       num_cohorts)

                cpairsdict = add_BinomP_cohorts_all_pairs(
                    cpairsdict, geneToCases, patientToGenes, cohort_dict)

                if include_cohort_info:
                    cpairsdict = add_cohorts_all_pairs(cpairsdict, geneToCases,
                                                       patientToGenes,
                                                       cohort_dict)

    print "Writing to file..."
    met.writeanydict(cpairsdict, cpairfile)