Example #1
0
def main():

    mutationmatrix = '/Users/jlu96/maf/new/PRAD_broad/PRAD_broad-som.m2'
    patientFile = None #'/Users/jlu96/maf/new/PRAD_broad/shared_patients.plst'
    geneFile = None #'/Users/jlu96/conte/jlu/REQUIREDFILES_OnlyLoss2/COSMICGenes_OnlyLoss.txt'
    load_directory = '/Users/jlu96/conte/jlu/Analyses/CooccurImprovement/LoadMatrices'
    minFreq = 0
    num_permutations = 20
    binary_perm_method = False
    Q = 100
    write_matrices = True
    matrixdirectory = '/Users/jlu96/conte/jlu/Analyses/CooccurImprovement/LoadMatrices'
        #'/Users/jlu96/conte/jlu/Analyses/CooccurImprovement/SARC_broad-som-jl-' + ('matrix' if binary_perm_method else 'network')
    outmutexfile = matrixdirectory + '/mutex' + str(num_permutations) + str(time.time()) + '.tsv'
    outcooccurfile = matrixdirectory + '/cooccur' + str(num_permutations)  + str(time.time()) + '.tsv'
    outseedsfile = matrixdirectory + '/seeds' + str(time.time()) + '.tsv'


    if not os.path.exists(os.path.dirname(matrixdirectory)):
        os.makedirs(os.path.dirname(matrixdirectory))


    numGenes, numCases, genes, patients, geneToCases, patientToGenes = mex.load_mutation_data(mutationmatrix, patientFile, geneFile, minFreq)

    print "numGenes ", numGenes, " and numCases ", numCases

    for patient in patients:
        if not patientToGenes[patient]:
            patientToGenes.pop(patient)
            print patient, "popped"

    # Generate Permutation Matrices
    pm = PermutationMatrices(geneToCases, patientToGenes, num_permutations, Q=Q, matrixdirectory=matrixdirectory,
                             binary_perm_method=binary_perm_method, write_matrices=write_matrices, load_directory=load_directory,
                             geneFile=geneFile, patientFile=patientFile, minFreq=minFreq)

    # Make list of pairs from highly mutated genes
    test_genes = [gene for gene in genes if len(geneToCases[gene]) > 5]
    # for test_gene in test_genes:
    #     print test_gene
    genepairs = met.getgenepairs(geneToCases, test_genes)
    print "Number of pairs to test ", len(genepairs)





    # CALCULATE MUTEX

    # Create a list of ConditionFunctions that you must later initialize...
    ConditionFunctions = range(len(genepairs))
    mutex_set_condition_function_list = []

    # Generate set_condition_function_list
    for i in range(len(genepairs)):
        genepair = genepairs[i]

        condition_dict = {}
        condition_dict['Genes'] = tuple(genepair)
        condition_dict['Overlap'] = len(set.intersection(*[geneToCases[gene] for gene in condition_dict['Genes']]))
        condition_dict['Mutex'] = True

        ConditionFunctions[i] = Condition([condition_dict])

        if [condition_dict] != ConditionFunctions[i].conditions:
            print condition_dict, ConditionFunctions[i].conditions


        mutex_set_condition_function_list.append((genepair, ConditionFunctions[i]))

    print "Finished mutex condition function list"

    t= time.time()
    # Calculate pvalues for mutual exclusivity
    pair_to_mutex = {}

    pair_to_mutex_network_pvalue = pm.set_to_pvalue(mutex_set_condition_function_list)
    print "mutex pair network pvalues finished in ", time.time() - t

    for genepair in genepairs:
        pair_to_mutex[genepair] = mex.analyze_mutex_set_new(numCases, geneToCases, patientToGenes, genepair)
        pair_to_mutex[genepair]['NetworkProbability'] = pair_to_mutex_network_pvalue[genepair]




    # Write to output
    with open(outmutexfile, 'w') as csvfile:
        fieldnames = pair_to_mutex[genepairs[0]].keys()
        writer = csv.DictWriter(csvfile, delimiter='\t', fieldnames=fieldnames)
        writer.writeheader()
        for genepair in pair_to_mutex:
            writer.writerow(pair_to_mutex[genepair])



    # CALCULATE COOCCUR

    cooccur_set_condition_function_list = []

    # Generate set_condition_function_list
    for genepair in genepairs:
        ConditionFunction = Condition(None)

        condition_dict = {}
        condition_dict['Genes'] = tuple(genepair)
        condition_dict['Overlap'] = len(set.intersection(*[geneToCases[gene] for gene in condition_dict['Genes']]))
        condition_dict['Mutex'] = False

        ConditionFunction.set_params([condition_dict])

        cooccur_set_condition_function_list.append((genepair, ConditionFunction))



    t= time.time()
    # Calculate pvalues for mutual exclusivity
    pair_to_cooccur = {}

    pair_to_cooccur_network_pvalue = pm.set_to_pvalue(cooccur_set_condition_function_list)
    print "cooccur pair network pvalues finished in ", time.time() - t

    for genepair in genepairs:
        pair_to_cooccur[genepair] = mex.analyze_cooccur_set_new(numCases, geneToCases, patientToGenes, genepair)
        pair_to_cooccur[genepair]['NetworkProbability'] = pair_to_cooccur_network_pvalue[genepair]




    # Write to output
    with open(outcooccurfile, 'w') as csvfile:
        fieldnames = pair_to_cooccur[genepairs[0]].keys()
        writer = csv.DictWriter(csvfile, delimiter='\t', fieldnames=fieldnames)
        writer.writeheader()
        for genepair in pair_to_cooccur:
            writer.writerow(pair_to_cooccur[genepair])


    # Write seeds to output
    with open(outseedsfile, 'w') as csvfile:
        writer = csv.writer(csvfile, delimiter='\t')
        for seed in pm.seeds:
            writer.writerow([seed])
Example #2
0
def main():


    mutationmatrix = '/Users/jlu96/maf/new/OV_broad/OV_broad-cna-jl.m2'
    patientFile = '/Users/jlu96/maf/new/OV_broad/shared_patients.plst'
    cpairfile = '/Users/jlu96/conte/jlu/Analyses/CooccurImprovement/LorenzoModel/Binomial/OV_broad-cna-jl-cpairs-min_cohort.txt'
    partitionfile = '/Users/jlu96/maf/new/OV_broad/OV_broad-cna-jl.ppf'
    load_partitions = True
    do_min_cohort = True

    geneFile = None
    minFreq = 0
    test_minFreq = 100
    compute_mutex = True



    include_cohort_info = False
    num_cohorts_list = [1,3, 5, 7]


    numGenes, numCases, genes, patients, geneToCases, patientToGenes = mex.load_mutation_data(mutationmatrix, patientFile, geneFile, minFreq)

    print "number of genes is ", numGenes


    if do_min_cohort:
        cohort_dict, clusterToProp, min_cohort = partition.load_patient_cohorts(partitionfile, patientToGenes)
        min_cohort_genes = set.union(*(patientToGenes[p] for p in min_cohort))

        print "getting pairs"
        genepairs = met.getgenepairs(geneToCases, min_cohort_genes, test_minFreq=test_minFreq)

        print "Number of pairs ", len(genepairs)


        print "Normal cooccur test"
        t = time.time()
        cpairsdict, cgenedict = met.cooccurpairs(numCases, geneToCases, patientToGenes, genepairs, compute_mutex=compute_mutex)
        print "Normal cooccur done in ", time.time() - t

        print "Beginning cohorts"
        t = time.time()
        cpairsdict = add_BinomP_min_cohort_all_pairs(cpairsdict, geneToCases, patientToGenes, cohort_dict, min_cohort)
        print "Cohorts done in ", time.time() - t

    else:
        genepairs = met.getgenepairs(geneToCases, genes, test_minFreq=test_minFreq)
        print "Number of pairs ", len(genepairs)


        print "Normal cooccur test"
        cpairsdict, cgenedict = met.cooccurpairs(numCases, geneToCases, patientToGenes, genepairs, compute_mutex=compute_mutex)

        # print "Add binomial probability"
        # cpairsdict = add_BinomP_all_pairs(cpairsdict, geneToCases, patientToGenes)

        # undo
        print "Beginning cohorts"





        if load_partitions:
            cohort_dict = partition.load_patient_cohorts(partitionfile)
            cpairsdict = add_BinomP_cohorts_all_pairs(cpairsdict, geneToCases, patientToGenes, cohort_dict)

        else:
            for num_cohorts in num_cohorts_list:
                # get cohorts
                cohort_dict = generate_patient_cohorts(patientToGenes, num_cohorts)

                cpairsdict = add_BinomP_cohorts_all_pairs(cpairsdict, geneToCases, patientToGenes, cohort_dict)

                if include_cohort_info:
                    cpairsdict = add_cohorts_all_pairs(cpairsdict, geneToCases, patientToGenes, cohort_dict)

    print "Writing to file..."
    met.writeanydict(cpairsdict, cpairfile)
Example #3
0
def main():

    mutationmatrix = '/Users/jlu96/maf/new/OV_broad/OV_broad-cna-jl.m2'
    patientFile = '/Users/jlu96/maf/new/OV_broad/shared_patients.plst'
    cpairfile = '/Users/jlu96/conte/jlu/Analyses/CooccurImprovement/LorenzoModel/Binomial/OV_broad-cna-jl-cpairs-min_cohort.txt'
    partitionfile = '/Users/jlu96/maf/new/OV_broad/OV_broad-cna-jl.ppf'
    load_partitions = True
    do_min_cohort = True

    geneFile = None
    minFreq = 0
    test_minFreq = 100
    compute_mutex = True

    include_cohort_info = False
    num_cohorts_list = [1, 3, 5, 7]

    numGenes, numCases, genes, patients, geneToCases, patientToGenes = mex.load_mutation_data(
        mutationmatrix, patientFile, geneFile, minFreq)

    print "number of genes is ", numGenes

    if do_min_cohort:
        cohort_dict, clusterToProp, min_cohort = partition.load_patient_cohorts(
            partitionfile, patientToGenes)
        min_cohort_genes = set.union(*(patientToGenes[p] for p in min_cohort))

        print "getting pairs"
        genepairs = met.getgenepairs(geneToCases,
                                     min_cohort_genes,
                                     test_minFreq=test_minFreq)

        print "Number of pairs ", len(genepairs)

        print "Normal cooccur test"
        t = time.time()
        cpairsdict, cgenedict = met.cooccurpairs(numCases,
                                                 geneToCases,
                                                 patientToGenes,
                                                 genepairs,
                                                 compute_mutex=compute_mutex)
        print "Normal cooccur done in ", time.time() - t

        print "Beginning cohorts"
        t = time.time()
        cpairsdict = add_BinomP_min_cohort_all_pairs(cpairsdict, geneToCases,
                                                     patientToGenes,
                                                     cohort_dict, min_cohort)
        print "Cohorts done in ", time.time() - t

    else:
        genepairs = met.getgenepairs(geneToCases,
                                     genes,
                                     test_minFreq=test_minFreq)
        print "Number of pairs ", len(genepairs)

        print "Normal cooccur test"
        cpairsdict, cgenedict = met.cooccurpairs(numCases,
                                                 geneToCases,
                                                 patientToGenes,
                                                 genepairs,
                                                 compute_mutex=compute_mutex)

        # print "Add binomial probability"
        # cpairsdict = add_BinomP_all_pairs(cpairsdict, geneToCases, patientToGenes)

        # undo
        print "Beginning cohorts"

        if load_partitions:
            cohort_dict = partition.load_patient_cohorts(partitionfile)
            cpairsdict = add_BinomP_cohorts_all_pairs(cpairsdict, geneToCases,
                                                      patientToGenes,
                                                      cohort_dict)

        else:
            for num_cohorts in num_cohorts_list:
                # get cohorts
                cohort_dict = generate_patient_cohorts(patientToGenes,
                                                       num_cohorts)

                cpairsdict = add_BinomP_cohorts_all_pairs(
                    cpairsdict, geneToCases, patientToGenes, cohort_dict)

                if include_cohort_info:
                    cpairsdict = add_cohorts_all_pairs(cpairsdict, geneToCases,
                                                       patientToGenes,
                                                       cohort_dict)

    print "Writing to file..."
    met.writeanydict(cpairsdict, cpairfile)