Esempio n. 1
0
def main():
    """Build the hyphal network for the PDX mutation data and export its results.

    Reads the interactome from the pickle file given by ``args.graph``,
    builds a ``hyphalNetwork`` from the mutational data, and writes to the
    current directory: the pickled network, node statistics, forest and
    community GO enrichment tables, community statistics, distances
    ('panPDXDistances.csv') and NMI values ('panPDXNMI.csv').
    """
    args = parser.parse_args()
    # get mutational data
    mvals = getMutationalData()

    # load up interactome
    # TODO: replace this with Docker image call
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(args.graph, 'rb') as graph_handle:
        g = pickle.load(graph_handle)

    key = 'mpnstPDXmuts'
    # NOTE(review): no beta is passed, so hyphalNetwork uses its own default.
    this_hyp = hyphalNetwork(mvals, g)
    # Save right after construction so the network can be re-read from file.
    this_hyp._to_file(key + '_hypha.pkl')

    # this is all we need to do in a single eval, then we can do tests later
    this_hyp.node_stats().to_csv(key + '_nodelist.csv')

    for_e = hyEnrich.go_enrich_forests(this_hyp)
    this_hyp.assign_enrichment(for_e, type='forest')
    for_e.to_csv(key + 'enrichedForestGoTerms.csv')

    com_e = hyEnrich.go_enrich_communities(this_hyp)
    this_hyp.assign_enrichment(com_e, type='community')
    # Save again so the pickle also carries the assigned enrichment.
    this_hyp._to_file(key + '_hypha.pkl')
    com_e.to_csv(key + 'enrichedCommunityGOterms.csv')

    this_hyp.community_stats(prefix=key).to_csv(key + '_communityStats.csv')

    res = hyStats.compute_all_distances({'mutations': this_hyp})
    res.to_csv('panPDXDistances.csv')
    nmi = hyStats.compute_all_nmi({'mutations': this_hyp}, g)
    nmi.to_csv('panPDXNMI.csv')
Esempio n. 2
0
def significant_genes(data_frame, group, subgroup, value):
    """Build a 'proteomics' hyphal network from the high z-score rows.

    Args:
        data_frame: table holding the ``group``, ``subgroup`` and ``value``
            columns. A 'zscore' column is written onto it in place.
        group: column whose values become the outer dictionary keys.
        subgroup: column whose values become the inner dictionary keys.
        value: numeric column that is z-scored and stored as inner values.

    Returns:
        A dict with the single key 'proteomics' mapping to the network,
        which is also written to 'proteomics_hypha.pkl'.
    """

    def _subgroup_values(rows):
        # One inner dictionary per group: subgroup label -> measured value.
        return dict(zip(rows[subgroup], rows[value]))

    data_frame['zscore'] = stats.zscore(data_frame[value])
    # One-sided cut: only rows at or above +2.58 are kept (no absolute value).
    above_cutoff = data_frame['zscore'] >= 2.58
    significant = data_frame[above_cutoff]

    grouped = significant.groupby(group).apply(_subgroup_values)
    gene_dictionary = grouped.to_dict()

    key = 'proteomics'
    beta = 0.5
    # hyphalNetwork takes the whole gene dictionary at once.
    # NOTE(review): `g` is not a parameter; presumably a module-level graph.
    this_hyp = hyphalNetwork(gene_dictionary, g.copy(), beta)
    hyphae = {key: this_hyp}
    this_hyp._to_file(key + '_hypha.pkl')
    return hyphae
Esempio n. 3
0
def main():
    """Build the pan-cancer hyphal network from CPTAC data and save it.

    Writes 'clinicalData.csv' and 'mutationData.csv', pickles the network
    to '<refName>_hypNet.pkl' and writes its distance values to
    '<refName>_DistanceVals.csv'.
    """
    args = parser.parse_args()
    qval = args.quant

    # first we run a helper function to make sure we have all cptac data
    fdict = pdata.cptacData()

    # first get proteomics measurements
    allDat = pdata.getCancerData(fdict, qval, byType=False)
    patientData = pdata.getCombinedClinicalData(fdict)
    patientData.to_csv('clinicalData.csv')
    mutationData = pdata.getCombinedMutationData(fdict)
    mutationData.to_csv('mutationData.csv')

    # make sure this file is built!
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open('../odata/igraphPPI.pkl', 'rb') as graph_handle:
        g = pickle.load(graph_handle)
    beta = .5

    # build hyphal network of network communities
    phyph = hyphalNetwork(allDat, g, beta)
    # Was `phpyh._to_file(...)`: an undefined name that raised NameError.
    phyph._to_file(args.refName + '_hypNet.pkl')

    # write out distances within communities
    res = phyph.distVals
    fname = args.refName + '_DistanceVals.csv'
    res.to_csv(fname)
Esempio n. 4
0
def main():
    """Build a hyphal network from a CSV of mutations and pickle it.

    Reads the table at ``args.df``, converts it with ``df2dict``, loads the
    interactome pickle at ``args.graph`` and writes the resulting network
    to '<args.output>_hypha.pkl'.
    """
    args = parser.parse_args()
    # get mutational data
    mdf = pd.read_csv(args.df)
    key = args.output
    mvals = df2dict(mdf)

    # load up interactome
    # TODO: replace this with Docker image call
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(args.graph, 'rb') as graph_handle:
        g = pickle.load(graph_handle)

    # NOTE(review): no beta is passed, so hyphalNetwork uses its own default.
    this_hyp = hyphalNetwork(mvals, g)
    this_hyp._to_file(key + '_hypha.pkl')
Esempio n. 5
0
def significant_genes(data_frame, group, subgroup, value):
    """Build a 'proteomics' hyphal network from the high z-score rows.

    A 'zscore' column computed from ``value`` is written onto ``data_frame``
    in place. Rows with a z-score of at least 2.58 are grouped by ``group``
    into ``{group: {subgroup: value}}`` and passed to ``hyphalNetwork``.

    Returns:
        A dict with the single key 'proteomics' mapping to the network,
        which is also written to 'proteomics_hypha.pkl'.
    """

    def _to_mapping(rows):
        # Pair each subgroup label with its value inside one group.
        return dict(zip(rows[subgroup], rows[value]))

    zscores = stats.zscore(data_frame[value])
    data_frame['zscore'] = zscores
    # One-sided threshold: strongly negative z-scores are not selected.
    significant = data_frame[data_frame['zscore'] >= 2.58]
    by_group = significant.groupby(group)
    gene_dictionary = by_group.apply(_to_mapping).to_dict()

    beta = 0.5
    key = 'proteomics'
    # NOTE(review): `g` is not a parameter; presumably a module-level graph.
    this_hyp = hyphalNetwork(gene_dictionary, g.copy(), beta)
    hyphae = {key: this_hyp}
    this_hyp._to_file(key + '_hypha.pkl')
    return hyphae
Esempio n. 6
0
def build_hyphae_from_data(qt, g, sample=False):
    """Temp function to build one hyphal network per loaded cancer dataset.

    Args:
        qt: value passed to ``loadCancerData``; its string form is also
            appended to every dataset key to label the outputs.
        g: graph object; each network receives its own ``g.copy()``.
        sample: when truthy, each dataset is cut down to a random subset of
            at most 300 entries before the network is built.

    Returns:
        Dict mapping '<key><qt>' to the network built for that dataset.
        Each network is also written to '<key><qt>_hypha.pkl'.
    """
    # this is the framework for the PDC data parser.
    # now we want to build network communities for each
    hyphae = dict()
    patDiffs = loadCancerData(qt)
    beta = 0.5
    max_sampled = 300
    for key, vals in patDiffs.items():
        if sample:
            # Clamp the sample size: random.sample raises ValueError when
            # asked for more items than the population holds.
            n_keep = min(max_sampled, len(vals))
            vals = {v: vals[v] for v in random.sample(list(vals), n_keep)}
            print(len(vals))
        label = key + str(qt)
        this_hyp = hyphalNetwork(vals, g.copy(), beta=beta, g=3,
                                 do_forest=False, noComms=False)
        hyphae[label] = this_hyp
        this_hyp._to_file(label + '_hypha.pkl')
    return hyphae
Esempio n. 7
0
def build_hyphae_from_data():
    """Temp function to build hyphal networks from PDC files in ./data.

    Parses the breast, lung, colon and glioblastoma proteome files, picks
    out the patients listed as normals in the biospecimen manifest,
    computes tumor-vs-normal values per cancer type and builds one
    ``hyphalNetwork`` for each.

    Returns:
        Dict keyed by 'brca', 'luad', 'coad' and 'gbm' holding the
        networks; each is also written to '<key>_hypha.pkl'.
    """
    # this is the framework for the PDC data parser.
    norms = prot.normals_from_manifest('data/PDC_biospecimen_manifest_05112020_184928.csv')

    # Alternative breast input, currently disabled:
    #   'data/TCGA_Breast_BI_Proteome.itraq.tsv'
    bcData = prot.parsePDCfile('data/CPTAC2_Breast_Prospective_Collection_BI_Proteome.tmt10.tsv')
    lungData = prot.parsePDCfile('data/CPTAC3_Lung_Adeno_Carcinoma_Proteome.tmt10.tsv')
    colData = prot.parsePDCfile('data/CPTAC2_Colon_Prospective_Collection_PNNL_Proteome.tmt10.tsv')
    gbmData = prot.parsePDCfile('data/CPTAC3_Glioblastoma_Multiforme_Proteome.tmt11.tsv')

    # Patients of each dataset that the manifest lists as normals.
    normPats = {
        'brca': {p for p in bcData['Patient'] if p in norms['Breast Invasive Carcinoma']},
        'coad': {p for p in colData['Patient'] if p in norms['Colon Adenocarcinoma']},
        'luad': {p for p in lungData['Patient'] if p in norms['Lung Adenocarcinoma']},
        'gbm': {p for p in gbmData['Patient'] if p in norms['Other']},
    }

    gfile = '../../../OmicsIntegrator2/interactomes/inbiomap.9.12.2016.full.oi2'
    g = hyp.make_graph(gfile)

    namemapper = None  # hyp.mapHGNCtoNetwork()

    # Processing order; it also fixes the key order of the returned dict.
    cohorts = (
        ('brca', bcData),
        ('luad', lungData),
        ('coad', colData),
        ('gbm', gbmData),
    )

    # here we get the top values for each patient
    # (not used below; computed so the same calls are still made)
    patVals = {name: prot.getProtsByPatient(table, namemapper)
               for name, table in cohorts}

    # here we get the top most distinguished from normals
    patDiffs = {name: prot.getTumorNorm(table, normPats[name], namemapper)
                for name, table in cohorts}

    # now we want to build network communities for each
    hyphae = dict()
    for key, diffs in patDiffs.items():
        this_hyp = hyphalNetwork(diffs, g)
        hyphae[key] = this_hyp
        this_hyp._to_file(key + '_hypha.pkl')
    return hyphae
Esempio n. 8
0
def main():
    """Compare a saved pan-cancer network with per-cancer-type networks.

    Loads the pickled network at ``args.hyph``, builds one ``hyphalNetwork``
    per entry returned by ``getCancerData(..., byType=True)``, and writes
    the NMI table for all of them to '<refName>_nmi.csv'.
    """
    args = parser.parse_args()
    qval = args.quant

    # first we run a helper function to make sure we have all cptac data
    fdict = pdat.cptacData()

    # first get proteomics measurements
    allDat = pdat.getCancerData(fdict, qval, byType=True)

    # make sure this file is built!
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open('../odata/igraphPPI.pkl', 'rb') as graph_handle:
        g = pickle.load(graph_handle)
    beta = .5

    # load the previously built pan-cancer network from disk
    with open(args.hyph, 'rb') as hyph_handle:
        phyph = pickle.load(hyph_handle)

    # build hyphal network of network communities, one per cancer type
    hDict = {'panCan': phyph}
    for ct, dat in allDat.items():
        hDict[ct] = hyphalNetwork(dat, g, beta)

    nmi = hyStats.compute_all_nmi(hDict, g)
    nmi.to_csv(args.refName + '_nmi.csv')
Esempio n. 9
0
def main():
    """Build or load the AML hyphal networks, then export and upload results.

    With no ``args.fromFile``, builds a 'mutations' and a 'proteomics'
    network and pickles each; otherwise loads the networks via
    ``loadFromFile``. When ``args.getDist`` is set, distance and NMI tables
    are written and stored. For every network, node statistics are written
    and stored, GO enrichment is computed if ``args.doEnrich`` is set and
    the network has none yet, and the community/tree statistics plus the
    pickle are stored.
    """
    gfile = '../../data/igraphPPI.pkl'
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(gfile, 'rb') as graph_handle:
        g = pickle.load(graph_handle)  # hyp.make_graph_from_dict(gfile)

    args = parser.parse_args()
    beta = 0.5
    results_parent = 'syn22269875'
    table_parent = 'syn22128879'
    pkl_suffix = '_amlPatientData_hypha.pkl'

    # NOTE(review): `data` is not defined here; presumably module-level.
    proteomics_dictionary = significant_prots(data, 'AML sample', 'Gene',
                                              'LogFoldChange')
    gene_dictionary = tumor_genes(data, 'AML sample', 'Gene', 'Tumor VAF')
    if args.fromFile is None:
        hyphae = dict()
        hyphae['mutations'] = hyphalNetwork(gene_dictionary, g.copy(), beta)
        hyphae['proteomics'] = hyphalNetwork(proteomics_dictionary, g.copy(),
                                             beta)
        for key, this_hyp in hyphae.items():
            this_hyp._to_file(key + pkl_suffix)
    else:
        hyphae = loadFromFile(args.fromFile)

    # now compute graph distances to ascertain fidelity
    if args.getDist:
        res = hyStats.compute_all_distances(hyphae)
        res.to_csv('amlNetworkdistances.csv')
        # store distances
        tab = table.build_table("AML Network Distances", table_parent, res)
        syn.store(tab)
        nmi = hyStats.compute_all_nmi(hyphae, g)
        nmi.to_csv('amlNMI.csv')
        syn.store(File('amlNMI.csv', parent=results_parent))

    for key, this_hyp in hyphae.items():
        pkl_name = key + pkl_suffix

        node_stats = this_hyp.node_stats()
        node_stats.to_csv(key + '_nodelist.csv')
        tab = table.build_table("AML Network Nodes", table_parent, node_stats)
        syn.store(tab)

        if args.doEnrich:
            # Kept as len(...) == 0: these may be DataFrames, whose
            # truthiness is ambiguous and raises.
            if len(this_hyp.forest_enrichment) == 0:
                for_e = hyEnrich.go_enrich_forests(this_hyp)  # SG, ncbi)
                this_hyp.assign_enrichment(for_e, type='forest')
                forest_csv = key + 'enrichedForestGoTerms.csv'
                for_e.to_csv(forest_csv)
                syn.store(File(forest_csv, parent=results_parent))
                this_hyp._to_file(pkl_name)
            if len(this_hyp.community_enrichment) == 0:
                com_e = hyEnrich.go_enrich_communities(this_hyp)
                this_hyp.assign_enrichment(com_e, type='community')
                community_csv = key + 'enrichedCommunityGOterms.csv'
                com_e.to_csv(community_csv)
                syn.store(File(community_csv, parent=results_parent))
                this_hyp._to_file(pkl_name)
            # next: compare enrichment between patients mapped to communities

        community_stats_csv = key + '_communityStats.csv'
        tree_stats_csv = key + '_TreeStats.csv'
        this_hyp.community_stats(prefix=key).to_csv(community_stats_csv)
        this_hyp.forest_stats().to_csv(tree_stats_csv)
        # NOTE(review): pkl_name is only written in this run when the
        # networks were built above or enrichment was recomputed; confirm
        # the file exists when loading from file.
        for files in [pkl_name, community_stats_csv, tree_stats_csv]:
            syn.store(File(files, parent=results_parent))