コード例 #1
0
ファイル: mCosmCompare.py プロジェクト: xguse/gusPyProj
def collectMiRNA_totals(pathToFile):
    """Collect, per miRNA, the AGAP genes named on its 'allPassedSeedsFor_N' lines.

    pathToFile -- path to a text report whose lines hold ' : '-separated
                  fields: field 0 is the miRNA name, field 1 the record label,
                  and the last field a Python expression that evaluates to an
                  iterable of gene-name groups.

    RETURNS: dict mapping miRNA name -> Bag with the attributes
             'name', 'orthoTypes' (list of ints, in file order) and
             'AGAPs' (one gene set per orthoType index 0-3).
    """
    # Read everything up front so the handle is closed before any parsing.
    with open(pathToFile,'rU') as inFile:
        lines = inFile.readlines()

    # NOTE(review): groupByField_silent is expected to split each line on
    # ' : ' and return one list of split lines per distinct field-0 value --
    # confirm against its definition.
    groupedLines = groupByField_silent(lines,0,sep=' : ')
    groupedLines.sort(key=lambda grp: grp[0][0])

    rDict = {}

    for miR in groupedLines:
        data = Bag({'name':miR[0][0],
                    'orthoTypes':[],
                    'AGAPs':initList(4,set())
                    })

        for line in miR:
            if not line[1].startswith("allPassedSeedsFor_"):
                continue

            # The orthoType is the single digit that ends the record label.
            orthoType = int(line[1][-1])
            data.orthoTypes.append(orthoType)

            # SECURITY: eval() runs whatever expression the report contains.
            # Only feed this function files produced by a trusted pipeline;
            # ast.literal_eval would be the safe choice if the field is
            # always a plain literal.
            geneGroups = eval(line[-1])
            agaps = [gene
                     for group in geneGroups
                     for gene in group
                     if gene.startswith('AGAP')]
            data.AGAPs[orthoType].update(agaps)
        rDict[data.name]=data
    return rDict
コード例 #2
0
def collectGeneNames(goTerm,masterGenes,anoXdict):
    """For a GO term, report which genes in masterGenes are tagged with it.

    goTerm      -- the GO term to look for; it is compared against the
                   second-to-last field of each annotation row of a gene.
    masterGenes -- [ realGenes , [ctrlGenes_0, ctrlGenes_1, ...] ]: an
                   iterable of real gene names followed by a sequence of
                   control-gene iterables.
    anoXdict    -- mapping geneName -> list of annotation rows (sequences).

    The annotation data is taken from the anoXdict argument, and one result
    set is produced for every control group found in masterGenes[1], so the
    function no longer depends on module-level names.

    Genes that are not keys of anoXdict are skipped.

    RETURNS: a list([ set(realGenes) , [sets(ctrlGenes)] ])
    """
    def taggedGenes(genes):
        # Return the subset of 'genes' with goTerm among their annotations.
        tagged = set()
        for gene in genes:
            try:
                rows = anoXdict[gene]
            except KeyError:
                # TODO: collect the gene names that are missing from the
                # annotation table; for now they are simply ignored.
                continue
            if goTerm in [row[-2] for row in rows]:
                tagged.add(gene)
        return tagged

    # -- Collect Reals, then one independent set per Ctrl group --
    return [taggedGenes(masterGenes[0]),
            [taggedGenes(ctrlGenes) for ctrlGenes in masterGenes[1]]]
コード例 #3
0
    else:
        anoXdata[row[0][:-3]] = [[row[0]]+row[strt:stp]]
        goInfo[row[strt:stp][-2]] = row[strt:stp] # Create a library of goTermInfo


# -- Good miRNA A1-m8 list --
print("processing Good miRNA A1-m8 list...")
# One entry per line; only the newline is stripped so any other whitespace
# in an entry is preserved.  The handle is closed as soon as the list is built.
with open(A1_m8_file,'rU') as a1m8Handle:
    A1_m8s = sorted(line.strip('\n') for line in a1m8Handle)

# -- Process Events Pickle --
print("processing Events Pickle...")
# SECURITY: unpickling can execute arbitrary code -- only load pickles that
# this pipeline produced itself.
# NOTE(review): the pickle is read in text mode ('rU'); the mode is kept
# unchanged here, but it looks suitable only for ASCII pickles -- confirm how
# the file was written before switching to 'rb'.
with open(pklPath_Ca,'rU') as pklHandle:
    data_Ca = cPickle.load(pklHandle)
classConvert = {'II':2,'III':3}

# The number of control groups is read from the 'A1_to_m8' ctrlEvents of an
# arbitrary entry of the pickle.
ctrlNum = len(data_Ca[data_Ca.keys()[0]].ctrlEvents['A1_to_m8'])
masterGeneSets = [set(),initList(ctrlNum,set())]


# The return value is discarded; presumably masterGeneSets is filled in place.
getMasterTargetList(useClass,A1_m8s,masterGeneSets, data_Ca)


print('deleting Events Pickle...')
del(data_Ca) # free-up some memory to work with!





# --------- Main Body ---------
# -- Collect GO term Data --
print('Collecting GO-term Data...')
コード例 #4
0
def writeTargetsFdrMedMeth(miRobj_Ca,miRobj_Cb,oFile):
    """Write one miRNA's combined FDR totals and its passed-seed targets to oFile.

    miRobj_Ca -- miRNA object for the Ctrl_a data; supplies
                 reportGeneTargetsFdrMedMeth(), ctrlEvents and name.
    miRobj_Cb -- miRNA object for the Ctrl_b data; supplies
                 ctrlEvents[seedType][ctrlIndex][orthoType] and name.
    oFile     -- writable file-like object (needs write() and flush()) that
                 receives the report.  All output goes to this argument.

    Uses the module-level settings stdvsAboveMed and consFdrThreshold, the
    seed-model list miRT._seedModels and mathDefs.stdDv().

    RETURNS: None
    """
    miRHits_Ca = miRobj_Ca.reportGeneTargetsFdrMedMeth(stdvLimit=stdvsAboveMed,
                                                       consFdrThresh=consFdrThreshold,
                                                       divide=0)

    # Index 0 is a placeholder so that orthoTypes 1-3 can be used as indexes.
    totReal = [None,set(),set(),set()]
    ctrlCount = len(miRobj_Ca.ctrlEvents[miRobj_Ca.ctrlEvents.keys()[0]])
    # NOTE(review): this relies on initList handing out an independent copy
    # of the template for every control group -- confirm in its definition.
    totCtrl = initList(ctrlCount,[None, set(), set(),set()])

    # Calculate combined FDR for miRNA using Ctrl_b data from seedTypes that
    # passed the reportGeneTargetsFdrMedMeth() Ctrl_a screen.
    #   >> Gather and combine data from passed seedTypes:
    for oType in range(1,4):
        for sType in miRT._seedModels:
            if not miRHits_Ca[sType][oType]:
                continue
            totReal[oType].update(miRHits_Ca[sType][oType][0])
            for i, ctrlSets in enumerate(totCtrl):
                ctrlSets[oType].update(miRobj_Cb.ctrlEvents[sType][i][oType])

    #   >> Calculate separate FDRs for each Ctrl_b group:
    totalsData = [None,None,None,None]
    for oType in range(1,4):
        realVal = len(totReal[oType])
        if not realVal:
            # Nothing passed for this orthoType (this also avoids dividing by 0).
            continue
        # FDR per control group = ctrl hits / real hits, capped at 1.0.
        tempFDRs = [min(1.0, float(len(ctrlSets[oType]))/realVal)
                    for ctrlSets in totCtrl]
        oFDRstdv,oFDRmed = mathDefs.stdDv(tempFDRs,'median')
        cons_oFDR        = oFDRmed + (stdvsAboveMed*oFDRstdv)
        totalsData[oType] = [totReal[oType],oFDRmed,cons_oFDR]

    # Write out Totals data:
    print(miRobj_Cb.name)
    oFile.write('-- %s --\n' % (miRobj_Cb.name))
    for i in range(1,len(totalsData)):
        if totalsData[i]:
            genes, fdrMed, fdrCons = totalsData[i]
            # The conservative FDR is written before the median FDR.
            oFile.write('%s : allPassedSeedsFor_%s : %s : %s : %s  Seqs=%s\n' \
                        %(miRobj_Cb.name,
                          i,
                          len(genes),
                          fdrCons,
                          fdrMed,
                          ','.join(sorted(str(x) for x in genes))))

    # write out passed Seed data
    for seedType in sorted(miRHits_Ca):
        for i in range(1,len(miRHits_Ca[seedType])):
            hit = miRHits_Ca[seedType][i]
            if hit:
                oFile.write('%s : %s : orthoType_%s : %s : %.2f : %.4f Seqs=%s\n'\
                            %(miRobj_Ca.name,
                              seedType,
                              i,
                              len(hit[0]),
                              hit[2],
                              hit[1],
                              ','.join(sorted(str(x) for x in hit[0]))))
    oFile.flush()