Ejemplo n.º 1
0
def sortClass2sBySpcsPair(line,genomeTokens):
    """
    Group the gene pairs stored in line[-1] by the genome-token pair they contain.

    genomeTokens -> ['SpcA','SpcB','SpcC']
    Every unique pair of tokens (each pair sorted, and the pairs sorted) gets its
    own group, so any number of tokens >= 2 is handled; for three tokens this
    yields the usual three groups.

    line is a list of strings whose last element is the text of a Python
    expression evaluating to an iterable of gene pairs (each pair an iterable
    of gene-name strings).  A gene pair goes to the FIRST token pair, in sorted
    order, whose two tokens both occur in the concatenated gene names; gene
    pairs matching no token pair are dropped.

    line is modified in place: one count per token pair is inserted just before
    the last element, and the last element is replaced by the list of grouped
    gene pairs (group by group, in token-pair order).

    writes results to outFile (a module-level file-like object) as one
    tab-delimited record.  Returns None.
    """

    tokPairs = sorted(tuple(sorted(x)) for x in xuniqueCombinations(genomeTokens,2))

    # One bucket per token pair, keyed by the pair itself.
    data = {}
    for tokPair in tokPairs:
        data[tokPair] = []

    # NOTE(review): eval() executes whatever expression is stored in line[-1].
    # That is only acceptable if the input file is trusted; if it merely holds
    # list/tuple/string literals, ast.literal_eval would be the safe choice.
    genePairs = eval(line[-1])

    for pair in genePairs:
        joined = ''.join(pair)  # join once instead of once per test
        for tokPair in tokPairs:
            if (tokPair[0] in joined) and (tokPair[1] in joined):
                data[tokPair].append(pair)
                break  # first matching token pair wins

    # Insert the per-pair counts in front of the last column ...
    line[-1:-1] = [str(len(data[tokPair])) for tokPair in tokPairs]
    # ... and replace the last column with the regrouped gene pairs.
    grouped = []
    for tokPair in tokPairs:
        grouped.extend(data[tokPair])
    line[-1] = grouped
    # Write the counts to each pair
    outFile.write('%s\t%s\n' % ('\t'.join(line[:-1]),line[-1]))
Ejemplo n.º 2
0
    for line in miR:
        if line[1].startswith("allPassedSeedsFor_"):
            orthoType = int(line[1][-1])
            data.orthoTypes.append(orthoType)
            data.numTot[orthoType] = len(getAGAP(line[5]))
            data.AGAPgenes[orthoType] = getAGAP(line[5])
            
            #for i in miR:
                #if i[2].startswith('orthoType_%s' % (orthoType)):
                    #if i[1]+'_fdr' in data:
                        #data[i[1]+'_fdr'][orthoType] = i[4]
    
    mirDict[data.name] = data

# ---- Work out combinations ----
# Every unique pair of keys of mirDict; each pair is sorted and the list of
# pairs is sorted as well, so the iteration order below is deterministic.
mirCombos = sorted([sorted(x) for x in xuniqueCombinations(mirDict.keys(),2)])
# Number of pairs (not used within the lines visible here).
l = len(mirCombos)
# Output accumulator (nothing is appended to it within the lines visible here).
outTmp = []        
for mCombo in mirCombos:
    # Only indices 2 and 3 of numTot / AGAPgenes are examined
    # (presumably the ortholog "class" numbers -- see the 'Class %s' label below).
    for i in range(2,4):
        # Skip this index when either member of the pair has no total recorded for it.
        if (mirDict[mCombo[0]].numTot[i] == None) or (mirDict[mCombo[1]].numTot[i] == None):
            pass
        else:
            # 'nameA:nameB' label for the pair.
            cmbo  = ':'.join(mCombo)
            clas  = 'Class %s' % (i)
            # 'totalA:totalB' -- the two individual totals for this index.
            eatot = '%s:%s' % (mirDict[mCombo[0]].numTot[i],mirDict[mCombo[1]].numTot[i])
            # The two gene collections; they must support .intersection()
            # (presumably sets -- confirm against getAGAP).
            c1Set = mirDict[mCombo[0]].AGAPgenes[i]
            c2Set = mirDict[mCombo[1]].AGAPgenes[i]
            # Size of the overlap, as a string.
            inTot = '%s' % (len(c1Set.intersection(c2Set)))
            # Sorted members of the overlap, as the string form of a list.
            inter = '%s' % (sorted(list(c1Set.intersection(c2Set))))
            
Ejemplo n.º 3
0
    def countHitsInOrthos4(self,genomeToken,returnGenes=True):
        """
        Tally, per seed type, the ortholog-set hits that involve genomeToken.

        Uses results of miRNA.tallyHits() (self.matchData / self.ctrlData) and self.orthos.
        For every ortholog set and seed type, the genes of the set that are present in
        self.matchData[seedType] (and, separately, in each self.ctrlData[seedType][i])
        are collected.  A set is only considered when genomeToken occurs in the
        concatenated names of the genes that were hit.

        Side effects:
          * self.matchCounts[seedType] is reset to [0,0,0,0]; index 1 is incremented for
            every considered ortholog set in which exactly one gene was hit.  No other
            index is incremented here.
          * self.ctrlCounts[seedType] is reset to one [0,0,0,0] per entry of
            self.matchVersions[seedType][1] and is not incremented here.
          * If returnGenes: self.matchEvents and self.ctrlEvents are set to the two
            returned dicts.

        If returnGenes: returns tuple of two dicts:
        matchDict(keys=seedType : vals=[None,genesWithMatch,genePairsWithMatch,geneTriplesWithMatch])
        ctrlDict(keys=seedType : vals=one list of the same layout per control, as built by
                                      JamesDefs.initList)
        If not returnGenes: only the counts are updated and None is returned.

        Raises AssertionError if the hits have not been tallied, if an ortholog set
        does not have exactly three members, or if a match event list ends up with
        duplicate entries.
        """
        # make sure we have tallied the hits already.
        assert self.matchData and self.ctrlData, \
               'ERROR:  It looks like we have not tallied the hits yet. Call miRNA.tallyHits() first.'

        def _fileEvents(names, events):
            """File the hit genes of one ortholog set into events[1], events[2], events[3]."""
            # Nothing hit, or none of the hit genes carries the genome token: ignore the set.
            if not names or genomeToken not in ''.join(names):
                return
            nHits = len(names)
            if nHits == 1:
                events[1].extend(names)
            elif nHits == 2:
                events[1].extend([x for x in names if genomeToken in x])
                events[2].append(tuple(sorted(names)))
            elif nHits == 3:
                events[1].extend([x for x in names if genomeToken in x])
                # Only the pairs that involve the genome token are recorded.
                type2 = [tuple(sorted(x)) for x in xpermutations.xuniqueCombinations(names,2)
                         if genomeToken in ''.join(x)]
                events[2].extend(type2)
                events[3].append(tuple(sorted(names)))

        if returnGenes:
            rGeneNames = {}
            rCtrlNames = {}
            for seedType in _seedModels:
                rGeneNames[seedType] = [None,[],[],[]]
                rCtrlNames[seedType] = JamesDefs.initList(len(self.matchVersions[seedType][1]),[None,[],[],[]])

        # Initialize self.matchCounts/self.ctrlCounts
        for seedType in _seedModels:
            self.matchCounts[seedType] = [0,0,0,0]
            self.ctrlCounts[seedType]  = [[0]*4 for i in range(len(self.matchVersions[seedType][1]))]

        # Cycle through self.orthos
        for orthoSet in self.orthos:
            assert len(orthoSet) == 3,\
                   'ERROR: It seems len(%s) != 3.' % (orthoSet,)
            # Query the matchData and ctrlData for hits in orthoSet
            for seedType in _seedModels:
                # The hit genes are always collected: the genome-token filter needs
                # them even when the caller does not want the gene names back.
                geneNames = [gene for gene in orthoSet if gene in self.matchData[seedType]]

                # Update self.matchCounts: only single-gene hits are counted here.
                # NOTE(review): the increments for two- and three-gene hits were
                # disabled in the original code; that behavior is kept as is.
                if len(geneNames) == 1 and genomeToken in geneNames[0]:
                    self.matchCounts[seedType][1] += 1

                if returnGenes:
                    _fileEvents(geneNames, rGeneNames[seedType])
                    # Same bookkeeping for every control of this seedType.
                    for i in range(len(self.ctrlData[seedType])):
                        ctrlNames = [gene for gene in orthoSet if gene in self.ctrlData[seedType][i]]
                        _fileEvents(ctrlNames, rCtrlNames[seedType][i])

                    # Sanity check: no gene / pair / triple may be recorded twice.
                    for i in range(1,4):
                        assert len(rGeneNames[seedType][i]) == len(set(rGeneNames[seedType][i])),\
                               "ERROR: rGeneNames[%s] in miRNA(%s) has redundancy." % (i, self.name)
        if returnGenes:
            # store and return rGeneNames
            self.matchEvents = rGeneNames
            self.ctrlEvents  = rCtrlNames
            return (rGeneNames,rCtrlNames)
Ejemplo n.º 4
0
def findBestPairAlignments(listOfMotifObjs, minoverlap=6, verbose=None):
    """
    Takes: list of TAMO motif objects.  Finds best pairwise alignments among list members, trying both
    orientations. Motifs in list are numbered by original index in results. Returns: 2D list of
    results for each combination of motifs with the matrix coords corresponding to motif
    index in original list (exp: dist of motif0 and motif4 == 2dList[0][4]; BUT 2dList[4][0] == None).
    Always put lower index first or you will get 'None'.  Same index twice also gives 'None'

    Each value at the 2D coords contains a tuple: (alignOri,distScore,alignment,offset).
    alignOri = 1 when getMinDiffOri handed back the very object stored at the higher index
    (both motifs in original ori); alignOri = -1 otherwise (motif with higher index
    was revComped to get best score).  distScore and offset are elements 1 and 2 of
    getMinDiffOri's result; alignment is whatever alignPairWithOffSet returns.

    verbose == True prints the scores, orientations and alignments for each motif pair
    (one record per pair; no header line is printed).
    """
    # rename listOfMotifObjs for brevity
    motifs = listOfMotifObjs
    nMotifs = len(motifs)

    # Initialize empty return-matrix (each row is its own list)
    rMat = [[None]*nMotifs for _ in range(nMotifs)]

    # Create list of non-redundant index combos for comparing
    toCompare = list(xpermutations.xuniqueCombinations(range(nMotifs),2))

    for pair in toCompare:
        lo, hi = pair[0], pair[1]
        minDiffOri = getMinDiffOri(motifs[lo],motifs[hi],minoverlap=minoverlap, getOffset=1)

        # If pos ori, then motif obj returned will be ref to original motifs[hi]
        # else: newly constructed revComp is returned
        if motifs[hi] is minDiffOri[0]:
            alignOri = 1
        else:
            alignOri = -1

        distScore = minDiffOri[1]
        offset    = minDiffOri[2]
        alignment = alignPairWithOffSet(motifs[lo], minDiffOri[0], offset)

        # Assign tuple to  matrix coords:
        rMat[lo][hi] = (alignOri, distScore, alignment, offset)

    # Write out the results if verbose
    if verbose:
        spc = ' '*3
        for pair in toCompare:
            lo, hi = pair[0], pair[1]
            result = rMat[lo][hi]
            tmp = '%s:%s\t%s\t%.3G\t%s' \
                % (str(lo)+'_'+motifs[lo].oneletter,
                   str(hi)+'_'+motifs[hi].oneletter,
                   result[0],
                   result[1],
                   result[2])
            # Futz with formating to allow alignments to match when pasted in an exclFile:
            # the second alignment row is padded with one tab per leading column so it
            # lines up under the first row.
            tmp = tmp.split('\n')
            if len(tmp) > 1:  # single-line alignments have no second row to pad
                add    = spc.join(['\t']*tmp[0].count('\t'))
                tmp[1] = '%s%s%s\n' % (spc,add,tmp[1])

            # Single-argument parenthesized form: valid on both Python 2 and Python 3.
            print('\n'.join(tmp))

    return rMat