Beispiel #1
0
 def buildCtrlsFromMatchVers(self, restrictedList, numOfCtrls=15):
     """
     Build permuted control seed matches for every entry of self.matchVersions.

     WARNING: This should only be called after the entire list of real seeds have been initialized!

     For each seedType the real match sequence stored at self.matchVersions[seedType][0] is
     permuted, and permutations are drawn at random until numOfCtrls of them (dflt=15) that are
     NOT in restrictedList have been collected.  The controls are stored in a new list that is
     appended at index 1 of self.matchVersions[seedType].

     How a sequence is permuted depends on the module-level _seedModels[seedType] entry:
       * len == 2: every nucleotide of the match sequence is permuted.
       * len == 3 (e.g. [0,7,'A1']): the third item is a two-character string '<nuc><pos>'.
         The nucleotide at 1-based position <pos> counted from the END of the match sequence
         is removed before permuting, and <nuc> is put back at that same position in every
         permutation, so only the remaining nucleotides are shuffled.
       * any other length: the seedType is left with an empty control list.

     Args:
         restrictedList: container of sequences that must never be used as controls
                         (tested with 'in').
         numOfCtrls:     number of control sequences to collect per seedType.

     Raises:
         AssertionError: if any self.matchVersions entry is not a list of length 1 (controls
                         may already have been built).  Nothing is modified in that case.
     """
     
     # Validate EVERY entry before touching any of them, so that a failed check cannot leave
     # some seedTypes with a ctrl list appended and others without.
     for seedType in self.matchVersions:
         versions = self.matchVersions[seedType]
         assert isinstance(versions, list), \
                'ERROR: %s.matchVersions[%s] is not type: list.' % (self.name,seedType)
         assert len(versions) == 1, \
                'ERROR: len(%s.matchVersions[%s]) is not 1; ctrls seqs may have already been built.' % (self.name,seedType)
     
     # All entries are sane: add the (still empty) ctrl list as index 1 after the REAL matchSeq.
     for seedType in self.matchVersions:
         self.matchVersions[seedType].append([])
     
     # permute and screen each seedVersion
     for seedType in self.matchVersions:
         realSeq, ctrlSeqs = self.matchVersions[seedType]
         seedModel = _seedModels[seedType]
         
         # NOTE(review): if xpermutations yields repeated orderings when a nucleotide occurs
         # more than once, the same ctrl seq (or the real seq itself, unless it is in
         # restrictedList) may be selected -- confirm that this is intended.
         
         # If NO third index like here: [0,7,'A1']
         if len(seedModel) == 2:
             # Pool of all orderings of the full match sequence, as strings.
             permPool = [''.join(x) for x in xpermutations.xpermutations(list(realSeq))]
             while len(ctrlSeqs) < numOfCtrls:
                 permSeq = JamesDefs.randFromList_noReplace(permPool)
                 # Keep the permuted seq only if it is not a restricted one.
                 if permSeq not in restrictedList:
                     ctrlSeqs.append(permSeq)
         # If extra data: use 'instructions index' to only permute the nucs not explicitly
         # defined in the seedModel
         elif len(seedModel) == 3:
             nuc,pos = list(seedModel[2])
             # Leave 1-registered bc we will use negIndex bc dealing with rvCmp of miRNA
             # so pos == 1 actually means pos == LAST in matchSeq
             pos = int(pos)
             # explode seq and remove the explicitly defined nuc
             seq2Perm = list(realSeq)
             del seq2Perm[-pos]
             # Pool of all orderings of the remaining nucs (kept as lists so nuc can be re-inserted).
             permPool = [x for x in xpermutations.xpermutations(seq2Perm)]
             while len(ctrlSeqs) < numOfCtrls:
                 permSeq = JamesDefs.randFromList_noReplace(permPool)
                 # Put the defined nuc back where it was removed.  The list is one item shorter
                 # than the original, hence -pos+1; the last position needs append() because a
                 # negative insert index always lands BEFORE an existing item.
                 if pos > 1:
                     permSeq.insert(-pos+1,nuc)
                 else:
                     permSeq.append(nuc)
                 permSeq = ''.join(permSeq)
                 # Keep the permuted seq only if it is not a restricted one.
                 if permSeq not in restrictedList:
                     ctrlSeqs.append(permSeq)
Beispiel #2
0
def shuffSeedsInMatMiRs(inPath,outPathBase,seed=(1,8),num=1):
    """Write <num> seed-shuffled control copies of a mature miR fasta file.

    Reads the fasta file at inPath and creates <num> output files named
    '<outPathBase>.ctrl<i>.fas' (i = 0 .. num-1).  Header lines (starting
    with '>') are copied unchanged to every output file.  For every
    sequence line the nucleotides inside <seed> are permuted: <num>
    permutations are sampled without replacement from all orderings of
    the seed and the i-th one is written to the i-th output file, with
    the flanking sequence left untouched.  Blank lines are copied through
    as blank lines.

    Args:
        inPath:      path of the input fasta file.
        outPathBase: prefix used to derive the output file names.
        seed:        (start, end) of the seed as python slice coords
                     (0-based and non-inclusive on the right side).
        num:         number of control files / permutations per record.

    Raises:
        ValueError: raised by sample() when a seed has fewer than <num>
                    orderings (e.g. the sequence is shorter than the seed
                    start).

    Relies on a module-level `sample` (presumably random.sample -- it is
    not imported inside this function).
    """
    # Local import keeps the block self-contained; permutations() yields
    # every ordering of the seed, repeated nucleotides included.
    import itertools

    seedStart, seedEnd = seed[0], seed[1]

    # Plain text mode: the old 'rU' flag is rejected by open() on
    # Python 3.11+, and text mode already translates newlines on Python 3.
    with open(inPath) as inFile:
        outFiles = []
        try:
            for i in range(num):
                outFiles.append(open('%s.ctrl%s.fas' % (outPathBase,i),'w'))

            for line in inFile:
                if line.startswith('>'):
                    for oF in outFiles:
                        oF.write(line)
                    continue

                line = line.strip('\n')
                if not line:
                    # An empty seed has a single ordering, so sampling
                    # num > 1 of them would fail; just keep the blank line.
                    for oF in outFiles:
                        oF.write('\n')
                    continue

                fvPrm   = line[:seedStart]
                seedSeq = line[seedStart:seedEnd]
                trPrm   = line[seedEnd:]
                shfSeeds = list(itertools.permutations(seedSeq))
                sampShfs = sample(shfSeeds,num)
                for oF, shf in zip(outFiles, sampShfs):
                    oF.write('%s%s%s\n' % (fvPrm,''.join(shf),trPrm))
        finally:
            # Close every output file that was opened, even on error.
            for oF in outFiles:
                oF.close()
Beispiel #3
0
 def buildCtrlsFromProSeed(self, restrictedList, numOfCtrls=15):
     """
     Build control seed matches for every seedType from permutations of the 'm2_to_m8' match.

     WARNING: This should only be called after the entire list of real seeds have been initialized!

     Draws numOfCtrls (dflt=15) random permutations of the real proSeed matching sequence
     (self.matchVersions['m2_to_m8'][0]) that are NOT in restrictedList, to prevent using KNOWN
     seed matches for controls.  Each chosen permutation is passed through bioDefs.revComp
     (presumably reverse-complement), embedded in a fake miRNA 'N<seq>NNNNNNNNNNNNN' and fed to
     self._buildMatchVersions(); the first item of each returned entry is appended to the ctrl
     list at index 1 of self.matchVersions[seedType].  All seedTypes are filled in the same pass,
     so each seedVersion of a ctrl set shares the same index number.

     Args:
         restrictedList: container of sequences that must never be used as controls
                         (tested with 'in').
         numOfCtrls:     number of permutations (ctrl sets) to build.

     Raises:
         AssertionError: if any self.matchVersions entry is not a list of length 1 (controls
                         may already have been built).  Nothing is modified in that case.
     """
     # NOTE: a guard that used to sit here (now disabled) recommended using
     # miRNA.buildCtrlsFromMatchVers() instead of this method.
     
     # Validate EVERY entry before touching any of them, so that a failed check cannot leave
     # some seedTypes with a ctrl list appended and others without.
     for seedType in self.matchVersions:
         versions = self.matchVersions[seedType]
         assert isinstance(versions, list), \
                'ERROR: %s.matchVersions[%s] is not type: list.' % (self.name,seedType)
         assert len(versions) == 1, \
                'ERROR: len(%s.matchVersions[%s]) is not 1; ctrls seqs may have already been built.' % (self.name,seedType)
     
     # All entries are sane: add the (still empty) ctrl list as index 1 after the REAL matchSeq.
     for seedType in self.matchVersions:
         self.matchVersions[seedType].append([])
     
     # Pool of all orderings of the real m2_to_m8 match sequence, as strings.
     matchPerms = [''.join(x) for x in xpermutations.xpermutations(list(self.matchVersions['m2_to_m8'][0]))]
     
     # Select numOfCtrls random permutations of matchVersions['m2_to_m8'] that are not in the
     # restrictedList.
     chosenPerms = []
     while len(chosenPerms) < numOfCtrls:
         permSeq = JamesDefs.randFromList_noReplace(matchPerms)
         if permSeq not in restrictedList:
             chosenPerms.append(permSeq)
     
     # Use each chosen permutation to generate the diff matchVersions
     for permSeq in chosenPerms:
         # Create Fake miRNA with the seq at the seed location to feed to _buildMatchVersions()
         fakeMiRNA = 'N%sNNNNNNNNNNNNN' % (bioDefs.revComp(permSeq))
         ctrlVersions = self._buildMatchVersions(fakeMiRNA)
         for seedType in self.matchVersions:
             # must use index[0] bc _buildMatchVersions returns a list len==1
             self.matchVersions[seedType][1].append(ctrlVersions[seedType][0])