def buildCtrlsFromMatchVers(self, restrictedList, numOfCtrls=15):
    """
    Build permuted control seed matches for every seedType in self.matchVersions.

    WARNING: This should only be called after the entire list of real seeds
    has been initialized!

    For each seedType, random permutations of the 'true' match sequence
    (self.matchVersions[seedType][0]) are drawn until numOfCtrls of them
    (dflt=15) have passed the restrictedList screen, which prevents using
    KNOWN seed matches as controls.  Accepted ctrl seqs are stored in a new
    list at index 1 of self.matchVersions[seedType].

    How a seedType is permuted depends on _seedModels[seedType]:
      * two entries   -> the whole match sequence is permuted.
      * three entries -> the third entry (e.g. 'A1': nucleotide + 1-based
        position counted from the END of the match sequence) names a nuc that
        is held fixed; only the remaining nucs are permuted.
      * any other length -> the ctrl list for that seedType is left empty.

    restrictedList -- container of sequences that must not be used as
                      controls (membership is tested with `in`).
    numOfCtrls     -- number of ctrl seqs to collect per seedType.

    Raises AssertionError if any self.matchVersions entry is not a list of
    length 1 (i.e. ctrls may already have been built).  In that case NO
    entry is modified.
    """
    # Validate every entry BEFORE touching any of them, so that a failure
    # cannot leave some seedTypes with an empty ctrl list already appended.
    # Raised explicitly (rather than via `assert`) so the check still runs
    # under `python -O`.
    for seedType in self.matchVersions:
        versions = self.matchVersions[seedType]
        if not isinstance(versions, list):
            raise AssertionError(
                'ERROR: %s.matchVersions[%s] is not type: list.' % (self.name, seedType))
        if len(versions) != 1:
            raise AssertionError(
                'ERROR: len(%s.matchVersions[%s]) is not 1; ctrls seqs may have already been built.'
                % (self.name, seedType))

    # All entries are sane: append the empty ctrl list as index 1 after the
    # REAL matchSeq of each version.
    for seedType in self.matchVersions:
        self.matchVersions[seedType].append([])

    # Permute and screen each seedVersion.
    # NOTE(review): permutations are drawn without replacement; what
    # JamesDefs.randFromList_noReplace does once the pool is exhausted is not
    # visible here -- confirm numOfCtrls can always be satisfied.
    for seedType in self.matchVersions:
        realSeq = self.matchVersions[seedType][0]
        ctrls = self.matchVersions[seedType][1]
        model = _seedModels[seedType]

        if len(model) == 2:
            # NO third index, like here: [0,7]
            # Every nuc of the match sequence takes part in the permutation.
            matchPermList = [''.join(x) for x in xpermutations.xpermutations(list(realSeq))]
            while len(ctrls) < numOfCtrls:
                permSeq = JamesDefs.randFromList_noReplace(matchPermList)
                if permSeq not in restrictedList:
                    ctrls.append(permSeq)

        elif len(model) == 3:
            # Extra data, like here: [0,7,'A1'].  Use the 'instructions index'
            # to permute only the nucs not explicitly defined in the seedModel.
            nuc, pos = list(model[2])
            # Leave pos 1-registered: it is used as a negative index because
            # we are dealing with the rvCmp of the miRNA, so pos == 1 actually
            # means the LAST position in matchSeq.
            pos = int(pos)

            # Explode the seq and remove the defined nuc in place.
            seq2Perm = list(realSeq)
            del seq2Perm[-pos]

            # Generate permutations from the remaining nucs.
            matchPermList = list(xpermutations.xpermutations(seq2Perm))
            while len(ctrls) < numOfCtrls:
                permSeq = JamesDefs.randFromList_noReplace(matchPermList)
                # Put the fixed nuc back where it was removed from.  Counting
                # from the front, that slot is len(permSeq) - pos + 1, which
                # for pos == 1 is the end of the list (a plain append).
                permSeq.insert(len(permSeq) - pos + 1, nuc)
                permSeq = ''.join(permSeq)
                if permSeq not in restrictedList:
                    ctrls.append(permSeq)
def buildCtrlsFromProSeed(self, restrictedList, numOfCtrls=15):
    """
    Build control seed matches by permuting the 'true' proSeed match sequence.

    WARNING: This should only be called after the entire list of real seeds
    has been initialized!

    NOTE: an earlier (disabled) guard in this method recommended using
    buildCtrlsFromMatchVers() instead of this method.

    Draws numOfCtrls (dflt=15) random permutations of the 'true' proSeed
    matching sequence (self.matchVersions['m2_to_m8'][0]) that are not in
    restrictedList, which prevents using KNOWN seed matches as controls.
    Each chosen permutation is then run through self._buildMatchVersions()
    to derive the matchVersions exactly as in the true case.

    Ctrl seed matches are stored in a new list at index 1 of
    self.matchVersions[seedType].  Each seedVersion of a ctrl set shares the
    same index number across seedTypes.

    restrictedList -- container of sequences that must not be used as
                      controls (membership is tested with `in`).
    numOfCtrls     -- number of ctrl sets to build.

    Raises AssertionError if any self.matchVersions entry is not a list of
    length 1 (i.e. ctrls may already have been built).  In that case NO
    entry is modified.
    """
    # Validate every entry BEFORE touching any of them, so that a failure
    # cannot leave some seedTypes with an empty ctrl list already appended.
    # Raised explicitly (rather than via `assert`) so the check still runs
    # under `python -O`.
    for seedType in self.matchVersions:
        versions = self.matchVersions[seedType]
        if not isinstance(versions, list):
            raise AssertionError(
                'ERROR: %s.matchVersions[%s] is not type: list.' % (self.name, seedType))
        if len(versions) != 1:
            raise AssertionError(
                'ERROR: len(%s.matchVersions[%s]) is not 1; ctrls seqs may have already been built.'
                % (self.name, seedType))

    # All entries are sane: append the empty ctrl list as index 1 after the
    # REAL matchSeq of each version.
    for seedType in self.matchVersions:
        self.matchVersions[seedType].append([])

    matchPerms = [''.join(x) for x in
                  xpermutations.xpermutations(list(self.matchVersions['m2_to_m8'][0]))]

    # Select numOfCtrls random permutations of matchVersions['m2_to_m8'] that
    # are not in the restrictedList.
    # NOTE(review): permutations are drawn without replacement; what
    # JamesDefs.randFromList_noReplace does once the pool is exhausted is not
    # visible here -- confirm numOfCtrls can always be satisfied.
    chosenPerms = []
    while len(chosenPerms) < numOfCtrls:
        permSeq = JamesDefs.randFromList_noReplace(matchPerms)
        if permSeq not in restrictedList:
            chosenPerms.append(permSeq)

    # Use each chosen seq to generate the different matchVersions.
    for seq in chosenPerms:
        # Create a fake miRNA with the reverse complement of seq at the seed
        # location to feed to _buildMatchVersions().
        fakeMiRNA = 'N%sNNNNNNNNNNNNN' % (bioDefs.revComp(seq))
        ctrlVersions = self._buildMatchVersions(fakeMiRNA)
        for seedType in self.matchVersions:
            # Must use index [0] because _buildMatchVersions returns lists of len == 1.
            self.matchVersions[seedType][1].append(ctrlVersions[seedType][0])