def _getSubstituteParentHP(self, seqno, HPID):
    """Return the 5' and 3' replacement strands for hairpin ``HPID``.

    Each strand consists of the hairpin's native bases on that side, read
    from ``self.FG.Sequence.SeqList[seqno]``, padded with elements of the
    substitute ``input.SubsHP[seqno]``.  The pad length is
    ``max(0, self.subHPlength - HPLen)``: the 5' strand is prefixed with
    that many leading elements of the substitute, the 3' strand is
    suffixed with that many trailing elements.

    Args:
        seqno: Key into ``input.SubsHP`` and ``SeqList``.  The value
            ``"master_numbering"`` produces lists; any other key produces
            strings.
        HPID: Index into ``self.FG.HP``.

    Returns:
        tuple: ``(Sub5p, Sub3p)``.
    """
    subs_hp = input.SubsHP[seqno]
    hp_len = self.FG.HP[HPID].length
    pair_idx = self.FG.HP[HPID].idx
    pad_len = max(0, self.subHPlength - hp_len)

    native_seq = self.FG.Sequence.SeqList[seqno]
    native5p = [
        native_seq[x]
        for x in sorted(bf.FlattenList([pair[0] for pair in pair_idx]))
    ]
    native3p = [
        native_seq[x]
        for x in sorted(bf.FlattenList([pair[1] for pair in pair_idx]))
    ]

    head = subs_hp[0:pad_len]

    # A first, discarded computation of Sub3p used to live here; its string
    # branch indexed the sequence with substitute bases and raised TypeError
    # whenever pad_len was non-zero.  Only the surviving computation remains.
    if seqno == "master_numbering":
        Sub5p = list(head)
        Sub5p.extend(bf.FlattenList(native5p))
        Sub3p = list(native3p)
        # NOTE: subs_hp[-0:] is the WHOLE substitute, not an empty slice,
        # so a zero pad must be skipped explicitly.
        if pad_len > 0:
            Sub3p.extend(bf.FlattenList(subs_hp[-pad_len:]))
    else:
        tail = subs_hp[-pad_len:] if pad_len > 0 else subs_hp[0:0]
        Sub5p = head + "".join(native5p)
        Sub3p = "".join(native3p) + tail
    return Sub5p, Sub3p
def _getHPBulgeMap(self):
    """Group the base-pair map into hairpins and collect their flanking bulges.

    ``self.BPMap`` is scanned front to back.  A hairpin starts at the first
    base pair not yet assigned and runs up to and including the first base
    pair whose 3' bulge entry (``self.BPBulge3p``) flattens to a non-empty
    list, or to the end of the map when no such base pair follows.
    Presumably adjacent entries of ``BPMap`` are stacked pairs of the same
    helix -- TODO confirm against ``_getBPBulgeMap``.

    Returns:
        tuple: ``(HPMap, HPBulgeList5p, HPBulgeList3p)``.  ``HPMap[k]`` is
        the list of base pairs of hairpin ``k``; ``HPBulgeList5p[k]`` is the
        5' bulge entry of its first base pair and ``HPBulgeList3p[k]`` the
        3' bulge entry of its last base pair.
    """
    bp_map = list(self.BPMap)
    bulges5p = list(self.BPBulge5p)
    bulges3p = list(self.BPBulge3p)

    hp_map = []
    hp_bulges5p = []
    hp_bulges3p = []

    total = len(bp_map)
    start = 0
    while start < total:
        if bp_map[start] == []:
            # An empty entry never opens a hairpin.
            start += 1
            continue

        # Walk forward until a base pair closes the helix with a 3' bulge.
        # Tracking the indices here avoids a later list.index() search for
        # the first/last pair of every hairpin.
        end = start
        while end < total - 1 and bf.FlattenList(bulges3p[end]) == []:
            end += 1

        hp_map.append(bp_map[start:end + 1])
        hp_bulges5p.append(bulges5p[start])
        hp_bulges3p.append(bulges3p[end])
        start = end + 1

    return hp_map, hp_bulges5p, hp_bulges3p
def __init__(self, PS_Goal, SS_Goal, LS_Goal, IUPAC_Goal, MN_Goal):
    """Store the five goal tracks and derive every structural map from them.

    The maps are built strictly in the order below; later steps read the
    attributes set by earlier ones.

    Args:
        PS_Goal: Stored under ``SeqList['sequence']``.
        SS_Goal: Stored under ``SeqList['secstruct']``.
        LS_Goal: Stored under ``SeqList['lock']``.
        IUPAC_Goal: Stored under ``SeqList['iupac']``.
        MN_Goal: Stored under ``SeqList['master_numbering']``.
    """
    # The raw tracks, addressable by name.
    self.SeqList = dict(
        sequence=PS_Goal,
        secstruct=SS_Goal,
        lock=LS_Goal,
        iupac=IUPAC_Goal,
        master_numbering=MN_Goal,
    )

    # Partner index of every base (-1 for an unpaired base).
    partner_info = self.getPartnerIdxs()
    self.PartnerIdxs = partner_info[0]

    # All base pairs, e.g. [[0, 360], [10, 40], ...], plus the bulges
    # immediately 5' and 3' of each pair.
    bp_maps = self._getBPBulgeMap()
    self.BPMap, self.BPBulge5p, self.BPBulge3p = bp_maps

    # All hairpins as lists of base pairs, e.g.
    # [[[0, 360], [1, 359], [2, 358]], ...], plus their flanking bulges.
    hp_maps = self._getHPBulgeMap()
    self.HPMap, self.HPBulge5p, self.HPBulge3p = hp_maps

    # Neighbours of every base pair: slot 0 = upstream (towards the start),
    # slot 1 = downstream.
    bp_neighbours = []
    for base_pair in self.BPMap:
        upstream = self._getSurroundingBPs(base_pair, 0)
        downstream = self._getSurroundingBPs(base_pair, 1)
        bp_neighbours.append([upstream, downstream])
    self.BPSurrBP = bp_neighbours

    # Same for hairpins: upstream of the first pair, downstream of the last.
    hp_neighbours = []
    for hairpin in self.HPMap:
        upstream = self._getSurroundingBPs(hairpin[0], 0)
        downstream = self._getSurroundingBPs(hairpin[-1], 1)
        hp_neighbours.append([upstream, downstream])
    self.HPSurrBP = hp_neighbours

    # Indices of all bulges in the order they are met walking from base 0;
    # a zero-length bulge is recorded as [].
    self.BulgeMap, self.BulgeParentHPMap = self._getBulgeMap()

    # Parent base pair of every base pair, parent hairpin of every hairpin.
    self.BPParentMap = self._getBPParentMap()
    self.HPParentMap = self._getHPParentMap()

    # Children and siblings of every hairpin, derived from the parent map.
    self.HPChildMap = self._getChildMapFromParentMap(self.HPParentMap)
    self.HPSibMap = self._getSiblingMapFromParentMap(self.HPParentMap)

    # For every bulge: the children of its parent hairpin(s).
    bulge_children = []
    for parent_hps in self.BulgeParentHPMap:
        children = [self.HPChildMap[hp_id] for hp_id in parent_hps]
        bulge_children.append(bf.FlattenList(children))
    self.BulgeChildHPMap = bulge_children

    # Bulge 0 is special-cased so that hairpin 0 and its siblings all share
    # the same opening bulge.
    self.BulgeChildHPMap[0] = self.HPSibMap[0] + [0]

    self.BulgeParentMap = self._getBulgeParentMap(self.HPParentMap,
                                                  self.BulgeParentHPMap)
def _getSeq(self, seqno):
    """Assemble the track ``seqno`` with hairpin substitutions applied.

    Three kinds of positions are filled, in this order (later writes win):

    1. native positions (``self._getHPIdx()`` + ``self._getBulgeIdx()``)
       copy ``self.FG.Sequence[seqno]``;
    2. the outermost pair of every hairpin in ``self.ShellHPs`` receives
       ``input.GenSub[seqno]``;
    3. every hairpin in ``self.StapleHPs`` receives the result of
       ``self._getSubstituteHP``.

    Side effect: ``self.NativeIdx`` is overwritten with the sorted native
    positions.

    Args:
        seqno: Track key.  ``"master_numbering"`` returns a flat list with
            empty strings removed; any other key returns a joined string.
    """
    replaced_hps = list(self.StapleHPs)
    subbed_hps = list(self.ShellHPs)
    parent_hp = self.ParentHP

    native_positions = bf.FlattenList(self._getHPIdx() + self._getBulgeIdx())
    self.NativeIdx = sorted(native_positions)

    subbed_positions = bf.FlattenList(
        [self.FG.HP[hp_id].idx[0] for hp_id in subbed_hps])

    # One slot per base; untouched slots stay '' and vanish on output.
    pieces = [''] * len(self.FG.Sequence)

    # 1. Native bases.
    for pos in self.NativeIdx:
        pieces[pos] = self.FG.Sequence[seqno][pos]

    # 2. Generic substitute used for a hairpin -> bulge transition.
    for pos in subbed_positions:
        pieces[pos] = input.GenSub[seqno]

    # 3. Hairpins that are being replaced.
    for hp_id in replaced_hps:
        outer_pair = self.FG.HP[hp_id].idx[0]
        first_pos = outer_pair[0]

        keeps_progeny = hp_id == parent_hp and any(
            member in self.HPList
            for member in self.FG.HP[parent_hp].progenyID)

        if not keeps_progeny:
            # The whole replacement is written at the 5' position.
            pieces[first_pos] = self._getSubstituteHP(seqno, hp_id, 'out')
            continue

        # Parent hairpin with progeny in HPList: the two strands are
        # written separately at its 5' and 3' positions.
        last_pos = outer_pair[1]
        strand5p, strand3p = self._getSubstituteHP(seqno, hp_id, 'in')
        pieces[first_pos] = strand5p
        pieces[last_pos] = strand3p

    if seqno == "master_numbering":
        return [item for item in bf.FlattenList(pieces) if item != '']
    return "".join(pieces)
def __init__(self, Sequence):
    """Initialise the bulge feature set from ``Sequence``.

    Records the bulge/hairpin relationships, runs the generic
    ``Feature.__init__`` on a copy of ``Sequence.BulgeMap`` under the name
    ``"Bulge"``, then derives the bulge-specific per-ID maps.

    Args:
        Sequence: Object providing ``BulgeParentHPMap``,
            ``BulgeChildHPMap`` and ``BulgeMap``.
    """
    # These must exist before Feature.__init__ runs.
    # NOTE(review): presumably the generic initialiser reads them through
    # overridden hooks -- confirm before reordering.
    self._parentHPIDmap = Sequence.BulgeParentHPMap
    self._childHPIDmap = Sequence.BulgeChildHPMap

    bulge_pairs = list(Sequence.BulgeMap)
    Feature.__init__(self, Sequence, bulge_pairs, "Bulge")

    # Number of strands (openings) in each bulge.
    self._numopeningmap = [len(bulge) for bulge in self._pairmap]

    self._kindmap = [self._getBulgekind(bulge_id) for bulge_id in self._IDlist]

    # Length of every strand, per bulge.
    self._lengthordermap = [
        [len(strand) for strand in self._pairmap[bulge_id]]
        for bulge_id in self._IDlist
    ]

    # Total number of bases, per bulge.
    self._numbasesmap = [
        len(bf.FlattenList(self._pairmap[bulge_id]))
        for bulge_id in self._IDlist
    ]

    self._bulgeweightmap = [
        self._getBulgeWeight(bulge_id) for bulge_id in self._IDlist
    ]
    # Independent copy, so the generic weight map can change on its own.
    self._weightmap = list(self._bulgeweightmap)
def _getChildMapFromParentMap(self, ParentMap):
    """Invert a parent map into a child map.

    Args:
        ParentMap: Sequence in which entry ``i`` names the parent of
            element ``i``.  Each entry is passed through
            ``bf.FlattenList``; an entry that flattens to ``[]`` means
            "no parent".

    Returns:
        list: ``ChildMap`` of the same length, where ``ChildMap[p]`` lists
        (in ascending order) every ``i`` whose first parent is ``p``.

    Raises:
        IndexError: If a parent ID lies outside ``ParentMap``.
        TypeError: If a parent ID cannot be used as a list index.
    """
    child_map = [[] for _ in range(len(ParentMap))]
    for child_id in range(len(ParentMap)):
        parent_id = bf.FlattenList(ParentMap[child_id])
        if parent_id != []:
            # Only the subscript is guarded.  A blanket ``except:`` here
            # used to hide unrelated failures (even KeyboardInterrupt) and
            # replace an out-of-range parent with a misleading TypeError.
            try:
                target = parent_id[0]
            except (TypeError, IndexError):
                # Fallback for a bare, non-subscriptable ID.
                # NOTE(review): depends on bf.FlattenList passing scalars
                # through unchanged -- confirm.
                target = parent_id
            child_map[target].append(child_id)
    return child_map
def __init__(self, Sequence, PairMap, name):
    """Initialise a feature set and its family relationships.

    A feature is any combination of bases forming a coherent linkage or
    network.  ``PairMap`` nests one level deeper per feature kind:

    # Entry / Name       PairMap layout                                   How to reference
    # 1. Base (.)        [ 0, 1, 2,                                       . = "1," ; ]-[ = "],["
    # 2. BasePair (BP)   [ [0,100], [1,99],                               [-.-.-]
    # 3. Hairpin (HP)    [ [[0,100], [1,99]],                             [-[..]-[..]-]
    # 4. Bulge           [ [[0,1,2], [45], [98,99]],                      [-[[...][.][..]]-]
    # 5. HPGroup         [ [[[0,100], [1,99]], [[4,96]]],                 [-[Bulge-[BP-BP-BP]-Bulge-[BP-BP-BP]-Bulge-[BP-BP-BP]-Bulge]-]
    # 6. Path            [ [[[[0,100], [1,99]], [[4,96]] [[..]]]],        [-[HPG-HPG-HPG-]-]

    Args:
        Sequence: Parent sequence object; also supplies
            ``_getChildMapFromParentMap`` and
            ``_getSiblingMapFromParentMap``.
        PairMap: One entry per feature instance (see table above).
        name: Name of the feature kind.  For ``'Base'`` only the basic
            attributes are set and no family maps are built.
    """
    self.name = name
    self.length = len(PairMap)
    self.parentseq = Sequence
    self._pairmap = PairMap
    print(name, " PairMap initialized ->", self._pairmap)

    self._IDlist = list(range(0, len(PairMap)))
    # Sorted flat list of every base index touched by each feature.
    self._allbases = [
        sorted(bf.FlattenList(self._pairmap[feature_id]))
        for feature_id in range(0, len(self._pairmap))
    ]

    # Single bases have no family structure.
    if self.name == 'Base':
        return

    def resolve(id_map):
        # Turn a map of feature IDs into a map of the features themselves;
        # an empty ID ([]) stays an empty placeholder.
        return [[self._pairmap[fid] if fid != [] else [] for fid in group]
                for group in id_map]

    # Families: parents come from the base Feature implementation (which
    # carries a little feature-specific logic); children and siblings are
    # derived from the parents.
    self._parentIDmap = [
        Feature._getparentIDmap(self, feature_id)
        for feature_id in self._IDlist
    ]
    self._childIDmap = Sequence._getChildMapFromParentMap(self._parentIDmap)
    self._siblingIDmap = Sequence._getSiblingMapFromParentMap(
        self._parentIDmap)

    self._parentmap = resolve(self._parentIDmap)
    self._childmap = resolve(self._childIDmap)
    self._siblingmap = resolve(self._siblingIDmap)

    self._neighborIDmap = self._getneighborIDmap(
        self._parentIDmap, self._childIDmap, self._siblingIDmap)
    self._neighbormap = resolve(self._neighborIDmap)

    # Per-feature derived properties.
    self._progenyIDmap = [self._getAllProgenyID(fid) for fid in self._IDlist]
    self._leafmap = [self._isLeaf(fid) for fid in self._IDlist]
    self._jxnmap = [self._isJunction(fid) for fid in self._IDlist]
    self._openmap = [self._isOpening(fid) for fid in self._IDlist]

    # Default weight of zero for every feature.
    self._weightmap = [0] * len(PairMap)
def _getAllConnectingHPs(self, HPList, IncludeParentFlag=0):
    """Return the sorted set of hairpins connecting those in ``HPList``.

    Collects ``HPList`` itself, the result of ``self.getPathBetweenHPs``
    for pairs of its members, and the ``neighborID`` of every hairpin
    gathered so far; the collection is passed through ``bf.CleanList`` and
    sorted.

    Args:
        HPList: Hairpin IDs.  NOTE: mutated in place -- the parent hairpin
            may be inserted at position 0.
        IncludeParentFlag: When ``1``, the parent returned by
            ``self.findParentHP`` is inserted into ``HPList``.

    Returns:
        list: Sorted hairpin IDs.
    """
    feature_graph = self.FG

    parent = self.findParentHP(HPList)
    if type(parent) is int:
        parent = [parent]

    # NOTE(review): after wrapping, ``parent`` is a list, so the membership
    # test asks whether that *list* is an element of HPList, and the list
    # itself is what gets inserted.  Behaviour kept as-is -- confirm intent.
    if parent not in HPList and IncludeParentFlag == 1 and parent != []:
        HPList.insert(0, parent)

    connected = bf.FlattenList([HPList])

    # Path between every pair (first <= second); the last element is never
    # used as ``first``.
    for first in range(0, len(HPList) - 1):
        for second in range(first, len(HPList)):
            endpoints = [HPList[first], HPList[second]]
            connected.append(self.getPathBetweenHPs(endpoints, 0))
    connected = bf.CleanList(connected)

    # Add the direct neighbours of everything found so far.
    neighbours = [feature_graph.HP[hp_id].neighborID for hp_id in connected]
    connected = bf.CleanList(connected + neighbours)

    return sorted(connected)
def _getSubstituteHP(self, seqno, HPID, insideorout='out'):
    """Build the replacement for hairpin ``HPID`` on track ``seqno``.

    Args:
        seqno: Key into ``input.SubsHP`` and the sequence tracks.
            ``"master_numbering"`` produces lists; any other key produces
            strings.
        HPID: Index into ``self.FG.HP``.
        insideorout: ``'in'`` returns the two strands separately; any
            other value (default ``'out'``) returns one closed replacement.

    Returns:
        ``'in'``: tuple ``(Sub5p, Sub3p)``.  Each strand is the hairpin's
        native bases on that side padded with
        ``max(0, self.subHPlength - HPLen)`` elements of the substitute:
        leading elements before the 5' strand, trailing elements after the
        3' strand.

        otherwise: the first ``HPLen`` native bases from the hairpin's 5'
        start, the substitute with ``min(HPLen, self.subHPlength)``
        elements trimmed from each end, then the last ``HPLen`` native
        bases up to the hairpin's 3' end.
    """
    subs_hp = input.SubsHP[seqno]
    hp_len = self.FG.HP[HPID].length
    as_list = seqno == "master_numbering"

    if insideorout == 'in':
        pair_idx = self.FG.HP[HPID].idx
        pad_len = max(0, self.subHPlength - hp_len)

        native_seq = self.FG.Sequence.SeqList[seqno]
        native5p = [
            native_seq[x]
            for x in sorted(bf.FlattenList([pair[0] for pair in pair_idx]))
        ]
        native3p = [
            native_seq[x]
            for x in sorted(bf.FlattenList([pair[1] for pair in pair_idx]))
        ]

        head = subs_hp[0:pad_len]
        if as_list:
            Sub5p = list(head)
            Sub5p.extend(bf.FlattenList(native5p))
            Sub3p = list(native3p)
            # NOTE: subs_hp[-0:] is the WHOLE substitute, not an empty
            # slice, so a zero pad must be skipped explicitly.
            if pad_len > 0:
                Sub3p.extend(bf.FlattenList(subs_hp[-pad_len:]))
        else:
            tail = subs_hp[-pad_len:] if pad_len > 0 else subs_hp[0:0]
            Sub5p = head + "".join(native5p)
            Sub3p = "".join(native3p) + tail
        return Sub5p, Sub3p

    outer_pair = self.FG.HP[HPID].idx[0]
    trim = min(hp_len, self.subHPlength)

    native5p = self.FG.Sequence[seqno][outer_pair[0]:outer_pair[0] + hp_len]
    # NOTE(review): with trim == 0 this slice is empty ([0:-0]); left
    # unchanged because the intended result for that case is unclear.
    core = subs_hp[trim:-trim]
    native3p = self.FG.Sequence[seqno][outer_pair[1] - hp_len + 1:
                                       outer_pair[1] + 1]

    if as_list:
        SubsHPtemp = list(native5p)
        SubsHPtemp.extend(core)
        SubsHPtemp.extend(native3p)
    else:
        SubsHPtemp = native5p + core + native3p
    return SubsHPtemp