def test_parse2(self):
    """Smoke test: build a DsspParser from the 3ouf.dssp test file.

    No assertions are made; the test only exercises construction of the
    parser on this input.
    """
    # Return to this test's own directory first, as tests that run in
    # other directories change os.getcwd().
    os.chdir(self.thisd)

    dssp_path = os.path.join(self.testfiles_dir, "3ouf.dssp")
    dssp_parser.DsspParser(dssp_path)
def test_parse1(self):
    """Check the assignment parsed for the first entry of 2bhw.dssp.

    The per-residue codes stored in ``assignment[0]`` of the parser must
    match the reference list below exactly.
    """
    # Reference codes, one per residue. The long 'H' runs are laid out
    # eight per row so their lengths are easy to verify by eye.
    expected = [
        ' ', ' ', 'T', 'T', ' ', 'T', 'T', 'S', 'S', 'T', 'T',
        ' ', ' ', ' ', 'T', 'T', 'G', 'G', 'G', ' ', ' ', 'S',
        ' ', ' ', 'T', 'T', ' ', ' ', 'S', ' ', 'S', 'T', 'T',
        ' ', ' ', 'S', ' ', ' ', 'T', 'T', ' ', 'T', 'T', ' ', 'S',
        # 32 x 'H'
        'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
        'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
        'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
        'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
        'T', 'T', 'T', ' ', ' ', ' ', 'S', ' ', ' ', 'S',
        'G', 'G', 'G', 'S', 'G', 'G', 'G', 'G', 'G', 'S',
        'T', 'T', ' ', 'E', 'E', 'G', 'G', 'G', ' ', 'T', 'T', 'S',
        'E', 'E', 'E', ' ', ' ',
        # 21 x 'H'
        'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
        'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
        'H', 'H', 'H', 'H', 'H',
        'E', 'E', 'T', 'T', 'E', 'E', ' ', 'S', 'S', 'S', 'S',
        ' ', ' ', ' ', 'T', 'T', 'S', ' ', 'T', 'T', ' ', 'T', 'T', ' ', 'S',
        # 32 x 'H'
        'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
        'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
        'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
        'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
        ' ', 'S', ' ', 'S',
        # 9 x 'H'
        'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
        'H',
        ' ', 'T', 'T', 'T', 'S', 'S', 'G', 'G', 'G', 'G',
        'T', 'T', 'T', 'T', ' ', 'S', ' ', ' ',
    ]

    # Return to this test's own directory first, as tests that run in
    # other directories change os.getcwd().
    os.chdir(self.thisd)

    dssp_path = os.path.join(self.testfiles_dir, "2bhw.dssp")
    parser = dssp_parser.DsspParser(dssp_path)

    self.assertEqual(parser.assignment[0], expected)
def helixFromContacts(self, contacts, dsspLog, minContig=2, maxGap=3):
    """Return the sequence of the longest contiguous helix from the given contact data.

    In the contact data the source is the native and the target is the model.

    Outline:
      1. Find all contiguous chunks of contacts with ``self.findChunks``
         (conceptually start/stop pairs such as
         ``[(10, 15), (17, 34), (38, 50)]``).
      2. Sort the chunks by chain and start residue and walk through them
         in pairs, letting ``self._join_chunks`` merge neighbours where the
         gap allows it.
      3. Pick the chunk spanning the most residues.
      4. Look up the residue names for that span and return them as a
         single string.

    Notes on the contact data:
      - we can match multiple chains in the model, but any match must be
        within a single native chain
      - the source can increment or decrement
      - the model only ever increments (ncont "sort target inc" argument)
      - matches can be in-register or out-of-register, but for finding the
        longest chunk we don't care

    Args:
        contacts: the contact data; passed unchanged to ``self.findChunks``.
        dsspLog: DSSP output handed to ``dssp_parser.DsspParser``.
        minContig: forwarded to ``self.findChunks`` (presumably the minimum
            length of a contiguous chunk -- confirm against findChunks).
        maxGap: forwarded to ``self._join_chunks`` (presumably the largest
            gap that may be bridged when joining -- confirm against
            _join_chunks).

    Returns:
        The concatenated residue names of the largest chunk, or None when
        there are no contacts or no chunks were found.
    """
    if contacts is None or len(contacts) == 0:
        return None

    dsspP = dssp_parser.DsspParser(dsspLog)

    # Find the contiguous chunks in the list of contacts. minContig is
    # forwarded from the caller rather than being fixed at 2.
    chunks = self.findChunks(contacts=contacts, dsspP=dsspP, ssTest=True, minContig=minContig)
    if not chunks:
        return None

    if len(chunks) > 1:
        # Sort by chain and then startResSeq so that anything joinable on
        # the same chain ends up adjacent.
        chunks.sort(key=itemgetter('chainId1', 'startResSeq'))

        # extended is the list of chunks with the gaps filled in. toJoin
        # is the chunk currently being grown; _join_chunks hands back a
        # finished chunk (or None) plus the chunk to carry forward.
        extended = []
        toJoin = chunks[0]
        for newChunk in chunks[1:]:
            chunk, toJoin = self._join_chunks(toJoin, newChunk, dsspP=dsspP, maxGap=maxGap)
            if chunk is not None:
                extended.append(chunk)

        # Whatever is still being carried after the last pair has not been
        # stored yet.
        if toJoin:
            extended.append(toJoin)

        # Largest residue span wins; on a tie the earliest chunk is kept.
        # A key function is used because Python 3's sorted() no longer
        # accepts a cmp function.
        biggest = max(extended, key=lambda c: abs(c['stopResSeq'] - c['startResSeq']))
    else:
        biggest = chunks[0]

    # Get the sequence that the start/stop residues define. min/max are
    # used because the chunk could be running backwards.
    chainId = biggest['chainId1']
    startResSeq = min(biggest['startResSeq'], biggest['stopResSeq'])
    stopResSeq = max(biggest['startResSeq'], biggest['stopResSeq'])

    return "".join(dsspP.getResName(resSeq, chainId)
                   for resSeq in range(startResSeq, stopResSeq + 1))