def test_sliceIsSequenceCopy(self):
    """A slice is an independent copy: editing it leaves the source Sequence intact."""
    prot = Sequence("ABCD")
    subProt = prot[1:3]
    self.assertEqual(subProt, Sequence("BC"))
    # Mutate the slice only; the original must still hold all four items.
    del subProt[0]
    self.assertEqual(subProt, Sequence("C"))
    self.assertEqual(prot, Sequence("ABCD"))
def __align(self, i, j):
    """
    Generator walking the origin matrix backwards from cell (row i, column j)
    and yielding one Aligned result per completed path.

    Stops producing results as soon as self._resultCount has dropped to 0.
    Raises ValueError if an origin other than "T", "D" or "L" is encountered.
    """
    if self._resultCount == 0:
        return

    mode = self._alignMode
    # local      : done when a null score is reached
    # global     : done when the beginning of the matrix is reached
    # semiglobal : done when any edge of the matrix is reached
    finished = (
        (mode == "local" and self._alignMatrix[i][j] == 0)
        or (mode == "global" and i == 0 and j == 0)
        or (mode == "semiglobal" and (i == 0 or j == 0))
    )

    if not finished:
        for origin in self._originMatrix[i][j]:
            self._currentAlignPath.append((i, j))
            if origin == "T":
                # top: gap in the column sequence, item taken from the row sequence
                self._alignedColSeq.insert(0, "-")
                self._alignedRowSeq.insert(0, self._rowSeq[i - 1])
                nextCell = (i - 1, j)
            elif origin == "D":
                # diagonal: one item taken from each sequence
                self._alignedColSeq.insert(0, self._colSeq[j - 1])
                self._alignedRowSeq.insert(0, self._rowSeq[i - 1])
                nextCell = (i - 1, j - 1)
            elif origin == "L":
                # left: item taken from the column sequence, gap in the row sequence
                self._alignedColSeq.insert(0, self._colSeq[j - 1])
                self._alignedRowSeq.insert(0, "-")
                nextCell = (i, j - 1)
            else:
                raise ValueError("Origin must be T (top), D (diagonal) or L (left)")
            yield from self.__align(*nextCell)
            # Backtrack: undo this step before trying the next origin
            self._currentAlignPath.pop()
            del self._alignedColSeq[0]
            del self._alignedRowSeq[0]
        return

    # The path is complete: build the result object.
    suffix = "-suboptimal" + "(" + str(self._subOptimalDepth) + ")"
    alignDescription = mode + suffix * self._isSuboptimal

    multiple = bool(self._isMultiple)
    if multiple:
        # MSA: the aligned column data is handed over as is
        colPart = self._alignedColSeq
    else:
        colPart = Sequence(self._alignedColSeq, self._colSeq.getDescription())
    rowPart = Sequence(self._alignedRowSeq, self._rowSeq.getDescription())
    result = Aligned(colPart, rowPart, j, i, alignDescription,
                     self._currentAlignScore, self._scoreMatrix, multiple)

    # Keep the path of the first completed alignment for suboptimal lookup
    if self._bestAlignPath == []:
        self._bestAlignPath = deepcopy(self._currentAlignPath)

    self._resultCount -= 1
    yield result
def __initialize(self, seqA, seqB, iniGapPenalty, extGapPenalty):
    """
    Resets every data structure used by an alignment run, then fills the
    matrices and looks up the best scores.

    seqA is stored as the column sequence and seqB as the row sequence.
    When 'extGapPenalty' is None, 'iniGapPenalty' is used for it as well.
    Raises ValueError if either sequence has length 0.
    """
    if len(seqA) == 0 or len(seqB) == 0:
        raise ValueError("Sequences to align cannot be empty")
    self._colSeq, self._rowSeq = seqA, seqB

    # --- Alignment state ---
    self._isSuboptimal = False
    self._subOptimalDepth = 0
    self._alignedRowSeq = Sequence()
    # In MSA (align with PSSM) there is no column Sequence, a plain list is used
    self._alignedColSeq = [] if self._isMultiple else Sequence()
    self._maxAlignScore = 0      # highest score found while aligning
    self._maxScoreRows = []      # rows where the highest score occurs
    self._maxScoreCols = []      # columns where the highest score occurs
    self._currentAlignPath = []  # cells of the alignment being walked
    self._bestAlignPath = []     # cells of the best-scoring alignment

    # --- Gap penalties ---
    self._iniGapPenalty = iniGapPenalty
    if extGapPenalty is None:
        self._extGapPenalty = iniGapPenalty
    else:
        self._extGapPenalty = extGapPenalty

    # --- Matrices: one extra leading row and column ---
    rowCount = len(self._rowSeq) + 1
    colCount = len(self._colSeq) + 1
    self._alignMatrix = [[0] * colCount for _ in range(rowCount)]
    self._rowGapMatrix = deepcopy(self._alignMatrix)
    self._colGapMatrix = deepcopy(self._alignMatrix)
    self._originMatrix = [[""] * colCount for _ in range(rowCount)]

    # Global alignment: first row and column get initial scores and origins
    if self._alignMode == "global":
        self.__initAlignValues()

    # Fill every cell outside the first row and first column
    for row in range(1, rowCount):
        for col in range(1, colCount):
            self.__fill(row, col)

    self.__findBestScore()
def test_setNameMode(self):
    """The name mode drives the printed length: 1 by default, 3 in "medium", more in "long"."""
    prot = Sequence("A")
    # assertEqual/assertGreater report the actual length when the check fails,
    # unlike assertTrue on a comparison.
    self.assertEqual(len(str(prot)), 1)
    prot.setNameMode("medium")
    self.assertEqual(len(str(prot)), 3)
    prot.setNameMode("long")
    self.assertGreater(len(str(prot)), 3)
def __init__(self, path="", description="", ignore=None):
    """
    Creates a Score object.

    If 'path' is provided, loads the Score values from an iij file: lines
    starting with '#' are comments, the first remaining line lists the
    amino acids (which fixes their order) and every following line holds
    one row of integer values. Blank lines are skipped.

    Otherwise, creates a Score with values 0 for every name returned by
    AminoAcid.getAllNames() that is not in 'ignore'.

    Raises ValueError (from int()) if a matrix value is not an integer.
    """
    self._description = description
    self._ignore = Sequence(ignore)
    self._matrix = []
    self._aaOrder = {}
    self._aaSequence = Sequence()

    if path != "":
        # Load directly from the iij file
        with open(path, 'r') as iijFile:
            foundAAOrder = False  # has the amino acid header line been read yet?
            for line in iijFile:
                # Skip comments, and blank lines which would otherwise be
                # taken for the header or appended as empty matrix rows.
                if line.startswith("#") or not line.strip():
                    continue
                if not foundAAOrder:
                    # Header: amino acid values and their order
                    for aa in line.split():
                        self._aaSequence.extend(aa)
                    self._aaOrder = {
                        aa: index for index, aa in enumerate(self._aaSequence)
                    }
                    foundAAOrder = True
                else:
                    # Matrix row
                    self._matrix.append([int(v) for v in line.split()])
    else:
        # No file: build a zero-filled triangular matrix, one row per kept
        # amino acid, each row one cell longer than the previous one.
        for aa in AminoAcid.getAllNames():
            if AminoAcid(aa) not in self._ignore:
                self._aaSequence.extend(aa)
                rowIndex = len(self._matrix)
                self._aaOrder[self._aaSequence[-1]] = rowIndex
                self._matrix.append([0] * (rowIndex + 1))
def getSequencesFromFasta(path):
    """
    Loads the FASTA file located in 'path' and yields the Sequences it contains.

    A line starting with '>' opens a new Sequence whose description is the
    rest of that line; the following lines extend it. Blank lines are
    skipped. An empty file (or one holding only blank lines) yields nothing.

    Raises ValueError if sequence data appears before the first '>' line.
    """
    newProtein = None
    with open(path, 'r') as fastaFile:
        for line in fastaFile:
            line_s = line.strip()
            if line_s == "":
                continue
            if line_s[0] == ">":
                # A new header closes the Sequence that was being read
                if newProtein is not None:
                    yield newProtein
                newProtein = Sequence(None, line_s[1:])
            elif newProtein is None:
                raise ValueError(
                    "FASTA sequence data found before any '>' header line")
            else:
                newProtein.extend(line_s)
    # The last Sequence is only yielded when it is not empty; the None check
    # covers files that contained no header at all.
    if newProtein is not None and len(newProtein) > 0:
        yield newProtein
def test_differentValuesNotEqual(self):
    """Sequences holding the same items in a different order must not compare equal."""
    forward, backward = Sequence("AB"), Sequence("BA")
    self.assertNotEqual(forward, backward)
def test_getDescription_staysAfterCopy(self):
    """A Sequence built from another Sequence keeps the original's description."""
    description = "this is a description!"
    prot = Sequence("A", description)
    prot2 = Sequence(prot)
    self.assertEqual(prot2.getDescription(), description)
def test_differentNamesEqual(self):
    """Full names in any letter case and one-letter codes build the same Sequence."""
    prot1 = Sequence(["Methionine", "pyrrolysine", "CYSTEINE", "K"])
    prot2 = Sequence("MOCK")
    self.assertEqual(prot1, prot2)
def test_copyEqualsOriginal(self):
    """A Sequence constructed from another Sequence compares equal to it."""
    prot1 = Sequence("methionine")
    prot2 = Sequence(prot1)
    self.assertEqual(prot1, prot2)
def test_count(self):
    """count() returns how many times the given AminoAcid occurs in the Sequence."""
    prot = Sequence("ABCAX")
    self.assertEqual(prot.count(AminoAcid("A")), 2)
def test_remove(self):
    """remove() deletes the given AminoAcid and keeps the other items in order."""
    prot = Sequence("TRANKIL")
    prot.remove(AminoAcid("A"))
    self.assertEqual(prot, Sequence("TRNKIL"))
def test_extend(self):
    """extend() appends at the end and accepts both a Sequence and a string."""
    prot = Sequence("OK")
    prot.extend(Sequence("L"))
    prot.extend("M")
    self.assertEqual(prot, Sequence("OKLM"))
def test_insert(self):
    """insert() at index 0 places the new item in front of the existing ones."""
    prot = Sequence("KL")
    prot.insert(0, "M")
    self.assertEqual(prot, Sequence("MKL"))
def test_setSeparator(self):
    """The separator set with setSeparator() appears between items in str()."""
    prot = Sequence("AE")
    prot.setSeparator("~")
    # Two items joined by one separator split back into two parts
    self.assertEqual(len(str(prot).split("~")), 2)
def test_sliceSingleItemIsAminoAcid(self):
    """Indexing a Sequence with a single integer gives an AminoAcid."""
    prot = Sequence("ABC")
    aa = prot[0]
    self.assertEqual(aa, AminoAcid("A"))
def test_sliceMultipleItemsIsSequence(self):
    """Slicing a Sequence with a range gives a Sequence of the selected items."""
    prot = Sequence("ABC")
    seq = prot[0:2]
    self.assertEqual(seq, Sequence("AB"))
def test_delete(self):
    """del on an index removes that item and keeps the remaining ones in order."""
    prot = Sequence("YEP")
    del prot[1]
    self.assertEqual(prot, Sequence("YP"))