class TestAlign2(unittest.TestCase):
    """Check SmithWaterman.align() on a pair of sequences whose expected
    alignment covers only part of each input string."""

    def setUp(self):
        # Build a fresh aligner for every test method.
        self.seq1 = "TTACCGGCCAACTAA"
        self.seq2 = "ACCGTGTCACTAC"
        self.SW = SmithWaterman(self.seq1, self.seq2)

    def test_align(self):
        # Expected gapped strings; '-' marks a gap position.
        want_first = "ACCG-GCCAACTA"
        want_second = "ACCGTGTCA-CTA"

        got_first, got_second = self.SW.align()

        # Diagnostic output: the score matrix and both gapped strings.
        print(self.SW.scoreMatrix)
        print("output seq1:", got_first)
        print("output seq2:", got_second)

        # The second sequence is asserted first, so a mismatch there is
        # the one reported when both differ.
        self.assertEqual(got_second, want_second)
        self.assertEqual(got_first, want_first)
class TestAlign(unittest.TestCase):
    """Check SmithWaterman.align() on two 8-letter sequences whose expected
    alignment contains one gap in each sequence."""

    def setUp(self):
        # Build a fresh aligner for every test method.
        self.seq1 = "AGCACACA"
        self.seq2 = "ACACACTA"
        self.SW = SmithWaterman(self.seq1, self.seq2)

    def test_align(self):
        # Expected gapped strings; '-' marks a gap position.
        want_first = "AGCACAC-A"
        want_second = "A-CACACTA"

        got_first, got_second = self.SW.align()

        # Diagnostic output: the score matrix and both gapped strings.
        print(self.SW.scoreMatrix)
        print(got_first)
        print(got_second)

        # The second sequence is asserted first, so a mismatch there is
        # the one reported when both differ.
        self.assertEqual(got_second, want_second)
        self.assertEqual(got_first, want_first)
class TestAlign(unittest.TestCase):
    """Exercise SmithWaterman.align() with "AGCACACA" vs "ACACACTA" and
    compare both returned strings against fixed expected alignments."""

    def setUp(self):
        # Inputs are kept on the instance alongside the aligner built
        # from them.
        self.seq1 = "AGCACACA"
        self.seq2 = "ACACACTA"
        self.SW = SmithWaterman(self.seq1, self.seq2)

    def test_align(self):
        expected_first = "AGCACAC-A"
        expected_second = "A-CACACTA"

        # align() is expected to yield exactly two values.
        aligned_first, aligned_second = self.SW.align()

        # Dump the score matrix and the alignment for inspection.
        print(self.SW.scoreMatrix)
        print(aligned_first)
        print(aligned_second)

        # Second sequence first, then the first sequence.
        self.assertEqual(aligned_second, expected_second)
        self.assertEqual(aligned_first, expected_first)
class TestAlign2(unittest.TestCase):
    """Exercise SmithWaterman.align() with "TTACCGGCCAACTAA" vs
    "ACCGTGTCACTAC" and compare both returned strings against fixed
    expected alignments."""

    def setUp(self):
        # Inputs are kept on the instance alongside the aligner built
        # from them.
        self.seq1 = "TTACCGGCCAACTAA"
        self.seq2 = "ACCGTGTCACTAC"
        self.SW = SmithWaterman(self.seq1, self.seq2)

    def test_align(self):
        expected_first = "ACCG-GCCAACTA"
        expected_second = "ACCGTGTCA-CTA"

        # align() is expected to yield exactly two values.
        aligned_first, aligned_second = self.SW.align()

        # Dump the score matrix and the alignment for inspection.
        print(self.SW.scoreMatrix)
        print("output seq1:", aligned_first)
        print("output seq2:", aligned_second)

        # Second sequence first, then the first sequence.
        self.assertEqual(aligned_second, expected_second)
        self.assertEqual(aligned_first, expected_first)
def align(ref, match, matrix, algorithm, gapOpen, gapExtend, ksdsspCache,
          ssMatrix=defaults[SS_SCORES], ssFraction=defaults[SS_MIXTURE],
          gapOpenHelix=defaults[HELIX_OPEN],
          gapOpenStrand=defaults[STRAND_OPEN],
          gapOpenOther=defaults[OTHER_OPEN],
          computeSS=defaults[COMPUTE_SS]):
    """Align two structure sequences with Needleman-Wunsch or Smith-Waterman.

    Parameters:
        ref, match: the sequences to align.  This function reads their
            ``molecule``, ``name`` and ``residues`` attributes and, for
            "sw", also uses ``str()``, ``len()`` and slicing on them.
        matrix: key looked up in ``SmithWaterman.matrices`` to obtain the
            similarity matrix.
        algorithm: "nw" or "sw".
        gapOpen, gapExtend: gap penalties.  They are negated before being
            handed to ``NeedlemanWunsch.nw`` and converted with ``float()``
            before being handed to ``SmithWaterman.align``.
        ksdsspCache: container supporting ``in`` and ``.add()``.  Molecules
            not already in it are added and then passed to ``ksdssp``.
        ssMatrix: secondary-structure score mapping.  For "sw" a copy is
            made and zero-valued entries for the blank letter ' ' are
            added to that copy.
        ssFraction: secondary-structure weighting; ``None`` or ``False``
            turns secondary-structure handling off in this function.
        gapOpenHelix, gapOpenStrand, gapOpenOther: per-structure-type gap
            opening penalties, treated like ``gapOpen``.
        computeSS: when true (and ssFraction is neither None nor False),
            run ``ksdssp`` on the molecules not yet in ``ksdsspCache``.

    Returns:
        ``(score, gappedRef, gappedMatch)``.

    Raises:
        ValueError: if ``algorithm`` is neither "nw" nor "sw", or if the
            ungapped Smith-Waterman result cannot be located as a
            contiguous slice of the corresponding input sequence.
        KeyError: presumably, if ``matrix`` is not in
            ``SmithWaterman.matrices`` (assumes a dict -- TODO confirm).
    """
    similarityMatrix = SmithWaterman.matrices[matrix]

    # Only the objects None and False disable secondary-structure use;
    # a numeric 0 still counts as enabled.
    useSS = ssFraction is not None and ssFraction is not False

    if useSS and computeSS:
        # Collect the molecules not yet in the cache, marking each as
        # handled right away so the same molecule is never queued twice.
        pending = []
        for mol in (ref.molecule, match.molecule):
            if mol not in ksdsspCache:
                pending.append(mol)
                ksdsspCache.add(mol)
        if pending:
            from chimera.initprefs import (ksdsspPrefs, KSDSSP_ENERGY,
                                           KSDSSP_HELIX_LENGTH,
                                           KSDSSP_STRAND_LENGTH)
            from Midas import ksdssp
            ksdssp(pending,
                   energy=ksdsspPrefs[KSDSSP_ENERGY],
                   helixLen=ksdsspPrefs[KSDSSP_HELIX_LENGTH],
                   strandLen=ksdsspPrefs[KSDSSP_STRAND_LENGTH])

    if algorithm == "nw":
        # Penalties are passed as negative scores; ssMatrix / ssFraction
        # go through untouched, even when ssFraction is None or False.
        score, seqs = NeedlemanWunsch.nw(
            ref, match,
            scoreGap=-gapExtend,
            scoreGapOpen=0 - gapOpen,
            similarityMatrix=similarityMatrix,
            returnSeqs=True,
            ssMatrix=ssMatrix,
            ssFraction=ssFraction,
            gapOpenHelix=-gapOpenHelix,
            gapOpenStrand=-gapOpenStrand,
            gapOpenOther=-gapOpenOther)
        gappedRef, gappedMatch = seqs
        return score, gappedRef, gappedMatch

    if algorithm != "sw":
        raise ValueError("Unknown sequence alignment algorithm: %s" % algorithm)

    def displayName(seq):
        # Molecule name, plus the sequence's own name unless that name
        # starts with "principal".
        label = seq.molecule.name
        if not seq.name.startswith("principal"):
            label += ", " + seq.name
        return label

    def ssCode(residue):
        # One-letter secondary-structure code; blank for a missing residue.
        if not residue:
            return ' '
        if residue.isHelix:
            return 'H'
        return 'S' if residue.isStrand else 'O'

    gappedRef = StructureSequence(ref.molecule, displayName(ref))
    gappedMatch = StructureSequence(match.molecule, displayName(match))

    if useSS:
        ssf, ssm = ssFraction, ssMatrix
    else:
        ssf, ssm = 0.0, None
    if ssm:
        # account for missing structure (blank SS letter); work on a copy
        # so the caller's matrix is not modified
        ssm = ssm.copy()
        for code in "HSO ":
            ssm[(code, ' ')] = 0.0
            ssm[(' ', code)] = 0.0

    score, alignment = SmithWaterman.align(
        str(ref), str(match), similarityMatrix,
        float(gapOpen), float(gapExtend),
        gapChar=".",
        ssMatrix=ssm,
        ssFraction=ssf,
        gapOpenHelix=float(gapOpenHelix),
        gapOpenStrand=float(gapOpenStrand),
        gapOpenOther=float(gapOpenOther),
        ss1="".join([ssCode(res) for res in ref.residues]),
        ss2="".join([ssCode(res) for res in match.residues]))
    gappedRef.extend(alignment[0])
    gappedMatch.extend(alignment[1])

    # Smith-Waterman may not be entirety of sequences, so find where each
    # ungapped result sits inside its source sequence (first match wins).
    for source, gapped in ((ref, gappedRef), (match, gappedMatch)):
        ungapped = gapped.ungapped()
        span = len(ungapped)
        for start in range(len(source) - span + 1):
            if ungapped == source[start:start + span]:
                break
        else:
            raise ValueError("Smith-Waterman result not"
                             " a subsequence of original sequence")
        gapped.residues = source.residues[start:start + span]
        # Map each residue of the aligned stretch to its ungapped index.
        positions = gapped.resMap = {}
        for pos in range(span):
            positions[gapped.residues[pos]] = pos

    return score, gappedRef, gappedMatch