def test_run_blossum(self):
    """
    Check Needleman-Wunsch pairwise scores with the BLOSUM62 matrix.

    Every pairwise alignment of the sequences in the guideline FASTA file is
    scored with gap penalty 6 in similarity mode and compared with the
    expected score stored for that (seq1, seq2) pair.
    """
    seq_a = 'ILDMDVVEGSAARFDCKVEGYPDPEVMWFKDDNPVKESRHFQIDYDEEGN'
    seq_b = 'RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFVSQTT'
    seq_c = 'ISDTEADIGSNLRWGCAAAGKPRPMVRWLRNGEPLASQNRVEVLA'
    seq_d = 'RRLIPAARGGEISILCQPRAAPKATILWSKGTEILGNSTRVTVTSD'

    # Expected score keyed by the ordered pair of raw sequence strings.
    score_by_pair = {
        (seq_a, seq_b): 4,
        (seq_a, seq_c): 37,
        (seq_a, seq_d): -4,
        (seq_b, seq_c): 3,
        (seq_b, seq_d): 9,
        (seq_c, seq_d): 24,
    }

    records = parse_fasta_files(['../data/guideline_tests/needlemanwunsch.fa'])

    # Configure the aligner: BLOSUM62, gap penalty 6, similarity scoring.
    scoring = ScoringSettings(
        substitution_matrix=MatrixInfo.blosum62,
        gap_penalty=6,
        similarity=True,
    )
    aligner = NeedlemanWunsch(scoring, complete_traceback=False, verbose=False)

    for outcome in aligner.pairwise_alignments(records):
        pair = (str(outcome.seq1.seq), str(outcome.seq2.seq))
        # A pair missing from the table raises KeyError and fails the test.
        wanted = score_by_pair[pair]
        self.assertEqual(outcome.score, wanted)
def run(self, sequences):
    """
    Run the Feng-Doolittle pipeline on the given sequences.

    Steps: pairwise Needleman-Wunsch alignments (sorted by score, highest
    first), conversion of each score according to
    ``self.similarity_scoring_method``, guide tree construction with Xpgma,
    and computation of the multiple sequence alignment from that tree.

    NOTE(review): the pairwise step uses a default ``ScoringSettings()`` while
    the distance conversion receives ``self.nw_settings`` - confirm this is
    intentional.

    :param sequences: a list of SeqRecords
    :return: MultiAlignment object
    :raises NotImplementedError: if an alignment is processed while
        ``self.similarity_scoring_method`` is not one of the handled methods
    """
    # 1. Pairwise alignments, ordered by descending score.
    aligner = NeedlemanWunsch(settings=ScoringSettings(), verbose=self.verbose)
    alignments = list(aligner.pairwise_alignments(sequences))
    alignments.sort(key=lambda aln: aln.score, reverse=True)
    listing = "\n".join(map(str, alignments))
    LOGGER.info("Needleman Wunsch Alignments:\n%s" % listing)

    # Turn every alignment score into the value the clustering works on.
    for aln in alignments:
        method = self.similarity_scoring_method
        if method in (SimilarityScoringMethod.SCORE2DISTANCE,
                      SimilarityScoringMethod.SCORE2DISTANCE_EXTENDED):
            # Approximate pairwise evolutionary distance.
            aln.score = self.convert_to_evolutionary_distances(
                aln, method, self.nw_settings)
            continue
        if method == SimilarityScoringMethod.PURE_ALIGNMENT:
            # Use the negated alignment score directly.
            aln.score *= -1
            continue
        raise NotImplementedError(
            f'similarity_scoring_method {method} not supported/implemented.')

    # 2. Construct a guide tree from the converted alignments.
    clusterer = Xpgma(clustering_method=self.clustering_method)
    tree = clusterer.run(alignments)

    # 3. Start from the root of the tree to compute the MSA.
    msa = self.compute_msa(tree)

    tree_line = f'Tree: {tree}'
    res_str = tree_line + '\n' + "\n".join([entry.seq for entry in msa.sequences])
    LOGGER.info(tree_line)
    LOGGER.info("GENERATED MSA:\nSCORE:%f\nMSA:\n\n%s" % (msa.score, res_str))
    return msa
def test_calculate_guide_tree(self):
    """
    Check the guide tree computed for the xpgma1.fa fixture.

    Both the string form of the tree and the string form of its ``nodes``
    attribute are compared with fixed expected values.
    """
    aligner = NeedlemanWunsch()
    records = utils.parse_fasta_files(["../data/xpgma/xpgma1.fa"])
    pairwise = aligner.pairwise_alignments(records)

    clusterer = Xpgma()
    clusterer.create_distance_matrix(pairwise)
    tree = clusterer.calculate_guide_tree()

    tree_repr = '((A:2.00,B:2.00):0.00,C:2.00)'
    nodes_repr = (
        "{'A': A:2.00, 'B': B:2.00, 'C': C:2.00, "
        "'AB': (A:2.00,B:2.00):0.00, 'ABC': ABC:NONE}"
    )
    self.assertEqual(str(tree), tree_repr)
    self.assertEqual(str(tree.nodes), nodes_repr)
def test_convert_to_evolutionary_distances(self):
    """
    Check the score-to-distance conversion on the first pairwise alignment.

    The first alignment of the conversion.fa fixture is converted with
    ``FengDoolittle.convert_to_evolutionary_distances`` and the result is
    compared (approximately) with a fixed expected value.
    """
    # Pairwise alignments of the fixture sequences.
    aligner = NeedlemanWunsch()
    records = utils.parse_fasta_files(["../data/feng_test/conversion.fa"])
    pairwise = aligner.pairwise_alignments(records)

    converter = FengDoolittle()

    # Only the first alignment is converted and checked.
    first_alignment = pairwise[0]
    print(f'Alignment: {first_alignment} ')
    first_alignment.score = converter.convert_to_evolutionary_distances(first_alignment)
    print(f'Score: {first_alignment.score} ')

    self.assertAlmostEqual(2.70805020110221, first_alignment.score)
def run_xpgma():
    """
    Run pairwise alignment and Xpgma clustering on the command-line input.

    Reads its configuration from the module-level ``args`` object
    (``input``, ``file_filter``, ``verbose``, ``mode``), logs the pairwise
    alignments, builds the distance matrix and calculates the guide tree.
    The calculated tree is not returned.
    """
    records = parse_input(args.input, args.file_filter)

    # Pairwise sequence alignments.
    aligner = NeedlemanWunsch(verbose=args.verbose)
    pairwise = aligner.pairwise_alignments(records)
    listing = "\n".join(map(str, pairwise))
    LOGGER.info("Needleman Wunsch Alignments:\n%s" % listing)

    # Cluster: distance matrix first, then the guide tree.
    clusterer = Xpgma(clustering_method=args.mode)
    clusterer.create_distance_matrix(pairwise)
    clusterer.calculate_guide_tree()
def test_create_distance_matrix(self):
    """
    Check the distance matrix built for the xpgma1.fa fixture.

    Every pair of distinct labels among A, B and C is expected to have
    distance 4.0, stored symmetrically as a dict of dicts.
    """
    aligner = NeedlemanWunsch()
    records = utils.parse_fasta_files(["../data/xpgma/xpgma1.fa"])
    pairwise = aligner.pairwise_alignments(records)

    clusterer = Xpgma()
    clusterer.create_distance_matrix(pairwise)

    labels = ('A', 'B', 'C')
    # Each label maps to every other label with distance 4.0.
    expected = {
        row: {col: 4.0 for col in labels if col != row}
        for row in labels
    }
    self.assertDictEqual(clusterer.distances, expected)