def test_run_blossum(self):
        """Compare Needleman-Wunsch scores under blosum62 with known values.

        The sequences are read from the guideline FASTA file and aligned
        pairwise with a gap penalty of 6. Every returned alignment is looked
        up by its (seq1, seq2) string pair and its score must equal the
        expected score stored for that pair.
        """
        seq_a = 'ILDMDVVEGSAARFDCKVEGYPDPEVMWFKDDNPVKESRHFQIDYDEEGN'
        seq_b = 'RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFVSQTT'
        seq_c = 'ISDTEADIGSNLRWGCAAAGKPRPMVRWLRNGEPLASQNRVEVLA'
        seq_d = 'RRLIPAARGGEISILCQPRAAPKATILWSKGTEILGNSTRVTVTSD'

        # Expected alignment score for each ordered sequence pair.
        expected_scores = {
            (seq_a, seq_b): 4,
            (seq_a, seq_c): 37,
            (seq_a, seq_d): -4,
            (seq_b, seq_c): 3,
            (seq_b, seq_d): 9,
            (seq_c, seq_d): 24,
        }

        fasta_path = '../data/guideline_tests/needlemanwunsch.fa'
        records = parse_fasta_files([fasta_path])

        # Configure the aligner: blosum62 substitution matrix, gap penalty 6.
        scoring = ScoringSettings(
            substitution_matrix=MatrixInfo.blosum62,
            gap_penalty=6,
            similarity=True,
        )
        aligner = NeedlemanWunsch(scoring, complete_traceback=False, verbose=False)

        for outcome in aligner.pairwise_alignments(records):
            pair = (str(outcome.seq1.seq), str(outcome.seq2.seq))
            # A pair missing from the table raises KeyError and fails the test.
            wanted = expected_scores[pair]
            self.assertEqual(outcome.score, wanted)
# Example #2
# 0
 def run(self, sequences):
     """
     Run function for feng doolittle.

     Performs pairwise Needleman Wunsch alignments, rewrites each alignment
     score according to ``self.similarity_scoring_method``, builds a guide
     tree with Xpgma and computes the MSA from that tree.

     :param sequences: a list of SeqRecords
     :return: MultiAlignment object
     :raises NotImplementedError: when an alignment is processed and
         ``self.similarity_scoring_method`` is none of the handled methods
     """
     # 1. Pairwise alignments, ordered from highest to lowest score.
     # NOTE(review): the aligner is built with a fresh ScoringSettings() while
     # the distance conversion below receives self.nw_settings - confirm that
     # this difference is intended.
     aligner = NeedlemanWunsch(settings=ScoringSettings(), verbose=self.verbose)
     alignments = sorted(
         aligner.pairwise_alignments(sequences),
         key=lambda aln: aln.score,
         reverse=True,
     )
     listing = "\n".join(map(str, alignments))
     LOGGER.info("Needleman Wunsch Alignments:\n%s" % listing)

     # Rewrite every score in place so it can be used as a distance.
     for aln in alignments:
         method = self.similarity_scoring_method
         if method == SimilarityScoringMethod.SCORE2DISTANCE \
                 or method == SimilarityScoringMethod.SCORE2DISTANCE_EXTENDED:
             aln.score = self.convert_to_evolutionary_distances(aln, method, self.nw_settings)
             continue
         if method == SimilarityScoringMethod.PURE_ALIGNMENT:
             # The raw alignment score is simply negated.
             aln.score *= -1
             continue
         raise NotImplementedError(
                 f'similarity_scoring_method {method} not supported/implemented.')

     # 2. Construct a guide tree from the rescored alignments.
     guide_tree = Xpgma(clustering_method=self.clustering_method).run(alignments)

     # 3. Compute the MSA from the tree.
     msa = self.compute_msa(guide_tree)

     # Assemble the report: tree line followed by one line per MSA sequence.
     header = f'Tree: {guide_tree}\n'
     sequence_lines = "\n".join([entry.seq for entry in msa.sequences])
     report = header + sequence_lines
     LOGGER.info(f'Tree: {guide_tree}')
     LOGGER.info("GENERATED MSA:\nSCORE:%f\nMSA:\n\n%s" % (msa.score, report))
     return msa
 def test_calculate_guide_tree(self):
     """Check the guide tree and its node map for the xpgma1 sequences.

     The sequences are aligned pairwise, turned into a distance matrix and
     clustered; the string forms of the resulting tree and of its ``nodes``
     attribute must match the expected texts.
     """
     aligner = NeedlemanWunsch()
     records = utils.parse_fasta_files(["../data/xpgma/xpgma1.fa"])
     pairwise = aligner.pairwise_alignments(records)

     clusterer = Xpgma()
     clusterer.create_distance_matrix(pairwise)
     tree = clusterer.calculate_guide_tree()

     want_tree = '((A:2.00,B:2.00):0.00,C:2.00)'
     # Adjacent literals are joined at compile time into one expected string.
     want_nodes = (
         "{'A': A:2.00, 'B': B:2.00, 'C': C:2.00, "
         "'AB': (A:2.00,B:2.00):0.00, 'ABC': ABC:NONE}"
     )
     self.assertEqual(str(tree), want_tree)
     self.assertEqual(str(tree.nodes), want_nodes)
 def test_convert_to_evolutionary_distances(self):
     """Check the distance conversion of the first pairwise alignment.

     Aligns the sequences from the conversion FASTA file, converts the score
     of the first alignment with FengDoolittle and compares the result with
     the expected value (almost-equal comparison).
     """
     # Pairwise sequence alignments of the test input.
     aligner = NeedlemanWunsch()
     records = utils.parse_fasta_files(["../data/feng_test/conversion.fa"])
     pairwise = aligner.pairwise_alignments(records)

     converter = FengDoolittle()
     first = pairwise[0]
     print(f'Alignment: {first} ')

     # Replace the alignment score by its converted value.
     first.score = converter.convert_to_evolutionary_distances(first)
     print(f'Score: {first.score} ')

     self.assertAlmostEqual(2.70805020110221, first.score)
def run_xpgma():
    """Run the Xpgma pipeline on the input described by ``args``.

    Reads ``args.input`` / ``args.file_filter`` to parse the sequences,
    aligns them pairwise with Needleman Wunsch, logs the alignments, creates
    the distance matrix and calculates the guide tree with the clustering
    method given by ``args.mode``. The value returned by
    ``calculate_guide_tree()`` is not used, and nothing is returned.
    """
    records = parse_input(args.input, args.file_filter)

    # Pairwise sequence alignments.
    aligner = NeedlemanWunsch(verbose=args.verbose)
    pairwise = aligner.pairwise_alignments(records)
    rendered = "\n".join(map(str, pairwise))
    LOGGER.info("Needleman Wunsch Alignments:\n%s" % rendered)

    # Distance matrix first, then the guide tree.
    clusterer = Xpgma(clustering_method=args.mode)
    clusterer.create_distance_matrix(pairwise)
    clusterer.calculate_guide_tree()
 def test_create_distance_matrix(self):
     """Check the distance matrix created for the xpgma1 sequences.

     After the pairwise alignments are fed into Xpgma, ``distances`` must map
     each of 'A', 'B' and 'C' to the other two labels with distance 4.0.
     """
     aligner = NeedlemanWunsch()
     records = utils.parse_fasta_files(["../data/xpgma/xpgma1.fa"])
     pairwise = aligner.pairwise_alignments(records)

     clusterer = Xpgma()
     clusterer.create_distance_matrix(pairwise)

     # Every label maps to each *other* label with the same distance.
     labels = ('A', 'B', 'C')
     expected = {
         row: {col: 4.0 for col in labels if col != row}
         for row in labels
     }
     self.assertDictEqual(clusterer.distances, expected)