Ejemplo n.º 1
0
    def test_build_tree(self, temp_mock):
        """Build a tree from ``animals.mfa`` and check its terminal nodes.

        ``temp_mock`` is configured so that its ``NamedTemporaryFile`` and
        ``mkdtemp`` return a file and a folder created here; presumably it is
        patched over ``tempfile`` in the module under test (the decorator is
        not visible from this method -- confirm).  Knowing those paths lets the
        test assert that neither is left on disk after ``build_tree``.
        """
        # Local import: this method did not previously depend on shutil.
        import shutil

        builder = TreeBuilder()

        # I could mock _run_raxml but, because I'm using a regex to parse the
        # results, I want to know straight away if the output of RAxML changes
        # in a way that breaks this.

        random_file = tempfile.NamedTemporaryFile('w', delete=False)
        random_filename = random_file.name
        random_folder = tempfile.mkdtemp()

        def remove_leftovers():
            # Only matters when the test fails before the builder has tidied
            # up; on success both paths are already gone.
            random_file.close()
            if os.path.isfile(random_filename):
                os.remove(random_filename)
            shutil.rmtree(random_folder, ignore_errors=True)

        self.addCleanup(remove_leftovers)

        temp_mock.NamedTemporaryFile.return_value = random_file
        temp_mock.mkdtemp.return_value = random_folder

        # Context manager so the handle is closed even if loading raises.
        with open(os.path.join(test_data(), 'animals.mfa'), 'r') as fasta_file:
            builder.load_fasta_sequences(fasta_file)

        tree = builder.build_tree()

        tree_nodes = sorted([node.name for node in tree.get_terminals()])
        expected_tree_nodes = [
            "Carp", "Chicken", "Cow", "Frog", "Human", "Loach", "Mouse", "Rat",
            "Seal", "Whale"
        ]

        self.assertIsInstance(tree, Bio.Phylo.Newick.Tree)
        self.assertEqual(tree_nodes, expected_tree_nodes)
        # The temporary folder and file handed out via temp_mock must be gone.
        self.assertFalse(os.path.isdir(random_folder))
        self.assertFalse(os.path.isfile(random_filename))
Ejemplo n.º 2
0
    def test_run_fastml(self):
        """Run ``_run_fastml`` and check it writes both ``all_nodes`` files.

        The output directory and the comment-free fasta copy are registered
        with ``addCleanup`` so they are removed even when the run or an
        assertion fails.
        """
        builder = TreeBuilder()

        fasta_filename = os.path.join(test_data(), 'animals.mfa')
        tree_filename = os.path.join(test_data(),
                                     'animals.terminal_nodes.newick')
        output_directory = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, output_directory)

        # fastml doesn't like comments in fasta files, guesses it is a
        # different format and gets confused when a sequence has a '>' in it.
        # I'm therefore creating a temporary file without the comments in it.
        fasta_file_without_comments = create_fasta_without_comments(
            fasta_filename)
        self.addCleanup(os.remove, fasta_file_without_comments)

        # The values are not inspected, but unpacking still requires the call
        # to return exactly two items.
        fastml_stdout, fastml_stderr = builder._run_fastml(
            'fastml', {}, tree_filename, fasta_file_without_comments,
            output_directory)

        self.assertTrue(
            os.path.isfile(os.path.join(output_directory, 'all_nodes.newick')))
        self.assertTrue(
            os.path.isfile(os.path.join(output_directory, 'all_nodes.mfa')))
0
    def test_create_temporary_phylip(self, temp_mock):
        """Check the PHYLIP text produced for two short FASTA records.

        ``temp_mock`` is set to return an in-memory buffer; the test expects
        ``_create_temporary_phylip`` to return that same buffer, filled with
        the expected PHYLIP text.
        """
        phylip_buffer = StringIO()
        temp_mock.return_value = phylip_buffer

        builder = TreeBuilder()

        fasta_input = StringIO(
            ">cluster_A\n"
            "NNAACAAAANN\n"
            ">cluster_B\n"
            "CCAACAAAANN\n"
        )
        expected_phylip = (
            " 2 11\n"
            "cluster_A  NNAACAAAAN N\n"
            "cluster_B  CCAACAAAAN N\n"
        )

        builder.load_fasta_sequences(fasta_input)
        returned_file = builder._create_temporary_phylip(builder.sequences)

        # The builder must hand back the very buffer supplied by the mock.
        self.assertEqual(returned_file, phylip_buffer)

        # Rewind before reading so the whole content is compared.
        returned_file.seek(0)
        actual_phylip = returned_file.read()
        self.assertEqual(actual_phylip, expected_phylip)
Ejemplo n.º 4
0
    def test_write_tree(self):
        """Check ``_write_tree`` reproduces the newick file it was read from."""
        newick_path = os.path.join(test_data(),
                                   'animals.terminal_nodes.newick')

        # The fixture file itself is the expected output.
        with open(newick_path, 'r') as newick_file:
            expected_text = newick_file.read()

        builder = TreeBuilder()
        parsed_tree = Bio.Phylo.read(newick_path, 'newick')

        written = StringIO()
        builder._write_tree(parsed_tree, written)

        # Rewind so the comparison covers everything that was written.
        written.seek(0)
        actual_text = written.read()

        self.assertEqual(actual_text, expected_text)
Ejemplo n.º 5
0
    def test_write_output(self):
        """Check ``write_output`` passes the tree and sequences to the writers.

        Both private writers are replaced with mocks, so only the arguments
        they receive are verified.
        """
        builder = TreeBuilder()

        # Placeholder data: the writers are mocked, so real objects are not
        # needed.
        builder.tree = "Tree"
        builder.sequences = {"seq1": "a sequence object"}
        builder.tree_output_file = StringIO()
        builder.sequences_output_file = StringIO()

        tree_writer = MagicMock()
        sequence_writer = MagicMock()
        builder._write_tree = tree_writer
        builder._write_sequences = sequence_writer

        builder.write_output()

        tree_writer.assert_called_with("Tree", builder.tree_output_file)
        # The sequences are expected as a list of the dict's values.
        sequence_writer.assert_called_with(
            ["a sequence object"], builder.sequences_output_file)
Ejemplo n.º 6
0
    def test_run_raxml(self):
        """Run ``_run_raxml`` on the animals alignment and check its stdout.

        The phylip file and the output directory are registered with
        ``addCleanup`` so they are removed even if the run or the assertion
        fails.
        """
        builder = TreeBuilder()

        # Context manager so the handle is closed even if loading raises.
        with open(os.path.join(test_data(), 'animals.mfa'), 'r') as fasta_file:
            builder.load_fasta_sequences(fasta_file)

        phylip_file = builder._create_temporary_phylip(builder.sequences)
        # Closed before its name is handed to the external program.
        phylip_file.close()
        self.addCleanup(os.remove, phylip_file.name)

        output_directory = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, output_directory)

        raxml_stdout, raxml_stderr = builder._run_raxml(
            'raxmlHPC', {}, phylip_file.name, output_directory)

        # assertIn shows the captured stdout in the failure message.
        self.assertIn('Best-scoring ML tree written to', raxml_stdout)
Ejemplo n.º 7
0
    def test_write_sequences(self):
        """Check ``_write_sequences`` reproduces the fixture minus its header.

        The expected text is ``animals.mfa`` with its first five lines
        (comments) dropped.
        """
        builder = TreeBuilder()

        fasta_filename = os.path.join(test_data(), 'animals.mfa')
        output = StringIO()

        # The parser result is consumed while the file is still open, and the
        # context manager closes the handle even if writing raises.
        with open(fasta_filename, 'r') as fasta_file:
            sequences = Bio.SeqIO.parse(fasta_file, 'fasta')
            builder._write_sequences(sequences, output)

        with open(fasta_filename, 'r') as fasta_file:
            expected = fasta_file.readlines()
            expected = "".join(expected[5:])  # Remove comments from the top
        output.seek(0)

        self.assertEqual(output.read(), expected)
Ejemplo n.º 8
0
    def test_get_raxml_tree_file(self):
        builder = TreeBuilder()

        fake_stdout = """\
Inference[0] final GAMMA-based Likelihood: -385.279958 tree written to file/tmp/tmpaA9uJN/RAxML_result.raxml_output


Starting final GAMMA-basedthorough Optimization on tree 0 likelihood -385.279958 .... 

FinalGAMMA-based Score of best tree -385.279958

Program execution info written to/tmp/tmpaA9uJN/RAxML_info.raxml_output
Best-scoring ML tree written to:  /tmp/tmpaA9uJN/RAxML_bestTree.raxml_output

Overall execution time: 0.219131  secs or 0.000061 hours or 0.000003 days

"""

        output = builder._get_raxml_tree_file(fake_stdout)
        self.assertEqual(output, '/tmp/tmpaA9uJN/RAxML_bestTree.raxml_output')

        fake_stdout = """\
Final tree written to:      /tmp/tmpaA9uJN/raxml_output
"""

        output = builder._get_raxml_tree_file(fake_stdout)
        self.assertEqual(output, '/tmp/tmpaA9uJN/raxml_output')

        fake_stdout = """\
Oh dear

Something went wrong

I've written something to /tmp/wrong.log

But it is definitly not what you're looking for.

Sorry
"""

        output = builder._get_raxml_tree_file(fake_stdout)
        self.assertEqual(output, None)
Ejemplo n.º 9
0
    def test_add_hereditary_nodes(self, temp_mock):
        """Check ``add_hereditary_nodes`` names every clade and cleans up.

        ``temp_mock`` is configured so that ``mkdtemp`` returns a directory
        created here and each ``NamedTemporaryFile`` call yields a fresh file
        inside it (at most ten); presumably it is patched over ``tempfile`` in
        the module under test (the decorator is not visible from this method
        -- confirm).  The test then asserts that the directory is gone after
        the call.
        """
        # Local import: this method did not previously depend on shutil.
        import shutil

        builder = TreeBuilder()

        output_directory = tempfile.mkdtemp()
        # Only matters when the test fails before the builder has removed the
        # directory; on success it is already gone, hence ignore_errors.
        self.addCleanup(shutil.rmtree, output_directory, ignore_errors=True)

        temp_mock.mkdtemp.return_value = output_directory
        # A lazy generator: a real file is created only when the code under
        # test asks for one.
        temp_mock.NamedTemporaryFile.side_effect = (
            tempfile.NamedTemporaryFile(dir=output_directory, delete=False)
            for i in xrange(10))

        fasta_filename = os.path.join(test_data(), 'animals.mfa')
        with open(fasta_filename) as fasta_file:
            builder.load_fasta_sequences(fasta_file)

        tree_filename = os.path.join(test_data(),
                                     'animals.terminal_nodes.newick')
        builder.tree = Bio.Phylo.read(tree_filename, 'newick')

        builder.add_hereditary_nodes()

        self.assertIsInstance(builder.tree, Bio.Phylo.Newick.Tree)
        # No sorting needed: assertItemsEqual ignores order.
        tree_nodes = [node.name for node in builder.tree.find_clades()]

        self.assertItemsEqual(tree_nodes, builder.sequences.keys())
        self.assertEqual(len(tree_nodes), 19)
        self.assertFalse(os.path.isdir(output_directory))