Ejemplo n.º 1
0
    def test_MrAIC(self):
        """Check that --gene-trees with --mraic reports the expected models.

        Runs ProcessPhyloData with the local runner on
        'alignments/3.oneliners' and extracts the model name embedded in
        each output value.
        """
        mr_job = ProcessPhyloData([
            '-r', 'local', '--setup-cmd', 'mkdir -p tmp', '--gene-trees',
            '--mraic', '--archive=../gzips/osx.phylo.tar.gz#bin', "-"
        ])

        results = []
        # The alignment file is handed to the job as stdin; keep it open
        # until all output has been consumed, then close it rather than
        # leaking the handle.
        # NOTE(review): the 'U' mode flag is deprecated on Python 3 and was
        # removed in 3.11 - confirm which interpreter this suite targets.
        with open('alignments/3.oneliners', 'rU') as test_data:
            mr_job.sandbox(stdin=test_data)
            with mr_job.make_runner() as runner:
                runner.run()
                for line in runner.stream_output():
                    # Use the job's specified protocol to read the output
                    _, value = mr_job.parse_output_line(line)
                    results.append(value)

        # Get models from output: second space-separated token, quotes
        # stripped, text after the last '='.
        models = [
            item.split(" ")[1].strip("'").split('=')[-1] for item in results
        ]

        # Check results.
        # models looks like: ['GTR', 'HKY', 'HKY']
        self.assertIn('GTR', models)
        self.assertIn('HKY', models)
        self.assertEqual(len(results), 3)
    def test_MrAIC(self):
        """Check that --gene-trees with --mraic reports the expected models.

        Runs ProcessPhyloData with the local runner on
        'alignments/3.oneliners' and extracts the model name embedded in
        each output value.
        """
        job_args = [
            '-r', 'local',
            '--setup-cmd', 'mkdir -p tmp',
            '--gene-trees',
            '--mraic',
            '--archive=../gzips/osx.phylo.tar.gz#bin',
            "-",
        ]
        mr_job = ProcessPhyloData(job_args)

        results = []
        # Close the input file once the job output has been read instead of
        # leaving the handle open for the rest of the test run.
        # NOTE(review): the 'U' mode flag is deprecated on Python 3 and was
        # removed in 3.11 - confirm which interpreter this suite targets.
        with open('alignments/3.oneliners', 'rU') as test_data:
            mr_job.sandbox(stdin=test_data)
            with mr_job.make_runner() as runner:
                runner.run()
                for line in runner.stream_output():
                    # Use the job's specified protocol to read the output
                    _, value = mr_job.parse_output_line(line)
                    results.append(value)

        # Get models from output: second space-separated token, quotes
        # stripped, text after the last '='.
        models = [
            item.split(" ")[1].strip("'").split('=')[-1] for item in results
        ]

        # Check results.
        # models looks like: ['GTR', 'HKY', 'HKY']
        self.assertIn('GTR', models)
        self.assertIn('HKY', models)
        self.assertEqual(len(results), 3)
Ejemplo n.º 3
0
    def test_Bootstrapping(self):
        """Check that bootstrapping produces the expected number of outputs.

        Runs ProcessPhyloData with --full-analysis and --bootreps=5 on
        'alignments/3.oneliners' and expects 15 output values.
        """
        mr_job = ProcessPhyloData([
            '-r', 'local', '--setup-cmd', 'mkdir -p tmp', '--full-analysis',
            '--bootreps=5', '--archive=../gzips/osx.phylo.tar.gz#bin', "-"
        ])

        results = []
        # The alignment file is handed to the job as stdin; keep it open
        # until all output has been consumed, then close it rather than
        # leaking the handle.
        # NOTE(review): the 'U' mode flag is deprecated on Python 3 and was
        # removed in 3.11 - confirm which interpreter this suite targets.
        with open('alignments/3.oneliners', 'rU') as test_data:
            mr_job.sandbox(stdin=test_data)
            with mr_job.make_runner() as runner:
                runner.run()
                for line in runner.stream_output():
                    # Use the job's specified protocol to read the output
                    _, value = mr_job.parse_output_line(line)
                    results.append(value)

        self.assertEqual(len(results), 15)
Ejemplo n.º 4
0
    def test_GeneTrees(self):
        """Check that --gene-trees yields one output value per input line.

        Runs ProcessPhyloData with the local runner on
        'alignments/3.oneliners' and expects 3 output values.
        """
        mr_job = ProcessPhyloData([
            '-r', 'local', '--setup-cmd', 'mkdir -p tmp', '--gene-trees',
            '--archive=../gzips/osx.phylo.tar.gz#bin', "-"
        ])

        results = []
        # Close the input file once the job output has been read instead of
        # leaving the handle open for the rest of the test run.
        # NOTE(review): the 'U' mode flag is deprecated on Python 3 and was
        # removed in 3.11 - confirm which interpreter this suite targets.
        with open('alignments/3.oneliners', 'rU') as test_data:
            mr_job.sandbox(stdin=test_data)
            with mr_job.make_runner() as runner:
                runner.run()
                for line in runner.stream_output():
                    # Use the job's specified protocol to read the output
                    _, value = mr_job.parse_output_line(line)
                    results.append(value)

        self.assertEqual(len(results), 3)
    def test_Bootstrapping(self):
        """Check that bootstrapping produces the expected number of outputs.

        Runs ProcessPhyloData with --full-analysis and --bootreps=5 on
        'alignments/3.oneliners' and expects 15 output values.
        """
        job_args = [
            '-r', 'local',
            '--setup-cmd', 'mkdir -p tmp',
            '--full-analysis',
            '--bootreps=5',
            '--archive=../gzips/osx.phylo.tar.gz#bin',
            "-",
        ]
        mr_job = ProcessPhyloData(job_args)

        results = []
        # Close the input file once the job output has been read instead of
        # leaving the handle open for the rest of the test run.
        # NOTE(review): the 'U' mode flag is deprecated on Python 3 and was
        # removed in 3.11 - confirm which interpreter this suite targets.
        with open('alignments/3.oneliners', 'rU') as test_data:
            mr_job.sandbox(stdin=test_data)
            with mr_job.make_runner() as runner:
                runner.run()
                for line in runner.stream_output():
                    # Use the job's specified protocol to read the output
                    _, value = mr_job.parse_output_line(line)
                    results.append(value)

        self.assertEqual(len(results), 15)
Ejemplo n.º 6
0
    def test_get_genetrees(self):
        """[MrJob] Generate genetrees

        Runs ProcessPhyloData with --gene-trees on 'alignments/3.oneliners'
        using the local runner and expects 3 output values.
        """
        mr_job = ProcessPhyloData([
            '-r', 'local',
            '--setup-cmd',
            'mkdir -p tmp',
            '--gene-trees',
            '--archive=../gzips/osx.phylo.tar.gz#bin',
            "-",
        ])

        results = []
        # The alignment file is handed to the job as stdin; keep it open
        # until all output has been consumed, then close it rather than
        # leaking the handle.
        # NOTE(review): the 'U' mode flag is deprecated on Python 3 and was
        # removed in 3.11 - confirm which interpreter this suite targets.
        with open('alignments/3.oneliners', 'rU') as test_data:
            mr_job.sandbox(stdin=test_data)
            with mr_job.make_runner() as runner:
                runner.run()
                for line in runner.stream_output():
                    # Use the job's specified protocol to read the output
                    _, value = mr_job.parse_output_line(line)
                    results.append(value)

        # TODO: better checking of tree structure here - do distance checking
        self.assertEqual(len(results), 3)
    def test_GeneTrees(self):
        """Check that --gene-trees yields one output value per input line.

        Runs ProcessPhyloData with the local runner on
        'alignments/3.oneliners' and expects 3 output values.
        """
        job_args = [
            '-r', 'local',
            '--setup-cmd',
            'mkdir -p tmp',
            '--gene-trees',
            '--archive=../gzips/osx.phylo.tar.gz#bin',
            "-",
        ]
        mr_job = ProcessPhyloData(job_args)

        results = []
        # The alignment file is handed to the job as stdin; keep it open
        # until all output has been consumed, then close it rather than
        # leaking the handle.
        # NOTE(review): the 'U' mode flag is deprecated on Python 3 and was
        # removed in 3.11 - confirm which interpreter this suite targets.
        with open('alignments/3.oneliners', 'rU') as test_data:
            mr_job.sandbox(stdin=test_data)
            with mr_job.make_runner() as runner:
                runner.run()
                for line in runner.stream_output():
                    # Use the job's specified protocol to read the output
                    _, value = mr_job.parse_output_line(line)
                    results.append(value)

        self.assertEqual(len(results), 3)
Ejemplo n.º 8
0
 def test_MrAIC(self):
     """[MrJob] Select model and generate genetrees

     Runs ProcessPhyloData with --gene-trees and --mraic on
     'alignments/3.oneliners' using the local runner, then compares the
     models returned by self.get_models_from_mrjob_out with the expected
     list.
     """
     mr_job = ProcessPhyloData([
         '-r', 'local',
         '--setup-cmd',
         'mkdir -p tmp',
         '--gene-trees',
         '--mraic',
         '--archive=../gzips/osx.phylo.tar.gz#bin',
         '-',
     ])

     results = []
     # The alignment file is handed to the job as stdin; keep it open
     # until all output has been consumed, then close it rather than
     # leaking the handle.
     # NOTE(review): the 'U' mode flag is deprecated on Python 3 and was
     # removed in 3.11 - confirm which interpreter this suite targets.
     with open('alignments/3.oneliners', 'rU') as test_data:
         mr_job.sandbox(stdin=test_data)
         with mr_job.make_runner() as runner:
             runner.run()
             for line in runner.stream_output():
                 # Use the job's specified protocol to read the output
                 _, value = mr_job.parse_output_line(line)
                 results.append(value)

     # check models (the comparison is order-sensitive)
     expected_models = ['HKY', 'GTR', 'HKY']
     observed_models = self.get_models_from_mrjob_out(results)
     assert observed_models == expected_models