Beispiel #1
0
    def test_build_tree(self):
        """Check which taxids build_tree returns for the fixture data.

        Uses inclusion root "6" and exclusion root "20" and expects exactly
        the keys '6', '16', '18' and '22' in the result.
        """
        parsed = taxidtool.parse_taxid_data(get_data_path('taxid_data.txt'))
        fasta_fp = get_data_path('sequences.fasta')
        include, exclude = ["6"], ["20"]

        tree = taxidtool.build_tree(parsed, fasta_fp, include, exclude)

        expected_taxids = {'6', '16', '18', '22'}
        self.assertEqual(set(tree.keys()), expected_taxids)
Beispiel #2
0
    def test_build_tree(self):
        """Check which taxids build_tree returns for the fixture data.

        Uses inclusion root "6" and exclusion root "20" and expects exactly
        the keys '6', '16', '18' and '22' in the result.
        """
        parsed = taxidtool.parse_taxid_data(get_data_path('taxid_data.txt'))
        fasta_fp = get_data_path('sequences.fasta')
        include, exclude = ["6"], ["20"]

        tree = taxidtool.build_tree(parsed, fasta_fp, include, exclude)

        expected_taxids = {'6', '16', '18', '22'}
        self.assertEqual(set(tree.keys()), expected_taxids)
Beispiel #3
0
    def test_write_taxid_data(self):
        """Round-trip taxid data through write_taxid_data/parse_taxid_data.

        The built data is written and parsed back, then the parsed data is
        written and parsed a second time; both parses must yield the same
        set of keys.
        """
        results = taxidtool.build_taxid_data(
            get_data_path('nodes.dmp'),
            get_data_path('names.dmp'),
            get_data_path("gi_taxid_nucl.dmp"))

        with tempfile.TemporaryDirectory() as tmp_dir:
            # Create the two target files and close them before use: a
            # NamedTemporaryFile that is still open cannot be reopened by
            # name on every platform (e.g. Windows).  The directory context
            # manager removes both files afterwards.
            target_fps = []
            for _ in range(2):
                with tempfile.NamedTemporaryFile(dir=tmp_dir,
                                                 delete=False) as fh:
                    target_fps.append(fh.name)
            first_fp, second_fp = target_fps

            taxidtool.write_taxid_data(results, first_fp)
            data = taxidtool.parse_taxid_data(first_fp)
            taxidtool.write_taxid_data(data, second_fp)
            data2 = taxidtool.parse_taxid_data(second_fp)
            self.assertEqual(set(data.keys()), set(data2.keys()))
Beispiel #4
0
    def test_write_taxid_data(self):
        """Round-trip taxid data through write_taxid_data/parse_taxid_data.

        The built data is written and parsed back, then the parsed data is
        written and parsed a second time; both parses must yield the same
        set of keys.
        """
        results = taxidtool.build_taxid_data(
            get_data_path('nodes.dmp'),
            get_data_path('names.dmp'),
            get_data_path("gi_taxid_nucl.dmp"))

        with tempfile.TemporaryDirectory() as tmp_dir:
            # Create the two target files and close them before use: a
            # NamedTemporaryFile that is still open cannot be reopened by
            # name on every platform (e.g. Windows).  The directory context
            # manager removes both files afterwards.
            target_fps = []
            for _ in range(2):
                with tempfile.NamedTemporaryFile(dir=tmp_dir,
                                                 delete=False) as fh:
                    target_fps.append(fh.name)
            first_fp, second_fp = target_fps

            taxidtool.write_taxid_data(results, first_fp)
            data = taxidtool.parse_taxid_data(first_fp)
            taxidtool.write_taxid_data(data, second_fp)
            data2 = taxidtool.parse_taxid_data(second_fp)
            self.assertEqual(set(data.keys()), set(data2.keys()))
Beispiel #5
0
    def test_build_taxid_data(self):
        """Validate records produced by build_taxid_data from the dump files.

        Every record must carry its own key as ``taxid``, have the
        scientific name "Node<taxid>", and list only GIs that start with
        that taxid.  A few fixed records are then spot-checked.
        """
        results = taxidtool.build_taxid_data(
            get_data_path('nodes.dmp'),
            get_data_path('names.dmp'),
            get_data_path("gi_taxid_nucl.dmp"))

        for key, entry in results.items():
            self.assertEqual(entry.taxid, key)
            expected_name = "Node%s" % key
            self.assertEqual(entry.sci_name, expected_name)
            for assoc_gi in entry.assoc_gis:
                self.assertTrue(assoc_gi.startswith(key))

        # Spot checks on individual records of the fixture.
        node_23 = results['23']
        self.assertEqual(node_23.parents, ['1', '3', '7'])
        self.assertEqual(results['19'].children, [])
        self.assertEqual(node_23.rank, 'species')
        self.assertEqual(set(results['1'].children), {'3', '2'})
Beispiel #6
0
    def test_build_taxid_data(self):
        """Validate records produced by build_taxid_data from the dump files.

        Every record must carry its own key as ``taxid``, have the
        scientific name "Node<taxid>", and list only GIs that start with
        that taxid.  A few fixed records are then spot-checked.
        """
        results = taxidtool.build_taxid_data(
            get_data_path('nodes.dmp'),
            get_data_path('names.dmp'),
            get_data_path("gi_taxid_nucl.dmp"))

        for key, entry in results.items():
            self.assertEqual(entry.taxid, key)
            expected_name = "Node%s" % key
            self.assertEqual(entry.sci_name, expected_name)
            for assoc_gi in entry.assoc_gis:
                self.assertTrue(assoc_gi.startswith(key))

        # Spot checks on individual records of the fixture.
        node_23 = results['23']
        self.assertEqual(node_23.parents, ['1', '3', '7'])
        self.assertEqual(results['19'].children, [])
        self.assertEqual(node_23.rank, 'species')
        self.assertEqual(set(results['1'].children), {'3', '2'})
Beispiel #7
0
    def test_build_branch(self):
        """Check which taxids build_branch returns for inclusion root "6".

        Expects exactly the keys '6', '16', '18', '20' and '22'.
        """
        parsed = taxidtool.parse_taxid_data(get_data_path('taxid_data.txt'))
        roots = ["6"]

        branch = taxidtool.build_branch(parsed, roots)

        expected_taxids = {'6', '16', '18', '20', '22'}
        self.assertEqual(set(branch.keys()), expected_taxids)
Beispiel #8
0
    def test_build_branch(self):
        """Check which taxids build_branch returns for inclusion root "6".

        Expects exactly the keys '6', '16', '18', '20' and '22'.
        """
        parsed = taxidtool.parse_taxid_data(get_data_path('taxid_data.txt'))
        roots = ["6"]

        branch = taxidtool.build_branch(parsed, roots)

        expected_taxids = {'6', '16', '18', '20', '22'}
        self.assertEqual(set(branch.keys()), expected_taxids)
Beispiel #9
0
    def test_build_gis_to_taxids(self):
        """Every GI listed on a record must map back to that record's key."""
        parsed = taxidtool.parse_taxid_data(get_data_path('taxid_data.txt'))

        gi_to_taxid = taxidtool.build_gis_to_taxids(parsed)

        for owner_taxid, entry in parsed.items():
            for assoc_gi in entry.assoc_gis:
                self.assertEqual(owner_taxid, gi_to_taxid[assoc_gi])
Beispiel #10
0
    def test_build_gis_to_taxids(self):
        """Every GI listed on a record must map back to that record's key."""
        parsed = taxidtool.parse_taxid_data(get_data_path('taxid_data.txt'))

        gi_to_taxid = taxidtool.build_gis_to_taxids(parsed)

        for owner_taxid, entry in parsed.items():
            for assoc_gi in entry.assoc_gis:
                self.assertEqual(owner_taxid, gi_to_taxid[assoc_gi])
Beispiel #11
0
 def test_parse_nodes_dmp(self):
     """Validate the records parse_nodes_dmp builds from nodes.dmp.

     Each record must carry its own key as ``taxid``; a few parent and
     rank values are then spot-checked.
     """
     nodes = taxidtool.parse_nodes_dmp(get_data_path('nodes.dmp'))

     for key, node in nodes.items():
         self.assertEqual(node.taxid, key)

     # (taxid, attribute, expected value), checked in this order.
     spot_checks = (
         ('12', 'parent_taxid', '4'),
         ('2', 'rank', 'kingdom'),
         ('5', 'parent_taxid', '3'),
         ('8', 'rank', 'species'),
     )
     for key, attr, expected in spot_checks:
         self.assertEqual(getattr(nodes[key], attr), expected)
Beispiel #12
0
 def test_parse_nodes_dmp(self):
     """Validate the records parse_nodes_dmp builds from nodes.dmp.

     Each record must carry its own key as ``taxid``; a few parent and
     rank values are then spot-checked.
     """
     nodes = taxidtool.parse_nodes_dmp(get_data_path('nodes.dmp'))

     for key, node in nodes.items():
         self.assertEqual(node.taxid, key)

     # (taxid, attribute, expected value), checked in this order.
     spot_checks = (
         ('12', 'parent_taxid', '4'),
         ('2', 'rank', 'kingdom'),
         ('5', 'parent_taxid', '3'),
         ('8', 'rank', 'species'),
     )
     for key, attr, expected in spot_checks:
         self.assertEqual(getattr(nodes[key], attr), expected)
Beispiel #13
0
 def test_prepare(self):
     """Run Indiana.prepare against a temporary copy of test_run.ini.

     The copy overrides ``input_file`` in section ``module3`` and
     ``input_dir`` in section ``module4`` with per-test paths.  There is
     no explicit assertion: the test passes if prepare() does not raise.
     """
     ini_suffix = '.ini'
     runner = Indiana(self.yax_dir, pipeline=self.arch_config_fp)

     run_config = configparser.ConfigParser()
     run_config.read(get_data_path('test_run.ini'))
     run_config['module3']['input_file'] = self.arch_config_fp
     run_config['module4']['input_dir'] = self.yax_dir

     with tempfile.NamedTemporaryFile(mode='w', dir=self.yax_dir,
                                      suffix=ini_suffix) as config_fh:
         run_config.write(config_fh)
         # Flush buffered output before handing the path on; presumably
         # prepare() reads the file by name -- confirm against Indiana.
         config_fh.flush()
         # prepare() is given the path with the '.ini' suffix stripped.
         runner.prepare(config_fh.name[:-len(ini_suffix)])
Beispiel #14
0
 def test_prepare(self):
     """Run Indiana.prepare against a temporary copy of test_run.ini.

     The copy overrides ``input_file`` in section ``module3`` and
     ``input_dir`` in section ``module4`` with per-test paths.  There is
     no explicit assertion: the test passes if prepare() does not raise.
     """
     ini_suffix = '.ini'
     runner = Indiana(self.yax_dir, pipeline=self.arch_config_fp)

     run_config = configparser.ConfigParser()
     run_config.read(get_data_path('test_run.ini'))
     run_config['module3']['input_file'] = self.arch_config_fp
     run_config['module4']['input_dir'] = self.yax_dir

     with tempfile.NamedTemporaryFile(mode='w', dir=self.yax_dir,
                                      suffix=ini_suffix) as config_fh:
         run_config.write(config_fh)
         # Flush buffered output before handing the path on; presumably
         # prepare() reads the file by name -- confirm against Indiana.
         config_fh.flush()
         # prepare() is given the path with the '.ini' suffix stripped.
         runner.prepare(config_fh.name[:-len(ini_suffix)])
Beispiel #15
0
    def test_parse_taxid_data(self):
        """Validate the records parse_taxid_data reads from taxid_data.txt.

        Every record must carry its own key as ``taxid``, have the
        scientific name "Node<taxid>", and list only GIs that start with
        that taxid.  A few fixed records are then spot-checked.
        """
        results = taxidtool.parse_taxid_data(get_data_path('taxid_data.txt'))

        for key, entry in results.items():
            self.assertEqual(entry.taxid, key)
            expected_name = "Node%s" % key
            self.assertEqual(entry.sci_name, expected_name)
            for assoc_gi in entry.assoc_gis:
                self.assertTrue(assoc_gi.startswith(key))

        # Spot checks on individual records of the fixture.
        node_23 = results['23']
        self.assertEqual(node_23.parents, ['1', '3', '7'])
        self.assertEqual(results['19'].children, [])
        self.assertEqual(node_23.rank, 'species')
        self.assertEqual(set(results['1'].children), {'3', '2'})
Beispiel #16
0
    def test_parse_taxid_data(self):
        """Validate the records parse_taxid_data reads from taxid_data.txt.

        Every record must carry its own key as ``taxid``, have the
        scientific name "Node<taxid>", and list only GIs that start with
        that taxid.  A few fixed records are then spot-checked.
        """
        results = taxidtool.parse_taxid_data(get_data_path('taxid_data.txt'))

        for key, entry in results.items():
            self.assertEqual(entry.taxid, key)
            expected_name = "Node%s" % key
            self.assertEqual(entry.sci_name, expected_name)
            for assoc_gi in entry.assoc_gis:
                self.assertTrue(assoc_gi.startswith(key))

        # Spot checks on individual records of the fixture.
        node_23 = results['23']
        self.assertEqual(node_23.parents, ['1', '3', '7'])
        self.assertEqual(results['19'].children, [])
        self.assertEqual(node_23.rank, 'species')
        self.assertEqual(set(results['1'].children), {'3', '2'})
Beispiel #17
0
 def setUp(self):
     """Create a per-test temporary directory and locate the arch config.

     Sets ``self.yax_dir`` to the path of a fresh temporary directory and
     ``self.arch_config_fp`` to the 'arch_config.py' fixture in the
     'test_pipeline' data subfolder.
     """
     self._yax_dir = tempfile.TemporaryDirectory()
     # Register removal right away so the directory is deleted after each
     # test, including failing ones, instead of lingering until the
     # TemporaryDirectory object is garbage collected.
     self.addCleanup(self._yax_dir.cleanup)
     self.yax_dir = self._yax_dir.name
     self.arch_config_fp = get_data_path('arch_config.py',
                                         subfolder='test_pipeline')
Beispiel #18
0
 def test_parse_names_dmp(self):
     """Every value parsed from names.dmp must equal "Node" + its key."""
     parsed_names = taxidtool.parse_names_dmp(get_data_path('names.dmp'))

     for identifier, name in parsed_names.items():
         expected_name = "Node%s" % identifier
         self.assertEqual(name, expected_name)
Beispiel #19
0
 def setUp(self):
     """Create a per-test temporary directory and locate the arch config.

     Sets ``self.yax_dir`` to the path of a fresh temporary directory and
     ``self.arch_config_fp`` to the 'arch_config.py' fixture in the
     'test_pipeline' data subfolder.
     """
     self._yax_dir = tempfile.TemporaryDirectory()
     # Register removal right away so the directory is deleted after each
     # test, including failing ones, instead of lingering until the
     # TemporaryDirectory object is garbage collected.
     self.addCleanup(self._yax_dir.cleanup)
     self.yax_dir = self._yax_dir.name
     self.arch_config_fp = get_data_path('arch_config.py',
                                         subfolder='test_pipeline')
Beispiel #20
0
 def test_parse_names_dmp(self):
     """Every value parsed from names.dmp must equal "Node" + its key."""
     parsed_names = taxidtool.parse_names_dmp(get_data_path('names.dmp'))

     for identifier, name in parsed_names.items():
         expected_name = "Node%s" % identifier
         self.assertEqual(name, expected_name)
Beispiel #21
0
 def test_parse_gi_taxid_dmp(self):
     """Each GI parsed from gi_taxid_nucl.dmp must start with its key."""
     dump_fp = get_data_path('gi_taxid_nucl.dmp')
     gis_by_taxid = taxidtool.parse_gi_taxid_dmp(dump_fp)

     for owner_taxid, assoc_gis in gis_by_taxid.items():
         for assoc_gi in assoc_gis:
             self.assertTrue(assoc_gi.startswith(owner_taxid))
Beispiel #22
0
 def test_parse_gi_taxid_dmp(self):
     """Each GI parsed from gi_taxid_nucl.dmp must start with its key."""
     dump_fp = get_data_path('gi_taxid_nucl.dmp')
     gis_by_taxid = taxidtool.parse_gi_taxid_dmp(dump_fp)

     for owner_taxid, assoc_gis in gis_by_taxid.items():
         for assoc_gi in assoc_gis:
             self.assertTrue(assoc_gi.startswith(owner_taxid))