예제 #1
0
 def test_retrieve_hierarchy(self):
     """retrieve_hierarchy() returns the expected ancestor entries, in order."""
     # (name, rank, tax_id) for each expected ancestor, in the expected order.
     lineage = (
         ('cellular organisms', 'no rank', '131567'),
         ('Bacteria', 'superkingdom', '2'),
         ('Terrabacteria group', 'clade', '1783272'),
         ('Firmicutes', 'phylum', '1239'),
         ('Bacilli', 'class', '91061'),
         ('Lactobacillales', 'order', '186826'),
     )
     expected_list = [
         {'name': name, 'rank': rank, 'tax_id': tax_id}
         for name, rank, tax_id in lineage
     ]
     tested_list = NCBITaxonomyScrapper(self.file).retrieve_hierarchy()
     self.assertListEqual(tested_list, expected_list)
예제 #2
0
 def test_retrieve_current_item(self):
     """retrieve_current_item() returns the rank, tax_id and name of the parsed entry."""
     tested_dict = NCBITaxonomyScrapper(self.file).retrieve_current_item()
     self.assertDictEqual(
         tested_dict,
         {
             'name': 'Lactobacillaceae',
             'rank': 'family',
             'tax_id': '33958',
         },
     )
예제 #3
0
 def get(self, tax_id: int, get_model: bool = True) -> Union[NCBITaxonomyScrapper.model, dict]:
     """
     Request the NCBI Taxonomy Browser info page for ``tax_id`` and return the scrapped entry.

     The requested URL is stored in ``self.last_url_requested`` once the request
     has returned, before the response status is checked.

     :param tax_id: NCBI taxonomy ID to retrieve data from
     :param get_model: return pydantic model (return dict if False)
     :return: the scrapper's ``validated_entry``, or the result of its ``.dict()``
         method when ``get_model`` is False
     :raises requests.exceptions.HTTPError: when the scrapper reports that no result
         was found for ``tax_id``; ``response.raise_for_status()`` may also raise
         on an error status
     """
     full_url = f"https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id={tax_id}&mode=info"
     response = self.session.get(full_url)
     self.last_url_requested = full_url
     response.raise_for_status()

     scrapper = NCBITaxonomyScrapper(response.content)
     if scrapper.result_found():
         entry = scrapper.validated_entry
         return entry if get_model else entry.dict()

     # The page was served but the scrapper found no taxonomy result in it.
     raise requests.exceptions.HTTPError(f"{tax_id} not found in NCBI taxonomy db.")
예제 #4
0
 def test_retrieve_current_item_variant_1(self):
     """
     Some entries are displayed differently and carry more information, for instance tax_id 12345
     """
     file_path = os.path.join(os.path.dirname(__file__),
                              'data/tax_12345.html')
     expected_dict = {
         'rank': 'species',
         'tax_id': '12345',
         'name': 'Bacillus virus GA1'
     }
     # Keep the fixture open for the whole scrapper interaction, and make sure
     # the handle is closed afterwards even if parsing raises.
     with open(file_path, "rb") as tax_file:
         scrapper = NCBITaxonomyScrapper(tax_file)
         tested_dict = scrapper.retrieve_current_item()
     self.assertDictEqual(tested_dict, expected_dict)
예제 #5
0
 def test_retrieve_current_item_no_link(self):
     """
     Some entries have their name without a link, for instance tax_id 339588
     """
     file_path = os.path.join(os.path.dirname(__file__),
                              'data/tax_339588.html')
     expected_dict = {
         'rank': 'species',
         'tax_id': '339588',
         'name': 'Peyssonnelia inamoena'
     }
     # Keep the fixture open for the whole scrapper interaction, and make sure
     # the handle is closed afterwards even if parsing raises.
     with open(file_path, "rb") as tax_file:
         scrapper = NCBITaxonomyScrapper(tax_file)
         tested_dict = scrapper.retrieve_current_item()
     self.assertDictEqual(tested_dict, expected_dict)
예제 #6
0
 def test_extract_tax_id_from_url(self):
     """extract_tax_id_from_url() returns the value of the ``id`` query parameter as a string."""
     browser_url = "wwwtax.cgi?mode=Undef&id=131567&lvl=3&keep=1&srchmode=1&unlock"
     self.assertEqual(
         NCBITaxonomyScrapper.extract_tax_id_from_url(browser_url),
         "131567",
     )
예제 #7
0
 def test_result_found_error_page(self):
     """result_found() is falsy for the saved error page fixture."""
     file_path = os.path.join(os.path.dirname(__file__),
                              'data/error_page.html')
     # Keep the fixture open for the whole scrapper interaction, and make sure
     # the handle is closed afterwards even if parsing raises.
     with open(file_path, "rb") as tax_file:
         scrapper = NCBITaxonomyScrapper(tax_file)
         found = scrapper.result_found()
     self.assertFalse(found)
예제 #8
0
 def test_result_found(self):
     """result_found() is truthy for the scrapper built from ``self.file``."""
     found = NCBITaxonomyScrapper(self.file).result_found()
     self.assertTrue(found)