Esempio n. 1
0
def set_taxonomy(csv_file_path):
    """
    Set the niamoto taxonomy from a csv file.

    The csv must have a header and must contain at least the following
    columns:
    - id: The unique identifier of the taxon, in the provider's referential.
    - parent_id: The id of the taxon's parent. If the taxon is a root, leave
                 the value blank.
    - rank: The rank of the taxon, one of: 'REGNUM', 'PHYLUM', 'CLASSIS',
            'ORDO', 'FAMILIA', 'GENUS', 'SPECIES', 'INFRASPECIES'.
    - full_name: The full name of the taxon.
    - rank_name: The rank name of the taxon.
    All additional columns are considered as synonyms; their values must
    therefore be integers corresponding to the taxon's value in the
    referential pointed to by the synonym key (the column name).

    :param csv_file_path: The csv file path.
    :return: (number_of_taxon_inserted, synonyms_registered)
    :raises DataSourceNotFoundError: If csv_file_path does not exist or is
        a directory.
    """
    if not os.path.exists(csv_file_path) or os.path.isdir(csv_file_path):
        raise DataSourceNotFoundError(
            "The csv file '{}' had not been found.".format(csv_file_path))
    # DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0;
    # read_csv is the supported replacement. from_csv also tried to parse the
    # index as dates by default, which is not wanted for taxon identifiers,
    # so parse_dates is intentionally left at its read_csv default.
    dataframe = pd.read_csv(csv_file_path, index_col='id')
    return TaxonomyManager.set_taxonomy(dataframe)
Esempio n. 2
0
def populate_ncpippn_taxon_database(dataframe):
    """
    Load a taxonomic referential into a Niamoto database.

    The work is delegated to TaxonomyManager.set_taxonomy; whatever that
    call returns is discarded, so this function always returns None.

    :param dataframe: The dataframe containing the taxonomic referential.
    """
    _ = TaxonomyManager.set_taxonomy(dataframe)
    return None
 def test_set_taxonomy(self):
     """
     Check TaxonomyManager.set_taxonomy on three successive referentials:
     an empty one, a single-taxon one, and a three-taxon one carrying two
     extra columns ('gbif' and 'taxref'). After the last call, the stored
     taxa, the synonym keys and the synonyms per key are counted.
     """
     # Step 1: a frame with the expected columns but no rows.
     empty_frame = pd.DataFrame(
         columns=['full_name', 'rank_name', 'rank', 'parent_id'])
     inserted, _ = TaxonomyManager.set_taxonomy(empty_frame)
     self.assertEqual(inserted, 0)

     rank_enum = niamoto_db_meta.TaxonRankEnum

     # Step 2: a single root taxon, no extra columns.
     single_taxon_records = [
         {
             'id': 0,
             'full_name': 'Family One',
             'rank_name': 'One',
             'rank': rank_enum.FAMILIA,
             'parent_id': None,
         },
     ]
     single_taxon_frame = pd.DataFrame.from_records(
         single_taxon_records, index='id')
     inserted, _ = TaxonomyManager.set_taxonomy(single_taxon_frame)
     self.assertEqual(inserted, 1)

     # Step 3: three taxa, each with a value in the two extra columns.
     synonym_records = [
         {
             'id': 0,
             'full_name': 'Family One',
             'rank_name': 'One',
             'rank': rank_enum.FAMILIA,
             'parent_id': None,
             'gbif': 5,
             'taxref': 1,
         },
         {
             'id': 1,
             'full_name': 'Genus Two',
             'rank_name': 'Two',
             'rank': rank_enum.GENUS,
             'parent_id': 0,
             'gbif': 10,
             'taxref': 2,
         },
         {
             'id': 2,
             'full_name': 'Species Three',
             'rank_name': 'Three',
             'rank': rank_enum.SPECIES,
             'parent_id': None,
             'gbif': 7,
             'taxref': 3,
         },
     ]
     synonym_frame = pd.DataFrame.from_records(synonym_records, index='id')
     inserted, _ = TaxonomyManager.set_taxonomy(synonym_frame)
     self.assertEqual(inserted, 3)

     # Only three taxa are expected to be stored after the last call.
     self.assertEqual(len(TaxonomyManager.get_raw_taxon_dataframe()), 3)
     self.assertEqual(len(TaxonomyManager.get_synonym_keys()), 2)

     # (synonym key, expected number of synonyms), checked in this order.
     expected_counts = (
         (TaxonomyManager.IDENTITY_SYNONYM_KEY, 3),
         (None, 0),
         ("gbif", 3),
         ('taxref', 3),
     )
     for synonym_key, expected in expected_counts:
         found = TaxonomyManager.get_synonyms_for_key(synonym_key)
         self.assertEqual(len(found), expected)