Example #1
0
 def test_get_synonyms_map(self):
     """
     Check the synonyms retrieved for a registered synonym key.
     Two family taxa (ids 0 and 1) are inserted with the synonym values
     10 and 20 under the same key; the retrieved synonyms must then give
     taxon id 0 for 10 and taxon id 1 for 20.
     """
     synonym_key = "synonym_key_1"
     TaxonomyManager.register_synonym_key("synonym_key_1")
     # (taxon id, full name, rank name, synonym value) of each fixture row
     fixtures = (
         (0, 'Family One', 'One', 10),
         (1, 'Family Two', 'Two', 20),
     )
     rows = [
         {
             'id': taxon_id,
             'full_name': full_name,
             'rank_name': rank_name,
             'rank': niamoto_db_meta.TaxonRankEnum.FAMILIA,
             'parent_id': None,
             'synonyms': {
                 synonym_key: synonym_value,
             },
             'mptt_left': 0,
             'mptt_right': 0,
             'mptt_tree_id': 0,
             'mptt_depth': 0,
         }
         for taxon_id, full_name, rank_name, synonym_value in fixtures
     ]
     insert_stmt = niamoto_db_meta.taxon.insert().values(rows)
     with Connector.get_connection() as connection:
         connection.execute(insert_stmt)
     lookup = TaxonomyManager.get_synonyms_for_key(synonym_key)
     # Each synonym value must resolve to the id of the taxon declaring it
     for taxon_id, _, _, synonym_value in fixtures:
         self.assertEqual(lookup.loc[synonym_value], taxon_id)
 def map_provider_taxon_ids(self, dataframe):
     """
     Map provider's taxon ids with Niamoto taxon ids when importing data.
     The dataframe is modified in place and nothing is returned: the
     current 'taxon_id' values are copied to a 'provider_taxon_id'
     column, then 'taxon_id' is replaced by its mapping through the
     synonyms of the data provider's synonym key.
     :param dataframe: The provider's dataframe where the mapping has to
     be done. It must have a 'taxon_id' column holding the provider's
     taxon ids.
     """
     prefix = "(provider_id='{}', synonym_key='{}'): "
     LOGGER.debug((prefix + "Mapping provider's taxon ids...").format(
         self.data_provider.db_id,
         self.data_provider.synonym_key,
     ))
     key_to_taxon = TaxonomyManager.get_synonyms_for_key(
         self.data_provider.synonym_key)
     # Keep the provider's own ids before overwriting them
     provider_ids = dataframe["taxon_id"]
     dataframe["provider_taxon_id"] = provider_ids
     dataframe["taxon_id"] = provider_ids.map(key_to_taxon)
     # Note: the logged count is the number of synonyms available for
     # the key, not the number of rows of the dataframe.
     LOGGER.debug((prefix + "{} taxon ids had been mapped.").format(
         self.data_provider.db_id,
         self.data_provider.synonym_key,
         len(key_to_taxon),
     ))
 def update_synonym_mapping(self, connection=None):
     """
     Update the synonym mapping of an already stored dataframe.
     To be called when a synonym had been defined or modified, but not
     the occurrences.
     The stored occurrences are loaded, their 'provider_taxon_id' is
     mapped through the synonyms of the data provider's synonym key and,
     when there is at least one occurrence, the resulting taxon ids are
     written back to the occurrence table through an intermediate table
     filled with COPY.
     :param connection: If passed, use an existing connection; it is
     left open. Otherwise a connection is opened and always closed here.
     :return: A (mapping, synonyms) tuple: the mapped
     'provider_taxon_id' column and the synonyms used for the mapping.
     """
     # Log start
     m = "(provider_id='{}', synonym_key='{}'): Updating synonym " \
         "mapping..."
     LOGGER.debug(
         m.format(self.data_provider.db_id, self.data_provider.synonym_key))
     close_after = connection is None
     if close_after:
         connection = Connector.get_engine().connect()
     # Start
     try:
         df = self.get_niamoto_occurrence_dataframe(connection)
     finally:
         # Always release a connection opened here, even when loading
         # the occurrences fails; a caller-supplied one is left open.
         if close_after:
             connection.close()
     synonyms = TaxonomyManager.get_synonyms_for_key(
         self.data_provider.synonym_key)
     mapping = df["provider_taxon_id"].map(synonyms)
     if len(df) > 0:
         df["taxon_id"] = mapping
         df = df[['provider_id', 'provider_pk', 'taxon_id']]
         s = io.StringIO()
         # to_csv also writes the dataframe index as the first column,
         # which is what fills the 'id' column of the table below.
         df.where((pd.notnull(df)), None).rename(columns={
             'provider_id': 'prov_id',
             'provider_pk': 'prov_pk',
         }).to_csv(s, columns=['taxon_id', 'prov_id', 'prov_pk'])
         s.seek(0)
         tmp_table = 'tmp_niamoto'
         sql_create_temp = \
             """
             DROP TABLE IF EXISTS {tmp};
             CREATE TABLE {tmp} (
                 id float,
                 taxon_id float,
                 prov_id float,
                 prov_pk float
             );
             """.format(tmp=tmp_table)
         sql_copy_from = \
             """
             COPY {tmp} FROM STDIN CSV HEADER DELIMITER ',';
             """.format(tmp=tmp_table)
         sql_update = \
             """
             UPDATE {occurrence_table}
             SET taxon_id = {tmp}.taxon_id::int
             FROM {tmp}
             WHERE {occurrence_table}.provider_id = {tmp}.prov_id::int
                 AND {occurrence_table}.provider_pk = {tmp}.prov_pk::int;
             DROP TABLE {tmp};
             """.format(
                 tmp=tmp_table,
                 occurrence_table='{}.{}'.format(
                     settings.NIAMOTO_SCHEMA, occurrence.name
                 ),
             )
         raw_connection = Connector.get_engine().raw_connection()
         try:
             cur = raw_connection.cursor()
             try:
                 cur.execute(sql_create_temp)
                 cur.copy_expert(sql_copy_from, s)
                 cur.execute(sql_update)
             finally:
                 cur.close()
             # Only reached when every statement succeeded
             raw_connection.commit()
         finally:
             # Release the raw connection whether or not the update
             # succeeded, so a failing statement does not leak it.
             raw_connection.close()
     # Log end
     m = "(provider_id='{}', synonym_key='{}'): {} synonym mapping had " \
         "been updated."
     LOGGER.debug(
         m.format(self.data_provider.db_id, self.data_provider.synonym_key,
                  len(synonyms)))
     return mapping, synonyms
 def test_set_taxonomy(self):
     """
     Exercise TaxonomyManager.set_taxonomy with an empty dataframe, with
     a single taxon, then with three taxa carrying 'gbif' and 'taxref'
     columns, and check the stored taxa, the synonym keys and the size
     of the synonyms retrieved for each key.
     """
     # Empty taxonomy
     empty_taxonomy = pd.DataFrame(
         columns=['full_name', 'rank_name', 'rank', 'parent_id'],
     )
     returned, _ = TaxonomyManager.set_taxonomy(empty_taxonomy)
     self.assertEqual(returned, 0)
     rank_enum = niamoto_db_meta.TaxonRankEnum
     # Single taxon, no synonym column
     single_taxon = pd.DataFrame.from_records(
         [
             {
                 'id': 0,
                 'full_name': 'Family One',
                 'rank_name': 'One',
                 'rank': rank_enum.FAMILIA,
                 'parent_id': None,
             },
         ],
         index='id',
     )
     returned, _ = TaxonomyManager.set_taxonomy(single_taxon)
     self.assertEqual(returned, 1)
     # Three taxa with 'gbif' and 'taxref' columns
     family_one = {
         'id': 0,
         'full_name': 'Family One',
         'rank_name': 'One',
         'rank': rank_enum.FAMILIA,
         'parent_id': None,
         'gbif': 5,
         'taxref': 1,
     }
     genus_two = {
         'id': 1,
         'full_name': 'Genus Two',
         'rank_name': 'Two',
         'rank': rank_enum.GENUS,
         'parent_id': 0,
         'gbif': 10,
         'taxref': 2,
     }
     species_three = {
         'id': 2,
         'full_name': 'Species Three',
         'rank_name': 'Three',
         'rank': rank_enum.SPECIES,
         'parent_id': None,
         'gbif': 7,
         'taxref': 3,
     }
     three_taxa = pd.DataFrame.from_records(
         [family_one, genus_two, species_three],
         index='id',
     )
     returned, _ = TaxonomyManager.set_taxonomy(three_taxa)
     self.assertEqual(returned, 3)
     self.assertEqual(len(TaxonomyManager.get_raw_taxon_dataframe()), 3)
     self.assertEqual(len(TaxonomyManager.get_synonym_keys()), 2)
     # (synonym key, expected number of synonyms)
     expected_sizes = (
         (TaxonomyManager.IDENTITY_SYNONYM_KEY, 3),
         (None, 0),
         ("gbif", 3),
         ('taxref', 3),
     )
     for key, size in expected_sizes:
         self.assertEqual(
             len(TaxonomyManager.get_synonyms_for_key(key)), size)