def add_sdm(cls,
            taxon_id,
            raster_file_path,
            tile_dimension=None,
            register=False,
            properties={}):
     """
     Store the SDM raster of a taxon through the parent ``add_raster``.

     The raster is named ``<TAXON_ID_PREFIX>_<taxon_id>``.
     :param taxon_id: The id of the taxon the raster belongs to. Its
         existence in database is asserted before anything else is done.
     :param raster_file_path: The path to the SDM raster file.
     :param tile_dimension: The tile dimension (width, height), if None,
         tile dimension will be chosen automatically by PostGIS.
     :param register: Register the raster as a filesystem (out-db) raster.
         (-R option of raster2pgsql).
     :param properties: A dict of arbitrary properties, forwarded as is.
     """
     TaxonomyManager.assert_taxon_exists_in_database(taxon_id)
     add_raster = super(SSDMManager, cls).add_raster
     raster_name = "{}_{}".format(cls.TAXON_ID_PREFIX, taxon_id)
     add_raster(
         raster_name,
         raster_file_path,
         tile_dimension=tile_dimension,
         register=register,
         properties=properties,
         taxon_id=taxon_id,
     )
 def test_base_data_provider(self):
     """
     Register a data provider without a synonym key, check the resulting
     object, then register a second one bound to a synonym key.
     """
     provider_name = 'test_data_provider_1'
     TestDataProvider.register_data_provider(provider_name)
     provider = TestDataProvider(provider_name)
     self.assertIsNotNone(provider)
     self.assertIsNotNone(provider.db_id)
     # Same registration, this time referencing a registered synonym key
     key = "synonym"
     TaxonomyManager.register_synonym_key(key)
     TestDataProvider.register_data_provider(
         'test_data_provider_2', synonym_key=key)
Beispiel #3
0
 def test_add_single_synonym(self):
     """
     Add a synonym to a single taxon, then set it again with another value
     and check that the stored synonyms reflect the latest value.
     """
     synonym_key = "synonym_key_1"
     TaxonomyManager.register_synonym_key(synonym_key)
     data = [
         {
             'id': 0,
             'full_name': 'Family One',
             'rank_name': 'One',
             'rank': niamoto_db_meta.TaxonRankEnum.FAMILIA,
             'parent_id': None,
             'synonyms': {},
             'mptt_left': 0,
             'mptt_right': 0,
             'mptt_tree_id': 0,
             'mptt_depth': 0,
         },
     ]
     ins = niamoto_db_meta.taxon.insert().values(data)
     with Connector.get_connection() as connection:
         connection.execute(ins)
     TaxonomyManager.add_synonym_for_single_taxon(0, synonym_key, 1)
     df1 = TaxonomyManager.get_raw_taxon_dataframe()
     self.assertEqual(df1.loc[0]['synonyms'], {synonym_key: 1})
     TaxonomyManager.add_synonym_for_single_taxon(0, synonym_key, 2)
     df2 = TaxonomyManager.get_raw_taxon_dataframe()
     # The expected dict used to list the same key twice (values 1 and 2);
     # in a dict literal the last value wins, so only the value 2 was ever
     # compared. Spell out the dict that is really asserted.
     self.assertEqual(df2.loc[0]['synonyms'], {synonym_key: 2})
 def test_update_data_provider(self):
     """
     Rename a data provider and bind it to a synonym key, then check that
     both changes show up in the data provider list.
     """
     original_name = "pl@ntnote_provider_1"
     renamed = "pl@ntnote"
     key = "YO"
     add_data_provider(original_name, "PLANTNOTE")
     TaxonomyManager.register_synonym_key(key)
     update_data_provider(original_name, new_name=renamed, synonym_key=key)
     providers = get_data_provider_list()
     self.assertIn(renamed, list(providers['name']))
     self.assertIn(key, list(providers['synonym_key']))
Beispiel #5
0
 def test_construct_mptt_less_simple_tree(self):
     """
     Build the mptt of a forest made of five trees (24 taxa) and compare
     every mptt column, and the parent ids, with the expected values.
     """
     tree = [
         [1, [1, [1]]],
         [1],
         [1],
         [1],
         [1, [3, [1, [1, [1]]]]],
     ]
     data, _ = make_taxon_tree(tree)
     self.assertEqual(len(data), 24)
     ins = niamoto_db_meta.taxon.insert().values(data)
     with Connector.get_connection() as connection:
         connection.execute(ins)
     TaxonomyManager.make_mptt()
     mptt = TaxonomyManager.get_raw_taxon_dataframe()
     # Expected values, column by column, checked in this order
     expected_columns = (
         ('mptt_tree_id', [
             1, 1, 1, 1, 1, 1,
             7, 7,
             9, 9,
             11, 11,
             13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
         ]),
         ('mptt_left', [
             1, 2, 4, 5, 8, 9,
             1, 2,
             1, 2,
             1, 2,
             1, 2, 4, 5, 7, 9, 12, 13, 15, 16, 19, 20,
         ]),
         ('mptt_right', [
             12, 3, 7, 6, 11, 10,
             4, 3,
             4, 3,
             4, 3,
             24, 3, 11, 6, 8, 10, 23, 14, 18, 17, 22, 21,
         ]),
         ('mptt_depth', [
             0, 1, 1, 2, 1, 2,
             0, 1,
             0, 1,
             0, 1,
             0, 1, 1, 2, 2, 2, 1, 2, 2, 3, 2, 3,
         ]),
     )
     for column, expected in expected_columns:
         self.assertEqual(list(mptt[column]), expected)
     # Null parent ids are compared as None, the others as plain ints
     parent_ids = []
     for value in mptt['parent_id'].tolist():
         parent_ids.append(None if pd.isnull(value) else int(value))
     self.assertEqual(parent_ids, [
         None, 1, 1, 3, 1, 5,
         None, 7,
         None, 9,
         None, 11,
         None, 13, 13, 15, 15, 15, 13, 19, 19, 21, 19, 23,
     ])
Beispiel #6
0
 def test_construct_mptt_simple_tree(self):
     """
     Build the mptt of a forest made of two small trees (6 taxa) and
     compare every mptt column, and the parent ids, with expected values.
     """
     tree = [[1, [1]], [1]]
     data, _ = make_taxon_tree(tree)
     ins = niamoto_db_meta.taxon.insert().values(data)
     with Connector.get_connection() as connection:
         connection.execute(ins)
     TaxonomyManager.make_mptt()
     mptt = TaxonomyManager.get_raw_taxon_dataframe()
     # Expected values, column by column, checked in this order
     expected_columns = (
         ('mptt_tree_id', [1, 1, 1, 1, 5, 5]),
         ('mptt_left', [1, 2, 4, 5, 1, 2]),
         ('mptt_right', [8, 3, 7, 6, 4, 3]),
         ('mptt_depth', [0, 1, 1, 2, 0, 1]),
     )
     for column, expected in expected_columns:
         self.assertEqual(list(mptt[column]), expected)
     # Null parent ids are compared as None, the others as plain ints
     parent_ids = []
     for value in mptt['parent_id'].tolist():
         parent_ids.append(None if pd.isnull(value) else int(value))
     self.assertEqual(parent_ids, [None, 1, 1, 3, None, 5])
Beispiel #7
0
 def test_get_synonym_key(self):
     """
     Getting an unknown synonym key raises NoRecordFoundError, getting a
     registered one works both with and without an explicit bind.
     """
     with self.assertRaises(NoRecordFoundError):
         TaxonomyManager.get_synonym_key('Not existing')
     key = "test"
     TaxonomyManager.register_synonym_key(key)
     TaxonomyManager.get_synonym_key(key)
     with Connector.get_connection() as connection:
         TaxonomyManager.get_synonym_key(key, bind=connection)
Beispiel #8
0
 def test_duplicate_synonym(self):
     """
     Giving two different taxa the same synonym value for the same
     synonym key must raise an IntegrityError.
     """
     synonym_key = "synonym_key_1"
     TaxonomyManager.register_synonym_key("synonym_key_1")

     def family(taxon_id, full_name, rank_name):
         # Root family record without synonyms, mptt columns set to 0
         return {
             'id': taxon_id,
             'full_name': full_name,
             'rank_name': rank_name,
             'rank': niamoto_db_meta.TaxonRankEnum.FAMILIA,
             'parent_id': None,
             'synonyms': {},
             'mptt_left': 0,
             'mptt_right': 0,
             'mptt_tree_id': 0,
             'mptt_depth': 0,
         }

     data = [
         family(0, 'Family One', 'One'),
         family(1, 'Family Two', 'Two'),
     ]
     ins = niamoto_db_meta.taxon.insert().values(data)
     with Connector.get_connection() as connection:
         connection.execute(ins)
     TaxonomyManager.add_synonym_for_single_taxon(0, synonym_key, 1)
     # Same key and same value on another taxon: must be rejected
     with self.assertRaises(IntegrityError):
         TaxonomyManager.add_synonym_for_single_taxon(1, synonym_key, 1)
Beispiel #9
0
 def test_get_synonyms_map(self):
     """
     Insert two taxa carrying a synonym for the same key and check that
     the synonyms retrieved for that key map each synonym value to the
     id of its taxon.
     """
     synonym_key = "synonym_key_1"
     TaxonomyManager.register_synonym_key("synonym_key_1")

     def family(taxon_id, full_name, rank_name, synonym_value):
         # Root family record with one synonym, mptt columns set to 0
         return {
             'id': taxon_id,
             'full_name': full_name,
             'rank_name': rank_name,
             'rank': niamoto_db_meta.TaxonRankEnum.FAMILIA,
             'parent_id': None,
             'synonyms': {
                 synonym_key: synonym_value,
             },
             'mptt_left': 0,
             'mptt_right': 0,
             'mptt_tree_id': 0,
             'mptt_depth': 0,
         }

     data = [
         family(0, 'Family One', 'One', 10),
         family(1, 'Family Two', 'Two', 20),
     ]
     ins = niamoto_db_meta.taxon.insert().values(data)
     with Connector.get_connection() as connection:
         connection.execute(ins)
     synonyms = TaxonomyManager.get_synonyms_for_key(synonym_key)
     self.assertEqual(synonyms.loc[10], 0)
     self.assertEqual(synonyms.loc[20], 1)
Beispiel #10
0
 def test_get_not_empty_raw_taxon_dataset(self):
     """
     Insert three taxa and check that the raw taxon dataframe contains
     three rows.
     """
     ranks = niamoto_db_meta.TaxonRankEnum

     def taxon(taxon_id, full_name, rank_name, rank, parent_id, mptt_left):
         # Taxon record without synonyms; remaining mptt columns set to 0
         return {
             'id': taxon_id,
             'full_name': full_name,
             'rank_name': rank_name,
             'rank': rank,
             'parent_id': parent_id,
             'synonyms': {},
             'mptt_left': mptt_left,
             'mptt_right': 0,
             'mptt_tree_id': 0,
             'mptt_depth': 0,
         }

     data = [
         taxon(0, 'Family One', 'One', ranks.FAMILIA, None, 0),
         taxon(1, 'Genus Two', 'Two', ranks.GENUS, 0, 0),
         taxon(2, 'Species Three', 'Three', ranks.SPECIES, None, 1),
     ]
     ins = niamoto_db_meta.taxon.insert().values(data)
     with Connector.get_connection() as connection:
         connection.execute(ins)
     taxa = TaxonomyManager.get_raw_taxon_dataframe()
     self.assertEqual(len(taxa), 3)
Beispiel #11
0
 def _process(self,
              *args,
              include_mptt=False,
              include_synonyms=False,
              flatten=False,
              **kwargs):
     """
     Return the taxon dataframe.
     :param include_mptt: If True, include the mptt columns.
     :param include_synonyms: If True, include the stored synonyms for each
         taxon (one column per registered synonym key, except 'niamoto').
     :param flatten: If True, flattens the taxonomy hierarchy and include
         it in the resulting dataframe.
     :return: A tuple (dataframe, [], {'index_label': 'id'}), the dataframe
         being indexed by the taxon id.
     """
     with Connector.get_connection() as connection:
         keys = TaxonomyManager.get_synonym_keys()['name']
         synonyms = []
         if include_synonyms:
             synonyms = [
                 meta.taxon.c.synonyms[k].label(k) for k in keys
                 if k != 'niamoto'
             ]
         mptt = []
         if include_mptt:
             mptt = [
                 meta.taxon.c.mptt_left.label('mptt_left'),
                 meta.taxon.c.mptt_right.label('mptt_right'),
                 meta.taxon.c.mptt_tree_id.label('mptt_tree_id'),
                 meta.taxon.c.mptt_depth.label('mptt_depth'),
             ]
         sel = select([
             meta.taxon.c.id.label('id'),
             meta.taxon.c.full_name.label('full_name'),
             meta.taxon.c.rank_name.label('rank_name'),
             cast(meta.taxon.c.rank, String).label('rank'),
             meta.taxon.c.parent_id.label('parent_id'),
         ] + synonyms + mptt)
         df = pd.read_sql(sel, connection, index_col='id')
         #  Replace None values with nan. A plain float nan is used rather
         #  than the `pd.np` alias, which recent pandas no longer exposes.
         df.fillna(value=float('nan'), inplace=True)
         if flatten:
             df = _flatten(df)
         return df, [], {'index_label': 'id'}
 def update_data_provider(cls,
                          current_name,
                          *args,
                          new_name=None,
                          properties={},
                          synonym_key=None,
                          return_object=True,
                          **kwargs):
     """
     Update the record of an existing data provider.
     :param current_name: The current name of the data provider, its
         existence is asserted before updating.
     :param new_name: The new name, if None the current name is kept.
     :param properties: The properties to store for the data provider.
     :param synonym_key: The name of the synonym key to bind to the data
         provider, if None the synonym key id is set to None.
     :param return_object: If True, return an instance built with the new
         name and the extra args / kwargs.
     :return: ``cls(new_name, *args, **kwargs)`` if return_object is True,
         None otherwise.
     """
     if new_name is None:
         new_name = current_name
     start_message = (
         "DataProvider(current_name='{}', new_name='{}', type_name='{}',"
         "properties='{}', synonym_key='{}'): updating data provider...'"
     )
     LOGGER.debug(start_message.format(
         current_name, new_name, cls.get_type_name(), properties,
         synonym_key))
     with Connector.get_connection() as connection:
         cls.assert_data_provider_exists(current_name, bind=connection)
         if synonym_key is None:
             synonym_key_id = None
         else:
             key_record = TaxonomyManager.get_synonym_key(
                 synonym_key, bind=connection)
             synonym_key_id = key_record['id']
         table = niamoto_db_meta.data_provider
         new_values = {
             'name': new_name,
             'properties': properties,
             'synonym_key_id': synonym_key_id,
             'date_update': datetime.now(),
         }
         upd = table.update().values(new_values).where(
             table.c.name == current_name)
         connection.execute(upd)
     end_message = (
         "DataProvider(current_name='{}', new_name='{}', type_name='{}',"
         " properties='{}', synonym_key='{}'): Data provider had been "
         "successfully updated!'"
     )
     LOGGER.debug(end_message.format(
         current_name, new_name, cls.get_type_name(), properties,
         synonym_key))
     if not return_object:
         return None
     return cls(new_name, *args, **kwargs)
 def map_provider_taxon_ids(self, dataframe):
     """
     Map provider's taxon ids with Niamoto taxon ids when importing data.

     The dataframe is modified in place: the original "taxon_id" values
     are copied to a "provider_taxon_id" column, then "taxon_id" is
     replaced by its mapping through the synonyms of the data provider's
     synonym key. Nothing is returned explicitly.
     :param dataframe: The provider's dataframe, it must have a "taxon_id"
         column.
     """
     provider = self.data_provider
     start_message = "(provider_id='{}', synonym_key='{}'): " \
         "Mapping provider's taxon ids..."
     LOGGER.debug(start_message.format(provider.db_id, provider.synonym_key))
     synonyms = TaxonomyManager.get_synonyms_for_key(provider.synonym_key)
     # Keep the provider's own ids before overwriting them
     provider_taxon_ids = dataframe["taxon_id"]
     dataframe["provider_taxon_id"] = provider_taxon_ids
     dataframe["taxon_id"] = dataframe["taxon_id"].map(synonyms)
     end_message = "(provider_id='{}', synonym_key='{}'): {} taxon ids had " \
         "been mapped."
     LOGGER.debug(end_message.format(
         self.data_provider.db_id,
         self.data_provider.synonym_key,
         len(synonyms),
     ))
def set_taxonomy(csv_file_path):
    """
    Set the niamoto taxonomy from a csv file.
    The csv must have a header and it must contains at least the following
    columns:
    - id: The unique identifier of the taxon, in the provider’s referential.
    - parent_id: The parent’s id of the taxon. If the taxon is a root, let the
                 value blank.
    - rank: The rank of the taxon, can be a value among: ‘REGNUM’, ‘PHYLUM’,
            ‘CLASSIS’, ‘ORDO’, ‘FAMILIA’, ‘GENUS’, ‘SPECIES’, ‘INFRASPECIES’.
    - full_name: The full name of the taxon.
    - rank_name: The rank name of the taxon.
    All the additional columns will be considered as synonyms, their values
    must therefore be integers corresponding to the corresponding value in the
    referential pointed by the synonym key.
    :param csv_file_path: The csv file path.
    :return: (number_of_taxon_inserted, synonyms_registered)
    :raises DataSourceNotFoundError: If the path does not exist or is a
        directory.
    """
    if not os.path.exists(csv_file_path) or os.path.isdir(csv_file_path):
        raise DataSourceNotFoundError(
            "The csv file '{}' had not been found.".format(csv_file_path))
    # `DataFrame.from_csv` is deprecated and removed from recent pandas,
    # `read_csv` is its replacement. Unlike `from_csv`, it does not try to
    # parse the index as dates, which is what is wanted for taxon ids.
    dataframe = pd.read_csv(csv_file_path, index_col='id')
    return TaxonomyManager.set_taxonomy(dataframe)
 def register_data_provider(cls,
                            name,
                            *args,
                            properties={},
                            synonym_key=None,
                            return_object=True,
                            **kwargs):
     """
     Insert a new data provider record.
     :param name: The name of the data provider, it is asserted that no
         data provider with this name already exists.
     :param properties: The properties to store for the data provider.
     :param synonym_key: The name of the synonym key to bind to the data
         provider, if None the synonym key id is set to None.
     :param return_object: If True, return an instance built with the name
         and the extra args / kwargs.
     :return: ``cls(name, *args, **kwargs)`` if return_object is True,
         None otherwise.
     """
     start_message = (
         "DataProvider(name='{}', type_name='{}', properties='{}', "
         "synonym_key='{}'): Registering data provider...'"
     )
     LOGGER.debug(start_message.format(
         name, cls.get_type_name(), properties, synonym_key))
     with Connector.get_connection() as connection:
         cls.assert_data_provider_does_not_exist(name, bind=connection)
         if synonym_key is None:
             synonym_key_id = None
         else:
             key_record = TaxonomyManager.get_synonym_key(
                 synonym_key, bind=connection)
             synonym_key_id = key_record['id']
         record = {
             'name': name,
             'provider_type_key': cls.get_type_name(),
             'properties': properties,
             'synonym_key_id': synonym_key_id,
             'date_create': datetime.now(),
         }
         ins = niamoto_db_meta.data_provider.insert(record)
         connection.execute(ins)
     end_message = (
         "DataProvider(name='{}', type_name='{}', properties='{}', "
         "synonym_key='{}'): Data provider had been successfully"
         " registered!'"
     )
     LOGGER.debug(end_message.format(
         name, cls.get_type_name(), properties, synonym_key))
     if not return_object:
         return None
     return cls(name, *args, **kwargs)
def upgrade():
    """
    Create the tables of the 'niamoto' schema and register initial records.

    Tables are created in this order: data_provider_type, taxon,
    synonym_key_registry, raster_registry, vector_registry,
    dimension_registry, data_provider, occurrence, plot, plot_occurrence.
    Then, using the migration's bind, the Plantnote and Csv data provider
    types and the 'niamoto' synonym key are registered.
    """
    # Data provider types, identified by a unique name
    op.create_table('data_provider_type',
                    sa.Column('id', sa.Integer(), nullable=False),
                    sa.Column('name', sa.String(length=100), nullable=False),
                    sa.PrimaryKeyConstraint('id'),
                    sa.UniqueConstraint(
                        'name', name=op.f('uq_data_provider_type_name')),
                    schema='niamoto')
    # Taxa: self-referencing hierarchy (parent_id), JSONB synonyms and mptt
    # columns. Note that the check constraints are named '..._gt_0' but
    # what they enforce is '>= 0'.
    op.create_table('taxon',
                    sa.Column('id', sa.Integer(), nullable=False),
                    sa.Column('full_name', sa.Text(), nullable=False),
                    sa.Column('rank_name', sa.Text(), nullable=False),
                    sa.Column('rank', taxon_rank_enum, nullable=False),
                    sa.Column('parent_id',
                              sa.Integer(),
                              nullable=True,
                              index=True),
                    sa.Column('synonyms', postgresql.JSONB(), nullable=False),
                    sa.Column('mptt_left', sa.Integer(), nullable=False),
                    sa.Column('mptt_right', sa.Integer(), nullable=False),
                    sa.Column('mptt_tree_id', sa.Integer(), nullable=False),
                    sa.Column('mptt_depth', sa.Integer(), nullable=False),
                    sa.CheckConstraint('mptt_depth >= 0',
                                       name=op.f('ck_taxon_mptt_depth_gt_0')),
                    sa.CheckConstraint('mptt_left >= 0',
                                       name=op.f('ck_taxon_mptt_left_gt_0')),
                    sa.CheckConstraint('mptt_right >= 0',
                                       name=op.f('ck_taxon_mptt_right_gt_0')),
                    sa.CheckConstraint(
                        'mptt_tree_id >= 0',
                        name=op.f('ck_taxon_mptt_tree_id_gt_0')),
                    sa.ForeignKeyConstraint(
                        ['parent_id'],
                        ['niamoto.taxon.id'],
                        deferrable=True,
                    ),
                    sa.PrimaryKeyConstraint('id'),
                    sa.UniqueConstraint('full_name',
                                        name=op.f('uq_taxon_full_name')),
                    schema='niamoto')
    # Registry of the synonym keys, identified by a unique name
    op.create_table('synonym_key_registry',
                    sa.Column('id', sa.Integer(), nullable=False),
                    sa.Column('name', sa.String(length=100), nullable=False),
                    sa.Column('date_create', sa.DateTime(), nullable=False),
                    sa.Column('date_update', sa.DateTime(), nullable=True),
                    sa.PrimaryKeyConstraint('id'),
                    sa.UniqueConstraint(
                        'name', name=op.f('uq_synonym_key_registry_name')),
                    schema='niamoto')
    # Registry of the rasters, identified by a unique name
    op.create_table('raster_registry',
                    sa.Column('id', sa.Integer(), nullable=False),
                    sa.Column('name', sa.String(length=100), nullable=False),
                    sa.Column('date_create', sa.DateTime(), nullable=False),
                    sa.Column('date_update', sa.DateTime(), nullable=True),
                    sa.PrimaryKeyConstraint('id'),
                    sa.UniqueConstraint('name',
                                        name=op.f('uq_raster_registry_name')),
                    schema='niamoto')
    # Registry of the vectors, identified by a unique name
    op.create_table('vector_registry',
                    sa.Column('id', sa.Integer(), nullable=False),
                    sa.Column('name', sa.String(length=100), nullable=False),
                    sa.Column('date_create', sa.DateTime(), nullable=False),
                    sa.Column('date_update', sa.DateTime(), nullable=True),
                    sa.PrimaryKeyConstraint('id'),
                    sa.UniqueConstraint('name',
                                        name=op.f('uq_vector_registry_name')),
                    schema='niamoto')
    # Registry of the dimensions, identified by a unique name
    op.create_table('dimension_registry',
                    sa.Column('id', sa.Integer(), nullable=False),
                    sa.Column('name', sa.String(length=100), nullable=False),
                    sa.Column('dimension_key',
                              sa.String(length=100),
                              nullable=False),
                    sa.Column('date_create', sa.DateTime(), nullable=False),
                    sa.Column('date_update', sa.DateTime(), nullable=True),
                    sa.PrimaryKeyConstraint('id'),
                    sa.UniqueConstraint(
                        'name', name=op.f('uq_dimension_registry_name')),
                    schema='niamoto')
    # Data providers: reference a provider type and, optionally, a synonym
    # key (set to NULL when the synonym key is deleted)
    op.create_table('data_provider',
                    sa.Column('id', sa.Integer(), nullable=False),
                    sa.Column('name', sa.String(length=100), nullable=False),
                    sa.Column('provider_type_id',
                              sa.Integer(),
                              nullable=False,
                              index=True),
                    sa.Column('synonym_key_id',
                              sa.Integer(),
                              nullable=True,
                              index=True),
                    sa.Column('properties', postgresql.JSONB(),
                              nullable=False),
                    sa.Column('date_create', sa.DateTime(), nullable=False),
                    sa.Column('date_update', sa.DateTime(), nullable=True),
                    sa.Column('last_sync', sa.DateTime(), nullable=True),
                    sa.ForeignKeyConstraint(
                        ['provider_type_id'],
                        ['niamoto.data_provider_type.id'],
                    ),
                    sa.ForeignKeyConstraint(
                        ['synonym_key_id'],
                        ['niamoto.synonym_key_registry.id'],
                        ondelete='SET NULL',
                    ),
                    sa.PrimaryKeyConstraint('id'),
                    sa.UniqueConstraint('name',
                                        name=op.f('uq_data_provider_name')),
                    schema='niamoto')
    # Occurrences: deleted with their data provider, taxon_id set to NULL
    # when the referenced taxon is deleted
    op.create_table(
        'occurrence',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('provider_id', sa.Integer(), nullable=False, index=True),
        sa.Column('provider_pk', sa.Integer(), nullable=False, index=True),
        sa.Column(
            'location',
            geoalchemy2.types.Geometry(geometry_type='POINT', srid=4326),
        ),
        sa.Column('taxon_id', sa.Integer(), nullable=True, index=True),
        sa.Column('provider_taxon_id', sa.Integer(), nullable=True),
        sa.Column('properties', postgresql.JSONB(), nullable=False),
        sa.ForeignKeyConstraint(
            ['provider_id'],
            ['niamoto.data_provider.id'],
            onupdate='CASCADE',
            ondelete='CASCADE',
        ),
        sa.ForeignKeyConstraint(
            ['taxon_id'],
            ['niamoto.taxon.id'],
            onupdate="CASCADE",
            ondelete="SET NULL",
            deferrable=True,
        ),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint(
            'id',
            'provider_id',
            'provider_pk',
            name=op.f('uq_occurrence_id__provider_id__provider_pk')),
        schema='niamoto')
    # Plots: deleted with their data provider, identified by a unique name
    op.create_table(
        'plot',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('provider_id', sa.Integer(), nullable=False, index=True),
        sa.Column('provider_pk', sa.Integer(), nullable=False),
        sa.Column('name', sa.String(length=100), nullable=False),
        sa.Column('location',
                  geoalchemy2.types.Geometry(geometry_type='POINT', srid=4326),
                  nullable=False),
        sa.Column('properties', postgresql.JSONB(), nullable=False),
        sa.ForeignKeyConstraint(
            ['provider_id'],
            ['niamoto.data_provider.id'],
            onupdate='CASCADE',
            ondelete='CASCADE',
        ),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('id',
                            'provider_id',
                            'provider_pk',
                            name=op.f('uq_plot_id__provider_id__provider_pk')),
        sa.UniqueConstraint('name', name=op.f('uq_plot_name')),
        schema='niamoto')
    # Association between plots and occurrences. The composite foreign keys
    # target the (id, provider_id, provider_pk) unique constraints declared
    # on the occurrence and plot tables above.
    op.create_table(
        'plot_occurrence',
        sa.Column('plot_id', sa.Integer(), nullable=False, index=True),
        sa.Column('occurrence_id', sa.Integer(), nullable=False, index=True),
        sa.Column('provider_id', sa.Integer(), nullable=True, index=True),
        sa.Column('provider_plot_pk', sa.Integer(), nullable=True),
        sa.Column('provider_occurrence_pk', sa.Integer(), nullable=True),
        sa.Column('occurrence_identifier', sa.String(length=50),
                  nullable=True),
        sa.ForeignKeyConstraint(
            ['occurrence_id', 'provider_id', 'provider_occurrence_pk'],
            [
                'niamoto.occurrence.id', 'niamoto.occurrence.provider_id',
                'niamoto.occurrence.provider_pk'
            ],
            onupdate='CASCADE',
            ondelete='CASCADE',
        ),
        sa.ForeignKeyConstraint(
            ['plot_id', 'provider_id', 'provider_plot_pk'],
            [
                'niamoto.plot.id', 'niamoto.plot.provider_id',
                'niamoto.plot.provider_pk'
            ],
            onupdate='CASCADE',
            ondelete='CASCADE',
        ),
        sa.PrimaryKeyConstraint('plot_id', 'occurrence_id'),
        sa.UniqueConstraint(
            'plot_id',
            'occurrence_identifier',
            name=op.f('uq_plot_occurrence_plot_id__occurrence_identifier'),
            deferrable=True,
        ),
        schema='niamoto')
    # Register the initial records through the connection of the migration
    connection = op.get_bind()
    PlantnoteDataProvider.register_data_provider_type(bind=connection, )
    CsvDataProvider.register_data_provider_type(bind=connection, )
    TaxonomyManager.register_synonym_key(
        'niamoto',
        bind=connection,
    )
 def test_set_taxonomy(self):
     """
     Set the taxonomy three times in a row (empty, one taxon, three taxa
     with 'gbif' and 'taxref' synonym columns) and check the number of
     inserted taxa, the registered synonym keys and the stored synonyms.
     """
     ranks = niamoto_db_meta.TaxonRankEnum
     # 1. Empty taxonomy
     empty = pd.DataFrame(
         columns=['full_name', 'rank_name', 'rank', 'parent_id'])
     result, synonyms = TaxonomyManager.set_taxonomy(empty)
     self.assertEqual(result, 0)
     # 2. A single taxon, no synonym column
     single_taxon = [
         {
             'id': 0,
             'full_name': 'Family One',
             'rank_name': 'One',
             'rank': ranks.FAMILIA,
             'parent_id': None,
         },
     ]
     data = pd.DataFrame.from_records(single_taxon, index='id')
     result, synonyms = TaxonomyManager.set_taxonomy(data)
     self.assertEqual(result, 1)
     # 3. Three taxa, with two synonym columns
     taxa_with_synonyms = [
         {
             'id': 0,
             'full_name': 'Family One',
             'rank_name': 'One',
             'rank': ranks.FAMILIA,
             'parent_id': None,
             'gbif': 5,
             'taxref': 1,
         },
         {
             'id': 1,
             'full_name': 'Genus Two',
             'rank_name': 'Two',
             'rank': ranks.GENUS,
             'parent_id': 0,
             'gbif': 10,
             'taxref': 2,
         },
         {
             'id': 2,
             'full_name': 'Species Three',
             'rank_name': 'Three',
             'rank': ranks.SPECIES,
             'parent_id': None,
             'gbif': 7,
             'taxref': 3,
         },
     ]
     data = pd.DataFrame.from_records(taxa_with_synonyms, index='id')
     result, synonyms = TaxonomyManager.set_taxonomy(data)
     self.assertEqual(result, 3)
     taxa = TaxonomyManager.get_raw_taxon_dataframe()
     self.assertEqual(len(taxa), 3)
     synonym_keys = TaxonomyManager.get_synonym_keys()
     self.assertEqual(len(synonym_keys), 2)
     identity_key = TaxonomyManager.IDENTITY_SYNONYM_KEY
     identity_synonyms = TaxonomyManager.get_synonyms_for_key(identity_key)
     self.assertEqual(len(identity_synonyms), 3)
     null_synonyms = TaxonomyManager.get_synonyms_for_key(None)
     self.assertEqual(len(null_synonyms), 0)
     gbif_synonyms = TaxonomyManager.get_synonyms_for_key("gbif")
     self.assertEqual(len(gbif_synonyms), 3)
     taxref_synonyms = TaxonomyManager.get_synonyms_for_key('taxref')
     self.assertEqual(len(taxref_synonyms), 3)
Beispiel #18
0
 def test_register_unregister_synonym_key(self):
     """
     Register and unregister synonym keys, checking the errors raised for
     an already registered key and for an unknown key, first without and
     then with an explicit bind.
     """
     key = "synonym_key_1"
     TaxonomyManager.register_synonym_key(key)
     # Registering the same key twice is an error
     with self.assertRaises(RecordAlreadyExistsError):
         TaxonomyManager.register_synonym_key(key)
     registered = TaxonomyManager.get_synonym_keys()
     self.assertEqual(len(registered), 1)
     TaxonomyManager.unregister_synonym_key(key)
     # Unregistering a key that is not registered is an error
     with self.assertRaises(NoRecordFoundError):
         TaxonomyManager.unregister_synonym_key("synonym_key")
     registered = TaxonomyManager.get_synonym_keys()
     self.assertEqual(len(registered), 0)
     # Test with bind
     bound_key = "test"
     with Connector.get_connection() as connection:
         TaxonomyManager.register_synonym_key(bound_key, bind=connection)
         TaxonomyManager.assert_synonym_key_exists(
             bound_key, bind=connection)
         TaxonomyManager.unregister_synonym_key(bound_key, bind=connection)
         TaxonomyManager.assert_synonym_key_does_not_exists(
             bound_key, bind=connection)
Beispiel #19
0
 def test_register_unregister_unique_constraints(self):
     """
     Register then unregister the unique constraint of a synonym key; the
     test passes if neither call raises.
     """
     key = "Yo"
     TaxonomyManager._register_unique_synonym_key_constraint(key)
     TaxonomyManager._unregister_unique_synonym_key_constraint(key)
Beispiel #20
0
def populate_ncpippn_taxon_database(dataframe):
    """
    Set the taxonomy of a Niamoto database from a taxonomic referential.

    The dataframe is handed over to ``TaxonomyManager.set_taxonomy``, the
    result of that call is discarded.
    :param dataframe: The dataframe containing the taxonomic referential.
    """
    referential = dataframe
    TaxonomyManager.set_taxonomy(referential)
Beispiel #21
0
 def tearDown(self):
     """
     Delete all the taxa after each test.
     """
     delete_all_taxa = TaxonomyManager.delete_all_taxa
     delete_all_taxa()
Beispiel #22
0
 def test_get_empty_raw_taxon_dataset(self):
     """
     The raw taxon dataframe has no rows when no taxon had been inserted.
     """
     taxa = TaxonomyManager.get_raw_taxon_dataframe()
     row_count = len(taxa)
     self.assertEqual(row_count, 0)
Beispiel #23
0
 def tearDown(self):
     """
     Delete all the taxa, then unregister all the synonym keys, after each
     test.
     """
     cleanup_steps = (
         TaxonomyManager.delete_all_taxa,
         TaxonomyManager.unregister_all_synonym_keys,
     )
     for step in cleanup_steps:
         step()
Beispiel #24
0
 def test_get_synonym_keys(self):
     """
     The synonym keys dataframe is expected to have no rows here.
     """
     synonym_keys = TaxonomyManager.get_synonym_keys()
     key_count = len(synonym_keys)
     self.assertEqual(key_count, 0)
 def update_synonym_mapping(self, connection=None):
     """
     Update the synonym mapping of an already stored dataframe.
     To be called when a synonym had been defined or modified, but not
     the occurrences.

     The stored occurrences are read, their "provider_taxon_id" is mapped
     through the synonyms of the data provider's synonym key, and the
     resulting taxon ids are written back using a work table filled with
     COPY and a single UPDATE.
     :param connection: If passed, use an existing connection to read the
         occurrences. If None, a connection is opened for the read and
         closed afterwards. The update itself always runs on a separate
         raw connection obtained from the engine.
     :return: A tuple (mapping, synonyms): the mapped taxon ids, and the
         synonyms used for the mapping.
     """
     # Log start
     m = "(provider_id='{}', synonym_key='{}'): Updating synonym " \
         "mapping..."
     LOGGER.debug(
         m.format(self.data_provider.db_id, self.data_provider.synonym_key))
     close_after = connection is None
     if close_after:
         connection = Connector.get_engine().connect()
     # Start
     try:
         df = self.get_niamoto_occurrence_dataframe(connection)
     finally:
         # Only close a connection opened here, and do it even when the
         # read fails so that the connection is not leaked.
         if close_after:
             connection.close()
     synonyms = TaxonomyManager.get_synonyms_for_key(
         self.data_provider.synonym_key)
     mapping = df["provider_taxon_id"].map(synonyms)
     if len(df) > 0:
         df["taxon_id"] = mapping
         df = df[['provider_id', 'provider_pk', 'taxon_id']]
         s = io.StringIO()
         # The index is written too, it fills the 'id' column of the work
         # table.
         df.where((pd.notnull(df)), None).rename(columns={
             'provider_id': 'prov_id',
             'provider_pk': 'prov_pk',
         }).to_csv(s, columns=['taxon_id', 'prov_id', 'prov_pk'])
         s.seek(0)
         tmp_table = 'tmp_niamoto'
         occurrence_table = '{}.{}'.format(
             settings.NIAMOTO_SCHEMA, occurrence.name
         )
         sql_create_temp = \
             """
             DROP TABLE IF EXISTS {tmp};
             CREATE TABLE {tmp} (
                 id float,
                 taxon_id float,
                 prov_id float,
                 prov_pk float
             );
             """.format(tmp=tmp_table)
         sql_copy_from = \
             """
             COPY {tmp} FROM STDIN CSV HEADER DELIMITER ',';
             """.format(tmp=tmp_table)
         sql_update = \
             """
             UPDATE {occurrence_table}
             SET taxon_id = {tmp}.taxon_id::int
             FROM {tmp}
             WHERE {occurrence_table}.provider_id = {tmp}.prov_id::int
                 AND {occurrence_table}.provider_pk = {tmp}.prov_pk::int;
             DROP TABLE {tmp};
             """.format(tmp=tmp_table, occurrence_table=occurrence_table)
         raw_connection = Connector.get_engine().raw_connection()
         try:
             cur = raw_connection.cursor()
             try:
                 cur.execute(sql_create_temp)
                 cur.copy_expert(sql_copy_from, s)
                 cur.execute(sql_update)
             finally:
                 cur.close()
             # Reached only when every statement succeeded
             raw_connection.commit()
         finally:
             # Always give the raw connection back, even on failure
             raw_connection.close()
     # Log end
     m = "(provider_id='{}', synonym_key='{}'): {} synonym mapping had " \
         "been updated."
     LOGGER.debug(
         m.format(self.data_provider.db_id, self.data_provider.synonym_key,
                  len(synonyms)))
     return mapping, synonyms