Example no. 1
 def _process(self, *args, properties=None, drop_null_properties=False,
              **kwargs):
     """
     Return the occurrence dataframe.
     :param properties: List of properties to retain. Can be a python list
         or a comma (',') separated string.
     """
     with Connector.get_connection() as connection:
         sel_keys = select([
             func.jsonb_object_keys(
                 meta.occurrence.c.properties
             ).distinct(),
         ])
         if properties is None:
             keys = [i[0] for i in connection.execute(sel_keys).fetchall()]
         else:
             if isinstance(properties, str):
                 properties = properties.split(',')
             keys = properties
         props = [meta.occurrence.c.properties[k].label(k) for k in keys]
         sel = select([
             meta.occurrence.c.id.label('id'),
             meta.occurrence.c.taxon_id.label('taxon_id'),
             cast(meta.taxon.c.rank.label('rank'), String).label('rank'),
             meta.taxon.c.full_name.label('full_name'),
             func.st_x(meta.occurrence.c.location).label('x'),
             func.st_y(meta.occurrence.c.location).label('y'),
         ] + props).select_from(
             meta.occurrence.outerjoin(
                 meta.taxon,
                 meta.taxon.c.id == meta.occurrence.c.taxon_id
             )
         )
         df = pd.read_sql(sel, connection, index_col='id')
         df['taxon_id'] = df['taxon_id'].apply(pd.to_numeric)
         #  Replace None values with nan
         df.fillna(value=pd.np.NAN, inplace=True)
         if drop_null_properties:
             for k in keys:
                 df = df[df[k].notnull()]
         return df, [], {'index_label': 'id'}
 def update_data_provider(cls,
                          current_name,
                          *args,
                          new_name=None,
                          properties={},
                          synonym_key=None,
                          return_object=True,
                          **kwargs):
     if new_name is None:
         new_name = current_name
     m = "DataProvider(current_name='{}', new_name='{}', type_name='{}'," \
         "properties='{}', synonym_key='{}'): updating data provider...'"
     LOGGER.debug(
         m.format(current_name, new_name, cls.get_type_name(), properties,
                  synonym_key))
     with Connector.get_connection() as connection:
         cls.assert_data_provider_exists(current_name, bind=connection)
         synonym_key_id = None
         if synonym_key is not None:
             synonym_key_id = TaxonomyManager.get_synonym_key(
                 synonym_key, bind=connection)['id']
         upd = niamoto_db_meta.data_provider.update().values({
             'name':
             new_name,
             'properties':
             properties,
             'synonym_key_id':
             synonym_key_id,
             'date_update':
             datetime.now(),
         }).where(niamoto_db_meta.data_provider.c.name == current_name)
         connection.execute(upd)
     m = "DataProvider(current_name='{}', new_name='{}', type_name='{}'," \
         " properties='{}', synonym_key='{}'): Data provider had been " \
         "successfully updated!'"
     LOGGER.debug(
         m.format(current_name, new_name, cls.get_type_name(), properties,
                  synonym_key))
     if return_object:
         return cls(new_name, *args, **kwargs)
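
A minimal usage sketch for update_data_provider, assuming a configured Niamoto database and that the CsvDataProvider type used later in these examples inherits this classmethod from the same base provider class; the new name, properties and synonym key are illustrative values only:

    # Rename a registered provider and attach a synonym key; return_object=False
    # avoids re-instantiating the provider (its constructor kwargs are not needed here).
    CsvDataProvider.update_data_provider(
        'csv_provider',
        new_name='csv_provider_2018',
        properties={'source': 'field survey 2018'},
        synonym_key='gbif',  # assumed to be an already registered synonym key
        return_object=False,
    )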
Example no. 3
 def test_duplicate_synonym(self):
     synonym_key = "synonym_key_1"
     TaxonomyManager.register_synonym_key("synonym_key_1")
     data = [
         {
             'id': 0,
             'full_name': 'Family One',
             'rank_name': 'One',
             'rank': niamoto_db_meta.TaxonRankEnum.FAMILIA,
             'parent_id': None,
             'synonyms': {},
             'mptt_left': 0,
             'mptt_right': 0,
             'mptt_tree_id': 0,
             'mptt_depth': 0,
         },
         {
             'id': 1,
             'full_name': 'Family Two',
             'rank_name': 'Two',
             'rank': niamoto_db_meta.TaxonRankEnum.FAMILIA,
             'parent_id': None,
             'synonyms': {},
             'mptt_left': 0,
             'mptt_right': 0,
             'mptt_tree_id': 0,
             'mptt_depth': 0,
         },
     ]
     ins = niamoto_db_meta.taxon.insert().values(data)
     with Connector.get_connection() as connection:
         connection.execute(ins)
     TaxonomyManager.add_synonym_for_single_taxon(0, synonym_key, 1)
     self.assertRaises(
         IntegrityError,
         TaxonomyManager.add_synonym_for_single_taxon,
         1,
         synonym_key,
         1,
     )
Example no. 4
 def test_get_synonyms_map(self):
     synonym_key = "synonym_key_1"
     TaxonomyManager.register_synonym_key("synonym_key_1")
     data = [
         {
             'id': 0,
             'full_name': 'Family One',
             'rank_name': 'One',
             'rank': niamoto_db_meta.TaxonRankEnum.FAMILIA,
             'parent_id': None,
             'synonyms': {
                 synonym_key: 10,
             },
             'mptt_left': 0,
             'mptt_right': 0,
             'mptt_tree_id': 0,
             'mptt_depth': 0,
         },
         {
             'id': 1,
             'full_name': 'Family Two',
             'rank_name': 'Two',
             'rank': niamoto_db_meta.TaxonRankEnum.FAMILIA,
             'parent_id': None,
             'synonyms': {
                 synonym_key: 20,
             },
             'mptt_left': 0,
             'mptt_right': 0,
             'mptt_tree_id': 0,
             'mptt_depth': 0,
         },
     ]
     ins = niamoto_db_meta.taxon.insert().values(data)
     with Connector.get_connection() as connection:
         connection.execute(ins)
     synonyms = TaxonomyManager.get_synonyms_for_key(synonym_key)
     self.assertEqual(synonyms.loc[10], 0)
     self.assertEqual(synonyms.loc[20], 1)
 def setUpClass(cls):
     super(TestBasePlotOccurrenceProvider, cls).setUpClass()
     data_provider_1 = TestDataProvider.register_data_provider(
         'test_data_provider_1')
     data_provider_2 = TestDataProvider.register_data_provider(
         'test_data_provider_2', )
     TestDataProvider.register_data_provider('test_data_provider_3')
     plot_1 = test_data.get_plot_data_1(data_provider_1)
     plot_2 = test_data.get_plot_data_2(data_provider_2)
     occ_1 = test_data.get_occurrence_data_1(data_provider_1)
     occ_2 = test_data.get_occurrence_data_2(data_provider_2)
     plot_occ_1 = test_data.get_plot_occurrence_data_1(data_provider_1)
     plot_occ_2 = test_data.get_plot_occurrence_data_2(data_provider_2)
     ins_1 = niamoto_db_meta.plot.insert().values(plot_1 + plot_2)
     ins_2 = niamoto_db_meta.occurrence.insert().values(occ_1 + occ_2)
     ins_3 = niamoto_db_meta.plot_occurrence.insert().values(plot_occ_1 +
                                                             plot_occ_2)
     with Connector.get_connection() as connection:
         connection.execute(ins_1)
         connection.execute(ins_2)
         connection.execute(ins_3)
     fix_db_sequences()
Example no. 6
 def register_synonym_key(cls, synonym_key, bind=None):
     """
     Register a synonym key in database.
     :param synonym_key: The synonym key to register
     :param bind: If passed, use an existing engine or connection.
     """
     now = datetime.now()
     ins = meta.synonym_key_registry.insert({
         'name': synonym_key,
         'date_create': now,
     })
     if bind is not None:
         cls.assert_synonym_key_does_not_exists(synonym_key, bind=bind)
         bind.execute(ins)
         cls._register_unique_synonym_key_constraint(synonym_key, bind=bind)
         return
     with Connector.get_connection() as connection:
         cls.assert_synonym_key_does_not_exists(synonym_key,
                                                bind=connection)
         connection.execute(ins)
         cls._register_unique_synonym_key_constraint(synonym_key,
                                                     bind=connection)
     LOGGER.debug("synonym_key {} registered.".format(synonym_key))
 def register_data_provider(cls,
                            name,
                            *args,
                            properties={},
                            synonym_key=None,
                            return_object=True,
                            **kwargs):
     m = "DataProvider(name='{}', type_name='{}', properties='{}', " \
         "synonym_key='{}'): Registering data provider...'"
     LOGGER.debug(
         m.format(name, cls.get_type_name(), properties, synonym_key))
     with Connector.get_connection() as connection:
         cls.assert_data_provider_does_not_exist(name, bind=connection)
         synonym_key_id = None
         if synonym_key is not None:
             synonym_key_id = TaxonomyManager.get_synonym_key(
                 synonym_key, bind=connection)['id']
         ins = niamoto_db_meta.data_provider.insert({
             'name':
             name,
             'provider_type_key':
             cls.get_type_name(),
             'properties':
             properties,
             'synonym_key_id':
             synonym_key_id,
             'date_create':
             datetime.now(),
         })
         connection.execute(ins)
     m = "DataProvider(name='{}', type_name='{}', properties='{}', " \
         "synonym_key='{}'): Data provider had been successfully" \
         " registered!'"
     LOGGER.debug(
         m.format(name, cls.get_type_name(), properties, synonym_key))
     if return_object:
         return cls(name, *args, **kwargs)
 def test_get_dataframe_and_sync(self):
     pt_provider = PlantnoteDataProvider(
         'pl@ntnote_provider',
         self.TEST_DB_PATH,
     )
     with Connector.get_connection() as connection:
         prov = pt_provider.plot_occurrence_provider
         df1 = prov.get_niamoto_plot_occurrence_dataframe(connection)
         self.assertEqual(len(df1), 0)
         # Sync plots and occurrences
         plot_prov = pt_provider.plot_provider
         occ_prov = pt_provider.occurrence_provider
         plot_prov.sync(connection)
         occ_prov.sync(connection)
         # Sync plot-occurrences
         df2 = prov.get_provider_plot_occurrence_dataframe()
         cols = df2.columns
         for i in [
                 'occurrence_identifier',
         ]:
             self.assertIn(i, cols)
         prov.sync(connection)
         df3 = prov.get_niamoto_plot_occurrence_dataframe(connection)
         self.assertEqual(len(df2), len(df3))
 def get_dimension(cls, dimension_name):
     """
     Load a registered dimension.
     :param dimension_name: The name of the dimension to load.
     :return: The loaded dimension.
     """
     sel = sa.select([
         meta.dimension_registry.c.dimension_type_key,
         meta.dimension_registry.c.label_column,
         meta.dimension_registry.c.properties,
     ]).where(meta.dimension_registry.c.name == dimension_name)
     with Connector.get_connection() as connection:
         cls.assert_dimension_is_registered(dimension_name, connection)
         result = connection.execute(sel).fetchone()
         dim_type, label_column, properties = result
         column_labels = {}
         if 'column_labels' in properties:
             column_labels = properties['column_labels']
     return DIMENSION_TYPE_REGISTRY[dim_type]['class'].load(
         dimension_name,
         label_col=label_column,
         properties=properties,
         column_labels=column_labels,
     )
 def extract_raster_values_to_plots(cls, raster_name):
     with Connector.get_connection() as connection:
         with connection.begin():
             RasterManager.assert_raster_exists(raster_name,
                                                connection=connection)
             m = "Extracting '{}' raster values to plots properties."
             LOGGER.debug(m.format(raster_name))
             sql = \
                 """
                 WITH {raster_name} AS (
                   SELECT plot.id AS id,
                     ST_Value(raster.rast, plot.location) AS rast_value
                   FROM {plot_table} AS plot
                   LEFT JOIN {raster_table} AS raster
                   ON ST_Intersects(raster.rast, plot.location)
                 )
                 UPDATE {plot_table}
                 SET properties = (
                   {plot_table}.properties || jsonb_build_object(
                     '{prefix}{raster_name}', {raster_name}.rast_value
                   )
                 ) FROM {raster_name}
                 WHERE {raster_name}.id = {plot_table}.id
                 """.format(**{
                     'raster_name': raster_name,
                     'raster_table': '{}.{}'.format(
                         settings.NIAMOTO_RASTER_SCHEMA,
                         raster_name
                     ),
                     'plot_table': '{}.{}'.format(
                         settings.NIAMOTO_SCHEMA,
                         meta.plot.name
                     ),
                     'prefix': RASTER_PROPERTY_PREFIX,
                 })
             connection.execute(sql)
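
The SQL template above joins each plot with the raster tile it intersects (ST_Intersects), reads the cell value at the plot location (ST_Value), and merges it into the plot's JSONB properties under a key prefixed with RASTER_PROPERTY_PREFIX. A hedged usage sketch, assuming the method belongs to a manager class (called RasterDataManager here purely for illustration) and that a raster named 'rainfall' is registered:

    # Hypothetical class and raster names; after the call, every plot that
    # intersects the raster gains a property like '<prefix>rainfall': <cell value>.
    RasterDataManager.extract_raster_values_to_plots('rainfall')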
Example no. 11
 def tearDownClass(cls):
     engine = Connector.get_engine()
     meta.metadata.drop_all(engine)
     with Connector.get_connection() as connection:
         inspector = Inspector.from_engine(connection)
         # Drop vectors
         tables = inspector.get_table_names(
             schema=settings.NIAMOTO_VECTOR_SCHEMA)
         for tb in tables:
             connection.execute("DROP TABLE IF EXISTS {} CASCADE;".format(
                 "{}.{}".format(settings.NIAMOTO_VECTOR_SCHEMA, tb)))
         # Drop rasters
         tables = inspector.get_table_names(
             schema=settings.NIAMOTO_RASTER_SCHEMA)
         for tb in tables:
             connection.execute("DROP TABLE IF EXISTS {} CASCADE;".format(
                 "{}.{}".format(settings.NIAMOTO_RASTER_SCHEMA, tb)))
         # Drop fact tables
         tables = inspector.get_table_names(
             schema=settings.NIAMOTO_FACT_TABLES_SCHEMA)
         for tb in tables:
             connection.execute("DROP TABLE IF EXISTS {} CASCADE;".format(
                 "{}.{}".format(settings.NIAMOTO_FACT_TABLES_SCHEMA, tb)))
         # Drop dimensions
         tables = inspector.get_table_names(
             schema=settings.NIAMOTO_DIMENSIONS_SCHEMA)
         for tb in tables:
             connection.execute("DROP TABLE IF EXISTS {} CASCADE;".format(
                 "{}.{}".format(settings.NIAMOTO_DIMENSIONS_SCHEMA, tb)))
         # Drop SDMs
         tables = inspector.get_table_names(
             schema=settings.NIAMOTO_SSDM_SCHEMA)
         for tb in tables:
             connection.execute("DROP TABLE IF EXISTS {} CASCADE;".format(
                 "{}.{}".format(settings.NIAMOTO_SSDM_SCHEMA, tb)))
     super(BaseTestNiamotoSchemaCreated, cls).tearDownClass()
Example no. 12
 def get_geometry_column(cls, vector_name):
     """
     Find the geometry column of a vector, inspecting the geometry_columns
     table. Assumes that there is a single geometry column; if several
     exist, the first one is returned.
     :return: The geometry column name, type and srid (name, type, srid).
     """
     with Connector.get_connection() as connection:
         cls.assert_vector_exists(vector_name, connection)
         sql = \
             """
             SELECT f_geometry_column,
                 type,
                 srid
             FROM public.geometry_columns
             WHERE f_table_schema = '{}'
                 AND f_table_name = '{}'
             LIMIT 1;
             """.format(
                 settings.NIAMOTO_VECTOR_SCHEMA,
                 vector_name
             )
         result = connection.execute(sql)
         return result.fetchone()
 def test_get_insert_dataframe(self):
     data_provider_1 = TestDataProvider('test_data_provider_1')
     with Connector.get_connection() as connection:
         prov1 = BasePlotOccurrenceProvider(data_provider_1)
         df1 = prov1.get_niamoto_plot_occurrence_dataframe(connection)
     #  1. Nothing to insert
     data_1 = pd.DataFrame.from_records([
         {
             'plot_id': 1,
             'occurrence_id': 1,
             'occurrence_identifier': 'PLOT2_001',
         },
     ],
                                        index=['plot_id', 'occurrence_id'])
     reindexed_data_1 = prov1.get_reindexed_provider_dataframe(data_1)
     ins = prov1.get_insert_dataframe(df1, reindexed_data_1)
     self.assertEqual(len(ins), 0)
     # 2. Everything to insert
     data_2 = pd.DataFrame.from_records([
         {
             'plot_id': 0,
             'occurrence_id': 1,
             'occurrence_identifier': 'PLOT1_002',
         },
         {
             'plot_id': 0,
             'occurrence_id': 2,
             'occurrence_identifier': 'PLOT1_003',
         },
         {
             'plot_id': 0,
             'occurrence_id': 5,
             'occurrence_identifier': 'PLOT1_003',
         },
     ],
                                        index=['plot_id', 'occurrence_id'])
     reindexed_data_2 = prov1.get_reindexed_provider_dataframe(data_2)
     ins = prov1.get_insert_dataframe(df1, reindexed_data_2)
     self.assertEqual(len(ins), 3)
     # 3. Partial insert
     data_3 = pd.DataFrame.from_records([
         {
             'plot_id': 0,
             'occurrence_id': 0,
             'occurrence_identifier': 'PLOT1_001',
         },
         {
             'plot_id': 0,
             'occurrence_id': 2,
             'occurrence_identifier': 'PLOT1_003',
         },
         {
             'plot_id': 0,
             'occurrence_id': 5,
             'occurrence_identifier': 'PLOT1_003',
         },
     ],
                                        index=['plot_id', 'occurrence_id'])
     reindexed_data_3 = prov1.get_reindexed_provider_dataframe(data_3)
     ins = prov1.get_insert_dataframe(df1, reindexed_data_3)
     self.assertEqual(len(ins), 2)
     self.assertEqual(list(ins['provider_occurrence_pk']), [2, 5])
Example no. 14
 def update_raster(cls,
                   name,
                   raster_file_path=None,
                   new_name=None,
                   tile_dimension=None,
                   register=False,
                   properties=None):
     """
     Update an existing raster in the database and update it in the Niamoto
     raster registry. Uses the raster2pgsql command. The raster is cut into
     tiles, using the dimension tile_width x tile_height. All rasters
     are stored in the settings.NIAMOTO_RASTER_SCHEMA schema. c.f.
     https://postgis.net/docs/using_raster_dataman.html#RT_Raster_Loader
     for more details on raster2pgsql.
     :param name: The name of the raster.
     :param raster_file_path: The path to the raster file. If None, the
         raster data won't be updated.
     :param new_name: The new name of the raster (not changed if None).
     :param tile_dimension: The tile dimension (width, height), if None,
         tile dimension will be chosen automatically by PostGIS.
     :param register: Register the raster as a filesystem (out-db) raster
         (-R option of raster2pgsql).
     :param properties: A dict of arbitrary properties.
     """
     cls.assert_raster_exists(name)
     if new_name is None:
         new_name = name
     else:
         cls.assert_raster_does_not_exist(new_name)
     if raster_file_path is not None:
         if not os.path.exists(raster_file_path):
             raise FileNotFoundError(
                 "The raster {} does not exist".format(raster_file_path))
         if tile_dimension is not None:
             dim = "{}x{}".format(tile_dimension[0], tile_dimension[1])
         else:
             dim = 'auto'
         os.environ["PGPASSWORD"] = settings.NIAMOTO_DATABASE["PASSWORD"]
         d = "-d"
         if new_name != name:
             d = "-c"
         tb = "{}.{}".format(cls.DB_SCHEMA, new_name)
         raster2pgsql_args = [
             "raster2pgsql",
             d,
             "-C",
             "-Y",
             '-t',
             dim,
             '-I',
             '-M',
             raster_file_path,
             tb,
         ]
         if register:
             raster2pgsql_args.append('-R')
         p1 = subprocess.Popen(raster2pgsql_args,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
         with open(LOG_FILE, mode='a') as log_file:
             p2 = subprocess.call([
                 "psql",
                 "-q",
                 "-U",
                 settings.NIAMOTO_DATABASE["USER"],
                 "-h",
                 settings.NIAMOTO_DATABASE["HOST"],
                 "-p",
                 settings.NIAMOTO_DATABASE["PORT"],
                 "-d",
                 settings.NIAMOTO_DATABASE["NAME"],
                 "-w",
             ],
                                  stdin=p1.stdout,
                                  stdout=log_file,
                                  stderr=log_file)
             stdout, stderr = p1.communicate()
         if stderr:
             LOGGER.debug(stderr)
         os.environ["PGPASSWORD"] = ""
         if p2 != 0 or p1.returncode != 0:
             raise RuntimeError("raster import failed.")
     upd_values = {'name': new_name, 'date_update': datetime.now()}
     if properties is not None:
         upd_values['properties'] = properties
     upd = cls.REGISTRY_TABLE.update() \
         .values(upd_values)\
         .where(cls.REGISTRY_TABLE.c.name == name)
     with Connector.get_connection() as connection:
         connection.execute(upd)
         if new_name != name:
             if raster_file_path is not None:
                 connection.execute("DROP TABLE IF EXISTS {};".format(
                     "{}.{}".format(cls.DB_SCHEMA, name)))
             else:
                 connection.execute("ALTER TABLE {} RENAME TO {};".format(
                     '{}.{}'.format(cls.DB_SCHEMA, name),
                     '{}.{}'.format(cls.DB_SCHEMA, new_name)))
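
A usage sketch for update_raster, assuming it is a classmethod of the RasterManager referenced earlier and that an 'elevation' raster is already registered; the file path, new name and tile size are placeholders:

    # Re-import the raster data with a new tiling and rename the registry entry.
    RasterManager.update_raster(
        'elevation',
        raster_file_path='/data/rasters/elevation_v2.tif',  # placeholder path
        new_name='elevation_v2',
        tile_dimension=(200, 200),
    )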
 def test_get_update_dataframe(self):
     data_provider_1 = TestDataProvider('test_data_provider_1')
     with Connector.get_connection() as connection:
         prov1 = BasePlotOccurrenceProvider(data_provider_1)
         df1 = prov1.get_niamoto_plot_occurrence_dataframe(connection)
     #  1. Nothing to update
     data_1 = pd.DataFrame.from_records([
         {
             'plot_id': 0,
             'occurrence_id': 1,
             'occurrence_identifier': 'PLOT1_002',
         },
     ],
                                        index=['plot_id', 'occurrence_id'])
     reindexed_data_1 = prov1.get_reindexed_provider_dataframe(data_1)
     upd = prov1.get_update_dataframe(df1, reindexed_data_1)
     self.assertEqual(len(upd), 0)
     # 2. Everything to update
     data_2 = pd.DataFrame.from_records([
         {
             'plot_id': 0,
             'occurrence_id': 0,
             'occurrence_identifier': 'PLOT1_002',
         },
         {
             'plot_id': 1,
             'occurrence_id': 2,
             'occurrence_identifier': 'PLOT1_003',
         },
         {
             'plot_id': 2,
             'occurrence_id': 5,
             'occurrence_identifier': 'PLOT1_003',
         },
     ],
                                        index=['plot_id', 'occurrence_id'])
     reindexed_data_2 = prov1.get_reindexed_provider_dataframe(data_2)
     upd = prov1.get_update_dataframe(df1, reindexed_data_2)
     self.assertEqual(len(upd), 3)
     # 3. Partial update
     data_3 = pd.DataFrame.from_records([
         {
             'plot_id': 0,
             'occurrence_id': 0,
             'occurrence_identifier': 'PLOT1_002',
         },
         {
             'plot_id': 1,
             'occurrence_id': 2,
             'occurrence_identifier': 'PLOT1_003',
         },
         {
             'plot_id': 1,
             'occurrence_id': 5,
             'occurrence_identifier': 'PLOT1_003',
         },
     ],
                                        index=['plot_id', 'occurrence_id'])
     reindexed_data_3 = prov1.get_reindexed_provider_dataframe(data_3)
     upd = prov1.get_update_dataframe(df1, reindexed_data_3)
     self.assertEqual(len(upd), 2)
     l = list(upd['occurrence_id'])
     l.sort()
     self.assertEqual(l, [0, 2])
Example no. 16
 def add_raster(cls,
                name,
                raster_file_path,
                tile_dimension=None,
                register=False,
                properties={},
                **kwargs):
     """
     Add a raster to the database and register it in the Niamoto raster
     registry. Uses the raster2pgsql command. The raster is cut into tiles,
     using the dimension tile_width x tile_height. All rasters
     are stored in the settings.NIAMOTO_RASTER_SCHEMA schema. c.f.
     https://postgis.net/docs/using_raster_dataman.html#RT_Raster_Loader
     for more details on raster2pgsql.
     :param name: The name of the raster.
     :param raster_file_path: The path to the raster file.
     :param tile_dimension: The tile dimension (width, height), if None,
         tile dimension will be chosen automatically by PostGIS.
     :param register: Register the raster as a filesystem (out-db) raster
         (-R option of raster2pgsql).
     :param properties: A dict of arbitrary properties.
     """
     if not os.path.exists(raster_file_path):
         raise FileNotFoundError(
             "The raster {} does not exist".format(raster_file_path))
     cls.assert_raster_does_not_exist(name)
     cls.assert_raster_schema_exists()
     if tile_dimension is not None:
         dim = "{}x{}".format(tile_dimension[0], tile_dimension[1])
     else:
         dim = 'auto'
     tb = "{}.{}".format(cls.DB_SCHEMA, name)
     os.environ["PGPASSWORD"] = settings.NIAMOTO_DATABASE["PASSWORD"]
     raster2pgsql_args = [
         "raster2pgsql",
         "-c",
         "-Y",
         '-C',
         '-t',
         dim,
         '-I',
         '-M',
         raster_file_path,
         tb,
     ]
     if register:
         raster2pgsql_args.append('-R')
     p1 = subprocess.Popen(raster2pgsql_args,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE)
     with open(LOG_FILE, mode='a') as log_file:
         p2 = subprocess.call([
             "psql",
             "-q",
             "-U",
             settings.NIAMOTO_DATABASE["USER"],
             "-h",
             settings.NIAMOTO_DATABASE["HOST"],
             "-p",
             settings.NIAMOTO_DATABASE["PORT"],
             "-d",
             settings.NIAMOTO_DATABASE["NAME"],
             "-w",
         ],
                              stdin=p1.stdout,
                              stdout=log_file,
                              stderr=log_file)
         stdout, stderr = p1.communicate()
     if stderr:
         LOGGER.debug(stderr)
     if stdout:
         LOGGER.debug(stdout)
     os.environ["PGPASSWORD"] = ""
     if p2 != 0 or p1.returncode != 0:
         raise RuntimeError(
             "raster import failed, check the logs for more details.")
     values = {
         'name': name,
         'date_create': datetime.now(),
         'properties': properties,
     }
     values.update(kwargs)
     ins = cls.REGISTRY_TABLE.insert().values(values)
     with Connector.get_connection() as connection:
         connection.execute(ins)
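
A usage sketch for add_raster under the same assumptions (RasterManager classmethod, configured database, placeholder file path); register=True keeps the data on the filesystem as an out-db raster:

    RasterManager.add_raster(
        'rainfall',
        '/data/rasters/rainfall.tif',    # placeholder path
        tile_dimension=(100, 100),
        register=True,                   # out-db raster (-R option of raster2pgsql)
        properties={'unit': 'mm/year'},  # arbitrary metadata kept in the registry
    )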
Example no. 17
 def update_vector(cls,
                   name,
                   vector_file_path=None,
                   new_name=None,
                   properties=None):
     """
     Update an existing vector in the database and update it in the
     Niamoto vector registry. Uses ogr2ogr. All vectors are stored in the
     settings.NIAMOTO_VECTOR_SCHEMA schema.
     :param name: The name of the vector.
     :param vector_file_path: The path to the vector file. If None, the
         vector data won't be updated.
     :param new_name: The new name of the vector (not changed if None).
     :param properties: A dict of arbitrary properties.
     """
     LOGGER.debug("VectorManager.update_vector({}, {}, new_name={})".format(
         name, vector_file_path, new_name))
     cls.assert_vector_exists(name)
     if new_name is None:
         new_name = name
     else:
         cls.assert_vector_does_not_exist(new_name)
     if vector_file_path is not None:
         if not os.path.exists(vector_file_path):
             raise FileNotFoundError(
                 "The vector {} does not exist".format(vector_file_path))
         with Connector.get_connection() as connection:
             connection.execute("DROP TABLE IF EXISTS {};".format(
                 "{}.{}".format(settings.NIAMOTO_VECTOR_SCHEMA, name)))
         tb = "{}.{}".format(settings.NIAMOTO_VECTOR_SCHEMA, new_name)
         pg_str = "dbname='{}' host='{}' port='{}' user='******' password='******'"
         pg_str = pg_str.format(
             settings.NIAMOTO_DATABASE["NAME"],
             settings.NIAMOTO_DATABASE["HOST"],
             settings.NIAMOTO_DATABASE["PORT"],
             settings.NIAMOTO_DATABASE["USER"],
             settings.NIAMOTO_DATABASE["PASSWORD"],
         )
         p = subprocess.Popen([
             "ogr2ogr",
             "-overwrite",
             "-lco",
             "SCHEMA={}".format(settings.NIAMOTO_VECTOR_SCHEMA),
             "-lco",
             "OVERWRITE=YES",
             '-f',
             'PostgreSQL',
             'PG:{}'.format(pg_str),
             '-nln',
             tb,
             '-nlt',
             'PROMOTE_TO_MULTI',
             vector_file_path,
         ],
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
         stdout, stderr = p.communicate()
         if stdout:
             LOGGER.debug(str(stdout))
         if stderr:
             LOGGER.debug(str(stderr))
         if p.returncode != 0:
             raise RuntimeError("vector import failed.")
     upd_values = {'name': new_name, 'date_update': datetime.now()}
     if properties is not None:
         upd_values['properties'] = properties
     upd = meta.vector_registry.update() \
         .values(upd_values)\
         .where(meta.vector_registry.c.name == name)
     with Connector.get_connection() as connection:
         if new_name != name:
             connection.execute(upd)
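
A usage sketch for update_vector, assuming it is the VectorManager method named in the log message above; the file path and names are placeholders:

    # Replace the vector data and rename the registry entry in one call.
    VectorManager.update_vector(
        'provinces',
        vector_file_path='/data/vectors/provinces_2018.shp',  # placeholder path
        new_name='provinces_2018',
        properties={'source': 'official boundaries, 2018 edition'},
    )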
 def tearDown(self):
     del1 = niamoto_db_meta.data_provider.delete()
     with Connector.get_connection() as connection:
         connection.execute(del1)
 def sync(self,
          insert=True,
          update=True,
          delete=True,
          sync_occurrence=True,
          sync_plot=True,
          sync_plot_occurrence=True):
     """
     Sync Niamoto database with providers data.
     :param insert: if False, skip insert operation.
     :param update: if False, skip update operation.
     :param delete: if False, skip delete operation.
     :param sync_occurrence: if False, skip occurrence sync.
     :param sync_plot: if False, skip plot sync.
     :param sync_plot_occurrence: if False, skip plot-occurrence sync.
     :return: A dict containing the insert / update / delete dataframes for
     each specialized provider:
         {
             'occurrence': {
                 'insert': insert_df,
                 'update': update_df,
                 "delete': delete_df,
             },
             'plot': { ... },
             'plot_occurrence': { ... },
         }
     """
     t = time.time()
     LOGGER.debug("\r" + "-" * 80)
     LOGGER.info("*** Data sync starting ('{}' - {})...".format(
         self.name, self.get_type_name()))
     with Connector.get_connection() as connection:
         with connection.begin():
             i1, u1, d1 = self.occurrence_provider.sync(
                 connection,
                 insert=insert,
                 update=update,
                 delete=delete,
             ) if sync_occurrence else ([], [], [])
             i2, u2, d2 = self.plot_provider.sync(
                 connection,
                 insert=insert,
                 update=update,
                 delete=delete,
             ) if sync_plot else ([], [], [])
         with connection.begin():
             i3, u3, d3 = self.plot_occurrence_provider.sync(
                 connection,
                 insert=insert,
                 update=update,
                 delete=delete,
             ) if sync_plot_occurrence else ([], [], [])
         upd = niamoto_db_meta.data_provider.update().values({
             'last_sync':
             datetime.now(),
         }).where(niamoto_db_meta.data_provider.c.name == self.name)
         connection.execute(upd)
         m = "*** Data sync with '{}' done (total time: {:.2f} s)!"
         LOGGER.info(m.format(self.name, time.time() - t))
         LOGGER.debug("\r" + "-" * 80)
         return {
             'occurrence': {
                 'insert': i1,
                 'update': u1,
                 'delete': d1,
             },
             'plot': {
                 'insert': i2,
                 'update': u2,
                 'delete': d2,
             },
             'plot_occurrence': {
                 'insert': i3,
                 'update': u3,
                 'delete': d3,
             },
         }
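
A sketch of how the dict returned by sync can be used, following the CsvDataProvider example shown further below; the CSV path is a placeholder:

    # Register, instantiate and sync a provider, then inspect the result.
    CsvDataProvider.register_data_provider('csv_provider')
    provider = CsvDataProvider('csv_provider',
                               occurrence_csv_path='/data/occurrences.csv')
    result = provider.sync(sync_plot=False, sync_plot_occurrence=False)
    LOGGER.info("{} occurrences inserted".format(
        len(result['occurrence']['insert'])))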
Example no. 20
 def get_vector_geo_dataframe(cls,
                              vector_name,
                              geojson_filter=None,
                              geojson_cut=False):
     """
     Return a registered vector as a GeoDataFrame.
     :param vector_name: The name of the vector.
     :param geojson_filter: Optional: if specified (as str), will only
         return the features intersecting with the geojson.
     :param geojson_cut: If True, return the intersection with the geojson
         filter (cut the intersecting features).
     :return: A GeoDataFrame corresponding to the vector.
     """
     geom_col = cls.get_geometry_column(vector_name)
     pk_cols = cls.get_vector_primary_key_columns(vector_name)
     cols = cls.get_vector_sqlalchemy_table(vector_name).columns
     cols_str = [c.name for c in cols]
     where_statement = ''
     if geojson_filter is not None:
         geojson_postgis = \
             """
                 ST_Transform(
                     ST_SetSRID(
                         ST_MakeValid(ST_GeomFromGeoJSON('{}')),
                         4326
                     ),
                     {}
                 )
             """.format(geojson_filter, geom_col[2])
         where_statement = \
             """
                 WHERE ST_Intersects(
                     {},
                     {}
                 )
             """.format(
                 geom_col[0],
                 geojson_postgis
             )
         if geojson_cut:
             cols_str = []
             for col in cols:
                 if col.name == geom_col[0]:
                     cols_str.append("ST_Intersection({}, {}) AS {}".format(
                         geom_col[0],
                         geojson_postgis,
                         col.name,
                     ))
                 else:
                     cols_str.append(col.name)
     with Connector.get_connection() as connection:
         sql = "SELECT {} FROM {}.{} {};".format(
             ','.join(cols_str),
             settings.NIAMOTO_VECTOR_SCHEMA,
             vector_name,
             where_statement,
         )
         return gpd.read_postgis(
             sql,
             connection,
             index_col=[i[0] for i in pk_cols],
             geom_col=geom_col[0],
             crs='+init=epsg:{}'.format(geom_col[2]),
         )
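
A usage sketch for get_vector_geo_dataframe with a GeoJSON filter, assuming these classmethods belong to the VectorManager mentioned earlier and that a vector named 'provinces' (illustrative) is registered; the polygon is an arbitrary WGS84 bounding box:

    geojson = (
        '{"type": "Polygon", "coordinates": [[[166.0, -22.5], [167.0, -22.5], '
        '[167.0, -21.5], [166.0, -21.5], [166.0, -22.5]]]}'
    )
    gdf = VectorManager.get_vector_geo_dataframe(
        'provinces',
        geojson_filter=geojson,
        geojson_cut=True,  # clip the returned geometries to the filter polygon
    )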
 def test_get_delete_dataframe(self):
     data_provider_1 = TestDataProvider('test_data_provider_1')
     with Connector.get_connection() as connection:
         pp1 = BasePlotProvider(data_provider_1)
         df1 = pp1.get_niamoto_plot_dataframe(connection)
     #  1. Nothing to delete
     plot_1 = pd.DataFrame.from_records([
         {
             'id': 0,
             'name': 'plot_1_1',
             'location': from_shape(Point(166.5521, -22.0939), srid=4326),
             'properties': '{}',
         },
         {
             'id': 1,
             'name': 'plot_1_2',
             'location': from_shape(Point(166.551, -22.098), srid=4326),
             'properties': '{}',
         },
         {
             'id': 2,
             'location': from_shape(Point(166.552, -22.097), srid=4326),
             'properties': '{}',
         },
         {
             'id': 5,
             'location': from_shape(Point(166.553, -22.099), srid=4326),
             'properties': '{}',
         },
     ],
                                        index='id')
     delete_df = pp1.get_delete_dataframe(df1, plot_1)
     self.assertIn('provider_pk', delete_df.columns)
     self.assertIn('provider_id', delete_df.columns)
     self.assertEqual(len(delete_df[pd.isnull(delete_df['provider_pk'])]),
                      0)
     self.assertEqual(len(delete_df[pd.isnull(delete_df['provider_id'])]),
                      0)
     self.assertEqual(len(delete_df), 0)
     self.assertEqual(len(delete_df[pd.isnull(delete_df['location'])]), 0)
     #  2. Everything to delete
     plot_2 = pd.DataFrame.from_records([
         {
             'id': 10,
             'name': 'plot_1_11',
             'location': from_shape(Point(166.5521, -22.0939), srid=4326),
             'properties': '{}',
         },
     ],
                                        index='id')
     delete_df = pp1.get_delete_dataframe(df1, plot_2)
     self.assertEqual(len(delete_df), 4)
     self.assertEqual(len(delete_df[pd.isnull(delete_df['location'])]), 0)
     #  3. Partial delete
     plot_3 = pd.DataFrame.from_records([
         {
             'id': 0,
             'name': 'plot_1_1',
             'location': from_shape(Point(166.5521, -22.0939), srid=4326),
             'properties': '{}',
         },
     ],
                                        index='id')
     delete_df = pp1.get_delete_dataframe(df1, plot_3)
     self.assertEqual(len(delete_df), 3)
     self.assertEqual(len(delete_df[pd.isnull(delete_df['location'])]), 0)
 def test_publish_to_postgis(self):
     CsvDataProvider.register_data_provider('csv_provider')
     csv_provider = CsvDataProvider(
         'csv_provider',
         occurrence_csv_path=TEST_OCCURRENCE_CSV,
     )
     csv_provider.sync()
     with Connector.get_connection() as connection:
         sel = select([
             meta.occurrence.c.id.label('id'),
             meta.occurrence.c.taxon_id.label('taxon_id'),
             cast(meta.taxon.c.rank.label('rank'), String).label('rank'),
             meta.taxon.c.full_name.label('full_name'),
             cast(meta.occurrence.c.location, String).label('location'),
         ]).select_from(
             meta.occurrence.outerjoin(
                 meta.taxon, meta.taxon.c.id == meta.occurrence.c.taxon_id))
         df = gpd.read_postgis(sel,
                               connection,
                               index_col='id',
                               geom_col='location',
                               crs='+init=epsg:4326')
         BaseDataPublisher._publish_sql(df,
                                        'test_export_postgis',
                                        schema='niamoto')
         engine = Connector.get_engine()
         inspector = Inspector.from_engine(engine)
         self.assertIn(
             'test_export_postgis',
             inspector.get_table_names(schema=settings.NIAMOTO_SCHEMA),
         )
         df2 = gpd.read_postgis(sel,
                                connection,
                                index_col='id',
                                geom_col='location',
                                crs={'init': 'epsg:4326'})
         BaseDataPublisher._publish_sql(
             df2,
             'test_export_postgis',
             schema='niamoto',
             if_exists='truncate',
             truncate_cascade=True,
         )
         # Test geometry types
         polygon = Polygon([(0, 0), (1, 0), (1, 1)])
         linestring = LineString([(0, 0), (0, 1), (1, 1)])
         multipoint = MultiPoint([(1, 2), (3, 4), (5, 6)])
         multilinestring = MultiLineString([[(1, 2), (3, 4), (5, 6)],
                                            [(7, 8), (9, 10)]])
         polygon_2 = Polygon([(1, 1), (1, -1), (-1, -1), (-1, 1)],
                             [[(.5, .5), (.5, -.5), (-.5, -.5), (-.5, .5)]])
         multipolygon = MultiPolygon([polygon, polygon_2])
         geometry = GeometryCollection([polygon, polygon_2])
         BaseDataPublisher._publish_sql(
             gpd.GeoDataFrame([{
                 'A': 1,
                 'geom': polygon
             }], geometry='geom'),
             'test_export_postgis',
             schema='niamoto',
             if_exists='replace',
         )
         BaseDataPublisher._publish_sql(
             gpd.GeoDataFrame([{
                 'A': 1,
                 'geom': linestring
             }],
                              geometry='geom'),
             'test_export_postgis',
             schema='niamoto',
             if_exists='replace',
         )
         BaseDataPublisher._publish_sql(
             gpd.GeoDataFrame([{
                 'A': 1,
                 'geom': multilinestring
             }],
                              geometry='geom'),
             'test_export_postgis',
             schema='niamoto',
             if_exists='replace',
         )
         BaseDataPublisher._publish_sql(
             gpd.GeoDataFrame([{
                 'A': 1,
                 'geom': multipoint
             }],
                              geometry='geom'),
             'test_export_postgis',
             schema='niamoto',
             if_exists='replace',
         )
         BaseDataPublisher._publish_sql(
             gpd.GeoDataFrame([{
                 'A': 1,
                 'geom': multipolygon
             }],
                              geometry='geom'),
             'test_export_postgis',
             schema='niamoto',
             if_exists='replace',
         )
         BaseDataPublisher._publish_sql(
             gpd.GeoDataFrame([{
                 'A': 1,
                 'geom': geometry
             }], geometry='geom'),
             'test_export_postgis',
             schema='niamoto',
             if_exists='replace',
         )
         BaseDataPublisher._publish_sql(
             gpd.GeoDataFrame([{
                 'A': 1,
                 'geom': geometry
             }], geometry='geom'),
             'TEST123',
             schema='niamoto',
             if_exists='replace',
         )
         BaseDataPublisher._publish_sql(
             gpd.GeoSeries([polygon]),
             'test_export_postgis',
             schema='niamoto',
             if_exists='replace',
         )
         self.assertRaises(
             ValueError,
             BaseDataPublisher._publish_sql,
             gpd.GeoSeries([polygon]),
             'test_export_postgis',
             schema='niamoto',
             if_exists='thisisnotallowed',
         )
Example no. 23
 def set_taxonomy(cls, taxon_dataframe):
     """
     Set the taxonomy. If a taxonomy already exists, delete it and set the
     new one.
     :param taxon_dataframe: A dataframe containing the taxonomy to set.
     The dataframe must contain at least the following columns:
         taxon_id  -> The id of the taxon (must be the index of the
                      DataFrame).
         parent_id -> The id of the taxon's parent.
         rank      -> The rank of the taxon (must be a value in:
                      'REGNUM', 'PHYLUM', 'CLASSIS', 'ORDO', 'FAMILIA',
                      'GENUS', 'SPECIES', 'INFRASPECIES')
         full_name -> The full name of the taxon.
         rank_name -> The rank name of the taxon.
     The remaining columns will be stored as synonyms, using the column
     name.
     """
     LOGGER.debug("Starting set_taxonomy...")
     required_columns = {'parent_id', 'rank', 'full_name', 'rank_name'}
     cols = set(list(taxon_dataframe.columns))
     inter = cols.intersection(required_columns)
     synonym_cols = cols.difference(required_columns)
     if cls.IDENTITY_SYNONYM_KEY in synonym_cols:
         synonym_cols = synonym_cols.difference({cls.IDENTITY_SYNONYM_KEY})
         m = "The '{}' synonym key is a special key reserved by Niamoto," \
             "this column will be ignored."
         LOGGER.warning(m.format(cls.IDENTITY_SYNONYM_KEY))
     if not inter == required_columns:
         m = "The taxon dataframe does not contains the required " \
             "columns {}, csv has: {}".format(required_columns, cols)
         raise MalformedDataSourceError(m)
     if len(synonym_cols) > 0:
         LOGGER.debug(
             "The following synonym keys had been detected: {}".format(
                 ','.join(synonym_cols)))
         synonyms = taxon_dataframe[list(synonym_cols)].apply(
             lambda x: x.to_json(), axis=1)
     else:
         LOGGER.debug("No synonym keys had been detected.")
         synonyms = '{}'
     taxon_dataframe.drop(synonym_cols, axis=1, inplace=True)
     taxon_dataframe['synonyms'] = synonyms
     mptt = ['mptt_tree_id', 'mptt_depth', 'mptt_left', 'mptt_right']
     for col in mptt:
         taxon_dataframe[col] = 0
     taxon_dataframe = cls.construct_mptt(taxon_dataframe)
     taxon_dataframe['taxon_id'] = taxon_dataframe.index
     taxon_dataframe = taxon_dataframe.astype(object).where(
         pd.notnull(taxon_dataframe), None)
     with Connector.get_connection() as connection:
         current_synonym_keys = set(
             pd.read_sql(select([meta.synonym_key_registry.c.name]),
                         connection)['name'])
         to_add = synonym_cols.difference(current_synonym_keys)
         to_delete = current_synonym_keys.difference(
             synonym_cols).difference({cls.IDENTITY_SYNONYM_KEY})
         to_keep = current_synonym_keys.intersection(synonym_cols)
         if len(to_delete) > 0:
             prov = meta.data_provider
             syno = meta.synonym_key_registry
             providers_to_update = pd.read_sql(
                 select([prov.c.name]).select_from(
                     prov.join(syno,
                               syno.c.id == prov.c.synonym_key_id)).where(
                                   syno.c.name.in_(to_delete)), connection)
             msg = "The following synonym keys will be deleted: {}. " \
                   "The following data providers where depending on those" \
                   " synonym keys: {} Please consider updating " \
                   "them, or updating the taxonomy."
             LOGGER.warning(
                 msg.format(to_delete, set(providers_to_update['name'])))
         with connection.begin():
             connection.execute("SET CONSTRAINTS ALL DEFERRED;")
             # Unregister synonym keys
             cls.unregister_all_synonym_keys(
                 bind=connection,
                 exclude=to_keep,
             )
             # Delete existing taxonomy
             cls.delete_all_taxa(bind=connection)
             # Register synonym cols
             for synonym_key in to_add:
                 cls.register_synonym_key(synonym_key, bind=connection)
             # Insert the data
             LOGGER.debug("Inserting the taxonomy in database...")
             if len(taxon_dataframe) > 0:
                 ins = meta.taxon.insert().values(
                     id=bindparam('taxon_id'),
                     full_name=bindparam('full_name'),
                     rank_name=bindparam('rank_name'),
                     rank=bindparam('rank'),
                     parent_id=bindparam('parent_id'),
                     synonyms=cast(bindparam('synonyms'), JSONB),
                     mptt_left=bindparam('mptt_left'),
                     mptt_right=bindparam('mptt_right'),
                     mptt_tree_id=bindparam('mptt_tree_id'),
                     mptt_depth=bindparam('mptt_depth'),
                 )
                 result = connection.execute(
                     ins,
                     taxon_dataframe.to_dict(orient='records')).rowcount
             else:
                 result = 0
             m = "The taxonomy had been successfully set ({} taxa " \
                 "inserted)!"
             LOGGER.debug(m.format(result))
     return result, synonym_cols
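
A sketch of the input expected by set_taxonomy, assuming it is called on TaxonomyManager as elsewhere in these examples and that pandas is available as pd as in the snippets above; the 'gbif' column is an illustrative synonym key, and any extra column is treated the same way:

    # Minimal taxonomy dataframe: taxon_id as index, plus the required columns.
    taxa = pd.DataFrame.from_records(
        [
            {'taxon_id': 1, 'parent_id': None, 'rank': 'FAMILIA',
             'full_name': 'Family One', 'rank_name': 'One', 'gbif': 100},
            {'taxon_id': 2, 'parent_id': 1, 'rank': 'GENUS',
             'full_name': 'Genus One', 'rank_name': 'One', 'gbif': 200},
        ],
        index='taxon_id',
    )
    inserted_count, synonym_keys = TaxonomyManager.set_taxonomy(taxa)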
Example no. 24
 def get_reindexed_provider_dataframe(self, dataframe):
     """
     :param dataframe: The provider's DataFrame, or a subset, with index
     being a multi-index composed of
     [plot_id, occurrence_id] (provider's ids).
     :return: The dataframe reindexed:
         provider_plot_pk -> plot_id
         provider_occurrence_pk -> occurrence_id
     """
     LOGGER.debug("reindexing provider's plot-occurrence dataframe...")
     if len(dataframe) == 0:
         return dataframe
     # Set index names
     dataframe.index.set_names(
         ['provider_plot_pk', 'provider_occurrence_pk'],
         inplace=True
     )
     with Connector.get_connection() as connection:
         sel_plot = select([
             plot.c.id.label('plot_id'),
             plot.c.provider_pk.label('provider_plot_pk')
         ]).where(plot.c.provider_id == self.data_provider.db_id)
         sel_occ = select([
             occurrence.c.id.label('occurrence_id'),
             occurrence.c.provider_pk.label('provider_occurrence_pk')
         ]).where(occurrence.c.provider_id == self.data_provider.db_id)
         plot_ids = pd.read_sql(
             sel_plot,
             connection,
             index_col='provider_plot_pk'
         )
         occ_ids = pd.read_sql(
             sel_occ,
             connection,
             index_col='provider_occurrence_pk'
         )
     dataframe.reset_index(inplace=True)
     # Assert plots and occurrences exist in db
     plot_pks = pd.Index(pd.unique(dataframe['provider_plot_pk']))
     plot_not_in_db = plot_pks.difference(plot_ids.index)
     occ_pks = pd.Index(pd.unique(dataframe['provider_occurrence_pk']))
     occ_not_in_db = occ_pks.difference(occ_ids.index)
     if len(plot_not_in_db) != 0:
         raise IncoherentDatabaseStateError(
             "Tried to insert plot_occurrence records with plot records "
             "that do not exist in database."
         )
     if len(occ_not_in_db) != 0:
         raise IncoherentDatabaseStateError(
             "Tried to insert plot_occurrence records with occurrence "
             "records that dot not exist in database."
         )
     dataframe = dataframe.merge(
         plot_ids,
         left_on='provider_plot_pk',
         right_index=True,
     )
     dataframe = dataframe.merge(
         occ_ids,
         left_on='provider_occurrence_pk',
         right_index=True,
     )
     dataframe.set_index(['plot_id', 'occurrence_id'], inplace=True)
     dataframe['provider_id'] = self.data_provider.db_id
     dataframe['plot_id'] = dataframe.index.get_level_values('plot_id')
     dataframe['occurrence_id'] = dataframe.index.get_level_values(
         'occurrence_id'
     )
     return dataframe
Example no. 25
 def test_get_insert_dataframe(self):
     data_provider_1 = TestDataProvider('test_data_provider_1')
     with Connector.get_connection() as connection:
         op1 = BaseOccurrenceProvider(data_provider_1)
         df1 = op1.get_niamoto_occurrence_dataframe(connection)
     #  1. Nothing to insert
     occ_1 = pd.DataFrame.from_records([
         {
             'id': 0,
             'taxon_id': None,
             'provider_taxon_id': None,
             'location': from_shape(Point(166.5521, -22.0939), srid=4326),
             'properties': '{}',
         },
     ],
                                       index='id')
     ins = op1.get_insert_dataframe(df1, occ_1)
     self.assertEqual(len(ins), 0)
     # 2. Everything to insert
     occ_2 = pd.DataFrame.from_records([
         {
             'id': 10,
             'taxon_id': None,
             'provider_taxon_id': None,
             'location': from_shape(Point(166.5521, -22.0939), srid=4326),
             'properties': '{}',
         },
         {
             'id': 11,
             'taxon_id': None,
             'provider_taxon_id': None,
             'location': from_shape(Point(166.551, -22.098), srid=4326),
             'properties': '{}',
         },
     ],
                                       index='id')
     ins = op1.get_insert_dataframe(df1, occ_2)
     self.assertIn('provider_pk', ins.columns)
     self.assertIn('provider_id', ins.columns)
     self.assertEqual(len(ins[pd.isnull(ins['provider_pk'])]), 0)
     self.assertEqual(len(ins[pd.isnull(ins['provider_id'])]), 0)
     self.assertEqual(len(ins), 2)
     # 3. Partial insert
     occ_3 = pd.DataFrame.from_records([
         {
             'id': 0,
             'taxon_id': None,
             'provider_taxon_id': None,
             'location': from_shape(Point(166.5521, -22.0939), srid=4326),
             'properties': '{}',
         },
         {
             'id': 11,
             'taxon_id': None,
             'provider_taxon_id': None,
             'location': from_shape(Point(166.551, -22.098), srid=4326),
             'properties': '{}',
         },
     ],
                                       index='id')
     ins = op1.get_insert_dataframe(df1, occ_3)
     self.assertEqual(len(ins), 1)