def wrapper(*args, **kwargs):
    """Run the wrapped CKAN API call and translate raised exceptions.

    ``f`` and ``self`` are closure variables from the enclosing decorator
    (not visible in this chunk); ``f`` is the wrapped callable.

    Exception mapping:
      * ``timeout_decorator.TimeoutError`` -> ``CkanTimeoutError``
      * errors accepted by ``self.is_ignored`` -> the call is retried once
      * CKAN ``ValidationError`` -> local ``ValidationError`` with a
        flattened message built from ``error_dict``
      * messages 'Indisponible'/'Not Found' -> ``CkanNotFoundError``
      * anything else -> ``CkanSyncingError``
    """
    # Debug-log the call site (file, line, function) of the caller.
    root_dir = os.path.dirname(os.path.abspath(__file__))
    info = inspect.getframeinfo(inspect.stack()[1][0])
    logger.debug('Run {} (called by file "{}", line {}, in {})'.format(
        f.__qualname__,
        info.filename.replace(root_dir, '.'),
        info.lineno,
        info.function))
    try:
        return f(*args, **kwargs)
    except Exception as e:
        logger.exception(e)
        # Timeouts get a dedicated error type.
        if isinstance(e, timeout_decorator.TimeoutError):
            raise CkanTimeoutError
        # "Ignored" errors trigger exactly one retry of the call.
        if self.is_ignored(e):
            return f(*args, **kwargs)
        # CKAN ValidationError: flatten error_dict into a single message.
        if e.__class__.__qualname__ == 'ValidationError':
            try:
                err = e.error_dict
                # '__type' is CKAN metadata, not a field error.
                del err['__type']
                msg = ', '.join([
                    '"{0}" {1}'.format(
                        k,
                        # Values may be a list of messages or a single one.
                        isinstance(v, list) and ', '.join(v) or v)
                    for k, v in err.items()
                ])
            except Exception as e:
                # NOTE(review): rebinds ``e`` — the original exception is
                # no longer reachable past this handler; fall back to the
                # formatting error's own message.
                msg = e.__str__()
            raise ValidationError(msg)
        if e.__str__() in ('Indisponible', 'Not Found'):
            raise CkanNotFoundError
        raise CkanSyncingError(e.__str__())
def get_epsg(obj):
    """Resolve the EPSG code of *obj*'s spatial reference system.

    Resolution order: GDAL's ``identify_epsg()``, then the PROJCS/GEOGCS
    authority codes, then a proj4-string lookup, then a name-based regex
    lookup.

    Raises:
        NotFoundSrsError: when every strategy comes up empty.
    """
    srs = obj.srs
    code = None
    if srs:
        try:
            code = srs.identify_epsg()
        except SRSException:
            # Identification simply failed; fall through to other strategies.
            pass
        except Exception as err:
            logger.exception(err)
            raise err
    if not code:
        # Ask the authority sections directly; GEOGCS may override PROJCS,
        # mirroring the original evaluation order.
        for is_kind, section in ((srs.projected, 'PROJCS'),
                                 (srs.geographic, 'GEOGCS')):
            if is_kind and srs.auth_name(section) == 'EPSG':
                code = srs.auth_code(section)
    if not code:
        code = retreive_epsg_through_proj4(srs.proj4)
    if not code:
        code = retreive_epsg_through_regex(srs.name)
    if code:
        return code
    logger.warning('Unable to determine SRS')
    raise NotFoundSrsError('SRS Not found')
def get_content_header_param(txt, param):
    """Extract the value of *param* from an HTTP header value.

    Typical use: pull ``filename`` out of a ``Content-Disposition``
    header such as ``attachment; filename="report.pdf"``.

    Args:
        txt (str | None): raw header value (None/empty tolerated).
        param (str): parameter name to look up.

    Returns:
        str | None: the (unquoted) value, or None when absent.
    """
    if not txt:
        return None
    # The previous pattern contained invalid escapes (\X, \R) which make
    # re.compile raise "bad escape" on Python >= 3.7, so the function
    # always returned None.  [^;"\s\x00]+ keeps the original intent:
    # stop at ';', '"', whitespace or NUL.  re.escape guards against
    # regex metacharacters in ``param``.
    found = re.search(r'{0}="?([^;"\s\x00]+)"?'.format(re.escape(param)), txt)
    return found.group(1) if found else None
def drop_table(table, schema=SCHEMA):
    """Drop the table ``schema."table"``.

    A ``ProgrammingError`` (typically: the table does not exist) is
    logged and swallowed; any other database error is re-raised.
    """
    statement = 'DROP TABLE {schema}."{table}";'.format(schema=schema, table=table)
    with connections[DATABASE].cursor() as cursor:
        try:
            cursor.execute(statement)
        except Exception as err:
            logger.exception(err)
            if err.__class__.__qualname__ != 'ProgrammingError':
                raise err
        # Redundant under ``with`` but preserved from the original.
        cursor.close()
def get_proj4s():
    """Return every ``(auth_srid, proj4text)`` pair from PostGIS's
    ``spatial_ref_sys`` table."""
    query = '''SELECT auth_srid, proj4text FROM public.spatial_ref_sys;'''
    with connections[DATABASE].cursor() as cursor:
        try:
            cursor.execute(query)
        except Exception as exc:
            logger.exception(exc)
            if exc.__class__.__qualname__ != 'ProgrammingError':
                raise exc
        # NOTE(review): reached even when a ProgrammingError was swallowed
        # above, in which case fetchall() fails on a dead cursor — confirm
        # this is intentional.
        rows = cursor.fetchall()
        cursor.close()
    return rows
def is_valid_epsg(code):
    """Return True when *code* matches exactly one ``auth_srid`` entry in
    PostGIS's ``spatial_ref_sys`` table.

    Args:
        code: EPSG code (int or digit string).

    Returns:
        bool
    """
    # Parameterized query: ``code`` may originate from user input, so it
    # must not be interpolated into the SQL text (SQL injection).  The
    # driver passes it as a literal, preserving the original implicit
    # cast against the integer ``auth_srid`` column.
    sql = '''SELECT * FROM public.spatial_ref_sys WHERE auth_srid = %s;'''
    with connections[DATABASE].cursor() as cursor:
        try:
            cursor.execute(sql, [code])
        except Exception as e:
            logger.exception(e)
            if e.__class__.__qualname__ != 'ProgrammingError':
                raise e
        records = cursor.fetchall()
        cursor.close()
    return len(records) == 1
def download(url, media_root, **kwargs):
    """Download *url* into a fresh directory under *media_root*.

    Keyword Args:
        max_size (int): optional byte ceiling; exceeding it raises
            ``SizeLimitExceededError``.  When omitted, no limit applies
            (previously the function crashed comparing sizes to None).

    Returns:
        tuple: (directory, filename, content_type)

    Raises:
        SizeLimitExceededError: payload larger than ``max_size``.
        requests.HTTPError: non-2xx response.
        Exception: the last network error after ten failed attempts.
    """

    def get_content_header_param(txt, param):
        # Extract ``param`` from a header such as Content-Disposition.
        # The previous pattern used invalid escapes (\X, \R), which make
        # re.compile raise on Python >= 3.7, so it never matched at all.
        if not txt:
            return None
        found = re.search(r'{0}="?([^;"\s\x00]+)"?'.format(re.escape(param)), txt)
        return found.group(1) if found else None

    # Missing limit means "unlimited", not a TypeError on comparison.
    max_size = kwargs.get('max_size')
    if max_size is None:
        max_size = float('inf')

    error = None
    for attempt in range(10):  # Try at most ten times before raising.
        try:
            r = requests.get(url, stream=True)
        except Exception as e:
            logger.exception(e)
            error = e
            continue
        else:
            break
    else:
        raise error

    r.raise_for_status()
    # Fast pre-check against the advertised size, when the server sends one.
    if int(r.headers.get('Content-Length', 0)) > max_size:
        raise SizeLimitExceededError(max_size=max_size)

    directory = create_dir(media_root)
    # Prefer the server-suggested filename, then the URL basename,
    # then a generic fallback.
    filename = os.path.join(
        directory,
        get_content_header_param(r.headers.get('Content-Disposition'), 'filename')
        or urlparse(url).path.split('/')[-1]
        or 'file')

    # TODO(@m431m) -> https://github.com/django/django/blob/3c447b108ac70757001171f7a4791f493880bf5b/docs/topics/files.txt#L120
    with open(filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
                # Enforce the limit on the bytes actually written:
                # Content-Length may be absent or lie.
                if os.fstat(f.fileno()).st_size > max_size:
                    remove_dir(directory)
                    raise SizeLimitExceededError(max_size=max_size)
    return directory, filename, r.headers.get('Content-Type')
def transform(wkt, epsg_in, epsg_out=4171):
    """Reproject a WKT geometry from *epsg_in* to *epsg_out* (default
    4171) using PostGIS ``ST_Transform`` and return the resulting WKT."""
    query = '''
    SELECT ST_AsText(ST_Transform(ST_GeomFromText('{wkt}', {epsg_in}), {epsg_out})) AS wkt;
    '''.format(wkt=wkt, epsg_in=epsg_in, epsg_out=epsg_out)
    with connections[DATABASE].cursor() as cursor:
        try:
            cursor.execute(query)
        except Exception as exc:
            logger.exception(exc)
            if exc.__class__.__qualname__ != 'ProgrammingError':
                raise exc
        else:
            rows = cursor.fetchall()
        cursor.close()
    return rows[0][0]
def rename_table(table, name, schema=SCHEMA):
    """Rename *table* to *name*, along with its conventional indexes
    (``_pkey``, ``_fid``, ``_gix``).

    NOTE(review): *schema* is accepted (and passed to ``format``) but the
    SQL below never schema-qualifies the identifiers — confirm whether
    that is intended.
    """
    statements = '''
    ALTER TABLE IF EXISTS "{table}" RENAME TO "{name}";
    ALTER INDEX IF EXISTS "{table}_pkey" RENAME TO "{name}_pkey";
    ALTER INDEX IF EXISTS "{table}_fid" RENAME TO "{name}_fid";
    ALTER INDEX IF EXISTS "{table}_gix" RENAME TO "{name}_gix";
    '''.format(schema=schema, table=table, name=name)
    with connections[DATABASE].cursor() as cursor:
        try:
            cursor.execute(statements)
        except Exception as err:
            logger.exception(err)
            if err.__class__.__qualname__ != 'ProgrammingError':
                raise err
        cursor.close()
def wrapper(*args, **kwargs):
    """Run the wrapped DCAT API call and translate raised exceptions.

    ``f`` and ``self`` are closure variables from the enclosing decorator
    (not visible in this chunk); ``f`` is the wrapped callable.

    Exception mapping:
      * ``timeout_decorator.TimeoutError`` -> ``DcatTimeoutError``
      * errors accepted by ``self.is_ignored`` -> the call is retried once
      * anything else -> ``DcatError`` with a generic (French) message
    """
    # Debug-log the call site (file, line, function) of the caller.
    root_dir = os.path.dirname(os.path.abspath(__file__))
    info = inspect.getframeinfo(inspect.stack()[1][0])
    logger.debug(
        'Run {} (called by file "{}", line {}, in {})'.format(
            f.__qualname__,
            info.filename.replace(root_dir, '.'),
            info.lineno,
            info.function))
    try:
        return f(*args, **kwargs)
    except Exception as e:
        logger.exception(e)
        # Timeouts get a dedicated error type.
        if isinstance(e, timeout_decorator.TimeoutError):
            raise DcatTimeoutError
        # "Ignored" errors trigger exactly one retry of the call.
        if self.is_ignored(e):
            return f(*args, **kwargs)
        raise DcatError("Une erreur critique est survenue lors de l'appel au DCAT distant.")
def intersect(geojson1, geojson2):
    """Return the intersection of two GeoJSON geometries, computed
    server-side by PostGIS, as a parsed GeoJSON object."""
    query = '''
    SELECT ST_AsGeoJSON(ST_Intersection(
        ST_GeomFromGeoJSON('{geojson1}'),
        ST_GeomFromGeoJSON('{geojson2}'))) AS geojson;
    '''.format(geojson1=geojson1, geojson2=geojson2)
    with connections[DATABASE].cursor() as cursor:
        try:
            cursor.execute(query)
        except Exception as exc:
            logger.exception(exc)
            # Invalid topologies surface as a dedicated SQL error.
            if exc.__class__.__qualname__ == 'TopologyException':
                raise SQLError()
            if exc.__class__.__qualname__ != 'ProgrammingError':
                raise exc
        else:
            rows = cursor.fetchall()
        cursor.close()
    return json.loads(rows[0][0])
def get_extent(tables, schema='public'):
    """Return the combined bounding-box geometry of the geometry columns
    of *tables*, or None when it cannot be determined."""
    if not tables:
        return None
    template = 'SELECT {the_geom} as the_geom FROM {schema}."{table}"'
    union = ' UNION '.join(
        template.format(table=name, the_geom=THE_GEOM, schema=schema)
        for name in tables)
    query = 'WITH all_geoms AS ({}) SELECT geometry(ST_Extent(the_geom)) FROM all_geoms;'.format(union)
    with connections[DATABASE].cursor() as cursor:
        try:
            cursor.execute(query)
        except Exception as exc:
            logger.exception(exc)
            if exc.__class__.__qualname__ != 'ProgrammingError':
                raise exc
        rows = cursor.fetchall()
        cursor.close()
    # Any failure to read the single-row result degrades to None.
    try:
        return rows[0][0]
    except Exception:
        return None
def save(self, *args, harvest=True, **kwargs):
    """Save this ``RemoteCsw`` instance and (re-)harvest its catalogue.

    Workflow:
      (1) delete datasets no longer synchronized (existing instance), or
          validate the CSW URL (new instance);
      (2) persist the instance;
      (3) when ``harvest`` is True and the instance already existed,
          harvest the remote CSW catalogue inside a transaction, pushing
          metadata records to GeoNetwork and creating/updating local
          ``Dataset``/``Resource`` rows.  On failure, every CKAN package
          and GeoNetwork record created during the run is rolled back and
          ``CriticalError`` is raised.
    """
    # Late model lookups avoid circular imports at module load time.
    Category = apps.get_model(app_label='idgo_admin', model_name='Category')
    Dataset = apps.get_model(app_label='idgo_admin', model_name='Dataset')
    License = apps.get_model(app_label='idgo_admin', model_name='License')
    Resource = apps.get_model(app_label='idgo_admin', model_name='Resource')
    ResourceFormats = apps.get_model(app_label='idgo_admin', model_name='ResourceFormats')

    # (1) Delete the datasets that are no longer synchronized.
    previous = self.pk and RemoteCsw.objects.get(pk=self.pk)
    if previous:
        for dataset in Dataset.harvested_csw.filter(
                remote_instance=previous):
            dataset.delete()
    else:
        # On creation, check that the CSW URL is valid/reachable.
        try:
            with CswBaseHandler(self.url):
                pass
        except CswBaseError as e:
            raise ValidationError(e.__str__(), code='url')

    # (2) Save the instance itself.
    super().save(*args, **kwargs)

    # (3) Create/update the synchronized datasets.
    # Recover, from the call stack, the user performing the operation.
    editor = None
    for entry in inspect.stack():
        try:
            editor = entry[0].f_locals['request'].user._wrapped
        except (KeyError, AttributeError):
            continue
        break

    # Harvesting only runs for pre-existing instances.
    if not previous:
        return

    if harvest:
        # Then harvest the catalogue.
        try:
            ckan_ids = []
            geonet_ids = []
            with transaction.atomic():
                with CswBaseHandler(self.url) as csw:
                    packages = csw.get_packages(
                        xml=self.getrecords or None)
                for package in packages:
                    if not package['type'] == 'dataset':
                        continue
                    geonet_id = package['id']
                    # NOTE(review): this first assignment is dead code —
                    # it is immediately overwritten below.
                    update_frequency = dict(Dataset.FREQUENCY_CHOICES).get(
                        package.get('frequency'), 'unknown')
                    update_frequency = package.get('frequency')
                    if not (update_frequency and update_frequency in dict(
                            Dataset.FREQUENCY_CHOICES).keys()):
                        update_frequency = 'unknown'
                    # Parse the three lifecycle dates; a malformed value
                    # is logged and treated as missing.
                    date_creation = package.get('dataset_creation_date', None)
                    if date_creation:
                        try:
                            date_creation = datetime.strptime(
                                date_creation, ISOFORMAT_DATE)
                        except ValueError as e:
                            logger.warning(e)
                            date_creation = None
                    date_modification = package.get(
                        'dataset_modification_date', None)
                    if date_modification:
                        try:
                            date_modification = datetime.strptime(
                                date_modification, ISOFORMAT_DATE)
                        except ValueError as e:
                            logger.warning(e)
                            date_modification = None
                    date_publication = package.get(
                        'dataset_publication_date', None)
                    if date_publication:
                        try:
                            date_publication = datetime.strptime(
                                date_publication, ISOFORMAT_DATE)
                        except ValueError as e:
                            logger.warning(e)
                            date_publication = None

                    # License: match by slug, title or alternate titles,
                    # falling back to the configured default, then to the
                    # first license in the table.
                    license_titles = package.get('license_titles')
                    filters = [
                        Q(slug__in=license_titles),
                        Q(title__in=license_titles),
                        Q(alternate_titles__overlap=license_titles),
                    ]
                    license = License.objects.filter(reduce(
                        ior, filters)).distinct().first()
                    if not license:
                        try:
                            license = License.objects.get(
                                slug=settings.DEFAULTS_VALUES.get(
                                    'LICENSE'))
                        except License.DoesNotExist:
                            license = License.objects.first()

                    # Push the metadata record to GeoNetwork.
                    if not geonet.get_record(geonet_id):
                        try:
                            geonet.create_record(geonet_id, package['xml'])
                        except Exception as e:
                            logger.warning(
                                'La création de la fiche de métadonnées a échoué.'
                            )
                            logger.error(e)
                        else:
                            # Only successfully created records are
                            # candidates for rollback on failure.
                            geonet_ids.append(geonet_id)
                            geonet.publish(
                                geonet_id)  # Always publish the record
                    else:
                        try:
                            geonet.update_record(geonet_id, package['xml'])
                        except Exception as e:
                            logger.warning(
                                'La mise à jour de la fiche de métadonnées a échoué.'
                            )
                            logger.error(e)

                    # Build the local Dataset; the slug embeds a short
                    # random prefix to avoid collisions.
                    slug = 'sync{}-{}'.format(
                        str(uuid.uuid4())[:7].lower(),
                        slugify(geonet_id))[:100]
                    kvp = {
                        'slug': slug,
                        'title': package.get('title'),
                        'description': package.get('notes'),
                        'date_creation': date_creation and date_creation.date(),
                        'date_modification': date_modification and date_modification.date(),
                        'date_publication': date_publication and date_publication.date(),
                        'editor': editor,
                        'license': license,
                        'owner_email': self.organisation.email or DEFAULT_CONTACT_EMAIL,
                        'owner_name': self.organisation.legal_name or DEFAULT_PLATFORM_NAME,
                        'organisation': self.organisation,
                        'published': not package.get('private'),
                        'remote_instance': self,
                        'remote_dataset': geonet_id,
                        'update_frequency': update_frequency,
                        'bbox': package.get('bbox'),
                        # broadcaster_email
                        # broadcaster_name
                        # data_type
                        # geocover
                        'geonet_id': geonet_id,
                        # granularity
                        # thumbnail
                        # support
                    }
                    dataset, created = Dataset.harvested_csw.update_or_create(
                        **kvp)
                    if created:
                        ckan_ids.append(dataset.ckan_id)
                    # Categories: match local categories by slug, name,
                    # ISO topic (direct and reverse mapping) or alternate
                    # titles.
                    categories_name = [
                        m['name'] for m in package.get('groups', [])
                    ]
                    iso_topic_reverse = dict(
                        (v, k) for k, v in Category._meta.fields[5].choices)
                    filters = [
                        Q(slug__in=categories_name),
                        Q(name__in=categories_name),
                        Q(iso_topic__in=[
                            m['name'] for m in package.get('groups', [])
                        ]),
                        Q(iso_topic__in=[
                            iso_topic_reverse.get(name)
                            for name in categories_name
                        ]),
                        Q(alternate_titles__overlap=categories_name),
                    ]
                    categories = Category.objects.filter(
                        reduce(ior, filters)).distinct()
                    if categories:
                        dataset.categories.set(categories, clear=True)
                    # Reset keywords on update so stale tags disappear.
                    if not created:
                        dataset.keywords.clear()
                    keywords = [
                        tag['display_name'] for tag in package.get('tags')
                    ]
                    dataset.keywords.add(*keywords)
                    dataset.save(current_user=None, synchronize=True,
                                 activate=False)
                    # Synchronize each remote resource.
                    for resource in package.get('resources', []):
                        try:
                            # NOTE(review): a fresh uuid4 cannot raise
                            # ValueError — this handler looks vestigial.
                            ckan_id = uuid.uuid4()
                        except ValueError as e:
                            logger.exception(e)
                            logger.error(
                                "I can't crash here, so I do not pay any attention to this error."
                            )
                            continue
                        # Guess the resource format from protocol and/or
                        # mimetype; ambiguity or no match yields None.
                        filters = []
                        protocol = resource.get('protocol')
                        protocol and filters.append(Q(protocol=protocol))
                        mimetype = resource.get('mimetype')
                        mimetype and filters.append(
                            Q(mimetype__overlap=[mimetype]))
                        try:
                            format_type = ResourceFormats.objects.get(
                                reduce(iand, filters))
                        except (ResourceFormats.MultipleObjectsReturned,
                                ResourceFormats.DoesNotExist, TypeError):
                            format_type = None
                        kvp = {
                            'ckan_id': ckan_id,
                            'dataset': dataset,
                            'format_type': format_type,
                            'title': resource['name'] or resource['url'],
                            'referenced_url': resource['url'],
                        }
                        try:
                            resource = Resource.objects.get(
                                ckan_id=ckan_id)
                        except Resource.DoesNotExist:
                            resource = Resource.default.create(save_opts={
                                'current_user': editor,
                                'synchronize': True
                            }, **kvp)
                        else:
                            for k, v in kvp.items():
                                setattr(resource, k, v)
                            resource.save(current_user=editor,
                                          synchronize=True)
        except Exception as e:
            # Roll back everything created during this harvest run.
            for id in ckan_ids:
                logger.warning(
                    'Delete CKAN package : {id}.'.format(id=str(id)))
                CkanHandler.purge_dataset(str(id))
            for id in geonet_ids:
                logger.warning('Delete MD : {id}.'.format(id=str(id)))
                geonet.delete_record(id)
            logger.error(e)
            raise CriticalError()
        else:
            # Success: activate the newly created CKAN packages.
            for id in ckan_ids:
                CkanHandler.publish_dataset(id=str(id), state='active')
def save(self, *args, **kwargs):
    """Save this ``RemoteCkan`` instance and (re-)harvest its catalogue.

    Workflow:
      (1) delete datasets belonging to remote organisations removed from
          ``sync_with`` (existing instance), or validate the CKAN URL
          (new instance);
      (2) persist the instance;
      (3) for each organisation in ``sync_with``, harvest its packages
          inside a transaction, creating/updating local ``Dataset`` and
          ``Resource`` rows.  On failure, every CKAN package created
          during the run is purged and ``CriticalError`` is raised.
    """
    # Late model lookups avoid circular imports at module load time.
    Category = apps.get_model(app_label='idgo_admin', model_name='Category')
    Dataset = apps.get_model(app_label='idgo_admin', model_name='Dataset')
    License = apps.get_model(app_label='idgo_admin', model_name='License')
    Resource = apps.get_model(app_label='idgo_admin', model_name='Resource')
    ResourceFormats = apps.get_model(app_label='idgo_admin', model_name='ResourceFormats')

    # (1) Delete the datasets that are no longer synchronized.
    previous = self.pk and RemoteCkan.objects.get(pk=self.pk)
    if previous:
        # Organisations present before but dropped from the new value.
        remote_organisation__in = [
            x for x in (previous.sync_with or [])
            if x not in (self.sync_with or [])
        ]
        filter = {
            'remote_instance': previous,
            'remote_organisation__in': remote_organisation__in,
        }
        # TODO: 'Dataset.harvested_ckan.filter(**filter).delete()' does not work
        for dataset in Dataset.harvested_ckan.filter(**filter):
            dataset.delete()
    else:
        # On creation, check that the CKAN URL is valid/reachable.
        try:
            with CkanBaseHandler(self.url):
                pass
        except CkanBaseError as e:
            raise ValidationError(e.__str__(), code='url')

    # (2) Save the instance itself.
    super().save(*args, **kwargs)

    # (3) Create/update the synchronized datasets.
    # Recover, from the call stack, the user performing the operation;
    # fall back to the default platform user.
    editor = User.objects.get(pk=DEFAULT_USER_ID)
    for entry in inspect.stack():
        try:
            editor = entry[0].f_locals['request'].user._wrapped
        except (KeyError, AttributeError):
            continue
        break

    # Then harvest the catalogue.
    if self.sync_with:
        try:
            ckan_ids = []
            with transaction.atomic():
                # TODO: Factor this out
                for value in self.sync_with:
                    with CkanBaseHandler(self.url) as ckan:
                        ckan_organisation = ckan.get_organisation(
                            value, include_datasets=True,
                            include_groups=True, include_tags=True)
                    if not ckan_organisation.get('package_count', 0):
                        continue
                    for package in ckan_organisation.get('packages'):
                        if not package['state'] == 'active' \
                                or not package['type'] == 'dataset':
                            continue
                        # Fetch the full package (the listing is shallow).
                        with CkanBaseHandler(self.url) as ckan:
                            package = ckan.get_package(package['id'])
                        ckan_id = uuid.UUID(package['id'])
                        # NOTE(review): this first assignment is dead code —
                        # it is immediately overwritten below.
                        update_frequency = dict(
                            Dataset.FREQUENCY_CHOICES).get(
                                package.get('frequency'), 'unknown')
                        update_frequency = package.get('frequency')
                        if not (update_frequency and update_frequency
                                in dict(Dataset.FREQUENCY_CHOICES).keys()):
                            update_frequency = 'unknown'
                        metadata_created = package.get(
                            'metadata_created', None)
                        if metadata_created:
                            metadata_created = datetime.strptime(
                                metadata_created, ISOFORMAT_DATETIME)
                        metadata_modified = package.get(
                            'metadata_modified', None)
                        if metadata_modified:
                            metadata_modified = datetime.strptime(
                                metadata_modified, ISOFORMAT_DATETIME)
                        # License: map the remote license_id through the
                        # per-instance MappingLicence table.
                        try:
                            mapping_licence = MappingLicence.objects.get(
                                remote_ckan=self,
                                slug=package.get('license_id'))
                        except MappingLicence.DoesNotExist:
                            # Fall back to the generic 'other-at' license.
                            # NOTE(review): this inner handler catches
                            # MappingLicence.DoesNotExist, not
                            # License.DoesNotExist — looks like a typo;
                            # confirm.
                            try:
                                license = License.objects.get(
                                    slug='other-at')
                            except MappingLicence.DoesNotExist:
                                license = None
                        else:
                            # NOTE(review): warning emitted on the *found*
                            # path — the condition looks inverted; confirm.
                            logger.warning("'{}' non trouvé".format(
                                package.get('license_id')))
                            license = mapping_licence.licence
                        # Build the local Dataset; the slug embeds a short
                        # random prefix to avoid collisions.
                        slug = 'sync{}-{}'.format(
                            str(uuid.uuid4())[:7].lower(),
                            package.get('name'))[:100]
                        kvp = {
                            'slug': slug,
                            'title': package.get('title'),
                            'description': package.get('notes'),
                            'date_creation': metadata_created and metadata_created.date(),
                            'date_modification': metadata_modified and metadata_modified.date(),
                            # date_publication
                            'editor': editor,
                            'license': license,
                            'owner_email': self.organisation.email or DEFAULT_CONTACT_EMAIL,
                            'owner_name': self.organisation.legal_name or DEFAULT_PLATFORM_NAME,
                            'organisation': self.organisation,
                            'published': not package.get('private'),
                            'remote_instance': self,
                            'remote_dataset': ckan_id,
                            'remote_organisation': value,
                            'update_frequency': update_frequency,
                            # bbox
                            # broadcaster_email
                            # broadcaster_name
                            # data_type
                            # geocover
                            # geonet_id
                            # granularity
                            # thumbnail
                            # support
                        }
                        dataset, created = Dataset.harvested_ckan.update_or_create(
                            **kvp)
                        # Categories mapped through MappingCategory.
                        mapping_categories = MappingCategory.objects.filter(
                            remote_ckan=self, slug__in=[
                                m['name'] for m in package.get('groups', [])
                            ])
                        if mapping_categories:
                            dataset.categories = set(
                                mc.category for mc in mapping_categories)
                        # Reset keywords on update so stale tags disappear.
                        if not created:
                            dataset.keywords.clear()
                        keywords = [
                            tag['display_name']
                            for tag in package.get('tags')
                        ]
                        dataset.keywords.add(*keywords)
                        dataset.save(current_user=None, synchronize=True,
                                     activate=False)
                        ckan_ids.append(dataset.ckan_id)
                        # Synchronize each remote resource.
                        for resource in package.get('resources', []):
                            try:
                                ckan_id = uuid.UUID(resource['id'])
                            except ValueError as e:
                                logger.exception(e)
                                logger.error(
                                    "I can't crash here, so I do not pay any attention to this error."
                                )
                                continue
                            # Map the remote format onto a local
                            # ResourceFormats row; any lookup trouble
                            # degrades to None.
                            try:
                                ckan_format = resource['format'].upper()
                                format_type = ResourceFormats.objects.get(
                                    ckan_format=ckan_format)
                            except (ResourceFormats.
                                    MultipleObjectsReturned,
                                    ResourceFormats.DoesNotExist,
                                    TypeError) as e:
                                logger.exception(e)
                                logger.error(
                                    "I can't crash here, so I do not pay any attention to this error."
                                )
                                format_type = None
                            kvp = {
                                'ckan_id': ckan_id,
                                'dataset': dataset,
                                'format_type': format_type,
                                'title': resource['name'],
                                'referenced_url': resource['url'],
                            }
                            try:
                                resource = Resource.objects.get(
                                    ckan_id=ckan_id)
                            except Resource.DoesNotExist:
                                resource = Resource.default.create(
                                    save_opts={
                                        'current_user': None,
                                        'synchronize': True
                                    }, **kvp)
                            else:
                                for k, v in kvp.items():
                                    setattr(resource, k, v)
                                resource.save(current_user=None,
                                              synchronize=True)
        except Exception as e:
            # Roll back every CKAN package created during this run.
            for id in ckan_ids:
                CkanHandler.purge_dataset(str(id))
            logger.error(e)
            raise CriticalError()
        else:
            # Success: activate the newly created CKAN packages.
            for id in ckan_ids:
                CkanHandler.publish_dataset(id=str(id), state='active')
def ogr2postgis(ds, epsg=None, limit_to=1, update=None, filename=None,
                encoding='utf-8'):
    """Load the layers of an OGR data source into PostGIS tables.

    For each layer: resolve and validate its EPSG code, generate a
    ``CREATE TABLE`` plus one ``INSERT`` per feature, then execute the
    whole batch.  Tables listed in *update* are renamed aside first and
    either restored (on failure) or dropped (on success).

    Args:
        ds: data source wrapper exposing ``get_layers()``.
        epsg (int): optional EPSG override; ignored when invalid.
        limit_to (int): maximum number of layers accepted.
        update (dict): mapping ``layername -> existing table id`` to
            replace.  Defaults to an empty mapping.  (Previously a
            mutable ``{}`` default — a shared-state hazard.)
        filename: unused here; kept for interface compatibility.
        encoding (str): encoding applied to the layers.

    Returns:
        list[dict]: one entry per table with id, epsg, bbox and extent.

    Raises:
        ExceedsMaximumLayerNumberFixedError, NotSupportedSrsError,
        WrongDataError, DataDecodingError, SQLError.
    """
    # Avoid the mutable-default-argument pitfall.
    if update is None:
        update = {}

    sql = []
    tables = []
    layers = ds.get_layers()
    if len(layers) > limit_to:
        raise ExceedsMaximumLayerNumberFixedError(
            count=len(layers), maximum=limit_to)
    layers.encoding = encoding
    for layer in layers:
        layername = slugify(layer.name).replace('-', '_')
        if layername == 'ogrgeojson':
            # GeoJSON sources all expose the generic layer name
            # 'OGRGeoJSON'; derive a name from the file instead.
            p = Path(ds._datastore.name)
            layername = slugify(p.name[:-len(p.suffix)]).replace('-', '_')
        # NOTE(review): once resolved for the first layer, ``epsg`` is
        # reused for subsequent layers — confirm intended (limit_to is 1
        # by default, so this rarely matters).
        if epsg and is_valid_epsg(epsg):
            pass
        else:
            epsg = get_epsg(layer)
        SupportedCrs = apps.get_model(
            app_label='idgo_admin', model_name='SupportedCrs')
        try:
            SupportedCrs.objects.get(auth_name='EPSG', auth_code=epsg)
        except SupportedCrs.DoesNotExist:
            raise NotSupportedSrsError('SRS Not Supported')
        xmin = layer.extent.min_x
        ymin = layer.extent.min_y
        xmax = layer.extent.max_x
        ymax = layer.extent.max_y
        # Reuse the existing table id when updating, else forge a unique
        # one; prefix with '_' when it would start with a digit.
        table_id = update.get(
            layername, '{0}_{1}'.format(layername, str(uuid4())[:7]))
        if table_id[0].isdigit():
            table_id = '_{}'.format(table_id)
        tables.append({
            'id': table_id,
            'epsg': epsg,
            'bbox': bounds_to_wkt(xmin, ymin, xmax, ymax),
            'extent': ((xmin, ymin), (xmax, ymax))})
        # Map OGR field types onto SQL column types.
        attrs = {}
        for i, k in enumerate(layer.fields):
            t = handle_ogr_field_type(
                layer.field_types[i].__qualname__,
                n=layer.field_widths[i],
                p=layer.field_precisions[i])
            attrs[k] = t
        # Bug in Django: when a layer mixes feature geometry types,
        # `ft.geom.__class__.__qualname__ == feat.geom_type.name is False`
        #
        # > django/contrib/gis/gdal/feature.py
        # @property
        # def geom_type(self):
        #     "Return the OGR Geometry Type for this Feture."
        #     return OGRGeomType(capi.get_fd_geom_type(self._layer._ldefn))
        #
        # The function is incorrect since it relies on the *layer*, not
        # on the *feature*.  So in that case we declare the layer's
        # geometry type as generic ('Geometry').  Ugly, but necessary:
        # inspect every feature's actual geometry type.
        try:
            test = set(str(feature.geom.geom_type) for feature in layer)
        except Exception as e:
            logger.exception(e)
            raise WrongDataError()
        if test == {'Polygon', 'MultiPolygon'}:
            geometry = 'MultiPolygon'
        elif test == {'Polygon25D', 'MultiPolygon25D'}:
            geometry = 'MultiPolygonZ'
        elif test == {'LineString', 'MultiLineString'}:
            geometry = 'MultiLineString'
        elif test == {'LineString25D', 'MultiLineString25D'}:
            geometry = 'MultiLineStringZ'
        elif test == {'Point', 'MultiPoint'}:
            geometry = 'MultiPoint'
        elif test == {'Point25D', 'MultiPoint25D'}:
            geometry = 'MultiPointZ'
        else:
            geometry = 'Geometry' if len(test) > 1 \
                else handle_ogr_geom_type(layer.geom_type)
        sql.append(
            CREATE_TABLE.format(
                attrs=',\n '.join(
                    ['"{}" {}'.format(k, v) for k, v in attrs.items()]),
                # description=layer.name,
                epsg=epsg,
                geometry=geometry,
                owner=OWNER,
                mra_datagis_user=MRA_DATAGIS_USER,
                schema=SCHEMA,
                table=str(table_id),
                the_geom=THE_GEOM,
                to_epsg=TO_EPSG))
        # One INSERT per feature, values SQL-escaped by hand.
        for feature in layer:
            attrs = {}
            for field in feature.fields:
                k = field.decode()
                try:
                    v = feature.get(k)
                except DjangoUnicodeDecodeError as e:
                    logger.exception(e)
                    raise DataDecodingError()
                if isinstance(v, type(None)):
                    attrs[k] = 'null'
                elif isinstance(
                        v, (datetime.date, datetime.time, datetime.datetime)):
                    attrs[k] = "'{}'".format(v.isoformat())
                elif isinstance(v, str):
                    # Escape single quotes for SQL string literals.
                    attrs[k] = "'{}'".format(v.replace("'", "''"))
                else:
                    attrs[k] = "{}".format(v)
            # Promote single geometries when the target column is Multi*.
            if geometry.startswith('Multi'):
                geom = "ST_Multi(ST_GeomFromtext('{wkt}', {epsg}))"
            else:
                geom = "ST_GeomFromtext('{wkt}', {epsg})"
            sql.append(
                INSERT_INTO.format(
                    attrs_name=', '.join(
                        ['"{}"'.format(x) for x in attrs.keys()]),
                    attrs_value=', '.join(attrs.values()),
                    geom=geom.format(epsg=epsg, wkt=feature.geom),
                    owner=OWNER,
                    schema=SCHEMA,
                    table=str(table_id),
                    the_geom=THE_GEOM,
                    to_epsg=TO_EPSG))
    # Move the tables being replaced out of the way.
    for table_id in update.values():
        rename_table(table_id, '__{}'.format(table_id))
    with connections[DATABASE].cursor() as cursor:
        for q in sql:
            try:
                cursor.execute(q)
            except Exception as e:
                logger.exception(e)
                # Return to the initial state: drop what was created and
                # restore the renamed tables.
                for table_id in [table['id'] for table in tables]:
                    drop_table(table_id)
                for table_id in update.values():
                    rename_table('__{}'.format(table_id), table_id)
                # Then surface the error.
                raise SQLError(e.__str__())
    # Success: the old versions of updated tables can go.
    for table_id in update.values():
        drop_table('__{}'.format(table_id))
    return tables