def _check_config(self):
    """Validate that ``self.read_config`` holds every setting that is needed.

    Each of the base settings must be present with a truthy value. When the
    configured mode equals ``ImportMode.MUTATIONS`` the setting
    ``last_full_download_location`` is required as well.

    :raises GOBException: if a required setting is missing or falsy
    :return: None
    """
    required_keys = ('object_type', 'xml_object', 'mode', 'gemeentes', 'download_location')

    # Report the first required key that is absent (or has a falsy value)
    missing = next((key for key in required_keys if not self.read_config.get(key)), None)
    if missing is not None:
        raise GOBException(f"Missing {missing} in read_config")

    mutations_mode = self.read_config['mode'] == ImportMode.MUTATIONS
    if mutations_mode and not self.read_config.get("last_full_download_location"):
        raise GOBException("Missing last_full_download_location in read_config")
def result(self):
    """Report the outcome of the quality assurance checks.

    Logs a confirmation message when neither a fatal error nor any duplicate
    has been registered on this instance.

    :raises GOBException: if ``self.fatal`` is truthy, or if ``self.duplicates`` is not empty
    :return: None
    """
    if self.fatal:
        raise GOBException(f"Quality assurance failed for {self.entity_name}")

    if not self.duplicates:
        logger.info("Quality assurance passed")
        return

    # List every duplicate in the error message, separated by commas
    duplicate_keys = ', '.join(map(str, self.duplicates))
    raise GOBException(f"Duplicate primary key(s) found in source: [{duplicate_keys}]")
def __iter__(self):
    """Yield the rows of the base API, enriched with columns of the merged API.

    Both result sets are walked in lockstep. For every pair of rows the values
    of the columns in ``self.attributes`` are copied from the merged row into
    the base row (which is updated in place) before the base row is yielded.

    :raises GOBException: if one result set ends before the other, or if the keys
        (as computed by ``self._item_key``) of a pair of rows differ
    :return: generator of the updated base rows
    """
    for base_row, merged_row in zip_longest(self.base_api, self.merged_api):
        # zip_longest pads the shorter result set with None
        if base_row is None or merged_row is None:
            raise GOBException("Length of results from API's don't match.")

        base_key = self._item_key(base_row)
        merged_key = self._item_key(merged_row)
        if base_key != merged_key:
            raise GOBException("Rows in API results don't match.")

        # Collect the merged values first; columns absent from the merged row become None
        merged_values = {}
        for col in self.attributes:
            merged_values[col] = merged_row.get(col)
        base_row.update(merged_values)

        yield base_row
def _get_materialized_view(catalog_name: str, collection_name: str, attribute_name: str):
    """Look up a materialized view.

    For the catalog ``rel`` the collection name is passed on as the relation
    name to ``_get_materialized_view_by_relation_name`` and ``attribute_name``
    is not used. For any other catalog the view is requested from
    ``MaterializedViews`` by catalog, collection and attribute name.

    :param catalog_name: name of the catalog, or "rel" to look up by relation name
    :param collection_name: name of the collection (required)
    :param attribute_name: name of the attribute (required unless catalog_name is "rel")
    :raises GOBException: if a required argument is empty or the lookup fails
    :return: whatever the lookup returns for the requested view
    """
    if not collection_name:
        raise GOBException("Need collection_name to update materialized view.")

    if catalog_name == "rel":
        return _get_materialized_view_by_relation_name(collection_name)

    if not attribute_name:
        raise GOBException("Missing attribute")

    try:
        view = MaterializedViews().get(catalog_name, collection_name, attribute_name)
    except Exception as exc:
        # Log the underlying cause; the caller only gets the generic message
        logger.error(str(exc))
        raise GOBException(f"Could not get materialized view for {catalog_name} {collection_name}.")
    return view
def extract_dataset_from_msg(msg):
    """Return the import definition that matches the header of the message.

    Example message:

        message = {
            "header": {
                "catalogue": "some catalogue",
                "collection": "the collection",
                "application": "the application"
            }
        }

    'catalogue' and 'collection' are mandatory. 'application' is optional; when
    absent, None is passed on to ``get_import_definition``.

    :param msg: message (dict) with a 'header' entry
    :raises GOBException: if 'catalogue' or 'collection' is not present in the header
    :return: the result of ``get_import_definition`` for the given header values
    """
    required_keys = ['catalogue', 'collection']
    header = msg.get('header', {})

    absent = [key for key in required_keys if key not in header]
    if absent:
        raise GOBException(f"Missing dataset keys. Expected keys: {','.join(required_keys)}")

    catalogue = header['catalogue']
    collection = header['collection']
    return get_import_definition(catalogue, collection, header.get('application'))
def _split_table_name(self, table_name: str):
    """Split a table name on underscores, dropping empty parts.

    :param table_name: name to split, e.g. "catalog_collection"
    :raises GOBException: if fewer than two non-empty parts remain
    :return: list with the non-empty parts of the name
    """
    # filter(None, ...) discards the empty strings caused by leading, trailing or doubled underscores
    parts = list(filter(None, table_name.split('_')))
    if len(parts) >= 2:
        return parts
    raise GOBException("Invalid table name")
def _get_materialized_view_by_relation_name(relation_name: str):
    """Look up a materialized view by the name of its relation.

    :param relation_name: name of the relation
    :raises GOBException: if the lookup raises any exception; the original error is logged
    :return: the result of ``MaterializedViews().get_by_relation_name``
    """
    try:
        view = MaterializedViews().get_by_relation_name(relation_name)
    except Exception as exc:
        # Log the underlying cause; the caller only gets the generic message
        logger.error(str(exc))
        raise GOBException(f"Could not get materialized view for relation {relation_name}.")
    return view
def _determine_relation_evaluation_order(self):
    """Determine the order in which relations are evaluated, starting at the root.

    ``self.relations_hierarchy`` maps every relation to its parent; the root is
    the (first) relation whose parent is None. Relations are added level by
    level: a relation is placed as soon as its parent has been placed.

    :raises GOBException: if relations remain whose parent can never be placed
    :raises IndexError: if the hierarchy has no relation with parent None
    :return: tuple (order, root_relation); order lists all relations except the root
    """
    hierarchy = self.relations_hierarchy

    roots = [name for name, parent in hierarchy.items() if parent is None]
    root_relation = roots[0]

    pending = list(hierarchy)
    pending.remove(root_relation)

    # The root stays in front while building, so that its children can be found
    order = [root_relation]
    while pending:
        resolved = [
            name for name, parent in hierarchy.items()
            if parent in order and name in pending
        ]
        if not resolved:
            # No progress possible: the remaining relations are not connected to the root
            raise GOBException(
                "This should not be possible. Not sure what you want me to do now?"
            )
        order += resolved
        pending = [name for name in pending if name not in resolved]

    # The root itself is not part of the evaluation order
    return order[1:], root_relation
def migrate_event_data(self, event, data, catalog_name, collection_name, target_version):
    """Migrate event data, one migration at a time, up to the target version.

    As long as ``event.version`` differs from ``target_version`` the migration
    for the current version is looked up and applied. The loop only ends when
    applying a migration changes ``event.version`` (presumably done by
    ``self._apply_migration`` - verify there).

    :param event: the event that is migrated; its version attribute drives the loop
    :param data: the event data, handed to every migration
    :param catalog_name: catalog of the event
    :param collection_name: collection of the event
    :param target_version: the version to migrate to
    :raises GOBException: if no migration exists for the current version of the event
    :return: the data object that was passed in
    """
    while event.version != target_version:
        migration = self._get_migration(catalog_name, collection_name, event.version)

        if migration:
            # Apply all conversions on the data
            self._apply_migration(event, data, migration)
            continue

        logger.error(f"No migration found for {catalog_name}, {collection_name} {event.version}")
        raise GOBException(
            f"Not able to migrate event for {catalog_name}, {collection_name} to version {target_version}"
        )

    return data
def apply_other_event(self, entity):
    """Apply all registered non-initial events for an entity, in order.

    The events are taken from ``self.other_events`` by the ``_tid`` of the
    entity. An event can be:
    - an ADD event (reanimation of a deleted entity)
    - a DELETE or MODIFY event
    - a CONFIRM event (not registered as the last event of the entity)

    :param entity: the entity on which the events are applied
    :raises GOBException: if an event other than ADD is applied on a deleted entity
    :return: None
    """
    for gob_event in self.other_events[entity._tid]:
        # Only an ADD event may be applied on an entity that has been deleted
        is_deleted = entity._date_deleted is not None
        if is_deleted and not isinstance(gob_event, GOB.ADD):
            raise GOBException(f"Trying to '{gob_event.name}' a deleted entity")

        gob_event.apply_to(entity)

        # CONFIRM events do not count as the last event that updated the entity
        if isinstance(gob_event, GOB.CONFIRM):
            continue
        entity._last_event = gob_event.id
def on_start_tasks(self, msg):
    """Entry method for TaskQueue: create the tasks of a job step and queue the free ones.

    Expected message contents:
        tasks: [{'id': 'some_id', 'dependencies': ['some_id', 'some_other_id']}]
        key_prefix: passed on when creating the tasks
        extra_msg: optional, defaults to {}
    The optional 'extra' entry of the header (default {}) is passed on as well.

    :param msg: message with 'stepid', 'jobid' and 'process_id' in its header
    :raises GOBException: if no job step exists for the given stepid
    :return: None
    """
    incoming_header = msg['header']
    stepid = incoming_header['stepid']
    jobid = incoming_header['jobid']
    process_id = incoming_header['process_id']

    # Incoming message may be large. Manually load message from file if necessary
    msg, _ = load_message(msg, json.loads, {'stream_contents': False})

    contents = msg['contents']
    tasks = contents['tasks']
    key_prefix = contents['key_prefix']
    extra_msg = contents.get('extra_msg', {})
    # Taken from the header of the loaded message, not from the incoming one
    extra_header = msg['header'].get('extra', {})

    _, step = get_job_step(jobid, stepid)
    if not step:
        raise GOBException(f"No jobstep found with id {stepid}")

    self._validate_dependencies(tasks)
    self._create_tasks(jobid, stepid, process_id, tasks, key_prefix, extra_msg, extra_header)
    self._queue_free_tasks_for_jobstep(stepid)
def __exit__(self, exc_type, exc_val, exc_tb):
    """Leave the context: refuse to exit with unapplied events, then flush the storage.

    :param exc_type: not used
    :param exc_val: not used
    :param exc_tb: not used
    :raises GOBException: if ``self.add_events`` or ``self.other_events`` still holds events
    :return: None
    """
    pending_events = self.add_events or self.other_events
    if pending_events:
        raise GOBException("Have unapplied events. Call apply_all() before leaving context")

    # Write any buffered entities and flush storage
    self.storage.force_flush_entities()
def connect_to_postgresql(config):
    """Open a psycopg2 connection with the settings in config.

    :param config: dict with the keys 'username', 'database', 'password', 'host' and 'port';
        'name' is used in error messages
    :raises GOBException: if a setting is missing or the connection cannot be established
    :return: tuple (connection, user), where user is the string "(username@database)"
    """
    try:
        user = f"({config['username']}@{config['database']})"
        connect_args = {
            'database': config['database'],
            'user': config['username'],
            'password': config['password'],
            'host': config['host'],
            'port': config['port'],
        }
        connection = psycopg2.connect(**connect_args)
    except psycopg2.OperationalError as exc:
        raise GOBException(f'Database connection for source {config["name"]} {user} failed. Error: {exc}.')
    except KeyError as exc:
        raise GOBException(f'Missing configuration for source {config["name"]}. Error: {exc}')

    return connection, user
def _check_configuration(self):
    """Run every check in ``self.config_checks`` against the current configuration.

    Each entry consists of (setting, check, message, type). The value of the
    setting is fetched with ``self._get_config_value`` and passed to ``check``.
    A failing check of type ``self.ERROR`` aborts with an exception; a failing
    check of any other type only prints a warning.

    :raises GOBException: for the first failing check of type ``self.ERROR``
    :return: None
    """
    for setting, check, message, severity in self.config_checks:
        value = self._get_config_value(setting)
        if check(value):
            continue

        msg = f"Checking Postgres config for {setting}. Value is {value}, but {message}"
        if severity == self.ERROR:
            raise GOBException(msg)
        print(f"WARNING: {msg}")
def connect(self):
    """Connect to the Postgres database described by ``self.connection_config``.

    Sets ``self.user`` to "(username@database)" and ``self.connection`` to the
    psycopg2 connection. The connection is requested with sslmode 'require'.

    :raises GOBException: if a setting is missing or the connection cannot be established
    :return: None
    """
    settings = self.connection_config
    try:
        self.user = f"({settings['username']}@{settings['database']})"
        connect_args = {
            'database': settings['database'],
            'user': settings['username'],
            'password': settings['password'],
            'host': settings['host'],
            'port': settings['port'],
            'sslmode': 'require',
        }
        self.connection = psycopg2.connect(**connect_args)
    except psycopg2.OperationalError as exc:
        raise GOBException(
            f'Database connection for source {self.connection_config["name"]} {self.user} failed. '
            f'Error: {exc}.')
    except KeyError as exc:
        raise GOBException(
            f'Missing configuration for source {self.connection_config["name"]}. Error: {exc}')
class PostgresDatastore(SqlDatastore):
    """SQL datastore for a Postgres database, accessed through psycopg2."""

    def __init__(self, connection_config: dict, read_config: dict = None):
        """Register the Postgres driver in the connection config; no connection is made yet.

        :param connection_config: connection settings, see connect()
        :param read_config: passed on unchanged to the base class
        """
        super().__init__(connection_config, read_config)

        self.connection_config['drivername'] = POSTGRES_DRIVER
        self.connection = None

    def connect(self):
        """Connect to the database described by ``self.connection_config``.

        Sets ``self.user`` to "(username@database)" and ``self.connection`` to the
        psycopg2 connection. The connection is requested with sslmode 'require'.

        :raises GOBException: if a setting is missing or the connection cannot be established
        :return: None
        """
        settings = self.connection_config
        try:
            self.user = f"({settings['username']}@{settings['database']})"
            connect_args = {
                'database': settings['database'],
                'user': settings['username'],
                'password': settings['password'],
                'host': settings['host'],
                'port': settings['port'],
                'sslmode': 'require',
            }
            self.connection = psycopg2.connect(**connect_args)
        except psycopg2.OperationalError as exc:
            raise GOBException(
                f'Database connection for source {self.connection_config["name"]} {self.user} failed. '
                f'Error: {exc}.')
        except KeyError as exc:
            raise GOBException(
                f'Missing configuration for source {self.connection_config["name"]}. Error: {exc}')

    def disconnect(self):
        """Close the connection (if any) and remove the connection attribute.

        :return: None
        """
        if not hasattr(self, 'connection'):
            return

        if self.connection:
            self.connection.close()
        del self.connection

    def query(self, query, **kwargs):
        """Query Postgres and yield the resulting rows one by one.

        Rows are fetched in batches with ``fetchmany`` on a DictCursor. Once all
        rows have been yielded the transaction is committed.

        :param query: the query to execute
        :param kwargs: passed on to ``connection.cursor``, except for the optional
            'arraysize', which is set on the cursor as the batch size
        :raises GOBException: if psycopg2 reports an error
        :return: generator of result rows
        """
        arraysize = kwargs.pop('arraysize', None)
        try:
            with self.connection.cursor(cursor_factory=DictCursor, **kwargs) as cursor:
                if arraysize:
                    cursor.arraysize = arraysize

                cursor.execute(query)

                # An empty batch signals that all rows have been read
                while True:
                    batch = cursor.fetchmany()
                    if not batch:
                        break
                    yield from batch

            self.connection.commit()
        except psycopg2.Error as exc:
            raise GOBException(f'Error executing query: {query[:80]}. Error: {exc}')
def split_ref(self, ref) -> tuple:
    """Split a reference of the form "catalog_name:collection_name" into its two parts.

    :param ref: reference string with exactly one colon and a non-empty name on either side
    :raises GOBException: if ref does not consist of exactly two non-empty parts
    :return: tuple of (catalog_name, collection_name)
    """
    parts = ref.split(':')

    if len(parts) != 2 or not all(parts):
        raise GOBException(f"Invalid reference {ref}")

    # str.split returns a list; convert it to honour the documented and annotated tuple result
    return tuple(parts)
def create_event(cls, _tid, data, version):
    """Create a MODIFY event that carries only the modifications, not the full entity data.

    The event data is reduced to the modifications and the hash from ``data``,
    merged with whatever ``cls.last_event(data)`` returns.

    :param _tid: passed on to the parent implementation
    :param data: event data; must contain the modifications
    :param version: passed on to the parent implementation
    :raises GOBException: if data does not contain any modifications
    :return: the result of the parent ``create_event``
    """
    # MODIFY has no data attributes only modifications
    if modifications_key not in data:
        raise GOBException("MODIFY event requires modifications")

    modifications = data[modifications_key]
    entity_hash = data[hash_key]
    mods = {
        modifications_key: modifications,
        hash_key: entity_hash,
        **cls.last_event(data),
    }

    return super().create_event(_tid, mods, version)
def _get_event(name):
    """Return the event definition that is registered under the given name.

    :param name: name of the event
    :raises GOBException: if no event is registered under this name
    :return: the event definition (class) for the given event name
    """
    try:
        event_class = _gob_events_dict[name]
    except KeyError:
        raise GOBException(f"{name} is an invalid GOB event")
    return event_class
def _ensure_fieldnames_match_existing_file(fieldnames, file):
    """Verify that the headers of an existing file equal the given fieldnames.

    :param fieldnames: the field names that are about to be appended
    :param file: the existing file; its headers are read with ``_get_headers_from_file``
    :raises GOBException: if the headers of the file differ from fieldnames
    :return: None
    """
    headers_differ = _get_headers_from_file(file) != fieldnames
    if headers_differ:
        raise GOBException('Fields from existing file do not match fields to append')
def _apply_filters(raw_value, filters):
    """Apply a sequence of filters, in order, to a value.

    A filter is a sequence whose first item names the operation and whose
    remaining items are its arguments. Supported operations:
    - ["re.sub", pattern, replacement]: regular expression substitution
    - ["upper"]: convert to upper case

    :param raw_value: the value to start with
    :param filters: the filters to apply
    :raises GOBException: if a filter names an unknown operation
    :return: the value after all filters have been applied
    """
    result = raw_value

    for spec in filters:
        operation, arguments = spec[0], spec[1:]

        if operation == "upper":
            result = result.upper()
        elif operation == "re.sub":
            pattern, replacement = arguments[0], arguments[1]
            result = re.sub(pattern, replacement, result)
        else:
            raise GOBException(f"Unknown function {operation}")

    return result
def _check_message(msg: dict):
    """Validate that the header of the message identifies an existing relation.

    The header must hold truthy values for the catalog, collection and
    attribute keys. The catalog and collection must be known to the model, and
    the sources must hold a relation specification for the attribute.

    :param msg: message with a 'header' entry
    :raises GOBException: for the first check that fails
    :return: None
    """
    header = msg.get('header', {})

    for key in (CATALOG_KEY, COLLECTION_KEY, ATTRIBUTE_KEY):
        if not header.get(key):
            raise GOBException(f"Missing {key} attribute in header")

    model = GOBModel()
    sources = GOBSources()

    catalog = header[CATALOG_KEY]
    collection = header[COLLECTION_KEY]
    attribute = header[ATTRIBUTE_KEY]

    if not model.get_catalog(catalog):
        raise GOBException(f"Invalid catalog name {catalog}")

    if not model.get_collection(catalog, collection):
        raise GOBException(f"Invalid catalog/collection combination: {catalog}/{collection}")

    if not sources.get_field_relations(catalog, collection, attribute):
        raise GOBException(f"Missing relation specification for {catalog} {collection} "
                           f"{attribute}")
def _split_object_reference(field: str):
    """Split an object reference of the form "source.attribute" into its two parts.

    :param field: the object reference
    :raises GOBException: if field does not contain exactly one dot
    :return: tuple (source, attr): the source column and the attribute name
    """
    try:
        # Unpacking fails with a ValueError for anything but exactly two parts
        source, attr = field.split(".")
    except ValueError:
        raise GOBException("Object reference should contain exactly one dot (.)")
    return source, attr
def connect_to_objectstore(config):
    """Connect to the objectstore with the given configuration.

    The connection is made by ``get_connection`` (according to the original
    documentation this is the Amsterdam/objectstore library - verify at the import).

    :param config: dict with at least the keys 'USER' and 'TENANT_NAME';
        'name' is used in error messages
    :raises GOBException: if a setting is missing or the connection cannot be established
    :return: tuple (connection, user), where user is the string "(USER@TENANT_NAME)"
    """
    try:
        user = f"({config['USER']}@{config['TENANT_NAME']})"
        connection = get_connection(config)
    except KeyError as exc:
        raise GOBException(f'Missing configuration for source {config["name"]}. Error: {exc}')
    except Exception as exc:
        raise GOBException(f"Objectstore connection for source {config['name']} {user} failed. Error: {exc}.")

    return connection, user
def execute_postgresql_query(connection, query: str) -> None:
    """Execute a query on the given Postgres connection and commit the transaction.

    :param connection: open database connection
    :param query: the query to execute
    :raises GOBException: if the database reports an error; the message holds
        the first 80 characters of the query
    :return: None
    """
    try:
        with connection.cursor() as cur:
            cur.execute(query)
            connection.commit()
    except Error as exc:
        raise GOBException(f'Error executing query: {query[:80]}. Error: {exc}')
def execute(self, query: str) -> None:
    """Execute a query on ``self.connection`` and commit the transaction.

    :param query: the query to execute
    :raises GOBException: if psycopg2 reports an error; the message holds
        the first 80 characters of the query
    :return: None
    """
    connection = self.connection
    try:
        with connection.cursor() as cursor:
            cursor.execute(query)
        connection.commit()
    except psycopg2.Error as exc:
        raise GOBException(f'Error executing query: {query[:80]}. Error: {exc}')
def get_gob_type_from_sql_type(sql_type):
    """Get the GOB type definition for a given SQL type.

    The first entry of ``_gob_postgres_sql_types_list`` whose 'sql_type' equals
    the given type determines the result.

    :param sql_type: the SQL type, e.g. <class 'sqlalchemy.sql.sqltypes.Integer'>
    :raises GOBException: if no entry matches the given SQL type
    :return: the GOB type definition (class) registered for the SQL type
    """
    not_found = object()
    candidates = (
        type_map['gob_type']
        for type_map in _gob_postgres_sql_types_list
        if sql_type == type_map['sql_type']
    )

    gob_type = next(candidates, not_found)
    if gob_type is not_found:
        raise GOBException(f"No GOBType found for SQLType: {sql_type}")
    return gob_type
def result(self):
    """Check all validators for fatal errors.

    The result of every validator is collected first. A result equal to False
    counts as a fatal failure; any other result (including None) passes.

    :raises GOBException: if any validator result equals False
    :return: True if no validator reported a failure
    """
    outcomes = []
    for validator in self.validators:
        outcomes.append(validator.result())

    if False not in outcomes:
        return True

    # A fatal validation has failed
    raise GOBException(f"Quality assurance failed for {self.catalog_name}.{self.entity_name}")
def _split_relation_table_name(table_name: str):
    """Split the name of a relation table into its components.

    Example: rel_brk_tng_brk_sjt_van_kadastraalsubject
             0   1   2   3   4   5 ......

    Everything from the sixth part onwards forms the reference name.

    :param table_name: name of the relation table
    :raises GOBException: if the name has fewer than six underscore-separated parts
    :return: dict with the keys src_cat_abbr, src_col_abbr, dst_cat_abbr, dst_col_abbr
        and reference_name
    """
    parts = table_name.split('_')
    if len(parts) < 6:
        raise GOBException("Invalid table name")

    # The first part (the table prefix) is not part of the result
    _, src_cat, src_col, dst_cat, dst_col, *reference_parts = parts
    return {
        'src_cat_abbr': src_cat,
        'src_col_abbr': src_col,
        'dst_cat_abbr': dst_cat,
        'dst_col_abbr': dst_col,
        'reference_name': "_".join(reference_parts),
    }
def connect(self):
    """Connect to the Oracle database described by ``self.connection_config``.

    The cx_Oracle library is used for the connection. Sets ``self.user`` to
    "(username@database)" and ``self.connection`` to the cx_Oracle connection.
    As a side effect the environment variable NLS_LANG is set for the process.

    :raises GOBException: if a setting is missing from the connection config
    :return: None
    """
    # Set the NLS_LANG variable to UTF-8 to get the correct encoding
    os.environ["NLS_LANG"] = ".UTF8"

    try:
        settings = self.connection_config
        database = str(settings['database'])
        username = str(settings['username'])
        password = str(settings['password'])
        port = str(settings['port'])
        host = str(settings['host'])

        self.user = f"({username}@{database})"
        dsn = self._get_dsn(host, port, database)
        self.connection = cx_Oracle.Connection(user=username, password=password, dsn=dsn)
    except KeyError as exc:
        raise GOBException(f'Missing configuration for source {self.connection_config["name"]}. Error: {exc}')