Beispiel #1
0
    def _check_config(self):
        """Validate that read_config holds every setting this reader needs.

        :raises GOBException: if one of the required keys is missing or has a
            falsy value, or if mode equals ImportMode.MUTATIONS while
            last_full_download_location is missing or falsy
        :return: None
        """
        required_keys = ('object_type', 'xml_object', 'mode', 'gemeentes',
                         'download_location')
        for key in required_keys:
            value = self.read_config.get(key)
            if not value:
                raise GOBException(f"Missing {key} in read_config")

        # Mutations mode additionally requires last_full_download_location
        mutations_mode = self.read_config['mode'] == ImportMode.MUTATIONS
        if mutations_mode and not self.read_config.get("last_full_download_location"):
            raise GOBException(
                "Missing last_full_download_location in read_config")
Beispiel #2
0
    def result(self):
        """Raise if the quality checks failed, otherwise log that they passed.

        :raises GOBException: if self.fatal is truthy, or if self.duplicates
            is non-empty (the message lists every duplicate)
        :return: None
        """
        if self.fatal:
            raise GOBException(
                f"Quality assurance failed for {self.entity_name}"
            )

        if self.duplicates:
            duplicate_keys = ', '.join(map(str, self.duplicates))
            raise GOBException(
                f"Duplicate primary key(s) found in source: [{duplicate_keys}]")

        logger.info("Quality assurance passed")
Beispiel #3
0
    def __iter__(self):

        for left, right in zip_longest(self.base_api, self.merged_api):
            if left is None or right is None:
                raise GOBException("Length of results from API's don't match.")

            if self._item_key(left) != self._item_key(right):
                raise GOBException("Rows in API results don't match.")

            left.update({col: right.get(col) for col in self.attributes})
            yield left
Beispiel #4
0
def _get_materialized_view(catalog_name: str, collection_name: str, attribute_name: str):
    """Look up the materialized view for a catalog, collection and attribute.

    When catalog_name is "rel", collection_name is treated as a relation name
    and the lookup is delegated to _get_materialized_view_by_relation_name;
    attribute_name is not used in that case.

    :param catalog_name: catalog name, or "rel" for a lookup by relation name
    :param collection_name: collection name (or relation name for "rel")
    :param attribute_name: attribute name, required unless catalog_name is "rel"
    :return: the result of the MaterializedViews lookup
    :raises GOBException: if collection_name or (for non-"rel" catalogs)
        attribute_name is empty, or if the lookup raises any exception
    """
    if not collection_name:
        raise GOBException("Need collection_name to update materialized view.")

    if catalog_name == "rel":
        return _get_materialized_view_by_relation_name(collection_name)

    if not attribute_name:
        raise GOBException("Missing attribute")

    try:
        view = MaterializedViews().get(catalog_name, collection_name, attribute_name)
    except Exception as e:
        # Log the underlying error before replacing it with a GOBException
        logger.error(str(e))
        raise GOBException(f"Could not get materialized view for {catalog_name} {collection_name}.")
    else:
        return view
Beispiel #5
0
def extract_dataset_from_msg(msg):
    """Return the import definition for the dataset named in a message header.

    Example message:

    message = {
       "header": {
          "catalogue": "some catalogue",
          "collection": "the collection",
          "application": "the application"
       }
    }

    Where 'application' is optional when there is only one known application for given catalogue and collection

    :param msg: message whose 'header' holds at least 'catalogue' and 'collection'
    :return: the result of get_import_definition for the catalogue, collection
        and (possibly None) application found in the header
    :raises GOBException: if 'catalogue' or 'collection' is absent from the header
    """
    header = msg.get('header', {})
    required_keys = ['catalogue', 'collection']

    if any(key not in header for key in required_keys):
        raise GOBException(
            f"Missing dataset keys. Expected keys: {','.join(required_keys)}")

    catalogue = header['catalogue']
    collection = header['collection']
    application = header.get('application')
    return get_import_definition(catalogue, collection, application)
Beispiel #6
0
    def _split_table_name(self, table_name: str):
        split = [part for part in table_name.split('_') if part]

        if len(split) < 2:
            raise GOBException("Invalid table name")

        return split
Beispiel #7
0
def _get_materialized_view_by_relation_name(relation_name: str):
    """Look up the materialized view that belongs to a relation name.

    :param relation_name: passed to MaterializedViews().get_by_relation_name
    :return: the result of that lookup
    :raises GOBException: if the lookup raises any exception; the original
        error message is logged first
    """
    try:
        view = MaterializedViews().get_by_relation_name(relation_name)
    except Exception as e:
        logger.error(str(e))
        raise GOBException(f"Could not get materialized view for relation {relation_name}.")
    else:
        return view
Beispiel #8
0
    def _determine_relation_evaluation_order(self):
        """Determines the order in which we should evaluate relations from the root of the entity.

        :return:
        """
        relations = list(self.relations_hierarchy.keys())
        root_relation = [
            k for k, v in self.relations_hierarchy.items() if v is None
        ][0]

        order = [root_relation]
        relations.remove(root_relation)

        while len(relations):
            extract = [
                k for k, v in self.relations_hierarchy.items()
                if v in order and k in relations
            ]

            if len(extract) == 0:
                raise GOBException(
                    "This should not be possible. Not sure what you want me to do now?"
                )

            order.extend(extract)
            relations = [
                relation for relation in relations if relation not in extract
            ]

        order.remove(root_relation)

        return order, root_relation
Beispiel #9
0
    def migrate_event_data(self, event, data, catalog_name, collection_name,
                           target_version):
        """Migrate event data step by step until the event is at target_version.

        As long as event.version differs from target_version, the migration
        for the event's current version is looked up and applied. The loop
        relies on _apply_migration changing event.version.

        :param event: the event being migrated; its version attribute is read
        :param data: the event data, handed to _apply_migration
        :param catalog_name: catalog used to look up the migration
        :param collection_name: collection used to look up the migration
        :param target_version: the version at which migrating stops
        :return: the data object that was passed in
        :raises GOBException: if no migration exists for the current version
        """
        while event.version != target_version:
            migration = self._get_migration(catalog_name, collection_name,
                                            event.version)

            if migration:
                # Apply all conversions on the data
                self._apply_migration(event, data, migration)
                continue

            logger.error(
                f"No migration found for {catalog_name}, {collection_name} {event.version}"
            )
            raise GOBException(
                f"Not able to migrate event for {catalog_name}, {collection_name} to version {target_version}"
            )

        return data
    def apply_other_event(self, entity):
        """Apply every pending non-initial event for an entity, in order.

        The events are taken from self.other_events under the entity's _tid.
        An event can be an:
        - ADD event (reanimation of a DELETED entity)
        - DELETE or MODIFY event
        - CONFIRM event (these are not registered as the entity's last event)

        :param entity: the entity to update; it is modified in place
        :return: None
        :raises GOBException: if an event other than ADD is applied to an
            entity whose _date_deleted is set
        """
        for gob_event in self.other_events[entity._tid]:
            # Only an ADD event may be applied on a deleted entity
            entity_is_deleted = entity._date_deleted is not None
            if entity_is_deleted and not isinstance(gob_event, GOB.ADD):
                raise GOBException(
                    f"Trying to '{gob_event.name}' a deleted entity")

            gob_event.apply_to(entity)

            # CONFIRM events do not count as the last event that updated the
            # entity (original note: they are deleted once applied)
            if isinstance(gob_event, GOB.CONFIRM):
                continue
            entity._last_event = gob_event.id
Beispiel #11
0
    def on_start_tasks(self, msg):
        """Entry method for TaskQueue.

        Validates the task dependencies, creates the tasks for the job step
        named in the message header and queues the free tasks of that step.

        :param msg: message with 'stepid', 'jobid' and 'process_id' in its
            header; after loading, its contents must hold 'tasks' and
            'key_prefix' and may hold 'extra_msg'
        :return: None
        :raises GOBException: if no job step exists for the given stepid
        """
        incoming_header = msg['header']
        stepid = incoming_header['stepid']
        jobid = incoming_header['jobid']
        process_id = incoming_header['process_id']

        # Incoming message may be large. Manually load message from file if necessary
        msg, _ = load_message(msg, json.loads, {'stream_contents': False})

        # tasks: [{'id': 'some_id', 'dependencies': ['some_id', 'some_other_id']}
        contents = msg['contents']
        tasks = contents['tasks']
        key_prefix = contents['key_prefix']
        extra_msg = contents.get('extra_msg', {})
        extra_header = msg['header'].get('extra', {})

        # Only the step is needed here; the job is looked up but not used
        _job, step = get_job_step(jobid, stepid)
        if not step:
            raise GOBException(f"No jobstep found with id {stepid}")

        self._validate_dependencies(tasks)
        self._create_tasks(jobid, stepid, process_id, tasks, key_prefix,
                           extra_msg, extra_header)
        self._queue_free_tasks_for_jobstep(stepid)
 def __exit__(self, exc_type, exc_val, exc_tb):
     # Write any buffered entities and flush storage
     if self.add_events or self.other_events:
         raise GOBException(
             "Have unapplied events. Call apply_all() before leaving context"
         )
     self.storage.force_flush_entities()
Beispiel #13
0
def connect_to_postgresql(config):
    """Open a psycopg2 connection to the Postgres database described by config.

    :param config: mapping with the keys 'database', 'username', 'password',
        'host' and 'port'; 'name' is only used in error messages
    :return: tuple of (connection, user), where user is the string
        "(<username>@<database>)"
    :raises GOBException: if a required key is missing from config, or if
        psycopg2 reports an OperationalError while connecting
    """
    try:
        user = f"({config['username']}@{config['database']})"
        connection = psycopg2.connect(
            database=config['database'],
            user=config['username'],
            password=config['password'],
            host=config['host'],
            port=config['port'],
        )
    except psycopg2.OperationalError as e:
        # 'name' is read with .get() in both handlers: indexing it would raise
        # a bare KeyError from inside the handler when the config has no name,
        # hiding the GOBException that is meant to be raised.
        raise GOBException(
            f'Database connection for source {config.get("name")} {user} failed. Error: {e}.'
        ) from e
    except KeyError as e:
        raise GOBException(
            f'Missing configuration for source {config.get("name")}. Error: {e}'
        ) from e
    else:
        return connection, user
Beispiel #14
0
 def _check_configuration(self):
     for setting, check, message, type in self.config_checks:
         value = self._get_config_value(setting)
         if not check(value):
             msg = f"Checking Postgres config for {setting}. Value is {value}, but {message}"
             if type == self.ERROR:
                 raise GOBException(msg)
             else:
                 print(f"WARNING: {msg}")
Beispiel #15
0
 def connect(self):
     """Open a psycopg2 connection (sslmode 'require') using connection_config.

     Sets self.user to "(<username>@<database>)" and self.connection to the
     new connection. connection_config must hold 'database', 'username',
     'password', 'host' and 'port'; 'name' is only used in error messages.

     :return: None
     :raises GOBException: if a required key is missing from
         connection_config, or if psycopg2 reports an OperationalError
     """
     try:
         self.user = f"({self.connection_config['username']}@{self.connection_config['database']})"
         self.connection = psycopg2.connect(
             database=self.connection_config['database'],
             user=self.connection_config['username'],
             password=self.connection_config['password'],
             host=self.connection_config['host'],
             port=self.connection_config['port'],
             sslmode='require',
         )
     except psycopg2.OperationalError as e:
         # 'name' is read with .get() in both handlers: indexing it would
         # raise a bare KeyError from inside the handler when the config has
         # no name, hiding the GOBException that is meant to be raised.
         raise GOBException(
             f'Database connection for source {self.connection_config.get("name")} {self.user} failed. '
             f'Error: {e}.') from e
     except KeyError as e:
         raise GOBException(
             f'Missing configuration for source {self.connection_config.get("name")}. Error: {e}'
         ) from e
Beispiel #16
0
class PostgresDatastore(SqlDatastore):
    """SqlDatastore implementation that talks to Postgres through psycopg2."""

    def __init__(self, connection_config: dict, read_config: dict = None):
        """Store the configuration; no connection is opened until connect().

        :param connection_config: connection settings; 'drivername' is set to
            POSTGRES_DRIVER on the stored config
        :param read_config: passed on to SqlDatastore unchanged
        """
        super().__init__(connection_config, read_config)

        self.connection_config['drivername'] = POSTGRES_DRIVER
        self.connection = None

    def connect(self):
        """Open a psycopg2 connection (sslmode 'require') using connection_config.

        Sets self.user to "(<username>@<database>)" and self.connection to the
        new connection. connection_config must hold 'database', 'username',
        'password', 'host' and 'port'; 'name' is only used in error messages.

        :return: None
        :raises GOBException: if a required key is missing from
            connection_config, or if psycopg2 reports an OperationalError
        """
        try:
            self.user = f"({self.connection_config['username']}@{self.connection_config['database']})"
            self.connection = psycopg2.connect(
                database=self.connection_config['database'],
                user=self.connection_config['username'],
                password=self.connection_config['password'],
                host=self.connection_config['host'],
                port=self.connection_config['port'],
                sslmode='require',
            )
        except psycopg2.OperationalError as e:
            # 'name' is read with .get() in both handlers: indexing it would
            # raise a bare KeyError from inside the handler when the config
            # has no name, hiding the GOBException that is meant to be raised.
            raise GOBException(
                f'Database connection for source {self.connection_config.get("name")} {self.user} failed. '
                f'Error: {e}.') from e
        except KeyError as e:
            raise GOBException(
                f'Missing configuration for source {self.connection_config.get("name")}. Error: {e}'
            ) from e

    def disconnect(self):
        """Close the connection, if there is one, and drop the attribute.

        Safe to call more than once: after the first call the connection
        attribute no longer exists and nothing happens.
        """
        if hasattr(self, 'connection'):
            if self.connection:
                self.connection.close()
            del self.connection

    def query(self, query, **kwargs):
        """Query Postgres and yield the result rows one by one.

        This is a generator: nothing is executed until it is iterated, and
        the commit only happens once all rows have been consumed.

        :param query: the query to execute
        :param kwargs: 'arraysize' (optional) is set on the cursor and
            controls how many rows fetchmany() returns per batch; all other
            keyword arguments are passed to connection.cursor()
        :return: generator of rows, produced by a DictCursor
        :raises GOBException: if psycopg2 raises an Error
        """
        arraysize = kwargs.pop('arraysize', None)

        try:
            with self.connection.cursor(cursor_factory=DictCursor,
                                        **kwargs) as cur:
                if arraysize:
                    cur.arraysize = arraysize

                cur.execute(query)
                # Fetch in batches until fetchmany() returns an empty result
                while results := cur.fetchmany():
                    yield from results

            self.connection.commit()
        except psycopg2.Error as e:
            # Only the first 80 characters of the query go into the message
            raise GOBException(
                f'Error executing query: {query[:80]}. Error: {e}') from e
Beispiel #17
0
    def split_ref(self, ref) -> tuple:
        """Split a "catalog_name:collection_name" reference into its two parts.

        Note: the value returned is the list produced by str.split, holding
        exactly two non-empty strings.

        :param ref: reference of the form "<catalog_name>:<collection_name>"
        :return: [catalog_name, collection_name]
        :raises GOBException: if ref does not consist of exactly two
            non-empty parts separated by a single colon
        """
        parts = ref.split(':')

        # all(parts) is False as soon as one of the parts is an empty string
        is_valid = len(parts) == 2 and all(parts)
        if not is_valid:
            raise GOBException(f"Invalid reference {ref}")
        return parts
Beispiel #18
0
    def create_event(cls, _tid, data, version):
        """Create a MODIFY event that holds only the modifications.

        The event data is reduced to the modifications, the hash and whatever
        cls.last_event(data) contributes, before being handed to the parent
        implementation.

        :param _tid: passed on to the parent create_event
        :param data: must contain modifications_key and hash_key
        :param version: passed on to the parent create_event
        :return: the result of the parent create_event
        :raises GOBException: if data holds no modifications
        """
        #   MODIFY has no data attributes only modifications
        if modifications_key not in data:
            raise GOBException("MODIFY event requires modifications")

        event_data = {
            modifications_key: data[modifications_key],
            hash_key: data[hash_key],
        }
        event_data = {**event_data, **cls.last_event(data)}

        return super().create_event(_tid, event_data, version)
Beispiel #19
0
def _get_event(name):
    """Return the event definition registered under the given event name.

    :param name: the event name to look up in _gob_events_dict
    :return: the event definition (class) for the given event name
    :raises GOBException: if no event is registered under that name
    """
    try:
        event = _gob_events_dict[name]
    except KeyError:
        raise GOBException(f"{name} is an invalid GOB event")
    else:
        return event
Beispiel #20
0
def _ensure_fieldnames_match_existing_file(fieldnames, file):
    """Check that fieldnames equal the headers already present in file.

    :param fieldnames: the field names that are about to be appended
    :param file: the existing file, handed to _get_headers_from_file
    :return: None
    :raises GOBException: if the headers read from file differ from fieldnames
    """
    fields_differ = _get_headers_from_file(file) != fieldnames

    if fields_differ:
        raise GOBException(
            'Fields from existing file do not match fields to append')
Beispiel #21
0
def _apply_filters(raw_value, filters):
    value = raw_value
    for filter in filters:
        name = filter[0]
        args = filter[1:]
        if name == "re.sub":
            value = re.sub(args[0], args[1], value)
        elif name == "upper":
            value = value.upper()
        else:
            raise GOBException(f"Unknown function {name}")
    return value
Beispiel #22
0
def _check_message(msg: dict):
    """Validate that a message header identifies an existing relation.

    The header must name a catalog, a collection and an attribute. The
    catalog and collection must exist in GOBModel, and GOBSources must hold
    field relations for the attribute.

    :param msg: the message whose 'header' is checked
    :return: None
    :raises GOBException: if a header key is missing or empty, if the catalog
        or catalog/collection combination is unknown, or if no relation
        specification exists for the attribute
    """
    header = msg.get('header', {})

    for key in (CATALOG_KEY, COLLECTION_KEY, ATTRIBUTE_KEY):
        if not header.get(key):
            raise GOBException(f"Missing {key} attribute in header")

    model = GOBModel()
    sources = GOBSources()

    catalog_name = header[CATALOG_KEY]
    collection_name = header[COLLECTION_KEY]
    attribute_name = header[ATTRIBUTE_KEY]

    if not model.get_catalog(catalog_name):
        raise GOBException(f"Invalid catalog name {catalog_name}")

    if not model.get_collection(catalog_name, collection_name):
        raise GOBException(f"Invalid catalog/collection combination: {catalog_name}/{collection_name}")

    if not sources.get_field_relations(catalog_name, collection_name, attribute_name):
        raise GOBException(f"Missing relation specification for {catalog_name} {collection_name} "
                           f"{attribute_name}")
Beispiel #23
0
def _split_object_reference(field: str):
    """
    Splits the object reference in the source column and attribute name

    :param field:
    :return:
    """
    try:
        source, attr = field.split(".")
        return source, attr
    except ValueError:
        raise GOBException(
            "Object reference should contain exactly one dot (.)")
Beispiel #24
0
def connect_to_objectstore(config):
    """Connect to the objectstore

    The Amsterdam/objectstore library is used to connect to the objectstore

    :param config: objectstore configuration, handed to get_connection; must
        hold 'USER' and 'TENANT_NAME', while 'name' is only used in error
        messages
    :return: tuple of (connection, user), where user is the string
        "(<USER>@<TENANT_NAME>)"
    :raises GOBException: if a KeyError is raised (reported as missing
        configuration) or if connecting fails with any other exception
    """
    try:
        user = f"({config['USER']}@{config['TENANT_NAME']})"
        connection = get_connection(config)

    except KeyError as e:
        # 'name' is read with .get() in both handlers: indexing it would raise
        # a bare KeyError from inside the handler when the config has no name,
        # hiding the GOBException that is meant to be raised.
        raise GOBException(
            f'Missing configuration for source {config.get("name")}. Error: {e}') from e
    except Exception as e:
        raise GOBException(
            f"Objectstore connection for source {config.get('name')} {user} failed. Error: {e}."
        ) from e
    else:
        return connection, user
Beispiel #25
0
def execute_postgresql_query(connection, query: str) -> None:
    """Execute a Postgres query on the given connection and commit it.

    :param connection: connection that provides cursor() and commit()
    :param query: the query to execute
    :return: None
    :raises GOBException: if an Error is raised while executing or committing
    """
    try:
        with connection.cursor() as active_cursor:
            active_cursor.execute(query)
            # The commit happens while the cursor is still open
            connection.commit()
    except Error as e:
        # Only the first 80 characters of the query go into the message
        query_start = query[:80]
        raise GOBException(f'Error executing query: {query_start}. Error: {e}')
Beispiel #26
0
    def execute(self, query: str) -> None:
        """Execute a Postgres query on self.connection and commit it.

        :param query: the query to execute
        :return: None
        :raises GOBException: if psycopg2 raises an Error while executing or
            committing
        """
        try:
            with self.connection.cursor() as active_cursor:
                active_cursor.execute(query)
            # The commit happens after the cursor context has been left
            self.connection.commit()
        except psycopg2.Error as e:
            # Only the first 80 characters of the query go into the message
            query_start = query[:80]
            raise GOBException(
                f'Error executing query: {query_start}. Error: {e}')
Beispiel #27
0
def get_gob_type_from_sql_type(sql_type):
    """
    Get the GOB type definition for a given SQL type

    The first entry of _gob_postgres_sql_types_list whose 'sql_type' equals
    the given type decides the result.

    :param sql_type: the SQL type to look up
    :return: the 'gob_type' of the first matching entry
    :raises GOBException: if no entry matches the given SQL type
    """
    # A private sentinel tells "no match" apart from any real gob_type value
    not_found = object()
    gob_type = next(
        (type_map['gob_type'] for type_map in _gob_postgres_sql_types_list
         if sql_type == type_map['sql_type']),
        not_found,
    )

    if gob_type is not_found:
        raise GOBException(f"No GOBType found for SQLType: {sql_type}")
    return gob_type
Beispiel #28
0
    def result(self):
        """
        Checks for fatal errors

        Every validator in self.validators is asked for its result. If any of
        those results equals False an exception is raised.

        :return: True when no validator reported False
        :raises GOBException: if at least one validator result equals False
        """
        outcomes = []
        for validator in self.validators:
            outcomes.append(validator.result())

        # Raise an Exception if a fatal validation has failed
        if False in outcomes:
            raise GOBException(
                f"Quality assurance failed for {self.catalog_name}.{self.entity_name}"
            )
        return True
Beispiel #29
0
def _split_relation_table_name(table_name: str):
    split = table_name.split('_')

    if len(split) < 6:
        raise GOBException("Invalid table name")

    # Example: rel_brk_tng_brk_sjt_van_kadastraalsubject
    #          0   1   2   3   4   5 ......

    return {
        'src_cat_abbr': split[1],
        'src_col_abbr': split[2],
        'dst_cat_abbr': split[3],
        'dst_col_abbr': split[4],
        'reference_name': "_".join(split[5:]),
    }
Beispiel #30
0
    def connect(self):
        """Connect to the datasource

        The cx_Oracle library is used to connect to the data source for databases

        Sets self.user to "(<username>@<database>)" and stores the new
        connection in self.connection. connection_config must hold
        'database', 'username', 'password', 'port' and 'host'.

        :return: None; the connection is stored in self.connection
        :raises GOBException: if a required key is missing from connection_config
        """
        # Set the NLS_LANG variable to UTF-8 to get the correct encoding
        os.environ["NLS_LANG"] = ".UTF8"
        try:
            items = ('database', 'username', 'password', 'port', 'host')
            # Every setting is converted to a string; a missing key raises KeyError
            database, username, password, port, host = [str(self.connection_config[k]) for k in items]
            self.user = f"({username}@{database})"
            dsn = self._get_dsn(host, port, database)
            self.connection = cx_Oracle.Connection(user=username, password=password, dsn=dsn)
        except KeyError as e:
            raise GOBException(f'Missing configuration for source {self.connection_config["name"]}. Error: {e}')