コード例 #1
0
    def _join_inverse_relation(self, relation_name: str, attributes: list, arguments: dict):
        """Register the joins and the select expression for an inverse relation.

        The snake-cased relation name must start with ``inv``. Its last two
        parts are taken as the destination catalog and collection; everything
        in between is the name of the relation attribute.

        :param relation_name: name of the inverse relation
        :param attributes: attributes handed to ``_json_build_attrs``
        :param arguments: arguments handed to ``_add_relation_joins``
        """
        parent_info = self._get_relation_info(self.relation_parents[relation_name])

        name_parts = to_snake(relation_name).split('_')
        assert name_parts[0] == 'inv'

        # inv_<attribute name parts...>_<dst catalog>_<dst collection>
        relation_attr_name = '_'.join(name_parts[1:-2])
        dst_catalog_name, dst_collection_name = name_parts[-2], name_parts[-1]

        dst_model_name = self.model.get_table_name(dst_catalog_name, dst_collection_name)
        dst_info = self._collect_relation_info(relation_name, f'{dst_model_name}')

        # JSON attributes of the destination, extended with its catalog and collection
        json_attrs = (
            f"{self._json_build_attrs(attributes, dst_info['alias'])}, "
            f"'_catalog', '{dst_catalog_name}', '_collection', '{dst_collection_name}'"
        )
        select_alias = f"_inv_{relation_attr_name}_{dst_info['catalog_name']}_{dst_info['collection_name']}"

        # The relation is looked up from the destination side
        joined_relation_name = get_relation_name(
            self.model, dst_info['catalog_name'], dst_info['collection_name'], relation_attr_name)

        self._add_relation_joins(parent_info, dst_info, joined_relation_name, arguments, is_inverse=True)
        self.select_expressions.append(f"json_build_object({json_attrs}) {select_alias}")
コード例 #2
0
def query_reference_entities(catalog, collection, reference_name, src_id):
    """Build the query for the entities that one source entity refers to.

    The destination table is joined with the relation table of the given
    reference and restricted to the rows whose ``src_id`` matches.

    :param catalog: name of the source catalog
    :param collection: name of the source collection
    :param reference_name: name of the reference attribute in the source collection
    :param src_id: id of the source entity
    :return: tuple of the (filtered) query and the convert function for the destination model
    """
    assert _Base
    db_session = get_session()

    model = GOBModel()

    # The relation table lives in the 'rel' catalog
    relation_name = get_relation_name(model, catalog, collection, reference_name)
    rel_table, _rel_model = get_table_and_model('rel', relation_name)

    # The reference is specified as '<catalog>:<collection>'
    reference = model.get_collection(catalog, collection)['references'][reference_name]
    dst_catalog_name, dst_collection_name = reference['ref'].split(':')

    dst_table, dst_model = get_table_and_model(dst_catalog_name, dst_collection_name)

    joined = db_session.query(dst_table).join(rel_table, dst_table._id == rel_table.dst_id)
    referenced = joined.filter(rel_table.src_id == src_id)

    # Leave out deleted records, then keep only the active ones
    entities = filter_active(filter_deleted(referenced, dst_table), dst_table)

    convert = _get_convert_for_model(dst_catalog_name, dst_collection_name, dst_model)
    return entities, convert
コード例 #3
0
    def _join_relation(self, relation_name: str, attributes: list, arguments: dict):
        """Register the joins and the select expression for a (forward) relation.

        The destination catalog and collection are derived from the ``ref`` of
        the relation attribute in the parent collection.

        :param relation_name: name of the relation as used in the query
        :param attributes: attributes handed to ``_json_build_attrs``
        :param arguments: arguments handed to ``_add_relation_joins``
        """
        parent_info = self._get_relation_info(self.relation_parents[relation_name])
        relation_attr_name = to_snake(self.relation_aliases[relation_name])

        # Definition of the relation attribute in the parent collection
        attr_spec = parent_info['collection']['attributes'][relation_attr_name]
        dst_catalog_name, dst_collection_name = \
            self.model.get_catalog_collection_names_from_ref(attr_spec['ref'])

        dst_info = self._collect_relation_info(relation_name, f'{dst_catalog_name}_{dst_collection_name}')

        select_alias = f"_{to_snake(relation_name)}"
        # JSON attributes of the destination, extended with its catalog and collection
        json_attrs = (
            f"{self._json_build_attrs(attributes, dst_info['alias'])}, "
            f"'_catalog', '{dst_catalog_name}', '_collection', '{dst_collection_name}'"
        )

        # The relation is looked up from the parent (source) side
        joined_relation_name = get_relation_name(
            self.model, parent_info['catalog_name'], parent_info['collection_name'], relation_attr_name)

        srcvalue_requested = self._is_srcvalue_requested(attributes)
        is_many = self._is_many(attr_spec['type'])
        self._add_relation_joins(parent_info, dst_info, joined_relation_name, arguments,
                                 srcvalue_requested, relation_attr_name, is_many)
        self.select_expressions.append(f"json_build_object({json_attrs}) {select_alias}")
コード例 #4
0
def clear_test_dbs():
    """
    Clear the GOB test databases

    Truncates the tables of all collections in the test catalogue and of their
    relations, and deletes the corresponding events. Nothing is executed when
    the model holds no collections for the test catalogue.

    :return: None
    """
    model = GOBModel()

    # Test data is contained in the test_catalog and relation catalog
    test_catalog = "test_catalogue"
    rel_catalog = "rel"

    # Collect names of all test tables and entities
    tables = []
    test_entities = []
    rel_entities = []

    for collection_name in model.get_collections(test_catalog):
        collection = model.get_collection(test_catalog, collection_name)
        tables.append(model.get_table_name(test_catalog, collection_name))
        test_entities.append(collection_name)

        refs = {
            **collection['references'],
            **collection['very_many_references']
        }
        for ref in refs:
            ref_name = get_relation_name(model, test_catalog, collection_name, ref)
            tables.append(model.get_table_name(rel_catalog, ref_name))
            rel_entities.append(ref_name)

    if not tables:
        # No test collections: max() below would raise on the empty list and
        # the resulting SQL would be invalid. There is nothing to clear.
        return

    # Nicely format the SQL statement
    indent = ",\n" + ' ' * 17
    table_length = max(len(table) for table in tables)

    # Provide for SQL statements
    truncate_tables = ";\n".join(f"TRUNCATE TABLE {table:{table_length}} CASCADE" for table in tables)
    test_entity_list = indent.join(f"'{e}'" for e in test_entities)
    rel_entity_list = indent.join(f"'{e}'" for e in rel_entities)

    # "entity IN ()" is a syntax error, so the relation events are only
    # deleted when there is at least one relation
    delete_rel_events = f"""
-- Delete test relation events
DELETE FROM events
WHERE catalogue = '{rel_catalog}'
  AND entity IN ({rel_entity_list});
""" if rel_entities else ""

    # Construct SQL statement
    statement = f"""
-- Truncate test tables
{truncate_tables};

-- Delete test entity events
DELETE
FROM events
WHERE catalogue = '{test_catalog}'
  AND entity IN ({test_entity_list});
{delete_rel_events}
-- Commit all changes
COMMIT;
"""
    exec_statement(statement)
コード例 #5
0
def _add_relations(query, catalog_name, collection_name):
    """Left-join the aggregated relation values of a collection onto a query.

    For every reference of the collection for which a relation name exists, a
    subquery aggregates the relation rows per source entity into a JSON array
    of source value / reference id objects. Rows that are deleted or whose
    expiration date has passed are left out. The array is added to the query
    as column ``ref:<reference>``.

    :param query: the query to extend
    :param catalog_name: name of the catalog of the collection
    :param collection_name: name of the collection
    :return: the extended query
    """
    model = GOBModel()
    collection = model.get_collection(catalog_name, collection_name)
    has_states = collection.get('has_states', False)

    src_table, _ = get_table_and_model(catalog_name, collection_name)

    for reference in collection['references']:
        relation_name = get_relation_name(model, catalog_name, collection_name, reference)
        if not relation_name:
            continue

        rel_table, _ = get_table_and_model('rel', relation_name)

        # Columns that identify the source entity; these are selected and grouped on
        key_columns = [rel_table.src_id]
        if has_states:
            key_columns.append(rel_table.src_volgnummer)

        # One JSON object per relation row, aggregated into an array per source entity
        source_value = func.json_build_object(
            FIELD.SOURCE_VALUE, getattr(rel_table, FIELD.SOURCE_VALUE),
            FIELD.REFERENCE_ID, rel_table.dst_id
        )
        source_values = func.json_agg(source_value).label('source_values')

        # Only relation rows that are neither deleted nor expired
        not_deleted = getattr(rel_table, FIELD.DATE_DELETED).is_(None)
        expiration_date = getattr(rel_table, FIELD.EXPIRATION_DATE)
        not_expired = or_(expiration_date.is_(None), expiration_date > func.now())

        subselect = session \
            .query(*key_columns, source_values) \
            .filter(and_(not_deleted, not_expired)) \
            .group_by(*key_columns) \
            .subquery()

        on_clauses = [getattr(src_table, FIELD.ID) == subselect.c.src_id]
        if has_states:
            on_clauses.append(getattr(src_table, FIELD.SEQNR) == subselect.c.src_volgnummer)

        reference_column = subselect.c.source_values.label(f"ref:{reference}")
        query = query.join(subselect, and_(*on_clauses), isouter=True).add_columns(reference_column)

    return query
コード例 #6
0
def prepare_relate(msg):
    """
    Entry point of the relate process.

    A message that does not specify catalog, collection and attribute is split
    into separate relate jobs and the result of publishing the (split) message
    is returned. A message that specifies all three is redirected to the
    relation itself: the header is pointed at the 'rel' catalog and the relation
    name, and the original catalog, collection and attribute are preserved.

    :param msg: a message from the broker containing the catalog and collections (optional)
    :return: the result message of the relate preparation step
    """
    incoming_header = msg.get('header', {})
    catalog_name = incoming_header.get('catalogue')
    collection_name = incoming_header.get('collection')
    attribute_name = incoming_header.get('attribute')

    # Stamp the header with the relate application details
    msg["header"] = {
        **incoming_header,
        "version": "0.1",
        "source": "GOB",
        "application": "GOBRelate",
        "entity": collection_name
    }
    msg["header"]["timestamp"] = datetime.datetime.utcnow().isoformat()

    logger.configure(msg, "RELATE")

    if catalog_name and collection_name and attribute_name:
        # The job is complete: add the relation name and pass it on to the next relate step
        logger.info(f"** Relate {catalog_name} {collection_name} {attribute_name}")

        relation_name = get_relation_name(GOBModel(), catalog_name, collection_name, attribute_name)

        msg["header"].update({
            "catalogue": "rel",
            "collection": relation_name,
            "entity": relation_name,
            "original_catalogue": catalog_name,
            "original_collection": collection_name,
            "original_attribute": attribute_name,
        })
        return msg

    # Catalog, collection or attribute is missing: split the job
    logger.info("Splitting relate job")

    _split_job(msg)
    msg['header']['is_split'] = True

    return publish_result(msg, [])
コード例 #7
0
    def get(self, catalog_name, collection_name, attribute):
        """Look up the materialized view definition for a relation attribute.

        The relation name is derived from the catalog, collection and
        attribute; the lookup itself is delegated to ``get_by_relation_name``.

        :param catalog_name: name of the catalog that owns the relation
        :param collection_name: name of the collection that owns the relation
        :param attribute: name of the relation attribute
        :return: the result of ``get_by_relation_name`` for the derived relation name
        """
        name = model_relations.get_relation_name(self.model, catalog_name, collection_name, attribute)
        return self.get_by_relation_name(name)
コード例 #8
0
ファイル: filters.py プロジェクト: jjmurre/GOB-API
    def _get_relation_model(self):
        """Return the model of the relation table for this attribute.

        The owner of the relation is the source object when ``src_side`` is
        'src' and the destination model otherwise. Its table name yields the
        catalog and collection from which the relation name is derived.
        """
        if self.src_side == 'src':
            owner = self.src_object
        else:
            owner = self.dst_model

        # Derive catalog and collection of the owner from its table name
        catalog_name, collection_name = _get_catalog_collection_name_from_table_name(
            getattr(owner, '__tablename__'))

        relation_name = get_relation_name(gobmodel, catalog_name, collection_name, self.attribute_name)
        return models[f"rel_{relation_name}"]
コード例 #9
0
def process_relate(msg: dict):
    """
    Run the actual relate process for one relation.

    The message is checked, after which the Relater builds the new or updated
    relations. The result is returned in the form of an import result, so that
    it can be compared as if it was the result of an import job.

    :param msg: a message from the broker containing the catalog and collections (optional)
    :return: the result message of the relate process
    """
    logger.configure(msg, "RELATE SRC")

    _check_message(msg)
    header = msg.get('header')

    logger.info("Relate table started")

    mode = header.get('mode', "update")
    full_update = mode == "full"
    if full_update:
        logger.info("Full relate requested")

    # Catalog, collection and attribute that identify the relation
    relation_key = (header[CATALOG_KEY], header[COLLECTION_KEY], header[ATTRIBUTE_KEY])

    filename, confirms = Relater(*relation_key).update(full_update)

    logger.info("Relate table completed")

    relation_name = get_relation_name(GOBModel(), *relation_key)

    # NOTE(review): the timestamp is read from the top level of msg, not from its header - confirm
    result_header = {
        **msg["header"],
        "catalogue": "rel",
        "collection": relation_name,
        "entity": relation_name,
        "source": "GOB",
        "application": "GOB",
        "version": RELATE_VERSION,
        "timestamp": msg.get("timestamp", datetime.datetime.utcnow().isoformat()),
    }

    return {
        "header": result_header,
        "summary": logger.get_summary(),
        "contents_ref": filename,
        "confirms": confirms,
    }
コード例 #10
0
ファイル: to_db.py プロジェクト: Amsterdam/GOB-API
def _dump_relations(catalog_name, collection_name, config):
    """Dump the relations of catalog_name, collection_name.

    This is a generator that yields progress messages. Note that it sets
    ``config['schema']`` on the dict that is passed in; as for any generator
    this happens when iteration starts, not when the function is called.

    :param catalog_name: name of the catalog that owns the relations
    :param collection_name: name of the collection that owns the relations
    :param config: dump configuration; ``force_full`` requests a full dump
    :return: yields progress messages
    """
    config['schema'] = catalog_name
    _, model = get_table_and_model(catalog_name, collection_name)

    # Iterate over a snapshot of the reference names
    for relation in list(model['references'].keys()):
        relation_name = get_relation_name(GOBModel(), catalog_name, collection_name, relation)

        if not relation_name or relation_name in SKIP_RELATIONS:
            # relation_name is None when relation does not exist (yet)
            yield f"Skipping {catalog_name} {collection_name} {relation}\n"
            continue

        yield f"Export {catalog_name} {collection_name} {relation}\n"

        rel_dumper = DbDumper('rel', relation_name, config)
        try:
            yield from rel_dumper.dump_to_db(full_dump=config.get('force_full', False))
        finally:
            # Always disconnect, also when the dump fails or when the consumer
            # stops iterating before the dump has completed
            rel_dumper.disconnect()
コード例 #11
0
ファイル: test_relations.py プロジェクト: Amsterdam/GOB-Core
    def test_relation_name(self, mock_name_compressor):
        """Relation names are composed of abbreviations and run through the NameCompressor."""
        src_catalog = {'abbreviation': 'cat'}
        src_collection = {
            'abbreviation': 'col',
            'attributes': {
                'reference': {'ref': 'src:dst'},
            },
        }
        src = {
            "catalog": src_catalog,
            "catalog_name": "catalog",
            "collection": src_collection,
            "collection_name": "collection",
        }
        dst = {
            "catalog": {'abbreviation': 'dst_cat'},
            "catalog_name": "catalog",
            "collection": {'abbreviation': 'dst_col'},
            "collection_name": "collection",
        }

        # Let the compressor return its input, so that the uncompressed name can be checked
        mock_name_compressor.compress_name.side_effect = lambda s: s

        expected = 'cat_col_dst_cat_dst_col_reference'
        self.assertEqual(_get_relation_name(src, dst, "reference"), expected)
        # Assert that NameCompressor is used
        mock_name_compressor.compress_name.assert_called_with(expected)

        # The mocked model returns the source catalog and collection for every lookup
        model = mock.MagicMock()
        model.get_catalog.return_value = src_catalog
        model.get_collection.return_value = src_collection

        self.assertEqual(
            get_relation_name(model, "catalog", "collection", "reference"),
            'cat_col_cat_col_reference'
        )
コード例 #12
0
    def test_relation_name(self):
        """Relation names are composed of the source and destination abbreviations and the attribute."""
        src_catalog = {'abbreviation': 'cat'}
        src_collection = {
            'abbreviation': 'col',
            'attributes': {
                'reference': {'ref': 'src:dst'},
            },
        }
        src = {
            "catalog": src_catalog,
            "catalog_name": "catalog",
            "collection": src_collection,
            "collection_name": "collection",
        }
        dst = {
            "catalog": {'abbreviation': 'dst_cat'},
            "catalog_name": "catalog",
            "collection": {'abbreviation': 'dst_col'},
            "collection_name": "collection",
        }

        # Name from explicit source and destination info
        self.assertEqual(
            _get_relation_name(src, dst, "reference"),
            'cat_col_dst_cat_dst_col_reference'
        )

        # The mocked model returns the source catalog and collection for every lookup
        model = mock.MagicMock()
        model.get_catalog.return_value = src_catalog
        model.get_collection.return_value = src_collection

        self.assertEqual(
            get_relation_name(model, "catalog", "collection", "reference"),
            'cat_col_cat_col_reference'
        )
コード例 #13
0
def check_very_many_relations(src_catalog_name, src_collection_name,
                              src_field_name):
    """
    Check very many relations for any dangling relations

    Dangling can be because a relation exists without any bronwaarde
    or the bronwaarde cannot be matched with any referenced entity.
    This can be checked in the relation table instead of the json
    attribute itself.

    Two queries are run against the source table, left joined with the
    relation table: one for rows without a bronwaarde and one for rows with a
    bronwaarde but without a destination id. The results are handed to
    ``_query_missing``.

    :param src_catalog_name: name of the source catalog
    :param src_collection_name: name of the source collection
    :param src_field_name: name of the relation attribute in the source collection
    :return: None
    """
    # Get the source catalog, collection and field for the given names
    model = GOBModel()
    src_table_name = model.get_table_name(src_catalog_name,
                                          src_collection_name)
    src_has_states = model.has_states(src_catalog_name, src_collection_name)

    relation_table_name = "rel_" + get_relation_name(
        model, src_catalog_name, src_collection_name, src_field_name)

    select = ["src._id as id", "rel.bronwaarde as bronwaarde"]
    group_by = ["src._id", "rel.bronwaarde"]
    join_on = ['src._id = rel.src_id']
    if src_has_states:
        state_select = [
            "src.volgnummer", "src.begin_geldigheid", "src.eind_geldigheid"
        ]
        select.extend(state_select)
        # Every selected, non-aggregated column has to be grouped on
        group_by.extend(state_select)
        # Same column as in the select: the source table has volgnummer, not _volgnummer
        join_on.append('src.volgnummer = rel.src_volgnummer')
    select = ",\n    ".join(select)
    group_by = ",\n    ".join(group_by)
    # Join conditions are combined with AND; a comma is not valid in an ON clause
    join_on = " AND\n    ".join(join_on)

    name = f"{src_collection_name} {src_field_name}"

    # Source rows for which no bronwaarde is registered in the relation table
    bronwaarden = f"""
SELECT
    {select}
FROM
    {src_table_name} src
LEFT OUTER JOIN {relation_table_name} rel
ON
    {join_on}
WHERE
    src._date_deleted IS NULL AND
    rel.bronwaarde IS NULL
GROUP BY
    {group_by}
"""
    _query_missing(bronwaarden, QA_CHECK.Sourcevalue_exists, name)

    # Source rows with a bronwaarde that has no destination
    dangling = f"""
SELECT
    {select}
FROM
    {src_table_name} src
LEFT OUTER JOIN {relation_table_name} rel
ON
    {join_on}
WHERE
    src._date_deleted IS NULL AND
    rel.bronwaarde IS NOT NULL AND
    rel.dst_id IS NULL
GROUP BY
    {group_by}
"""
    _query_missing(dangling, QA_CHECK.Reference_exists, name)
コード例 #14
0
def _get_relation_check_query(query_type, src_catalog_name,
                              src_collection_name, src_field_name,
                              filter_applications: list):
    """Build the SQL query that checks a relation for dangling or missing values.

    A "dangling" query joins the source table with the relation table and
    selects the relation rows without a destination. A "missing" query selects
    the source rows for which the source value of the attribute is empty; for
    a ManyReference the JSON array of the attribute is unpacked first.

    :param query_type: "dangling" or "missing"
    :param src_catalog_name: name of the source catalog
    :param src_collection_name: name of the source collection
    :param src_field_name: name of the relation attribute in the source collection
    :param filter_applications: when not empty, only rows of these applications are selected
    :return: the SQL query as a string
    """
    assert query_type in [
        "dangling", "missing"
    ], "Relation check query expects type to be dangling or missing"

    is_dangling = query_type == "dangling"
    is_missing = query_type == "missing"

    model = GOBModel()
    src_collection = model.get_collection(src_catalog_name,
                                          src_collection_name)
    src_table_name = model.get_table_name(src_catalog_name,
                                          src_collection_name)
    src_field = src_collection['all_fields'].get(src_field_name)
    src_has_states = model.has_states(src_catalog_name, src_collection_name)

    is_many = src_field['type'] == "GOB.ManyReference"

    relation_table_name = "rel_" + get_relation_name(
        model, src_catalog_name, src_collection_name, src_field_name)

    # The source value comes from the relation table (dangling) or from the source attribute
    if is_dangling:
        source_value_column = f"rel.{FIELD.SOURCE_VALUE}"
    else:
        source_value_column = f"src.{src_field_name}->>'{FIELD.SOURCE_VALUE}' as {FIELD.SOURCE_VALUE}"

    # Columns of the outer query and of the subquery that unpacks a ManyReference
    outer_columns = [f"src.{FIELD.ID} as id", f"src.{FIELD.EXPIRATION_DATE}", source_value_column]
    inner_columns = [
        FIELD.ID, FIELD.EXPIRATION_DATE, FIELD.DATE_DELETED,
        f"jsonb_array_elements({src_field_name}) as {src_field_name}"
    ]
    join_conditions = ['src._id = rel.src_id']

    if src_has_states:
        state_columns = [FIELD.SEQNR, FIELD.START_VALIDITY, FIELD.END_VALIDITY]
        inner_columns += state_columns
        outer_columns += [f"src.{column}" for column in state_columns]
        join_conditions.append('src.volgnummer = rel.src_volgnummer')

    column_separator = ",\n    "
    inner_select = column_separator.join(inner_columns)
    outer_select = column_separator.join(outer_columns)
    join_on = " AND ".join(join_conditions)

    if is_many and is_missing:
        # Unpack the JSON array, so that every element can be checked separately
        src = f"""
(
SELECT
    {inner_select}
FROM
    {src_table_name}
) AS src
"""
    else:
        src = f"{src_table_name} src"

    conditions = [f"src.{FIELD.DATE_DELETED} IS NULL"]

    if is_missing:
        # For missing relations check if bronwaarde is empty
        conditions.append(f"{src_field_name}->>'bronwaarde' IS NULL")

    if is_dangling:
        # For dangling relations check if destination is empty
        conditions += ["rel.dst_id IS NULL", f"rel.{FIELD.DATE_DELETED} IS NULL"]

    if filter_applications:
        application_filter = ' OR '.join(
            f"src.{FIELD.APPLICATION} = '{application}'"
            for application in filter_applications
        )
        conditions.append(f"({application_filter})")

    where = " AND ".join(conditions)

    query_parts = [f"""
SELECT
    {outer_select}
FROM
    {src}"""]

    if is_dangling:
        query_parts.append(f"""
JOIN {relation_table_name} rel
ON
    {join_on}
""")

    query_parts.append(f"""
WHERE
    {where}
""")
    return "".join(query_parts)
コード例 #15
0
ファイル: to_db.py プロジェクト: Amsterdam/GOB-API
    def create_utility_view(self):
        """Creates view with utility columns for relating without relation table

        View contains all columns from the main table, plus the RELATION_id, RELATION_VOLGNUMMER, RELATION_ref
        and RELATION_bronwaarde columns for each RELATION (for example ligt_in_buurt_id, ligt_in_buurt_ref and
        ligt_in_buurt_bronwaarde)

        This is a generator: it yields progress messages while the view is built. Relations without a
        relation name or without an existing relation table are left out of the view. An existing view
        with the same name is dropped before the new view is created.

        :return: yields progress messages
        """
        yield "Creating view\n"

        # The lower-cased abbreviation of the collection is used as alias for the main table
        main_alias = self.model['abbreviation'].lower()
        src_has_states = self.model.get('has_states', False)

        # Collect all necessary joins and select statements
        joins = []
        selects = [f'{main_alias}.*']

        for relation in self.model['references'].keys():
            # Add a join and selects for each relation
            relation_name = get_relation_name(GOBModel(), self.catalog_name, self.collection_name, relation)

            if not relation_name:
                # Undefined relation
                continue

            if not self._table_exists(relation_name):
                yield f"Excluding relation {relation_name} from view because table does not exist\n"
                continue

            # The relation table is addressed in the schema that is named after the catalog
            relation_table = f'{self.catalog_name}.{relation_name}'

            # Determine if ManyReference and if destination has states
            src_field = self.model['all_fields'].get(relation)
            dst_catalog_name, dst_collection_name = GOBModel().split_ref(src_field['ref'])
            dst_has_states = GOBModel().has_states(dst_catalog_name, dst_collection_name)
            is_many = src_field['type'] == fully_qualified_type_name(GOB.ManyReference)

            # Join on id, and also on volgnummer when the source collection has states.
            # The name of the relation attribute is used as alias for the joined relation.
            on = f'{relation}.src_id = {main_alias}.{FIELD.ID}' + (
                f' and {relation}.src_volgnummer = {main_alias}.{FIELD.SEQNR}' if src_has_states else ''
            )

            if is_many:
                # For a ManyReference, we need to aggregate the values in an array
                join = f"""
left join (
    -- Aggregates id, volgnummer and ref for {relation} per src object. bronwaarde is already in the src table
    select
        rel.src_id,
        {'rel.src_volgnummer,' if src_has_states else ''}
        array_agg(rel.dst_id) dst_id,
        {'array_agg(rel.dst_volgnummer) dst_volgnummer,' if dst_has_states else ''}
        array_agg({self._ref('rel', dst_has_states)}) "ref"
    from {relation_table} rel
    group by rel.src_id{', rel.src_volgnummer' if src_has_states else ''}
) {relation} on {on}
"""
                # The ref is already aggregated in the subquery
                selects.append(f'{relation}.ref {relation}_ref')
            else:
                # For a single Reference we expect one row from the relation table
                join = f"left join {relation_table} {relation} on {on}"
                selects.append(f'{self._ref(relation, dst_has_states)} {relation}_ref')

            joins.append(join)
            # The destination id is selected for both single and many references
            selects += [
                f'{relation}.dst_id {relation}_id',
            ]

            # The destination volgnummer is only selected when the destination has states
            if dst_has_states:
                selects += [f'{relation}.dst_volgnummer {relation}_volgnummer']

        # Build query based on collected joins and selects
        # A backslash is not allowed in an f-string expression before Python 3.12, hence the constant
        NEWLINE = '\n'
        query = f"""
select {f',{NEWLINE}       '.join(selects)}
from {self.catalog_name}.{self.collection_name} {main_alias}
{f'{NEWLINE}'.join(joins)}
"""
        # Create the view
        viewname = f'{self.catalog_name}.v_{self.collection_name}'
        self._execute(f"drop view if exists {viewname}")
        self._execute(f"create view {viewname} as {query}")

        yield f"Utility view {viewname} created\n"