예제 #1
0
파일: gob.py 프로젝트: Amsterdam/GOB-Core
def _relation_indexes_for_collection(catalog_name, collection_name, collection,
                                     idx_prefix):
    """Build index definitions for the relation columns of a collection.

    Every field in ``collection['all_fields']`` of type ``GOB.Reference`` or
    ``GOB.ManyReference`` is looked up in GOBSources. Each relation
    specification found for such a field yields two index definitions: one on
    the source column in this collection's table and one on the destination
    attribute in the table of the referenced collection.

    :param catalog_name: name of the catalog the collection belongs to
    :param collection_name: name of the collection
    :param collection: collection definition; its ``all_fields`` mapping is read
    :param idx_prefix: prefix for the names of the source-side indexes
    :return: dict mapping index name to a dict with the keys ``table_name``,
        ``columns`` and ``type``
    """
    model = GOBModel()
    sources = GOBSources()

    src_table = model.get_table_name(catalog_name, collection_name)
    src_fields = collection['all_fields']

    # Map every reference column to the reference it points at
    ref_by_column = {}
    for field_name, field_desc in src_fields.items():
        if field_desc['type'] in ('GOB.Reference', 'GOB.ManyReference'):
            ref_by_column[field_name] = field_desc['ref']

    result = {}

    # Look up the source and destination attributes of each relation and define the indexes
    for col, ref in ref_by_column.items():
        dst_table = model.get_table_name_from_ref(ref)
        dst_collection = model.get_collection_from_ref(ref)
        dst_catalog_name, _ = model.get_catalog_collection_names_from_ref(ref)
        dst_catalog = model.get_catalog(dst_catalog_name)

        for relation in sources.get_field_relations(catalog_name,
                                                    collection_name, col):
            dst_idx_prefix = f"{dst_catalog['abbreviation']}_{dst_collection['abbreviation']}".lower(
            )
            # Fall back to the reference column itself when the relation names no source attribute
            src_column = f"{relation['source_attribute'] if 'source_attribute' in relation else col}"

            # Index on the source column
            src_index_name = _hashed_index_name(
                idx_prefix, _remove_leading_underscore(src_column))
            result[src_index_name] = {
                "table_name": src_table,
                "columns": [src_column],
                "type": _get_special_column_type(
                    src_fields[src_column]['type']),
            }

            # Index on the destination column
            dst_column = relation['destination_attribute']
            dst_index_name = _hashed_index_name(
                dst_idx_prefix, _remove_leading_underscore(dst_column))
            result[dst_index_name] = {
                "table_name": dst_table,
                "columns": [dst_column],
                "type": _get_special_column_type(
                    dst_collection['all_fields'][dst_column]['type']),
            }

    return result
예제 #2
0
class TestSources(unittest.TestCase):
    """Unit tests for the relation lookups of GOBSources."""

    def setUp(self):
        """Store a new GOBSources instance in self.sources before each test."""
        self.sources = GOBSources()

    def test_get_relations(self):
        """get_relations yields a list for the 'nap' / 'peilmerken' collection."""
        relations = self.sources.get_relations('nap', 'peilmerken')

        self.assertIsInstance(relations, list)

    def test_get_field_relations_keyerror(self):
        """get_field_relations gives an empty list when get_relations raises KeyError."""
        self.sources.get_relations = MagicMock(side_effect=KeyError)

        field_relations = self.sources.get_field_relations(
            'catalog', 'collection', 'fieldname')

        self.assertEqual([], field_relations)
예제 #3
0
class TestSources(unittest.TestCase):
    """Unit tests for GOBSources."""

    def setUp(self):
        """Store a new GOBSources instance in self.sources before each test."""
        self.sources = GOBSources()

    def test_get_relations(self):
        """get_relations yields a list for the 'nap' / 'peilmerken' collection."""
        relations = self.sources.get_relations('nap', 'peilmerken')

        self.assertIsInstance(relations, list)
예제 #4
0
def _check_message(msg: dict):
    """Validate the header of a message.

    The header must hold a non-empty value for the catalog, collection and
    attribute keys. The catalog and the catalog/collection combination must be
    known to GOBModel, and GOBSources must have a relation specification for
    the attribute.

    :param msg: message dict; only its 'header' entry is inspected
    :raises GOBException: when any of the checks fails
    :return: None
    """
    header = msg.get('header', {})

    # Every required key needs a truthy value in the header
    for key in (CATALOG_KEY, COLLECTION_KEY, ATTRIBUTE_KEY):
        if header.get(key):
            continue
        raise GOBException(f"Missing {key} attribute in header")

    model, sources = GOBModel(), GOBSources()

    catalog = model.get_catalog(header[CATALOG_KEY])
    if not catalog:
        raise GOBException(f"Invalid catalog name {header[CATALOG_KEY]}")

    collection = model.get_collection(header[CATALOG_KEY], header[COLLECTION_KEY])
    if not collection:
        raise GOBException(f"Invalid catalog/collection combination: {header[CATALOG_KEY]}/{header[COLLECTION_KEY]}")

    relation_specs = sources.get_field_relations(header[CATALOG_KEY], header[COLLECTION_KEY], header[ATTRIBUTE_KEY])
    if not relation_specs:
        raise GOBException(f"Missing relation specification for {header[CATALOG_KEY]} {header[COLLECTION_KEY]} "
                           f"{header[ATTRIBUTE_KEY]}")
예제 #5
0
def check_relations(src_catalog_name, src_collection_name, src_field_name):
    """
    Run the quality checks on the relations of a source field

    Two queries are executed: a "missing" query, reported as
    QA_CHECK.Sourcevalue_exists, and a "dangling" query, reported as
    QA_CHECK.Reference_exists. A relation can be dangling because it exists
    without any bronwaarde, or because the bronwaarde cannot be matched with
    any referenced entity.

    Sources that are marked none_allowed are left out of the checks. When every
    source allows empty relations the checks are skipped.

    :param src_catalog_name: catalog of the source collection
    :param src_collection_name: source collection
    :param src_field_name: relation field in the source collection
    :return: None
    """
    relation_specs = GOBSources().get_field_relations(src_catalog_name,
                                                      src_collection_name,
                                                      src_field_name)

    # Keep only the sources for which an empty relation is not allowed
    check_sources = []
    for source in relation_specs:
        if source.get('none_allowed', False):
            continue
        check_sources.append(source['source'])

    if not check_sources:
        logger.info(
            f"All sources for {src_catalog_name} {src_collection_name} {src_field_name} allow empty "
            f"relations. Skipping check.")
        return

    # A source filter is only needed when some sources were excluded above;
    # when all sources have to be checked no filter is passed
    if len(relation_specs) == len(check_sources):
        check_sources = None

    name = f"{src_collection_name} {src_field_name}"

    query = _get_relation_check_query("missing", src_catalog_name,
                                      src_collection_name, src_field_name,
                                      check_sources)
    _query_missing(query, QA_CHECK.Sourcevalue_exists, name)

    query = _get_relation_check_query("dangling", src_catalog_name,
                                      src_collection_name, src_field_name,
                                      check_sources)
    _query_missing(query, QA_CHECK.Reference_exists, name)
예제 #6
0
파일: amschema.py 프로젝트: jjmurre/GOB-API
import json
import os
import sys

# To have access to the gobapi module, while still being able to run python amschema.py
sys.path.append(os.path.join('..'))
from gobapi.auth.auth_query import Authority  # noqa: E402, module level import not at top of file

# Suppress any output from GOBModel class (otherwise GOB Model messages can appear in the schema output).
# The devnull handle is closed by the with-block and stdout is restored in the finally clause,
# so a failing import or constructor cannot leave stdout redirected or the handle open.
with open(os.devnull, 'w') as _devnull:
    sys.stdout = _devnull
    try:
        from gobcore.model import GOBModel  # noqa: E402, module level import not at top of file
        from gobcore.model.metadata import FIELD  # noqa: E402, module level import not at top of file
        from gobcore.sources import GOBSources  # noqa: E402, module level import not at top of file

        model = GOBModel()
        sources = GOBSources()
    finally:
        sys.stdout = sys.__stdout__


def get_schema(catalog_name, collection_name=None):
    """
    Get an Amsterdam Schema for the given catalog

    If a collection is specified only the schema for the given collection is returned

    :param catalog_name: name of the catalog to get the schema for
    :param collection_name: optional collection name; when it is truthy the schema is
        built by _get_collection_schema for that collection only
    :return:
    """
    if collection_name:
        schema = _get_collection_schema(catalog_name, collection_name)
예제 #7
0
 def setUp(self):
     """Store a new GOBSources instance in self.sources."""
     self.sources = GOBSources()
예제 #8
0
def _split_job(msg: dict):
    """Split a job into one 'relate' workflow request per relation attribute.

    The catalog, collection and attribute are read from the message header
    ('catalogue', 'collection' and 'attribute'). Without a collection every
    collection of the catalog is processed; without an attribute every
    reference attribute of a collection is processed. For each remaining
    catalog/collection/attribute combination a copy of the message is
    published on WORKFLOW_EXCHANGE with WORKFLOW_REQUEST_KEY.

    Attributes without a relation specification and attributes whose first
    relation specification is a VeryManyReference are skipped.

    :param msg: the incoming message; it is not modified
    :raises AssertionError: when the catalog name is missing or invalid, no
        collections are specified or found, or a collection name is invalid
    :return: None
    """
    header = msg.get('header', {})
    catalog_name = header.get('catalogue')
    collection_name = header.get('collection')
    attribute_name = header.get('attribute')

    # Explicit raises instead of assert statements: asserts are removed when Python runs
    # with -O. AssertionError is kept so callers see the same exception type as before.
    if catalog_name is None:
        raise AssertionError("A catalog name is required")

    model = GOBModel()
    catalog = model.get_catalog(catalog_name)

    if catalog is None:
        raise AssertionError(f"Invalid catalog name '{catalog_name}'")

    if collection_name is None:
        collection_names = model.get_collection_names(catalog_name)
    else:
        collection_names = [collection_name]

    if not collection_names:
        raise AssertionError(f"No collections specified or found for catalog {catalog_name}")

    # One GOBSources instance serves all lookups below
    sources = GOBSources()

    with MessageBrokerConnection(CONNECTION_PARAMS) as connection:
        for collection_name in collection_names:
            collection = model.get_collection(catalog_name, collection_name)
            if collection is None:
                raise AssertionError(f"Invalid collection name '{collection_name}'")

            logger.info(f"** Split {collection_name}")

            attributes = model._extract_references(collection['attributes']) \
                if attribute_name is None \
                else [attribute_name]

            for attr_name in attributes:
                relation_specs = sources.get_field_relations(catalog_name, collection_name, attr_name)

                if not relation_specs:
                    logger.info(f"Missing relation specification for {catalog_name} {collection_name} "
                                f"{attr_name}. Skipping")
                    continue

                if relation_specs[0]['type'] == fully_qualified_type_name(VeryManyReference):
                    logger.info(f"Skipping VeryManyReference {catalog_name} {collection_name} {attr_name}")
                    continue

                logger.info(f"Splitting job for {catalog_name} {collection_name} {attr_name}")

                # Build a new header dict so the header of the incoming message stays untouched
                split_header = {
                    **header,
                    "catalogue": catalog_name,
                    "collection": collection_name,
                    "attribute": attr_name,
                    "split_from": header.get('jobid'),
                }

                # Drop the ids of the original job and step from the split message.
                # pop() tolerates a header that has no jobid or stepid.
                split_header.pop('jobid', None)
                split_header.pop('stepid', None)

                split_msg = {
                    **msg,
                    "header": split_header,
                    "workflow": {
                        "workflow_name": "relate",
                    }
                }

                connection.publish(WORKFLOW_EXCHANGE, WORKFLOW_REQUEST_KEY, split_msg)