def test_catalog_collection_names_from_ref(self):
    model = GOBModel()
    model.split_ref = MagicMock()
    ref = random_string()

    result = model.get_catalog_collection_names_from_ref(ref)
    model.split_ref.assert_called_with(ref)
    self.assertEqual(model.split_ref.return_value, result)

def test_split_table_name(self):
    model = GOBModel()

    testcases = ['brk_', '_brk', '', '_', 'brk']

    for testcase in testcases:
        with self.assertRaisesRegexp(GOBException, "Invalid table name"):
            model._split_table_name(testcase)

def test_load_schema_error(self, mock_print, mock_load_schema):
    mock_load_schema.side_effect = SchemaException
    model = GOBModel()
    model._data = {
        'cat_a': {
            'collections': {
                'coll_a': {
                    'attributes': {
                        'some': 'attribute',
                    },
                },
                'coll_b': {
                    '_attributes': {
                        'some': 'attribute',
                    },
                    'schema': 'schema_b',
                }
            }
        },
    }

    expected = copy.deepcopy(model._data)
    expected['cat_a']['collections']['coll_b']['attributes'] = \
        expected['cat_a']['collections']['coll_b']['_attributes']

    model._load_schemas()
    self.assertEqual(expected, model._data)

    mock_print.assert_called_once()
    self.assertTrue(mock_print.call_args[0][0].startswith('ERROR: failed to load schema'))

def query_reference_entities(catalog, collection, reference_name, src_id):
    assert _Base
    _session = get_session()

    gob_model = GOBModel()
    rel_catalog_name = 'rel'
    rel_collection_name = get_relation_name(gob_model, catalog, collection, reference_name)

    rel_table, rel_model = get_table_and_model(rel_catalog_name, rel_collection_name)

    dst_catalog_name, dst_collection_name = gob_model.get_collection(
        catalog, collection)['references'][reference_name]['ref'].split(':')

    # Destination table and model
    dst_table, dst_model = get_table_and_model(dst_catalog_name, dst_collection_name)

    query = _session.query(dst_table) \
        .join(rel_table, dst_table._id == rel_table.dst_id) \
        .filter(rel_table.src_id == src_id)

    # Exclude all records with date_deleted
    all_entities = filter_deleted(query, dst_table)

    # The default result is where expiration date is in the future or empty
    all_entities = filter_active(all_entities, dst_table)

    entity_convert = _get_convert_for_model(dst_catalog_name, dst_collection_name, dst_model)

    return all_entities, entity_convert

def test_catalog_collection_from_abbr(self):
    model = GOBModel()
    model._data = {
        'cat_a': {
            'abbreviation': 'ca',
            'collections': {
                'col_a': {
                    'abbreviation': 'coa',
                    'some other': 'data',
                },
                'col_b': {
                    'abbreviation': 'cob',
                    'some other': 'data',
                }
            }
        }
    }

    self.assertEqual((
        {
            'abbreviation': 'ca',
            'collections': {
                'col_a': {
                    'abbreviation': 'coa',
                    'some other': 'data',
                },
                'col_b': {
                    'abbreviation': 'cob',
                    'some other': 'data',
                }
            }
        },
        {
            'abbreviation': 'cob',
            'some other': 'data',
        }
    ), model.get_catalog_collection_from_abbr('ca', 'cob'))

def _catalog(catalog_name):
    """Return the details of a specific GOB catalog

    :param catalog_name: e.g. meetbouten
    :return: the details of the specified catalog {name, href}
    """
    catalog = GOBModel().get_catalog(catalog_name)
    if catalog:
        result = {
            'name': catalog_name,
            'abbreviation': catalog['abbreviation'],
            'description': catalog['description'],
            'version': catalog['version'],
            'collections': list(catalog['collections'].keys()),
            '_embedded': {
                'collections': [
                    {
                        'name': collection_name,
                        'abbreviation': collection['abbreviation'],
                        '_links': {
                            'self': {
                                'href': f'/gob/{catalog_name}/{collection_name}/'
                            }
                        }
                    }
                    for collection_name, collection in GOBModel().get_collections(catalog_name).items()
                ]
            }
        }
        return hal_response(result)
    else:
        return not_found(f"Catalog {catalog_name} not found")

def test_load_schemas(self, mock_load_schema):
    mock_load_schema.return_value = 'loaded_attributes'
    model = GOBModel()
    model._data = {
        'cat_a': {
            'collections': {
                'coll_a': {
                    'attributes': {
                        'some': 'attribute',
                    },
                },
                'coll_b': {
                    '_attributes': {
                        'some': 'attribute',
                    },
                    'schema': 'schema_b',
                }
            }
        },
    }

    expected = copy.deepcopy(model._data)
    expected['cat_a']['collections']['coll_b']['attributes'] = 'loaded_attributes'

    model._load_schemas()
    self.assertEqual(expected, model._data)
    mock_load_schema.assert_called_with('schema_b', 'cat_a', 'coll_b')

def get_current_relations(catalog_name, collection_name, field_name):
    """Get the current relations as an iterable of dictionaries

    Each relation is transformed into a dictionary

    :param catalog_name:
    :param collection_name:
    :param field_name:
    :return: An iterable of dicts
    """
    model = GOBModel()
    table_name = model.get_table_name(catalog_name, collection_name)

    collection = model.get_collection(catalog_name, collection_name)
    field = collection['all_fields'][field_name]
    field_type = field['type']
    assert field_type in ["GOB.Reference", "GOB.ManyReference"], \
        f"Error: unexpected field type '{field_type}'"

    select = [FIELD.GOBID, field_name, FIELD.SOURCE, FIELD.ID]
    order_by = [FIELD.SOURCE, FIELD.ID]
    if model.has_states(catalog_name, collection_name):
        select += [FIELD.SEQNR, FIELD.END_VALIDITY]
        order_by += [FIELD.SEQNR, FIELD.START_VALIDITY]

    query = f"""
SELECT {', '.join(select)}
FROM {table_name}
WHERE {FIELD.DATE_DELETED} IS NULL
ORDER BY {', '.join(order_by)}
"""
    rows = _execute(query)
    for row in rows:
        yield dict(row)

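# A minimal usage sketch, assuming the usual GOB column constants
# (FIELD.GOBID == '_gobid', FIELD.SOURCE == '_source', FIELD.ID == '_id',
# FIELD.SEQNR == 'volgnummer') and an assumed stateful collection with a
# 'ligt_in_buurt' reference; the rendered query would then be shaped like:
#
#   SELECT _gobid, ligt_in_buurt, _source, _id, volgnummer, eind_geldigheid
#   FROM meetbouten_metingen
#   WHERE _date_deleted IS NULL
#   ORDER BY _source, _id, volgnummer, begin_geldigheid
#
# and the generator yields one dict per row:
for relation in get_current_relations('meetbouten', 'metingen', 'ligt_in_buurt'):
    print(relation['_id'], relation['ligt_in_buurt'])
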
def _add_relations(query, catalog_name, collection_name):
    gob_model = GOBModel()
    collection = gob_model.get_collection(catalog_name, collection_name)
    has_states = collection.get('has_states', False)
    src_table, _ = get_table_and_model(catalog_name, collection_name)

    for reference in collection['references']:
        relation_name = get_relation_name(gob_model, catalog_name, collection_name, reference)

        if not relation_name:
            continue

        rel_table, _ = get_table_and_model('rel', relation_name)

        select_attrs = [
            getattr(rel_table, 'src_id'),
            getattr(rel_table, 'src_volgnummer'),
        ] if has_states else [
            getattr(rel_table, 'src_id'),
        ]

        subselect = session \
            .query(
                *select_attrs,
                func.json_agg(
                    func.json_build_object(
                        FIELD.SOURCE_VALUE, getattr(rel_table, FIELD.SOURCE_VALUE),
                        FIELD.REFERENCE_ID, getattr(rel_table, 'dst_id')
                    )
                ).label('source_values')
            ).filter(
                and_(
                    getattr(rel_table, FIELD.DATE_DELETED).is_(None),
                    or_(
                        getattr(rel_table, FIELD.EXPIRATION_DATE).is_(None),
                        getattr(rel_table, FIELD.EXPIRATION_DATE) > func.now()
                    )
                )
            ).group_by(
                *select_attrs
            ).subquery()

        join_clause = [
            getattr(src_table, FIELD.ID) == getattr(subselect.c, 'src_id'),
            getattr(src_table, FIELD.SEQNR) == getattr(subselect.c, 'src_volgnummer')
        ] if has_states else [
            getattr(src_table, FIELD.ID) == getattr(subselect.c, 'src_id'),
        ]

        query = query.join(subselect, and_(*join_clause), isouter=True) \
            .add_columns(
                getattr(subselect.c, 'source_values').label(f"ref:{reference}")
            )

    return query

def _catalogs():
    model = GOBModel()
    catalogs = model.get_catalogs()

    result = {}
    for catalog_name, catalog in catalogs.items():
        # Collect the collection names per catalog (avoid shadowing `model`)
        result[catalog_name] = list(catalog['collections'].keys())

    return jsonify(result), 200, {'Content-Type': 'application/json'}

def _get_catalog_collection_name_from_table_name(table_name):
    """Gets the catalog and collection name from the table name

    :param table_name:
    """
    model = GOBModel()
    catalog_name = model.get_catalog_from_table_name(table_name)
    collection_name = model.get_collection_from_table_name(table_name)

    return catalog_name, collection_name

def test_get_inverse_relations(self, mock_get_inverse_relations):
    model = GOBModel()
    self.assertEqual(mock_get_inverse_relations.return_value, model.get_inverse_relations())

    # Call twice. Expect same result
    self.assertEqual(mock_get_inverse_relations.return_value, model.get_inverse_relations())

    # But should only be evaluated once
    mock_get_inverse_relations.assert_called_once()

def _reference_collection(catalog_name, collection_name, entity_id, reference_path):
    """Returns the (possibly very many) references from an entity within the
    specified collection with the specified id

    A list of references is returned.

    :param catalog_name: e.g. meetbouten
    :param collection_name: e.g. meting
    :param entity_id: unique identifier of the entity
    :param reference_path: unique identifier of the reference attribute, e.g. ligt_in_buurt
    :return:
    """
    model = GOBModel()
    entity_collection = model.get_collection(catalog_name, collection_name)

    if entity_collection:
        # Get the reference
        reference_name = reference_path.replace('-', '_')
        reference = entity_collection['references'].get(reference_name)

        # Check if the source entity exists
        entity = get_entity(catalog_name, collection_name, entity_id)

        if entity and reference:
            page = int(request.args.get('page', 1))
            page_size = int(request.args.get('page_size', 100))

            stream = request.args.get('stream', None) == "true"
            ndjson = request.args.get('ndjson', None) == "true"

            if stream:
                entities, convert = query_reference_entities(catalog_name, collection_name,
                                                             reference_name, entity_id)
                return Response(stream_entities(entities, convert), mimetype='application/json')
            elif ndjson:
                entities, convert = query_reference_entities(catalog_name, collection_name,
                                                             reference_name, entity_id)
                return Response(ndjson_entities(entities, convert), mimetype='application/x-ndjson')
            else:
                result, links = _reference_entities(catalog_name, collection_name,
                                                    reference_name, entity_id, page, page_size)
                return hal_response(data=result, links=links)

        response = not_found(f'{catalog_name}.{collection_name}:{entity_id} not found') if not entity \
            else not_found(f'{catalog_name}.{collection_name}:{entity_id}:{reference_name} not found')
        return response
    else:
        return not_found(f'{catalog_name}.{collection_name} not found')

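# Hypothetical requests this handler serves (route registration is not shown in
# this snippet; the /gob prefix is assumed from the catalog hrefs above, and
# dashes in the reference path map to underscores in the model lookup):
#
#   GET /gob/meetbouten/metingen/1234/ligt-in-buurt/?page=2&page_size=50
#   GET /gob/meetbouten/metingen/1234/ligt-in-buurt/?ndjson=true
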
def __init__(self, visitor: GraphQLVisitor):
    """
    :param visitor:
    """
    self.visitor = visitor
    self.selects = visitor.selects
    self.relation_parents = visitor.relationParents
    self.relation_aliases = visitor.relationAliases
    self.relation_info = {}
    self.model = GOBModel()

def test_get_collection_from_table_name(self):
    model = GOBModel()

    testcases = [
        ('brk_collection', 'collection'),
        ('brk_coll_lection', 'coll_lection'),
    ]

    for arg, result in testcases:
        self.assertEqual(result, model.get_collection_from_table_name(arg))

    with self.assertRaisesRegexp(GOBException, "Invalid table name"):
        model.get_collection_from_table_name('brk_')

def test_get_catalog_from_table_name(self):
    model = GOBModel()

    testcases = [
        ('brk_something', 'brk'),
        ('brk_long_table_name', 'brk'),
    ]

    for arg, result in testcases:
        self.assertEqual(result, model.get_catalog_from_table_name(arg))

    with self.assertRaisesRegexp(GOBException, "Invalid table name"):
        model.get_catalog_from_table_name('brk_')

def _derive_indexes() -> dict:
    model = GOBModel()
    indexes = {}

    for catalog_name, catalog in model.get_catalogs().items():
        for collection_name, collection in model.get_collections(catalog_name).items():
            entity = collection['all_fields']
            table_name = model.get_table_name(catalog_name, collection_name)

            is_relation_table = table_name.startswith('rel_')

            if is_relation_table:
                split_table_name = table_name.split('_')
                prefix = '_'.join(split_table_name[:5]) + '_' + _hash('_'.join(split_table_name[5:]))[:8]
            else:
                prefix = f"{catalog['abbreviation']}_{collection['abbreviation']}".lower()

            # Generate indexes on default columns
            default_indexes = _default_indexes_for_columns(
                list(entity.keys()),
                TABLE_TYPE_RELATION if is_relation_table else TABLE_TYPE_ENTITY
            )
            for idx_name, columns in default_indexes.items():
                indexes[_hashed_index_name(prefix, idx_name)] = {
                    "columns": columns,
                    "table_name": table_name,
                }

            # Add source, last event index
            columns = [FIELD.SOURCE, FIELD.LAST_EVENT + ' DESC']
            idx_name = "_".join([_remove_leading_underscore(column) for column in columns])
            indexes[_hashed_index_name(prefix, idx_name)] = {
                "columns": columns,
                "table_name": table_name,
            }

            # Generate indexes on referenced columns (GOB.Reference and GOB.ManyReference)
            indexes.update(**_relation_indexes_for_collection(catalog_name, collection_name,
                                                              collection, prefix))

            # Create special COALESCE(_expiration_date, '9999-12-31'::timestamp without time zone) index
            indexes[_hashed_index_name(prefix, f"{FIELD.EXPIRATION_DATE}_coalesce")] = {
                "columns": [f"COALESCE({FIELD.EXPIRATION_DATE}, '9999-12-31'::timestamp without time zone)"],
                "table_name": table_name,
            }

    return indexes

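# Illustration of the prefix derivation above, with an assumed relation table
# name (the exact output of _hash is implementation-defined; only its first 8
# characters are used):
#
#   table_name = 'rel_brk_kot_brk_sjt_heeft_vve_recht_op'
#   split_table_name[:5] -> ['rel', 'brk', 'kot', 'brk', 'sjt']
#   prefix = 'rel_brk_kot_brk_sjt' + '_' + _hash('heeft_vve_recht_op')[:8]
#
# For an ordinary entity table the prefix is simply the lowercased catalog and
# collection abbreviations, e.g. 'brk_kot'.
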
def __init__(self):
    path = os.path.join(os.path.dirname(__file__), 'gobsources.json')
    with open(path) as file:
        data = json.load(file)
    self._data = data
    self._model = GOBModel()
    self._relations = defaultdict(lambda: defaultdict(list))

    # Extract references for easy access in API
    for source_name, source in self._data.items():
        self._extract_relations(source_name, source)

def __init__(self, gob_db_session, gob_db_base, catalogue: str, collection: str):
    if self.gobmodel is None:
        self.gobmodel = GOBModel()

    self.db_session = gob_db_session
    self.base = gob_db_base
    self.collection = self.gobmodel.get_collection(catalogue, collection)
    self.tablename = self.gobmodel.get_table_name(catalogue, collection)
    self.basetable = getattr(self.base.classes, self.tablename)

    self._init_relations(catalogue, collection)

def _entity(catalog_name, collection_name, entity_id, view=None):
    """Returns the entity within the specified collection with the specified id

    An individual entity is returned.

    :param catalog_name: e.g. meetbouten
    :param collection_name: e.g. meting
    :param entity_id: unique identifier of the entity
    :param view: the database view that's being used to get the entity, defaults to the entity table
    :return:
    """
    if GOBModel().get_collection(catalog_name, collection_name):
        view = request.args.get('view', None)

        # If a view is requested and doesn't exist return a 404
        if view and not GOBViews().get_view(catalog_name, collection_name, view):
            return not_found(f'{catalog_name}.{collection_name}?view={view} not found')

        view_name = GOBViews().get_view(catalog_name, collection_name, view)['name'] if view else None

        result = get_entity(catalog_name, collection_name, entity_id, view_name)
        return hal_response(result) if result is not None else \
            not_found(f'{catalog_name}.{collection_name}:{entity_id} not found')
    else:
        return not_found(f'{catalog_name}.{collection_name} not found')

def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    model = GOBModel()
    connection = op.get_bind()

    for relation_name, relation in get_relations(model)['collections'].items():
        table_name = f"rel_{relation_name}"

        try:
            # The code uses the current GOBModel.
            # This model is normally more recent than the database, because of
            # the migrations that still have to be processed after this one.
            # So do not fail on any missing tables.
            res = connection.execute(f"SELECT * FROM {table_name} WHERE bronwaarde IS NULL LIMIT 1")

            if res.fetchall():
                print(f"{table_name} contains NULL values for bronwaarde. Clearing table and events.")
                op.execute(f"DELETE FROM events WHERE entity='{relation_name}'")
                op.execute(f"TRUNCATE {table_name}")
        except Exception:
            connection.execute("ROLLBACK")

def __init__(self, catalogue_name: str, entity_name: str, entity_id_attr: str):
    """
    :param catalogue_name:
    :param entity_name:
    :param entity_id_attr: The name of the attribute that serves as the entity_id
    """
    self.collection = GOBModel().get_collection(catalogue_name, entity_name)
    self.fields = self.collection['fields']

    mapping = {
        field: {
            'source_mapping': field,
        }
        for field in self.fields.keys()
    }

    input_spec = {
        'gob_mapping': mapping,
        'catalogue': catalogue_name,
        'entity': entity_name,
        'source': {
            'entity_id': entity_id_attr,
        }
    }

    self.converter = Converter(catalogue_name, entity_name, input_spec)

def __init__(self, source, app, dataset, mode: ImportMode = ImportMode.FULL):
    """
    source:
        type:        type of source, e.g. file, database, ...
        application: name of the application or source that holds the data, e.g. Neuron, DIVA, ...
        query:       any query to run on the dataset that is being imported, e.g. a SQL query
        config:      any configuration parameters, e.g. encoding

    :param source: source definition object
    :param app: name of the import (often equal to source.application)
    """
    self.source = source
    self.app = app
    self.mode = mode
    self.secure_types = [f"GOB.{type.name}" for type in GOB_SECURE_TYPES]

    mapping = dataset["gob_mapping"]
    catalogue = dataset['catalogue']
    entity = dataset['entity']

    gob_attributes = GOBModel().get_collection(catalogue, entity)["all_fields"]

    self.secure_attributes = []
    self.set_secure_attributes(mapping, gob_attributes)

    self.datastore = None

def __call__(self, parser, namespace, values, option_string=None):
    if namespace.format in ['query', 'curl'] and values is None:
        parser.error('query or curl require a collection')
    elif values and not GOBModel().get_collection(namespace.catalog, values):
        parser.error(f"GOB Collection '{values}' does not exist within GOB Catalog '{namespace.catalog}'")
    else:
        namespace.collection = values

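# A usage sketch for the action above; 'CollectionAction' is an assumed name
# for the enclosing argparse.Action subclass, and the positional arguments
# mirror the namespace attributes it reads (format, catalog, collection):
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('format', choices=['query', 'curl', 'csv'])  # choices assumed
parser.add_argument('catalog')
parser.add_argument('collection', nargs='?', action=CollectionAction)

# 'query' and 'curl' require a collection; an unknown collection name fails
# the GOBModel lookup and also triggers parser.error.
args = parser.parse_args(['query', 'meetbouten', 'metingen'])
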
class MockModel:
    gobmodel = GOBModel()
    _data = model

    # Wire original GOBModel _extract_references method
    def _extract_references(self, attributes):
        return self.gobmodel._extract_references(attributes)

def _add_resolve_attrs_to_columns(columns):
    """Adds attributes to columns necessary to resolve model attributes from a view.

    Looks for attributes of the form brk:sjt:heeft_bsn_voor, and will try to find
    the attribute heeft_bsn_voor in the catalog brk and collection sjt.

    Adds attribute, authority and public_name (heeft_bsn_voor in this case) to
    columns matching the pattern above.
    """
    resolve_attr_pattern = re.compile(r"^(\w+):(\w+):(\w+)$")

    for column in columns:
        match = re.match(resolve_attr_pattern, column.name)

        if not match:
            continue

        catalog_abbreviation, collection_abbreviation, attribute_name = match.groups()

        try:
            catalog, collection = GOBModel().get_catalog_collection_from_abbr(
                catalog_abbreviation, collection_abbreviation)
            attribute = collection['attributes'][attribute_name]
        except (NotInModelException, KeyError):
            continue

        setattr(column, 'attribute', attribute)
        setattr(column, 'public_name', attribute_name)

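# Quick illustration of the pattern above, using the example from the docstring:
import re

match = re.match(r"^(\w+):(\w+):(\w+)$", "brk:sjt:heeft_bsn_voor")
assert match.groups() == ("brk", "sjt", "heeft_bsn_voor")
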
def __init__(self, host, query, catalogue, collection, expand_history=False, sort=None,
             unfold=False, row_formatter=None, cross_relations=False, secure_user=None):
    """Constructor

    Lazy loading: just register host and query and wait for the iterator
    to be called to load the data

    :param host:
    :param query:
    :param catalogue:
    :param collection:
    """
    self.host = host
    self.secure_user = secure_user
    self.url = self.host + (GRAPHQL_SECURE_ENDPOINT if self.secure_user else GRAPHQL_PUBLIC_ENDPOINT)
    self.catalogue = catalogue
    self.collection = collection
    self.schema_collection_name = f'{self.catalogue}{self.collection.title()}'
    self.end_cursor = ""
    self.query = self._update_query(query, NUM_RECORDS)
    self.has_next_page = True
    self.gob_model = GOBModel().get_collection(self.catalogue, self.collection)
    self.formatter = GraphQLResultFormatter(expand_history, sort=sort, unfold=unfold,
                                            row_formatter=row_formatter, cross_relations=cross_relations)

def __init__(self, catalog_name, entity_name, input_spec):
    self.gob_model = GOBModel()
    collection = self.gob_model.get_collection(catalog_name, entity_name)

    self.input_spec = input_spec
    self.mapping = input_spec['gob_mapping']
    self.fields = collection['all_fields']

    # Get the fieldnames for the id and seqnr fields
    self.entity_id = input_spec['source']['entity_id']
    self.seqnr = self.mapping.get(FIELD.SEQNR, {}).get('source_mapping')

    # Extract the fields that have a source mapping defined
    self.extract_fields = [field for field, meta in self.mapping.items() if 'source_mapping' in meta]

def resolve_schema_collection_name(schema_collection_name: str):
    """Resolve catalog and collection from schema collection name

    :param schema_collection_name:
    :return:
    """
    model = GOBModel()
    names = to_snake(schema_collection_name).split('_')

    for n in range(1, len(names)):
        catalog_name = '_'.join(names[:-n])
        collection_name = '_'.join(names[-n:])

        catalog = model.get_catalog(catalog_name)
        collection = model.get_collection(catalog_name, collection_name)

        if catalog and collection:
            return catalog_name, collection_name

    return None, None

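# Walk-through of the right-to-left split, assuming a 'brk' catalog with a
# 'kadastrale_objecten' collection in the model and a camelCase-to-snake_case
# to_snake:
#
#   to_snake('brkKadastraleObjecten') -> 'brk_kadastrale_objecten'
#   names == ['brk', 'kadastrale', 'objecten']
#   n=1 tries catalog 'brk_kadastrale', collection 'objecten'      -> no match
#   n=2 tries catalog 'brk', collection 'kadastrale_objecten'      -> match
#
# so resolve_schema_collection_name('brkKadastraleObjecten') would return
# ('brk', 'kadastrale_objecten').
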
def _create_table(schema, catalog_name, collection_name, model, tablename=None):
    """Returns a SQL statement to create a table in a schema

    The table fields are constructed from the field specifications of the model

    :param schema:
    :param catalog_name:
    :param collection_name:
    :param model:
    :param tablename: if None, collection_name is used
    :return:
    """
    specs = get_field_specifications(model)
    order = _autorized_order(get_field_order(model), catalog_name, collection_name)

    catalog = GOBModel().get_catalog(catalog_name)
    catalog_description = quote_sql_string(catalog['description'])

    fields = []
    for field_name in order:
        field_spec = specs[field_name]
        field_description = quote_sql_string(field_spec['description'])

        if field_spec['type'] in REFERENCE_TYPES:
            for reference_field in get_reference_fields(field_spec):
                name = joined_names(field_name, reference_field)
                fields.append(_create_field(
                    name,
                    fully_qualified_type_name(String),
                    f"{field_description} ({reference_field})"
                ))
        elif field_spec['type'] in JSON_TYPES:
            # Make all JSON attribute columns of type String (in case the resulting values are merged into a list)
            for field, spec in field_spec['attributes'].items():
                name = joined_names(field_name, field)
                fields.append(_create_field(
                    name,
                    fully_qualified_type_name(String),
                    f"{field_description} ({field})"
                ))
        else:
            fields.append(_create_field(field_name, field_spec['type'], field_description))

    field_lengths = [len(field['name']) for field in fields]
    max_length = max(field_lengths) if field_lengths else 1

    table_name = f"{_quote(schema)}.{_quote(tablename or collection_name)}"

    table_fields = ",\n ".join([f"{field['name']:{max_length}} {field['type']}" for field in fields])

    comments = ";\n".join([
        f"COMMENT ON COLUMN {table_name}.{field['name']:{max_length}} "
        f"IS {SQL_QUOTATION_MARK}{field['description']}{SQL_QUOTATION_MARK}"
        for field in fields
    ])

    primary_key = f",PRIMARY KEY ({UNIQUE_ID})" if UNIQUE_ID in order else ""

    return f"""