def _relation_indexes_for_collection(catalog_name, collection_name, collection, idx_prefix):
    """Define the indexes used to relate the reference fields of one collection.

    Every field in ``collection['all_fields']`` whose type is 'GOB.Reference' or
    'GOB.ManyReference' is looked up in GOBSources. For each relation specification
    found, two index definitions are produced: one on the source column of this
    collection's table and one on the destination attribute of the referenced
    collection's table.

    :param catalog_name: name of the catalog the collection belongs to
    :param collection_name: name of the collection
    :param collection: collection definition; its 'all_fields' mapping is read
    :param idx_prefix: prefix passed to _hashed_index_name for the source-side indexes
    :return: dict mapping index name to a dict with 'table_name', 'columns' and 'type'
    """
    gob_model = GOBModel()
    gob_sources = GOBSources()

    result = {}
    src_table = gob_model.get_table_name(catalog_name, collection_name)

    all_fields = collection['all_fields']
    reference_types = ('GOB.Reference', 'GOB.ManyReference')

    # Map every reference field to the 'ref' of the collection it points to
    refs_by_field = {}
    for field_name, field_spec in all_fields.items():
        if field_spec['type'] in reference_types:
            refs_by_field[field_name] = field_spec['ref']

    for field_name, ref in refs_by_field.items():
        dst_table = gob_model.get_table_name_from_ref(ref)
        dst_collection = gob_model.get_collection_from_ref(ref)
        dst_catalog_name, _dst_collection_name = gob_model.get_catalog_collection_names_from_ref(ref)
        dst_catalog = gob_model.get_catalog(dst_catalog_name)

        for relation in gob_sources.get_field_relations(catalog_name, collection_name, field_name):
            dst_prefix = f"{dst_catalog['abbreviation']}_{dst_collection['abbreviation']}".lower()

            # The relation may name its own source attribute; otherwise the field itself is used
            src_attribute = relation['source_attribute'] if 'source_attribute' in relation else field_name
            src_column = f"{src_attribute}"
            dst_column = relation['destination_attribute']

            # Index on the source column of this collection's table
            src_index_name = _hashed_index_name(idx_prefix, _remove_leading_underscore(src_column))
            result[src_index_name] = {
                "table_name": src_table,
                "columns": [src_column],
                "type": _get_special_column_type(all_fields[src_column]['type']),
            }

            # Index on the destination attribute of the referenced collection's table
            dst_index_name = _hashed_index_name(dst_prefix, _remove_leading_underscore(dst_column))
            result[dst_index_name] = {
                "table_name": dst_table,
                "columns": [dst_column],
                "type": _get_special_column_type(dst_collection['all_fields'][dst_column]['type']),
            }

    return result
class TestSources(unittest.TestCase):
    """Tests for the relation lookups offered by GOBSources."""

    def setUp(self):
        # Every test works on its own GOBSources instance
        self.sources = GOBSources()

    def test_get_relations(self):
        # The relations of a collection are returned as a list
        relations = self.sources.get_relations('nap', 'peilmerken')

        self.assertIsInstance(relations, list)

    def test_get_field_relations_keyerror(self):
        # A KeyError raised by get_relations results in an empty list of field relations
        self.sources.get_relations = MagicMock(side_effect=KeyError)

        field_relations = self.sources.get_field_relations('catalog', 'collection', 'fieldname')

        self.assertEqual([], field_relations)
def _check_message(msg: dict):
    """Validate the header of a message before it is processed.

    The header must hold a truthy value for the catalog, collection and attribute
    keys, the catalog and the catalog/collection combination must be known to
    GOBModel, and GOBSources must hold at least one relation specification for
    the attribute.

    :param msg: message whose 'header' dict is checked
    :raises GOBException: on the first check that fails
    """
    mandatory_keys = (CATALOG_KEY, COLLECTION_KEY, ATTRIBUTE_KEY)
    header = msg.get('header', {})

    # Report the first mandatory key that is absent or empty
    missing_key = next((key for key in mandatory_keys if not header.get(key)), None)
    if missing_key is not None:
        raise GOBException(f"Missing {missing_key} attribute in header")

    gob_model = GOBModel()
    gob_sources = GOBSources()

    catalog_name = header[CATALOG_KEY]
    collection_name = header[COLLECTION_KEY]
    attribute_name = header[ATTRIBUTE_KEY]

    if not gob_model.get_catalog(catalog_name):
        raise GOBException(f"Invalid catalog name {catalog_name}")

    if not gob_model.get_collection(catalog_name, collection_name):
        raise GOBException(f"Invalid catalog/collection combination: {catalog_name}/{collection_name}")

    if not gob_sources.get_field_relations(catalog_name, collection_name, attribute_name):
        raise GOBException(
            f"Missing relation specification for {catalog_name} {collection_name} "
            f"{attribute_name}"
        )
def _split_job(msg: dict):
    """Split a relate job into one workflow request per attribute.

    The header of ``msg`` must name a catalogue and may name a collection and an
    attribute. Without a collection, all collections of the catalogue are handled;
    without an attribute, the attributes returned by the model's
    ``_extract_references`` for each collection are handled. For every attribute
    that has a relation specification (and is not a VeryManyReference) a copy of
    ``msg`` is published as a 'relate' workflow request. The copy's header
    identifies the attribute, stores the original jobid in 'split_from' and
    carries no 'jobid' or 'stepid'.

    :param msg: the message to split; only its 'header' is interpreted
    :raises AssertionError: if the catalogue is missing or unknown, no collections
        are found, or a collection is unknown
    """
    header = msg.get('header', {})
    catalog_name = header.get('catalogue')
    requested_collection = header.get('collection')
    attribute_name = header.get('attribute')

    # The checks raise AssertionError explicitly instead of using assert statements:
    # the exception type and messages stay the same, but the validation is no longer
    # stripped when Python runs with -O.
    if catalog_name is None:
        raise AssertionError("A catalog name is required")

    model = GOBModel()
    catalog = model.get_catalog(catalog_name)

    if catalog is None:
        raise AssertionError(f"Invalid catalog name '{catalog_name}'")

    if requested_collection is None:
        collection_names = model.get_collection_names(catalog_name)
    else:
        collection_names = [requested_collection]

    if not collection_names:
        raise AssertionError(f"No collections specified or found for catalog {catalog_name}")

    with MessageBrokerConnection(CONNECTION_PARAMS) as connection:
        for collection_name in collection_names:
            collection = model.get_collection(catalog_name, collection_name)
            if collection is None:
                raise AssertionError(f"Invalid collection name '{collection_name}'")

            logger.info(f"** Split {collection_name}")

            if attribute_name is None:
                attributes = model._extract_references(collection['attributes'])
            else:
                attributes = [attribute_name]

            for attr_name in attributes:
                sources = GOBSources()
                relation_specs = sources.get_field_relations(catalog_name, collection_name, attr_name)

                if not relation_specs:
                    logger.info(f"Missing relation specification for {catalog_name} {collection_name} "
                                f"{attr_name}. Skipping")
                    continue

                if relation_specs[0]['type'] == fully_qualified_type_name(VeryManyReference):
                    logger.info(f"Skipping VeryManyReference {catalog_name} {collection_name} {attr_name}")
                    continue

                logger.info(f"Splitting job for {catalog_name} {collection_name} {attr_name}")

                # Build a fresh header dict so the header of the incoming message is not modified
                split_header = {
                    **header,
                    "catalogue": catalog_name,
                    "collection": collection_name,
                    "attribute": attr_name,
                    "split_from": header.get('jobid'),
                }
                # Drop the ids of the originating job (its jobid is kept in 'split_from').
                # pop() with a default, so a header lacking either id does not raise KeyError.
                split_header.pop('jobid', None)
                split_header.pop('stepid', None)

                split_msg = {
                    **msg,
                    "header": split_header,
                    "workflow": {
                        "workflow_name": "relate",
                    }
                }

                connection.publish(WORKFLOW_EXCHANGE, WORKFLOW_REQUEST_KEY, split_msg)