class DatapackageToSpineConverter(QRunnable): def __init__(self, db_url, datapackage_descriptor, datapackage_base_path): super().__init__() self.db_url = db_url create_new_spine_database(self.db_url) self.db_map = DiffDatabaseMapping(db_url, getpass.getuser()) self.datapackage = Package(datapackage_descriptor, datapackage_base_path) self.signaler = Signaler() self.resource_data = dict() self.object_class_count = None self.object_count = None self.relationship_class_count = None self.relationship_count = None self.parameter_count = None self.parameter_value_count = None for resource in self.datapackage.resources: self.resource_data[resource.name] = resource.read(cast=False) def number_of_steps(self): self.object_class_count = len(self.datapackage.resources) self.object_count = sum(len(self.resource_data[x.name]) for x in self.datapackage.resources) self.relationship_class_count = len([x for x in self.datapackage.resources if x.schema.foreign_keys]) self.relationship_count = sum( len(self.resource_data[x.name]) for x in self.datapackage.resources if x.schema.foreign_keys ) self.parameter_count = sum( len(x.schema.fields) - len(x.schema.primary_key) - len([i for fk in x.schema.foreign_keys for i in fk["fields"]]) for x in self.datapackage.resources ) self.parameter_value_count = sum( len(self.resource_data[x.name]) * ( len(x.schema.fields) - len(x.schema.primary_key) - len([i for fk in x.schema.foreign_keys for i in fk["fields"]]) ) for x in self.datapackage.resources ) return ( self.object_class_count + self.object_count + self.relationship_class_count + self.relationship_count + self.parameter_count + self.parameter_value_count ) def run(self): try: self._run() self.signaler.finished.emit() except SpineDBAPIError as e: self.signaler.failed.emit(e.msg) def _run(self): step = 0 self.signaler.progressed.emit(step, "") object_class_names = [x.name for x in self.db_map.object_class_list()] parameter_names = [x.name for x in self.db_map.parameter_definition_list()] object_class_name_lists = [ x.object_class_name_list.split(",") for x in self.db_map.wide_relationship_class_list() ] object_classes = list() pre_relationship_classes = list() pre_parameters = list() for resource in self.datapackage.resources: if resource.name not in object_class_names: object_classes.append(dict(name=resource.name)) object_class_names.append(resource.name) primary_key = resource.schema.primary_key foreign_keys = resource.schema.foreign_keys reference_resource_names = [fk["reference"]["resource"] for fk in foreign_keys] for reference_resource_name in reference_resource_names: if reference_resource_name not in object_class_names: object_classes.append(dict(name=reference_resource_name)) object_class_names.append(reference_resource_name) if reference_resource_names: object_class_name_list = [resource.name] + reference_resource_names relationship_class_name = "__".join(object_class_name_list) pre_relationship_classes.append( dict(object_class_name_list=object_class_name_list, name=relationship_class_name) ) object_class_name_lists.append(object_class_name_list) for field in resource.schema.fields: # Skip fields in primary key if field.name in primary_key: continue # Skip fields in any foreign key if field in [x for fk in foreign_keys for x in fk["fields"]]: continue parameter_name = resource.name + "_" + field.name if parameter_name not in parameter_names: pre_parameters.append(dict(object_class_name=resource.name, name=parameter_name)) parameter_names.append(parameter_name) self.signaler.progressed.emit(step, "Adding object classes...") self.db_map.add_object_classes(*object_classes) step += self.object_class_count object_class_name_id = {x.name: x.id for x in self.db_map.object_class_list()} relationship_classes = [ dict(object_class_id_list=[object_class_name_id[n] for n in r['object_class_name_list']], name=r['name']) for r in pre_relationship_classes ] self.signaler.progressed.emit(step, "Adding relationship classes...") self.db_map.add_wide_relationship_classes(*relationship_classes) step += self.relationship_class_count parameters = [ dict(object_class_id=object_class_name_id[p['object_class_name']], name=p['name']) for p in pre_parameters ] self.signaler.progressed.emit(step, "Adding parameters...") self.db_map.add_parameter_definitions(*parameters) step += self.parameter_count relationship_class_name_id = {x.name: x.id for x in self.db_map.wide_relationship_class_list()} parameter_name_id = {x.name: x.id for x in self.db_map.parameter_definition_list()} object_names = [x.name for x in self.db_map.object_list()] # Create list of object and preliminary parameter value dicts. objects = list() pre_parameter_values = list() for resource in self.datapackage.resources: object_class_id = object_class_name_id[resource.name] primary_key = resource.schema.primary_key foreign_keys = resource.schema.foreign_keys foreign_keys_fields = [x for fk in foreign_keys for x in fk["fields"]] for i, row in enumerate(self.resource_data[resource.name]): row_dict = dict(zip(resource.schema.field_names, row)) if primary_key: object_name_suffix = "_".join(row_dict[field] for field in primary_key) else: object_name_suffix = str(i) object_name = resource.name + "_" + object_name_suffix if not object_name in object_names: objects.append(dict(class_id=object_class_id, name=object_name)) object_names.append(object_name) for field_name, value in row_dict.items(): if field_name in primary_key: continue if field_name in foreign_keys_fields: continue parameter_name = resource.name + "_" + field_name parameter_id = parameter_name_id[parameter_name] pre_parameter_values.append(dict(object_name=object_name, parameter_id=parameter_id, value=value)) self.signaler.progressed.emit(step, "Adding objects...") self.db_map.add_objects(*objects) step += self.object_count object_name_id = {x.name: x.id for x in self.db_map.object_list()} parameter_values = [ dict(object_id=object_name_id[p['object_name']], parameter_id=p['parameter_id'], value=p['value']) for p in pre_parameter_values ] self.signaler.progressed.emit(step, "Adding parameter values...") self.db_map.add_parameter_values(*parameter_values) step += self.parameter_value_count # Create dictionary of reference resource names => list of reference fields names reference_resource_dict = dict() for resource in self.datapackage.resources: foreign_keys = resource.schema.foreign_keys for foreign_key in foreign_keys: reference_resource_name = foreign_key["reference"]["resource"] reference_fields_names = foreign_key["reference"]["fields"] reference_resource_dict.setdefault(reference_resource_name, list()).append(reference_fields_names) # Create dictionary of reference resource name => reference fields names # => reference key => object id reference_object_id_dict = dict() for reference_resource_name, reference_fields_names_list in reference_resource_dict.items(): reference_resource = self.datapackage.get_resource(reference_resource_name) reference_primary_key = reference_resource.schema.primary_key reference_object_id_dict[reference_resource_name] = d1 = dict() for reference_fields_names in reference_fields_names_list: d1[",".join(reference_fields_names)] = d2 = dict() for i, row in enumerate(self.resource_data[reference_resource_name]): row_dict = dict(zip(reference_resource.schema.field_names, row)) # Find object id if reference_primary_key: reference_object_name_suffix = "_".join(row_dict[field] for field in reference_primary_key) else: reference_object_name_suffix = str(i) reference_object_name = reference_resource_name + reference_object_name_suffix reference_object_id = object_name_id[reference_object_name] key = ",".join([row_dict[x] for x in reference_fields_names]) d2[key] = (reference_object_id, reference_object_name) # Create list of relationships relationships = list() for resource in self.datapackage.resources: primary_key = resource.schema.primary_key foreign_keys = resource.schema.foreign_keys reference_resource_names = [fk['reference']['resource'] for fk in foreign_keys] if not reference_resource_names: continue object_class_name_list = [resource.name] + reference_resource_names relationship_class_name = "__".join(object_class_name_list) relationship_class_id = relationship_class_name_id[relationship_class_name] for i, row in enumerate(self.resource_data[resource.name]): row_dict = dict(zip(resource.schema.field_names, row)) if primary_key: object_name_suffix = "_".join(row_dict[field] for field in primary_key) else: object_name_suffix = str(i) object_name = resource.name + object_name_suffix object_id = object_name_id[object_name] object_id_list = [object_id] object_name_list = [object_name] for fk in foreign_keys: fields_names = fk['fields'] reference_resource_name = fk['reference']['resource'] reference_fields_names = fk['reference']['fields'] key = ",".join([row_dict[x] for x in fields_names]) d1 = reference_object_id_dict[reference_resource_name] d2 = d1[",".join(reference_fields_names)] try: reference_object_id, reference_object_name = d2[key] except KeyError: break object_id_list.append(reference_object_id) object_name_list.append(reference_object_name) else: relationship_name = relationship_class_name + "_" + "__".join(object_name_list) relationships.append( dict(class_id=relationship_class_id, object_id_list=object_id_list, name=relationship_name) ) self.signaler.progressed.emit(step, "Adding relationships...") self.db_map.add_wide_relationships(*relationships) step += self.relationship_count self.db_map.commit_session("Automatically generated by Spine Toolbox.") self.signaler.progressed.emit(step, "")
def _create_database(directory): """Creates a database with objects, relationship, parameters and values.""" url = TestExcelIntegration._sqlite_url(_TEMP_SQLITE_FILENAME, directory) create_new_spine_database(url) db_map = DiffDatabaseMapping(url, username='******', upgrade=True) # create empty database for loading excel into url = TestExcelIntegration._sqlite_url(_TEMP_SQLITE_TEST_FILENAME, directory) create_new_spine_database(url) db_map_test = DiffDatabaseMapping(url, username='******', upgrade=True) # delete all object_classes to empty database oc = set(oc.id for oc in db_map_test.object_class_list().all()) if oc: db_map_test.remove_items(object_class_ids=oc) db_map_test.commit_session('empty database') oc = set(oc.id for oc in db_map.object_class_list().all()) if oc: db_map.remove_items(object_class_ids=oc) db_map.commit_session('empty database') # create object classes oc_1 = db_map.add_object_class(**{'name': 'object_class_1'}) oc_2 = db_map.add_object_class(**{'name': 'object_class_2'}) oc_3 = db_map.add_object_class(**{'name': 'object_class_3'}) # create relationship classes relc1 = db_map.add_wide_relationship_class( **{ 'name': 'relationship_class', 'object_class_id_list': [oc_1.id, oc_2.id] }) relc2 = db_map.add_wide_relationship_class( **{ 'name': 'relationship_class2', 'object_class_id_list': [oc_1.id, oc_2.id] }) # create objects oc1_obj1 = db_map.add_object(**{ 'name': 'oc1_obj1', 'class_id': oc_1.id }) oc1_obj2 = db_map.add_object(**{ 'name': 'oc1_obj2', 'class_id': oc_1.id }) oc2_obj1 = db_map.add_object(**{ 'name': 'oc2_obj1', 'class_id': oc_2.id }) oc2_obj2 = db_map.add_object(**{ 'name': 'oc2_obj2', 'class_id': oc_2.id }) oc3_obj1 = db_map.add_object(**{ 'name': 'oc3_obj1', 'class_id': oc_3.id }) # add relationships rel1 = db_map.add_wide_relationship( **{ 'name': 'rel1', 'class_id': relc1.id, 'object_id_list': [oc1_obj1.id, oc2_obj1.id] }) rel2 = db_map.add_wide_relationship( **{ 'name': 'rel2', 'class_id': relc1.id, 'object_id_list': [oc1_obj2.id, oc2_obj2.id] }) # create parameters p1 = db_map.add_parameter_definitions(*[{ 'name': 'parameter1', 'object_class_id': oc_1.id }])[0].first() p2 = db_map.add_parameter_definitions(*[{ 'name': 'parameter2', 'object_class_id': oc_1.id }])[0].first() p3 = db_map.add_parameter_definitions(*[{ 'name': 'parameter3', 'object_class_id': oc_2.id }])[0].first() p4 = db_map.add_parameter_definitions(*[{ 'name': 'parameter4', 'object_class_id': oc_2.id }])[0].first() p5 = db_map.add_parameter_definitions(*[{ 'name': 'parameter5', 'object_class_id': oc_3.id }])[0].first() p6 = db_map.add_parameter_definitions(*[{ 'name': 'parameter6', 'object_class_id': oc_3.id }])[0].first() rel_p1 = db_map.add_parameter_definitions( *[{ 'name': 'rel_parameter1', 'relationship_class_id': relc1.id }])[0].first() rel_p2 = db_map.add_parameter_definitions( *[{ 'name': 'rel_parameter2', 'relationship_class_id': relc1.id }])[0].first() rel_p3 = db_map.add_parameter_definitions( *[{ 'name': 'rel_parameter3', 'relationship_class_id': relc1.id }])[0].first() rel_p4 = db_map.add_parameter_definitions( *[{ 'name': 'rel_parameter4', 'relationship_class_id': relc1.id }])[0].first() # add parameter values db_map.add_parameter_value( **{ 'parameter_definition_id': p1.id, 'object_id': oc1_obj1.id, 'object_class_id': oc_1.id, 'value': '0' }) db_map.add_parameter_value( **{ 'parameter_definition_id': p2.id, 'object_id': oc1_obj2.id, 'object_class_id': oc_1.id, 'value': '3.5' }) db_map.add_parameter_value( **{ 'parameter_definition_id': p3.id, 'object_id': oc2_obj1.id, 'object_class_id': oc_2.id, 'value': '[1, 2, 3, 4]', }) db_map.add_parameter_value( **{ 'parameter_definition_id': p4.id, 'object_id': oc2_obj2.id, 'object_class_id': oc_2.id, 'value': '[5, 6, 7]', }) db_map.add_parameter_value( **{ 'parameter_definition_id': rel_p1.id, 'relationship_id': rel1.id, 'relationship_class_id': relc1.id, 'value': '0', }) db_map.add_parameter_value( **{ 'parameter_definition_id': rel_p2.id, 'relationship_id': rel2.id, 'relationship_class_id': relc1.id, 'value': '4', }) db_map.add_parameter_value( **{ 'parameter_definition_id': rel_p3.id, 'relationship_id': rel1.id, 'relationship_class_id': relc1.id, 'value': '[5, 6, 7]', }) db_map.add_parameter_value( **{ 'parameter_definition_id': rel_p4.id, 'relationship_id': rel2.id, 'relationship_class_id': relc1.id, 'value': '[1, 2, 3, 4]', }) time = [ np.datetime64('2005-02-25T00:00'), np.datetime64('2005-02-25T01:00'), np.datetime64('2005-02-25T02:00') ] value = [1, 2, 3] ts_val = to_database( TimeSeriesVariableResolution(time, value, False, False)) db_map.add_parameter_value( **{ 'parameter_definition_id': p5.id, 'object_id': oc3_obj1.id, 'object_class_id': oc_3.id, 'value': ts_val }) timepattern = ['m1', 'm2', 'm3'] value = [1.1, 2.2, 3.3] ts_val = to_database(TimePattern(timepattern, value)) db_map.add_parameter_value( **{ 'parameter_definition_id': p6.id, 'object_id': oc3_obj1.id, 'object_class_id': oc_3.id, 'value': ts_val }) # commit db_map.commit_session('test') return db_map, db_map_test