class DatapackageToSpineConverter(QRunnable):
    """Runnable that loads a datapackage into a Spine database.

    Mapping applied by ``_run``:

    * every resource becomes an object class;
    * every row becomes an object named ``<resource>_<primary key values>``
      (or ``<resource>_<row index>`` when the resource has no primary key);
    * every field that is neither in the primary key nor in a foreign key
      becomes a parameter named ``<resource>_<field>``, with one value per row;
    * every resource with foreign keys becomes a relationship class named
      after the resource and the resources it references joined by ``__``,
      and every row whose foreign keys all resolve becomes a relationship.

    Progress and outcome are reported through ``self.signaler`` (signals
    ``progressed``, ``finished`` and ``failed``).
    """

    def __init__(self, db_url, datapackage_descriptor, datapackage_base_path):
        """Prepare the database mapping and read all resource rows.

        Note that ``create_new_spine_database`` is called on ``db_url`` here,
        at construction time, before the mapping is opened.

        Args:
            db_url: URL passed to ``create_new_spine_database`` and ``DiffDatabaseMapping``.
            datapackage_descriptor: first argument passed to ``Package``.
            datapackage_base_path: second argument passed to ``Package``.
        """
        super().__init__()
        self.db_url = db_url
        create_new_spine_database(self.db_url)
        self.db_map = DiffDatabaseMapping(db_url, getpass.getuser())
        self.datapackage = Package(datapackage_descriptor, datapackage_base_path)
        self.signaler = Signaler()
        # Step counts; filled in by number_of_steps().
        self.object_class_count = None
        self.object_count = None
        self.relationship_class_count = None
        self.relationship_count = None
        self.parameter_count = None
        self.parameter_value_count = None
        # Resource name => rows, read uncast.
        self.resource_data = {
            resource.name: resource.read(cast=False) for resource in self.datapackage.resources
        }

    @staticmethod
    def _parameter_field_names(schema):
        """Return the names of the schema fields that become parameters.

        These are the fields that belong neither to the primary key nor to any
        foreign key, in schema order.
        """
        key_fields = set(schema.primary_key)
        for foreign_key in schema.foreign_keys:
            key_fields.update(foreign_key["fields"])
        return [field.name for field in schema.fields if field.name not in key_fields]

    @staticmethod
    def _object_name(resource_name, primary_key, row_dict, row_index):
        """Return the object name for one row of a resource.

        Single place where object names are built, so that the names used when
        adding objects and the names used when looking up their ids always agree.
        """
        if primary_key:
            suffix = "_".join(row_dict[field] for field in primary_key)
        else:
            suffix = str(row_index)
        return resource_name + "_" + suffix

    def number_of_steps(self):
        """Compute and store the per-phase step counts.

        Returns:
            int: the sum of all the counts, i.e. the last ``step`` value that
            ``_run`` emits through ``progressed``.
        """
        stats = [
            (
                len(self.resource_data[resource.name]),
                len(self._parameter_field_names(resource.schema)),
                bool(resource.schema.foreign_keys),
            )
            for resource in self.datapackage.resources
        ]
        self.object_class_count = len(stats)
        self.object_count = sum(rows for rows, _, _ in stats)
        self.relationship_class_count = sum(1 for _, _, has_fk in stats if has_fk)
        self.relationship_count = sum(rows for rows, _, has_fk in stats if has_fk)
        self.parameter_count = sum(params for _, params, _ in stats)
        self.parameter_value_count = sum(rows * params for rows, params, _ in stats)
        return (
            self.object_class_count
            + self.object_count
            + self.relationship_class_count
            + self.relationship_count
            + self.parameter_count
            + self.parameter_value_count
        )

    def run(self):
        """Run the conversion and report the outcome through the signaler.

        Emits ``finished`` on success and ``failed`` (with the error message)
        when a ``SpineDBAPIError`` is raised. Other exceptions propagate.
        """
        try:
            self._run()
            self.signaler.finished.emit()
        except SpineDBAPIError as e:
            self.signaler.failed.emit(e.msg)

    def _run(self):
        """Convert the datapackage and commit the result to the database."""
        # The step counts are needed for progress reporting; compute them if
        # the caller did not call number_of_steps() beforehand.
        if self.object_class_count is None:
            self.number_of_steps()
        resources = self.datapackage.resources
        step = 0
        self.signaler.progressed.emit(step, "")

        # --- Object classes, relationship classes and parameter definitions ---
        object_class_names = {x.name for x in self.db_map.object_class_list()}
        parameter_names = {x.name for x in self.db_map.parameter_definition_list()}
        object_classes = []
        pre_relationship_classes = []
        pre_parameters = []
        for resource in resources:
            reference_resource_names = [fk["reference"]["resource"] for fk in resource.schema.foreign_keys]
            object_class_name_list = [resource.name] + reference_resource_names
            for class_name in object_class_name_list:
                if class_name not in object_class_names:
                    object_classes.append(dict(name=class_name))
                    object_class_names.add(class_name)
            if reference_resource_names:
                pre_relationship_classes.append(
                    dict(object_class_name_list=object_class_name_list, name="__".join(object_class_name_list))
                )
            for field_name in self._parameter_field_names(resource.schema):
                parameter_name = resource.name + "_" + field_name
                if parameter_name not in parameter_names:
                    pre_parameters.append(dict(object_class_name=resource.name, name=parameter_name))
                    parameter_names.add(parameter_name)
        self.signaler.progressed.emit(step, "Adding object classes...")
        self.db_map.add_object_classes(*object_classes)
        step += self.object_class_count
        object_class_name_id = {x.name: x.id for x in self.db_map.object_class_list()}
        relationship_classes = [
            dict(object_class_id_list=[object_class_name_id[n] for n in r['object_class_name_list']], name=r['name'])
            for r in pre_relationship_classes
        ]
        self.signaler.progressed.emit(step, "Adding relationship classes...")
        self.db_map.add_wide_relationship_classes(*relationship_classes)
        step += self.relationship_class_count
        parameters = [
            dict(object_class_id=object_class_name_id[p['object_class_name']], name=p['name']) for p in pre_parameters
        ]
        self.signaler.progressed.emit(step, "Adding parameters...")
        self.db_map.add_parameter_definitions(*parameters)
        step += self.parameter_count
        relationship_class_name_id = {x.name: x.id for x in self.db_map.wide_relationship_class_list()}
        parameter_name_id = {x.name: x.id for x in self.db_map.parameter_definition_list()}

        # --- Objects and parameter values ---
        object_names = {x.name for x in self.db_map.object_list()}
        objects = []
        pre_parameter_values = []
        for resource in resources:
            object_class_id = object_class_name_id[resource.name]
            primary_key = resource.schema.primary_key
            field_names = resource.schema.field_names
            parameter_fields = set(self._parameter_field_names(resource.schema))
            for i, row in enumerate(self.resource_data[resource.name]):
                row_dict = dict(zip(field_names, row))
                object_name = self._object_name(resource.name, primary_key, row_dict, i)
                if object_name not in object_names:
                    objects.append(dict(class_id=object_class_id, name=object_name))
                    object_names.add(object_name)
                for field_name, value in row_dict.items():
                    if field_name not in parameter_fields:
                        continue
                    parameter_id = parameter_name_id[resource.name + "_" + field_name]
                    pre_parameter_values.append(dict(object_name=object_name, parameter_id=parameter_id, value=value))
        self.signaler.progressed.emit(step, "Adding objects...")
        self.db_map.add_objects(*objects)
        step += self.object_count
        object_name_id = {x.name: x.id for x in self.db_map.object_list()}
        parameter_values = [
            dict(object_id=object_name_id[p['object_name']], parameter_id=p['parameter_id'], value=p['value'])
            for p in pre_parameter_values
        ]
        self.signaler.progressed.emit(step, "Adding parameter values...")
        self.db_map.add_parameter_values(*parameter_values)
        step += self.parameter_value_count

        # --- Relationships ---
        # Referenced resource name => list of referenced field-name lists.
        reference_resource_dict = dict()
        for resource in resources:
            for foreign_key in resource.schema.foreign_keys:
                reference = foreign_key["reference"]
                reference_resource_dict.setdefault(reference["resource"], []).append(reference["fields"])
        # Referenced resource name => joined referenced field names
        # => joined referenced field values => (object id, object name).
        reference_object_id_dict = dict()
        for reference_resource_name, reference_fields_names_list in reference_resource_dict.items():
            reference_resource = self.datapackage.get_resource(reference_resource_name)
            reference_primary_key = reference_resource.schema.primary_key
            reference_field_names = reference_resource.schema.field_names
            reference_object_id_dict[reference_resource_name] = by_fields = dict()
            for reference_fields_names in reference_fields_names_list:
                fields_key = ",".join(reference_fields_names)
                if fields_key in by_fields:
                    # Same referenced fields already indexed for another foreign key.
                    continue
                by_fields[fields_key] = by_values = dict()
                for i, row in enumerate(self.resource_data[reference_resource_name]):
                    row_dict = dict(zip(reference_field_names, row))
                    reference_object_name = self._object_name(
                        reference_resource_name, reference_primary_key, row_dict, i
                    )
                    reference_object_id = object_name_id[reference_object_name]
                    key = ",".join([row_dict[x] for x in reference_fields_names])
                    by_values[key] = (reference_object_id, reference_object_name)
        relationships = []
        for resource in resources:
            primary_key = resource.schema.primary_key
            foreign_keys = resource.schema.foreign_keys
            reference_resource_names = [fk['reference']['resource'] for fk in foreign_keys]
            if not reference_resource_names:
                continue
            relationship_class_name = "__".join([resource.name] + reference_resource_names)
            relationship_class_id = relationship_class_name_id[relationship_class_name]
            field_names = resource.schema.field_names
            for i, row in enumerate(self.resource_data[resource.name]):
                row_dict = dict(zip(field_names, row))
                object_name = self._object_name(resource.name, primary_key, row_dict, i)
                object_id_list = [object_name_id[object_name]]
                object_name_list = [object_name]
                for fk in foreign_keys:
                    key = ",".join([row_dict[x] for x in fk['fields']])
                    by_fields = reference_object_id_dict[fk['reference']['resource']]
                    by_values = by_fields[",".join(fk['reference']['fields'])]
                    try:
                        reference_object_id, reference_object_name = by_values[key]
                    except KeyError:
                        # Dangling reference: no relationship is created for this row.
                        break
                    object_id_list.append(reference_object_id)
                    object_name_list.append(reference_object_name)
                else:
                    # Reached only when every foreign key of the row was resolved.
                    relationship_name = relationship_class_name + "_" + "__".join(object_name_list)
                    relationships.append(
                        dict(class_id=relationship_class_id, object_id_list=object_id_list, name=relationship_name)
                    )
        self.signaler.progressed.emit(step, "Adding relationships...")
        self.db_map.add_wide_relationships(*relationships)
        step += self.relationship_count
        self.db_map.commit_session("Automatically generated by Spine Toolbox.")
        self.signaler.progressed.emit(step, "")
# Example #2
# 0
    def _create_database(directory):
        """Set up the two SQLite databases used by the Excel integration tests.

        The database at ``_TEMP_SQLITE_FILENAME`` is stripped of object classes
        and then filled with three object classes, two relationship classes,
        five objects, two relationships, ten parameter definitions and ten
        parameter values. The database at ``_TEMP_SQLITE_TEST_FILENAME`` is only
        stripped of object classes.

        Args:
            directory: passed, with the file name, to ``TestExcelIntegration._sqlite_url``.

        Returns:
            tuple: ``(db_map, db_map_test)``, the populated mapping and the emptied one.
        """
        # NOTE(review): there is no ``self``/``cls`` parameter; presumably a
        # @staticmethod decorator sits just above this definition -- confirm.

        def open_mapping(filename):
            # Create the database file and open a mapping to it.
            db_url = TestExcelIntegration._sqlite_url(filename, directory)
            create_new_spine_database(db_url)
            return DiffDatabaseMapping(db_url, username='******', upgrade=True)

        def drop_object_classes(mapping):
            # Remove every object class; commit only when something was removed.
            class_ids = {item.id for item in mapping.object_class_list().all()}
            if class_ids:
                mapping.remove_items(object_class_ids=class_ids)
                mapping.commit_session('empty database')

        def define_parameter(name, **class_reference):
            # Add one parameter definition and return the resulting item.
            definition = {'name': name, **class_reference}
            return db_map.add_parameter_definitions(definition)[0].first()

        db_map = open_mapping(_TEMP_SQLITE_FILENAME)
        # Second database: the one Excel data is loaded into.
        db_map_test = open_mapping(_TEMP_SQLITE_TEST_FILENAME)
        drop_object_classes(db_map_test)
        drop_object_classes(db_map)

        # Object classes.
        oc_1 = db_map.add_object_class(name='object_class_1')
        oc_2 = db_map.add_object_class(name='object_class_2')
        oc_3 = db_map.add_object_class(name='object_class_3')

        # Relationship classes; the second one gets no members or parameters.
        relc1 = db_map.add_wide_relationship_class(
            name='relationship_class', object_class_id_list=[oc_1.id, oc_2.id]
        )
        db_map.add_wide_relationship_class(
            name='relationship_class2', object_class_id_list=[oc_1.id, oc_2.id]
        )

        # Objects.
        oc1_obj1 = db_map.add_object(name='oc1_obj1', class_id=oc_1.id)
        oc1_obj2 = db_map.add_object(name='oc1_obj2', class_id=oc_1.id)
        oc2_obj1 = db_map.add_object(name='oc2_obj1', class_id=oc_2.id)
        oc2_obj2 = db_map.add_object(name='oc2_obj2', class_id=oc_2.id)
        oc3_obj1 = db_map.add_object(name='oc3_obj1', class_id=oc_3.id)

        # Relationships.
        rel1 = db_map.add_wide_relationship(
            name='rel1', class_id=relc1.id, object_id_list=[oc1_obj1.id, oc2_obj1.id]
        )
        rel2 = db_map.add_wide_relationship(
            name='rel2', class_id=relc1.id, object_id_list=[oc1_obj2.id, oc2_obj2.id]
        )

        # Parameter definitions, object classes first, then the relationship class.
        p1 = define_parameter('parameter1', object_class_id=oc_1.id)
        p2 = define_parameter('parameter2', object_class_id=oc_1.id)
        p3 = define_parameter('parameter3', object_class_id=oc_2.id)
        p4 = define_parameter('parameter4', object_class_id=oc_2.id)
        p5 = define_parameter('parameter5', object_class_id=oc_3.id)
        p6 = define_parameter('parameter6', object_class_id=oc_3.id)
        rel_p1 = define_parameter('rel_parameter1', relationship_class_id=relc1.id)
        rel_p2 = define_parameter('rel_parameter2', relationship_class_id=relc1.id)
        rel_p3 = define_parameter('rel_parameter3', relationship_class_id=relc1.id)
        rel_p4 = define_parameter('rel_parameter4', relationship_class_id=relc1.id)

        # Plain object parameter values.
        object_values = (
            (p1, oc1_obj1, oc_1, '0'),
            (p2, oc1_obj2, oc_1, '3.5'),
            (p3, oc2_obj1, oc_2, '[1, 2, 3, 4]'),
            (p4, oc2_obj2, oc_2, '[5, 6, 7]'),
        )
        for definition, entity, entity_class, raw_value in object_values:
            db_map.add_parameter_value(
                parameter_definition_id=definition.id,
                object_id=entity.id,
                object_class_id=entity_class.id,
                value=raw_value,
            )

        # Relationship parameter values.
        relationship_values = (
            (rel_p1, rel1, '0'),
            (rel_p2, rel2, '4'),
            (rel_p3, rel1, '[5, 6, 7]'),
            (rel_p4, rel2, '[1, 2, 3, 4]'),
        )
        for definition, relationship, raw_value in relationship_values:
            db_map.add_parameter_value(
                parameter_definition_id=definition.id,
                relationship_id=relationship.id,
                relationship_class_id=relc1.id,
                value=raw_value,
            )

        # Time series value on parameter5.
        timestamps = [
            np.datetime64('2005-02-25T00:00'),
            np.datetime64('2005-02-25T01:00'),
            np.datetime64('2005-02-25T02:00'),
        ]
        series = TimeSeriesVariableResolution(timestamps, [1, 2, 3], False, False)
        db_map.add_parameter_value(
            parameter_definition_id=p5.id,
            object_id=oc3_obj1.id,
            object_class_id=oc_3.id,
            value=to_database(series),
        )

        # Time pattern value on parameter6.
        pattern = TimePattern(['m1', 'm2', 'm3'], [1.1, 2.2, 3.3])
        db_map.add_parameter_value(
            parameter_definition_id=p6.id,
            object_id=oc3_obj1.id,
            object_class_id=oc_3.id,
            value=to_database(pattern),
        )

        db_map.commit_session('test')

        return db_map, db_map_test