Exemple #1
0
    def __init__(self, **kwargs):
        """Create a new node or wrap an already stored ``DbNode``.

        :param kwargs: either the single keyword ``dbnode`` holding a stored
            ``DbNode`` to wrap, or any other keywords, which are handed over
            to ``_set_with_defaults`` when a brand new node is built.
        :raises TypeError: if ``dbnode`` is not a ``DbNode`` instance.
        :raises ValueError: if ``dbnode`` has no ``id`` yet, or if additional
            keywords are given together with ``dbnode``.
        """
        super(Node, self).__init__()

        self._temp_folder = None
        existing_dbnode = kwargs.pop('dbnode', None)

        # Hook for the internal parameters; subclasses may redefine it
        self._init_internal_params()

        if existing_dbnode is None:
            # Brand new node
            # TODO: allow to get the user from the parameters
            self._dbnode = DbNode(user=get_automatic_user(),
                                  uuid=get_new_uuid(),
                                  type=self._plugin_type_string)
            self._to_be_stored = True

            # The temporary folder is created lazily, since creating it may
            # be slow on some filesystems
            self._temp_folder = None
            # Attribute cache, used only before the first save
            self._attrs_cache = {}
            # If this is changed, fix also the importer
            self._repo_folder = RepositoryFolder(
                section=self._section_name, uuid=self.uuid)

            # Set all the remaining attributes from the keywords, if possible
            self._set_with_defaults(**kwargs)
            return

        # From here on an existing database entry is being wrapped
        if not isinstance(existing_dbnode, DbNode):
            raise TypeError("dbnode is not a DbNode instance")
        if existing_dbnode.id is None:
            raise ValueError("If cannot load an aiida.orm.Node instance "
                             "from an unsaved DbNode object.")
        if kwargs:
            raise ValueError("If you pass a dbnode, you cannot pass any "
                             "further parameter")

        # A loaded node is flagged as not to be stored again
        self._to_be_stored = False
        self._dbnode = existing_dbnode

        # If this is changed, fix also the importer
        self._repo_folder = RepositoryFolder(
            section=self._section_name, uuid=self._dbnode.uuid)
Exemple #2
0
    def test_links_to_unknown_nodes(self, temp_dir):
        """Import an archive containing a link to a node that is not in the archive.

        A stored structure is exported, a link with a freshly generated
        (hence unknown) input UUID is injected into the archive's
        ``data.json``, and the archive is repacked. A plain import must raise
        ``DanglingLinkError``; an import with ``ignore_unknown_nodes=True``
        must succeed and restore the structure with its label.
        """
        node_label = 'Test structure data'
        struct = orm.StructureData()
        struct.label = str(node_label)
        struct.store()
        struct_uuid = struct.uuid

        archive_path = os.path.join(temp_dir, 'export.aiida')
        export([struct], filename=archive_path, file_format='tar.gz')

        # Unpack the archive so that its metadata can be tampered with
        sandbox = SandboxFolder()
        with tarfile.open(archive_path, 'r:gz',
                          format=tarfile.PAX_FORMAT) as archive:
            archive.extractall(sandbox.abspath)

        metadata_path = sandbox.get_abs_path('data.json')
        with open(metadata_path, 'r', encoding='utf8') as handle:
            metadata = json.load(handle)

        links = metadata['links_uuid']
        dangling_link = {
            'output': struct.uuid,
            # note: this uuid is supposed to not be in the DB:
            'input': get_new_uuid(),
            'label': 'parent',
            'type': LinkType.CREATE.value
        }
        links.append(dangling_link)

        # NOTE(review): binary mode is kept as in the original; presumably
        # `json` is a project wrapper that expects a binary handle - confirm.
        with open(metadata_path, 'wb') as handle:
            json.dump(metadata, handle)

        # Repack the modified content over the original archive
        with tarfile.open(archive_path, 'w:gz',
                          format=tarfile.PAX_FORMAT) as archive:
            archive.add(sandbox.abspath, arcname='')

        self.clean_db()

        with self.assertRaises(DanglingLinkError):
            import_data(archive_path)

        import_data(archive_path, ignore_unknown_nodes=True)
        self.assertEqual(orm.load_node(struct_uuid).label, node_label)
Exemple #3
0
    def test_User_node_4(self):
        """
        Test the storage of a user and of the last of several nodes that were
        created with that user and bound, one after the other, to the same
        name.

        Before anything is added to the session neither the user nor the last
        node has an id. After adding only the last node to the session and
        committing, both the user and that node must have an id.
        """
        user = DbUser('tests4@schema', 'spam', 'eggs', 'monty')

        # Create several node objects, rebinding the same name every time.
        # See https://docs.python.org/2/tutorial/classes.html subsec. 9.1
        for _ in range(5):
            # A new uuid (or any other differing value) is needed at each
            # iteration so that a distinct object (with a different pointer)
            # is really created in this scope.
            last_node = DbNode(user=user, uuid=get_new_uuid())

        # Nothing has been flushed nor committed so far
        for entity in (user, last_node):
            self.assertIsNone(entity.id)

        session = aiida.backends.sqlalchemy.get_scoped_session()

        # Only the node currently bound to the name is added and committed
        session.add(last_node)
        session.commit()

        # An assigned pk means that the object has at least been flushed
        # into the database
        for entity in (user, last_node):
            self.assertIsNotNone(entity.id)
Exemple #4
0
    def __init__(self, **kwargs):
        """Create a new node or wrap an already saved Django ``DbNode``.

        :param kwargs: either the single keyword ``dbnode`` holding a saved
            ``DbNode`` to wrap, or any other keywords, which are handed over
            to ``_set_with_defaults`` when a brand new node is built.
        :raises TypeError: if ``dbnode`` is not a ``DbNode`` instance.
        :raises ValueError: if ``dbnode`` has no ``pk`` yet, or if additional
            keywords are given together with ``dbnode``.
        """
        from aiida.backends.djsite.db.models import DbNode
        super(Node, self).__init__()

        self._temp_folder = None
        existing_dbnode = kwargs.pop('dbnode', None)

        # Hook for the internal parameters; subclasses may redefine it
        self._init_internal_params()

        if existing_dbnode is None:
            # Brand new node
            # TODO: allow to get the user from the parameters
            self._dbnode = DbNode(user=get_automatic_user(),
                                  uuid=get_new_uuid(),
                                  type=self._plugin_type_string)
            self._to_be_stored = True

            # The temporary folder is created lazily, since creating it may
            # be slow on some filesystems
            self._temp_folder = None
            # Attribute cache, used only before the first save
            self._attrs_cache = {}
            # If this is changed, fix also the importer
            self._repo_folder = RepositoryFolder(
                section=self._section_name, uuid=self.uuid)

            # Set all the remaining attributes from the keywords, if possible
            self._set_with_defaults(**kwargs)
            return

        # From here on an existing database entry is being wrapped
        if not isinstance(existing_dbnode, DbNode):
            raise TypeError("dbnode is not a DbNode instance")
        if existing_dbnode.pk is None:
            raise ValueError("If cannot load an aiida.orm.Node instance "
                             "from an unsaved Django DbNode object.")
        if kwargs:
            raise ValueError("If you pass a dbnode, you cannot pass any "
                             "further parameter")

        # A loaded node is flagged as not to be stored again
        self._to_be_stored = False
        self._dbnode = existing_dbnode

        # If this is changed, fix also the importer
        self._repo_folder = RepositoryFolder(
            section=self._section_name, uuid=self._dbnode.uuid)

        # NOTE: the loaded content is deliberately not validated here by
        # default: validation is too slow, since it often requires multiple
        # DB hits.
    def setUpBeforeMigration(self):
        """Populate the node table with duplicate UUIDs and deduplicate them.

        One boolean and one integer node are saved with fresh UUIDs, followed
        by ``n_bool_duplicates`` / ``n_int_duplicates`` further rows that
        reuse those UUIDs. A small file is written for each duplicate via
        ``utils.put_object_from_string``. The method then checks that the
        uniqueness verification raises and finally runs the deduplication.
        """
        from aiida.common.utils import get_new_uuid

        self.file_name = 'test.temp'
        self.file_content = '#!/bin/bash\n\necho test run\n'

        self.nodes_boolean = []
        self.nodes_integer = []
        self.n_bool_duplicates = 2
        self.n_int_duplicates = 4

        # The two reference rows, each with its own fresh UUID
        node_bool = self.DbNode(
            type='data.bool.Bool.', user_id=self.default_user.id, uuid=get_new_uuid())
        node_bool.save()

        node_int = self.DbNode(
            type='data.int.Int.', user_id=self.default_user.id, uuid=get_new_uuid())
        node_int.save()

        self.nodes_boolean.append(node_bool)
        self.nodes_integer.append(node_int)

        # (type string, reference row, number of duplicates, collecting list)
        duplicate_specs = (
            ('data.bool.Bool.', node_bool, self.n_bool_duplicates, self.nodes_boolean),
            ('data.int.Int.', node_int, self.n_int_duplicates, self.nodes_integer),
        )

        for type_string, reference, count, collected in duplicate_specs:
            for _ in range(count):
                # Reusing the UUID of the reference row creates the duplicate
                duplicate = self.DbNode(
                    type=type_string, user_id=self.default_user.id, uuid=reference.uuid)
                duplicate.save()
                utils.put_object_from_string(duplicate.uuid, self.file_name, self.file_content)
                collected.append(duplicate)

        # The uniqueness check has to raise now that duplicate UUIDs exist
        with self.assertRaises(IntegrityError):
            verify_uuid_uniqueness(table='db_dbnode')

        # Run the function that solves duplicate UUIDs, i.e. the one a user would trigger through the
        # `verdi database integrity detect-duplicate-uuid` command
        deduplicate_uuids(table='db_dbnode', dry_run=False)
def deduplicate_uuids(table=None, dry_run=True):
    """Find rows sharing a UUID in the given table and hand out new UUIDs to them.

    Before aiida-core v1.0.0 the UUID column of the node table, and of a few other tables, had no uniqueness
    constraint, so several entities with the same UUID could be stored in one table. The constraint was added in
    aiida-core=1.0.0, which leaves older databases with duplicate UUIDs in an inconsistent state. For every group of
    rows with the same UUID this function keeps the UUID of the first row and generates a new one for each of the
    others; when not a dry run, the repository content of the original UUID is also copied under every new UUID.
    No rows are deleted or merged.

    :param table: name of the database table to operate on, must be one of `TABLES_UUID_DEDUPLICATION`
    :param dry_run: when True, no actual changes will be made
    :return: list of strings denoting the performed operations, or those that would have been applied for dry_run=False
    :raises ValueError: if the specified table is invalid
    """
    from collections import defaultdict

    from aiida.common.utils import get_new_uuid
    from aiida.orm.utils.repository import Repository

    if table not in TABLES_UUID_DEDUPLICATION:
        valid_tables = ', '.join(TABLES_UUID_DEDUPLICATION)
        raise ValueError('invalid table {}: choose from {}'.format(table, valid_tables))

    # Group the primary keys of the affected rows by their shared UUID
    pks_by_uuid = defaultdict(list)
    for pk, uuid in get_duplicate_uuids(table=table):
        pks_by_uuid[uuid].append(int(pk))

    if dry_run:
        template = 'would update UUID of {} row<{}> from {} to {}'
    else:
        template = 'updated UUID of {} row<{}> from {} to {}'

    messages = []
    mapping_new_uuid = {}

    for uuid_ref, rows in pks_by_uuid.items():

        # A UUID of None never serves as reference: rows grouped under it are left untouched
        if uuid_ref is None:
            continue

        # The first row keeps the original UUID, only the remaining ones get a new one
        for pk in rows[1:]:
            uuid_new = str(get_new_uuid())
            mapping_new_uuid[pk] = uuid_new
            messages.append(template.format(table, pk, uuid_ref, uuid_new))

            if not dry_run:
                # Copy the repository content of the reference UUID under the new UUID
                repo_ref = Repository(uuid_ref, True, 'path')
                repo_new = Repository(uuid_new, False, 'path')
                source_folder = repo_ref._get_base_folder().abspath  # pylint: disable=protected-access
                repo_new.put_object_from_tree(source_folder)
                repo_new.store()

    if not dry_run:
        apply_new_uuid_mapping(table, mapping_new_uuid)

    return messages or ['no duplicate UUIDs found']