def __init__(self, **kwargs):
    """Construct a Node, either wrapping a stored ``DbNode`` or creating a new one.

    :param dbnode: optional keyword argument: an already-stored ``DbNode``
        instance to wrap. If given, no further keyword arguments are allowed.
    :param kwargs: when creating a new node, forwarded to
        ``_set_with_defaults``.
    :raises TypeError: if ``dbnode`` is passed but is not a ``DbNode`` instance
    :raises ValueError: if ``dbnode`` is unsaved (has no id), or if extra
        keyword arguments are passed together with ``dbnode``
    """
    super(Node, self).__init__()
    self._temp_folder = None

    dbnode = kwargs.pop('dbnode', None)

    # Set the internal parameters
    # Can be redefined in the subclasses
    self._init_internal_params()

    if dbnode is not None:
        if not isinstance(dbnode, DbNode):
            raise TypeError("dbnode is not a DbNode instance")
        if dbnode.id is None:
            # Fixed wording: the previous message started with the garbled
            # "If cannot load ..."
            raise ValueError("Cannot load an aiida.orm.Node instance "
                             "from an unsaved DbNode object.")
        if kwargs:
            raise ValueError("If you pass a dbnode, you cannot pass any "
                             "further parameter")

        # If I am loading, I cannot modify it
        self._to_be_stored = False
        self._dbnode = dbnode

        # If this is changed, fix also the importer
        self._repo_folder = RepositoryFolder(section=self._section_name,
                                             uuid=self._dbnode.uuid)
    else:
        # TODO: allow to get the user from the parameters
        user = get_automatic_user()
        self._dbnode = DbNode(user=user,
                              uuid=get_new_uuid(),
                              type=self._plugin_type_string)
        self._to_be_stored = True

        # As creating the temp folder may require some time on slow
        # filesystems, we defer its creation
        self._temp_folder = None
        # Used only before the first save
        self._attrs_cache = {}
        # If this is changed, fix also the importer
        self._repo_folder = RepositoryFolder(section=self._section_name,
                                             uuid=self.uuid)

        # Automatically set all *other* attributes, if possible, otherwise
        # stop
        self._set_with_defaults(**kwargs)
def test_links_to_unknown_nodes(self, temp_dir):
    """Test importing of nodes, that have links to unknown nodes.

    A link pointing to a UUID that is not part of the archive must raise
    ``DanglingLinkError`` on import, unless ``ignore_unknown_nodes=True``
    is passed, in which case the known nodes are imported normally.
    """
    node_label = 'Test structure data'
    struct = orm.StructureData()
    struct.label = str(node_label)
    struct.store()
    struct_uuid = struct.uuid

    filename = os.path.join(temp_dir, 'export.aiida')
    export([struct], filename=filename, file_format='tar.gz')

    # Unpack the archive so that we can tamper with its metadata
    unpack = SandboxFolder()
    with tarfile.open(filename, 'r:gz', format=tarfile.PAX_FORMAT) as tar:
        tar.extractall(unpack.abspath)

    with open(unpack.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle:
        data = json.load(fhandle)
    data['links_uuid'].append({
        'output': struct.uuid,
        # note: this uuid is supposed to not be in the DB:
        'input': get_new_uuid(),
        'label': 'parent',
        'type': LinkType.CREATE.value
    })

    # BUGFIX: json.dump writes `str`, so the file must be opened in text
    # mode; opening with 'wb' raises a TypeError under Python 3.
    with open(unpack.get_abs_path('data.json'), 'w', encoding='utf8') as fhandle:
        json.dump(data, fhandle)

    # Repack the tampered archive
    with tarfile.open(filename, 'w:gz', format=tarfile.PAX_FORMAT) as tar:
        tar.add(unpack.abspath, arcname='')

    self.clean_db()

    with self.assertRaises(DanglingLinkError):
        import_data(filename)

    import_data(filename, ignore_unknown_nodes=True)
    self.assertEqual(orm.load_node(struct_uuid).label, node_label)
def test_User_node_4(self):
    """
    Test that when several nodes are created with the same user and each
    of them is assigned to the same name, storage of last node object
    associated to that node does not trigger storage of all objects.

    Assert the correct storage of the user and node. Assert the
    non-storage of the other nodes.
    """
    # Create the user that all the nodes will share
    user = DbUser('tests4@schema', 'spam', 'eggs', 'monty')

    # Rebind the same name to a freshly created node object several times.
    # See https://docs.python.org/2/tutorial/classes.html subsec. 9.1:
    # giving each object a distinct uuid guarantees that a genuinely new
    # object (with a new pointer) is created in this scope on every pass.
    node = None
    for _ in range(5):
        node = DbNode(user=user, uuid=get_new_uuid())

        # Neither object may have been flushed or committed yet
        self.assertIsNone(user.id)
        self.assertIsNone(node.id)

    session = aiida.backends.sqlalchemy.get_scoped_session()

    # Add and commit only the last node
    session.add(node)
    session.commit()

    # Both objects now carry a pk, which proves they were at least
    # flushed to the database
    self.assertIsNotNone(user.id)
    self.assertIsNotNone(node.id)
def __init__(self, **kwargs):
    """Construct a Node, either wrapping a stored Django ``DbNode`` or creating a new one.

    :param dbnode: optional keyword argument: an already-stored Django
        ``DbNode`` instance to wrap. If given, no further keyword arguments
        are allowed.
    :param kwargs: when creating a new node, forwarded to
        ``_set_with_defaults``.
    :raises TypeError: if ``dbnode`` is passed but is not a ``DbNode`` instance
    :raises ValueError: if ``dbnode`` is unsaved (has no pk), or if extra
        keyword arguments are passed together with ``dbnode``
    """
    from aiida.backends.djsite.db.models import DbNode

    super(Node, self).__init__()
    self._temp_folder = None

    dbnode = kwargs.pop('dbnode', None)

    # Set the internal parameters
    # Can be redefined in the subclasses
    self._init_internal_params()

    if dbnode is not None:
        if not isinstance(dbnode, DbNode):
            raise TypeError("dbnode is not a DbNode instance")
        if dbnode.pk is None:
            # Fixed wording: the previous message started with the garbled
            # "If cannot load ..."
            raise ValueError("Cannot load an aiida.orm.Node instance "
                             "from an unsaved Django DbNode object.")
        if kwargs:
            raise ValueError("If you pass a dbnode, you cannot pass any "
                             "further parameter")

        # If I am loading, I cannot modify it
        self._to_be_stored = False
        self._dbnode = dbnode

        # If this is changed, fix also the importer
        self._repo_folder = RepositoryFolder(section=self._section_name,
                                             uuid=self._dbnode.uuid)

        # NO VALIDATION ON __init__ BY DEFAULT, IT IS TOO SLOW SINCE IT OFTEN
        # REQUIRES MULTIPLE DB HITS
        # try:
        #     # Note: the validation often requires to load at least one
        #     # attribute, and therefore it will take a lot of time
        #     # because it has to cache every attribute.
        #     self._validate()
        # except ValidationError as e:
        #     raise DbContentError("The data in the DB with UUID={} is not "
        #                          "valid for class {}: {}".format(
        #         uuid, self.__class__.__name__, e.message))
    else:
        # TODO: allow to get the user from the parameters
        user = get_automatic_user()
        self._dbnode = DbNode(user=user,
                              uuid=get_new_uuid(),
                              type=self._plugin_type_string)
        self._to_be_stored = True

        # As creating the temp folder may require some time on slow
        # filesystems, we defer its creation
        self._temp_folder = None
        # Used only before the first save
        self._attrs_cache = {}
        # If this is changed, fix also the importer
        self._repo_folder = RepositoryFolder(section=self._section_name,
                                             uuid=self.uuid)

        # Automatically set all *other* attributes, if possible, otherwise
        # stop
        self._set_with_defaults(**kwargs)
def setUpBeforeMigration(self):
    """Create one Bool and one Int node plus deliberate UUID duplicates,
    then run the deduplication that the migration relies on."""
    from aiida.common.utils import get_new_uuid

    self.file_name = 'test.temp'
    self.file_content = '#!/bin/bash\n\necho test run\n'

    self.nodes_boolean = []
    self.nodes_integer = []
    self.n_bool_duplicates = 2
    self.n_int_duplicates = 4

    def make_duplicates(node_type, shared_uuid, count, collection):
        """Create `count` nodes re-using `shared_uuid`, each with a repo file."""
        for _ in range(count):
            duplicate = self.DbNode(type=node_type,
                                    user_id=self.default_user.id,
                                    uuid=shared_uuid)
            duplicate.save()
            utils.put_object_from_string(duplicate.uuid, self.file_name, self.file_content)
            collection.append(duplicate)

    # The two "reference" nodes whose UUIDs will be duplicated
    node_bool = self.DbNode(type='data.bool.Bool.',
                            user_id=self.default_user.id,
                            uuid=get_new_uuid())
    node_bool.save()

    node_int = self.DbNode(type='data.int.Int.',
                           user_id=self.default_user.id,
                           uuid=get_new_uuid())
    node_int.save()

    self.nodes_boolean.append(node_bool)
    self.nodes_integer.append(node_int)

    make_duplicates('data.bool.Bool.', node_bool.uuid,
                    self.n_bool_duplicates, self.nodes_boolean)
    make_duplicates('data.int.Int.', node_int.uuid,
                    self.n_int_duplicates, self.nodes_integer)

    # Verify that there are duplicate UUIDs by checking that the following function raises
    with self.assertRaises(IntegrityError):
        verify_uuid_uniqueness(table='db_dbnode')

    # Now run the function responsible for solving duplicate UUIDs which would also be called by the user
    # through the `verdi database integrity detect-duplicate-uuid` command
    deduplicate_uuids(table='db_dbnode', dry_run=False)
def deduplicate_uuids(table=None, dry_run=True):
    """Detect and solve entities with duplicate UUIDs in a given database table.

    Before aiida-core v1.0.0, there was no uniqueness constraint on the UUID column of the node
    table in the database and a few other tables as well. This made it possible to store multiple
    entities with identical UUIDs in the same table without the database complaining. This bug was
    fixed in aiida-core=1.0.0 by putting an explicit uniqueness constraint on UUIDs on the database
    level. However, this would leave databases created before this patch with duplicate UUIDs in an
    inconsistent state. This command will run an analysis to detect duplicate UUIDs in a given
    table and solve it by generating new UUIDs. Note that it will not delete or merge any rows.

    :param dry_run: when True, no actual changes will be made
    :return: list of strings denoting the performed operations, or those that would have been applied
        for dry_run=False
    :raises ValueError: if the specified table is invalid
    """
    from collections import defaultdict

    from aiida.common.utils import get_new_uuid
    from aiida.orm.utils.repository import Repository

    if table not in TABLES_UUID_DEDUPLICATION:
        raise ValueError('invalid table {}: choose from {}'.format(table, ', '.join(TABLES_UUID_DEDUPLICATION)))

    # Group the primary keys of all rows sharing the same UUID
    duplicates = defaultdict(list)
    for pk, uuid in get_duplicate_uuids(table=table):
        duplicates[uuid].append(int(pk))

    messages = []
    mapping_new_uuid = {}

    for uuid, pks in duplicates.items():
        # The first row keeps the original UUID; every later duplicate is
        # reassigned a freshly generated one.
        for pk in pks[1:]:
            uuid_new = str(get_new_uuid())
            mapping_new_uuid[pk] = uuid_new

            if dry_run:
                messages.append('would update UUID of {} row<{}> from {} to {}'.format(table, pk, uuid, uuid_new))
            else:
                messages.append('updated UUID of {} row<{}> from {} to {}'.format(table, pk, uuid, uuid_new))
                # Clone the repository contents of the original UUID into a
                # new repository folder under the new UUID.
                repo_ref = Repository(uuid, True, 'path')
                repo_new = Repository(uuid_new, False, 'path')
                repo_new.put_object_from_tree(repo_ref._get_base_folder().abspath)  # pylint: disable=protected-access
                repo_new.store()

    if not dry_run:
        apply_new_uuid_mapping(table, mapping_new_uuid)

    if not messages:
        messages = ['no duplicate UUIDs found']

    return messages