def find_latest_uuid():
    """Return the UUID of the most recently created IterHarmonicApprox workflow."""
    from aiida.orm import QueryBuilder
    from aiida.plugins import WorkflowFactory

    IterHarmonicApprox = WorkflowFactory('phonopy.iter_ha')
    qb = QueryBuilder()
    qb.append(IterHarmonicApprox)
    qb.order_by({IterHarmonicApprox: {'ctime': 'desc'}})
    return qb.first()[0].uuid
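# A minimal guarded variant of the query above, assuming a configured AiiDA
# profile and that the aiida-phonopy plugin providing the 'phonopy.iter_ha'
# entry point is installed. QueryBuilder.first() returns None when nothing
# matches, so the result row is checked before indexing into it.
from aiida import load_profile
from aiida.orm import QueryBuilder
from aiida.plugins import WorkflowFactory

load_profile()

IterHarmonicApprox = WorkflowFactory('phonopy.iter_ha')
qb = QueryBuilder()
qb.append(IterHarmonicApprox, project='uuid', tag='iter_ha')
qb.order_by({'iter_ha': {'ctime': 'desc'}})

row = qb.first()  # None if no IterHarmonicApprox workflow has been run yet
if row is None:
    print('No IterHarmonicApprox workflows found')
else:
    print('Most recent IterHarmonicApprox workflow: {}'.format(row[0]))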
def test_configure_from_file(configure):
    from aiida.orm import QueryBuilder
    from aiida.orm import User

    qb = QueryBuilder()
    qb.append(User)
    user = qb.first()[0]
    assert user.first_name == 'AiiDA'
def reuse_kpoints_grid(grid, lowest_pk=False):
    """
    Retrieve a previously stored kpoints mesh data node.

    If no such ``KpointsData`` exists, a new (unstored) node is created.
    By default the node with the highest pk is returned.

    :param grid: the mesh to be retrieved
    :param bool lowest_pk: if True, return the node with the lowest pk instead
    :returns: a KpointsData node representing the requested grid
    """
    from aiida.orm import QueryBuilder
    from aiida.orm import KpointsData

    qbd = QueryBuilder()
    qbd.append(KpointsData,
               tag="kpoints",
               filters={
                   "attributes.mesh.0": grid[0],
                   "attributes.mesh.1": grid[1],
                   "attributes.mesh.2": grid[2]
               })
    if lowest_pk:
        order = "asc"
    else:
        order = "desc"
    qbd.order_by({"kpoints": [{"id": {"order": order}}]})

    if qbd.count() >= 1:
        return qbd.first()[0]

    kpoints = KpointsData()
    kpoints.set_kpoints_mesh(grid)
    return kpoints
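# A minimal usage sketch for the helper above, assuming a configured AiiDA
# profile is loaded. The returned node is unstored when no matching mesh was
# found, so it can still be modified before being attached to a calculation.
from aiida import load_profile

load_profile()

kpoints = reuse_kpoints_grid([4, 4, 4])
print(kpoints.get_kpoints_mesh())   # ([4, 4, 4], [0.0, 0.0, 0.0])
print(kpoints.is_stored)            # True only if a stored mesh was reused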
def _get_unique_node_property(self, project: str) -> Union[Node, Any]:
    query = QueryBuilder(limit=1)
    query.append(self.AIIDA_ENTITY, filters={"id": self._pk}, project=project)
    if query.count() != 1:
        raise AiidaEntityNotFound(
            f"Could not find {self.AIIDA_ENTITY} with PK {self._pk}.")
    return query.first()[0]
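# The same limit-1 projection works outside the wrapper class; a sketch that
# fetches a single column of a node by PK, with a hypothetical PK of 123 and
# assuming a configured AiiDA profile.
from aiida import load_profile
from aiida.orm import Node, QueryBuilder

load_profile()

pk = 123  # hypothetical; replace with a PK that exists in your database
query = QueryBuilder(limit=1)
query.append(Node, filters={'id': pk}, project='ctime')

if query.count() == 1:
    print('Node<{}> was created at {}'.format(pk, query.first()[0]))
else:
    print('No node with PK {}'.format(pk))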
def has_descendants(pk):
    """Check whether a node has descendants."""
    from aiida.orm import Node, QueryBuilder

    builder = QueryBuilder().append(
        Node, filters={'id': pk}, tag='origin'
    ).append(
        Node, with_incoming='origin', project='id'
    )
    if builder.first():
        return True
    else:
        return False
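# A short usage sketch, assuming a configured AiiDA profile: a freshly stored
# node has no outgoing links, so the helper returns False for it.
from aiida import load_profile
from aiida.orm import Int

load_profile()

node = Int(5).store()
print(has_descendants(node.pk))  # False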
def get(self):
    from aiida.orm import QueryBuilder, Dict

    qb = QueryBuilder()
    qb.append(Dict, project=['id', 'ctime', 'attributes'], tag='pdata')
    qb.order_by({'pdata': {'ctime': 'desc'}})
    result = qb.first()

    # Results are returned as a dictionary; datetime objects are
    # serialized as ISO 8601
    return dict(id=result[0],
                ctime=result[1].isoformat(),
                attributes=result[2])
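# Run standalone, the same query returns None from first() when no Dict node
# exists yet; a sketch assuming a configured AiiDA profile.
from aiida import load_profile
from aiida.orm import Dict, QueryBuilder

load_profile()

qb = QueryBuilder()
qb.append(Dict, project=['id', 'ctime', 'attributes'], tag='pdata')
qb.order_by({'pdata': {'ctime': 'desc'}})
result = qb.first()  # None if no Dict node is stored yet

if result is None:
    print('No Dict nodes stored yet')
else:
    pk, ctime, attributes = result
    print({'id': pk, 'ctime': ctime.isoformat(), 'attributes': attributes})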
def create_from_folder(cls, dirpath, label, description='', deduplicate=True):
    """Create a new `PseudoPotentialFamily` from the pseudo potentials contained in a directory.

    :param dirpath: absolute path to the folder containing the UPF files.
    :param label: label to give to the `PseudoPotentialFamily`; should not already exist.
    :param description: description to give to the family.
    :param deduplicate: if True, scan the database for existing pseudo potentials of the same type and with the
        same md5 checksum, and use those instead of the parsed ones.
    :raises ValueError: if a `PseudoPotentialFamily` already exists with the given name.
    """
    type_check(description, str, allow_none=True)

    try:
        cls.objects.get(label=label)
    except exceptions.NotExistent:
        family = cls(label=label, description=description)
    else:
        raise ValueError(f'the {cls.__name__} `{label}` already exists')

    parsed_pseudos = cls.parse_pseudos_from_directory(dirpath)
    family_pseudos = []

    for pseudo in parsed_pseudos:
        if deduplicate:
            query = QueryBuilder()
            query.append(cls.pseudo_type, subclassing=False, filters={'attributes.md5': pseudo.md5})
            existing = query.first()
            if existing:
                pseudo = existing[0]
        family_pseudos.append(pseudo)

    # Only store the `Group` and the pseudo nodes now, such that we don't have to worry about the clean up in the
    # case that an exception is raised during their creation.
    family.store()
    family.add_nodes([pseudo.store() for pseudo in family_pseudos])

    return family
def get_structure():
    """Return the UUID of a `StructureData` node representing bulk silicon.

    The database is first queried for an existing bulk silicon crystal. If none is found, one is created and
    stored. This function should be used as a default for CLI options that require a `StructureData` node. This
    way new users can launch the command without having to construct or import a structure first. This is the
    reason that we hardcode a bulk silicon crystal to be returned. More flexibility is not required for this
    purpose.

    :return: the UUID of a `StructureData` node representing bulk silicon
    """
    from ase.spacegroup import crystal
    from aiida.orm import QueryBuilder, StructureData

    # Filters that will match any elemental silicon structure with exactly two sites and a single `Si` kind
    filters = {
        'attributes.sites': {
            'of_length': 2
        },
        'attributes.kinds': {
            'of_length': 1
        },
        'attributes.kinds.0.symbols.0': 'Si'
    }

    builder = QueryBuilder().append(StructureData, filters=filters)
    results = builder.first()

    if not results:
        alat = 5.43
        ase_structure = crystal(
            'Si',
            [(0, 0, 0)],
            spacegroup=227,
            cellpar=[alat, alat, alat, 90, 90, 90],
            primitive_cell=True,
        )
        structure = StructureData(ase=ase_structure)
        structure.store()
    else:
        structure = results[0]

    return structure.uuid
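# A minimal usage sketch, assuming a configured AiiDA profile and that ase is
# installed (the helper builds the crystal with ase.spacegroup.crystal). The
# helper returns a UUID, so the node is loaded explicitly when needed.
from aiida import load_profile
from aiida.orm import load_node

load_profile()

uuid = get_structure()
structure = load_node(uuid)
print(structure.get_formula())  # 'Si2'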
def _add_nodes_to_groups(*, group_count: int, group_uuids: Iterable[Tuple[str, Set[str]]],
                         foreign_ids_reverse_mappings: Dict[str, Dict[str, int]]):
    """Add nodes to imported groups."""
    if not group_count:
        return

    pbar_base_str = 'Groups - '

    with get_progress_reporter()(total=group_count, desc=pbar_base_str) as progress:
        for groupuuid, groupnodes in group_uuids:
            if not groupnodes:
                progress.update()
                continue

            # TODO: cache these to avoid too many queries
            qb_group = QueryBuilder().append(Group, filters={'uuid': {'==': groupuuid}})
            group_ = qb_group.first()[0]

            progress.set_description_str(f'{pbar_base_str}label={group_.label}', refresh=False)
            progress.update()

            nodes_to_store = [
                foreign_ids_reverse_mappings[NODE_ENTITY_NAME][node_uuid] for node_uuid in groupnodes
            ]
            qb_nodes = QueryBuilder().append(Node, filters={'id': {'in': nodes_to_store}})

            # Adding nodes to group avoiding the SQLA ORM to increase speed
            nodes_to_add = [n[0].backend_entity for n in qb_nodes.all()]
            group_.backend_entity.add_nodes(nodes_to_add, skip_orm=True)
def retrieve_basis_sets(files, stop_if_existing):
    """Retrieve existing basis sets, or create them if they do not exist.

    :param files: list of basis set file paths
    :param stop_if_existing: if True, check the md5 of the files and, if a file already
        exists in the DB, raise a ValueError. If False, simply add the existing
        BasisSetData node to the group.
    :return: list of (BasisSetData, created) tuples
    """
    basis_and_created = []
    for basis_file in files:
        _, content = parse_basis(basis_file)
        md5sum = md5_from_string(content)

        qb = QueryBuilder()
        qb.append(BasisSetData, filters={"attributes.md5": {"==": md5sum}})
        existing_basis = qb.first()

        if existing_basis is None:
            # return the basis set data instances, not stored
            basisset, created = BasisSetData.get_or_create(
                basis_file, use_first=True, store_basis=False
            )
            # to check whether only one basis set per element exists
            # NOTE: actually, created has the meaning of "to_be_created"
            basis_and_created.append((basisset, created))
        else:
            if stop_if_existing:
                raise ValueError(
                    "A basis set with identical MD5 to {} cannot be added "
                    "with stop_if_existing".format(basis_file)
                )
            existing_basis = existing_basis[0]
            basis_and_created.append((existing_basis, False))

    return basis_and_created
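# The MD5-deduplication idiom above (filter on an md5 attribute, reuse the hit
# from first() if there is one) is not specific to basis sets. A hedged sketch
# of the same pattern for UPF pseudopotentials, assuming a configured AiiDA
# profile and an AiiDA version where aiida.common.utils.md5_file is available.
from aiida.common.utils import md5_file
from aiida.orm import QueryBuilder, UpfData


def get_or_create_upf(filepath):
    """Return an existing UpfData with the same MD5, or a new unstored one."""
    md5sum = md5_file(filepath)
    qb = QueryBuilder()
    qb.append(UpfData, filters={'attributes.md5': {'==': md5sum}})
    existing = qb.first()
    if existing is not None:
        return existing[0]
    return UpfData(file=filepath)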
def import_data_sqla(in_path,
                     group=None,
                     ignore_unknown_nodes=False,
                     extras_mode_existing='kcl',
                     extras_mode_new='import',
                     comment_mode='newest',
                     silent=False,
                     **kwargs):
    """Import exported AiiDA archive to the AiiDA database and repository.

    Specific for the SQLAlchemy backend.
    If ``in_path`` is a folder, calls extract_tree; otherwise, tries to detect the compression format
    (zip, tar.gz, tar.bz2, ...) and calls the correct function.

    :param in_path: the path to a file or folder that can be imported in AiiDA.
    :type in_path: str

    :param group: Group wherein all imported Nodes will be placed.
    :type group: :py:class:`~aiida.orm.groups.Group`

    :param extras_mode_existing: 3 letter code that will identify what to do with the extras import.
        The first letter acts on extras that are present in the original node and not present in the imported node.
        Can be either: 'k' (keep it) or 'n' (do not keep it).
        The second letter acts on the imported extras that are not present in the original node.
        Can be either: 'c' (create it) or 'n' (do not create it).
        The third letter defines what to do in case of a name collision.
        Can be either: 'l' (leave the old value), 'u' (update with a new value), 'd' (delete the extra), or
        'a' (ask what to do if the content is different).
    :type extras_mode_existing: str

    :param extras_mode_new: 'import' to import extras of new nodes or 'none' to ignore them.
    :type extras_mode_new: str

    :param comment_mode: Comment import modes (when same UUIDs are found).
        Can be either: 'newest' (will keep the Comment with the most recent modification time (mtime)) or
        'overwrite' (will overwrite existing Comments with the ones from the import file).
    :type comment_mode: str

    :param silent: suppress progress bar and summary.
    :type silent: bool

    :return: New and existing Nodes and Links.
    :rtype: dict

    :raises `~aiida.tools.importexport.common.exceptions.ImportValidationError`: if parameters or the contents of
        `metadata.json` or `data.json` can not be validated.
    :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: if the provided archive at ``in_path``
        is corrupted.
    :raises `~aiida.tools.importexport.common.exceptions.IncompatibleArchiveVersionError`: if the provided archive's
        export version is not equal to the export version of AiiDA at the moment of import.
    :raises `~aiida.tools.importexport.common.exceptions.ArchiveImportError`: if there are any internal errors when
        importing.
    :raises `~aiida.tools.importexport.common.exceptions.ImportUniquenessError`: if a new unique entity can not be
        created.
""" from aiida.backends.sqlalchemy.models.node import DbNode, DbLink from aiida.backends.sqlalchemy.utils import flag_modified # This is the export version expected by this function expected_export_version = StrictVersion(EXPORT_VERSION) # The returned dictionary with new and existing nodes and links ret_dict = {} # Initial check(s) if group: if not isinstance(group, Group): raise exceptions.ImportValidationError( 'group must be a Group entity') elif not group.is_stored: group.store() if silent: logging.disable(level=logging.CRITICAL) ################ # EXTRACT DATA # ################ # The sandbox has to remain open until the end with SandboxFolder() as folder: if os.path.isdir(in_path): extract_tree(in_path, folder) else: if tarfile.is_tarfile(in_path): extract_tar(in_path, folder, silent=silent, nodes_export_subfolder=NODES_EXPORT_SUBFOLDER, **kwargs) elif zipfile.is_zipfile(in_path): extract_zip(in_path, folder, silent=silent, nodes_export_subfolder=NODES_EXPORT_SUBFOLDER, **kwargs) else: raise exceptions.ImportValidationError( 'Unable to detect the input file format, it is neither a ' 'tar file, nor a (possibly compressed) zip file.') if not folder.get_content_list(): raise exceptions.CorruptArchive( 'The provided file/folder ({}) is empty'.format(in_path)) try: IMPORT_LOGGER.debug('CACHING metadata.json') with open(folder.get_abs_path('metadata.json'), encoding='utf8') as fhandle: metadata = json.load(fhandle) IMPORT_LOGGER.debug('CACHING data.json') with open(folder.get_abs_path('data.json'), encoding='utf8') as fhandle: data = json.load(fhandle) except IOError as error: raise exceptions.CorruptArchive( 'Unable to find the file {} in the import file or folder'. format(error.filename)) ###################### # PRELIMINARY CHECKS # ###################### export_version = StrictVersion(str(metadata['export_version'])) if export_version != expected_export_version: msg = 'Export file version is {}, can import only version {}'\ .format(metadata['export_version'], expected_export_version) if export_version < expected_export_version: msg += "\nUse 'verdi export migrate' to update this export file." else: msg += '\nUpdate your AiiDA version in order to import this file.' raise exceptions.IncompatibleArchiveVersionError(msg) start_summary(in_path, comment_mode, extras_mode_new, extras_mode_existing) ################################################################### # CREATE UUID REVERSE TABLES AND CHECK IF # # I HAVE ALL NODES FOR THE LINKS # ################################################################### IMPORT_LOGGER.debug( 'CHECKING IF NODES FROM LINKS ARE IN DB OR ARCHIVE...') linked_nodes = set( chain.from_iterable( (l['input'], l['output']) for l in data['links_uuid'])) group_nodes = set(chain.from_iterable(data['groups_uuid'].values())) # Check that UUIDs are valid linked_nodes = set(x for x in linked_nodes if validate_uuid(x)) group_nodes = set(x for x in group_nodes if validate_uuid(x)) import_nodes_uuid = set() for value in data['export_data'].get(NODE_ENTITY_NAME, {}).values(): import_nodes_uuid.add(value['uuid']) unknown_nodes = linked_nodes.union(group_nodes) - import_nodes_uuid if unknown_nodes and not ignore_unknown_nodes: raise exceptions.DanglingLinkError( 'The import file refers to {} nodes with unknown UUID, therefore it cannot be imported. Either first ' 'import the unknown nodes, or export also the parents when exporting. 
The unknown UUIDs are:\n' ''.format(len(unknown_nodes)) + '\n'.join('* {}'.format(uuid) for uuid in unknown_nodes)) ################################### # DOUBLE-CHECK MODEL DEPENDENCIES # ################################### # The entity import order. It is defined by the database model relationships. entity_order = [ USER_ENTITY_NAME, COMPUTER_ENTITY_NAME, NODE_ENTITY_NAME, GROUP_ENTITY_NAME, LOG_ENTITY_NAME, COMMENT_ENTITY_NAME ] # I make a new list that contains the entity names: # eg: ['User', 'Computer', 'Node', 'Group'] for import_field_name in metadata['all_fields_info']: if import_field_name not in entity_order: raise exceptions.ImportValidationError( "You are trying to import an unknown model '{}'!".format( import_field_name)) for idx, entity_name in enumerate(entity_order): dependencies = [] # for every field, I checked the dependencies given as value for key requires for field in metadata['all_fields_info'][entity_name].values(): try: dependencies.append(field['requires']) except KeyError: # (No ForeignKey) pass for dependency in dependencies: if dependency not in entity_order[:idx]: raise exceptions.ArchiveImportError( 'Entity {} requires {} but would be loaded first; stopping...' .format(entity_name, dependency)) ################################################### # CREATE IMPORT DATA DIRECT UNIQUE_FIELD MAPPINGS # ################################################### # This is nested dictionary of entity_name:{id:uuid} # to map one id (the pk) to a different one. # One of the things to remove for v0.4 # { # 'Node': {2362: '82a897b5-fb3a-47d7-8b22-c5fe1b4f2c14', # 2363: 'ef04aa5d-99e7-4bfd-95ef-fe412a6a3524', 2364: '1dc59576-af21-4d71-81c2-bac1fc82a84a'}, # 'User': {1: 'aiida@localhost'} # } IMPORT_LOGGER.debug('CREATING PK-2-UUID/EMAIL MAPPING...') import_unique_ids_mappings = {} # Export data since v0.3 contains the keys entity_name for entity_name, import_data in data['export_data'].items(): # Again I need the entity_name since that's what's being stored since 0.3 if entity_name in metadata['unique_identifiers']: # I have to reconvert the pk to integer import_unique_ids_mappings[entity_name] = { int(k): v[metadata['unique_identifiers'][entity_name]] for k, v in import_data.items() } ############### # IMPORT DATA # ############### # DO ALL WITH A TRANSACTION import aiida.backends.sqlalchemy session = aiida.backends.sqlalchemy.get_scoped_session() try: foreign_ids_reverse_mappings = {} new_entries = {} existing_entries = {} IMPORT_LOGGER.debug('GENERATING LIST OF DATA...') # Instantiate progress bar progress_bar = get_progress_bar(total=1, leave=False, disable=silent) pbar_base_str = 'Generating list of data - ' # Get total entities from data.json # To be used with progress bar number_of_entities = 0 # I first generate the list of data for entity_name in entity_order: entity = entity_names_to_entities[entity_name] # I get the unique identifier, since v0.3 stored under entity_name unique_identifier = metadata['unique_identifiers'].get( entity_name, None) # so, new_entries. 
Also, since v0.3 it makes more sense to use the entity_name new_entries[entity_name] = {} existing_entries[entity_name] = {} foreign_ids_reverse_mappings[entity_name] = {} # Not necessarily all models are exported if entity_name in data['export_data']: IMPORT_LOGGER.debug(' %s...', entity_name) progress_bar.set_description_str(pbar_base_str + entity_name, refresh=False) number_of_entities += len(data['export_data'][entity_name]) if unique_identifier is not None: import_unique_ids = set( v[unique_identifier] for v in data['export_data'][entity_name].values()) relevant_db_entries = {} if import_unique_ids: builder = QueryBuilder() builder.append(entity, filters={ unique_identifier: { 'in': import_unique_ids } }, project='*') if builder.count(): progress_bar = get_progress_bar( total=builder.count(), disable=silent) for object_ in builder.iterall(): progress_bar.update() relevant_db_entries.update({ getattr(object_[0], unique_identifier): object_[0] }) foreign_ids_reverse_mappings[entity_name] = { k: v.pk for k, v in relevant_db_entries.items() } IMPORT_LOGGER.debug(' GOING THROUGH ARCHIVE...') imported_comp_names = set() for key, value in data['export_data'][ entity_name].items(): if entity_name == GROUP_ENTITY_NAME: # Check if there is already a group with the same name, # and if so, recreate the name orig_label = value['label'] dupl_counter = 0 while QueryBuilder().append( entity, filters={ 'label': { '==': value['label'] } }).count(): # Rename the new group value[ 'label'] = orig_label + DUPL_SUFFIX.format( dupl_counter) dupl_counter += 1 if dupl_counter == 100: raise exceptions.ImportUniquenessError( 'A group of that label ( {} ) already exists and I could not create a new ' 'one'.format(orig_label)) elif entity_name == COMPUTER_ENTITY_NAME: # The following is done for compatibility # reasons in case the export file was generated # with the Django export method. In Django the # metadata and the transport parameters are # stored as (unicode) strings of the serialized # JSON objects and not as simple serialized # JSON objects. 
if isinstance(value['metadata'], (str, bytes)): value['metadata'] = json.loads( value['metadata']) # Check if there is already a computer with the # same name in the database builder = QueryBuilder() builder.append( entity, filters={'name': { '==': value['name'] }}, project=['*'], tag='res') dupl = builder.count( ) or value['name'] in imported_comp_names dupl_counter = 0 orig_name = value['name'] while dupl: # Rename the new computer value[ 'name'] = orig_name + DUPL_SUFFIX.format( dupl_counter) builder = QueryBuilder() builder.append(entity, filters={ 'name': { '==': value['name'] } }, project=['*'], tag='res') dupl = builder.count( ) or value['name'] in imported_comp_names dupl_counter += 1 if dupl_counter == 100: raise exceptions.ImportUniquenessError( 'A computer of that name ( {} ) already exists and I could not create a ' 'new one'.format(orig_name)) imported_comp_names.add(value['name']) if value[unique_identifier] in relevant_db_entries: # Already in DB # again, switched to entity_name in v0.3 existing_entries[entity_name][key] = value else: # To be added new_entries[entity_name][key] = value else: new_entries[entity_name] = data['export_data'][ entity_name] # Progress bar - reset for import progress_bar = get_progress_bar(total=number_of_entities, disable=silent) reset_progress_bar = {} # I import data from the given model for entity_name in entity_order: entity = entity_names_to_entities[entity_name] fields_info = metadata['all_fields_info'].get(entity_name, {}) unique_identifier = metadata['unique_identifiers'].get( entity_name, '') # Progress bar initialization - Model if reset_progress_bar: progress_bar = get_progress_bar( total=reset_progress_bar['total'], disable=silent) progress_bar.n = reset_progress_bar['n'] reset_progress_bar = {} pbar_base_str = '{}s - '.format(entity_name) progress_bar.set_description_str(pbar_base_str + 'Initializing', refresh=True) # EXISTING ENTRIES if existing_entries[entity_name]: # Progress bar update - Model progress_bar.set_description_str( pbar_base_str + '{} existing entries'.format( len(existing_entries[entity_name])), refresh=True) for import_entry_pk, entry_data in existing_entries[ entity_name].items(): unique_id = entry_data[unique_identifier] existing_entry_pk = foreign_ids_reverse_mappings[ entity_name][unique_id] import_data = dict( deserialize_field(k, v, fields_info=fields_info, import_unique_ids_mappings= import_unique_ids_mappings, foreign_ids_reverse_mappings= foreign_ids_reverse_mappings) for k, v in entry_data.items()) # TODO COMPARE, AND COMPARE ATTRIBUTES if entity_name == COMMENT_ENTITY_NAME: new_entry_uuid = merge_comment(import_data, comment_mode) if new_entry_uuid is not None: entry_data[unique_identifier] = new_entry_uuid new_entries[entity_name][ import_entry_pk] = entry_data if entity_name not in ret_dict: ret_dict[entity_name] = {'new': [], 'existing': []} ret_dict[entity_name]['existing'].append( (import_entry_pk, existing_entry_pk)) IMPORT_LOGGER.debug('Existing %s: %s (%s->%s)', entity_name, unique_id, import_entry_pk, existing_entry_pk) # Store all objects for this model in a list, and store them # all in once at the end. 
objects_to_create = list() # In the following list we add the objects to be updated objects_to_update = list() # This is needed later to associate the import entry with the new pk import_new_entry_pks = dict() # NEW ENTRIES if new_entries[entity_name]: # Progress bar update - Model progress_bar.set_description_str( pbar_base_str + '{} new entries'.format(len(new_entries[entity_name])), refresh=True) for import_entry_pk, entry_data in new_entries[ entity_name].items(): unique_id = entry_data[unique_identifier] import_data = dict( deserialize_field(k, v, fields_info=fields_info, import_unique_ids_mappings= import_unique_ids_mappings, foreign_ids_reverse_mappings= foreign_ids_reverse_mappings) for k, v in entry_data.items()) # We convert the Django fields to SQLA. Note that some of # the Django fields were converted to SQLA compatible # fields by the deserialize_field method. This was done # for optimization reasons in Django but makes them # compatible with the SQLA schema and they don't need any # further conversion. if entity_name in file_fields_to_model_fields: for file_fkey in file_fields_to_model_fields[ entity_name]: # This is an exception because the DbLog model defines the `_metadata` column instead of the # `metadata` column used in the Django model. This is because the SqlAlchemy model base # class already has a metadata attribute that cannot be overridden. For consistency, the # `DbLog` class however expects the `metadata` keyword in its constructor, so we should # ignore the mapping here if entity_name == LOG_ENTITY_NAME and file_fkey == 'metadata': continue model_fkey = file_fields_to_model_fields[ entity_name][file_fkey] if model_fkey in import_data: continue import_data[model_fkey] = import_data[file_fkey] import_data.pop(file_fkey, None) db_entity = get_object_from_string( entity_names_to_sqla_schema[entity_name]) objects_to_create.append(db_entity(**import_data)) import_new_entry_pks[unique_id] = import_entry_pk if entity_name == NODE_ENTITY_NAME: IMPORT_LOGGER.debug( 'STORING NEW NODE REPOSITORY FILES & ATTRIBUTES...') # NEW NODES for object_ in objects_to_create: import_entry_uuid = object_.uuid import_entry_pk = import_new_entry_pks[ import_entry_uuid] # Progress bar initialization - Node progress_bar.update() pbar_node_base_str = pbar_base_str + 'UUID={} - '.format( import_entry_uuid.split('-')[0]) # Before storing entries in the DB, I store the files (if these are nodes). # Note: only for new entries! subfolder = folder.get_subfolder( os.path.join(NODES_EXPORT_SUBFOLDER, export_shard_uuid(import_entry_uuid))) if not subfolder.exists(): raise exceptions.CorruptArchive( 'Unable to find the repository folder for Node with UUID={} in the exported ' 'file'.format(import_entry_uuid)) destdir = RepositoryFolder( section=Repository._section_name, uuid=import_entry_uuid) # Replace the folder, possibly destroying existing previous folders, and move the files # (faster if we are on the same filesystem, and in any case the source is a SandboxFolder) progress_bar.set_description_str(pbar_node_base_str + 'Repository', refresh=True) destdir.replace_with_folder(subfolder.abspath, move=True, overwrite=True) # For Nodes, we also have to store Attributes! 
IMPORT_LOGGER.debug('STORING NEW NODE ATTRIBUTES...') progress_bar.set_description_str(pbar_node_base_str + 'Attributes', refresh=True) # Get attributes from import file try: object_.attributes = data['node_attributes'][str( import_entry_pk)] except KeyError: raise exceptions.CorruptArchive( 'Unable to find attribute info for Node with UUID={}' .format(import_entry_uuid)) # For DbNodes, we also have to store extras if extras_mode_new == 'import': IMPORT_LOGGER.debug('STORING NEW NODE EXTRAS...') progress_bar.set_description_str( pbar_node_base_str + 'Extras', refresh=True) # Get extras from import file try: extras = data['node_extras'][str( import_entry_pk)] except KeyError: raise exceptions.CorruptArchive( 'Unable to find extra info for Node with UUID={}' .format(import_entry_uuid)) # TODO: remove when aiida extras will be moved somewhere else # from here extras = { key: value for key, value in extras.items() if not key.startswith('_aiida_') } if object_.node_type.endswith('code.Code.'): extras = { key: value for key, value in extras.items() if not key == 'hidden' } # till here object_.extras = extras elif extras_mode_new == 'none': IMPORT_LOGGER.debug('SKIPPING NEW NODE EXTRAS...') else: raise exceptions.ImportValidationError( "Unknown extras_mode_new value: {}, should be either 'import' or 'none'" ''.format(extras_mode_new)) # EXISTING NODES (Extras) IMPORT_LOGGER.debug('UPDATING EXISTING NODE EXTRAS...') import_existing_entry_pks = { entry_data[unique_identifier]: import_entry_pk for import_entry_pk, entry_data in existing_entries[entity_name].items() } for node in session.query(DbNode).filter( DbNode.uuid.in_(import_existing_entry_pks)).all(): import_entry_uuid = str(node.uuid) import_entry_pk = import_existing_entry_pks[ import_entry_uuid] # Progress bar initialization - Node pbar_node_base_str = pbar_base_str + 'UUID={} - '.format( import_entry_uuid.split('-')[0]) progress_bar.set_description_str(pbar_node_base_str + 'Extras', refresh=False) progress_bar.update() # Get extras from import file try: extras = data['node_extras'][str(import_entry_pk)] except KeyError: raise exceptions.CorruptArchive( 'Unable to find extra info for Node with UUID={}' .format(import_entry_uuid)) old_extras = node.extras.copy() # TODO: remove when aiida extras will be moved somewhere else # from here extras = { key: value for key, value in extras.items() if not key.startswith('_aiida_') } if node.node_type.endswith('code.Code.'): extras = { key: value for key, value in extras.items() if not key == 'hidden' } # till here new_extras = merge_extras(node.extras, extras, extras_mode_existing) if new_extras != old_extras: node.extras = new_extras flag_modified(node, 'extras') objects_to_update.append(node) else: # Update progress bar with new non-Node entries progress_bar.update(n=len(existing_entries[entity_name]) + len(new_entries[entity_name])) progress_bar.set_description_str(pbar_base_str + 'Storing', refresh=True) # Store them all in once; However, the PK are not set in this way... 
if objects_to_create: session.add_all(objects_to_create) if objects_to_update: session.add_all(objects_to_update) session.flush() just_saved = {} if import_new_entry_pks.keys(): reset_progress_bar = { 'total': progress_bar.total, 'n': progress_bar.n } progress_bar = get_progress_bar( total=len(import_new_entry_pks), disable=silent) builder = QueryBuilder() builder.append(entity, filters={ unique_identifier: { 'in': list(import_new_entry_pks.keys()) } }, project=[unique_identifier, 'id']) for entry in builder.iterall(): progress_bar.update() just_saved.update({entry[0]: entry[1]}) progress_bar.set_description_str(pbar_base_str + 'Done!', refresh=True) # Now I have the PKs, print the info # Moreover, add newly created Nodes to foreign_ids_reverse_mappings for unique_id, new_pk in just_saved.items(): from uuid import UUID if isinstance(unique_id, UUID): unique_id = str(unique_id) import_entry_pk = import_new_entry_pks[unique_id] foreign_ids_reverse_mappings[entity_name][ unique_id] = new_pk if entity_name not in ret_dict: ret_dict[entity_name] = {'new': [], 'existing': []} ret_dict[entity_name]['new'].append( (import_entry_pk, new_pk)) IMPORT_LOGGER.debug('N %s: %s (%s->%s)', entity_name, unique_id, import_entry_pk, new_pk) IMPORT_LOGGER.debug('STORING NODE LINKS...') import_links = data['links_uuid'] if import_links: progress_bar = get_progress_bar(total=len(import_links), disable=silent) pbar_base_str = 'Links - ' for link in import_links: # Check for dangling Links within the, supposed, self-consistent archive progress_bar.set_description_str( pbar_base_str + 'label={}'.format(link['label']), refresh=False) progress_bar.update() try: in_id = foreign_ids_reverse_mappings[NODE_ENTITY_NAME][ link['input']] out_id = foreign_ids_reverse_mappings[NODE_ENTITY_NAME][ link['output']] except KeyError: if ignore_unknown_nodes: continue raise exceptions.ImportValidationError( 'Trying to create a link with one or both unknown nodes, stopping (in_uuid={}, out_uuid={}, ' 'label={}, type={})'.format(link['input'], link['output'], link['label'], link['type'])) # Since backend specific Links (DbLink) are not validated upon creation, we will now validate them. source = QueryBuilder().append(Node, filters={ 'id': in_id }, project='*').first()[0] target = QueryBuilder().append(Node, filters={ 'id': out_id }, project='*').first()[0] link_type = LinkType(link['type']) # Check for existence of a triple link, i.e. unique triple. # If it exists, then the link already exists, continue to next link, otherwise, validate link. 
if link_triple_exists(source, target, link_type, link['label']): continue try: validate_link(source, target, link_type, link['label']) except ValueError as why: raise exceptions.ImportValidationError( 'Error occurred during Link validation: {}'.format( why)) # New link session.add( DbLink(input_id=in_id, output_id=out_id, label=link['label'], type=link['type'])) if 'Link' not in ret_dict: ret_dict['Link'] = {'new': []} ret_dict['Link']['new'].append((in_id, out_id)) IMPORT_LOGGER.debug(' (%d new links...)', len(ret_dict.get('Link', {}).get('new', []))) IMPORT_LOGGER.debug('STORING GROUP ELEMENTS...') import_groups = data['groups_uuid'] if import_groups: progress_bar = get_progress_bar(total=len(import_groups), disable=silent) pbar_base_str = 'Groups - ' for groupuuid, groupnodes in import_groups.items(): # # TODO: cache these to avoid too many queries qb_group = QueryBuilder().append( Group, filters={'uuid': { '==': groupuuid }}) group_ = qb_group.first()[0] progress_bar.set_description_str( pbar_base_str + 'label={}'.format(group_.label), refresh=False) progress_bar.update() nodes_ids_to_add = [ foreign_ids_reverse_mappings[NODE_ENTITY_NAME][node_uuid] for node_uuid in groupnodes ] qb_nodes = QueryBuilder().append( Node, filters={'id': { 'in': nodes_ids_to_add }}) # Adding nodes to group avoiding the SQLA ORM to increase speed nodes_to_add = [n[0].backend_entity for n in qb_nodes.all()] group_.backend_entity.add_nodes(nodes_to_add, skip_orm=True) ###################################################### # Put everything in a specific group ###################################################### existing = existing_entries.get(NODE_ENTITY_NAME, {}) existing_pk = [ foreign_ids_reverse_mappings[NODE_ENTITY_NAME][v['uuid']] for v in existing.values() ] new = new_entries.get(NODE_ENTITY_NAME, {}) new_pk = [ foreign_ids_reverse_mappings[NODE_ENTITY_NAME][v['uuid']] for v in new.values() ] pks_for_group = existing_pk + new_pk # So that we do not create empty groups if pks_for_group: # If user specified a group, import all things into it if not group: from aiida.backends.sqlalchemy.models.group import DbGroup # Get an unique name for the import group, based on the current (local) time basename = timezone.localtime( timezone.now()).strftime('%Y%m%d-%H%M%S') counter = 0 group_label = basename while session.query(DbGroup).filter( DbGroup.label == group_label).count() > 0: counter += 1 group_label = '{}_{}'.format(basename, counter) if counter == 100: raise exceptions.ImportUniquenessError( "Overflow of import groups (more than 100 import groups exists with basename '{}')" ''.format(basename)) group = ImportGroup(label=group_label) session.add(group.backend_entity._dbmodel) # Adding nodes to group avoiding the SQLA ORM to increase speed builder = QueryBuilder().append( Node, filters={'id': { 'in': pks_for_group }}) progress_bar = get_progress_bar(total=len(pks_for_group), disable=silent) progress_bar.set_description_str( 'Creating import Group - Preprocessing', refresh=True) first = True nodes = [] for entry in builder.iterall(): if first: progress_bar.set_description_str( 'Creating import Group', refresh=False) first = False progress_bar.update() nodes.append(entry[0].backend_entity) group.backend_entity.add_nodes(nodes, skip_orm=True) progress_bar.set_description_str('Done (cleaning up)', refresh=True) else: IMPORT_LOGGER.debug( 'No Nodes to import, so no Group created, if it did not already exist' ) IMPORT_LOGGER.debug('COMMITTING EVERYTHING...') session.commit() # Finalize Progress bar 
close_progress_bar(leave=False) # Summarize import result_summary(ret_dict, getattr(group, 'label', None)) except: # Finalize Progress bar close_progress_bar(leave=False) result_summary({}, None) IMPORT_LOGGER.debug('Rolling back') session.rollback() raise # Reset logging level if silent: logging.disable(level=logging.NOTSET) return ret_dict
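# One self-contained pattern from the importer above is probing for a free
# Group label with QueryBuilder before creating the group. A minimal sketch,
# assuming a configured AiiDA profile; the suffix format is illustrative only.
from aiida import load_profile
from aiida.orm import Group, QueryBuilder

load_profile()


def unique_group_label(base_label, max_tries=100):
    """Return base_label, or base_label with a numeric suffix if it is taken."""
    label = base_label
    counter = 0
    while QueryBuilder().append(Group, filters={'label': {'==': label}}).count():
        label = '{}_{}'.format(base_label, counter)
        counter += 1
        if counter == max_tries:
            raise RuntimeError('no free label found based on {}'.format(base_label))
    return label


group = Group(label=unique_group_label('import_group')).store()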
def upload_otfg_family(entries, group_label, group_description, stop_if_existing=True):
    """
    Set up a family (group) of OTFG pseudopotential strings.
    """
    from aiida.common import UniquenessError, NotExistent, ValidationError
    from aiida.orm.querybuilder import QueryBuilder
    # from aiida.common import aiidalogger

    # Try to retrieve the group if it already exists
    try:
        group = OTFGGroup.get(label=group_label)
        group_created = False
    except NotExistent:
        group = OTFGGroup(label=group_label)
        group_created = True

    group.description = group_description

    otfg_and_created = []
    nentries = len(entries)
    for entry in entries:
        # Add it if it is just one existing data
        if isinstance(entry, OTFGData):
            element, setting = entry.element, entry.string
        elif isinstance(entry, str):
            element, setting = split_otfg_entry(entry)
        elif isinstance(entry, UspData):
            element, setting = entry.element, entry.md5sum

        qb = QueryBuilder()
        qb.append(OTFGData,
                  filters={
                      'attributes.otfg_entry': {
                          "==": setting
                      },
                      'attributes.element': {
                          "==": element
                      }
                  })
        existing_otfg = qb.first()

        # Otherwise, try to find Usp data
        if existing_otfg is None:
            qb = QueryBuilder()
            qb.append(UspData,
                      filters={
                          'attributes.md5sum': {
                              "==": setting
                          },
                          'attributes.element': {
                              "==": element
                          }
                      })
            existing_otfg = qb.first()

        # Act based on whether the data exists
        if existing_otfg is None:
            if isinstance(entry, OTFGData):
                otfg_and_created.append((entry, True))
            elif isinstance(entry, str):
                otfg, created = OTFGData.get_or_create(entry,
                                                       use_first=True,
                                                       store_otfg=False)
                otfg_and_created.append((otfg, created))
            elif isinstance(entry, UspData):
                otfg_and_created.append((entry, True))
        else:
            if stop_if_existing:
                raise ValidationError(
                    "An equivalent OTFG entry already exists and cannot be "
                    "added when stop_if_existing is True")
            existing_otfg = existing_otfg[0]
            otfg_and_created.append((existing_otfg, False))

    # Check uniqueness for the complete group
    elements = [(i[0].element, i[0].string) for i in otfg_and_created]

    # Add the existing group entries to the list to check
    if not group_created:
        for aiida_n in group.nodes:
            if not isinstance(aiida_n, (OTFGData, UspData)):
                print(("Warning: unsupported node: {}".format(aiida_n)))
                continue
            elements.append((aiida_n.element, aiida_n.string))

    # Discard duplicated pairs
    elements = set(elements)
    elements_names = [e[0] for e in elements]

    # Check the uniqueness of the complete group
    if not len(elements_names) == len(set(elements_names)):
        duplicates = set(
            [x for x in elements_names if elements_names.count(x) > 1])
        dup_string = ", ".join(duplicates)
        raise UniquenessError("More than one node found for the elements: " +
                              dup_string + ".")

    # If we survive here uniqueness is fine
    # Save the group - note we have not added the nodes yet
    if group_created:
        group.store()

    # Save the OTFG in the database if necessary and add them to the group
    for otfg, created in otfg_and_created:
        if created:
            otfg.store()

    nodes_add = [otfg for otfg, created in otfg_and_created]
    nodes_new = [otfg for otfg, created in otfg_and_created if created is True]
    group.add_nodes(nodes_add)

    return nentries, len(nodes_new)
def upload_upf_family(folder, group_name, group_description, stop_if_existing=True):
    """
    Upload a set of UPF files in a given group.

    :param folder: a path containing all UPF files to be added.
        Only files ending in .UPF (case-insensitive) are considered.
    :param group_name: the name of the group to create. If it exists and is
        non-empty, a UniquenessError is raised.
    :param group_description: a string to be set as the group description.
        Overwrites previous descriptions, if the group was existing.
    :param stop_if_existing: if True, check the md5 of the files and, if a file
        already exists in the DB, raise a ValueError. If False, simply add the
        existing UPFData node to the group.
    """
    import os

    import aiida.common
    from aiida.common import aiidalogger
    from aiida.orm import Group
    from aiida.common.exceptions import UniquenessError, NotExistent
    from aiida.backends.utils import get_automatic_user
    from aiida.orm import QueryBuilder

    if not os.path.isdir(folder):
        raise ValueError("folder must be a directory")

    # only files, and only those ending with .upf or .UPF;
    # go to the real file if it is a symlink
    files = [
        os.path.realpath(os.path.join(folder, i)) for i in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, i)) and i.lower().endswith('.upf')
    ]

    nfiles = len(files)

    try:
        group = Group.get(name=group_name, type_string=UPFGROUP_TYPE)
        group_created = False
    except NotExistent:
        group = Group(name=group_name,
                      type_string=UPFGROUP_TYPE,
                      user=get_automatic_user())
        group_created = True

    if group.user != get_automatic_user():
        raise UniquenessError("There is already a UpfFamily group with name {}"
                              ", but it belongs to user {}, therefore you "
                              "cannot modify it".format(group_name, group.user.email))

    # Always update description, even if the group already existed
    group.description = group_description

    # NOTE: GROUP SAVED ONLY AFTER CHECKS OF UNICITY

    pseudo_and_created = []

    for f in files:
        md5sum = aiida.common.utils.md5_file(f)
        qb = QueryBuilder()
        qb.append(UpfData, filters={'attributes.md5': {'==': md5sum}})
        existing_upf = qb.first()

        #~ existing_upf = UpfData.query(dbattributes__key="md5",
        #~                              dbattributes__tval=md5sum)

        if existing_upf is None:
            # return the upfdata instances, not stored
            pseudo, created = UpfData.get_or_create(f,
                                                    use_first=True,
                                                    store_upf=False)
            # to check whether only one upf per element exists
            # NOTE: actually, created has the meaning of "to_be_created"
            pseudo_and_created.append((pseudo, created))
        else:
            if stop_if_existing:
                raise ValueError("A UPF with identical MD5 to {} cannot be "
                                 "added with stop_if_existing".format(f))
            existing_upf = existing_upf[0]
            pseudo_and_created.append((existing_upf, False))

    # check whether pseudos are unique per element
    elements = [(i[0].element, i[0].md5sum) for i in pseudo_and_created]
    # If the group already exists, check also that I am not inserting more than
    # once the same element
    if not group_created:
        for aiida_n in group.nodes:
            # Skip non-pseudos
            if not isinstance(aiida_n, UpfData):
                continue
            elements.append((aiida_n.element, aiida_n.md5sum))

    # Discard elements with the same MD5, that would not be stored twice
    elements = set(elements)
    elements_names = [e[0] for e in elements]

    if not len(elements_names) == len(set(elements_names)):
        duplicates = set(
            [x for x in elements_names if elements_names.count(x) > 1])
        duplicates_string = ", ".join(i for i in duplicates)
        raise UniquenessError("More than one UPF found for the elements: " +
                              duplicates_string + ".")

    # At this point, save the group, if still unstored
    if group_created:
        group.store()

    # save the upf in the database, and add them to the group
    for pseudo, created in pseudo_and_created:
        if created:
            pseudo.store()
            aiidalogger.debug("New node {} created for file {}".format(
                pseudo.uuid, pseudo.filename))
        else:
            aiidalogger.debug("Reusing node {} for file {}".format(
                pseudo.uuid, pseudo.filename))

    # Add elements to the group all together
    group.add_nodes(pseudo for pseudo, created in pseudo_and_created)

    nuploaded = len([_ for _, created in pseudo_and_created if created])

    return nfiles, nuploaded
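# A hypothetical invocation of the helper above, assuming the same (older)
# AiiDA version as the snippet, a loaded profile (for example inside
# `verdi shell`), and a ./pseudos folder containing the UPF files to upload.
nfiles, nuploaded = upload_upf_family(
    './pseudos',
    group_name='my_upf_family',
    group_description='UPF pseudopotentials for testing',
    stop_if_existing=False,
)
print('{} new pseudos stored out of {} files found'.format(nuploaded, nfiles))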
def cmd_relax(plugin, structure, protocol, relaxation_type, threshold_forces, threshold_stress, number_machines,
              wallclock_seconds, daemon, show_engines):
    """Relax a crystal structure using the common relax workflow for one of the existing plugin implementations.

    The command will automatically try to find and load the codes that are required by the plugin workflow. If no
    code is installed for at least one of the calculation engines, the command will fail. Use the `--show-engine`
    flag to display the required calculation engines for the selected plugin workflow.
    """
    # pylint: disable=too-many-locals
    from aiida.orm import QueryBuilder, Code

    process_class = load_workflow_entry_point('relax', plugin)
    generator = process_class.get_inputs_generator()

    number_engines = len(generator.get_calc_types())

    if number_machines is None:
        number_machines = [1] * number_engines

    if len(number_machines) != number_engines:
        raise click.BadParameter(
            f'{process_class.__name__} has {number_engines} engine steps, so requires {number_engines} values',
            param_hint='--number-machines'
        )

    if wallclock_seconds is None:
        wallclock_seconds = [1 * 3600] * number_engines

    if len(wallclock_seconds) != number_engines:
        raise click.BadParameter(
            f'{process_class.__name__} has {number_engines} engine steps, so requires {number_engines} values',
            param_hint='--wallclock-seconds'
        )

    if not generator.is_valid_protocol(protocol):
        protocols = generator.get_protocol_names()
        process_class_name = process_class.__name__
        message = f'`{protocol}` is not implemented by `{process_class_name}` workflow: choose one of {protocols}'
        raise click.BadParameter(message, param_hint='protocol')

    if show_engines:
        for engine in generator.get_calc_types():
            schema = generator.get_calc_type_schema(engine)
            click.secho(engine, fg='red', bold=True)
            click.echo('Required code plugin: {}'.format(schema['code_plugin']))
            click.echo('Engine description: {}'.format(schema['description']))

        return

    engines = {}

    for index, engine in enumerate(generator.get_calc_types()):
        schema = generator.get_calc_type_schema(engine)
        engines[engine] = {
            'options': {
                'resources': {
                    'num_machines': number_machines[index]
                },
                'max_wallclock_seconds': wallclock_seconds[index],
            }
        }
        code_plugin = schema['code_plugin']
        query = QueryBuilder().append(Code, filters={'attributes.input_plugin': code_plugin})

        code = query.first()

        if code is None:
            raise click.UsageError(f'could not find a configured code for the plugin `{code_plugin}`.')

        engines[engine]['code'] = code[0].full_label

    builder = generator.get_builder(structure, engines, protocol, relaxation_type, threshold_forces,
                                    threshold_stress)
    utils.launch_process(builder, daemon)
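# The code lookup used above is handy on its own; a sketch that finds a
# configured Code for a given calculation plugin, assuming a configured AiiDA
# profile. The 'quantumespresso.pw' entry point is only an example.
from aiida import load_profile
from aiida.orm import Code, QueryBuilder

load_profile()

code_plugin = 'quantumespresso.pw'
query = QueryBuilder().append(Code, filters={'attributes.input_plugin': code_plugin})
result = query.first()

if result is None:
    print('No configured code found for plugin {}'.format(code_plugin))
else:
    print('Found code {}'.format(result[0].full_label))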