def test_merge_deprecated_yaml(tmp_path): """Test that an existing 'cache_config.yml' is correctly merged into the main config. An AiidaDeprecationWarning should also be raised. """ from aiida.common.warnings import AiidaDeprecationWarning from aiida.manage import configuration from aiida.manage.configuration import settings, load_profile, reset_profile, get_config_option # Store the current configuration instance and config directory path current_config = configuration.CONFIG current_config_path = current_config.dirpath current_profile_name = configuration.PROFILE.name try: reset_profile() configuration.CONFIG = None # Create a temporary folder, set it as the current config directory path settings.AIIDA_CONFIG_FOLDER = str(tmp_path) config_dictionary = json.loads( Path(__file__).parent.joinpath( 'configuration/migrations/test_samples/reference/5.json'). read_text()) config_dictionary['profiles']['default'][ 'AIIDADB_REPOSITORY_URI'] = f"file:///{tmp_path/'repo'}" cache_dictionary = { 'default': { 'default': True, 'enabled': ['aiida.calculations:quantumespresso.pw'], 'disabled': ['aiida.calculations:templatereplacer'] } } tmp_path.joinpath('config.json').write_text( json.dumps(config_dictionary)) tmp_path.joinpath('cache_config.yml').write_text( yaml.dump(cache_dictionary)) with pytest.warns(AiidaDeprecationWarning, match='cache_config.yml'): configuration.CONFIG = configuration.load_config() load_profile('default') assert get_config_option('caching.default_enabled') is True assert get_config_option('caching.enabled_for') == [ 'aiida.calculations:quantumespresso.pw' ] assert get_config_option('caching.disabled_for') == [ 'aiida.calculations:templatereplacer' ] # should have now been moved to cache_config.yml.<DATETIME> assert not tmp_path.joinpath('cache_config.yml').exists() finally: # Reset the config folder path and the config instance. Note this will always be executed after the yield no # matter what happened in the test that used this fixture. reset_profile() settings.AIIDA_CONFIG_FOLDER = current_config_path configuration.CONFIG = current_config load_profile(current_profile_name)
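# The test above shows that the deprecated `cache_config.yml` is merged into the main config
# and then renamed out of the way. A minimal sketch, assuming a profile is already loaded and
# that the list-valued caching options can be set directly through `Config.set_option`, of
# expressing the same settings with the profile config options the test asserts on:
from aiida.manage.configuration import get_config, get_config_option

config = get_config()
config.set_option('caching.default_enabled', True)
config.set_option('caching.enabled_for', ['aiida.calculations:quantumespresso.pw'])
config.set_option('caching.disabled_for', ['aiida.calculations:templatereplacer'])

assert get_config_option('caching.default_enabled') is True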
async def task_submit_job(node: CalcJobNode, transport_queue: TransportQueue, cancellable: InterruptableFuture): """Transport task that will attempt to submit a job calculation. The task will first request a transport from the queue. Once the transport is yielded, the relevant execmanager function is called, wrapped in the exponential_backoff_retry coroutine, which, in case of a caught exception, will retry after an interval that increases exponentially with the number of retries, for a maximum number of retries. If all retries fail, the task will raise a TransportTaskException :param node: the node that represents the job calculation :param transport_queue: the TransportQueue from which to request a Transport :param cancellable: the cancelled flag that will be queried to determine whether the task was cancelled :raises: TransportTaskException if after the maximum number of retries the transport task still excepted """ if node.get_state() == CalcJobState.WITHSCHEDULER: assert node.get_job_id( ) is not None, 'job is WITHSCHEDULER, however, it does not have a job id' logger.warning( f'CalcJob<{node.pk}> already marked as WITHSCHEDULER, skipping task_submit_job' ) return node.get_job_id() initial_interval = get_config_option(RETRY_INTERVAL_OPTION) max_attempts = get_config_option(MAX_ATTEMPTS_OPTION) authinfo = node.get_authinfo() async def do_submit(): with transport_queue.request_transport(authinfo) as request: transport = await cancellable.with_interrupt(request) return execmanager.submit_calculation(node, transport) try: logger.info(f'scheduled request to submit CalcJob<{node.pk}>') ignore_exceptions = (plumpy.futures.CancelledError, plumpy.process_states.Interruption) result = await exponential_backoff_retry( do_submit, initial_interval, max_attempts, logger=node.logger, ignore_exceptions=ignore_exceptions) except (plumpy.futures.CancelledError, plumpy.process_states.Interruption): # pylint: disable=try-except-raise raise except Exception as exception: logger.warning(f'submitting CalcJob<{node.pk}> failed') raise TransportTaskException( f'submit_calculation failed {max_attempts} times consecutively' ) from exception else: logger.info(f'submitting CalcJob<{node.pk}> successful') node.set_state(CalcJobState.WITHSCHEDULER) return result
async def task_stash_job(node: CalcJobNode, transport_queue: TransportQueue, cancellable: InterruptableFuture):
    """Transport task that will optionally stash files of a completed job calculation on the remote.

    The task will first request a transport from the queue. Once the transport is yielded, the relevant execmanager
    function is called, wrapped in the exponential_backoff_retry coroutine, which, in case of a caught exception,
    will retry after an interval that increases exponentially with the number of retries, for a maximum number of
    retries. If all retries fail, the task will raise a TransportTaskException

    :param node: the node that represents the job calculation
    :param transport_queue: the TransportQueue from which to request a Transport
    :param cancellable: the cancelled flag that will be queried to determine whether the task was cancelled
    :type cancellable: :class:`aiida.engine.utils.InterruptableFuture`
    :return: None if the task was successfully completed
    :raises: TransportTaskException if after the maximum number of retries the transport task still excepted
    """
    if node.get_state() == CalcJobState.RETRIEVING:
        logger.warning(f'calculation<{node.pk}> already marked as RETRIEVING, skipping task_stash_job')
        return

    initial_interval = get_config_option(RETRY_INTERVAL_OPTION)
    max_attempts = get_config_option(MAX_ATTEMPTS_OPTION)

    authinfo = node.get_authinfo()

    async def do_stash():
        with transport_queue.request_transport(authinfo) as request:
            transport = await cancellable.with_interrupt(request)

            logger.info(f'stashing calculation<{node.pk}>')
            return execmanager.stash_calculation(node, transport)

    try:
        await exponential_backoff_retry(
            do_stash,
            initial_interval,
            max_attempts,
            logger=node.logger,
            ignore_exceptions=plumpy.process_states.Interruption
        )
    except plumpy.process_states.Interruption:
        raise
    except Exception as exception:
        logger.warning(f'stashing calculation<{node.pk}> failed')
        raise TransportTaskException(f'stash_calculation failed {max_attempts} times consecutively') from exception
    else:
        node.set_state(CalcJobState.RETRIEVING)
        logger.info(f'stashing calculation<{node.pk}> successful')
        return
async def task_kill_job(node: CalcJobNode, transport_queue: TransportQueue, cancellable: InterruptableFuture): """Transport task that will attempt to kill a job calculation. The task will first request a transport from the queue. Once the transport is yielded, the relevant execmanager function is called, wrapped in the exponential_backoff_retry coroutine, which, in case of a caught exception, will retry after an interval that increases exponentially with the number of retries, for a maximum number of retries. If all retries fail, the task will raise a TransportTaskException :param node: the node that represents the job calculation :param transport_queue: the TransportQueue from which to request a Transport :param cancellable: the cancelled flag that will be queried to determine whether the task was cancelled :raises: TransportTaskException if after the maximum number of retries the transport task still excepted """ initial_interval = get_config_option(RETRY_INTERVAL_OPTION) max_attempts = get_config_option(MAX_ATTEMPTS_OPTION) if node.get_state() in [CalcJobState.UPLOADING, CalcJobState.SUBMITTING]: logger.warning( f'CalcJob<{node.pk}> killed, it was in the {node.get_state()} state' ) return True authinfo = node.get_authinfo() async def do_kill(): with transport_queue.request_transport(authinfo) as request: transport = await cancellable.with_interrupt(request) return execmanager.kill_calculation(node, transport) try: logger.info(f'scheduled request to kill CalcJob<{node.pk}>') result = await exponential_backoff_retry(do_kill, initial_interval, max_attempts, logger=node.logger) except plumpy.process_states.Interruption: raise except Exception as exception: logger.warning(f'killing CalcJob<{node.pk}> failed') raise TransportTaskException( f'kill_calculation failed {max_attempts} times consecutively' ) from exception else: logger.info(f'killing CalcJob<{node.pk}> successful') node.set_scheduler_state(JobState.DONE) return result
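# The submit, stash and kill tasks above all share the same retry pattern. The following is a
# condensed sketch of that pattern, assuming the same module-level names used by those tasks
# (`exponential_backoff_retry`, `RETRY_INTERVAL_OPTION`, `MAX_ATTEMPTS_OPTION`,
# `TransportTaskException`, `plumpy`); the body of `do_operation` is a placeholder for the
# execmanager call a real task would make.
async def task_example(node, transport_queue, cancellable):
    initial_interval = get_config_option(RETRY_INTERVAL_OPTION)
    max_attempts = get_config_option(MAX_ATTEMPTS_OPTION)
    authinfo = node.get_authinfo()

    async def do_operation():
        # The transport is only used once the queue yields it; the wait itself can be interrupted
        with transport_queue.request_transport(authinfo) as request:
            transport = await cancellable.with_interrupt(request)
            return transport.whoami()  # placeholder for an execmanager call

    try:
        result = await exponential_backoff_retry(
            do_operation,
            initial_interval,
            max_attempts,
            logger=node.logger,
            ignore_exceptions=plumpy.process_states.Interruption
        )
    except plumpy.process_states.Interruption:
        # Interruptions propagate unchanged instead of being converted to a TransportTaskException
        raise
    except Exception as exception:
        raise TransportTaskException(f'operation failed {max_attempts} times consecutively') from exception
    else:
        return result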
def configure_logging(with_orm=False, daemon=False, daemon_log_file=None): """ Setup the logging by retrieving the LOGGING dictionary from aiida and passing it to the python module logging.config.dictConfig. If the logging needs to be setup for the daemon, set the argument 'daemon' to True and specify the path to the log file. This will cause a 'daemon_handler' to be added to all the configured loggers, that is a RotatingFileHandler that writes to the log file. :param daemon: configure the logging for a daemon task by adding a file handler instead of the default 'console' StreamHandler :param daemon_log_file: absolute filepath of the log file for the RotatingFileHandler """ from logging.config import dictConfig # Evaluate the `LOGGING` configuration to resolve the lambdas that will retrieve the correct values based on the # currently configured profile. Pass a deep copy of `LOGGING` to ensure that the original remains unaltered. config = evaluate_logging_configuration(copy.deepcopy(LOGGING)) daemon_handler_name = 'daemon_log_file' # Add the daemon file handler to all loggers if daemon=True if daemon is True: # Daemon always needs to run with ORM enabled with_orm = True if daemon_log_file is None: raise ValueError('daemon_log_file has to be defined when configuring for the daemon') config.setdefault('handlers', {}) config['handlers'][daemon_handler_name] = { 'level': 'DEBUG', 'formatter': 'halfverbose', 'class': 'logging.handlers.RotatingFileHandler', 'filename': daemon_log_file, 'encoding': 'utf8', 'maxBytes': 10000000, # 10 MB 'backupCount': 10, } for logger in config.get('loggers', {}).values(): logger.setdefault('handlers', []).append(daemon_handler_name) try: # Remove the `console` stdout stream handler to prevent messages being duplicated in the daemon log file logger['handlers'].remove('console') except ValueError: pass # Add the `DbLogHandler` if `with_orm` is `True` if with_orm: handler_dblogger = 'dblogger' config['handlers'][handler_dblogger] = { 'level': get_config_option('logging.db_loglevel'), 'class': 'aiida.orm.utils.log.DBLogHandler', } config['loggers']['aiida']['handlers'].append(handler_dblogger) dictConfig(config)
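# A minimal usage sketch of the function above; the daemon log file path is illustrative,
# not a location AiiDA mandates.
from aiida.common.log import configure_logging

# Default configuration: console handler only, with levels resolved from the current profile
configure_logging()

# Daemon configuration: adds the RotatingFileHandler, drops the console handler and implies with_orm=True
configure_logging(daemon=True, daemon_log_file='/tmp/aiida-daemon.log')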
def test_get_config_option_default(self):
    """Test that `get_config_option` returns the option default if it is not specified globally or for the current profile."""
    option_name = 'logging.aiida_loglevel'
    option = get_option(option_name)

    # If the option has not been set explicitly, `get_config_option` should return the option default
    option_value = get_config_option(option_name)
    self.assertEqual(option_value, option.default)

def test_get_config_option_global(self):
    """Test that `get_config_option` correctly agglomerates upwards and so retrieves globally set config options."""
    config = get_config()

    option_name = 'logging.aiida_loglevel'
    option_value_global = 'CRITICAL'

    # Set a specific value globally, which should then be returned by `get_config_option` due to agglomeration
    config.set_option(option_name, option_value_global)
    option_value = get_config_option(option_name)
    self.assertEqual(option_value, option_value_global)

def test_get_config_option_profile_specific(self):
    """Test that `get_config_option` correctly gets a configuration option if specified for the current profile."""
    config = get_config()
    profile = config.current_profile

    option_name = 'logging.aiida_loglevel'
    option_value_profile = 'WARNING'

    # Set a specific value for the current profile, which should then be returned by `get_config_option`
    config.set_option(option_name, option_value_profile, scope=profile.name)
    option_value = get_config_option(option_name)
    self.assertEqual(option_value, option_value_profile)
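# Taken together, the three tests describe the lookup order of `get_config_option`:
# profile-specific values win over globally set values, which in turn win over the option
# default. A condensed sketch, assuming the option has not yet been set in either scope:
config = get_config()
profile = config.current_profile
option_name = 'logging.aiida_loglevel'

assert get_config_option(option_name) == get_option(option_name).default  # nothing set: option default

config.set_option(option_name, 'CRITICAL')  # global scope
assert get_config_option(option_name) == 'CRITICAL'

config.set_option(option_name, 'WARNING', scope=profile.name)  # profile scope overrides global
assert get_config_option(option_name) == 'WARNING'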
def loglevel(self):
    return get_config_option('logging.circus_loglevel')
'testing': { '()': NotInTestingFilter } }, 'handlers': { 'console': { 'level': 'DEBUG', 'class': 'logging.StreamHandler', 'formatter': 'halfverbose', 'filters': ['testing'] }, }, 'loggers': { 'aiida': { 'handlers': ['console'], 'level': lambda: get_config_option('logging.aiida_loglevel'), 'propagate': False, }, 'tornado': { 'handlers': ['console'], 'level': lambda: get_config_option('logging.tornado_loglevel'), 'propagate': False, }, 'plumpy': { 'handlers': ['console'], 'level': lambda: get_config_option('logging.plumpy_loglevel'), 'propagate': False, }, 'kiwipy': { 'handlers': ['console'], 'level': lambda: get_config_option('logging.kiwipy_loglevel'),
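# The log levels in the dictionary above are lambdas so that `get_config_option` is only
# consulted once a profile has actually been configured. The helper below is an illustrative
# reimplementation of that deferred evaluation, not the exact `evaluate_logging_configuration`
# from aiida-core.
def evaluate_logging_configuration(dictionary):
    """Recursively evaluate the logging configuration, calling lambdas to resolve their values."""
    result = {}

    for key, value in dictionary.items():
        if isinstance(value, dict):
            result[key] = evaluate_logging_configuration(value)
        elif callable(value):
            result[key] = value()  # e.g. lambda: get_config_option('logging.aiida_loglevel')
        else:
            result[key] = value

    return result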
def set_value(self, key, value, with_transaction=False, subspecifier_value=None, other_attribs=None, stop_if_existing=False): # pylint: disable=too-many-arguments """ Set a new value in the DB, possibly associated to the given subspecifier. :note: This method also stored directly in the DB. :param key: a string with the key to create (must be a level-0 attribute, that is it cannot contain the separator cls._sep). :param value: the value to store (a basic data type or a list or a dict) :param subspecifier_value: must be None if this class has no subspecifier set (e.g., the DbSetting class). Must be the value of the subspecifier (e.g., the dbnode) for classes that define it (e.g. DbAttribute and DbExtra) :param with_transaction: True if you want this function to be managed with transactions. Set to False if you already have a manual management of transactions in the block where you are calling this function (useful for speed improvements to avoid recursive transactions) :param other_attribs: a dictionary of other parameters, to store only on the level-zero attribute (e.g. for description in DbSetting). :param stop_if_existing: if True, it will stop with an UniquenessError exception if the new entry would violate an uniqueness constraint in the DB (same key, or same key+node, depending on the specific subclass). Otherwise, it will first delete the old value, if existent. The use with True is useful if you want to use a given attribute as a "locking" value, e.g. to avoid to perform an action twice on the same node. Note that, if you are using transactions, you may get the error only when the transaction is committed. """ cls = self._model_class from django.db import transaction other_attribs = other_attribs if other_attribs is not None else {} self.validate_key(key) try: if with_transaction: sid = transaction.savepoint() # create_value returns a list of nodes to store to_store = self.create_value(key, value, subspecifier_value=subspecifier_value, other_attribs=other_attribs) if to_store: if not stop_if_existing: # Delete the old values if stop_if_existing is False, # otherwise don't delete them and hope they don't # exist. If they exist, I'll get an UniquenessError # NOTE! Be careful in case the extra/attribute to # store is not a simple attribute but a list or dict: # like this, it should be ok because if we are # overwriting an entry it will stop anyway to avoid # to overwrite the main entry, but otherwise # there is the risk that trailing pieces remain # so in general it is good to recursively clean # all sub-items. self.del_value(key, subspecifier_value=subspecifier_value) cls.objects.bulk_create( to_store, batch_size=get_config_option('db.batch_size')) if with_transaction: transaction.savepoint_commit(sid) except BaseException as exc: # All exceptions including CTRL+C, ... from django.db.utils import IntegrityError from aiida.common.exceptions import UniquenessError if with_transaction: transaction.savepoint_rollback(sid) if isinstance(exc, IntegrityError) and stop_if_existing: raise UniquenessError( 'Impossible to create the required ' 'entry ' "in table '{}', " 'another entry already exists and the creation would ' 'violate an uniqueness constraint.\nFurther details: ' '{}'.format(cls.__name__, exc)) raise
def import_data_dj(in_path, group=None, ignore_unknown_nodes=False, extras_mode_existing='kcl', extras_mode_new='import', comment_mode='newest', silent=False): """Import exported AiiDA archive to the AiiDA database and repository. Specific for the Django backend. If ``in_path`` is a folder, calls extract_tree; otherwise, tries to detect the compression format (zip, tar.gz, tar.bz2, ...) and calls the correct function. :param in_path: the path to a file or folder that can be imported in AiiDA. :type in_path: str :param group: Group wherein all imported Nodes will be placed. :type group: :py:class:`~aiida.orm.groups.Group` :param extras_mode_existing: 3 letter code that will identify what to do with the extras import. The first letter acts on extras that are present in the original node and not present in the imported node. Can be either: 'k' (keep it) or 'n' (do not keep it). The second letter acts on the imported extras that are not present in the original node. Can be either: 'c' (create it) or 'n' (do not create it). The third letter defines what to do in case of a name collision. Can be either: 'l' (leave the old value), 'u' (update with a new value), 'd' (delete the extra), or 'a' (ask what to do if the content is different). :type extras_mode_existing: str :param extras_mode_new: 'import' to import extras of new nodes or 'none' to ignore them. :type extras_mode_new: str :param comment_mode: Comment import modes (when same UUIDs are found). Can be either: 'newest' (will keep the Comment with the most recent modification time (mtime)) or 'overwrite' (will overwrite existing Comments with the ones from the import file). :type comment_mode: str :param silent: suppress prints. :type silent: bool :return: New and existing Nodes and Links. :rtype: dict :raises `~aiida.tools.importexport.common.exceptions.ImportValidationError`: if parameters or the contents of `metadata.json` or `data.json` can not be validated. :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: if the provided archive at ``in_path`` is corrupted. :raises `~aiida.tools.importexport.common.exceptions.IncompatibleArchiveVersionError`: if the provided archive's export version is not equal to the export version of AiiDA at the moment of import. :raises `~aiida.tools.importexport.common.exceptions.ArchiveImportError`: if there are any internal errors when importing. :raises `~aiida.tools.importexport.common.exceptions.ImportUniquenessError`: if a new unique entity can not be created. 
""" from django.db import transaction # pylint: disable=import-error,no-name-in-module from aiida.backends.djsite.db import models # This is the export version expected by this function expected_export_version = StrictVersion(EXPORT_VERSION) # The returned dictionary with new and existing nodes and links ret_dict = {} # Initial check(s) if group: if not isinstance(group, Group): raise exceptions.ImportValidationError( 'group must be a Group entity') elif not group.is_stored: group.store() ################ # EXTRACT DATA # ################ # The sandbox has to remain open until the end with SandboxFolder() as folder: if os.path.isdir(in_path): extract_tree(in_path, folder) else: if tarfile.is_tarfile(in_path): extract_tar(in_path, folder, silent=silent, nodes_export_subfolder=NODES_EXPORT_SUBFOLDER) elif zipfile.is_zipfile(in_path): try: extract_zip(in_path, folder, silent=silent, nodes_export_subfolder=NODES_EXPORT_SUBFOLDER) except ValueError as exc: print( 'The following problem occured while processing the provided file: {}' .format(exc)) return else: raise exceptions.ImportValidationError( 'Unable to detect the input file format, it is neither a ' '(possibly compressed) tar file, nor a zip file.') if not folder.get_content_list(): raise exceptions.CorruptArchive( 'The provided file/folder ({}) is empty'.format(in_path)) try: with open(folder.get_abs_path('metadata.json'), 'r', encoding='utf8') as fhandle: metadata = json.load(fhandle) with open(folder.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle: data = json.load(fhandle) except IOError as error: raise exceptions.CorruptArchive( 'Unable to find the file {} in the import file or folder'. format(error.filename)) ###################### # PRELIMINARY CHECKS # ###################### export_version = StrictVersion(str(metadata['export_version'])) if export_version != expected_export_version: msg = 'Export file version is {}, can import only version {}'\ .format(metadata['export_version'], expected_export_version) if export_version < expected_export_version: msg += "\nUse 'verdi export migrate' to update this export file." else: msg += '\nUpdate your AiiDA version in order to import this file.' raise exceptions.IncompatibleArchiveVersionError(msg) ########################################################################## # CREATE UUID REVERSE TABLES AND CHECK IF I HAVE ALL NODES FOR THE LINKS # ########################################################################## linked_nodes = set( chain.from_iterable( (l['input'], l['output']) for l in data['links_uuid'])) group_nodes = set(chain.from_iterable(data['groups_uuid'].values())) if NODE_ENTITY_NAME in data['export_data']: import_nodes_uuid = set( v['uuid'] for v in data['export_data'][NODE_ENTITY_NAME].values()) else: import_nodes_uuid = set() # the combined set of linked_nodes and group_nodes was obtained from looking at all the links # the set of import_nodes_uuid was received from the stuff actually referred to in export_data unknown_nodes = linked_nodes.union(group_nodes) - import_nodes_uuid if unknown_nodes and not ignore_unknown_nodes: raise exceptions.DanglingLinkError( 'The import file refers to {} nodes with unknown UUID, therefore it cannot be imported. Either first ' 'import the unknown nodes, or export also the parents when exporting. 
The unknown UUIDs are:\n' ''.format(len(unknown_nodes)) + '\n'.join('* {}'.format(uuid) for uuid in unknown_nodes)) ################################### # DOUBLE-CHECK MODEL DEPENDENCIES # ################################### # The entity import order. It is defined by the database model relationships. model_order = (USER_ENTITY_NAME, COMPUTER_ENTITY_NAME, NODE_ENTITY_NAME, GROUP_ENTITY_NAME, LOG_ENTITY_NAME, COMMENT_ENTITY_NAME) for import_field_name in metadata['all_fields_info']: if import_field_name not in model_order: raise exceptions.ImportValidationError( "You are trying to import an unknown model '{}'!".format( import_field_name)) for idx, model_name in enumerate(model_order): dependencies = [] for field in metadata['all_fields_info'][model_name].values(): try: dependencies.append(field['requires']) except KeyError: # (No ForeignKey) pass for dependency in dependencies: if dependency not in model_order[:idx]: raise exceptions.ArchiveImportError( 'Model {} requires {} but would be loaded first; stopping...' .format(model_name, dependency)) ################################################### # CREATE IMPORT DATA DIRECT UNIQUE_FIELD MAPPINGS # ################################################### import_unique_ids_mappings = {} for model_name, import_data in data['export_data'].items(): if model_name in metadata['unique_identifiers']: # I have to reconvert the pk to integer import_unique_ids_mappings[model_name] = { int(k): v[metadata['unique_identifiers'][model_name]] for k, v in import_data.items() } ############### # IMPORT DATA # ############### # DO ALL WITH A TRANSACTION # batch size for bulk create operations batch_size = get_config_option('db.batch_size') with transaction.atomic(): foreign_ids_reverse_mappings = {} new_entries = {} existing_entries = {} # I first generate the list of data for model_name in model_order: cls_signature = entity_names_to_signatures[model_name] model = get_object_from_string(cls_signature) fields_info = metadata['all_fields_info'].get(model_name, {}) unique_identifier = metadata['unique_identifiers'].get( model_name, None) new_entries[model_name] = {} existing_entries[model_name] = {} foreign_ids_reverse_mappings[model_name] = {} # Not necessarily all models are exported if model_name in data['export_data']: # skip nodes that are already present in the DB if unique_identifier is not None: import_unique_ids = set( v[unique_identifier] for v in data['export_data'][model_name].values()) relevant_db_entries_result = model.objects.filter(**{ '{}__in'.format(unique_identifier): import_unique_ids }) # Note: uuids need to be converted to strings relevant_db_entries = { str(getattr(n, unique_identifier)): n for n in relevant_db_entries_result } foreign_ids_reverse_mappings[model_name] = { k: v.pk for k, v in relevant_db_entries.items() } for key, value in data['export_data'][ model_name].items(): if value[ unique_identifier] in relevant_db_entries.keys( ): # Already in DB existing_entries[model_name][key] = value else: # To be added new_entries[model_name][key] = value else: new_entries[model_name] = data['export_data'][ model_name].copy() # Show Comment mode if not silent if not silent: print('Comment mode: {}'.format(comment_mode)) # I import data from the given model for model_name in model_order: cls_signature = entity_names_to_signatures[model_name] model = get_object_from_string(cls_signature) fields_info = metadata['all_fields_info'].get(model_name, {}) unique_identifier = metadata['unique_identifiers'].get( model_name, None) # EXISTING ENTRIES for 
import_entry_pk, entry_data in existing_entries[ model_name].items(): unique_id = entry_data[unique_identifier] existing_entry_id = foreign_ids_reverse_mappings[ model_name][unique_id] import_data = dict( deserialize_field(k, v, fields_info=fields_info, import_unique_ids_mappings= import_unique_ids_mappings, foreign_ids_reverse_mappings= foreign_ids_reverse_mappings) for k, v in entry_data.items()) # TODO COMPARE, AND COMPARE ATTRIBUTES if model is models.DbComment: new_entry_uuid = merge_comment(import_data, comment_mode) if new_entry_uuid is not None: entry_data[unique_identifier] = new_entry_uuid new_entries[model_name][ import_entry_pk] = entry_data if model_name not in ret_dict: ret_dict[model_name] = {'new': [], 'existing': []} ret_dict[model_name]['existing'].append( (import_entry_pk, existing_entry_id)) if not silent: print('existing %s: %s (%s->%s)' % (model_name, unique_id, import_entry_pk, existing_entry_id)) # print(" `-> WARNING: NO DUPLICITY CHECK DONE!") # CHECK ALSO FILES! # Store all objects for this model in a list, and store them all in once at the end. objects_to_create = [] # This is needed later to associate the import entry with the new pk import_new_entry_pks = {} imported_comp_names = set() # NEW ENTRIES for import_entry_pk, entry_data in new_entries[ model_name].items(): unique_id = entry_data[unique_identifier] import_data = dict( deserialize_field(k, v, fields_info=fields_info, import_unique_ids_mappings= import_unique_ids_mappings, foreign_ids_reverse_mappings= foreign_ids_reverse_mappings) for k, v in entry_data.items()) if model is models.DbGroup: # Check if there is already a group with the same name dupl_counter = 0 orig_label = import_data['label'] while model.objects.filter(label=import_data['label']): import_data[ 'label'] = orig_label + DUPL_SUFFIX.format( dupl_counter) dupl_counter += 1 if dupl_counter == 100: raise exceptions.ImportUniquenessError( 'A group of that label ( {} ) already exists and I could not create a new one' ''.format(orig_label)) elif model is models.DbComputer: # Check if there is already a computer with the same name in the database dupl = (model.objects.filter(name=import_data['name']) or import_data['name'] in imported_comp_names) orig_name = import_data['name'] dupl_counter = 0 while dupl: # Rename the new computer import_data['name'] = ( orig_name + DUPL_SUFFIX.format(dupl_counter)) dupl = ( model.objects.filter(name=import_data['name']) or import_data['name'] in imported_comp_names) dupl_counter += 1 if dupl_counter == 100: raise exceptions.ImportUniquenessError( 'A computer of that name ( {} ) already exists and I could not create a new one' ''.format(orig_name)) imported_comp_names.add(import_data['name']) objects_to_create.append(model(**import_data)) import_new_entry_pks[unique_id] = import_entry_pk if model_name == NODE_ENTITY_NAME: if not silent: print('STORING NEW NODE REPOSITORY FILES...') # NEW NODES for object_ in objects_to_create: import_entry_uuid = object_.uuid import_entry_pk = import_new_entry_pks[ import_entry_uuid] # Before storing entries in the DB, I store the files (if these are nodes). # Note: only for new entries! 
subfolder = folder.get_subfolder( os.path.join(NODES_EXPORT_SUBFOLDER, export_shard_uuid(import_entry_uuid))) if not subfolder.exists(): raise exceptions.CorruptArchive( 'Unable to find the repository folder for Node with UUID={} in the exported ' 'file'.format(import_entry_uuid)) destdir = RepositoryFolder( section=Repository._section_name, uuid=import_entry_uuid) # Replace the folder, possibly destroying existing previous folders, and move the files # (faster if we are on the same filesystem, and in any case the source is a SandboxFolder) destdir.replace_with_folder(subfolder.abspath, move=True, overwrite=True) # For DbNodes, we also have to store its attributes if not silent: print('STORING NEW NODE ATTRIBUTES...') # Get attributes from import file try: object_.attributes = data['node_attributes'][str( import_entry_pk)] except KeyError: raise exceptions.CorruptArchive( 'Unable to find attribute info for Node with UUID={}' .format(import_entry_uuid)) # For DbNodes, we also have to store its extras if extras_mode_new == 'import': if not silent: print('STORING NEW NODE EXTRAS...') # Get extras from import file try: extras = data['node_extras'][str( import_entry_pk)] except KeyError: raise exceptions.CorruptArchive( 'Unable to find extra info for Node with UUID={}' .format(import_entry_uuid)) # TODO: remove when aiida extras will be moved somewhere else # from here extras = { key: value for key, value in extras.items() if not key.startswith('_aiida_') } if object_.node_type.endswith('code.Code.'): extras = { key: value for key, value in extras.items() if not key == 'hidden' } # till here object_.extras = extras elif extras_mode_new == 'none': if not silent: print('SKIPPING NEW NODE EXTRAS...') else: raise exceptions.ImportValidationError( "Unknown extras_mode_new value: {}, should be either 'import' or 'none'" ''.format(extras_mode_new)) # EXISTING NODES (Extras) # For the existing nodes that are also in the imported list we also update their extras if necessary if not silent: print( 'UPDATING EXISTING NODE EXTRAS (mode: {})'.format( extras_mode_existing)) import_existing_entry_pks = { entry_data[unique_identifier]: import_entry_pk for import_entry_pk, entry_data in existing_entries[model_name].items() } for node in models.DbNode.objects.filter( uuid__in=import_existing_entry_pks).all(): # pylint: disable=no-member import_entry_uuid = str(node.uuid) import_entry_pk = import_existing_entry_pks[ import_entry_uuid] # Get extras from import file try: extras = data['node_extras'][str(import_entry_pk)] except KeyError: raise exceptions.CorruptArchive( 'Unable to find extra info for ode with UUID={}' .format(import_entry_uuid)) # TODO: remove when aiida extras will be moved somewhere else # from here extras = { key: value for key, value in extras.items() if not key.startswith('_aiida_') } if node.node_type.endswith('code.Code.'): extras = { key: value for key, value in extras.items() if not key == 'hidden' } # till here node.extras = merge_extras(node.extras, extras, extras_mode_existing) # Already saving existing node here to update its extras node.save() # If there is an mtime in the field, disable the automatic update # to keep the mtime that we have set here if 'mtime' in [ field.name for field in model._meta.local_fields ]: with models.suppress_auto_now([(model, ['mtime'])]): # Store them all in once; however, the PK are not set in this way... 
model.objects.bulk_create(objects_to_create, batch_size=batch_size) else: model.objects.bulk_create(objects_to_create, batch_size=batch_size) # Get back the just-saved entries just_saved_queryset = model.objects.filter( **{ '{}__in'.format(unique_identifier): import_new_entry_pks.keys() }).values_list(unique_identifier, 'pk') # note: convert uuids from type UUID to strings just_saved = { str(key): value for key, value in just_saved_queryset } # Now I have the PKs, print the info # Moreover, add newly created Nodes to foreign_ids_reverse_mappings for unique_id, new_pk in just_saved.items(): import_entry_pk = import_new_entry_pks[unique_id] foreign_ids_reverse_mappings[model_name][ unique_id] = new_pk if model_name not in ret_dict: ret_dict[model_name] = {'new': [], 'existing': []} ret_dict[model_name]['new'].append( (import_entry_pk, new_pk)) if not silent: print('NEW %s: %s (%s->%s)' % (model_name, unique_id, import_entry_pk, new_pk)) if not silent: print('STORING NODE LINKS...') import_links = data['links_uuid'] links_to_store = [] # Needed, since QueryBuilder does not yet work for recently saved Nodes existing_links_raw = models.DbLink.objects.all().values_list( 'input', 'output', 'label', 'type') existing_links = {(l[0], l[1], l[2], l[3]) for l in existing_links_raw} existing_outgoing_unique = {(l[0], l[3]) for l in existing_links_raw} existing_outgoing_unique_pair = {(l[0], l[2], l[3]) for l in existing_links_raw} existing_incoming_unique = {(l[1], l[3]) for l in existing_links_raw} existing_incoming_unique_pair = {(l[1], l[2], l[3]) for l in existing_links_raw} calculation_node_types = 'process.calculation.' workflow_node_types = 'process.workflow.' data_node_types = 'data.' link_mapping = { LinkType.CALL_CALC: (workflow_node_types, calculation_node_types, 'unique_triple', 'unique'), LinkType.CALL_WORK: (workflow_node_types, workflow_node_types, 'unique_triple', 'unique'), LinkType.CREATE: (calculation_node_types, data_node_types, 'unique_pair', 'unique'), LinkType.INPUT_CALC: (data_node_types, calculation_node_types, 'unique_triple', 'unique_pair'), LinkType.INPUT_WORK: (data_node_types, workflow_node_types, 'unique_triple', 'unique_pair'), LinkType.RETURN: (workflow_node_types, data_node_types, 'unique_pair', 'unique_triple'), } for link in import_links: # Check for dangling Links within the, supposed, self-consistent archive try: in_id = foreign_ids_reverse_mappings[NODE_ENTITY_NAME][ link['input']] out_id = foreign_ids_reverse_mappings[NODE_ENTITY_NAME][ link['output']] except KeyError: if ignore_unknown_nodes: continue raise exceptions.ImportValidationError( 'Trying to create a link with one or both unknown nodes, stopping (in_uuid={}, out_uuid={}, ' 'label={}, type={})'.format(link['input'], link['output'], link['label'], link['type'])) # Check if link already exists, skip if it does # This is equivalent to an existing triple link (i.e. unique_triple from below) if (in_id, out_id, link['label'], link['type']) in existing_links: continue # Since backend specific Links (DbLink) are not validated upon creation, we will now validate them. 
try: validate_link_label(link['label']) except ValueError as why: raise exceptions.ImportValidationError( 'Error during Link label validation: {}'.format(why)) source = models.DbNode.objects.get(id=in_id) target = models.DbNode.objects.get(id=out_id) if source.uuid == target.uuid: raise exceptions.ImportValidationError( 'Cannot add a link to oneself') link_type = LinkType(link['type']) type_source, type_target, outdegree, indegree = link_mapping[ link_type] # Check if source Node is a valid type if not source.node_type.startswith(type_source): raise exceptions.ImportValidationError( 'Cannot add a {} link from {} to {}'.format( link_type, source.node_type, target.node_type)) # Check if target Node is a valid type if not target.node_type.startswith(type_target): raise exceptions.ImportValidationError( 'Cannot add a {} link from {} to {}'.format( link_type, source.node_type, target.node_type)) # If the outdegree is `unique` there cannot already be any other outgoing link of that type, # i.e., the source Node may not have a LinkType of current LinkType, going out, existing already. if outdegree == 'unique' and ( in_id, link['type']) in existing_outgoing_unique: raise exceptions.ImportValidationError( 'Node<{}> already has an outgoing {} link'.format( source.uuid, link_type)) # If the outdegree is `unique_pair`, # then the link labels for outgoing links of this type should be unique, # i.e., the source Node may not have a LinkType of current LinkType, going out, # that also has the current Link label, existing already. elif outdegree == 'unique_pair' and \ (in_id, link['label'], link['type']) in existing_outgoing_unique_pair: raise exceptions.ImportValidationError( 'Node<{}> already has an outgoing {} link with label "{}"' .format(source.uuid, link_type, link['label'])) # If the indegree is `unique` there cannot already be any other incoming links of that type, # i.e., the target Node may not have a LinkType of current LinkType, coming in, existing already. if indegree == 'unique' and ( out_id, link['type']) in existing_incoming_unique: raise exceptions.ImportValidationError( 'Node<{}> already has an incoming {} link'.format( target.uuid, link_type)) # If the indegree is `unique_pair`, # then the link labels for incoming links of this type should be unique, # i.e., the target Node may not have a LinkType of current LinkType, coming in # that also has the current Link label, existing already. 
elif indegree == 'unique_pair' and \ (out_id, link['label'], link['type']) in existing_incoming_unique_pair: raise exceptions.ImportValidationError( 'Node<{}> already has an incoming {} link with label "{}"' .format(target.uuid, link_type, link['label'])) # New link links_to_store.append( models.DbLink(input_id=in_id, output_id=out_id, label=link['label'], type=link['type'])) if 'Link' not in ret_dict: ret_dict['Link'] = {'new': []} ret_dict['Link']['new'].append((in_id, out_id)) # Add new Link to sets of existing Links 'input PK', 'output PK', 'label', 'type' existing_links.add( (in_id, out_id, link['label'], link['type'])) existing_outgoing_unique.add((in_id, link['type'])) existing_outgoing_unique_pair.add( (in_id, link['label'], link['type'])) existing_incoming_unique.add((out_id, link['type'])) existing_incoming_unique_pair.add( (out_id, link['label'], link['type'])) # Store new links if links_to_store: if not silent: print(' ({} new links...)'.format(len(links_to_store))) models.DbLink.objects.bulk_create(links_to_store, batch_size=batch_size) else: if not silent: print(' (0 new links...)') if not silent: print('STORING GROUP ELEMENTS...') import_groups = data['groups_uuid'] for groupuuid, groupnodes in import_groups.items(): # TODO: cache these to avoid too many queries group_ = models.DbGroup.objects.get(uuid=groupuuid) nodes_to_store = [ foreign_ids_reverse_mappings[NODE_ENTITY_NAME][node_uuid] for node_uuid in groupnodes ] if nodes_to_store: group_.dbnodes.add(*nodes_to_store) ###################################################### # Put everything in a specific group ###################################################### existing = existing_entries.get(NODE_ENTITY_NAME, {}) existing_pk = [ foreign_ids_reverse_mappings[NODE_ENTITY_NAME][v['uuid']] for v in existing.values() ] new = new_entries.get(NODE_ENTITY_NAME, {}) new_pk = [ foreign_ids_reverse_mappings[NODE_ENTITY_NAME][v['uuid']] for v in new.values() ] pks_for_group = existing_pk + new_pk # So that we do not create empty groups if pks_for_group: # If user specified a group, import all things into it if not group: # Get an unique name for the import group, based on the current (local) time basename = timezone.localtime( timezone.now()).strftime('%Y%m%d-%H%M%S') counter = 0 group_label = basename while Group.objects.find(filters={'label': group_label}): counter += 1 group_label = '{}_{}'.format(basename, counter) if counter == 100: raise exceptions.ImportUniquenessError( "Overflow of import groups (more than 100 import groups exists with basename '{}')" ''.format(basename)) group = ImportGroup(label=group_label).store() # Add all the nodes to the new group # TODO: decide if we want to return the group label nodes = [ entry[0] for entry in QueryBuilder().append(Node, filters={ 'id': { 'in': pks_for_group } }).all() ] group.add_nodes(nodes) if not silent: print( "IMPORTED NODES ARE GROUPED IN THE IMPORT GROUP LABELED '{}'" .format(group.label)) else: if not silent: print( 'NO NODES TO IMPORT, SO NO GROUP CREATED, IF IT DID NOT ALREADY EXIST' ) if not silent: print('DONE.') return ret_dict
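# A usage sketch for the Django-backend importer above; the archive path and group label are
# illustrative, and the backend-specific function is called directly only for the sake of the
# example.
from aiida.orm import Group

group = Group(label='my_import_group').store()

ret_dict = import_data_dj(
    'export_archive.aiida',       # folder, tar or zip archive
    group=group,                  # imported nodes go into this group instead of an auto-created one
    extras_mode_existing='kcl',   # keep existing extras, create new ones, leave old values on collision
    extras_mode_new='import',     # also import the extras of newly created nodes
    comment_mode='newest',        # on UUID clashes keep the Comment with the most recent mtime
)

# ret_dict maps entity names to the (import pk, database pk) pairs of new and existing entries
print(ret_dict.get('Node', {}).get('new', []))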
async def task_upload_job(process: 'CalcJob', transport_queue: TransportQueue, cancellable: InterruptableFuture):
    """Transport task that will attempt to upload the files of a job calculation to the remote.

    The task will first request a transport from the queue. Once the transport is yielded, the relevant execmanager
    function is called, wrapped in the exponential_backoff_retry coroutine, which, in case of a caught exception,
    will retry after an interval that increases exponentially with the number of retries, for a maximum number of
    retries. If all retries fail, the task will raise a TransportTaskException

    :param process: the job calculation
    :param transport_queue: the TransportQueue from which to request a Transport
    :param cancellable: the cancelled flag that will be queried to determine whether the task was cancelled
    :raises: TransportTaskException if after the maximum number of retries the transport task still excepted
    """
    node = process.node

    if node.get_state() == CalcJobState.SUBMITTING:
        logger.warning(f'CalcJob<{node.pk}> already marked as SUBMITTING, skipping task_upload_job')
        return

    initial_interval = get_config_option(RETRY_INTERVAL_OPTION)
    max_attempts = get_config_option(MAX_ATTEMPTS_OPTION)

    authinfo = node.get_authinfo()

    async def do_upload():
        with transport_queue.request_transport(authinfo) as request:
            transport = await cancellable.with_interrupt(request)

            with SandboxFolder() as folder:
                # Any exception thrown in `presubmit` call is not transient so we circumvent the exponential backoff
                try:
                    calc_info = process.presubmit(folder)
                except Exception as exception:  # pylint: disable=broad-except
                    raise PreSubmitException('exception occurred in presubmit call') from exception
                else:
                    execmanager.upload_calculation(node, transport, calc_info, folder)
                    skip_submit = calc_info.skip_submit or False

            return skip_submit

    try:
        logger.info(f'scheduled request to upload CalcJob<{node.pk}>')
        ignore_exceptions = (plumpy.futures.CancelledError, PreSubmitException, plumpy.process_states.Interruption)
        skip_submit = await exponential_backoff_retry(
            do_upload,
            initial_interval,
            max_attempts,
            logger=node.logger,
            ignore_exceptions=ignore_exceptions
        )
    except PreSubmitException:
        raise
    except (plumpy.futures.CancelledError, plumpy.process_states.Interruption):
        raise
    except Exception as exception:
        logger.warning(f'uploading CalcJob<{node.pk}> failed')
        raise TransportTaskException(f'upload_calculation failed {max_attempts} times consecutively') from exception
    else:
        logger.info(f'uploading CalcJob<{node.pk}> successful')
        node.set_state(CalcJobState.SUBMITTING)
        return skip_submit
async def task_retrieve_job(node: CalcJobNode, transport_queue: TransportQueue, retrieved_temporary_folder: str, cancellable: InterruptableFuture): """Transport task that will attempt to retrieve all files of a completed job calculation. The task will first request a transport from the queue. Once the transport is yielded, the relevant execmanager function is called, wrapped in the exponential_backoff_retry coroutine, which, in case of a caught exception, will retry after an interval that increases exponentially with the number of retries, for a maximum number of retries. If all retries fail, the task will raise a TransportTaskException :param node: the node that represents the job calculation :param transport_queue: the TransportQueue from which to request a Transport :param retrieved_temporary_folder: the absolute path to a directory to store files :param cancellable: the cancelled flag that will be queried to determine whether the task was cancelled :raises: TransportTaskException if after the maximum number of retries the transport task still excepted """ if node.get_state() == CalcJobState.PARSING: logger.warning( f'CalcJob<{node.pk}> already marked as PARSING, skipping task_retrieve_job' ) return initial_interval = get_config_option(RETRY_INTERVAL_OPTION) max_attempts = get_config_option(MAX_ATTEMPTS_OPTION) authinfo = node.get_authinfo() async def do_retrieve(): with transport_queue.request_transport(authinfo) as request: transport = await cancellable.with_interrupt(request) # Perform the job accounting and set it on the node if successful. If the scheduler does not implement this # still set the attribute but set it to `None`. This way we can distinguish calculation jobs for which the # accounting was called but could not be set. scheduler = node.computer.get_scheduler( ) # type: ignore[union-attr] scheduler.set_transport(transport) try: detailed_job_info = scheduler.get_detailed_job_info( node.get_job_id()) except FeatureNotAvailable: logger.info( f'detailed job info not available for scheduler of CalcJob<{node.pk}>' ) node.set_detailed_job_info(None) else: node.set_detailed_job_info(detailed_job_info) return execmanager.retrieve_calculation( node, transport, retrieved_temporary_folder) try: logger.info(f'scheduled request to retrieve CalcJob<{node.pk}>') ignore_exceptions = (plumpy.futures.CancelledError, plumpy.process_states.Interruption) result = await exponential_backoff_retry( do_retrieve, initial_interval, max_attempts, logger=node.logger, ignore_exceptions=ignore_exceptions) except (plumpy.futures.CancelledError, plumpy.process_states.Interruption): # pylint: disable=try-except-raise raise except Exception as exception: logger.warning(f'retrieving CalcJob<{node.pk}> failed') raise TransportTaskException( f'retrieve_calculation failed {max_attempts} times consecutively' ) from exception else: node.set_state(CalcJobState.PARSING) logger.info(f'retrieving CalcJob<{node.pk}> successful') return result
async def task_update_job(node: CalcJobNode, job_manager, cancellable: InterruptableFuture): """Transport task that will attempt to update the scheduler status of the job calculation. The task will first request a transport from the queue. Once the transport is yielded, the relevant execmanager function is called, wrapped in the exponential_backoff_retry coroutine, which, in case of a caught exception, will retry after an interval that increases exponentially with the number of retries, for a maximum number of retries. If all retries fail, the task will raise a TransportTaskException :param node: the node that represents the job calculation :type node: :class:`aiida.orm.nodes.process.calculation.calcjob.CalcJobNode` :param job_manager: The job manager :type job_manager: :class:`aiida.engine.processes.calcjobs.manager.JobManager` :param cancellable: A cancel flag :type cancellable: :class:`aiida.engine.utils.InterruptableFuture` :return: True if the tasks was successfully completed, False otherwise """ state = node.get_state() if state in [CalcJobState.RETRIEVING, CalcJobState.STASHING]: logger.warning( f'CalcJob<{node.pk}> already marked as `{state}`, skipping task_update_job' ) return True initial_interval = get_config_option(RETRY_INTERVAL_OPTION) max_attempts = get_config_option(MAX_ATTEMPTS_OPTION) authinfo = node.get_authinfo() job_id = node.get_job_id() async def do_update(): # Get the update request with job_manager.request_job_info_update(authinfo, job_id) as update_request: job_info = await cancellable.with_interrupt(update_request) if job_info is None: # If the job is computed or not found assume it's done node.set_scheduler_state(JobState.DONE) job_done = True else: node.set_last_job_info(job_info) node.set_scheduler_state(job_info.job_state) job_done = job_info.job_state == JobState.DONE return job_done try: logger.info(f'scheduled request to update CalcJob<{node.pk}>') ignore_exceptions = (plumpy.futures.CancelledError, plumpy.process_states.Interruption) job_done = await exponential_backoff_retry( do_update, initial_interval, max_attempts, logger=node.logger, ignore_exceptions=ignore_exceptions) except (plumpy.futures.CancelledError, plumpy.process_states.Interruption): # pylint: disable=try-except-raise raise except Exception as exception: logger.warning(f'updating CalcJob<{node.pk}> failed') raise TransportTaskException( f'update_calculation failed {max_attempts} times consecutively' ) from exception else: logger.info(f'updating CalcJob<{node.pk}> successful') if job_done: node.set_state(CalcJobState.STASHING) return job_done
value = get_random_string(16) return value SETUP_PROFILE = options.OverridableOption( '--profile', prompt='Profile name', help='The name of the new profile.', required=True, type=types.ProfileParamType(cannot_exist=True), cls=options.interactive.InteractiveOption) SETUP_USER_EMAIL = options.USER_EMAIL.clone( prompt='Email Address (for sharing data)', default=get_config_option('user.email'), required_fn=lambda x: get_config_option('user.email') is None, required=True, cls=options.interactive.InteractiveOption) SETUP_USER_FIRST_NAME = options.USER_FIRST_NAME.clone( prompt='First name', default=get_config_option('user.first_name'), required_fn=lambda x: get_config_option('user.first_name') is None, required=True, cls=options.interactive.InteractiveOption) SETUP_USER_LAST_NAME = options.USER_LAST_NAME.clone( prompt='Last name', default=get_config_option('user.last_name'), required_fn=lambda x: get_config_option('user.last_name') is None,
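# The same clone-with-config-default pattern can be applied to any of the user fields. A
# sketch for the institution field, assuming `options.USER_INSTITUTION` and the
# `user.institution` config option exist alongside the email and name options used above.
SETUP_USER_INSTITUTION = options.USER_INSTITUTION.clone(
    prompt='Institution',
    default=get_config_option('user.institution'),
    required_fn=lambda x: get_config_option('user.institution') is None,
    required=True,
    cls=options.interactive.InteractiveOption)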
u'For further information please visit http://www.aiida.net/. All rights reserved.' ) __license__ = 'MIT license, see LICENSE.txt file.' __version__ = '1.0.1' __authors__ = 'The AiiDA team.' __paper__ = ( u'G. Pizzi, A. Cepellotti, R. Sabatini, N. Marzari, and B. Kozinsky,' u'"AiiDA: automated interactive infrastructure and database for computational science", ' u'Comp. Mat. Sci 111, 218-230 (2016); https://doi.org/10.1016/j.commatsci.2015.09.013 ' u'- http://www.aiida.net.') __paper_short__ = 'G. Pizzi et al., Comp. Mat. Sci 111, 218 (2016).' # Configure the default logging configure_logging() if get_config_option('warnings.showdeprecations'): # If the user does not want to get AiiDA deprecation warnings, we disable them - this can be achieved with:: # verdi config warnings.showdeprecations False # Note that the AiidaDeprecationWarning does NOT inherit from DeprecationWarning warnings.simplefilter('default', AiidaDeprecationWarning) # pylint: disable=no-member # This should default to 'once', i.e. once per different message else: warnings.simplefilter('ignore', AiidaDeprecationWarning) # pylint: disable=no-member if six.PY2: warnings.warn('python 2 will be deprecated in `aiida-core v2.0.0`', DeprecationWarning) # pylint: disable=no-member def load_dbenv(profile=None): """Alias for `load_dbenv` from `aiida.backends.utils`
def import_data_dj(in_path: str, group: Optional[Group] = None, ignore_unknown_nodes: bool = False, extras_mode_existing: str = 'kcl', extras_mode_new: str = 'import', comment_mode: str = 'newest', silent: Optional[bool] = None, **kwargs: Any): # pylint: disable=unused-argument """Import exported AiiDA archive to the AiiDA database and repository. Specific for the Django backend. If ``in_path`` is a folder, calls extract_tree; otherwise, tries to detect the compression format (zip, tar.gz, tar.bz2, ...) and calls the correct function. :param in_path: the path to a file or folder that can be imported in AiiDA. :type in_path: str :param group: Group wherein all imported Nodes will be placed. :type group: :py:class:`~aiida.orm.groups.Group` :param extras_mode_existing: 3 letter code that will identify what to do with the extras import. The first letter acts on extras that are present in the original node and not present in the imported node. Can be either: 'k' (keep it) or 'n' (do not keep it). The second letter acts on the imported extras that are not present in the original node. Can be either: 'c' (create it) or 'n' (do not create it). The third letter defines what to do in case of a name collision. Can be either: 'l' (leave the old value), 'u' (update with a new value), 'd' (delete the extra), or 'a' (ask what to do if the content is different). :type extras_mode_existing: str :param extras_mode_new: 'import' to import extras of new nodes or 'none' to ignore them. :type extras_mode_new: str :param comment_mode: Comment import nodes (when same UUIDs are found). Can be either: 'newest' (will keep the Comment with the most recent modification time (mtime)) or 'overwrite' (will overwrite existing Comments with the ones from the import file). :type comment_mode: str :return: New and existing Nodes and Links. :rtype: dict :raises `~aiida.tools.importexport.common.exceptions.ImportValidationError`: if parameters or the contents of `metadata.json` or `data.json` can not be validated. :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: if the provided archive at ``in_path`` is corrupted. :raises `~aiida.tools.importexport.common.exceptions.IncompatibleArchiveVersionError`: if the provided archive's export version is not equal to the export version of AiiDA at the moment of import. :raises `~aiida.tools.importexport.common.exceptions.ArchiveImportError`: if there are any internal errors when importing. :raises `~aiida.tools.importexport.common.exceptions.ImportUniquenessError`: if a new unique entity can not be created. 
""" # Initial check(s) if silent is not None: warnings.warn( 'silent keyword is deprecated and will be removed in AiiDA v2.0.0, set the logger level explicitly instead', AiidaDeprecationWarning) # pylint: disable=no-member if extras_mode_new not in ['import', 'none']: raise exceptions.ImportValidationError( f"Unknown extras_mode_new value: {extras_mode_new}, should be either 'import' or 'none'" ) reader_cls = get_reader(detect_archive_type(in_path)) if group: if not isinstance(group, Group): raise exceptions.ImportValidationError( 'group must be a Group entity') elif not group.is_stored: group.store() # The returned dictionary with new and existing nodes and links # entity_name -> new or existing -> list pk ret_dict: Dict[str, dict] = {} with reader_cls(in_path) as reader: IMPORT_LOGGER.debug('Checking archive version compatibility') reader.check_version() start_summary(in_path, comment_mode, extras_mode_new, extras_mode_existing) ########################################################################## # CREATE UUID REVERSE TABLES AND CHECK IF I HAVE ALL NODES FOR THE LINKS # ########################################################################## IMPORT_LOGGER.debug( 'CHECKING IF NODES FROM LINKS ARE IN DB OR ARCHIVE...') linked_nodes = set( chain.from_iterable( (l['input'], l['output']) for l in reader.iter_link_data())) group_nodes = set( chain.from_iterable( (uuids for _, uuids in reader.iter_group_uuids()))) # Check that UUIDs are valid linked_nodes = set(x for x in linked_nodes if validate_uuid(x)) group_nodes = set(x for x in group_nodes if validate_uuid(x)) import_nodes_uuid = set(v for v in reader.iter_node_uuids()) # the combined set of linked_nodes and group_nodes was obtained from looking at all the links # the set of import_nodes_uuid was received from the stuff actually referred to in export_data unknown_nodes = linked_nodes.union(group_nodes) - import_nodes_uuid if unknown_nodes and not ignore_unknown_nodes: raise exceptions.DanglingLinkError( 'The import file refers to {} nodes with unknown UUID, therefore it cannot be imported. Either first ' 'import the unknown nodes, or export also the parents when exporting. The unknown UUIDs are:\n' ''.format(len(unknown_nodes)) + '\n'.join('* {}'.format(uuid) for uuid in unknown_nodes)) ################################### # DOUBLE-CHECK MODEL DEPENDENCIES # ################################### # The entity import order. It is defined by the database model relationships. entity_order = (USER_ENTITY_NAME, COMPUTER_ENTITY_NAME, NODE_ENTITY_NAME, GROUP_ENTITY_NAME, LOG_ENTITY_NAME, COMMENT_ENTITY_NAME) for entity_name in reader.entity_names: if entity_name not in entity_order: raise exceptions.ImportValidationError( f"You are trying to import an unknown model '{entity_name}'!" ) for idx, entity_name in enumerate(entity_order): dependencies = [] for field in reader.metadata.all_fields_info[entity_name].values(): try: dependencies.append(field['requires']) except KeyError: # (No ForeignKey) pass for dependency in dependencies: if dependency not in entity_order[:idx]: raise exceptions.ArchiveImportError( f'Entity {entity_name} requires {dependency} but would be loaded first; stopping...' 
) IMPORT_LOGGER.debug('CREATING PK-2-UUID/EMAIL MAPPING...') # entity_name -> pk -> unique id import_unique_ids_mappings: Dict[str, Dict[int, str]] = {} for entity_name, identifier in reader.metadata.unique_identifiers.items( ): import_unique_ids_mappings[entity_name] = { int(k): f[identifier] for k, f in reader.iter_entity_fields(entity_name, fields=(identifier, )) } # count total number of entities to import number_of_entities: int = sum( reader.entity_count(entity_name) for entity_name in entity_order) IMPORT_LOGGER.debug('Importing %s entities', number_of_entities) ########################################### # IMPORT ALL DATA IN A SINGLE TRANSACTION # ########################################### from django.db import transaction # pylint: disable=import-error,no-name-in-module # batch size for bulk create operations batch_size: int = get_config_option('db.batch_size') with transaction.atomic(): # entity_name -> str(pk) -> fields new_entries: Dict[str, Dict[str, dict]] = {} existing_entries: Dict[str, Dict[str, dict]] = {} # entity_name -> identifier -> pk foreign_ids_reverse_mappings: Dict[str, Dict[str, int]] = {} IMPORT_LOGGER.debug('ASSESSING IMPORT DATA...') for entity_name in entity_order: _select_entity_data( entity_name=entity_name, reader=reader, new_entries=new_entries, existing_entries=existing_entries, foreign_ids_reverse_mappings=foreign_ids_reverse_mappings, extras_mode_new=extras_mode_new, ) IMPORT_LOGGER.debug('STORING ENTITIES...') for entity_name in entity_order: _store_entity_data( reader=reader, entity_name=entity_name, comment_mode=comment_mode, extras_mode_existing=extras_mode_existing, new_entries=new_entries, existing_entries=existing_entries, foreign_ids_reverse_mappings=foreign_ids_reverse_mappings, import_unique_ids_mappings=import_unique_ids_mappings, ret_dict=ret_dict, batch_size=batch_size, # session=session ) # store all pks to add to import group pks_for_group: List[int] = [ foreign_ids_reverse_mappings[NODE_ENTITY_NAME][v['uuid']] for entries in [existing_entries, new_entries] for v in entries.get(NODE_ENTITY_NAME, {}).values() ] # now delete the entity data because we no longer need it del existing_entries del new_entries IMPORT_LOGGER.debug('STORING NODE LINKS...') _store_node_links( reader=reader, ignore_unknown_nodes=ignore_unknown_nodes, foreign_ids_reverse_mappings=foreign_ids_reverse_mappings, ret_dict=ret_dict, batch_size=batch_size, # session=session ) IMPORT_LOGGER.debug('STORING GROUP ELEMENTS...') _add_nodes_to_groups( group_count=reader.entity_count(GROUP_ENTITY_NAME), group_uuids=reader.iter_group_uuids(), foreign_ids_reverse_mappings=foreign_ids_reverse_mappings) ###################################### # Put everything in a specific group # ###################################### # Note this is done in a separate transaction group = _make_import_group(group=group, node_pks=pks_for_group) # Summarize import result_summary(ret_dict, getattr(group, 'label', None)) return ret_dict
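# Since the `silent` keyword is deprecated, callers are expected to tune verbosity through the
# logger instead. A minimal sketch assuming the module-level `IMPORT_LOGGER` used above; the
# archive path is illustrative.
import logging

IMPORT_LOGGER.setLevel(logging.ERROR)  # roughly the old `silent=True`
ret_dict = import_data_dj('export_archive.aiida')

IMPORT_LOGGER.setLevel(logging.INFO)   # restore progress reporting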