Example 1
def get_use_cache(*, identifier=None):
    """Return whether the caching mechanism should be used for the given process type according to the configuration.

    :param identifier: Process type string of the node
    :type identifier: str
    :return: boolean, True if caching is enabled, False otherwise
    :raises: `~aiida.common.exceptions.ConfigurationError` if the configuration is invalid, either due to a general
        configuration error, or because the identifier matches both the enabled and the disabled list
    """
    from collections import namedtuple

    type_check(identifier, str, allow_none=True)

    if identifier is not None:
        enable_matches = [
            pattern for pattern in _CONFIG[ConfigKeys.ENABLED.value]
            if _match_wildcard(string=identifier, pattern=pattern)
        ]
        disable_matches = [
            pattern for pattern in _CONFIG[ConfigKeys.DISABLED.value]
            if _match_wildcard(string=identifier, pattern=pattern)
        ]

        if enable_matches and disable_matches:
            # If both enable and disable have matching identifier, we search for
            # the most specific one. This is determined by checking whether
            # all other patterns match the specific pattern.
            PatternWithResult = namedtuple('PatternWithResult', ['pattern', 'use_cache'])
            most_specific = []
            for specific_pattern in enable_matches:
                if all(
                    _match_wildcard(string=specific_pattern, pattern=other_pattern)
                    for other_pattern in enable_matches + disable_matches
                ):
                    most_specific.append(PatternWithResult(pattern=specific_pattern, use_cache=True))
            for specific_pattern in disable_matches:
                if all(
                    _match_wildcard(string=specific_pattern, pattern=other_pattern)
                    for other_pattern in enable_matches + disable_matches
                ):
                    most_specific.append(PatternWithResult(pattern=specific_pattern, use_cache=False))

            if len(most_specific) > 1:
                raise exceptions.ConfigurationError((
                    'Invalid configuration: multiple matches for identifier {}'
                    ', but the most specific identifier is not unique. Candidates: {}'
                ).format(identifier, [match.pattern for match in most_specific]))
            if not most_specific:
                raise exceptions.ConfigurationError(
                    'Invalid configuration: multiple matches for identifier {}, but none of them is most specific.'.
                    format(identifier)
                )
            return most_specific[0].use_cache
        if enable_matches:
            return True
        if disable_matches:
            return False
    return _CONFIG[ConfigKeys.DEFAULT.value]
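The specificity rule above can be exercised in isolation. A minimal sketch, assuming `_match_wildcard` is essentially `fnmatch.fnmatch` (an assumption; the real helper may differ) and using made-up entry point strings:

from fnmatch import fnmatch

enabled = ['aiida.calculations:arithmetic.*']
disabled = ['aiida.calculations:*']
identifier = 'aiida.calculations:arithmetic.add'

matches = [pattern for pattern in enabled + disabled if fnmatch(identifier, pattern)]
# A pattern is "most specific" when it is itself matched, as a plain string,
# by every other matching pattern.
most_specific = [pattern for pattern in matches if all(fnmatch(pattern, other) for other in matches)]
print(most_specific)  # ['aiida.calculations:arithmetic.*'] -> caching enabled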
Example 2
def _get_config(config_file):
    """Return the caching configuration.

    :param config_file: the absolute path to the caching configuration file
    :return: the configuration dictionary
    """
    from aiida.manage.configuration import get_profile
    from aiida.plugins.entry_point import is_valid_entry_point_string, load_entry_point_from_string

    profile = get_profile()

    if profile is None:
        raise exceptions.ConfigurationError('no profile has been loaded')

    try:
        with open(config_file, 'r', encoding='utf8') as handle:
            config = yaml.safe_load(handle)[profile.name]
    except (OSError, IOError, KeyError):
        # No config file, or no config for this profile
        return DEFAULT_CONFIG

    # Validate configuration
    for key in config:
        if key not in DEFAULT_CONFIG:
            raise exceptions.ConfigurationError(
                "Configuration error: Invalid key '{}' in cache_config.yml".
                format(key))

    # Add defaults where key is either completely missing or specifies no values in which case it will be `None`
    for key, default_config in DEFAULT_CONFIG.items():
        if key not in config or config[key] is None:
            config[key] = default_config

    # Validate the entry point identifiers
    for key in [ConfigKeys.ENABLED.value, ConfigKeys.DISABLED.value]:

        # If the key is defined in the file but contains no values, it will be `None`
        if config[key] is None:
            continue

        for identifier in config[key]:
            if not is_valid_entry_point_string(identifier):
                raise exceptions.ConfigurationError(
                    "entry point '{}' in 'cache_config.yml' is not a valid entry point string."
                    .format(identifier))

            try:
                load_entry_point_from_string(identifier)
            except exceptions.EntryPointError as exception:
                raise exceptions.ConfigurationError(
                    "entry point '{}' in 'cache_config.yml' cannot be loaded: {}."
                    .format(identifier, exception)) from exception

    return config
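For reference, a minimal sketch of the profile-keyed YAML layout this parser consumes; the profile name and entry points are invented, and the `default`/`enabled`/`disabled` keys are assumed to be the values behind `ConfigKeys`:

import yaml

sample = '''
my_profile:
  default: false
  enabled:
    - aiida.calculations:arithmetic.add
  disabled:
    - aiida.calculations:templatereplacer
'''

config = yaml.safe_load(sample)['my_profile']
print(config['enabled'])  # ['aiida.calculations:arithmetic.add']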
Example 3
def _get_config(config_file):
    """Return the caching configuration.

    :param config_file: the absolute path to the caching configuration file
    :return: the configuration dictionary
    """
    from aiida.manage.configuration import get_profile

    profile = get_profile()

    if profile is None:
        raise exceptions.ConfigurationError('no profile has been loaded')

    try:
        with open(config_file, 'r', encoding='utf8') as handle:
            config = yaml.safe_load(handle)[profile.name]
    except (OSError, IOError, KeyError):
        # No config file, or no config for this profile
        return DEFAULT_CONFIG

    # Validate configuration
    for key in config:
        if key not in DEFAULT_CONFIG:
            raise exceptions.ConfigurationError(
                "Configuration error: Invalid key '{}' in cache_config.yml".
                format(key))

    # Add defaults where key is either completely missing or specifies no values in which case it will be `None`
    for key, default_config in DEFAULT_CONFIG.items():
        if key not in config or config[key] is None:
            config[key] = default_config

    try:
        type_check(config[ConfigKeys.DEFAULT.value], bool)
        type_check(config[ConfigKeys.ENABLED.value], list)
        type_check(config[ConfigKeys.DISABLED.value], list)
    except TypeError as exc:
        raise exceptions.ConfigurationError(
            'Invalid type in caching configuration file.') from exc

    # Check validity of enabled and disabled entries
    try:
        for identifier in config[ConfigKeys.ENABLED.value] + config[
                ConfigKeys.DISABLED.value]:
            _validate_identifier_pattern(identifier=identifier)
    except ValueError as exc:
        raise exceptions.ConfigurationError(
            'Invalid identifier pattern in enable or disable list.') from exc

    return config
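`type_check` is an AiiDA utility that is used throughout these examples; a minimal stand-in with the same observable contract (raise `TypeError` on a mismatch, optionally tolerating `None`) could look like this:

def type_check(value, expected_type, allow_none=False):
    """Raise TypeError unless `value` is an instance of `expected_type` (or None if allowed)."""
    if value is None and allow_none:
        return
    if not isinstance(value, expected_type):
        raise TypeError('expected {}, got {}'.format(expected_type, type(value)))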
Example 4
    def get(self, computer, user):
        """
        Return a AuthInfo given a computer and a user

        :param computer: a Computer instance
        :param user: a User instance
        :return: an AuthInfo object associated with the given computer and user
        :raise NotExistent: if the user is not configured to use computer
        :raise sqlalchemy.orm.exc.MultipleResultsFound: if the user is configured
            more than once to use the computer! Should never happen
        """
        from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned

        try:
            authinfo = DbAuthInfo.objects.get(dbcomputer=computer.dbcomputer,
                                              aiidauser_id=user.id)

            return self.from_dbmodel(authinfo)
        except ObjectDoesNotExist:
            raise exceptions.NotExistent(
                "The aiida user {} is not configured to use computer {}".
                format(user.email, computer.name))
        except MultipleObjectsReturned:
            raise exceptions.ConfigurationError(
                "The aiida user {} is configured more than once to use "
                "computer {}! Only one configuration is allowed".format(
                    user.email, computer.name))
Example 5
def load_config(create=False):
    """Instantiate Config object representing an AiiDA configuration file.

    Warning: Contrary to :func:`~aiida.manage.configuration.get_config`, this function is uncached and will always
    create a new Config object. You may want to call :func:`~aiida.manage.configuration.get_config` instead.

    :param create: if True, will create the configuration file if it does not already exist
    :type create: bool

    :return: the config
    :rtype: :class:`~aiida.manage.configuration.config.Config`
    :raises aiida.common.MissingConfigurationError: if the configuration file could not be found and create=False
    """
    import os
    from aiida.common import exceptions
    from .config import Config

    filepath = get_config_path()

    if not os.path.isfile(filepath) and not create:
        raise exceptions.MissingConfigurationError('configuration file {} does not exist'.format(filepath))

    try:
        config = Config.from_file(filepath)
    except ValueError as exception:
        raise exceptions.ConfigurationError('configuration file {} contains invalid JSON'.format(filepath)) from exception

    return config
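A hypothetical call site, relying only on the documented behavior that `MissingConfigurationError` is raised when the file is absent and `create=False`:

from aiida.common import exceptions

try:
    config = load_config()
except exceptions.MissingConfigurationError:
    # First use on this machine: create the configuration file.
    config = load_config(create=True)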
Example 6
    def get(self, computer, user):
        """
        Return a SqlaAuthInfo given a computer and a user

        :param computer: a Computer instance
        :param user: a User instance
        :return: an AuthInfo object associated with the given computer and user
        :raise NotExistent: if the user is not configured to use computer
        :raise sqlalchemy.orm.exc.MultipleResultsFound: if the user is configured
             more than once to use the computer! Should never happen
        """
        from sqlalchemy.orm.exc import MultipleResultsFound, NoResultFound
        from aiida.backends.sqlalchemy import get_scoped_session
        from aiida.backends.sqlalchemy.models.authinfo import DbAuthInfo

        session = get_scoped_session()

        try:
            authinfo = session.query(DbAuthInfo).filter_by(
                dbcomputer_id=computer.id,
                aiidauser_id=user.id,
            ).one()

            return self.from_dbmodel(authinfo)
        except NoResultFound:
            raise exceptions.NotExistent(
                "The aiida user {} is not configured to use computer {}".format(
                    user.email, computer.name))
        except MultipleResultsFound:
            raise exceptions.ConfigurationError(
                "The aiida user {} is configured more than once to use "
                "computer {}! Only one configuration is allowed".format(
                    user.email, computer.name))
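The two `except` branches above mirror the contract of SQLAlchemy's `Query.one()`, which raises `NoResultFound` on zero rows and `MultipleResultsFound` on more than one. A toy stand-in of that contract, with no database involved:

from sqlalchemy.orm.exc import MultipleResultsFound, NoResultFound

def one(rows):
    """Mimic Query.one(): return the single row or raise."""
    if not rows:
        raise NoResultFound()
    if len(rows) > 1:
        raise MultipleResultsFound()
    return rows[0]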
Example 7
    def configure_repository(self):
        """Validates the configured repository and in the case of a file system repo makes sure the folder exists."""
        import errno

        try:
            os.makedirs(self.repository_path)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                raise exceptions.ConfigurationError(
                    'could not create the configured repository `{}`: {}'.
                    format(self.repository_path, str(exception)))
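On Python 3 the `errno.EEXIST` check can be folded into the `os.makedirs` call itself with `exist_ok=True`; a simplified sketch of the same intent (not the project's code, and `exceptions` is assumed to be `aiida.common.exceptions` as above):

import os

from aiida.common import exceptions

def configure_repository(repository_path):
    """Create the repository folder if it does not exist; re-raise anything else as a configuration error."""
    try:
        os.makedirs(repository_path, exist_ok=True)
    except OSError as exception:
        raise exceptions.ConfigurationError(
            'could not create the configured repository `{}`: {}'.format(repository_path, exception))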
Example 8
    def get_transport_class(self):
        """
        Get the transport class for this computer. Can be used to instantiate a transport instance.

        :return: the transport class
        """
        try:
            return TransportFactory(self.transport_type)
        except exceptions.EntryPointError as exception:
            raise exceptions.ConfigurationError(
                f'No transport found for {self.label} [type {self.transport_type}], message: {exception}'
            )
Example 9
    def _parse_repository_uri(self):
        """
        This function validates the REPOSITORY_URI, that should be in the format protocol://address

        :note: At the moment, only the file protocol is supported.

        :return: a tuple (protocol, address).
        """
        import uritools
        parts = uritools.urisplit(self.repository_uri)

        if parts.scheme != 'file':
            raise exceptions.ConfigurationError(
                'invalid repository protocol, only the local `file://` is supported'
            )

        if not os.path.isabs(parts.path):
            raise exceptions.ConfigurationError(
                'invalid repository URI: the path has to be absolute')

        return parts.scheme, os.path.expanduser(parts.path)
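For illustration, this is what `uritools.urisplit` yields for a valid repository URI, using the same attributes the validation above reads:

import uritools

parts = uritools.urisplit('file:///home/aiida/repository')
print(parts.scheme)  # 'file'
print(parts.path)    # '/home/aiida/repository'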
Example 10
def load_config(create=False):
    """Instantiate the Config object representing the configuration file of the current AiiDA instance.

    :param create: if True, will create the configuration file if it does not already exist
    :type create: bool

    :return: the config
    :rtype: :class:`~aiida.manage.configuration.config.Config`
    :raises aiida.common.MissingConfigurationError: if the configuration file could not be found and create=False
    """
    import os
    from aiida.common import exceptions
    from .config import Config
    from .settings import AIIDA_CONFIG_FOLDER, DEFAULT_CONFIG_FILE_NAME

    filepath = os.path.join(AIIDA_CONFIG_FOLDER, DEFAULT_CONFIG_FILE_NAME)

    if IN_RT_DOC_MODE:
        # The following is a dummy config.json configuration that is used for the
        # proper compilation of the documentation on readthedocs.
        from aiida.manage.external.postgres import DEFAULT_DBINFO
        import tempfile
        return Config(
            tempfile.mkstemp()[1], {
                'default_profile': 'default',
                'profiles': {
                    'default': {
                        'AIIDADB_ENGINE': 'postgresql_psycopg2',
                        'AIIDADB_BACKEND': 'django',
                        'AIIDADB_HOST': DEFAULT_DBINFO['host'],
                        'AIIDADB_PORT': DEFAULT_DBINFO['port'],
                        'AIIDADB_NAME': 'aiidadb',
                        'AIIDADB_PASS': '******',
                        'default_user_email': '*****@*****.**',
                        'TIMEZONE': 'Europe/Zurich',
                        'AIIDADB_REPOSITORY_URI': 'file:///tmp/repository',
                        'AIIDADB_USER': '******'
                    }
                }
            }
        )

    if not os.path.isfile(filepath) and not create:
        raise exceptions.MissingConfigurationError('configuration file {} does not exist'.format(filepath))

    try:
        config = Config.from_file(filepath)
    except ValueError as exception:
        raise exceptions.ConfigurationError('configuration file {} contains invalid JSON'.format(filepath)) from exception

    return config
Example 11
    def get_transport_class(self):
        """
        Get the transport class for this computer. Can be used to instantiate a transport instance.

        :return: the transport class
        """
        try:
            return TransportFactory(self.get_transport_type())
        except exceptions.EntryPointError as exception:
            raise exceptions.ConfigurationError(
                'No transport found for {} [type {}], message: {}'.format(
                    self.name, self.get_transport_type(), exception
                )
            )
Example 12
    def get_scheduler(self):
        """
        Get a scheduler instance for this computer

        :return: the scheduler instance
        :rtype: :class:`aiida.schedulers.Scheduler`
        """
        try:
            scheduler_class = SchedulerFactory(self.scheduler_type)
            # I call the init without any parameter
            return scheduler_class()
        except exceptions.EntryPointError as exception:
            raise exceptions.ConfigurationError(
                f'No scheduler found for {self.label} [type {self.scheduler_type}], message: {exception}'
            )
Example 13
    def get_transport(self):
        """Return a fully configured transport that can be used to connect to the computer set for this instance.

        :rtype: :class:`aiida.transports.Transport`
        """
        computer = self.computer
        transport_type = computer.get_transport_type()

        try:
            transport_class = TransportFactory(transport_type)
        except exceptions.EntryPointError as exception:
            raise exceptions.ConfigurationError(
                'transport type `{}` could not be loaded: {}'.format(
                    transport_type, exception))

        return transport_class(machine=computer.hostname,
                               **self.get_auth_params())
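A hypothetical call site; AiiDA transports can typically be used as context managers, so the connection is opened and closed around the block (`authinfo` is assumed to be an existing `AuthInfo` instance):

with authinfo.get_transport() as transport:
    print(transport.getcwd())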
Example 14
def upload_calculation(node,
                       transport,
                       calc_info,
                       script_filename,
                       dry_run=False):
    """Upload a `CalcJob` instance

    :param node: the `CalcJobNode`.
    :param transport: an already opened transport to use to submit the calculation.
    :param calc_info: the calculation info datastructure returned by `CalcJobNode.presubmit`
    :param script_filename: the job launch script returned by `CalcJobNode.presubmit`
    :param dry_run: if True, do not upload to the remote computer but write to a local working directory instead
    :return: tuple of ``calc_info`` and ``script_filename``
    """
    from logging import LoggerAdapter
    from tempfile import NamedTemporaryFile
    from aiida.orm import load_node, Code, RemoteData

    # If the calculation already has a `remote_folder`, simply return. The upload was apparently already completed
    # before, which can happen if the daemon is restarted and it shuts down after uploading but before getting the
    # chance to perform the state transition. Upon reloading this calculation, it will re-attempt the upload.
    link_label = 'remote_folder'
    if node.get_outgoing(RemoteData, link_label_filter=link_label).first():
        execlogger.warning(
            'CalcJobNode<{}> already has a `{}` output: skipping upload'.
            format(node.pk, link_label))
        return calc_info, script_filename

    computer = node.computer

    codes_info = calc_info.codes_info
    input_codes = [
        load_node(_.code_uuid, sub_classes=(Code, )) for _ in codes_info
    ]

    logger_extra = get_dblogger_extra(node)
    transport.set_logger_extra(logger_extra)
    logger = LoggerAdapter(logger=execlogger, extra=logger_extra)

    if not dry_run and node.has_cached_links():
        raise ValueError(
            'Cannot submit calculation {} because it has cached input links! If you just want to test the '
            'submission, set `metadata.dry_run` to True in the inputs.'.format(
                node.pk))

    folder = node._raw_input_folder

    # If we are performing a dry-run, the working directory is a local folder that should already exist
    if dry_run:
        workdir = transport.getcwd()
    else:
        remote_user = transport.whoami()
        # TODO Doc: {username} field
        # TODO: if something is changed here, fix also 'verdi computer test'
        remote_working_directory = computer.get_workdir().format(
            username=remote_user)
        if not remote_working_directory.strip():
            raise exceptions.ConfigurationError(
                "[submission of calculation {}] No remote_working_directory configured for computer '{}'"
                .format(node.pk, computer.name))

        # If it already exists, no exception is raised
        try:
            transport.chdir(remote_working_directory)
        except IOError:
            logger.debug(
                '[submission of calculation {}] Unable to chdir in {}, trying to create it'
                .format(node.pk, remote_working_directory))
            try:
                transport.makedirs(remote_working_directory)
                transport.chdir(remote_working_directory)
            except EnvironmentError as exc:
                raise exceptions.ConfigurationError(
                    '[submission of calculation {}] '
                    'Unable to create the remote directory {} on '
                    "computer '{}': {}".format(node.pk,
                                               remote_working_directory,
                                               computer.name, exc))
        # Store remotely with sharding (this is where the folder structure of
        # remote jobs is chosen); the absolute path is then stored on the
        # calculation via `set_remote_workdir`, so later code can simply read
        # it back without knowing the sharding logic.
        transport.mkdir(calc_info.uuid[:2], ignore_existing=True)
        transport.chdir(calc_info.uuid[:2])
        transport.mkdir(calc_info.uuid[2:4], ignore_existing=True)
        transport.chdir(calc_info.uuid[2:4])

        try:
            # The final directory may already exist, most likely because this function was already executed once, but
            # failed and as a result was rescheduled by the engine. In this case it would be fine to delete the folder
            # and create it from scratch, except that we cannot be sure that this is the actual case. Therefore, to err
            # on the safe side, we move the folder to the lost+found directory before recreating the folder from scratch
            transport.mkdir(calc_info.uuid[4:])
        except OSError:
            # Move the existing directory to lost+found, log a warning and create a clean directory anyway
            path_existing = os.path.join(transport.getcwd(),
                                         calc_info.uuid[4:])
            path_lost_found = os.path.join(remote_working_directory,
                                           REMOTE_WORK_DIRECTORY_LOST_FOUND)
            path_target = os.path.join(path_lost_found, calc_info.uuid)
            logger.warning(
                'tried to create path {} but it already exists, moving the entire folder to {}'
                .format(path_existing, path_target))

            # Make sure the lost+found directory exists, then copy the existing folder there and delete the original
            transport.mkdir(path_lost_found, ignore_existing=True)
            transport.copytree(path_existing, path_target)
            transport.rmtree(path_existing)

            # Now we can create a clean folder for this calculation
            transport.mkdir(calc_info.uuid[4:])
        finally:
            transport.chdir(calc_info.uuid[4:])

        # I store the workdir of the calculation for later file retrieval
        workdir = transport.getcwd()
        node.set_remote_workdir(workdir)

    # I first create the code files, so that the code can put
    # default files to be overwritten by the plugin itself.
    # Still, beware! The code file itself could be overwritten...
    # But I checked for this earlier.
    for code in input_codes:
        if code.is_local():
            # Note: this will possibly overwrite files
            for f in code.get_folder_list():
                transport.put(code.get_abs_path(f), f)
            transport.chmod(code.get_local_executable(), 0o755)  # rwxr-xr-x

    # In a dry_run, the working directory is the raw input folder, which will already contain these resources
    if not dry_run:
        for filename in folder.get_content_list():
            logger.debug(
                '[submission of calculation {}] copying file/folder {}...'.
                format(node.pk, filename))
            transport.put(folder.get_abs_path(filename), filename)

    # local_copy_list is a list of tuples, each with (uuid, filename, target_rel_path)
    # NOTE: validation of these lists are done inside calculation.presubmit()
    local_copy_list = calc_info.local_copy_list or []
    remote_copy_list = calc_info.remote_copy_list or []
    remote_symlink_list = calc_info.remote_symlink_list or []

    for uuid, filename, target in local_copy_list:
        logger.debug(
            '[submission of calculation {}] copying local file/folder to {}'.
            format(node.pk, target))

        try:
            data_node = load_node(uuid=uuid)
        except exceptions.NotExistent:
            logger.warning(
                'failed to load Node<{}> specified in the `local_copy_list`'.
                format(uuid))
            continue

        # Note, once #2579 is implemented, use the `node.open` method instead of the named temporary file in
        # combination with the new `Transport.put_object_from_filelike`
        # Since the content of the node could potentially be binary, we read the raw bytes and pass them on
        with NamedTemporaryFile(mode='wb+') as handle:
            handle.write(data_node.get_object_content(filename, mode='rb'))
            handle.flush()
            handle.seek(0)
            transport.put(handle.name, target)

    if dry_run:
        if remote_copy_list:
            with open(os.path.join(workdir, '_aiida_remote_copy_list.txt'),
                      'w') as handle:
                for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_copy_list:
                    handle.write(
                        'would have copied {} to {} in working directory on remote {}\n'
                        .format(remote_abs_path, dest_rel_path, computer.name))

        if remote_symlink_list:
            with open(os.path.join(workdir, '_aiida_remote_symlink_list.txt'),
                      'w') as handle:
                for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_symlink_list:
                    handle.write(
                        'would have created a symlink from {} to {} in working directory on remote {}\n'
                        .format(remote_abs_path, dest_rel_path, computer.name))

    else:

        for (remote_computer_uuid, remote_abs_path,
             dest_rel_path) in remote_copy_list:
            if remote_computer_uuid == computer.uuid:
                logger.debug(
                    '[submission of calculation {}] copying {} remotely, directly on the machine {}'
                    .format(node.pk, dest_rel_path, computer.name))
                try:
                    transport.copy(remote_abs_path, dest_rel_path)
                except (IOError, OSError):
                    logger.warning(
                        '[submission of calculation {}] Unable to copy remote resource from {} to {}! '
                        'Stopping.'.format(node.pk, remote_abs_path,
                                           dest_rel_path))
                    raise
            else:
                raise NotImplementedError(
                    '[submission of calculation {}] Remote copy between two different machines is '
                    'not implemented yet'.format(node.pk))

        for (remote_computer_uuid, remote_abs_path,
             dest_rel_path) in remote_symlink_list:
            if remote_computer_uuid == computer.uuid:
                logger.debug(
                    '[submission of calculation {}] creating symlink for {} remotely, directly on the machine {}'
                    .format(node.pk, dest_rel_path, computer.name))
                try:
                    transport.symlink(remote_abs_path, dest_rel_path)
                except (IOError, OSError):
                    logger.warning(
                        '[submission of calculation {}] Unable to create remote symlink from {} to {}! '
                        'Stopping.'.format(node.pk, remote_abs_path,
                                           dest_rel_path))
                    raise
            else:
                raise IOError(
                    'It is not possible to create a symlink between two different machines for '
                    'calculation {}'.format(node.pk))

    if not dry_run:
        # Make sure that attaching the `remote_folder` with a link is the last thing we do. This gives the biggest
        # chance of making this method idempotent. That is to say, if a runner gets interrupted during this action, it
        # will simply retry the upload, unless we got here and managed to link it up, in which case we move to the next
        # task. Because in that case, the check for the existence of this link at the top of this function will exit
        # early from this command.
        remotedata = RemoteData(computer=computer, remote_path=workdir)
        remotedata.add_incoming(node,
                                link_type=LinkType.CREATE,
                                link_label='remote_folder')
        remotedata.store()

    return calc_info, script_filename
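The sharding used above maps a calculation UUID onto a three-level remote path; a self-contained illustration with a made-up UUID:

uuid = '479a2dc6-61b0-4b7d-92f1-1b1b2e1dfd0a'
print('/'.join((uuid[:2], uuid[2:4], uuid[4:])))
# 47/9a/2dc6-61b0-4b7d-92f1-1b1b2e1dfd0a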
Example 15
def submit_calculation(calculation, transport):
    """
    Submit a calculation

    :param calculation: the instance of JobCalculation to submit.
    :param transport: an already opened transport to use to submit the calculation.
    """
    from aiida.orm import Code, load_node
    from aiida.common.exceptions import InputValidationError
    from aiida.orm.data.remote import RemoteData

    computer = calculation.get_computer()

    if not computer.is_enabled():
        return

    logger_extra = get_dblogger_extra(calculation)
    transport._set_logger_extra(logger_extra)

    if calculation._has_cached_links():
        raise ValueError("Cannot submit calculation {} because it has "
                         "cached input links! If you "
                         "just want to test the submission, use the "
                         "test_submit() method, otherwise store all links"
                         "first".format(calculation.pk))

    s = computer.get_scheduler()
    s.set_transport(transport)

    with SandboxFolder() as folder:
        calcinfo, script_filename = calculation._presubmit(
            folder, use_unstored_links=False)

        codes_info = calcinfo.codes_info
        input_codes = [
            load_node(_.code_uuid, sub_class=Code) for _ in codes_info
        ]

        for code in input_codes:
            if not code.can_run_on(computer):
                raise InputValidationError(
                    "The selected code {} for calculation "
                    "{} cannot run on computer {}".format(
                        code.pk, calculation.pk, computer.name))

        # After this call, no modifications to the folder should be done
        calculation._store_raw_input_folder(folder.abspath)

        # NOTE: some logic is partially replicated in the 'test_submit'
        # method of JobCalculation. If major logic changes are done
        # here, make sure to update also the test_submit routine
        remote_user = transport.whoami()
        # TODO Doc: {username} field
        # TODO: if something is changed here, fix also 'verdi computer test'
        remote_working_directory = computer.get_workdir().format(
            username=remote_user)
        if not remote_working_directory.strip():
            raise exceptions.ConfigurationError(
                "[submission of calculation {}] "
                "No remote_working_directory configured for computer "
                "'{}'".format(calculation.pk, computer.name))

        # If it already exists, no exception is raised
        try:
            transport.chdir(remote_working_directory)
        except IOError:
            execlogger.debug(
                "[submission of calculation {}] "
                "Unable to chdir in {}, trying to create it".format(
                    calculation.pk, remote_working_directory),
                extra=logger_extra)
            try:
                transport.makedirs(remote_working_directory)
                transport.chdir(remote_working_directory)
            except (IOError, OSError) as exc:
                raise exceptions.ConfigurationError(
                    "[submission of calculation {}] "
                    "Unable to create the remote directory {} on "
                    "computer '{}': {}".format(calculation.pk,
                                               remote_working_directory,
                                               computer.name, exc))
        # Store remotely with sharding (this is where the folder structure of
        # remote jobs is chosen); the absolute path is then stored on the
        # calculation via `_set_remote_workdir`, so later code can simply read
        # it back without knowing the sharding logic.
        transport.mkdir(calcinfo.uuid[:2], ignore_existing=True)
        transport.chdir(calcinfo.uuid[:2])
        transport.mkdir(calcinfo.uuid[2:4], ignore_existing=True)
        transport.chdir(calcinfo.uuid[2:4])
        transport.mkdir(calcinfo.uuid[4:])
        transport.chdir(calcinfo.uuid[4:])
        workdir = transport.getcwd()
        # I store the workdir of the calculation for later file
        # retrieval
        calculation._set_remote_workdir(workdir)

        # I first create the code files, so that the code can put
        # default files to be overwritten by the plugin itself.
        # Still, beware! The code file itself could be overwritten...
        # But I checked for this earlier.
        for code in input_codes:
            if code.is_local():
                # Note: this will possibly overwrite files
                for f in code.get_folder_list():
                    transport.put(code.get_abs_path(f), f)
                transport.chmod(code.get_local_executable(),
                                0o755)  # rwxr-xr-x

        # copy all files, recursively with folders
        for f in folder.get_content_list():
            execlogger.debug("[submission of calculation {}] "
                             "copying file/folder {}...".format(
                                 calculation.pk, f),
                             extra=logger_extra)
            transport.put(folder.get_abs_path(f), f)

        # local_copy_list is a list of tuples,
        # each with (src_abs_path, dest_rel_path)
        # NOTE: validation of these lists are done
        # inside calculation._presubmit()
        local_copy_list = calcinfo.local_copy_list
        remote_copy_list = calcinfo.remote_copy_list
        remote_symlink_list = calcinfo.remote_symlink_list

        if local_copy_list is not None:
            for src_abs_path, dest_rel_path in local_copy_list:
                execlogger.debug("[submission of calculation {}] "
                                 "copying local file/folder to {}".format(
                                     calculation.pk, dest_rel_path),
                                 extra=logger_extra)
                transport.put(src_abs_path, dest_rel_path)

        if remote_copy_list is not None:
            for (remote_computer_uuid, remote_abs_path,
                 dest_rel_path) in remote_copy_list:
                if remote_computer_uuid == computer.uuid:
                    execlogger.debug(
                        "[submission of calculation {}] "
                        "copying {} remotely, directly on the machine "
                        "{}".format(calculation.pk, dest_rel_path,
                                    computer.name))
                    try:
                        transport.copy(remote_abs_path, dest_rel_path)
                    except (IOError, OSError):
                        execlogger.warning(
                            "[submission of calculation {}] "
                            "Unable to copy remote resource from {} to {}! "
                            "Stopping.".format(calculation.pk, remote_abs_path,
                                               dest_rel_path),
                            extra=logger_extra)
                        raise
                else:
                    # TODO: implement copy between two different
                    # machines!
                    raise NotImplementedError(
                        "[presubmission of calculation {}] "
                        "Remote copy between two different machines is "
                        "not implemented yet".format(calculation.pk))

        if remote_symlink_list is not None:
            for (remote_computer_uuid, remote_abs_path,
                 dest_rel_path) in remote_symlink_list:
                if remote_computer_uuid == computer.uuid:
                    execlogger.debug(
                        "[submission of calculation {}] "
                        "creating symlink for {} remotely, directly on the "
                        "machine {}".format(calculation.pk, dest_rel_path,
                                            computer.name))
                    try:
                        transport.symlink(remote_abs_path, dest_rel_path)
                    except (IOError, OSError):
                        execlogger.warning(
                            "[submission of calculation {}] "
                            "Unable to create remote symlink from {} to {}! "
                            "Stopping.".format(calculation.pk, remote_abs_path,
                                               dest_rel_path),
                            extra=logger_extra)
                        raise
                else:
                    raise IOError("It is not possible to create a symlink "
                                  "between two different machines for "
                                  "calculation {}".format(calculation.pk))

        remotedata = RemoteData(computer=computer, remote_path=workdir)
        remotedata.add_link_from(calculation,
                                 label='remote_folder',
                                 link_type=LinkType.CREATE)
        remotedata.store()

        job_id = s.submit_from_script(transport.getcwd(), script_filename)
        calculation._set_job_id(job_id)
Example 16
def upload_calculation(node,
                       transport,
                       calc_info,
                       folder,
                       inputs=None,
                       dry_run=False):
    """Upload a `CalcJob` instance

    :param node: the `CalcJobNode`.
    :param transport: an already opened transport to use to submit the calculation.
    :param calc_info: the calculation info datastructure returned by `CalcJob.presubmit`
    :param folder: temporary local file system folder containing the inputs written by `CalcJob.prepare_for_submission`
    :param inputs: optional (nested) mapping of the input nodes, used to resolve entries of the `local_copy_list` that cannot be loaded by UUID
    :param dry_run: if True, do not upload to the remote computer but write to a local working directory instead
    """
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    from logging import LoggerAdapter
    from tempfile import NamedTemporaryFile
    from aiida.orm import load_node, Code, Node, RemoteData

    # If the calculation already has a `remote_folder`, simply return. The upload was apparently already completed
    # before, which can happen if the daemon is restarted and it shuts down after uploading but before getting the
    # chance to perform the state transition. Upon reloading this calculation, it will re-attempt the upload.
    link_label = 'remote_folder'
    if node.get_outgoing(RemoteData, link_label_filter=link_label).first():
        execlogger.warning(
            f'CalcJobNode<{node.pk}> already has a `{link_label}` output: skipping upload'
        )
        return calc_info

    computer = node.computer

    codes_info = calc_info.codes_info
    input_codes = [
        load_node(_.code_uuid, sub_classes=(Code, )) for _ in codes_info
    ]

    logger_extra = get_dblogger_extra(node)
    transport.set_logger_extra(logger_extra)
    logger = LoggerAdapter(logger=execlogger, extra=logger_extra)

    if not dry_run and node.has_cached_links():
        raise ValueError(
            'Cannot submit calculation {} because it has cached input links! If you just want to test the '
            'submission, set `metadata.dry_run` to True in the inputs.'.format(
                node.pk))

    # If we are performing a dry-run, the working directory is a local folder that should already exist
    if dry_run:
        workdir = transport.getcwd()
    else:
        remote_user = transport.whoami()
        remote_working_directory = computer.get_workdir().format(
            username=remote_user)
        if not remote_working_directory.strip():
            raise exceptions.ConfigurationError(
                "[submission of calculation {}] No remote_working_directory configured for computer '{}'"
                .format(node.pk, computer.label))

        # If it already exists, no exception is raised
        try:
            transport.chdir(remote_working_directory)
        except IOError:
            logger.debug(
                '[submission of calculation {}] Unable to chdir in {}, trying to create it'
                .format(node.pk, remote_working_directory))
            try:
                transport.makedirs(remote_working_directory)
                transport.chdir(remote_working_directory)
            except EnvironmentError as exc:
                raise exceptions.ConfigurationError(
                    '[submission of calculation {}] '
                    'Unable to create the remote directory {} on '
                    "computer '{}': {}".format(node.pk,
                                               remote_working_directory,
                                               computer.label, exc))
        # Store remotely with sharding (this is where the folder structure of
        # remote jobs is chosen); the absolute path is then stored on the
        # calculation via `set_remote_workdir`, so later code can simply read
        # it back without knowing the sharding logic.
        transport.mkdir(calc_info.uuid[:2], ignore_existing=True)
        transport.chdir(calc_info.uuid[:2])
        transport.mkdir(calc_info.uuid[2:4], ignore_existing=True)
        transport.chdir(calc_info.uuid[2:4])

        try:
            # The final directory may already exist, most likely because this function was already executed once, but
            # failed and as a result was rescheduled by the engine. In this case it would be fine to delete the folder
            # and create it from scratch, except that we cannot be sure that this is the actual case. Therefore, to err
            # on the safe side, we move the folder to the lost+found directory before recreating the folder from scratch
            transport.mkdir(calc_info.uuid[4:])
        except OSError:
            # Move the existing directory to lost+found, log a warning and create a clean directory anyway
            path_existing = os.path.join(transport.getcwd(),
                                         calc_info.uuid[4:])
            path_lost_found = os.path.join(remote_working_directory,
                                           REMOTE_WORK_DIRECTORY_LOST_FOUND)
            path_target = os.path.join(path_lost_found, calc_info.uuid)
            logger.warning(
                f'tried to create path {path_existing} but it already exists, moving the entire folder to {path_target}'
            )

            # Make sure the lost+found directory exists, then copy the existing folder there and delete the original
            transport.mkdir(path_lost_found, ignore_existing=True)
            transport.copytree(path_existing, path_target)
            transport.rmtree(path_existing)

            # Now we can create a clean folder for this calculation
            transport.mkdir(calc_info.uuid[4:])
        finally:
            transport.chdir(calc_info.uuid[4:])

        # I store the workdir of the calculation for later file retrieval
        workdir = transport.getcwd()
        node.set_remote_workdir(workdir)

    # I first create the code files, so that the code can put
    # default files to be overwritten by the plugin itself.
    # Still, beware! The code file itself could be overwritten...
    # But I checked for this earlier.
    for code in input_codes:
        if code.is_local():
            # Note: this will possibly overwrite files
            for filename in code.list_object_names():
                # Note, once #2579 is implemented, use the `node.open` method instead of the named temporary file in
                # combination with the new `Transport.put_object_from_filelike`
                # Since the content of the node could potentially be binary, we read the raw bytes and pass them on
                with NamedTemporaryFile(mode='wb+') as handle:
                    handle.write(code.get_object_content(filename, mode='rb'))
                    handle.flush()
                    transport.put(handle.name, filename)
            transport.chmod(code.get_local_executable(), 0o755)  # rwxr-xr-x

    # local_copy_list is a list of tuples, each with (uuid, filename, target_rel_path)
    # NOTE: validation of these lists are done inside calculation.presubmit()
    local_copy_list = calc_info.local_copy_list or []
    remote_copy_list = calc_info.remote_copy_list or []
    remote_symlink_list = calc_info.remote_symlink_list or []
    provenance_exclude_list = calc_info.provenance_exclude_list or []

    for uuid, filename, target in local_copy_list:
        logger.debug(
            f'[submission of calculation {node.uuid}] copying local file/folder to {target}'
        )

        def find_data_node(inputs, uuid):
            """Find and return the node with the given UUID from a nested mapping of input nodes.

            :param inputs: (nested) mapping of nodes
            :param uuid: UUID of the node to find
            :return: instance of `Node` or `None` if not found
            """
            from collections.abc import Mapping
            data_node = None

            for input_node in inputs.values():
                if isinstance(input_node, Mapping):
                    data_node = find_data_node(input_node, uuid)
                elif isinstance(input_node, Node) and input_node.uuid == uuid:
                    data_node = input_node
                if data_node is not None:
                    break

            return data_node

        try:
            data_node = load_node(uuid=uuid)
        except exceptions.NotExistent:
            data_node = find_data_node(inputs, uuid)

        if data_node is None:
            logger.warning(
                f'failed to load Node<{uuid}> specified in the `local_copy_list`'
            )
        else:
            dirname = os.path.dirname(target)
            if dirname:
                os.makedirs(os.path.join(folder.abspath, dirname),
                            exist_ok=True)
            with folder.open(target, 'wb') as handle:
                with data_node.open(filename, 'rb') as source:
                    shutil.copyfileobj(source, handle)
            provenance_exclude_list.append(target)

    # In a dry_run, the working directory is the raw input folder, which will already contain these resources
    if not dry_run:
        for filename in folder.get_content_list():
            logger.debug(
                f'[submission of calculation {node.pk}] copying file/folder {filename}...'
            )
            transport.put(folder.get_abs_path(filename), filename)

        for (remote_computer_uuid, remote_abs_path,
             dest_rel_path) in remote_copy_list:
            if remote_computer_uuid == computer.uuid:
                logger.debug(
                    '[submission of calculation {}] copying {} remotely, directly on the machine {}'
                    .format(node.pk, dest_rel_path, computer.label))
                try:
                    transport.copy(remote_abs_path, dest_rel_path)
                except (IOError, OSError):
                    logger.warning(
                        '[submission of calculation {}] Unable to copy remote resource from {} to {}! '
                        'Stopping.'.format(node.pk, remote_abs_path,
                                           dest_rel_path))
                    raise
            else:
                raise NotImplementedError(
                    '[submission of calculation {}] Remote copy between two different machines is '
                    'not implemented yet'.format(node.pk))

        for (remote_computer_uuid, remote_abs_path,
             dest_rel_path) in remote_symlink_list:
            if remote_computer_uuid == computer.uuid:
                logger.debug(
                    '[submission of calculation {}] creating symlink for {} remotely, directly on the machine {}'
                    .format(node.pk, dest_rel_path, computer.label))
                try:
                    transport.symlink(remote_abs_path, dest_rel_path)
                except (IOError, OSError):
                    logger.warning(
                        '[submission of calculation {}] Unable to create remote symlink from {} to {}! '
                        'Stopping.'.format(node.pk, remote_abs_path,
                                           dest_rel_path))
                    raise
            else:
                raise IOError(
                    f'It is not possible to create a symlink between two different machines for calculation {node.pk}'
                )
    else:

        if remote_copy_list:
            with open(os.path.join(workdir, '_aiida_remote_copy_list.txt'),
                      'w') as handle:
                for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_copy_list:
                    handle.write(
                        'would have copied {} to {} in working directory on remote {}\n'
                        .format(remote_abs_path, dest_rel_path,
                                computer.label))

        if remote_symlink_list:
            with open(os.path.join(workdir, '_aiida_remote_symlink_list.txt'),
                      'w') as handle:
                for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_symlink_list:
                    handle.write(
                        'would have created a symlink from {} to {} in working directory on remote {}\n'
                        .format(remote_abs_path, dest_rel_path,
                                computer.label))

    # Loop recursively over content of the sandbox folder copying all that are not in `provenance_exclude_list`. Note
    # that directories are not created explicitly. The `node.put_object_from_filelike` call will create intermediate
    # directories for nested files automatically when needed. This means though that empty folders in the sandbox or
    # folders that would be empty when considering the `provenance_exclude_list` will *not* be copied to the repo. The
    # advantage of this explicit copying instead of deleting the files from `provenance_exclude_list` from the sandbox
    # first before moving the entire remaining content to the node's repository, is that in this way we are guaranteed
    # not to accidentally move files to the repository that should not go there. Note that all entries in
    # the provenance exclude list are normalized first, just as the paths that are in the sandbox folder, otherwise the
    # direct equality test may fail, e.g.: './path/file.txt' != 'path/file.txt' even though they reference the same file
    provenance_exclude_list = [
        os.path.normpath(entry) for entry in provenance_exclude_list
    ]

    for root, _, filenames in os.walk(folder.abspath):
        for filename in filenames:
            filepath = os.path.join(root, filename)
            relpath = os.path.normpath(
                os.path.relpath(filepath, folder.abspath))
            if relpath not in provenance_exclude_list:
                with open(filepath, 'rb') as handle:
                    node._repository.put_object_from_filelike(handle,
                                                              relpath,
                                                              'wb',
                                                              force=True)  # pylint: disable=protected-access

    if not dry_run:
        # Make sure that attaching the `remote_folder` with a link is the last thing we do. This gives the biggest
        # chance of making this method idempotent. That is to say, if a runner gets interrupted during this action, it
        # will simply retry the upload, unless we got here and managed to link it up, in which case we move to the next
        # task. Because in that case, the check for the existence of this link at the top of this function will exit
        # early from this command.
        remotedata = RemoteData(computer=computer, remote_path=workdir)
        remotedata.add_incoming(node,
                                link_type=LinkType.CREATE,
                                link_label='remote_folder')
        remotedata.store()
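The normalization step above matters because membership in `provenance_exclude_list` is a plain string comparison; a tiny illustration:

import os

entries = ['./path/file.txt', 'other/./file.txt']
print([os.path.normpath(entry) for entry in entries])
# ['path/file.txt', 'other/file.txt']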