Exemple #1
0
    def _generate_remote_data(computer, remote_path, entry_point_name=None):
        """Generate a RemoteData node, loctated at remote_path"""
        from aiida.common.links import LinkType
        from aiida.orm import CalcJobNode, RemoteData
        from aiida.plugins.entry_point import format_entry_point_string

        entry_point = format_entry_point_string('aiida.calculations',
                                                entry_point_name)

        remote = RemoteData(remote_path=remote_path)
        remote.computer = computer

        if entry_point_name is not None:
            creator = CalcJobNode(computer=computer, process_type=entry_point)
            creator.set_option('resources', {
                'num_machines': 1,
                'num_mpiprocs_per_machine': 1
            })
            #creator.set_attribute('prefix', 'aiida')
            remote.add_incoming(creator,
                                link_type=LinkType.CREATE,
                                link_label='remote_folder')
            creator.store()

        return remote
Exemple #2
0
def test_is_neb(vasp_code, poscar, is_restart, is_neb):
    from aiida.orm import RemoteData
    from aiida.common.links import LinkType
    from aiida.plugins import CalculationFactory
    from aiida.engine import run_get_node
    from aiida_cusp.data import VaspPotcarData
    # define code
    vasp_code.set_attribute('input_plugin', 'cusp.vasp')
    # setup calculator
    inputs = {
        'code': vasp_code,
        'metadata': {'options': {'resources': {'num_machines': 1}}},
    }
    if is_neb:
        neb_path = {'node_00': poscar, 'node_01': poscar, 'node_02': poscar}
        inputs.update({'neb_path': neb_path})
    else:
        inputs.update({'poscar': poscar})
    VaspCalculation = CalculationFactory('cusp.vasp')
    vasp_calc_base = VaspCalculation(inputs=inputs)
    # if restart create a second calculator using a remote_folder connected
    # to the first calculation as input
    if is_restart:
        inputs.pop('poscar', None)
        inputs.pop('neb_path', None)
        remote_data = RemoteData(computer=vasp_code.computer, remote_path='')
        remote_data.add_incoming(vasp_calc_base.node.store(),
                                 link_type=LinkType.CREATE,
                                 link_label='remote_folder')
        inputs.update({'restart': {'folder': remote_data}})
        vasp_calc_base = VaspCalculation(inputs=inputs)
    # assert is_neb() returns the desired result
    result = vasp_calc_base.is_neb()
    assert result is is_neb
Exemple #3
0
    def _generate_remote_data(computer,
                              remote_path,
                              entry_point_name=None,
                              extras_root=[]):
        """Return a `KpointsData` with a mesh of npoints in each direction."""
        from aiida.common.links import LinkType
        from aiida.orm import CalcJobNode, RemoteData, Dict
        from aiida.plugins.entry_point import format_entry_point_string

        entry_point = format_entry_point_string('aiida.calculations',
                                                entry_point_name)

        remote = RemoteData(remote_path=remote_path)
        remote.computer = computer

        if entry_point_name is not None:
            creator = CalcJobNode(computer=computer, process_type=entry_point)
            creator.set_option('resources', {
                'num_machines': 1,
                'num_mpiprocs_per_machine': 1
            })
            remote.add_incoming(creator,
                                link_type=LinkType.CREATE,
                                link_label='remote_folder')

            for extra in extras_root:
                to_link = extra[0]
                if isinstance(to_link, dict):
                    to_link = Dict(dict=to_link)
                to_link.store()
                creator.add_incoming(to_link,
                                     link_type=LinkType.INPUT_CALC,
                                     link_label=extra[1])

            creator.store()

        return remote
Exemple #4
0
def upload_calculation(node,
                       transport,
                       calc_info,
                       script_filename,
                       dry_run=False):
    """Upload a `CalcJob` instance

    :param node: the `CalcJobNode`.
    :param transport: an already opened transport to use to submit the calculation.
    :param calc_info: the calculation info datastructure returned by `CalcJobNode.presubmit`
    :param script_filename: the job launch script returned by `CalcJobNode.presubmit`
    :return: tuple of ``calc_info`` and ``script_filename``
    """
    from logging import LoggerAdapter
    from tempfile import NamedTemporaryFile
    from aiida.orm import load_node, Code, RemoteData

    # If the calculation already has a `remote_folder`, simply return. The upload was apparently already completed
    # before, which can happen if the daemon is restarted and it shuts down after uploading but before getting the
    # chance to perform the state transition. Upon reloading this calculation, it will re-attempt the upload.
    link_label = 'remote_folder'
    if node.get_outgoing(RemoteData, link_label_filter=link_label).first():
        execlogger.warning(
            'CalcJobNode<{}> already has a `{}` output: skipping upload'.
            format(node.pk, link_label))
        return calc_info, script_filename

    computer = node.computer

    codes_info = calc_info.codes_info
    input_codes = [
        load_node(_.code_uuid, sub_classes=(Code, )) for _ in codes_info
    ]

    logger_extra = get_dblogger_extra(node)
    transport.set_logger_extra(logger_extra)
    logger = LoggerAdapter(logger=execlogger, extra=logger_extra)

    if not dry_run and node.has_cached_links():
        raise ValueError(
            'Cannot submit calculation {} because it has cached input links! If you just want to test the '
            'submission, set `metadata.dry_run` to True in the inputs.'.format(
                node.pk))

    folder = node._raw_input_folder

    # If we are performing a dry-run, the working directory should actually be a local folder that should already exist
    if dry_run:
        workdir = transport.getcwd()
    else:
        remote_user = transport.whoami()
        # TODO Doc: {username} field
        # TODO: if something is changed here, fix also 'verdi computer test'
        remote_working_directory = computer.get_workdir().format(
            username=remote_user)
        if not remote_working_directory.strip():
            raise exceptions.ConfigurationError(
                "[submission of calculation {}] No remote_working_directory configured for computer '{}'"
                .format(node.pk, computer.name))

        # If it already exists, no exception is raised
        try:
            transport.chdir(remote_working_directory)
        except IOError:
            logger.debug(
                '[submission of calculation {}] Unable to chdir in {}, trying to create it'
                .format(node.pk, remote_working_directory))
            try:
                transport.makedirs(remote_working_directory)
                transport.chdir(remote_working_directory)
            except EnvironmentError as exc:
                raise exceptions.ConfigurationError(
                    '[submission of calculation {}] '
                    'Unable to create the remote directory {} on '
                    "computer '{}': {}".format(node.pk,
                                               remote_working_directory,
                                               computer.name, exc))
        # Store remotely with sharding (here is where we choose
        # the folder structure of remote jobs; then I store this
        # in the calculation properties using _set_remote_dir
        # and I do not have to know the logic, but I just need to
        # read the absolute path from the calculation properties.
        transport.mkdir(calc_info.uuid[:2], ignore_existing=True)
        transport.chdir(calc_info.uuid[:2])
        transport.mkdir(calc_info.uuid[2:4], ignore_existing=True)
        transport.chdir(calc_info.uuid[2:4])

        try:
            # The final directory may already exist, most likely because this function was already executed once, but
            # failed and as a result was rescheduled by the eninge. In this case it would be fine to delete the folder
            # and create it from scratch, except that we cannot be sure that this the actual case. Therefore, to err on
            # the safe side, we move the folder to the lost+found directory before recreating the folder from scratch
            transport.mkdir(calc_info.uuid[4:])
        except OSError:
            # Move the existing directory to lost+found, log a warning and create a clean directory anyway
            path_existing = os.path.join(transport.getcwd(),
                                         calc_info.uuid[4:])
            path_lost_found = os.path.join(remote_working_directory,
                                           REMOTE_WORK_DIRECTORY_LOST_FOUND)
            path_target = os.path.join(path_lost_found, calc_info.uuid)
            logger.warning(
                'tried to create path {} but it already exists, moving the entire folder to {}'
                .format(path_existing, path_target))

            # Make sure the lost+found directory exists, then copy the existing folder there and delete the original
            transport.mkdir(path_lost_found, ignore_existing=True)
            transport.copytree(path_existing, path_target)
            transport.rmtree(path_existing)

            # Now we can create a clean folder for this calculation
            transport.mkdir(calc_info.uuid[4:])
        finally:
            transport.chdir(calc_info.uuid[4:])

        # I store the workdir of the calculation for later file retrieval
        workdir = transport.getcwd()
        node.set_remote_workdir(workdir)

    # I first create the code files, so that the code can put
    # default files to be overwritten by the plugin itself.
    # Still, beware! The code file itself could be overwritten...
    # But I checked for this earlier.
    for code in input_codes:
        if code.is_local():
            # Note: this will possibly overwrite files
            for f in code.get_folder_list():
                transport.put(code.get_abs_path(f), f)
            transport.chmod(code.get_local_executable(), 0o755)  # rwxr-xr-x

    # In a dry_run, the working directory is the raw input folder, which will already contain these resources
    if not dry_run:
        for filename in folder.get_content_list():
            logger.debug(
                '[submission of calculation {}] copying file/folder {}...'.
                format(node.pk, filename))
            transport.put(folder.get_abs_path(filename), filename)

    # local_copy_list is a list of tuples, each with (uuid, dest_rel_path)
    # NOTE: validation of these lists are done inside calculation.presubmit()
    local_copy_list = calc_info.local_copy_list or []
    remote_copy_list = calc_info.remote_copy_list or []
    remote_symlink_list = calc_info.remote_symlink_list or []

    for uuid, filename, target in local_copy_list:
        logger.debug(
            '[submission of calculation {}] copying local file/folder to {}'.
            format(node.pk, target))

        try:
            data_node = load_node(uuid=uuid)
        except exceptions.NotExistent:
            logger.warning(
                'failed to load Node<{}> specified in the `local_copy_list`'.
                format(uuid))

        # Note, once #2579 is implemented, use the `node.open` method instead of the named temporary file in
        # combination with the new `Transport.put_object_from_filelike`
        # Since the content of the node could potentially be binary, we read the raw bytes and pass them on
        with NamedTemporaryFile(mode='wb+') as handle:
            handle.write(data_node.get_object_content(filename, mode='rb'))
            handle.flush()
            handle.seek(0)
            transport.put(handle.name, target)

    if dry_run:
        if remote_copy_list:
            with open(os.path.join(workdir, '_aiida_remote_copy_list.txt'),
                      'w') as handle:
                for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_copy_list:
                    handle.write(
                        'would have copied {} to {} in working directory on remote {}'
                        .format(remote_abs_path, dest_rel_path, computer.name))

        if remote_symlink_list:
            with open(os.path.join(workdir, '_aiida_remote_symlink_list.txt'),
                      'w') as handle:
                for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_symlink_list:
                    handle.write(
                        'would have created symlinks from {} to {} in working directory on remote {}'
                        .format(remote_abs_path, dest_rel_path, computer.name))

    else:

        for (remote_computer_uuid, remote_abs_path,
             dest_rel_path) in remote_copy_list:
            if remote_computer_uuid == computer.uuid:
                logger.debug(
                    '[submission of calculation {}] copying {} remotely, directly on the machine {}'
                    .format(node.pk, dest_rel_path, computer.name))
                try:
                    transport.copy(remote_abs_path, dest_rel_path)
                except (IOError, OSError):
                    logger.warning(
                        '[submission of calculation {}] Unable to copy remote resource from {} to {}! '
                        'Stopping.'.format(node.pk, remote_abs_path,
                                           dest_rel_path))
                    raise
            else:
                raise NotImplementedError(
                    '[submission of calculation {}] Remote copy between two different machines is '
                    'not implemented yet'.format(node.pk))

        for (remote_computer_uuid, remote_abs_path,
             dest_rel_path) in remote_symlink_list:
            if remote_computer_uuid == computer.uuid:
                logger.debug(
                    '[submission of calculation {}] copying {} remotely, directly on the machine {}'
                    .format(node.pk, dest_rel_path, computer.name))
                try:
                    transport.symlink(remote_abs_path, dest_rel_path)
                except (IOError, OSError):
                    logger.warning(
                        '[submission of calculation {}] Unable to create remote symlink from {} to {}! '
                        'Stopping.'.format(node.pk, remote_abs_path,
                                           dest_rel_path))
                    raise
            else:
                raise IOError(
                    'It is not possible to create a symlink between two different machines for '
                    'calculation {}'.format(node.pk))

    if not dry_run:
        # Make sure that attaching the `remote_folder` with a link is the last thing we do. This gives the biggest
        # chance of making this method idempotent. That is to say, if a runner gets interrupted during this action, it
        # will simply retry the upload, unless we got here and managed to link it up, in which case we move to the next
        # task. Because in that case, the check for the existence of this link at the top of this function will exit
        # early from this command.
        remotedata = RemoteData(computer=computer, remote_path=workdir)
        remotedata.add_incoming(node,
                                link_type=LinkType.CREATE,
                                link_label='remote_folder')
        remotedata.store()

    return calc_info, script_filename
Exemple #5
0
    def generate_calcjob_node(
        self,
        entry_point_name,
        retrieved=None,
        computer_name="localhost",
        options=None,
        mark_completed=False,
        remote_path=None,
        input_nodes=None,
    ):
        """Fixture to generate a mock `CalcJobNode` for testing parsers.

        Parameters
        ----------
        entry_point_name : str
            entry point name of the calculation class
        retrieved : aiida.orm.FolderData
            containing the file(s) to be parsed
        computer_name : str
            used to get or create a ``Computer``, by default 'localhost'
        options : None or dict
            any additional metadata options to set on the node
        remote_path : str
            path to a folder on the computer
        mark_completed : bool
            if True, set the process state to finished, and the exit_status = 0
        input_nodes: dict
            mapping of link label to node

        Returns
        -------
        aiida.orm.CalcJobNode
            instance with the `retrieved` node linked as outgoing

        """
        from aiida.common.links import LinkType
        from aiida.engine import ExitCode, ProcessState
        from aiida.orm import CalcJobNode, Node, RemoteData
        from aiida.plugins.entry_point import format_entry_point_string

        process = self.get_calc_cls(entry_point_name)
        computer = self.get_or_create_computer(computer_name)
        entry_point = format_entry_point_string("aiida.calculations",
                                                entry_point_name)

        calc_node = CalcJobNode(computer=computer, process_type=entry_point)
        calc_node.set_options({
            k: v.default() if callable(v.default) else v.default
            for k, v in process.spec_options.items() if v.has_default()
        })
        calc_node.set_option("resources", {
            "num_machines": 1,
            "num_mpiprocs_per_machine": 1
        })
        calc_node.set_option("max_wallclock_seconds", 1800)

        if options:
            calc_node.set_options(options)

        if mark_completed:
            calc_node.set_process_state(ProcessState.FINISHED)
            calc_node.set_exit_status(ExitCode().status)

        if input_nodes is not None:
            for label, in_node in input_nodes.items():
                in_node_map = in_node
                if isinstance(in_node, Node):
                    in_node_map = {None: in_node_map}
                for sublabel, in_node in in_node_map.items():
                    in_node.store()
                    link_label = (label if sublabel is None else
                                  "{}__{}".format(label, sublabel))
                    calc_node.add_incoming(in_node,
                                           link_type=LinkType.INPUT_CALC,
                                           link_label=link_label)

        calc_node.store()

        if retrieved is not None:
            retrieved.add_incoming(calc_node,
                                   link_type=LinkType.CREATE,
                                   link_label="retrieved")
            retrieved.store()

        if remote_path is not None:
            remote = RemoteData(remote_path=remote_path, computer=computer)
            remote.add_incoming(calc_node,
                                link_type=LinkType.CREATE,
                                link_label="remote_folder")
            remote.store()

        return calc_node
Exemple #6
0
def upload_calculation(node,
                       transport,
                       calc_info,
                       folder,
                       inputs=None,
                       dry_run=False):
    """Upload a `CalcJob` instance

    :param node: the `CalcJobNode`.
    :param transport: an already opened transport to use to submit the calculation.
    :param calc_info: the calculation info datastructure returned by `CalcJob.presubmit`
    :param folder: temporary local file system folder containing the inputs written by `CalcJob.prepare_for_submission`
    """
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    from logging import LoggerAdapter
    from tempfile import NamedTemporaryFile
    from aiida.orm import load_node, Code, RemoteData

    # If the calculation already has a `remote_folder`, simply return. The upload was apparently already completed
    # before, which can happen if the daemon is restarted and it shuts down after uploading but before getting the
    # chance to perform the state transition. Upon reloading this calculation, it will re-attempt the upload.
    link_label = 'remote_folder'
    if node.get_outgoing(RemoteData, link_label_filter=link_label).first():
        execlogger.warning(
            f'CalcJobNode<{node.pk}> already has a `{link_label}` output: skipping upload'
        )
        return calc_info

    computer = node.computer

    codes_info = calc_info.codes_info
    input_codes = [
        load_node(_.code_uuid, sub_classes=(Code, )) for _ in codes_info
    ]

    logger_extra = get_dblogger_extra(node)
    transport.set_logger_extra(logger_extra)
    logger = LoggerAdapter(logger=execlogger, extra=logger_extra)

    if not dry_run and node.has_cached_links():
        raise ValueError(
            'Cannot submit calculation {} because it has cached input links! If you just want to test the '
            'submission, set `metadata.dry_run` to True in the inputs.'.format(
                node.pk))

    # If we are performing a dry-run, the working directory should actually be a local folder that should already exist
    if dry_run:
        workdir = transport.getcwd()
    else:
        remote_user = transport.whoami()
        remote_working_directory = computer.get_workdir().format(
            username=remote_user)
        if not remote_working_directory.strip():
            raise exceptions.ConfigurationError(
                "[submission of calculation {}] No remote_working_directory configured for computer '{}'"
                .format(node.pk, computer.label))

        # If it already exists, no exception is raised
        try:
            transport.chdir(remote_working_directory)
        except IOError:
            logger.debug(
                '[submission of calculation {}] Unable to chdir in {}, trying to create it'
                .format(node.pk, remote_working_directory))
            try:
                transport.makedirs(remote_working_directory)
                transport.chdir(remote_working_directory)
            except EnvironmentError as exc:
                raise exceptions.ConfigurationError(
                    '[submission of calculation {}] '
                    'Unable to create the remote directory {} on '
                    "computer '{}': {}".format(node.pk,
                                               remote_working_directory,
                                               computer.label, exc))
        # Store remotely with sharding (here is where we choose
        # the folder structure of remote jobs; then I store this
        # in the calculation properties using _set_remote_dir
        # and I do not have to know the logic, but I just need to
        # read the absolute path from the calculation properties.
        transport.mkdir(calc_info.uuid[:2], ignore_existing=True)
        transport.chdir(calc_info.uuid[:2])
        transport.mkdir(calc_info.uuid[2:4], ignore_existing=True)
        transport.chdir(calc_info.uuid[2:4])

        try:
            # The final directory may already exist, most likely because this function was already executed once, but
            # failed and as a result was rescheduled by the eninge. In this case it would be fine to delete the folder
            # and create it from scratch, except that we cannot be sure that this the actual case. Therefore, to err on
            # the safe side, we move the folder to the lost+found directory before recreating the folder from scratch
            transport.mkdir(calc_info.uuid[4:])
        except OSError:
            # Move the existing directory to lost+found, log a warning and create a clean directory anyway
            path_existing = os.path.join(transport.getcwd(),
                                         calc_info.uuid[4:])
            path_lost_found = os.path.join(remote_working_directory,
                                           REMOTE_WORK_DIRECTORY_LOST_FOUND)
            path_target = os.path.join(path_lost_found, calc_info.uuid)
            logger.warning(
                f'tried to create path {path_existing} but it already exists, moving the entire folder to {path_target}'
            )

            # Make sure the lost+found directory exists, then copy the existing folder there and delete the original
            transport.mkdir(path_lost_found, ignore_existing=True)
            transport.copytree(path_existing, path_target)
            transport.rmtree(path_existing)

            # Now we can create a clean folder for this calculation
            transport.mkdir(calc_info.uuid[4:])
        finally:
            transport.chdir(calc_info.uuid[4:])

        # I store the workdir of the calculation for later file retrieval
        workdir = transport.getcwd()
        node.set_remote_workdir(workdir)

    # I first create the code files, so that the code can put
    # default files to be overwritten by the plugin itself.
    # Still, beware! The code file itself could be overwritten...
    # But I checked for this earlier.
    for code in input_codes:
        if code.is_local():
            # Note: this will possibly overwrite files
            for filename in code.list_object_names():
                # Note, once #2579 is implemented, use the `node.open` method instead of the named temporary file in
                # combination with the new `Transport.put_object_from_filelike`
                # Since the content of the node could potentially be binary, we read the raw bytes and pass them on
                with NamedTemporaryFile(mode='wb+') as handle:
                    handle.write(code.get_object_content(filename, mode='rb'))
                    handle.flush()
                    transport.put(handle.name, filename)
            transport.chmod(code.get_local_executable(), 0o755)  # rwxr-xr-x

    # local_copy_list is a list of tuples, each with (uuid, dest_rel_path)
    # NOTE: validation of these lists are done inside calculation.presubmit()
    local_copy_list = calc_info.local_copy_list or []
    remote_copy_list = calc_info.remote_copy_list or []
    remote_symlink_list = calc_info.remote_symlink_list or []
    provenance_exclude_list = calc_info.provenance_exclude_list or []

    for uuid, filename, target in local_copy_list:
        logger.debug(
            f'[submission of calculation {node.uuid}] copying local file/folder to {target}'
        )

        def find_data_node(inputs, uuid):
            """Find and return the node with the given UUID from a nested mapping of input nodes.

            :param inputs: (nested) mapping of nodes
            :param uuid: UUID of the node to find
            :return: instance of `Node` or `None` if not found
            """
            from collections.abc import Mapping
            data_node = None

            for input_node in inputs.values():
                if isinstance(input_node, Mapping):
                    data_node = find_data_node(input_node, uuid)
                elif isinstance(input_node, Node) and input_node.uuid == uuid:
                    data_node = input_node
                if data_node is not None:
                    break

            return data_node

        try:
            data_node = load_node(uuid=uuid)
        except exceptions.NotExistent:
            data_node = find_data_node(inputs, uuid)

        if data_node is None:
            logger.warning(
                f'failed to load Node<{uuid}> specified in the `local_copy_list`'
            )
        else:
            dirname = os.path.dirname(target)
            if dirname:
                os.makedirs(os.path.join(folder.abspath, dirname),
                            exist_ok=True)
            with folder.open(target, 'wb') as handle:
                with data_node.open(filename, 'rb') as source:
                    shutil.copyfileobj(source, handle)
            provenance_exclude_list.append(target)

    # In a dry_run, the working directory is the raw input folder, which will already contain these resources
    if not dry_run:
        for filename in folder.get_content_list():
            logger.debug(
                f'[submission of calculation {node.pk}] copying file/folder {filename}...'
            )
            transport.put(folder.get_abs_path(filename), filename)

        for (remote_computer_uuid, remote_abs_path,
             dest_rel_path) in remote_copy_list:
            if remote_computer_uuid == computer.uuid:
                logger.debug(
                    '[submission of calculation {}] copying {} remotely, directly on the machine {}'
                    .format(node.pk, dest_rel_path, computer.label))
                try:
                    transport.copy(remote_abs_path, dest_rel_path)
                except (IOError, OSError):
                    logger.warning(
                        '[submission of calculation {}] Unable to copy remote resource from {} to {}! '
                        'Stopping.'.format(node.pk, remote_abs_path,
                                           dest_rel_path))
                    raise
            else:
                raise NotImplementedError(
                    '[submission of calculation {}] Remote copy between two different machines is '
                    'not implemented yet'.format(node.pk))

        for (remote_computer_uuid, remote_abs_path,
             dest_rel_path) in remote_symlink_list:
            if remote_computer_uuid == computer.uuid:
                logger.debug(
                    '[submission of calculation {}] copying {} remotely, directly on the machine {}'
                    .format(node.pk, dest_rel_path, computer.label))
                try:
                    transport.symlink(remote_abs_path, dest_rel_path)
                except (IOError, OSError):
                    logger.warning(
                        '[submission of calculation {}] Unable to create remote symlink from {} to {}! '
                        'Stopping.'.format(node.pk, remote_abs_path,
                                           dest_rel_path))
                    raise
            else:
                raise IOError(
                    f'It is not possible to create a symlink between two different machines for calculation {node.pk}'
                )
    else:

        if remote_copy_list:
            with open(os.path.join(workdir, '_aiida_remote_copy_list.txt'),
                      'w') as handle:
                for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_copy_list:
                    handle.write(
                        'would have copied {} to {} in working directory on remote {}'
                        .format(remote_abs_path, dest_rel_path,
                                computer.label))

        if remote_symlink_list:
            with open(os.path.join(workdir, '_aiida_remote_symlink_list.txt'),
                      'w') as handle:
                for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_symlink_list:
                    handle.write(
                        'would have created symlinks from {} to {} in working directory on remote {}'
                        .format(remote_abs_path, dest_rel_path,
                                computer.label))

    # Loop recursively over content of the sandbox folder copying all that are not in `provenance_exclude_list`. Note
    # that directories are not created explicitly. The `node.put_object_from_filelike` call will create intermediate
    # directories for nested files automatically when needed. This means though that empty folders in the sandbox or
    # folders that would be empty when considering the `provenance_exclude_list` will *not* be copied to the repo. The
    # advantage of this explicit copying instead of deleting the files from `provenance_exclude_list` from the sandbox
    # first before moving the entire remaining content to the node's repository, is that in this way we are guaranteed
    # not to accidentally move files to the repository that should not go there at all cost. Note that all entries in
    # the provenance exclude list are normalized first, just as the paths that are in the sandbox folder, otherwise the
    # direct equality test may fail, e.g.: './path/file.txt' != 'path/file.txt' even though they reference the same file
    provenance_exclude_list = [
        os.path.normpath(entry) for entry in provenance_exclude_list
    ]

    for root, _, filenames in os.walk(folder.abspath):
        for filename in filenames:
            filepath = os.path.join(root, filename)
            relpath = os.path.normpath(
                os.path.relpath(filepath, folder.abspath))
            if relpath not in provenance_exclude_list:
                with open(filepath, 'rb') as handle:
                    node._repository.put_object_from_filelike(handle,
                                                              relpath,
                                                              'wb',
                                                              force=True)  # pylint: disable=protected-access

    if not dry_run:
        # Make sure that attaching the `remote_folder` with a link is the last thing we do. This gives the biggest
        # chance of making this method idempotent. That is to say, if a runner gets interrupted during this action, it
        # will simply retry the upload, unless we got here and managed to link it up, in which case we move to the next
        # task. Because in that case, the check for the existence of this link at the top of this function will exit
        # early from this command.
        remotedata = RemoteData(computer=computer, remote_path=workdir)
        remotedata.add_incoming(node,
                                link_type=LinkType.CREATE,
                                link_label='remote_folder')
        remotedata.store()