Ejemplo n.º 1
0
def submit_calculation(calculation: CalcJobNode, transport: Transport) -> str:
    """Hand a previously uploaded `CalcJob` over to the scheduler and record the resulting job id.

    The call is safe to repeat: when the node already carries a job id, that id is returned as is and nothing is
    submitted again.

    :param calculation: the CalcJobNode instance to submit.
    :param transport: an already opened transport through which the scheduler is driven.
    :return: the job id, either the one already set on the node or the one returned by the scheduler
        `submit_from_script` call.
    """
    existing_job_id = calculation.get_job_id()

    # A job id on the node means an earlier run of this function already got a successful answer from the scheduler
    # submit command. That happens when the daemon runner is shut down right after submitting, but before it could
    # finalize the state transition of the `CalcJob` to the `UPDATE` transport task. Submitting a second time would
    # duplicate the job, so the known id is handed back instead.
    if existing_job_id is not None:
        return existing_job_id

    scheduler = calculation.computer.get_scheduler()
    scheduler.set_transport(transport)

    script_name = calculation.get_option('submit_script_filename')
    new_job_id = scheduler.submit_from_script(calculation.get_remote_workdir(), script_name)

    # Persist the id on the node so that a repeated call takes the early return above.
    calculation.set_job_id(new_job_id)

    return new_job_id
Ejemplo n.º 2
0
def retrieve_calculation(calculation: CalcJobNode, transport: Transport, retrieved_temporary_folder: str) -> None:
    """Fetch the files of a completed job calculation through the given transport.

    Entries of the job's retrieve list are collected into a new `FolderData` node, which is stored and then linked to
    the calculation under its `link_label_retrieved` label. Entries of the `retrieve_temporary_list`, if any, are
    placed in `retrieved_temporary_folder` instead; creating and destroying that folder is up to the caller.

    When the calculation already has an output folder under that link label, a warning is logged and nothing is
    retrieved.

    :param calculation: the CalcJobNode instance whose files are retrieved.
    :param transport: an already opened transport to use for the retrieval.
    :param retrieved_temporary_folder: the absolute path to a directory in which to store the files listed, if any,
        in the `retrieve_temporary_list` of the job.
    """
    log_extra = get_dblogger_extra(calculation)
    workdir = calculation.get_remote_workdir()

    EXEC_LOGGER.debug(f'Retrieving calc {calculation.pk}', extra=log_extra)
    EXEC_LOGGER.debug(f'[retrieval of calc {calculation.pk}] chdir {workdir}', extra=log_extra)

    # An existing `retrieved` folder means the retrieval already ran to completion once. This can happen when the
    # daemon shuts down after retrieving but before performing the state transition: upon reloading, the calculation
    # re-attempts the retrieval and ends up here. In that case there is nothing left to do.
    link_label = calculation.link_label_retrieved
    existing_folder = calculation.get_outgoing(FolderData, link_label_filter=link_label).first()
    if existing_folder:
        EXEC_LOGGER.warning(
            f'CalcJobNode<{calculation.pk}> already has a `{link_label}` output folder: skipping retrieval'
        )
        return

    # The node that will hold the retrieved files
    retrieved_files = FolderData()

    with transport:
        transport.chdir(workdir)

        regular_entries = calculation.get_retrieve_list()
        temporary_entries = calculation.get_retrieve_temporary_list()
        singlefile_entries = calculation.get_retrieve_singlefile_list()

        # Step 1: the regular retrieve list goes through a sandbox and from there into the `FolderData` node
        with SandboxFolder() as sandbox:
            retrieve_files_from_list(calculation, transport, sandbox.abspath, regular_entries)
            retrieved_files.put_object_from_tree(sandbox.abspath)

        # Step 2: the single files, only if the retrieve singlefile list is not empty
        if singlefile_entries:
            with SandboxFolder() as sandbox:
                _retrieve_singlefiles(calculation, transport, sandbox, singlefile_entries, log_extra)

        # Step 3: the temporary files, only if the retrieve temporary list is not empty; these go straight into the
        # folder provided by the caller
        if temporary_entries:
            retrieve_files_from_list(calculation, transport, retrieved_temporary_folder, temporary_entries)

            # Report everything that is now present in the temporary folder
            for filename in os.listdir(retrieved_temporary_folder):
                EXEC_LOGGER.debug(
                    f"[retrieval of calc {calculation.pk}] Retrieved temporary file or folder '{filename}'",
                    extra=log_extra,
                )

        # NOTE(review): the pk is logged before `store()` is called, so it is presumably not assigned yet - confirm.
        EXEC_LOGGER.debug(
            f'[retrieval of calc {calculation.pk}] Storing retrieved_files={retrieved_files.pk}',
            extra=log_extra,
        )
        retrieved_files.store()

    # Linking the `retrieved` folder to the calculation is deliberately the very last action, which gives the biggest
    # chance of this function being idempotent: a runner interrupted anywhere before this point simply retries the
    # retrieval, whereas once the link exists the early return above is taken and the next task can proceed.
    retrieved_files.add_incoming(
        calculation, link_type=LinkType.CREATE, link_label=calculation.link_label_retrieved
    )
Ejemplo n.º 3
0
def stash_calculation(calculation: CalcJobNode, transport: Transport) -> None:
    """Stash files from the working directory of a completed calculation to a permanent remote folder.

    After a calculation has been completed, optionally stash files from the work directory to a storage location on the
    same remote machine. This is useful if one wants to keep certain files from a completed calculation to be removed
    from the scratch directory, because they are necessary for restarts, but that are too heavy to retrieve.
    Instructions of which files to copy where are retrieved from the `stash` option of the calculation: its
    `source_list`, `target_base` and `mode` keys.

    Nothing is done when the `stash` option is unset or its `source_list` is empty. Only the copy mode is implemented:
    for any other mode a warning is logged and the function returns. A file for which the copy raises `OSError` or
    `ValueError` is reported with a warning and skipped; the remaining files are still processed. Afterwards a
    `RemoteStashFolderData` node is stored and linked to the calculation with the label `remote_stash`.

    :param calculation: the calculation job node.
    :param transport: an already opened transport.
    :raises KeyError: if files are to be stashed but the `stash` option does not define `target_base`.
    """
    from aiida.common.datastructures import StashMode
    from aiida.orm import RemoteStashFolderData

    logger_extra = get_dblogger_extra(calculation)

    # Fall back to an empty mapping when the option is not set, so that an unconfigured calculation is treated as
    # "nothing to stash" instead of failing on `None.get`.
    stash_options = calculation.get_option('stash') or {}
    stash_mode = stash_options.get('mode', StashMode.COPY.value)
    source_list = stash_options.get('source_list', [])

    if not source_list:
        return

    if stash_mode != StashMode.COPY.value:
        EXEC_LOGGER.warning(
            f'stashing mode {stash_mode} is not implemented yet.')
        return

    EXEC_LOGGER.debug(
        f'stashing files for calculation<{calculation.pk}>: {source_list}',
        extra=logger_extra)

    # The target directory is sharded on the first characters of the node UUID: `<base>/ab/cd/<rest of uuid>`
    uuid = calculation.uuid
    target_basepath = os.path.join(stash_options['target_base'], uuid[:2], uuid[2:4], uuid[4:])

    # The remote working directory is the same for every file, so look it up once instead of once per iteration
    workdir = calculation.get_remote_workdir()

    for source_filename in source_list:

        source_filepath = os.path.join(workdir, source_filename)
        target_filepath = os.path.join(target_basepath, source_filename)

        # If the source file is in a (nested) directory, create those directories first in the target directory
        target_dirname = os.path.dirname(target_filepath)
        transport.makedirs(target_dirname, ignore_existing=True)

        try:
            transport.copy(source_filepath, target_filepath)
        except (OSError, ValueError) as exception:
            # Best effort: a file that cannot be copied is reported but must not prevent stashing the others
            EXEC_LOGGER.warning(
                f'failed to stash {source_filepath} to {target_filepath}: {exception}'
            )
        else:
            EXEC_LOGGER.debug(
                f'stashed {source_filepath} to {target_filepath}')

    # Record where the files were stashed and attach that record to the calculation as an output
    remote_stash = RemoteStashFolderData(
        computer=calculation.computer,
        target_basepath=target_basepath,
        stash_mode=StashMode(stash_mode),
        source_list=source_list,
    ).store()
    remote_stash.add_incoming(calculation,
                              link_type=LinkType.CREATE,
                              link_label='remote_stash')