Example #1
    def test_get_scheduler_stderr(self):
        """Verify that the repository sandbox folder is cleaned after the node instance is garbage collected."""
        option_key = 'scheduler_stderr'
        option_value = '_scheduler-error.txt'
        stderr = 'some\nstandard error'

        # Note: cannot use pytest.mark.parametrize in unittest classes, so I just do a loop here
        for with_file in [True, False]:
            for with_option in [True, False]:
                node = CalcJobNode(computer=self.computer)
                node.set_option('resources', {
                    'num_machines': 1,
                    'num_mpiprocs_per_machine': 1
                })
                retrieved = FolderData()

                if with_file:
                    retrieved.put_object_from_filelike(io.StringIO(stderr),
                                                       option_value)
                if with_option:
                    node.set_option(option_key, option_value)
                node.store()
                retrieved.store()
                retrieved.add_incoming(node,
                                       link_type=LinkType.CREATE,
                                       link_label='retrieved')

                # It should return `None` if no scheduler output is there (file not there, or option not set),
                # while it should return the content if both are set
                self.assertEqual(node.get_scheduler_stderr(),
                                 stderr if with_file and with_option else None)
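The note above points out that `pytest.mark.parametrize` cannot be used inside `unittest.TestCase` classes, which is why the example loops over the four combinations by hand. For comparison, here is a minimal sketch of the same matrix written as a plain pytest function; it assumes the `aiida_localhost` pytest fixture (or an equivalent fixture providing a stored test `Computer`), which is not part of the example above.

import io

import pytest

from aiida.common.links import LinkType
from aiida.orm import CalcJobNode, FolderData


@pytest.mark.parametrize('with_file', [True, False])
@pytest.mark.parametrize('with_option', [True, False])
def test_get_scheduler_stderr(aiida_localhost, with_file, with_option):
    """Parametrized sketch of the loop in Example #1 (assumes the `aiida_localhost` fixture)."""
    option_value = '_scheduler-error.txt'
    stderr = 'some\nstandard error'

    node = CalcJobNode(computer=aiida_localhost)
    node.set_option('resources', {'num_machines': 1, 'num_mpiprocs_per_machine': 1})
    retrieved = FolderData()

    if with_file:
        retrieved.put_object_from_filelike(io.StringIO(stderr), option_value)
    if with_option:
        node.set_option('scheduler_stderr', option_value)

    node.store()
    retrieved.store()
    retrieved.add_incoming(node, link_type=LinkType.CREATE, link_label='retrieved')

    # `None` is expected unless both the option is set and the file was retrieved
    expected = stderr if with_file and with_option else None
    assert node.get_scheduler_stderr() == expected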
Example #2
    def _inner(file_path, input_settings=None):
        # Use the test computer provided by the enclosing fixture (a `localhost` Computer)
        computer = localhost

        process_type = 'aiida.calculations:vasp.vasp'

        node = CalcJobNode(computer=computer, process_type=process_type)
        node.set_attribute('input_filename', 'INCAR')
        node.set_attribute('output_filename', 'OUTCAR')
        #node.set_attribute('error_filename', 'aiida.err')
        node.set_attribute('scheduler_stderr', '_scheduler-stderr.txt')
        node.set_attribute('scheduler_stdout', '_scheduler-stdout.txt')
        node.set_option('resources', {
            'num_machines': 1,
            'num_mpiprocs_per_machine': 1
        })
        node.set_option('max_wallclock_seconds', 1800)

        if input_settings is None:
            input_settings = {}

        settings = Dict(dict=input_settings)
        node.add_incoming(settings,
                          link_type=LinkType.INPUT_CALC,
                          link_label='settings')
        settings.store()
        node.store()

        # Create a `FolderData` that will represent the `retrieved` folder. Store the test
        # output fixture in there and link it.
        retrieved = FolderData()
        retrieved.put_object_from_tree(file_path)
        retrieved.add_incoming(node,
                               link_type=LinkType.CREATE,
                               link_label='retrieved')
        retrieved.store()

        return node
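The `_inner` function above is the inner factory of a fixture that builds a fake VASP calculation with an attached `retrieved` folder. A hedged usage sketch follows; the fixture name `calc_with_retrieved`, the output path and the settings keys are placeholders for illustration, not taken from the example.

def test_vasp_parser_from_fixture(calc_with_retrieved):
    """Sketch: run the VASP parser on a fake calculation built by the factory above.

    `calc_with_retrieved` is assumed to be the pytest fixture exposing `_inner`;
    the path and settings below are placeholders.
    """
    from aiida.plugins import ParserFactory

    node = calc_with_retrieved('/path/to/test/outputs', {'parser_settings': {'add_misc': True}})

    parser_cls = ParserFactory('vasp.vasp')
    results, calcfunction = parser_cls.parse_from_node(node, store_provenance=False)

    assert calcfunction.is_finished_ok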
Example #3
    def test_get_scheduler_stderr(self):
        """Verify that the repository sandbox folder is cleaned after the node instance is garbage collected."""
        option_key = 'scheduler_stderr'
        option_value = '_scheduler-error.txt'
        stderr = 'some\nstandard error'

        node = CalcJobNode(computer=self.computer)
        node.set_option('resources', {
            'num_machines': 1,
            'num_mpiprocs_per_machine': 1
        })
        retrieved = FolderData()

        # No scheduler error filename option so should return `None`
        self.assertEqual(node.get_scheduler_stderr(), None)

        # No retrieved folder so should return `None`
        node.set_option(option_key, option_value)
        self.assertEqual(node.get_scheduler_stderr(), None)

        # Now it has a retrieved folder, but the file does not actually exist in it; it should not raise an exception but return `None`
        node.store()
        retrieved.store()
        retrieved.add_incoming(node,
                               link_type=LinkType.CREATE,
                               link_label='retrieved')
        self.assertEqual(node.get_scheduler_stderr(), None)

        # Add the file to the retrieved folder
        with tempfile.NamedTemporaryFile(mode='w+') as handle:
            handle.write(stderr)
            handle.flush()
            handle.seek(0)
            retrieved.put_object_from_filelike(handle,
                                               option_value,
                                               force=True)
        self.assertEqual(node.get_scheduler_stderr(), stderr)
Example #4
    def _fixture_calc_job_node(entry_point_name, computer, test_name, attributes=None):
        """Fixture to generate a mock `CalcJobNode` for testing parsers.

        :param entry_point_name: entry point name of the calculation class
        :param computer: a `Computer` instance
        :param test_name: relative path of directory with test output files in the `fixtures/{entry_point_name}` folder
        :param attributes: any optional attributes to set on the node
        :return: `CalcJobNode` instance with an attached `FolderData` as the `retrieved` node
        """
        import os

        from aiida.common.links import LinkType
        from aiida.orm import CalcJobNode, FolderData
        from aiida.plugins.entry_point import format_entry_point_string

        entry_point = format_entry_point_string('aiida.calculations', entry_point_name)

        node = CalcJobNode(computer=computer, process_type=entry_point)
        node.set_attribute('input_filename', 'aiida.in')
        node.set_attribute('output_filename', 'aiida.out')
        node.set_attribute('error_filename', 'aiida.err')
        node.set_option('resources', {'num_machines': 1, 'num_mpiprocs_per_machine': 1})
        node.set_option('max_wallclock_seconds', 1800)

        if attributes:
            node.set_attribute_many(attributes)

        node.store()

        basepath = os.path.dirname(os.path.abspath(__file__))
        filepath = os.path.join(basepath, 'parsers', 'fixtures', entry_point_name[len('codtools.'):], test_name)

        retrieved = FolderData()
        retrieved.put_object_from_tree(filepath)
        retrieved.add_incoming(node, link_type=LinkType.CREATE, link_label='retrieved')
        retrieved.store()

        return node
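A hedged sketch of how such a fixture is typically consumed in a parser test follows. The public fixture names `fixture_calc_job_node` and `fixture_localhost`, the entry point `codtools.cif_filter`, the test directory `default` and the output label are assumptions for illustration only.

def test_cif_filter_default(fixture_calc_job_node, fixture_localhost):
    """Sketch: parse the fake retrieved output attached by the fixture above.

    All fixture, entry point and output names here are placeholders.
    """
    from aiida.plugins import ParserFactory

    node = fixture_calc_job_node('codtools.cif_filter', fixture_localhost, 'default')

    parser = ParserFactory('codtools.cif_filter')
    results, calcfunction = parser.parse_from_node(node, store_provenance=False)

    assert calcfunction.is_finished_ok
    assert 'cif' in results  # placeholder output label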
Example #5
def retrieve_calculation(calculation, transport, retrieved_temporary_folder):
    """Retrieve all the files of a completed job calculation using the given transport.

    If the job defined anything in the `retrieve_temporary_list`, those entries will be stored in the
    `retrieved_temporary_folder`. The caller is responsible for creating and destroying this folder.

    :param calculation: the instance of CalcJobNode to update.
    :param transport: an already opened transport to use for the retrieval.
    :param retrieved_temporary_folder: the absolute path to a directory in which to store the files
        listed, if any, in the `retrieve_temporary_list` of the job's `CalcInfo`
    """
    logger_extra = get_dblogger_extra(calculation)
    workdir = calculation.get_remote_workdir()

    execlogger.debug('Retrieving calc {}'.format(calculation.pk),
                     extra=logger_extra)
    execlogger.debug('[retrieval of calc {}] chdir {}'.format(
        calculation.pk, workdir),
                     extra=logger_extra)

    # If the calculation already has a `retrieved` folder, simply return. The retrieval was apparently already completed
    # before, which can happen if the daemon is restarted and it shuts down after retrieving but before getting the
    # chance to perform the state transition. Upon reloading this calculation, it will re-attempt the retrieval.
    link_label = calculation.link_label_retrieved
    if calculation.get_outgoing(FolderData,
                                link_label_filter=link_label).first():
        execlogger.warning(
            'CalcJobNode<{}> already has a `{}` output folder: skipping retrieval'
            .format(calculation.pk, link_label))
        return

    # Create the FolderData node into which to store the files that are to be retrieved
    retrieved_files = FolderData()

    with transport:
        transport.chdir(workdir)

        # First, retrieve the files listed in the `retrieve_list` into the `retrieved_files` FolderData
        retrieve_list = calculation.get_retrieve_list()
        retrieve_temporary_list = calculation.get_retrieve_temporary_list()
        retrieve_singlefile_list = calculation.get_retrieve_singlefile_list()

        with SandboxFolder() as folder:
            retrieve_files_from_list(calculation, transport, folder.abspath,
                                     retrieve_list)
            # Everything has been retrieved into the sandbox; now store it in the `retrieved_files` node
            retrieved_files.put_object_from_tree(folder.abspath)

        # Second, retrieve the singlefiles, if any files were specified in the 'retrieve_singlefile_list' key
        if retrieve_singlefile_list:
            with SandboxFolder() as folder:
                _retrieve_singlefiles(calculation, transport, folder,
                                      retrieve_singlefile_list, logger_extra)

        # Retrieve the temporary files in the retrieved_temporary_folder if any files were
        # specified in the 'retrieve_temporary_list' key
        if retrieve_temporary_list:
            retrieve_files_from_list(calculation, transport,
                                     retrieved_temporary_folder,
                                     retrieve_temporary_list)

            # Log the files that were retrieved in the temporary folder
            for filename in os.listdir(retrieved_temporary_folder):
                execlogger.debug(
                    "[retrieval of calc {}] Retrieved temporary file or folder '{}'"
                    .format(calculation.pk, filename),
                    extra=logger_extra)

        # Store everything
        execlogger.debug('[retrieval of calc {}] '
                         'Storing retrieved_files={}'.format(
                             calculation.pk, retrieved_files.pk),
                         extra=logger_extra)
        retrieved_files.store()

    # Make sure that attaching the `retrieved` folder with a link is the last thing we do. This gives the biggest chance
    # of making this method idempotent. That is to say, if a runner gets interrupted during this action, it will simply
    # retry the retrieval, unless we got here and managed to link it up, in which case we move to the next task.
    retrieved_files.add_incoming(calculation,
                                 link_type=LinkType.CREATE,
                                 link_label=calculation.link_label_retrieved)
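The docstring above states that the caller is responsible for creating and destroying `retrieved_temporary_folder`. The following is a minimal caller sketch honoring that contract; it is a hypothetical helper for illustration, not part of aiida-core, where the engine manages this folder itself.

import shutil
import tempfile


def retrieve_with_temporary_folder(calculation, transport):
    """Hypothetical caller: create the temporary folder, retrieve, then clean up."""
    retrieved_temporary_folder = tempfile.mkdtemp()
    try:
        retrieve_calculation(calculation, transport, retrieved_temporary_folder)
    finally:
        # The contract requires the caller to destroy the folder, whether retrieval succeeded or not
        shutil.rmtree(retrieved_temporary_folder, ignore_errors=True)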
Example #6
    def _generate_calc_job_node(
        entry_point_name,
        results_folder,
        inputs=None,
        computer=None,
        outputs=None,
        outfile_override=None,
    ):
        """
        Generate a CalcJob node with fake retrieved node in the
        tests/data
        """

        calc_class = CalculationFactory(entry_point_name)
        entry_point = format_entry_point_string('aiida.calculations',
                                                entry_point_name)
        builder = calc_class.get_builder()

        if not computer:
            computer = db_test_app.localhost
        node = CalcJobNode(computer=computer, process_type=entry_point)

        # Monkeypatch the inputs onto the node
        if inputs is not None:
            inputs = AttributeDict(inputs)
            node.__dict__['inputs'] = inputs
            # Add direct inputs, pseudos are omitted
            for k, v in inputs.items():
                if isinstance(v, Node):
                    if not v.is_stored:
                        v.store()
                    node.add_incoming(v,
                                      link_type=LinkType.INPUT_CALC,
                                      link_label=k)

        options = builder.metadata.options
        # Guard against `inputs` being None; in that case keep the builder defaults
        if inputs is not None:
            options.update(inputs.metadata.options)
        node.set_attribute('input_filename', options.input_filename)
        node.set_attribute('seedname', options.seedname)
        node.set_attribute('output_filename', options.output_filename)
        node.set_attribute('error_filename', 'aiida.err')
        node.set_option('resources', {
            'num_machines': 1,
            'num_mpiprocs_per_machine': 1
        })
        node.set_option('max_wallclock_seconds', 1800)
        node.store()

        filepath = this_folder.parent / 'data' / results_folder
        retrieved = FolderData()
        retrieved.put_object_from_tree(str(filepath.resolve()))

        # Apply overriding output files
        if outfile_override is not None:
            for key, content in outfile_override.items():
                if content is None:
                    retrieved.delete_object(key)
                    continue
                buf = BytesIO(content.encode())
                retrieved.put_object_from_filelike(buf, key)

        retrieved.add_incoming(node,
                               link_type=LinkType.CREATE,
                               link_label='retrieved')
        retrieved.store()

        if outputs is not None:
            for label, out_node in outputs.items():
                out_node.add_incoming(node,
                                      link_type=LinkType.CREATE,
                                      link_label=label)
                if not out_node.is_stored:
                    out_node.store()

        return node
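A hedged usage sketch for the generator above: it assumes the inner function is exposed through a pytest fixture named `generate_calc_job_node`, and the entry point name, results folder and option values are placeholders for illustration.

def test_generate_calc_job_node(generate_calc_job_node):
    """Sketch: build a fake finished calculation and check its retrieved folder."""
    inputs = {
        'metadata': {
            'options': {
                'input_filename': 'aiida.cell',      # placeholder option values
                'seedname': 'aiida',
                'output_filename': 'aiida.castep',
            }
        }
    }
    node = generate_calc_job_node(
        'castep.castep',      # placeholder entry point name
        'castep_default',     # placeholder folder under tests/data
        inputs=inputs,
    )

    assert node.is_stored
    retrieved = node.get_outgoing(link_label_filter='retrieved').one().node
    assert retrieved.is_stored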