Example #1
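A unit test from AiiDA's test suite checking that Folder.insert_path, get_subfolder and get_content_list handle non-ASCII (here Lithuanian) file and folder names without raising exceptions.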
    def test_unicode(self):
        """Check that there are no exceptions raised when using unicode folders."""
        tmpsource = tempfile.mkdtemp()
        tmpdest = tempfile.mkdtemp()

        with open(os.path.join(tmpsource, 'sąžininga'), 'w',
                  encoding='utf8') as fhandle:
            fhandle.write('test')
        with open(os.path.join(tmpsource, 'žąsis'), 'w',
                  encoding='utf8') as fhandle:
            fhandle.write('test')

        folder = Folder(tmpdest)
        folder.insert_path(tmpsource, 'destination')
        folder.insert_path(tmpsource, 'šaltinis')

        self.assertEqual(sorted(folder.get_content_list()),
                         sorted(['destination', 'šaltinis']))
        self.assertEqual(
            sorted(folder.get_subfolder('destination').get_content_list()),
            sorted(['sąžininga', 'žąsis']))
        self.assertEqual(
            sorted(folder.get_subfolder('šaltinis').get_content_list()),
            sorted(['sąžininga', 'žąsis']))

        folder = Folder(os.path.join(tmpsource, 'šaltinis'))
        folder.insert_path(tmpdest, 'destination')
        folder.insert_path(tmpdest, 'kitas-šaltinis')
        self.assertEqual(sorted(folder.get_content_list()),
                         sorted(['destination', 'kitas-šaltinis']))
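For orientation, a minimal sketch of the Folder calls the test exercises (the paths and names here are illustrative, not part of the original test):

import os
import tempfile

from aiida.common.folders import Folder

# Build a tiny source tree in a scratch directory.
source = tempfile.mkdtemp()
with open(os.path.join(source, 'data.txt'), 'w', encoding='utf8') as fhandle:
    fhandle.write('test')

# Wrap another scratch directory and copy the source tree into it.
sandbox = Folder(tempfile.mkdtemp())
sandbox.insert_path(source, 'inputs')

print(sandbox.get_content_list())                          # ['inputs']
print(sandbox.get_subfolder('inputs').get_content_list())  # ['data.txt']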
Example #2
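A helper that recursively walks a directory tree via Folder.get_content_list and returns a flat list of dictionaries: one per folder (its name ends with os.sep) and one per file (with contents and MD5/SHA1 checksums).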
def _collect_files(base, path=''):
    """
    Recursively collects files from the tree, starting at a given path.
    """
    from aiida.common.folders import Folder
    from aiida.common.utils import md5_file, sha1_file
    import os

    if os.path.isdir(os.path.join(base, path)):
        folder = Folder(os.path.join(base, path))
        files_now = []
        if path != '':
            # Record the folder itself, ensuring its name ends with os.sep.
            if not path.endswith(os.sep):
                path = '{}{}'.format(path, os.sep)
            files_now.append({
                'name': path,
                'type': 'folder',
            })
        for entry in sorted(folder.get_content_list()):
            files_now.extend(_collect_files(base, path=os.path.join(path, entry)))
        return files_now
    else:
        with open(os.path.join(base, path)) as fhandle:
            return [{
                'name': path,
                'contents': fhandle.read(),
                'md5': md5_file(os.path.join(base, path)),
                'sha1': sha1_file(os.path.join(base, path)),
                'type': 'file',
            }]
Example #3
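A variant of the previous helper that sorts the final result by name and special-cases .aiida/calcinfo.json: the JSON is parsed, and every file referenced in its local_copy_list is collected as well.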
def _collect_files(base, path=''):
    """
    Recursively collects files from the tree, starting at a given path.
    """
    from aiida.common.folders import Folder
    from aiida.common.utils import md5_file, sha1_file
    import os

    def get_filename(file_dict):
        return file_dict['name']

    if os.path.isdir(os.path.join(base, path)):
        folder = Folder(os.path.join(base, path))
        files_now = []
        if path != '':
            # Record the folder itself, ensuring its name ends with os.sep.
            if not path.endswith(os.sep):
                path = "{}{}".format(path, os.sep)
            files_now.append({
                'name': path,
                'type': 'folder',
            })
        for f in folder.get_content_list():
            files = _collect_files(base, path=os.path.join(path, f))
            files_now.extend(files)
        return sorted(files_now, key=get_filename)
    elif path == '.aiida/calcinfo.json':
        import json

        # Read calcinfo.json once: record it as a file, then parse its contents.
        files = []
        with open(os.path.join(base, path)) as f:
            contents = f.read()
        files.append({
            'name': path,
            'contents': contents,
            'md5': md5_file(os.path.join(base, path)),
            'sha1': sha1_file(os.path.join(base, path)),
            'type': 'file',
        })
        calcinfo = json.loads(contents)
        if 'local_copy_list' in calcinfo:
            for local_copy in calcinfo['local_copy_list']:
                with open(local_copy[0]) as f:
                    files.append({
                        'name': os.path.normpath(local_copy[1]),
                        'contents': f.read(),
                        'md5': md5_file(local_copy[0]),
                        'sha1': sha1_file(local_copy[0]),
                        'type': 'file',
                    })
        return files
    else:
        with open(os.path.join(base, path)) as f:
            return [{
                'name': path,
                'contents': f.read(),
                'md5': md5_file(os.path.join(base, path)),
                'sha1': sha1_file(os.path.join(base, path)),
                'type': 'file',
            }]
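A usage sketch for the helper above, assuming it is in scope (the base directory is a placeholder; any directory tree works). It prints the name-sorted manifest, with checksums for files:

base = '/tmp/retrieved_calc'  # hypothetical path

for entry in _collect_files(base):
    # Folder entries carry no checksums; file entries have 'md5' and 'sha1'.
    print(entry['type'], entry['name'], entry.get('md5', ''))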
Example #4
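A pytest test from the aiida-vasp plugin: after calling prepare_for_submission on a sandbox Folder, get_content_list is used to assert that the expected VASP input files were written, and the returned CalcInfo is checked for its retrieve and copy lists.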
def test_prepare(vasp_calc, vasp_chgcar, vasp_wavecar, vasp_inputs,
                 localhost_dir):
    """Check that preparing creates all necessary files."""
    from aiida.common.folders import Folder
    from aiida_vasp.calcs.vasp import VaspCalculation
    wavecar, _ = vasp_wavecar
    chgcar, _ = vasp_chgcar

    inputs_dict = {
        'gga': 'PE',
        'gga_compat': False,
        'lorbit': 11,
        'sigma': 0.5,
        'magmom': '30 * 2*0.',
        'icharg': 11
    }

    inputs = vasp_inputs(parameters=inputs_dict)
    inputs.charge_density = chgcar
    inputs.wavefunctions = wavecar

    calc = vasp_calc(inputs=inputs)
    temp_folder = Folder(str(localhost_dir.parent))
    calcinfo = calc.prepare_for_submission(temp_folder)
    input_files = temp_folder.get_content_list()

    for file_name in ['INCAR', 'KPOINTS', 'POSCAR', 'POTCAR']:
        assert file_name in input_files

    assert 'EIGENVAL' in calcinfo.retrieve_list
    assert 'DOSCAR' in calcinfo.retrieve_list
    assert 'wannier90*' in calcinfo.retrieve_list

    assert calcinfo.codes_info[0].stdout_name == VaspCalculation._VASP_OUTPUT
    assert calcinfo.codes_info[0].join_files is True

    inputs_dict.update({'icharg': 2})

    inputs = vasp_inputs(parameters=inputs_dict)
    inputs.charge_density = chgcar
    inputs.wavefunctions = wavecar

    calc = vasp_calc(inputs=inputs)
    temp_folder = Folder(str(localhost_dir.parent))

    calcinfo = calc.prepare_for_submission(temp_folder)

    assert 'WAVECAR' in [item[1] for item in calcinfo.local_copy_list]
Example #5
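AiiDA's CalcJob presubmit method. Among other things, it uses folder.get_content_list() to detect a name clash between a plugin-created file and a local code's executable, then builds the scheduler job template and writes the submit script and .aiida metadata into the folder.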
    def presubmit(self, folder: Folder) -> CalcInfo:
        """Prepares the calculation folder with all inputs, ready to be copied to the cluster.

        :param folder: a SandboxFolder that can be used to write calculation input files and the scheduling script.

        :return calcinfo: the CalcInfo object containing the information needed by the daemon to handle operations.

        """
        # pylint: disable=too-many-locals,too-many-statements,too-many-branches
        from aiida.common.exceptions import (InputValidationError,
                                             InvalidOperation,
                                             PluginInternalError,
                                             ValidationError)
        from aiida.common import json
        from aiida.common.utils import validate_list_of_string_tuples
        from aiida.common.datastructures import CodeInfo, CodeRunMode
        from aiida.orm import load_node, Code, Computer
        from aiida.plugins import DataFactory
        from aiida.schedulers.datastructures import JobTemplate

        computer = self.node.computer
        inputs = self.node.get_incoming(link_type=LinkType.INPUT_CALC)

        if not self.inputs.metadata.dry_run and self.node.has_cached_links():  # type: ignore[union-attr]
            raise InvalidOperation('calculation node has unstored links in cache')

        codes = [node for node in inputs.all_nodes() if isinstance(node, Code)]

        for code in codes:
            if not code.can_run_on(computer):
                raise InputValidationError(
                    'The selected code {} for calculation {} cannot run on computer {}'
                    .format(code.pk, self.node.pk, computer.label))

            if code.is_local() and code.get_local_executable(
            ) in folder.get_content_list():
                raise PluginInternalError(
                    f'The plugin created a file {code.get_local_executable()} that is also the executable name!'
                )

        calc_info = self.prepare_for_submission(folder)
        calc_info.uuid = str(self.node.uuid)
        scheduler = computer.get_scheduler()

        # I create the job template to pass to the scheduler
        job_tmpl = JobTemplate()
        job_tmpl.shebang = computer.get_shebang()
        job_tmpl.submit_as_hold = False
        job_tmpl.rerunnable = False
        job_tmpl.job_environment = {}
        # 'email', 'email_on_started', 'email_on_terminated',
        job_tmpl.job_name = f'aiida-{self.node.pk}'
        job_tmpl.sched_output_path = self.options.scheduler_stdout
        if self.options.scheduler_stderr == self.options.scheduler_stdout:
            job_tmpl.sched_join_files = True
        else:
            job_tmpl.sched_error_path = self.options.scheduler_stderr
            job_tmpl.sched_join_files = False

        # Set retrieve path, add also scheduler STDOUT and STDERR
        retrieve_list = calc_info.retrieve_list or []
        if (job_tmpl.sched_output_path is not None
                and job_tmpl.sched_output_path not in retrieve_list):
            retrieve_list.append(job_tmpl.sched_output_path)
        if not job_tmpl.sched_join_files:
            if (job_tmpl.sched_error_path is not None
                    and job_tmpl.sched_error_path not in retrieve_list):
                retrieve_list.append(job_tmpl.sched_error_path)
        retrieve_list.extend(
            self.node.get_option('additional_retrieve_list') or [])
        self.node.set_retrieve_list(retrieve_list)

        retrieve_singlefile_list = calc_info.retrieve_singlefile_list or []
        # a validation on the subclasses of retrieve_singlefile_list
        for _, subclassname, _ in retrieve_singlefile_list:
            file_sub_class = DataFactory(subclassname)
            if not issubclass(file_sub_class, orm.SinglefileData):
                raise PluginInternalError(
                    '[presubmission of calc {}] retrieve_singlefile_list subclass problem: {} is '
                    'not subclass of SinglefileData'.format(
                        self.node.pk, file_sub_class.__name__))
        if retrieve_singlefile_list:
            self.node.set_retrieve_singlefile_list(retrieve_singlefile_list)

        # Handle the retrieve_temporary_list
        retrieve_temporary_list = calc_info.retrieve_temporary_list or []
        self.node.set_retrieve_temporary_list(retrieve_temporary_list)

        # The filter below skips empty or None entries. This has two advantages:
        # - it does not add too many \n\n when most of the prepend_texts are empty
        # - more importantly, it skips the cases in which one of the methods
        #   returns None, which would make the join raise an exception
        prepend_texts = [computer.get_prepend_text()] + \
            [code.get_prepend_text() for code in codes] + \
            [calc_info.prepend_text, self.node.get_option('prepend_text')]
        job_tmpl.prepend_text = '\n\n'.join(prepend_text
                                            for prepend_text in prepend_texts
                                            if prepend_text)

        append_texts = [self.node.get_option('append_text'), calc_info.append_text] + \
            [code.get_append_text() for code in codes] + \
            [computer.get_append_text()]
        job_tmpl.append_text = '\n\n'.join(append_text
                                           for append_text in append_texts
                                           if append_text)

        # Set resources, also with get_default_mpiprocs_per_machine
        resources = self.node.get_option('resources')
        scheduler.preprocess_resources(
            resources, computer.get_default_mpiprocs_per_machine())
        job_tmpl.job_resource = scheduler.create_job_resource(**resources)

        subst_dict = {
            'tot_num_mpiprocs': job_tmpl.job_resource.get_tot_num_mpiprocs()
        }

        for key, value in job_tmpl.job_resource.items():
            subst_dict[key] = value
        mpi_args = [
            arg.format(**subst_dict) for arg in computer.get_mpirun_command()
        ]
        extra_mpirun_params = self.node.get_option(
            'mpirun_extra_params')  # same for all codes in the same calc

        # set the codes_info
        if not isinstance(calc_info.codes_info, (list, tuple)):
            raise PluginInternalError(
                'codes_info passed to CalcInfo must be a list of CodeInfo objects'
            )

        codes_info = []
        for code_info in calc_info.codes_info:

            if not isinstance(code_info, CodeInfo):
                raise PluginInternalError(
                    'Invalid codes_info, must be a list of CodeInfo objects')

            if code_info.code_uuid is None:
                raise PluginInternalError(
                    'CalcInfo should have the information of the code to be launched'
                )
            this_code = load_node(code_info.code_uuid, sub_classes=(Code, ))

            this_withmpi = code_info.withmpi  # to decide better how to set the default
            if this_withmpi is None:
                if len(calc_info.codes_info) > 1:
                    raise PluginInternalError(
                        'For more than one code, it is necessary to set withmpi in codes_info'
                    )
                else:
                    this_withmpi = self.node.get_option('withmpi')

            if this_withmpi:
                this_argv = (mpi_args + extra_mpirun_params +
                             [this_code.get_execname()] +
                             (code_info.cmdline_params
                              if code_info.cmdline_params is not None else []))
            else:
                this_argv = [this_code.get_execname()
                             ] + (code_info.cmdline_params if
                                  code_info.cmdline_params is not None else [])

            # overwrite the old cmdline_params and add codename and mpirun stuff
            code_info.cmdline_params = this_argv

            codes_info.append(code_info)
        job_tmpl.codes_info = codes_info

        # set the codes execution mode

        if len(codes) > 1:
            try:
                job_tmpl.codes_run_mode = calc_info.codes_run_mode
            except KeyError as exc:
                raise PluginInternalError(
                    'Need to set the order of the code execution (parallel or serial?)'
                ) from exc
        else:
            job_tmpl.codes_run_mode = CodeRunMode.SERIAL
        ########################################################################

        custom_sched_commands = self.node.get_option(
            'custom_scheduler_commands')
        if custom_sched_commands:
            job_tmpl.custom_scheduler_commands = custom_sched_commands

        job_tmpl.import_sys_environment = self.node.get_option(
            'import_sys_environment')

        job_tmpl.job_environment = self.node.get_option(
            'environment_variables')

        queue_name = self.node.get_option('queue_name')
        account = self.node.get_option('account')
        qos = self.node.get_option('qos')
        if queue_name is not None:
            job_tmpl.queue_name = queue_name
        if account is not None:
            job_tmpl.account = account
        if qos is not None:
            job_tmpl.qos = qos
        priority = self.node.get_option('priority')
        if priority is not None:
            job_tmpl.priority = priority
        max_memory_kb = self.node.get_option('max_memory_kb')
        if max_memory_kb is not None:
            job_tmpl.max_memory_kb = max_memory_kb
        max_wallclock_seconds = self.node.get_option('max_wallclock_seconds')
        if max_wallclock_seconds is not None:
            job_tmpl.max_wallclock_seconds = max_wallclock_seconds

        submit_script_filename = self.node.get_option('submit_script_filename')
        script_content = scheduler.get_submit_script(job_tmpl)
        folder.create_file_from_filelike(io.StringIO(script_content),
                                         submit_script_filename,
                                         'w',
                                         encoding='utf8')

        subfolder = folder.get_subfolder('.aiida', create=True)
        subfolder.create_file_from_filelike(io.StringIO(json.dumps(job_tmpl)),
                                            'job_tmpl.json',
                                            'w',
                                            encoding='utf8')
        subfolder.create_file_from_filelike(io.StringIO(json.dumps(calc_info)),
                                            'calcinfo.json',
                                            'w',
                                            encoding='utf8')

        if calc_info.local_copy_list is None:
            calc_info.local_copy_list = []

        if calc_info.remote_copy_list is None:
            calc_info.remote_copy_list = []

        # Some validation
        this_pk = self.node.pk if self.node.pk is not None else '[UNSTORED]'
        local_copy_list = calc_info.local_copy_list
        try:
            validate_list_of_string_tuples(local_copy_list, tuple_length=3)
        except ValidationError as exception:
            raise PluginInternalError(
                f'[presubmission of calc {this_pk}] local_copy_list format problem: {exception}'
            ) from exception

        remote_copy_list = calc_info.remote_copy_list
        try:
            validate_list_of_string_tuples(remote_copy_list, tuple_length=3)
        except ValidationError as exception:
            raise PluginInternalError(
                f'[presubmission of calc {this_pk}] remote_copy_list format problem: {exception}'
            ) from exception

        for (remote_computer_uuid, _, dest_rel_path) in remote_copy_list:
            try:
                Computer.objects.get(uuid=remote_computer_uuid)  # pylint: disable=unused-variable
            except exceptions.NotExistent as exception:
                raise PluginInternalError(
                    '[presubmission of calc {}] '
                    'The remote copy requires a computer with UUID={} '
                    'but no such computer was found in the '
                    'database'.format(this_pk,
                                      remote_computer_uuid)) from exception
            if os.path.isabs(dest_rel_path):
                raise PluginInternalError(
                    '[presubmission of calc {}] '
                    'The destination path of the remote copy '
                    'is absolute! ({})'.format(this_pk, dest_rel_path))

        return calc_info
Example #6
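A dry-run helper from the aiida-castep plugin: it runs castep --dryrun in the prepared folder, scans folder.get_content_list() for *.err files, and parses the .castep output for the k-point count and memory/disk estimates.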
    def dryrun_test(cls, inputs, castep_exe='castep.serial', verbose=True):
        """
        Do a dryrun test in a folder with prepared builder or inputs
        """

        if isinstance(inputs, ProcessBuilder):
            res = cls.submit_test(inputs)
        else:
            res = cls.submit_test(cls, **inputs)
        folder = Folder(res[1])
        dry_run_node = res[0]
        seedname = dry_run_node.get_option('seedname')

        def _print(inp):
            if verbose:
                print(inp)

        # Check that the CASTEP executable is available
        try:
            output = check_output([castep_exe, "-v"], universal_newlines=True)
        except OSError:
            _print("CASTEP executable '{}' is not found".format(castep_exe))
            return None

        # Now start dryrun
        _print("Running with {}".format(
            check_output(["which", castep_exe], universal_newlines=True)))
        _print(output)

        _print("Starting dryrun...")
        call([castep_exe, "--dryrun", seedname], cwd=folder.abspath)

        # Check if any *err files
        contents = folder.get_content_list()
        for fname in contents:
            if fnmatch(fname, "*.err"):
                with folder.open(fname) as fhandle:
                    _print("Error found in {}:\fname".format(fname))
                    _print(fhandle.read())
                raise InputValidationError("Error found during dryrun")

        # Gather information from the dryrun file
        dryrun_results = {}
        out_file = seedname + '.castep'
        with folder.open(out_file) as fhandle:
            for line in fhandle:
                mth = re.match(r"\s*k-Points For SCF Sampling:\s+(\d+)\s*",
                               line)
                if mth:
                    dryrun_results["num_kpoints"] = int(mth.group(1))
                    _print("Number of k-points: {}".format(mth.group(1)))
                    mth = None
                    continue
                mth = re.match(
                    r"\| Approx\. total storage required"
                    r" per process\s+([0-9.]+)\sMB\s+([0-9.]+)", line)
                if mth:
                    dryrun_results["memory_MB"] = (float(mth.group(1)))
                    dryrun_results["disk_MB"] = (float(mth.group(2)))
                    _print("RAM: {} MB, DISK: {} MB".format(
                        mth.group(1), mth.group(2)))
                    mth = None
                    continue

        return folder, dryrun_results