def test_unicode(self):
    """Check that there are no exceptions raised when using unicode folders."""
    tmpsource = tempfile.mkdtemp()
    tmpdest = tempfile.mkdtemp()

    with open(os.path.join(tmpsource, 'sąžininga'), 'w', encoding='utf8') as fhandle:
        fhandle.write('test')
    with open(os.path.join(tmpsource, 'žąsis'), 'w', encoding='utf8') as fhandle:
        fhandle.write('test')

    folder = Folder(tmpdest)
    folder.insert_path(tmpsource, 'destination')
    folder.insert_path(tmpsource, 'šaltinis')

    self.assertEqual(sorted(folder.get_content_list()), sorted(['destination', 'šaltinis']))
    self.assertEqual(
        sorted(folder.get_subfolder('destination').get_content_list()),
        sorted(['sąžininga', 'žąsis']))
    self.assertEqual(
        sorted(folder.get_subfolder('šaltinis').get_content_list()),
        sorted(['sąžininga', 'žąsis']))

    folder = Folder(os.path.join(tmpsource, 'šaltinis'))
    folder.insert_path(tmpdest, 'destination')
    folder.insert_path(tmpdest, 'kitas-šaltinis')
    self.assertEqual(sorted(folder.get_content_list()), sorted(['destination', 'kitas-šaltinis']))
def _collect_files(base, path=''):
    """
    Recursively collects files from the tree, starting at a given path.
    """
    import os

    from aiida.common.folders import Folder
    from aiida.common.utils import md5_file, sha1_file

    if os.path.isdir(os.path.join(base, path)):
        folder = Folder(os.path.join(base, path))
        files_now = []
        if path != '':
            if not path.endswith(os.sep):
                path = '{}{}'.format(path, os.sep)
            if path != '':
                files_now.append({
                    'name': path,
                    'type': 'folder',
                })
        for fname in sorted(folder.get_content_list()):
            files = _collect_files(base, path=os.path.join(path, fname))
            files_now.extend(files)
        return files_now
    else:
        with open(os.path.join(base, path)) as fhandle:
            return [{
                'name': path,
                'contents': fhandle.read(),
                'md5': md5_file(os.path.join(base, path)),
                'sha1': sha1_file(os.path.join(base, path)),
                'type': 'file',
            }]
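# A minimal usage sketch for _collect_files above (not part of the original
# source; the scratch layout is illustrative): it shows the flat list-of-dicts
# manifest the helper returns, with a 'folder' entry per directory and
# 'contents'/'md5'/'sha1' entries per file.
import os
import tempfile

base = tempfile.mkdtemp()
os.makedirs(os.path.join(base, 'sub'))
with open(os.path.join(base, 'sub', 'data.txt'), 'w') as handle:
    handle.write('hello')

for entry in _collect_files(base):
    # Prints e.g. "folder sub/" followed by "file sub/data.txt"
    print(entry['type'], entry['name'])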
def _collect_files(base, path=''):
    """
    Recursively collects files from the tree, starting at a given path.
    """
    import json
    import os

    from aiida.common.folders import Folder
    from aiida.common.utils import md5_file, sha1_file

    def get_filename(file_dict):
        return file_dict['name']

    if os.path.isdir(os.path.join(base, path)):
        folder = Folder(os.path.join(base, path))
        files_now = []
        if path != '':
            if not path.endswith(os.sep):
                path = '{}{}'.format(path, os.sep)
            if path != '':
                files_now.append({
                    'name': path,
                    'type': 'folder',
                })
        for fname in folder.get_content_list():
            files = _collect_files(base, path=os.path.join(path, fname))
            files_now.extend(files)
        return sorted(files_now, key=get_filename)
    elif path == '.aiida/calcinfo.json':
        files = []
        with open(os.path.join(base, path)) as fhandle:
            files.append({
                'name': path,
                'contents': fhandle.read(),
                'md5': md5_file(os.path.join(base, path)),
                'sha1': sha1_file(os.path.join(base, path)),
                'type': 'file',
            })
        with open(os.path.join(base, path)) as fhandle:
            calcinfo = json.load(fhandle)
        if 'local_copy_list' in calcinfo:
            for local_copy in calcinfo['local_copy_list']:
                with open(local_copy[0]) as fhandle:
                    files.append({
                        'name': os.path.normpath(local_copy[1]),
                        'contents': fhandle.read(),
                        'md5': md5_file(local_copy[0]),
                        'sha1': sha1_file(local_copy[0]),
                        'type': 'file',
                    })
        return files
    else:
        with open(os.path.join(base, path)) as fhandle:
            return [{
                'name': path,
                'contents': fhandle.read(),
                'md5': md5_file(os.path.join(base, path)),
                'sha1': sha1_file(os.path.join(base, path)),
                'type': 'file',
            }]
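# Hedged sketch (an assumption for illustration, not from the original source)
# of the minimal .aiida/calcinfo.json that the extended branch above consumes:
# as read by this helper, each local_copy_list entry is indexed as
# (source_path, target_name), so the referenced source files get hashed and
# inlined into the returned manifest. write_example_calcinfo is a hypothetical
# helper name.
import json
import os

def write_example_calcinfo(base, source_file):
    """Write a toy calcinfo.json whose local_copy_list points at source_file."""
    aiida_dir = os.path.join(base, '.aiida')
    os.makedirs(aiida_dir, exist_ok=True)
    calcinfo = {'local_copy_list': [[source_file, 'copied_name.txt']]}
    with open(os.path.join(aiida_dir, 'calcinfo.json'), 'w') as handle:
        json.dump(calcinfo, handle)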
def test_prepare(vasp_calc, vasp_chgcar, vasp_wavecar, vasp_inputs, localhost_dir):
    """Check that preparing creates all necessary files."""
    from aiida.common.folders import Folder
    from aiida_vasp.calcs.vasp import VaspCalculation

    wavecar, _ = vasp_wavecar
    chgcar, _ = vasp_chgcar

    inputs_dict = {
        'gga': 'PE',
        'gga_compat': False,
        'lorbit': 11,
        'sigma': 0.5,
        'magmom': '30 * 2*0.',
        'icharg': 11,
    }

    inputs = vasp_inputs(parameters=inputs_dict)
    inputs.charge_density = chgcar
    inputs.wavefunctions = wavecar

    calc = vasp_calc(inputs=inputs)
    temp_folder = Folder(str(localhost_dir.parent))
    calcinfo = calc.prepare_for_submission(temp_folder)
    input_files = temp_folder.get_content_list()

    for file_name in ['INCAR', 'KPOINTS', 'POSCAR', 'POTCAR']:
        assert file_name in input_files

    assert 'EIGENVAL' in calcinfo.retrieve_list
    assert 'DOSCAR' in calcinfo.retrieve_list
    assert 'wannier90*' in calcinfo.retrieve_list
    assert calcinfo.codes_info[0].stdout_name == VaspCalculation._VASP_OUTPUT
    assert calcinfo.codes_info[0].join_files is True

    inputs_dict.update({'icharg': 2})

    inputs = vasp_inputs(parameters=inputs_dict)
    inputs.charge_density = chgcar
    inputs.wavefunctions = wavecar

    calc = vasp_calc(inputs=inputs)
    temp_folder = Folder(str(localhost_dir.parent))
    calcinfo = calc.prepare_for_submission(temp_folder)

    assert 'WAVECAR' in [item[1] for item in calcinfo.local_copy_list]
def presubmit(self, folder: Folder) -> CalcInfo:
    """Prepares the calculation folder with all inputs, ready to be copied to the cluster.

    :param folder: a SandboxFolder that can be used to write calculation input files and the scheduling script.

    :return calcinfo: the CalcInfo object containing the information needed by the daemon to handle operations.
    """
    # pylint: disable=too-many-locals,too-many-statements,too-many-branches
    from aiida.common.exceptions import PluginInternalError, ValidationError, InvalidOperation, InputValidationError
    from aiida.common import json
    from aiida.common.utils import validate_list_of_string_tuples
    from aiida.common.datastructures import CodeInfo, CodeRunMode
    from aiida.orm import load_node, Code, Computer
    from aiida.plugins import DataFactory
    from aiida.schedulers.datastructures import JobTemplate

    computer = self.node.computer
    inputs = self.node.get_incoming(link_type=LinkType.INPUT_CALC)

    if not self.inputs.metadata.dry_run and self.node.has_cached_links():  # type: ignore[union-attr]
        raise InvalidOperation('calculation node has unstored links in cache')

    codes = [_ for _ in inputs.all_nodes() if isinstance(_, Code)]

    for code in codes:
        if not code.can_run_on(computer):
            raise InputValidationError(
                'The selected code {} for calculation {} cannot run on computer {}'.format(
                    code.pk, self.node.pk, computer.label))

        if code.is_local() and code.get_local_executable() in folder.get_content_list():
            raise PluginInternalError(
                f'The plugin created a file {code.get_local_executable()} that is also the executable name!')

    calc_info = self.prepare_for_submission(folder)
    calc_info.uuid = str(self.node.uuid)
    scheduler = computer.get_scheduler()

    # I create the job template to pass to the scheduler
    job_tmpl = JobTemplate()
    job_tmpl.shebang = computer.get_shebang()
    job_tmpl.submit_as_hold = False
    job_tmpl.rerunnable = False
    job_tmpl.job_environment = {}
    # 'email', 'email_on_started', 'email_on_terminated',
    job_tmpl.job_name = f'aiida-{self.node.pk}'
    job_tmpl.sched_output_path = self.options.scheduler_stdout
    if self.options.scheduler_stderr == self.options.scheduler_stdout:
        job_tmpl.sched_join_files = True
    else:
        job_tmpl.sched_error_path = self.options.scheduler_stderr
        job_tmpl.sched_join_files = False

    # Set the retrieve path; also add scheduler STDOUT and STDERR
    retrieve_list = calc_info.retrieve_list or []
    if job_tmpl.sched_output_path is not None and job_tmpl.sched_output_path not in retrieve_list:
        retrieve_list.append(job_tmpl.sched_output_path)
    if not job_tmpl.sched_join_files:
        if job_tmpl.sched_error_path is not None and job_tmpl.sched_error_path not in retrieve_list:
            retrieve_list.append(job_tmpl.sched_error_path)
    retrieve_list.extend(self.node.get_option('additional_retrieve_list') or [])
    self.node.set_retrieve_list(retrieve_list)

    retrieve_singlefile_list = calc_info.retrieve_singlefile_list or []
    # a validation on the subclasses of retrieve_singlefile_list
    for _, subclassname, _ in retrieve_singlefile_list:
        file_sub_class = DataFactory(subclassname)
        if not issubclass(file_sub_class, orm.SinglefileData):
            raise PluginInternalError(
                '[presubmission of calc {}] retrieve_singlefile_list subclass problem: {} is '
                'not subclass of SinglefileData'.format(self.node.pk, file_sub_class.__name__))
    if retrieve_singlefile_list:
        self.node.set_retrieve_singlefile_list(retrieve_singlefile_list)

    # Handle the retrieve_temporary_list
    retrieve_temporary_list = calc_info.retrieve_temporary_list or []
    self.node.set_retrieve_temporary_list(retrieve_temporary_list)

    # The `if` is done so that if the method returns None, it is not added.
    # This has two advantages:
    # - it does not add too many \n\n if most of the prepend_text are empty
    # - most importantly, it skips the cases in which one of the methods would
    #   return None, in which case the join method would raise an exception
    prepend_texts = [computer.get_prepend_text()] + \
        [code.get_prepend_text() for code in codes] + \
        [calc_info.prepend_text, self.node.get_option('prepend_text')]
    job_tmpl.prepend_text = '\n\n'.join(prepend_text for prepend_text in prepend_texts if prepend_text)

    append_texts = [self.node.get_option('append_text'), calc_info.append_text] + \
        [code.get_append_text() for code in codes] + \
        [computer.get_append_text()]
    job_tmpl.append_text = '\n\n'.join(append_text for append_text in append_texts if append_text)

    # Set resources, also with get_default_mpiprocs_per_machine
    resources = self.node.get_option('resources')
    scheduler.preprocess_resources(resources, computer.get_default_mpiprocs_per_machine())
    job_tmpl.job_resource = scheduler.create_job_resource(**resources)

    subst_dict = {'tot_num_mpiprocs': job_tmpl.job_resource.get_tot_num_mpiprocs()}

    for key, value in job_tmpl.job_resource.items():
        subst_dict[key] = value
    mpi_args = [arg.format(**subst_dict) for arg in computer.get_mpirun_command()]
    extra_mpirun_params = self.node.get_option('mpirun_extra_params')  # same for all codes in the same calc

    # set the codes_info
    if not isinstance(calc_info.codes_info, (list, tuple)):
        raise PluginInternalError('codes_info passed to CalcInfo must be a list of CodeInfo objects')

    codes_info = []
    for code_info in calc_info.codes_info:

        if not isinstance(code_info, CodeInfo):
            raise PluginInternalError('Invalid codes_info, must be a list of CodeInfo objects')

        if code_info.code_uuid is None:
            raise PluginInternalError('CalcInfo should have the information of the code to be launched')
        this_code = load_node(code_info.code_uuid, sub_classes=(Code,))

        this_withmpi = code_info.withmpi  # to decide better how to set the default
        if this_withmpi is None:
            if len(calc_info.codes_info) > 1:
                raise PluginInternalError('For more than one code, it is necessary to set withmpi in codes_info')
            else:
                this_withmpi = self.node.get_option('withmpi')

        if this_withmpi:
            this_argv = (
                mpi_args + extra_mpirun_params + [this_code.get_execname()] +
                (code_info.cmdline_params if code_info.cmdline_params is not None else []))
        else:
            this_argv = [this_code.get_execname()] + \
                (code_info.cmdline_params if code_info.cmdline_params is not None else [])

        # overwrite the old cmdline_params and add codename and mpirun stuff
        code_info.cmdline_params = this_argv

        codes_info.append(code_info)
    job_tmpl.codes_info = codes_info

    # set the codes execution mode
    if len(codes) > 1:
        try:
            job_tmpl.codes_run_mode = calc_info.codes_run_mode
        except KeyError as exc:
            raise PluginInternalError('Need to set the order of the code execution (parallel or serial?)') from exc
    else:
        job_tmpl.codes_run_mode = CodeRunMode.SERIAL
    ########################################################################

    custom_sched_commands = self.node.get_option('custom_scheduler_commands')
    if custom_sched_commands:
        job_tmpl.custom_scheduler_commands = custom_sched_commands

    job_tmpl.import_sys_environment = self.node.get_option('import_sys_environment')
    job_tmpl.job_environment = self.node.get_option('environment_variables')

    queue_name = self.node.get_option('queue_name')
    account = self.node.get_option('account')
    qos = self.node.get_option('qos')
    if queue_name is not None:
        job_tmpl.queue_name = queue_name
    if account is not None:
        job_tmpl.account = account
    if qos is not None:
        job_tmpl.qos = qos
    priority = self.node.get_option('priority')
    if priority is not None:
        job_tmpl.priority = priority
    max_wallclock_seconds = self.node.get_option('max_wallclock_seconds')
    if max_wallclock_seconds is not None:
        job_tmpl.max_wallclock_seconds = max_wallclock_seconds
    max_memory_kb = self.node.get_option('max_memory_kb')
    if max_memory_kb is not None:
        job_tmpl.max_memory_kb = max_memory_kb

    submit_script_filename = self.node.get_option('submit_script_filename')
    script_content = scheduler.get_submit_script(job_tmpl)
    folder.create_file_from_filelike(io.StringIO(script_content), submit_script_filename, 'w', encoding='utf8')

    subfolder = folder.get_subfolder('.aiida', create=True)
    subfolder.create_file_from_filelike(io.StringIO(json.dumps(job_tmpl)), 'job_tmpl.json', 'w', encoding='utf8')
    subfolder.create_file_from_filelike(io.StringIO(json.dumps(calc_info)), 'calcinfo.json', 'w', encoding='utf8')

    if calc_info.local_copy_list is None:
        calc_info.local_copy_list = []

    if calc_info.remote_copy_list is None:
        calc_info.remote_copy_list = []

    # Some validation
    this_pk = self.node.pk if self.node.pk is not None else '[UNSTORED]'
    local_copy_list = calc_info.local_copy_list
    try:
        validate_list_of_string_tuples(local_copy_list, tuple_length=3)
    except ValidationError as exception:
        raise PluginInternalError(
            f'[presubmission of calc {this_pk}] local_copy_list format problem: {exception}') from exception

    remote_copy_list = calc_info.remote_copy_list
    try:
        validate_list_of_string_tuples(remote_copy_list, tuple_length=3)
    except ValidationError as exception:
        raise PluginInternalError(
            f'[presubmission of calc {this_pk}] remote_copy_list format problem: {exception}') from exception

    for (remote_computer_uuid, _, dest_rel_path) in remote_copy_list:
        try:
            Computer.objects.get(uuid=remote_computer_uuid)  # pylint: disable=unused-variable
        except exceptions.NotExistent as exception:
            raise PluginInternalError(
                '[presubmission of calc {}] '
                'The remote copy requires a computer with UUID={} '
                'but no such computer was found in the '
                'database'.format(this_pk, remote_computer_uuid)) from exception
        if os.path.isabs(dest_rel_path):
            raise PluginInternalError(
                '[presubmission of calc {}] '
                'The destination path of the remote copy '
                'is absolute! ({})'.format(this_pk, dest_rel_path))

    return calc_info
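# Hedged sketch (not from the original source; UUIDs and paths are
# placeholders) of a CalcInfo shaped to pass the copy-list validation at the
# end of presubmit above: both local_copy_list and remote_copy_list must be
# lists of 3-string tuples.
from aiida.common.datastructures import CalcInfo, CodeInfo

def build_example_calcinfo(code_uuid, file_uuid, computer_uuid):
    """Build a minimal CalcInfo with one code and well-formed copy lists."""
    code_info = CodeInfo()
    code_info.code_uuid = code_uuid
    code_info.cmdline_params = ['--input', 'aiida.in']

    calc_info = CalcInfo()
    calc_info.codes_info = [code_info]
    calc_info.retrieve_list = ['aiida.out']
    # (node UUID, source relative path, destination relative path)
    calc_info.local_copy_list = [(file_uuid, 'file.txt', 'file.txt')]
    # (computer UUID, absolute source path, relative destination path)
    calc_info.remote_copy_list = [(computer_uuid, '/remote/source/path', 'dest/path')]
    return calc_info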
def dryrun_test(cls, inputs, castep_exe='castep.serial', verbose=True):
    """
    Perform a dry-run test in a sandbox folder, given a prepared builder or an inputs dictionary.
    """
    if isinstance(inputs, ProcessBuilder):
        res = cls.submit_test(inputs)
    else:
        res = cls.submit_test(cls, **inputs)
    folder = Folder(res[1])
    dry_run_node = res[0]
    seedname = dry_run_node.get_option('seedname')

    def _print(inp):
        if verbose:
            print(inp)

    # Do a dryrun
    try:
        output = check_output([castep_exe, '-v'], universal_newlines=True)
    except OSError:
        _print("CASTEP executable '{}' is not found".format(castep_exe))
        return None

    # Now start the dryrun
    _print('Running with {}'.format(check_output(['which', castep_exe], universal_newlines=True)))
    _print(output)

    _print('Starting dryrun...')
    call([castep_exe, '--dryrun', seedname], cwd=folder.abspath)

    # Check if any *.err files exist
    contents = folder.get_content_list()
    for fname in contents:
        if fnmatch(fname, '*.err'):
            with folder.open(fname) as fhandle:
                _print('Error found in {}:'.format(fname))
                _print(fhandle.read())
            raise InputValidationError('Error found during dryrun')

    # Gather information from the dryrun file
    dryrun_results = {}
    out_file = seedname + '.castep'
    with folder.open(out_file) as fhandle:
        for line in fhandle:
            mth = re.match(r'\s*k-Points For SCF Sampling:\s+(\d+)\s*', line)
            if mth:
                dryrun_results['num_kpoints'] = int(mth.group(1))
                _print('Number of k-points: {}'.format(mth.group(1)))
                mth = None
                continue
            mth = re.match(
                r'\| Approx\. total storage required'
                r' per process\s+([0-9.]+)\sMB\s+([0-9.]+)', line)
            if mth:
                dryrun_results['memory_MB'] = float(mth.group(1))
                dryrun_results['disk_MB'] = float(mth.group(2))
                _print('RAM: {} MB, DISK: {} MB'.format(mth.group(1), mth.group(2)))
                mth = None
                continue

    return folder, dryrun_results
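# Hedged usage sketch for dryrun_test (the class name CastepCalculation and
# the builder contents are assumptions drawn from the aiida-castep context):
# the classmethod returns the sandbox Folder plus a dict such as
# {'num_kpoints': ..., 'memory_MB': ..., 'disk_MB': ...} parsed from the
# seedname.castep dryrun output, or None if the executable is missing.
builder = CastepCalculation.get_builder()
# ... populate builder with code, structure, parameters, and options ...
result = CastepCalculation.dryrun_test(builder, castep_exe='castep.serial', verbose=True)
if result is not None:
    folder, dryrun_results = result
    print(dryrun_results.get('num_kpoints'), dryrun_results.get('memory_MB'))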