class MetadynInput(Input):
    def __init__(self):
        super(MetadynInput, self).__init__()
        self.iconst = GenericParameters(
            input_file_name=None, table_name="iconst", val_only=True, comment_char="!"
        )
        self.penaltypot = GenericParameters(
            input_file_name=None, table_name="penaltypot", val_only=True, comment_char="!"
        )
        self._constraint_dict = dict()
        self._complex_constraints = dict()

    def write(self, structure, modified_elements, directory=None):
        """
        Writes all the input files to a specified directory

        Args:
            structure (atomistics.structure.atoms.Atoms instance): Structure to be written
            modified_elements: modified element information forwarded to the parent Input.write()
            directory (str): The working directory for the VASP run
        """
        # Writing the constraints, increments, and penalty potentials
        super(MetadynInput, self).write(structure, modified_elements, directory)
        self.iconst.write_file(file_name="ICONST", cwd=directory)
        self.penaltypot.write_file(file_name="PENALTYPOT", cwd=directory)

    def to_hdf(self, hdf):
        super(MetadynInput, self).to_hdf(hdf)
        with hdf.open("input") as hdf5_input:
            self.iconst.to_hdf(hdf5_input)
            self.penaltypot.to_hdf(hdf5_input)
            hdf5_input["constraint_dict"] = self._constraint_dict
            hdf5_input["complex_constraint_dict"] = self._complex_constraints

    def from_hdf(self, hdf):
        super(MetadynInput, self).from_hdf(hdf)
        with hdf.open("input") as hdf5_input:
            self.iconst.from_hdf(hdf5_input)
            self.penaltypot.from_hdf(hdf5_input)
            if "constraint_dict" in hdf5_input.list_nodes():
                self._constraint_dict = hdf5_input["constraint_dict"]
            if "complex_constraint_dict" in hdf5_input.list_nodes():
                self._complex_constraints = hdf5_input["complex_constraint_dict"]
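
# Usage sketch (not from the source): how the extra ICONST/PENALTYPOT parameter objects could be
# filled before write() is called. The constraint strings below are placeholders, and it is an
# assumption that GenericParameters.load_string() is the right way to append value-only lines
# (iconst/penaltypot are created with val_only=True above); structure and modified_elements are
# expected to come from the surrounding VASP job.
def _example_prepare_metadyn_input():
    """Hypothetical helper showing how MetadynInput might be populated."""
    inp = MetadynInput()
    inp.iconst.load_string("2 1 2 0")        # placeholder ICONST line (assumed API)
    inp.penaltypot.load_string("1 2.0 0.1")  # placeholder PENALTYPOT line (assumed API)
    # inp.write(structure, modified_elements, directory="path/to/working_dir")
    return inp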
class TemplateJob(GenericJob):
    def __init__(self, project, job_name):
        super().__init__(project, job_name)
        self.input = GenericParameters(table_name="input")

    def to_hdf(self, hdf=None, group_name=None):
        super().to_hdf(hdf=hdf, group_name=group_name)
        with self.project_hdf5.open("input") as h5in:
            self.input.to_hdf(h5in)

    def from_hdf(self, hdf=None, group_name=None):
        super().from_hdf(hdf=hdf, group_name=group_name)
        with self.project_hdf5.open("input") as h5in:
            self.input.from_hdf(h5in)
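
# Minimal sketch of how TemplateJob is typically specialised. ToyJob and its parameters are
# illustrative assumptions, not part of the source; item-style assignment on GenericParameters
# follows the same pattern used elsewhere in this file (e.g. SxHarmPotTst.__init__). A real job
# would also point self.executable at the program that reads input.inp.
class ToyJob(TemplateJob):
    """Hypothetical example job: stores one parameter and a derived result."""

    def __init__(self, project, job_name):
        super().__init__(project, job_name)
        self.input["value"] = 1.0  # hypothetical default parameter

    def write_input(self):
        # write the parameters into the working directory for the (assumed) external executable
        self.input.write_file(file_name="input.inp", cwd=self.working_directory)

    def collect_output(self):
        # sketch only: store a trivially derived result next to the input in HDF5
        with self.project_hdf5.open("output") as h5out:
            h5out["result"] = 2.0 * float(self.input["value"])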
class SerialMasterBase(GenericMaster):
    """
    The serial master class is a metajob consisting of a dynamic list of jobs which are executed in serial mode.
    The job is derived from the GenericMaster.

    Args:
        project (ProjectHDFio): ProjectHDFio instance which points to the HDF5 file the job is stored in
        job_name (str): name of the job, which has to be unique within the project

    Attributes:
        .. attribute:: job_name
            name of the job, which has to be unique within the project
        .. attribute:: status
            execution status of the job, can be one of the following [initialized, appended, created, submitted,
            running, aborted, collect, suspended, refresh, busy, finished]
        .. attribute:: job_id
            unique id to identify the job in the pyiron database
        .. attribute:: parent_id
            job id of the predecessor job - the job which was executed before the current one in the current job series
        .. attribute:: master_id
            job id of the master job - a meta job which groups a series of jobs, which are executed either in
            parallel or in serial.
        .. attribute:: child_ids
            list of child job ids - only meta jobs have child jobs - jobs which list the meta job as their master
        .. attribute:: project
            Project instance the job is located in
        .. attribute:: project_hdf5
            ProjectHDFio instance which points to the HDF5 file the job is stored in
        .. attribute:: job_info_str
            short string to describe the job by its job_name and job ID - mainly used for logging
        .. attribute:: working_directory
            working directory the job is executed in - outside the HDF5 file
        .. attribute:: path
            path to the job as a combination of absolute file system path and path within the HDF5 file.
        .. attribute:: version
            Version of the hamiltonian, which is also the version of the executable unless a custom executable is used.
        .. attribute:: executable
            Executable used to run the job - usually the path to an external executable.
        .. attribute:: library_activated
            For job types which offer a Python library pyiron can use the python library instead of an external
            executable.
        .. attribute:: server
            Server object to handle the execution environment for the job.
        .. attribute:: queue_id
            the ID returned from the queuing system - it is most likely not the same as the job ID.
        .. attribute:: logger
            logger object to monitor the external execution and internal pyiron warnings.
        .. attribute:: restart_file_list
            list of files which are used to restart the calculation from these files.
        .. attribute:: job_type
            Job type object with all the available job types: ['ExampleJob', 'SerialMaster', 'ParallelMaster',
            'ScriptJob', 'ListMaster']
        .. attribute:: child_names
            Dictionary matching the child ID to the child job name.
        .. attribute:: start_job
            The first job of the series.
        .. attribute:: input
            The input of the start job - the first job of the series.
    """

    def __init__(self, project, job_name):
        self._input = GenericParameters("parameters")  # e.g. convergence goal
        super(SerialMasterBase, self).__init__(project, job_name=job_name)
        self.__name__ = "SerialMaster"
        self.__version__ = "0.3"
        self._output = GenericOutput()
        self._max_iterations = 100
        self._start_job = None
        self._run_fast = False
        self._logger.debug("run_fast: {}".format(self._run_fast))
        self._convergence_goal = None
        self._convergence_goal_qwargs = {}
        self._convergence_goal_str = None

    @property
    def start_job(self):
        """
        Get the first job of the series.

        Returns:
            GenericJob: start job
        """
        if self._start_job:
            return self._start_job
        elif len(self) > 0:
            self._start_job = self[-1]
            return self._start_job
        else:
            return None

    @start_job.setter
    def start_job(self, job):
        """
        Set the first job of the series - this is the same as appending the job.

        Args:
            job (GenericJob): start job
        """
        self.append(job)

    @property
    def ref_job(self):
        return self.start_job

    @ref_job.setter
    def ref_job(self, job):
        self.append(job)

    @property
    def input(self):
        """
        Get the input of the start job - the first job of the series.

        Returns:
            GenericParameters: input of the start job
        """
        if self.start_job:
            return self._start_job.input
        else:
            return None

    @input.setter
    def input(self, value):
        """
        Set the input of the start job - the first job of the series.

        Args:
            value (GenericParameters): input of the start job
        """
        if self.start_job:
            self._start_job.input = value
        else:
            raise ValueError(
                "Input can only be set after a start job has been assigned."
            )

    def set_input_to_read_only(self):
        """
        This function enforces read-only mode for the input classes, but it has to be implemented in the
        individual classes.
        """
        self._input.read_only = True

    def get_initial_child_name(self):
        """
        Get the name of the initial child.

        Returns:
            str: name of the initial child
        """
        return self.project.db.get_item_by_id(self.child_ids[0])["job"]

    def create_next(self, job_name=None):
        """
        Create the next job in the series by duplicating the previous job.

        Args:
            job_name (str): name of the new job - optional - default='job_<index>'

        Returns:
            GenericJob: next job
        """
        if len(self) == 0:
            raise ValueError("No job available in job list, please append a job first.")
        if len(self._job_name_lst) > len(self.child_ids):
            return self.pop(-1)
        ham_old = self.project.load(self.child_ids[-1], convert_to_object=True)
        if ham_old.status.aborted:
            ham_old.status.created = True
            return ham_old
        elif not ham_old.status.finished:
            return None
        if job_name is None:
            job_name = "_".join(
                ham_old.job_name.split("_")[:-1] + [str(len(self.child_ids))]
            )
        new_job = ham_old.restart(job_name=job_name)
        new_job.server.cores = self.server.cores
        return new_job

    def collect_output(self):
        """
        Collect the output files of the individual jobs and set the output of the last job to be the output of the
        SerialMaster - so the SerialMaster contains the same output as its last child.
        """
        ham_lst = [self.project_hdf5.inspect(child_id) for child_id in self.child_ids]
        if (
            "output" in ham_lst[0].list_groups()
            and "generic" in ham_lst[0]["output"].list_groups()
        ):
            nodes = ham_lst[0]["output/generic"].list_nodes()
            with self.project_hdf5.open("output/generic") as hh:
                for node in nodes:
                    hh[node] = np.concatenate(
                        [ham["output/generic/{}".format(node)] for ham in ham_lst],
                        axis=0,
                    )

    def collect_logfiles(self):
        """
        The collect_logfiles function is required by the GenericJob class, therefore we use an empty template here.
        """
        pass

    def copy(self):
        """
        Copy the GenericJob object which links to the job and its HDF5 file

        Returns:
            GenericJob: New GenericJob object pointing to the same job
        """
        new_job = super(SerialMasterBase, self).copy()
        new_job.start_job = self.start_job
        return new_job

    def from_hdf(self, hdf=None, group_name=None):
        """
        Restore the SerialMaster from an HDF5 file

        Args:
            hdf (ProjectHDFio): HDF5 group object - optional
            group_name (str): HDF5 subgroup name - optional
        """
        super(SerialMasterBase, self).from_hdf(hdf=hdf, group_name=group_name)
        with self.project_hdf5.open("input") as hdf5_input:
            self._input.from_hdf(hdf5_input)
            convergence_goal_str = hdf5_input["convergence_goal"]
            if convergence_goal_str == "None":
                self._convergence_goal = None
            else:
                self._convergence_goal_str = convergence_goal_str
                self._convergence_goal = get_function_from_string(convergence_goal_str)
                self._convergence_goal_qwargs = hdf5_input["convergence_goal_qwargs"]

    def get_from_childs(self, path):
        """
        Extract the output from all child jobs and append it to a list

        Args:
            path (str): path inside the HDF5 files of the individual jobs like 'output/generic/volume'

        Returns:
            list: list of output from the child jobs
        """
        var_lst = []
        for child_id in self.child_ids:
            ham = self.project.load(child_id, convert_to_object=False)
            var = ham.__getitem__(path)
            var_lst.append(var)
        return np.array(var_lst)

    def iter_jobs(self, convert_to_object=True):
        """
        Iterate over the jobs within the SerialMaster

        Args:
            convert_to_object (bool): load the full GenericJob object (default) or just the HDF5 / JobCore object

        Returns:
            yield: Yield of GenericJob or JobCore
        """
        for job_id in self.child_ids:
            yield self.project.load(job_id, convert_to_object=convert_to_object)

    def run_if_interactive(self):
        pass

    def _get_job_template(self):
        self._logger.info("run serial master {}".format(self.job_info_str))
        job = self.pop(-1)
        job._master_id = self.job_id
        if self.server.new_hdf:
            job._hdf5 = self.project_hdf5.create_hdf(
                path=self.project.open(self.job_name + "_hdf5").path,
                job_name=job.job_name,
            )
        else:
            job._hdf5 = self.project_hdf5.open(job.job_name)
        self._logger.info("SerialMaster: run job {}".format(job.job_name))
        return job

    @staticmethod
    def _run_child_job(job):
        job.run()

    def _run_if_master_queue(self, job):
        job.run()
        if job.python_execution_process:
            job.python_execution_process.communicate()
        self.run_if_refresh()

    def _run_if_master_non_modal_child_non_modal(self, job):
        job.run()
        if self.master_id:
            del self

    def _run_if_master_modal_child_modal(self, job):
        job.run()
        self.run_if_refresh()

    def _run_if_master_modal_child_non_modal(self, job):
        job.run()
        while not job.status.finished and not job.status.aborted:
            job.refresh_job_status()
            time.sleep(5)
        self.run_if_refresh()

    def run_static(self, **qwargs):
        self.status.running = True
        if len(self) > len(self.child_ids):
            job = self._get_job_template()
            self.status.suspended = True
            if self.server.run_mode.queue:
                self._run_if_master_queue(job)
            elif self.server.run_mode.non_modal and job.server.run_mode.non_modal:
                self._run_if_master_non_modal_child_non_modal(job)
            elif self.server.run_mode.modal and job.server.run_mode.modal:
                self._run_if_master_modal_child_modal(job)
            elif self.server.run_mode.modal and job.server.run_mode.non_modal:
                self._run_if_master_modal_child_non_modal(job)
            else:
                raise TypeError()
        else:
            self.status.collect = True
            self.run()

    def set_goal(self, convergence_goal, **qwargs):
        """
        Set a convergence goal for the SerialMaster - this is necessary to stop the series.

        Args:
            convergence_goal (Function): the convergence goal can be any Python function, but if external packages
                                         like numpy are used they have to be imported within the function.
            **qwargs: arguments of the convergence goal function.
        """
        self._convergence_goal = convergence_goal
        self._convergence_goal_qwargs = qwargs
        self._convergence_goal_str = inspect.getsource(convergence_goal)
        if self.project_hdf5.file_exists:
            self.to_hdf()

    def show(self):
        """
        List all jobs in the SerialMaster.

        Returns:
            list: list of jobs ['job', <index>, <GenericJob>]
        """
        return [["job", str(i), str(job)] for i, job in enumerate(self)]

    def to_hdf(self, hdf=None, group_name=None):
        """
        Store the SerialMaster in an HDF5 file

        Args:
            hdf (ProjectHDFio): HDF5 group object - optional
            group_name (str): HDF5 subgroup name - optional
        """
        super(SerialMasterBase, self).to_hdf(hdf=hdf, group_name=group_name)
        with self.project_hdf5.open("input") as hdf5_input:
            self._input.to_hdf(hdf5_input)
            if self._convergence_goal is not None:
                try:
                    hdf5_input["convergence_goal"] = inspect.getsource(
                        self._convergence_goal
                    )
                except IOError:
                    hdf5_input["convergence_goal"] = self._convergence_goal_str
                hdf5_input["convergence_goal_qwargs"] = self._convergence_goal_qwargs
            else:
                hdf5_input["convergence_goal"] = "None"

    def write_input(self):
        """
        Write the input files - for the SerialMaster this only contains the convergence goal.
        """
        self._input.write_file(file_name="input.inp", cwd=self.working_directory)

    def __len__(self):
        """
        Length of the SerialMaster equals the number of children appended.

        Returns:
            int: length of the SerialMaster
        """
        return len(self.child_ids + self._job_name_lst)

    def __getitem__(self, item):
        """
        Get/read data from the GenericMaster

        Args:
            item (str, slice): path to the data or key of the data object

        Returns:
            dict, list, float, int: data or data object
        """
        child_id_lst = self.child_ids
        child_name_lst = [
            self.project.db.get_item_by_id(child_id)["job"]
            for child_id in self.child_ids
        ]
        if isinstance(item, int):
            total_lst = child_name_lst + self._job_name_lst
            item = total_lst[item]
        return self._get_item_when_str(
            item=item, child_id_lst=child_id_lst, child_name_lst=child_name_lst
        )

    def run_if_refresh(self):
        """
        Internal helper function; run_if_refresh is called when the job status is 'refresh'. If the job was
        suspended previously, the job is going to be started again, to be continued.
        """
        conv_goal_exists = bool(self._convergence_goal)
        self._logger.info("Does the convergence goal exist: {}".format(conv_goal_exists))
        if not conv_goal_exists:
            self.status.collect = True
            self.run()
        else:
            subjobs_statuses = set(
                [
                    self.project.db.get_item_by_id(child_id)["status"]
                    for child_id in self.child_ids
                ]
            )
            if len(subjobs_statuses) == 0 or subjobs_statuses == {"finished"}:
                ham = self._convergence_goal(self, **self._convergence_goal_qwargs)
                if ham is not True:
                    self.append(ham)
                    self.to_hdf()
                    self.run_static()
                else:
                    self.status.collect = True
                    self.run()
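
# Usage sketch for SerialMasterBase.set_goal(): the goal is stored via inspect.getsource(), so it
# has to be a self-contained function that does its own imports (as the docstring above notes) and
# that receives the SerialMaster as its first argument. The energy criterion, HDF path and the
# 0.01 tolerance are illustrative assumptions, not taken from the source.
def convergence_check(self, tolerance=0.01, **qwargs):
    """Hypothetical convergence goal: stop once the last two children agree in total energy."""
    import numpy as np

    eng_lst = self.get_from_childs("output/generic/energy_tot")
    if len(eng_lst) > 1 and np.abs(eng_lst[-1][-1] - eng_lst[-2][-1]) < tolerance:
        return True          # True terminates the series
    return self.create_next()  # otherwise hand back the next job to run


# serial_master.set_goal(convergence_check, tolerance=0.01)  # assumed call pattern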
class InteractiveWrapper(GenericMaster):
    def __init__(self, project, job_name):
        super(InteractiveWrapper, self).__init__(project, job_name)
        self._ref_job = None
        self.input = GenericParameters("parameters")

    @property
    def structure(self):
        if self.ref_job:
            return self._ref_job.structure
        else:
            return None

    @structure.setter
    def structure(self, basis):
        if self.ref_job:
            self._ref_job.structure = basis
        else:
            raise ValueError(
                "A structure can only be set after a start job has been assigned."
            )

    @property
    def ref_job(self):
        """
        Get the reference job template from which all jobs within the ParallelMaster are generated.

        Returns:
            GenericJob: reference job
        """
        if self._ref_job is not None:
            return self._ref_job
        try:
            if isinstance(self[0], GenericJob):
                self._ref_job = self[0]
                return self._ref_job
            else:
                return None
        except IndexError:
            return None

    @ref_job.setter
    def ref_job(self, ref_job):
        """
        Set the reference job template from which all jobs within the ParallelMaster are generated.

        Args:
            ref_job (GenericJob): reference job
        """
        if not ref_job.server.run_mode.interactive:
            warnings.warn("Run mode of the reference job not set to interactive")
        self.append(ref_job)

    def set_input_to_read_only(self):
        """
        This function enforces read-only mode for the input classes, but it has to be implemented in the
        individual classes.
        """
        self.input.read_only = True

    def validate_ready_to_run(self):
        """
        Validate that the calculation is ready to be executed. By default no generic checks are performed, but one
        could check that the input information is complete or validate the consistency of the input at this point.
        """
        self.ref_job.validate_ready_to_run()

    def check_setup(self):
        """
        Checks whether certain parameters (such as the plane wave cutoff radius in DFT) are changed from the pyiron
        standard values to allow for physically meaningful results. This function is called manually or only when
        the job is submitted to the queueing system.
        """
        try:
            self.ref_job.check_setup()
        except AttributeError:
            pass

    def ref_job_initialize(self):
        """
        Initialize the reference job by popping it from the internal job list and linking it to this master.
        """
        if len(self._job_name_lst) > 0:
            self._ref_job = self.pop(-1)
            if self._job_id is not None and self._ref_job._master_id is None:
                self._ref_job.master_id = self.job_id
                self._ref_job.server.cores = self.server.cores

    def get_final_structure(self):
        """
        Returns:
            Atoms: final structure of the reference job (deprecated - use get_structure() instead)
        """
        warnings.warn(
            "get_final_structure() is deprecated - please use get_structure() instead.",
            DeprecationWarning,
        )
        if self.ref_job:
            return self._ref_job.get_structure(iteration_step=-1)
        else:
            return None

    def to_hdf(self, hdf=None, group_name=None):
        """
        Store the InteractiveWrapper in an HDF5 file

        Args:
            hdf (ProjectHDFio): HDF5 group object - optional
            group_name (str): HDF5 subgroup name - optional
        """
        if self._ref_job is not None and self._ref_job.job_id is None:
            self.append(self._ref_job)
        super(InteractiveWrapper, self).to_hdf(hdf=hdf, group_name=group_name)
        with self.project_hdf5.open('input') as hdf5_input:
            self.input.to_hdf(hdf5_input)

    def from_hdf(self, hdf=None, group_name=None):
        """
        Restore the InteractiveWrapper from an HDF5 file

        Args:
            hdf (ProjectHDFio): HDF5 group object - optional
            group_name (str): HDF5 subgroup name - optional
        """
        super(InteractiveWrapper, self).from_hdf(hdf=hdf, group_name=group_name)
        with self.project_hdf5.open('input') as hdf5_input:
            self.input.from_hdf(hdf5_input)

    def collect_output(self):
        pass

    def collect_logfiles(self):
        pass

    def db_entry(self):
        """
        Generate the initial database entry

        Returns:
            (dict): db_dict
        """
        db_dict = super(InteractiveWrapper, self).db_entry()
        if self.structure:
            parent_structure = self.structure.get_parent_basis()
            db_dict["ChemicalFormula"] = parent_structure.get_chemical_formula()
        return db_dict

    def _db_entry_update_run_time(self):
        """
        Update the database entry with the stop time and the total CPU time of the job.
        """
        job_id = self.get_job_id()
        db_dict = {}
        start_time = self.project.db.get_item_by_id(job_id)["timestart"]
        db_dict["timestop"] = datetime.now()
        db_dict["totalcputime"] = (db_dict["timestop"] - start_time).seconds
        self.project.db.item_update(db_dict, job_id)

    def _finish_job(self):
        """
        Mark the job as finished, update the database entry and notify the master job.
        """
        self.status.finished = True
        self._db_entry_update_run_time()
        self._logger.info(
            "{}, status: {}, monte carlo master".format(self.job_info_str, self.status)
        )
        self._calculate_successor()
        self.send_to_database()
        self.update_master()

    def __getitem__(self, item):
        """
        Get/read data from the GenericMaster

        Args:
            item (str, slice): path to the data or key of the data object

        Returns:
            dict, list, float, int: data or data object
        """
        child_id_lst = self.child_ids
        child_name_lst = [
            self.project.db.get_item_by_id(child_id)["job"]
            for child_id in self.child_ids
        ]
        if isinstance(item, int):
            total_lst = child_name_lst + self._job_name_lst
            item = total_lst[item]
        return self._get_item_when_str(
            item=item, child_id_lst=child_id_lst, child_name_lst=child_name_lst
        )
class ScriptJob(GenericJob):
    """
    The ScriptJob class allows to submit Python scripts and Jupyter notebooks to the pyiron job management system.

    Args:
        project (ProjectHDFio): ProjectHDFio instance which points to the HDF5 file the job is stored in
        job_name (str): name of the job, which has to be unique within the project

    Attributes:
        .. attribute:: job_name
            name of the job, which has to be unique within the project
        .. attribute:: status
            execution status of the job, can be one of the following [initialized, appended, created, submitted,
            running, aborted, collect, suspended, refresh, busy, finished]
        .. attribute:: job_id
            unique id to identify the job in the pyiron database
        .. attribute:: parent_id
            job id of the predecessor job - the job which was executed before the current one in the current job series
        .. attribute:: master_id
            job id of the master job - a meta job which groups a series of jobs, which are executed either in
            parallel or in serial.
        .. attribute:: child_ids
            list of child job ids - only meta jobs have child jobs - jobs which list the meta job as their master
        .. attribute:: project
            Project instance the job is located in
        .. attribute:: project_hdf5
            ProjectHDFio instance which points to the HDF5 file the job is stored in
        .. attribute:: job_info_str
            short string to describe the job by its job_name and job ID - mainly used for logging
        .. attribute:: working_directory
            working directory the job is executed in - outside the HDF5 file
        .. attribute:: path
            path to the job as a combination of absolute file system path and path within the HDF5 file.
        .. attribute:: version
            Version of the hamiltonian, which is also the version of the executable unless a custom executable is used.
        .. attribute:: executable
            Executable used to run the job - usually the path to an external executable.
        .. attribute:: library_activated
            For job types which offer a Python library pyiron can use the python library instead of an external
            executable.
        .. attribute:: server
            Server object to handle the execution environment for the job.
        .. attribute:: queue_id
            the ID returned from the queuing system - it is most likely not the same as the job ID.
        .. attribute:: logger
            logger object to monitor the external execution and internal pyiron warnings.
        .. attribute:: restart_file_list
            list of files which are used to restart the calculation from these files.
        .. attribute:: job_type
            Job type object with all the available job types: ['ExampleJob', 'SerialMaster', 'ParallelMaster',
            'ScriptJob', 'ListMaster']
        .. attribute:: script_path
            the absolute path to the python script
    """

    def __init__(self, project, job_name):
        super(ScriptJob, self).__init__(project, job_name)
        self.__version__ = "0.1"
        self.__name__ = "Script"
        self._script_path = None
        self.input = GenericParameters(table_name="custom_dict")

    @property
    def script_path(self):
        """
        Python script path

        Returns:
            str: absolute path to the python script
        """
        return self._script_path

    @script_path.setter
    def script_path(self, path):
        """
        Python script path

        Args:
            path (str): relative or absolute path to the python script or a corresponding notebook
        """
        if isinstance(path, str):
            self._script_path = self._get_abs_path(path)
            self.executable = self._executable_command(
                working_directory=self.working_directory, script_path=self._script_path
            )
        else:
            raise TypeError(
                "path should be a string, but {} is a {} instead.".format(path, type(path))
            )

    def set_input_to_read_only(self):
        """
        This function enforces read-only mode for the input classes, but it has to be implemented in the
        individual classes.
        """
        self.input.read_only = True

    def to_hdf(self, hdf=None, group_name=None):
        """
        Store the ScriptJob in an HDF5 file

        Args:
            hdf (ProjectHDFio): HDF5 group object - optional
            group_name (str): HDF5 subgroup name - optional
        """
        super(ScriptJob, self).to_hdf(hdf=hdf, group_name=group_name)
        with self.project_hdf5.open("input") as hdf5_input:
            hdf5_input["path"] = self._script_path
            self.input.to_hdf(hdf5_input)

    def from_hdf(self, hdf=None, group_name=None):
        """
        Restore the ScriptJob from an HDF5 file

        Args:
            hdf (ProjectHDFio): HDF5 group object - optional
            group_name (str): HDF5 subgroup name - optional
        """
        super(ScriptJob, self).from_hdf(hdf=hdf, group_name=group_name)
        with self.project_hdf5.open("input") as hdf5_input:
            try:
                self.script_path = hdf5_input["path"]
                self.input.from_hdf(hdf5_input)
            except TypeError:
                pass

    def write_input(self):
        """
        Copy the script to the working directory - only python scripts and jupyter notebooks are supported
        """
        file_name = os.path.basename(self._script_path)
        shutil.copyfile(
            src=self._script_path,
            dst=os.path.join(self.working_directory, file_name),
        )

    def collect_output(self):
        """
        Collect output function updates the master ID entries for all the child jobs created by this script job.
        If the child job is already assigned to a master job nothing happens - master IDs are not overwritten.
        """
        for job in self.project.iter_jobs(recursive=False, convert_to_object=False):
            pr_job = self.project.open(
                os.path.relpath(job.working_directory, self.project.path)
            )
            for subjob_id in pr_job.get_job_ids(recursive=False):
                if pr_job.db.get_item_by_id(subjob_id)["masterid"] is None:
                    pr_job.db.item_update({"masterid": str(job.job_id)}, subjob_id)

    def run_if_lib(self):
        """
        Compatibility function - the library run mode is not available for script jobs
        """
        raise NotImplementedError("Library run mode is not implemented for script jobs.")

    def collect_logfiles(self):
        """
        Compatibility function - no log files are being collected
        """
        pass

    @staticmethod
    def _executable_command(working_directory, script_path):
        """
        Internal function to generate the executable command to either use jupyter or python

        Args:
            working_directory (str): working directory of the current job
            script_path (str): path to the script which should be executed in the working directory

        Returns:
            str: executable command
        """
        file_name = os.path.basename(script_path)
        path = os.path.join(working_directory, file_name)
        if file_name[-6:] == ".ipynb":
            return (
                "jupyter nbconvert --ExecutePreprocessor.timeout=9999999 --to notebook --execute " + path
            )
        elif file_name[-3:] == ".py":
            return "python " + path
        else:
            raise ValueError("Filename not recognized: {}".format(path))

    def _executable_activate_mpi(self):
        """
        Internal helper function to switch the executable to MPI mode
        """
        pass

    @staticmethod
    def _get_abs_path(path):
        """
        Internal function to convert absolute or relative paths to absolute paths, using os.path.normpath,
        os.path.abspath and os.path.curdir

        Args:
            path (str): relative or absolute path

        Returns:
            str: absolute path
        """
        return os.path.normpath(os.path.join(os.path.abspath(os.path.curdir), path))
class SxHarmPotTst(AtomisticGenericJob):
    def __init__(self, project, job_name):
        super(SxHarmPotTst, self).__init__(project, job_name)
        self.__version__ = "0.1"
        self.__name__ = "SxHarmPotTst"
        self.input = GenericParameters(table_name="interaction")
        self.input["interaction_radius"] = 4.0
        self.input["maximum_noise"] = 0.26
        self._positions_lst = []
        self._forces_lst = []
        self._md_job_id = None
        self._md_job = None

    @property
    def md_job(self):
        if self._md_job is None and self._md_job_id is not None:
            self._md_job = self.project.load(self._md_job_id)
        return self._md_job

    @md_job.setter
    def md_job(self, job):
        if job.status == "finished":
            self._md_job_id = job.job_id
            self._md_job = job
            self._positions_lst = job["output/generic/positions"]
            self._forces_lst = job["output/generic/forces"]
        else:
            raise ValueError("Job not finished!")

    def set_input_to_read_only(self):
        """
        This function enforces read-only mode for the input classes, but it has to be implemented in the
        individual classes.
        """
        super(SxHarmPotTst, self).set_input_to_read_only()
        self.input.read_only = True

    def write_harmpot(self, cwd, file_name="harmpot.sx"):
        harm_pot_str = (
            "format harmpot;\n\n"
            + "valenceCharge=0;\n"
            + "harmonicPotential {\n"
            + ' //include "refSym.sx";\n'
            + ' //include "equivalence.sx";\n'
            + " maxDist="
            + str(float(self.input["maximum_noise"]) / BOHR_TO_ANGSTROM)
            + ";\n"
            + ' include "shells.sx";\n'
            + "}\n"
            + 'include "structure_wrapper.sx";'
        )
        if cwd is not None:
            file_name = os.path.join(cwd, file_name)
        with open(file_name, "w") as f:
            f.write(harm_pot_str)

    def write_structure(self, cwd, file_name="structure_wrapper.sx"):
        structure_file_name = "structure.sx"
        iw = InputWriter()
        iw.structure = self._md_job.structure
        iw.write_structure(file_name=structure_file_name, cwd=cwd)
        with open(os.path.join(cwd, file_name), "w") as f:
            f.writelines(["structure { include <" + structure_file_name + ">; }"])

    def validate_ready_to_run(self):
        if len(self._positions_lst) == 0 or len(self._forces_lst) == 0:
            raise ValueError()

    def write_input(self):
        self.write_structure(cwd=self.working_directory, file_name="structure_wrapper.sx")
        self.write_harmpot(cwd=self.working_directory, file_name="harmpot.sx")
        pos_force_mat = np.concatenate((self._positions_lst, self._forces_lst), axis=2)
        cont_pos_force_mat = pos_force_mat.reshape(-1, pos_force_mat.shape[-1])
        np.savetxt(os.path.join(self.working_directory, "POSITIONs"), cont_pos_force_mat)
        lines = [
            "#!/bin/bash\n",
            "sxstructparam -i structure_wrapper.sx -c "
            + str(float(self.input["interaction_radius"]) / BOHR_TO_ANGSTROM)
            + " --printReduced=shells.sx --log\n",
            "sxharmpottst --param=POSITIONs --vasp --printHesse HesseMatrix_sphinx -i harmpot.sx --log --svd\n",
        ]
        with open(os.path.join(self.working_directory, "sxharmpottst.sh"), "w") as f:
            f.writelines(lines)

    def get_hesse_matrix(self):
        if "output" in self.project_hdf5.list_groups():
            return self.project_hdf5["output/hesse"]

    def collect_output(self):
        with self.project_hdf5.open("output") as hdf_out:
            hdf_out["hesse"] = np.loadtxt(
                os.path.join(self.working_directory, "HesseMatrix_sphinx")
            )

    def to_hdf(self, hdf=None, group_name=None):
        """
        Store the SxHarmPotTst object in the HDF5 file

        Args:
            hdf (ProjectHDFio): HDF5 group object - optional
            group_name (str): HDF5 subgroup name - optional
        """
        super(SxHarmPotTst, self).to_hdf(hdf=hdf, group_name=group_name)
        with self.project_hdf5.open("input") as hdf5_input:
            self.input.to_hdf(hdf5_input)
            if len(self._positions_lst) != 0:
                hdf5_input["positions"] = self._positions_lst
            if len(self._forces_lst) != 0:
                hdf5_input["forces"] = self._forces_lst
            if self._md_job_id is not None:
                hdf5_input["md_job_id"] = self._md_job_id

    def from_hdf(self, hdf=None, group_name=None):
        """
        Restore the SxHarmPotTst object from the HDF5 file

        Args:
            hdf (ProjectHDFio): HDF5 group object - optional
            group_name (str): HDF5 subgroup name - optional
        """
        super(SxHarmPotTst, self).from_hdf(hdf=hdf, group_name=group_name)
        with self.project_hdf5.open("input") as hdf5_input:
            self.input.from_hdf(hdf5_input)
            if "positions" in hdf5_input.list_nodes():
                self._positions_lst = hdf5_input["positions"]
            if "forces" in hdf5_input.list_nodes():
                self._forces_lst = hdf5_input["forces"]
            if "md_job_id" in hdf5_input.list_nodes():
                self._md_job_id = hdf5_input["md_job_id"]
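
# Usage sketch: SxHarmPotTst consumes positions/forces from a *finished* MD job (the md_job setter
# above raises otherwise) and exposes the fitted Hessian through get_hesse_matrix(). The project
# handle `pr`, the job names and the Angstrom unit of interaction_radius are assumptions.
def _example_fit_harmonic_potential(pr, md_job_name="md_run"):
    """Hypothetical helper: fit a harmonic potential from an existing finished MD job."""
    md_job = pr.load(md_job_name)                                     # assumed Project.load()
    harm = pr.create_job(job_type=SxHarmPotTst, job_name="harmpot")   # assumed create_job API
    harm.md_job = md_job                     # copies output/generic/positions and forces
    harm.input["interaction_radius"] = 4.0   # default from __init__; presumably in Angstrom
    harm.run()
    return harm.get_hesse_matrix()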
class SxUniqDispl(AtomisticGenericJob):
    def __init__(self, project, job_name):
        super(SxUniqDispl, self).__init__(project, job_name)
        self.__version__ = "0.1"
        self.__name__ = "SxUniqDispl"
        self.input = GenericParameters(table_name="displacement")
        self.input["displacement"] = 0.01
        self.structure_lst = []
        self._id_pyi_to_spx = []
        self._id_spx_to_pyi = []

    @property
    def id_spx_to_pyi(self):
        if self.structure is None:
            return None
        if len(self._id_spx_to_pyi) == 0:
            self._initialize_order()
        return self._id_spx_to_pyi

    @property
    def id_pyi_to_spx(self):
        if self.structure is None:
            return None
        if len(self._id_pyi_to_spx) == 0:
            self._initialize_order()
        return self._id_pyi_to_spx

    def _initialize_order(self):
        for elm_species in self.structure.get_species_objects():
            self._id_pyi_to_spx.append(
                np.arange(len(self.structure))[
                    self.structure.get_chemical_symbols() == elm_species.Abbreviation
                ]
            )
        self._id_pyi_to_spx = np.array([ooo for oo in self._id_pyi_to_spx for ooo in oo])
        self._id_spx_to_pyi = np.array([0] * len(self._id_pyi_to_spx))
        for i, p in enumerate(self._id_pyi_to_spx):
            self._id_spx_to_pyi[p] = i

    def set_input_to_read_only(self):
        """
        This function enforces read-only mode for the input classes, but it has to be implemented in the
        individual classes.
        """
        super(SxUniqDispl, self).set_input_to_read_only()
        self.input.read_only = True

    def list_structures(self):
        if self.status.finished:
            return self.structure_lst
        else:
            return []

    def write_structure(self, cwd, file_name="structure_wrapper.sx"):
        structure_file_name = "structure.sx"
        iw = InputWriter()
        iw.structure = self.structure
        iw.write_structure(file_name=structure_file_name, cwd=cwd)
        with open(os.path.join(cwd, file_name), "w") as f:
            f.writelines(["structure { include <" + structure_file_name + ">; }"])

    def extract_structure(self, working_directory):
        structure_lst = [self.structure]
        parser = Output(self)
        for f in os.listdir(working_directory):
            if "input-disp" in f:
                structure_template = self.structure.copy()
                parser.collect_relaxed_hist(file_name=f, cwd=working_directory)
                structure_template.cell = parser._parse_dict["cell"][0]
                structure_template.positions = parser._parse_dict["positions"][0]
                structure_lst.append(structure_template)
        return structure_lst

    def write_input(self):
        self.write_structure(cwd=self.working_directory, file_name="structure_wrapper.sx")
        lines = [
            "#!/bin/bash\n",
            "sxuniqdispl --log -d "
            + str(float(self.input["displacement"]) / BOHR_TO_ANGSTROM)
            + " -i structure_wrapper.sx\n",
        ]
        with open(os.path.join(self.working_directory, "sxuniqdispl.sh"), "w") as f:
            f.writelines(lines)

    def collect_output(self):
        self.structure_lst = self.extract_structure(working_directory=self.working_directory)
        with self.project_hdf5.open("output") as hdf_out:
            for ind, struct in enumerate(self.structure_lst):
                struct.to_hdf(hdf=hdf_out, group_name="structure_" + str(ind))

    def to_hdf(self, hdf=None, group_name=None):
        """
        Store the SxUniqDispl object in the HDF5 file

        Args:
            hdf (ProjectHDFio): HDF5 group object - optional
            group_name (str): HDF5 subgroup name - optional
        """
        super(SxUniqDispl, self).to_hdf(hdf=hdf, group_name=group_name)
        with self.project_hdf5.open("input") as hdf5_input:
            self.input.to_hdf(hdf5_input)

    def from_hdf(self, hdf=None, group_name=None):
        """
        Restore the SxUniqDispl object from the HDF5 file

        Args:
            hdf (ProjectHDFio): HDF5 group object - optional
            group_name (str): HDF5 subgroup name - optional
        """
        super(SxUniqDispl, self).from_hdf(hdf=hdf, group_name=group_name)
        with self.project_hdf5.open("input") as hdf5_input:
            self.input.from_hdf(hdf5_input)
        if "output" in self.project_hdf5.list_groups():
            with self.project_hdf5.open("output") as hdf5_output:
                self.structure_lst = [
                    Atoms().from_hdf(hdf5_output, group_name)
                    for group_name in hdf5_output.list_groups()
                ]
class ParallelMaster(GenericMaster):
    """
    MasterJob that handles the creation and analysis of several parallel jobs (including master and continuation
    jobs). Examples are Murnaghan or Phonon calculations.

    Args:
        project (ProjectHDFio): ProjectHDFio instance which points to the HDF5 file the job is stored in
        job_name (str): name of the job, which has to be unique within the project

    Attributes:
        .. attribute:: job_name
            name of the job, which has to be unique within the project
        .. attribute:: status
            execution status of the job, can be one of the following [initialized, appended, created, submitted,
            running, aborted, collect, suspended, refresh, busy, finished]
        .. attribute:: job_id
            unique id to identify the job in the pyiron database
        .. attribute:: parent_id
            job id of the predecessor job - the job which was executed before the current one in the current job series
        .. attribute:: master_id
            job id of the master job - a meta job which groups a series of jobs, which are executed either in
            parallel or in serial.
        .. attribute:: child_ids
            list of child job ids - only meta jobs have child jobs - jobs which list the meta job as their master
        .. attribute:: project
            Project instance the job is located in
        .. attribute:: project_hdf5
            ProjectHDFio instance which points to the HDF5 file the job is stored in
        .. attribute:: job_info_str
            short string to describe the job by its job_name and job ID - mainly used for logging
        .. attribute:: working_directory
            working directory the job is executed in - outside the HDF5 file
        .. attribute:: path
            path to the job as a combination of absolute file system path and path within the HDF5 file.
        .. attribute:: version
            Version of the hamiltonian, which is also the version of the executable unless a custom executable is used.
        .. attribute:: executable
            Executable used to run the job - usually the path to an external executable.
        .. attribute:: library_activated
            For job types which offer a Python library pyiron can use the python library instead of an external
            executable.
        .. attribute:: server
            Server object to handle the execution environment for the job.
        .. attribute:: queue_id
            the ID returned from the queuing system - it is most likely not the same as the job ID.
        .. attribute:: logger
            logger object to monitor the external execution and internal pyiron warnings.
        .. attribute:: restart_file_list
            list of files which are used to restart the calculation from these files.
        .. attribute:: job_type
            Job type object with all the available job types: ['ExampleJob', 'SerialMaster', 'ParallelMaster',
            'ScriptJob', 'ListMaster']
        .. attribute:: child_names
            Dictionary matching the child ID to the child job name.
        .. attribute:: ref_job
            Reference job template from which all jobs within the ParallelMaster are generated.
        .. attribute:: number_jobs_total
            Total number of jobs
    """

    def __init__(self, project, job_name):
        self.input = GenericParameters("parameters")
        super(ParallelMaster, self).__init__(project, job_name=job_name)
        self.__name__ = "ParallelMaster"
        self.__version__ = "0.3"
        self._ref_job = None
        self._output = GenericOutput()
        self._job_generator = None
        self.submission_status = SubmissionStatus(db=project.db, job_id=self.job_id)
        self.refresh_submission_status()

    @property
    def ref_job(self):
        """
        Get the reference job template from which all jobs within the ParallelMaster are generated.

        Returns:
            GenericJob: reference job
        """
        if self._ref_job:
            return self._ref_job
        try:
            ref_job = self[0]
            if isinstance(ref_job, GenericJob):
                self._ref_job = ref_job
                self._ref_job._job_id = None
                self._ref_job._status = JobStatus(db=self.project.db)
                return self._ref_job
            else:
                return None
        except IndexError:
            return None

    @ref_job.setter
    def ref_job(self, ref_job):
        """
        Set the reference job template from which all jobs within the ParallelMaster are generated.

        Args:
            ref_job (GenericJob): reference job
        """
        self.append(ref_job)

    @property
    def number_jobs_total(self):
        """
        Get the total number of jobs

        Returns:
            int: number of total jobs
        """
        return self.submission_status.total_jobs

    @number_jobs_total.setter
    def number_jobs_total(self, num_jobs):
        """
        Set the total number of jobs (optional: default = None)

        Args:
            num_jobs (int): number of submitted jobs
        """
        self.submission_status.total_jobs = num_jobs

    def set_input_to_read_only(self):
        """
        This function enforces read-only mode for the input classes, but it has to be implemented in the
        individual classes.
        """
        self.input.read_only = True

    def reset_job_id(self, job_id=None):
        """
        Reset the job id - sets the job_id to None as well as all connected modules like JobStatus and
        SubmissionStatus.
        """
        super(ParallelMaster, self).reset_job_id(job_id=job_id)
        if job_id is not None:
            self.submission_status = SubmissionStatus(db=self.project.db, job_id=job_id)
        else:
            self.submission_status = SubmissionStatus(db=self.project.db, job_id=self.job_id)

    def to_hdf(self, hdf=None, group_name=None):
        """
        Store the ParallelMaster in an HDF5 file

        Args:
            hdf (ProjectHDFio): HDF5 group object - optional
            group_name (str): HDF5 subgroup name - optional
        """
        super(ParallelMaster, self).to_hdf(hdf=hdf, group_name=group_name)
        with self.project_hdf5.open("input") as hdf5_input:
            self.input.to_hdf(hdf5_input)

    def from_hdf(self, hdf=None, group_name=None):
        """
        Restore the ParallelMaster from an HDF5 file

        Args:
            hdf (ProjectHDFio): HDF5 group object - optional
            group_name (str): HDF5 subgroup name - optional
        """
        super(ParallelMaster, self).from_hdf(hdf=hdf, group_name=group_name)
        with self.project_hdf5.open("input") as hdf5_input:
            self.input.from_hdf(hdf5_input)

    def write_input(self):
        """
        Write the input files - this contains the GenericInput of the ParallelMaster as well as resetting the
        submission status.
        """
        self.submission_status.submitted_jobs = 0
        self.input.write_file(file_name="input.inp", cwd=self.working_directory)

    def collect_output(self):
        """
        Collect the output files of the external executable and store the information in the HDF5 file. This
        method has to be implemented in the individual meta jobs derived from the ParallelMaster.
        """
        raise NotImplementedError("Implement in derived class")

    def collect_logfiles(self):
        """
        Collect the log files of the external executable and store the information in the HDF5 file. This method
        is currently not implemented for the ParallelMaster.
        """
        pass

    def output_to_pandas(self, sort_by=None, h5_path="output"):
        """
        Convert the output of all child jobs to a pandas DataFrame object.

        Args:
            sort_by (str): sort the output using pandas.DataFrame.sort_values(by=sort_by)
            h5_path (str): select child output to include - default='output'

        Returns:
            pandas.DataFrame: output as dataframe
        """
        # TODO: The output to pandas function should no longer be required
        with self.project_hdf5.open(h5_path) as hdf:
            for key in hdf.list_nodes():
                self._output[key] = hdf[key]
        df = pandas.DataFrame(self._output)
        if sort_by is not None:
            df = df.sort_values(by=sort_by)
        return df

    # TODO: make it more general and move it then into genericJob
    def show_hdf(self):
        """
        Display the output of the child jobs in a human readable print out
        """
        try:
            display = getattr(importlib.import_module("IPython"), "display")
        except ModuleNotFoundError:
            print("show_hdf() requires IPython to be installed.")
        else:
            for nn in self.project_hdf5.list_groups():
                with self.project_hdf5.open(nn) as hdf_dir:
                    display.display(nn)
                    if nn.strip() == "output":
                        display.display(self.output_to_pandas(h5_path=nn))
                        continue
                    for n in hdf_dir.list_groups():
                        display.display("-->" + n)
                        try:
                            display.display(hdf_dir.get_pandas(n))
                        except Exception as e:
                            print(e)
                            print("Not a pandas object")

    def save(self):
        """
        Save the object by writing the content to the HDF5 file and storing an entry in the database.

        Returns:
            (int): Job ID stored in the database
        """
        job_id = super(ParallelMaster, self).save()
        self.refresh_submission_status()
        return job_id

    def refresh_submission_status(self):
        """
        Refresh the submission status - if a job ID is set the submission status is loaded from the database.
        """
        if self.job_id:
            self.submission_status = SubmissionStatus(db=self.project.db, job_id=self.job_id)
            self.submission_status.refresh()

    def interactive_ref_job_initialize(self):
        """
        To execute the reference job in interactive mode it is necessary to initialize it.
        """
        if len(self._job_name_lst) > 0:
            self._ref_job = self.pop(-1)
            self._ref_job.job_name = self.job_name + "_" + self._ref_job.job_name
            if self._job_id is not None and self._ref_job._master_id is None:
                self._ref_job.master_id = self.job_id

    def copy(self):
        """
        Copy the GenericJob object which links to the job and its HDF5 file

        Returns:
            GenericJob: New GenericJob object pointing to the same job
        """
        new_job = super(ParallelMaster, self).copy()
        new_job.ref_job = self.ref_job
        return new_job

    def copy_to(self, project=None, new_job_name=None, input_only=False, new_database_entry=True):
        """
        Copy the content of the job including the HDF5 file to a new location

        Args:
            project (ProjectHDFio): project to copy the job to
            new_job_name (str): to duplicate the job within the same project it is necessary to modify the job
                                name - optional
            input_only (bool): [True/False] to copy only the input - default False
            new_database_entry (bool): [True/False] to create a new database entry - default True

        Returns:
            GenericJob: GenericJob object pointing to the new location.
        """
        new_generic_job = super(ParallelMaster, self).copy_to(
            project=project,
            new_job_name=new_job_name,
            input_only=input_only,
            new_database_entry=new_database_entry,
        )
        new_generic_job.submission_status = SubmissionStatus(
            db=new_generic_job._hdf5.project.db, job_id=new_generic_job.job_id
        )
        return new_generic_job

    def is_finished(self):
        """
        Check if the ParallelMaster job is finished - by checking the job status and the submission status.

        Returns:
            bool: [True/False]
        """
        if self.status.finished:
            return True
        if len(self.child_ids) < len(self._job_generator):
            return False
        return set(
            [
                self.project.db.get_item_by_id(child_id)["status"]
                for child_id in self.child_ids
            ]
        ) < {"finished", "busy", "refresh", "aborted", "not_converged"}

    def iter_jobs(self, convert_to_object=True):
        """
        Iterate over the jobs within the ListMaster

        Args:
            convert_to_object (bool): load the full GenericJob object (default) or just the HDF5 / JobCore object

        Returns:
            yield: Yield of GenericJob or JobCore
        """
        for job_id in self._get_jobs_sorted():
            yield self.project.load(job_id, convert_to_object=convert_to_object)

    def _get_jobs_sorted(self):
        job_names = self.child_names.values()
        return [
            j
            for j in [
                self._job_generator.job_name(p)
                for p in self._job_generator.parameter_list
            ]
            if j in job_names
        ]

    def __getitem__(self, item):
        """
        Get/read data from the HDF5 file

        Args:
            item (str, slice): path to the data or key of the data object

        Returns:
            dict, list, float, int: data or data object
        """
        child_id_lst = self.child_ids
        child_name_lst = [
            self.project.db.get_item_by_id(child_id)["job"]
            for child_id in self.child_ids
        ]
        if isinstance(item, int):
            total_lst = self._job_name_lst + child_name_lst
            item = total_lst[item]
        return self._get_item_when_str(
            item=item, child_id_lst=child_id_lst, child_name_lst=child_name_lst
        )

    def __len__(self):
        """
        Length of the ListMaster equals the number of children appended.

        Returns:
            int: length of the ListMaster
        """
        return len(self.child_ids)

    def run_if_refresh(self):
        """
        Internal helper function; run_if_refresh is called when the job status is 'refresh'. If the job was
        suspended previously, the job is going to be started again, to be continued.
        """
        self._logger.info(
            "{}, status: {}, finished: {} parallel master refresh".format(
                self.job_info_str, self.status, self.is_finished()
            )
        )
        if self.is_finished():
            self.status.collect = True
            self.run()  # self.run_if_collect()
        elif (
            self.server.run_mode.non_modal or self.server.run_mode.queue
        ) and not self.submission_status.finished:
            self.run_static()
        else:
            self.refresh_job_status()
            if self.status.refresh:
                self.status.suspended = True
            if self.status.busy:
                self.status.refresh = True
                self.run_if_refresh()

    def _run_if_collect(self):
        """
        Internal helper function; run_if_collect is called when the job status is 'collect'. It collects the
        simulation output using the standardized functions collect_output() and collect_logfiles(). Afterwards the
        status is set to 'finished'.
        """
        self._logger.info("{}, status: {}, finished".format(self.job_info_str, self.status))
        self.collect_output()
        job_id = self.get_job_id()
        db_dict = {}
        start_time = self.project.db.get_item_by_id(job_id)["timestart"]
        db_dict["timestop"] = datetime.now()
        db_dict["totalcputime"] = (db_dict["timestop"] - start_time).seconds
        self.project.db.item_update(db_dict, job_id)
        self.status.finished = True
        self._hdf5["status"] = self.status.string
        self._logger.info("{}, status: {}, parallel master".format(self.job_info_str, self.status))
        self.update_master()
        # self.send_to_database()

    def _validate_cores(self, job, cores_for_session):
        """
        Check whether the cores requested by the next child job would exceed the cores available to the master.

        Args:
            job (GenericJob): child job to be started
            cores_for_session (list): list of currently active cores - list of integers

        Returns:
            bool: [True/False]
        """
        return (
            self.get_child_cores() + job.server.cores + sum(cores_for_session)
            > self.server.cores
        )

    def _next_job_series(self, job):
        """
        Generate a list of child jobs to be executed in the next iteration.

        Args:
            job (GenericJob): child job to be started

        Returns:
            list: list of GenericJob objects
        """
        job_to_be_run_lst, cores_for_session = [], []
        while job is not None:
            self._logger.debug("create job: %s %s", job.job_info_str, job.master_id)
            if not job.status.finished:
                self.submission_status.submit_next()
                job_to_be_run_lst.append(job)
                cores_for_session.append(job.server.cores)
                self._logger.info("{}: finished job {}".format(self.job_name, job.job_name))
            job = next(self._job_generator, None)
            if job is not None and self._validate_cores(job, cores_for_session):
                job = None
        return job_to_be_run_lst

    def _run_if_child_queue(self, job):
        """
        Run function which is executed when the child jobs are submitted to the queue. In this case all child jobs
        are submitted at the same time without considering the number of cores specified for the ParallelMaster.

        Args:
            job (GenericJob): child job to be started
        """
        while job is not None:
            self._logger.debug("create job: %s %s", job.job_info_str, job.master_id)
            if not job.status.finished:
                job.run()
                self._logger.info("{}: submitted job {}".format(self.job_name, job.job_name))
            job = next(self._job_generator, None)
        self.submission_status.submitted_jobs = self.submission_status.total_jobs
        self.status.suspended = True
        if self.is_finished():
            self.status.collect = True
            self.run()

    def _run_if_master_non_modal_child_non_modal(self, job):
        """
        Run function which is executed when the ParallelMaster as well as its children are running in non modal
        mode.

        Args:
            job (GenericJob): child job to be started
        """
        job_to_be_run_lst = self._next_job_series(job)
        if self.project.db.get_item_by_id(self.job_id)["status"] != "busy":
            self.status.suspended = True
            for job in job_to_be_run_lst:
                job.run()
            if self.master_id:
                del self
        else:
            self.run_static()

    def _run_if_master_modal_child_modal(self, job):
        """
        Run function which is executed when the ParallelMaster as well as its children are running in modal mode.

        Args:
            job (GenericJob): child job to be started
        """
        while job is not None:
            self._logger.debug("create job: %s %s", job.job_info_str, job.master_id)
            if not job.status.finished:
                self.submission_status.submit_next()
                job.run()
                self._logger.info("{}: finished job {}".format(self.job_name, job.job_name))
            job = next(self._job_generator, None)
        if self.is_finished():
            self.status.collect = True
            self.run()

    def _run_if_master_modal_child_non_modal(self, job):
        """
        Run function which is executed when the ParallelMaster is running in modal mode and its children are
        running in non modal mode.

        Args:
            job (GenericJob): child job to be started
        """
        pool = multiprocessing.Pool(self.server.cores)
        job_lst = []
        for i, p in enumerate(self._job_generator.parameter_list):
            if hasattr(self._job_generator, "job_name"):
                job = self.create_child_job(self._job_generator.job_name(parameter=p))
            else:
                job = self.create_child_job(self.ref_job.job_name + "_" + str(i))
            job = self._job_generator.modify_job(job=job, parameter=p)
            job.server.run_mode.modal = True
            job.save()
            job.project_hdf5.create_working_directory()
            job.write_input()
            if s.database_is_disabled or (
                s.queue_adapter is not None and s.queue_adapter.remote_flag
            ):
                job_lst.append(
                    (
                        job.project.path,
                        None,
                        job.project_hdf5.file_name + job.project_hdf5.h5_path,
                        False,
                        False,
                    )
                )
            else:
                job_lst.append((job.project.path, job.job_id, None, False, False))
        pool.map(job_wrap_function, job_lst)
        if s.database_is_disabled:
            self.project.db.update()
        self.status.collect = True
        self.run()  # self.run_if_collect()

    def run_static(self):
        """
        The run_static function is executed within the GenericJob class and depending on the run_mode of the
        ParallelMaster and its child jobs a more specific run function is selected.
        """
        self._logger.info("{} run parallel master (modal)".format(self.job_info_str))
        self.status.running = True
        self.submission_status.total_jobs = len(self._job_generator)
        self.submission_status.submitted_jobs = 0
        if self.job_id and not self.is_finished():
            self._logger.debug("{} child project {}".format(self.job_name, self.project.__str__()))
            job = next(self._job_generator, None)
            if self.server.run_mode.queue:
                self._run_if_master_modal_child_non_modal(job=job)
            elif job.server.run_mode.queue:
                self._run_if_child_queue(job)
            elif self.server.run_mode.non_modal and job.server.run_mode.non_modal:
                self._run_if_master_non_modal_child_non_modal(job)
            elif (self.server.run_mode.modal and job.server.run_mode.modal) or (
                self.server.run_mode.interactive and job.server.run_mode.interactive
            ):
                self._run_if_master_modal_child_modal(job)
            elif self.server.run_mode.modal and job.server.run_mode.non_modal:
                self._run_if_master_modal_child_non_modal(job)
            else:
                raise TypeError()
        else:
            self.status.collect = True
            self.run()

    def run_if_interactive(self):
        if not (
            self.ref_job.server.run_mode.interactive
            or self.ref_job.server.run_mode.interactive_non_modal
        ):
            raise ValueError(
                "The child job has to be run_mode interactive or interactive_non_modal."
            )
        if isinstance(self.ref_job, GenericMaster):
            self.run_static()
        elif self.server.cores == 1:
            self.interactive_ref_job_initialize()
            for parameter in self._job_generator.parameter_list:
                self._job_generator.modify_job(job=self.ref_job, parameter=parameter)
                self.ref_job.run()
            self.ref_job.interactive_close()
        else:
            if self.server.cores > len(self._job_generator.parameter_list):
                number_of_jobs = len(self._job_generator.parameter_list)
            else:
                number_of_jobs = self.server.cores
            max_tasks_per_job = (
                int(len(self._job_generator.parameter_list) // number_of_jobs) + 1
            )
            parameters_sub_lst = [
                self._job_generator.parameter_list[i : i + max_tasks_per_job]
                for i in range(0, len(self._job_generator.parameter_list), max_tasks_per_job)
            ]
            list_of_sub_jobs = [
                self.create_child_job("job_" + str(i)) for i in range(number_of_jobs)
            ]
            primary_job = list_of_sub_jobs[0]
            if not primary_job.server.run_mode.interactive_non_modal:
                raise ValueError("The child job has to be run_mode interactive_non_modal.")
            if primary_job.server.cores != 1:
                raise ValueError("The child job can only use a single core.")
            for iteration in range(len(parameters_sub_lst[0])):
                for job_ind, job in enumerate(list_of_sub_jobs):
                    if iteration < len(parameters_sub_lst[job_ind]):
                        self._job_generator.modify_job(
                            job=job, parameter=parameters_sub_lst[job_ind][iteration]
                        )
                        job.run()
                for job_ind, job in enumerate(list_of_sub_jobs):
                    if iteration < len(parameters_sub_lst[job_ind]):
                        job.interactive_fetch()
            for job in list_of_sub_jobs:
                job.interactive_close()
            self.interactive_ref_job_initialize()
            self.ref_job.run()
            for key in primary_job.interactive_cache.keys():
                output_sum = []
                for job in list_of_sub_jobs:
                    output = job["output/interactive/" + key]
                    if isinstance(output, np.ndarray):
                        output = output.tolist()
                    if isinstance(output, list):
                        output_sum += output
                    else:
                        raise TypeError(
                            "output should be list or numpy.ndarray but it is ",
                            type(output),
                        )
                self.ref_job.interactive_cache[key] = output_sum
            interactive_cache_backup = self.ref_job.interactive_cache.copy()
            self.ref_job.interactive_flush(path="generic", include_last_step=True)
            self.ref_job.interactive_cache = interactive_cache_backup
            self.ref_job.interactive_close()
            self.status.collect = True
            self.run()

    def create_child_job(self, job_name):
        """
        Internal helper function to create the next child job from the reference job template - usually this is
        called as part of the create_jobs() function.

        Args:
            job_name (str): name of the next job

        Returns:
            GenericJob: next job
        """
        if not self.server.new_hdf:
            project = self.project
            where_dict = {
                "job": str(job_name),
                "project": str(self.project_hdf5.project_path),
                "subjob": str(self.project_hdf5.h5_path + "/" + job_name),
            }
            response = self.project.db.get_items_dict(where_dict, return_all_columns=False)
            if len(response) > 0:
                job_id = response[-1]["id"]
            else:
                job_id = None
        else:
            project = self.project.open(self.job_name + "_hdf5")
            job_id = project.get_job_id(job_specifier=job_name)
        if job_id is not None:
            ham = project.load(job_id)
            self._logger.debug("job {} found, status: {}".format(job_name, ham.status))
            if ham.server.run_mode.queue:
                self.project.refresh_job_status_based_on_job_id(job_id, que_mode=True)
            else:
                self.project.refresh_job_status_based_on_job_id(job_id, que_mode=False)
            if ham.status.aborted:
                ham.status.created = True
            self._logger.debug("job - status: {}".format(ham.status))
            return ham
        job = self.ref_job.copy()
        job = self._load_all_child_jobs(job_to_load=job)
        if self.server.new_hdf:
            job.project_hdf5 = self.project_hdf5.create_hdf(
                path=self.project.open(self.job_name + "_hdf5").path, job_name=job_name
            )
        else:
            job.project_hdf5 = self.project_hdf5.open(job_name)
        if isinstance(job, GenericMaster):
            for sub_job in job._job_object_dict.values():
                self._child_job_update_hdf(parent_job=job, child_job=sub_job)
        self._logger.debug(
            "create_job:: {} {} {} {}".format(
                self.project_hdf5.path,
                self._name,
                self.project_hdf5.h5_path,
                str(self.get_job_id()),
            )
        )
        job._name = job_name
        job.master_id = self.get_job_id()
        job.status.initialized = True
        if self.server.run_mode.non_modal and job.server.run_mode.modal:
            job.server.run_mode.non_modal = True
        elif self.server.run_mode.queue:
            job.server.run_mode.thread = True
        self._logger.info("{}: run job {}".format(self.job_name, job.job_name))
        return job

    def _db_server_entry(self):
        """
        Connect all the info regarding the server into a single word that can be used e.g. as an entry in a
        database

        Returns:
            (str): server info as single word
        """
        db_entry = super(ParallelMaster, self)._db_server_entry()
        if self.submission_status.total_jobs:
            return (
                db_entry
                + "#"
                + str(self.submission_status.submitted_jobs)
                + "/"
                + str(self.submission_status.total_jobs)
            )
        else:
            return db_entry + "#" + str(self.submission_status.submitted_jobs)
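
# Sketch of the generator interface that ParallelMaster relies on: run_static() and
# run_if_interactive() only touch len(), iteration, parameter_list, job_name(parameter) and
# modify_job(job, parameter). JobGenerator is assumed to be the pyiron base class providing the
# iteration over the parameter list; the strain sweep, its default values and apply_strain() on
# the structure are illustrative assumptions of how a derived master could populate
# self._job_generator.
class StrainJobGenerator(JobGenerator):
    """Hypothetical generator: one child job per applied volumetric strain."""

    @property
    def parameter_list(self):
        # strain factors are illustrative defaults
        return [0.98, 0.99, 1.00, 1.01, 1.02]

    @staticmethod
    def job_name(parameter):
        return "strain_{:.2f}".format(parameter).replace(".", "_")

    def modify_job(self, job, parameter):
        # scale the cell of the copied reference job (assumes the child job carries a structure
        # and that apply_strain() is available on it)
        job.structure.apply_strain(parameter - 1.0)
        return job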
class InteractiveWrapper(GenericMaster):
    def __init__(self, project, job_name):
        super(InteractiveWrapper, self).__init__(project, job_name)
        self._ref_job = None
        self.input = GenericParameters("parameters")

    @property
    def structure(self):
        if self.ref_job:
            return self._ref_job.structure
        else:
            return None

    @structure.setter
    def structure(self, basis):
        if self.ref_job:
            self._ref_job.structure = basis
        else:
            raise ValueError(
                'A structure can only be set after a start job has been assigned.'
            )

    @property
    def ref_job(self):
        """
        Get the reference job template from which all jobs within the ParallelMaster are generated.

        Returns:
            GenericJob: reference job
        """
        if self._ref_job is not None:
            return self._ref_job
        try:
            if isinstance(self[0], GenericJob):
                self._ref_job = self[0]
                return self._ref_job
            else:
                return None
        except IndexError:
            return None

    @ref_job.setter
    def ref_job(self, ref_job):
        """
        Set the reference job template from which all jobs within the ParallelMaster are generated.

        Args:
            ref_job (GenericJob): reference job
        """
        self.append(ref_job)

    def validate_ready_to_run(self):
        """
        Validate that the calculation is ready to be executed. By default no generic checks are performed, but one
        could check that the input information is complete or validate the consistency of the input at this point.
        """
        self.ref_job.validate_ready_to_run()

    def ref_job_initialize(self):
        """
        Initialize the reference job by popping it from the internal job list and linking it to this master.
        """
        if len(self._job_name_lst) > 0:
            self._ref_job = self.pop(-1)
            if self._job_id is not None and self._ref_job._master_id is None:
                self._ref_job.master_id = self.job_id

    def get_final_structure(self):
        """
        Returns:
            Atoms: final structure of the reference job (deprecated - use get_structure() instead)
        """
        warnings.warn(
            "get_final_structure() is deprecated - please use get_structure() instead.",
            DeprecationWarning,
        )
        if self.ref_job:
            return self._ref_job.get_structure(iteration_step=-1)
        else:
            return None

    def to_hdf(self, hdf=None, group_name=None):
        """
        Store the InteractiveWrapper in an HDF5 file

        Args:
            hdf (ProjectHDFio): HDF5 group object - optional
            group_name (str): HDF5 subgroup name - optional
        """
        if self._ref_job is not None and self._ref_job.job_id is None:
            self.append(self._ref_job)
        super(InteractiveWrapper, self).to_hdf(hdf=hdf, group_name=group_name)
        with self.project_hdf5.open('input') as hdf5_input:
            self.input.to_hdf(hdf5_input)

    def from_hdf(self, hdf=None, group_name=None):
        """
        Restore the InteractiveWrapper from an HDF5 file

        Args:
            hdf (ProjectHDFio): HDF5 group object - optional
            group_name (str): HDF5 subgroup name - optional
        """
        super(InteractiveWrapper, self).from_hdf(hdf=hdf, group_name=group_name)
        with self.project_hdf5.open('input') as hdf5_input:
            self.input.from_hdf(hdf5_input)

    def collect_output(self):
        pass

    def collect_logfiles(self):
        pass

    def db_entry(self):
        """
        Generate the initial database entry

        Returns:
            (dict): db_dict
        """
        db_dict = super(InteractiveWrapper, self).db_entry()
        if self.structure:
            parent_structure = self.structure.get_parent_basis()
            db_dict["ChemicalFormula"] = parent_structure.get_chemical_formula()
        return db_dict

    def _db_entry_update_run_time(self):
        """
        Update the database entry with the stop time and the total CPU time of the job.
        """
        job_id = self.get_job_id()
        db_dict = {}
        start_time = self.project.db.get_item_by_id(job_id)["timestart"]
        db_dict["timestop"] = datetime.now()
        db_dict["totalcputime"] = (db_dict["timestop"] - start_time).seconds
        self.project.db.item_update(db_dict, job_id)

    def _finish_job(self):
        """
        Mark the job as finished, update the database entry and notify the master job.
        """
        self.status.finished = True
        self._db_entry_update_run_time()
        self._logger.info(
            "{}, status: {}, monte carlo master".format(self.job_info_str, self.status)
        )
        self._calculate_successor()
        self.send_to_database()
        self.update_master()

    def __getitem__(self, item):
        """
        Get/read data from the GenericMaster

        Args:
            item (str, slice): path to the data or key of the data object

        Returns:
            dict, list, float, int: data or data object
        """
        child_id_lst = self.child_ids
        child_name_lst = [
            self.project.db.get_item_by_id(child_id)["job"]
            for child_id in self.child_ids
        ]
        if isinstance(item, str):
            name_lst = item.split("/")
            if name_lst[0] in child_name_lst:
                child_id = child_id_lst[child_name_lst.index(name_lst[0])]
                if len(name_lst) > 1:
                    return self.project.inspect(child_id)['/'.join(name_lst[1:])]
                else:
                    return self.project.load(child_id, convert_to_object=True)
            if name_lst[0] in self._job_name_lst:
                child = getattr(self, name_lst[0])
                if len(name_lst) == 1:
                    return child
                else:
                    return child['/'.join(name_lst[1:])]
            return super(GenericMaster, self).__getitem__(item)
        elif isinstance(item, int):
            total_lst = child_name_lst + self._job_name_lst
            job_name = total_lst[item]
            if job_name in child_name_lst:
                child_id = child_id_lst[child_name_lst.index(job_name)]
                return self.project.load(child_id, convert_to_object=True)
            else:
                return self._job_object_lst[item]
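
# Usage sketch of the __getitem__ implemented above: children can be addressed by name with an
# HDF-style path, or by integer index. The child name "ref_job_0" and the HDF path are assumptions
# for illustration; the access patterns follow the branches of __getitem__ directly.
def _example_read_child_output(wrapper):
    """Hypothetical helper: pull data out of an InteractiveWrapper's children."""
    energies = wrapper["ref_job_0/output/generic/energy_tot"]  # name + HDF path -> inspected data
    first_child = wrapper[0]                                   # integer index -> loaded child job
    return energies, first_child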