Example 1
    def test_from_hdf_readonly(self):
        self.pl.to_hdf(hdf=self.hdf, group_name="read_only_from")
        pl = InputList()
        pl.from_hdf(self.hdf, group_name="read_only_from")
        self.assertEqual(pl.read_only, self.hdf["read_only_from/read_only"],
                         "read-only parameter not correctly read from HDF")

        self.hdf["read_only_from/read_only"] = True
        pl.from_hdf(self.hdf, group_name="read_only_from")
        self.assertEqual(pl.read_only, self.hdf["read_only_from/read_only"],
                         "read-only parameter not correctly read from HDF")
Example 2
class ImageJob(GenericJob):
    """
    A job type for storing and processing image data.

    TODO: Consider allowing the `data` field of each image to be saved to hdf5...

    Attributes:
        images (DistributingList): A list of `Image` objects.
    """
    def __init__(self, project, job_name):
        super(ImageJob, self).__init__(project, job_name)
        self.__name__ = "ImageJob"
        self._images = DistributingList()
        self.input = InputList(table_name="input")
        self.output = InputList(table_name="output")

    @property
    def images(self):
        return self._images

    @images.setter
    def images(self, val):
        if isinstance(val, DistributingList):
            self._images = val
        elif isinstance(val, (tuple, list, np.ndarray)):
            if not all([isinstance(obj, Image) for obj in val]):
                raise ValueError(
                    "Only `Image`-type objects can be set to the `images` attribute."
                )
            self._images = DistributingList(val)
        else:
            raise ValueError(
                "Images was expecting a list-like object, but got {}".format(
                    type(val)))

    @staticmethod
    def _get_factors(n):
        """Return the factor pair of `n` closest to a square, used as (nrows, ncols) for the plot grid."""
        i = int(n**0.5 + 0.5)
        while n % i != 0:
            i -= 1
        return i, int(n / i)

    def plot(self,
             mask=None,
             subplots_kwargs=None,
             imshow_kwargs=None,
             hide_axes=True):
        """
        Make a simple matplotlib `imshow` plot for each of the images on a grid.

        Args:
            mask (list/numpy.ndarray): An integer index mask for selecting a subset of the images to plot.
            subplots_kwargs (dict): Keyword arguments to pass to the figure generation. (Default is None.)
            imshow_kwargs (dict): Keyword arguments to pass to the `imshow` plotting command. (Default is None.)
            hide_axes (bool): Whether to hide axis ticks and labels. (Default is True.)

        Returns:
            (matplotlib.figure.Figure): The figure the plots are in.
            (list): The axes the plot is on.
        """

        if mask is not None:
            images = self.images[mask]
        else:
            images = self.images

        subplots_kwargs = subplots_kwargs or {}
        imshow_kwargs = imshow_kwargs or {}
        nrows, ncols = self._get_factors(len(images))
        fig, axes = plt.subplots(nrows=nrows, ncols=ncols, **subplots_kwargs)
        axes = np.atleast_2d(axes)
        for n, img in enumerate(images):
            i = int(np.floor(n / ncols))
            j = n % ncols
            ax = axes[i, j]
            img.plot(ax=ax, imshow_kwargs=imshow_kwargs, hide_axes=hide_axes)

        fig.tight_layout()
        return fig, axes

    def add_image(self,
                  source,
                  metadata=None,
                  as_gray=False,
                  relative_path=True):
        """
        Add an image to the job.

        Args:
            source (str/numpy.ndarray): The filepath to the data, or the raw array of data itself.
            metadata (Metadata): The metadata associated with the source. (Default is None.)
            as_gray (bool): Whether to interpret the new data as grayscale. (Default is False.)
            relative_path (bool): Whether the path provided is relative. (Default is True, automatically converts to an
                absolute path before setting the `source` value of the image.)
        """

        # Check the array case first: `isfile` raises a TypeError for non-path-like input.
        if not isinstance(source, np.ndarray) and not isfile(source):
            raise ValueError(
                "Could not find a file at {}, nor is source an array.".format(
                    source))
        if isinstance(source, str) and relative_path:
            source = abspath(source)
        self.images.append(
            Image(source=source, metadata=metadata, as_gray=as_gray))

    def add_images(self, sources, metadata=None, as_gray=False):
        """
        Add multiple images to the job.

        Args:
            sources (str/list/tuple/numpy.ndarray): When a string, uses the `glob` module to look for matching files.
                When list-like, iteratively uses each element as a new source.
            metadata (Metadata): The metadata associated with all these sources. (Default is None.)
            as_gray (bool): Whether to interpret all this data as grayscale. (Default is False.)
        """

        if isinstance(sources, str):
            for match in iglob(sources):
                self.add_image(match, metadata=metadata, as_gray=as_gray)
        elif isinstance(sources, (list, tuple, np.ndarray)):
            for source in sources:
                self.add_image(source, metadata=metadata, as_gray=as_gray)

    def run(self, run_again=False, repair=False, debug=False, run_mode=None):
        super(ImageJob, self).run(run_again=run_again,
                                  repair=repair,
                                  debug=debug,
                                  run_mode=run_mode)

    def run_static(self):
        """This is just a toy example right now."""
        self.status.running = True
        if hasattr(self.input,
                   'filter') and self.input.filter == 'brightness_filter':
            fractions = []
            cutoffs = []
            masks = []
            for img in self.images:
                frac, cut, mask = brightness_filter(img)
                fractions.append(frac)
                cutoffs.append(cut)
                masks.append(mask)
            self.output.fractions = np.array(fractions)
            self.output.cutoffs = np.array(cutoffs)
            self.output.masks = np.array(masks)

        else:
            self.logger.warning("Didn't run anything. Check input.")
        # Flag the job for output collection and re-enter run() to trigger collect_output().
        self.status.collect = True
        self.run()

    def write_input(self):
        """Must define abstract method"""
        pass

    def collect_output(self):
        """Must define abstract method"""
        self.to_hdf()

    def to_hdf(self, hdf=None, group_name=None):
        """
        Store the job in an HDF5 file.

        Args:
            hdf (ProjectHDFio): HDF5 group object - optional
            group_name (str): HDF5 subgroup name - optional
        """
        super(ImageJob, self).to_hdf(hdf=hdf, group_name=group_name)
        self.input.to_hdf(hdf=self._hdf5, group_name=None)
        self.output.to_hdf(hdf=self._hdf5, group_name=None)
        with self._hdf5.open("images") as hdf5_server:
            for n, image in enumerate(self.images):
                image.to_hdf(hdf=hdf5_server, group_name="img{}".format(n))
        self._hdf5["n_images"] = n + 1

    def from_hdf(self, hdf=None, group_name=None):
        """
        Load the ImageJob from an HDF5 file.

        Args:
            hdf (ProjectHDFio): HDF5 group object - optional
            group_name (str): HDF5 subgroup name - optional
        """
        super(ImageJob, self).from_hdf(hdf=hdf, group_name=group_name)
        self.input.from_hdf(hdf=self._hdf5, group_name=None)
        self.output.from_hdf(hdf=self._hdf5, group_name=None)
        with self._hdf5.open("images") as hdf5_server:
            for n in np.arange(self._hdf5["n_images"], dtype=int):
                img = Image(source=None)
                img.from_hdf(hdf=hdf5_server, group_name="img{}".format(n))
                self.images.append(img)
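
A minimal usage sketch for the ImageJob class above, assuming it is registered as a pyiron job type and that brightness_filter is available; project and file names are illustrative:

from pyiron import Project

pr = Project("image_demo")                         # illustrative project name
job = pr.create_job(pr.job_type.ImageJob, "imgs")
job.add_images("my_images/*.tif", as_gray=True)    # glob pattern over hypothetical files
job.input.filter = "brightness_filter"             # the only filter run_static() currently handles
job.run()

fig, axes = job.plot()        # imshow grid, one panel per image
print(job.output.fractions)   # per-image bright-pixel fractions from brightness_filter
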
Example 3
    def test_from_hdf(self):
        self.pl.to_hdf(hdf=self.hdf)
        l = InputList(table_name="input")
        l.from_hdf(hdf=self.hdf)
        self.assertEqual(self.pl, l)
Example 4
    def test_from_hdf_group(self):
        self.pl.to_hdf(hdf=self.hdf, group_name="test_group")
        l = InputList(table_name="input")
        l.from_hdf(hdf=self.hdf, group_name="test_group")
        self.assertEqual(self.pl, l)
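
Examples 3 and 4 differ only in whether group_name is passed. Below is a sketch of the resulting HDF5 layout, continuing the hypothetical pl and hdf objects from the sketch after Example 1 and assuming the usual pyiron convention that group_name falls back to the list's table_name; the paths shown are illustrative:

# Without group_name, to_hdf writes under the list's table_name ("input"):
#   .../demo_job/input
pl.to_hdf(hdf=hdf)

# With an explicit group_name, the data lands in that subgroup instead:
#   .../demo_job/test_group
pl.to_hdf(hdf=hdf, group_name="test_group")
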
Example 5
class ScriptJob(GenericJob):
    """
    The ScriptJob class allows Python scripts and Jupyter notebooks to be submitted to the pyiron job management system.

    Args:
        project (ProjectHDFio): ProjectHDFio instance which points to the HDF5 file the job is stored in
        job_name (str): name of the job, which has to be unique within the project

    Attributes:

        .. attribute:: job_name

            name of the job, which has to be unique within the project

        .. attribute:: status

            execution status of the job, can be one of the following [initialized, appended, created, submitted, running,
                                                                      aborted, collect, suspended, refresh, busy, finished]

        .. attribute:: job_id

            unique id to identify the job in the pyiron database

        .. attribute:: parent_id

            job id of the predecessor job - the job which was executed before the current one in the current job series

        .. attribute:: master_id

            job id of the master job - a meta job which groups a series of jobs, which are executed either in parallel or in
            serial.

        .. attribute:: child_ids

            list of child job ids - only meta jobs have child jobs - jobs which list the meta job as their master

        .. attribute:: project

            Project instance the job is located in

        .. attribute:: project_hdf5

            ProjectHDFio instance which points to the HDF5 file the job is stored in

        .. attribute:: job_info_str

            short string to describe the job by its job_name and job ID - mainly used for logging

        .. attribute:: working_directory

            working directory the job is executed in - outside the HDF5 file

        .. attribute:: path

            path to the job as a combination of absolute file system path and path within the HDF5 file.

        .. attribute:: version

            Version of the hamiltonian, which is also the version of the executable unless a custom executable is used.

        .. attribute:: executable

            Executable used to run the job - usually the path to an external executable.

        .. attribute:: library_activated

            For job types which offer a Python library, pyiron can use the Python library instead of an external executable.

        .. attribute:: server

            Server object to handle the execution environment for the job.

        .. attribute:: queue_id

            the ID returned from the queuing system - it is most likely not the same as the job ID.

        .. attribute:: logger

            logger object to monitor the external execution and internal pyiron warnings.

        .. attribute:: restart_file_list

            list of files which are used to restart the calculation.

        .. attribute:: job_type

            Job type object with all the available job types: ['ExampleJob', 'SerialMaster', 'ParallelMaster', 'ScriptJob',
                                                               'ListMaster']

        .. attribute:: script_path

            the absolute path to the python script
    """

    def __init__(self, project, job_name):
        super(ScriptJob, self).__init__(project, job_name)
        self.__version__ = "0.1"
        self.__hdf_version__ = "0.2.0"
        self.__name__ = "Script"
        self._script_path = None
        self.input = InputList(table_name="custom_dict")

    @property
    def script_path(self):
        """
        Python script path

        Returns:
            str: absolute path to the python script
        """
        return self._script_path

    @script_path.setter
    def script_path(self, path):
        """
        Python script path

        Args:
            path (str): relative or absolute path to the python script or a corresponding notebook
        """
        if isinstance(path, str):
            self._script_path = self._get_abs_path(path)
            self.executable = self._executable_command(
                working_directory=self.working_directory, script_path=self._script_path
            )
        else:
            raise TypeError(
                "path should be a string, but {} is a {} instead.".format(
                    path, type(path)
                )
            )

    def set_input_to_read_only(self):
        """
        This function enforces read-only mode for the input classes, but it has to be implemented in the individual
        classes.
        """
        self.input.read_only = True

    def to_hdf(self, hdf=None, group_name=None):
        """
        Store the ScriptJob in an HDF5 file

        Args:
            hdf (ProjectHDFio): HDF5 group object - optional
            group_name (str): HDF5 subgroup name - optional
        """
        super(ScriptJob, self).to_hdf(hdf=hdf, group_name=group_name)
        with self.project_hdf5.open("input") as hdf5_input:
            hdf5_input["path"] = self._script_path
            self.input.to_hdf(hdf5_input)

    def from_hdf(self, hdf=None, group_name=None):
        """
        Restore the ScriptJob from an HDF5 file

        Args:
            hdf (ProjectHDFio): HDF5 group object - optional
            group_name (str): HDF5 subgroup name - optional
        """
        super(ScriptJob, self).from_hdf(hdf=hdf, group_name=group_name)
        if "HDF_VERSION" in self.project_hdf5.list_nodes():
            version = self.project_hdf5["HDF_VERSION"]
        else:
            version = "0.1.0"
        if version == "0.1.0":
            with self.project_hdf5.open("input") as hdf5_input:
                try:
                    self.script_path = hdf5_input["path"]
                    gp = GenericParameters(table_name="custom_dict")
                    gp.from_hdf(hdf5_input)
                    for k in gp.keys():
                        self.input[k] = gp[k]
                except TypeError:
                    pass
        elif version == "0.2.0":
            with self.project_hdf5.open("input") as hdf5_input:
                try:
                    self.script_path = hdf5_input["path"]
                except TypeError:
                    pass
                self.input.from_hdf(hdf5_input)
        else:
            raise ValueError("Cannot handle hdf version: {}".format(version))


    def write_input(self):
        """
        Copy the script to the working directory - only python scripts and jupyter notebooks are supported
        """
        if self._script_path is not None:
            file_name = os.path.basename(self._script_path)
            shutil.copyfile(
                src=self._script_path, dst=os.path.join(self.working_directory, file_name)
            )

    def collect_output(self):
        """
        Collect output function updates the master ID entries for all the child jobs created by this script job; if a
        child job is already assigned to a master job nothing happens - master IDs are not overwritten.
        """
        for job in self.project.iter_jobs(recursive=False, convert_to_object=False):
            pr_job = self.project.open(
                os.path.relpath(job.working_directory, self.project.path)
            )
            for subjob_id in pr_job.get_job_ids(recursive=False):
                if pr_job.db.get_item_by_id(subjob_id)["masterid"] is None:
                    pr_job.db.item_update({"masterid": str(job.job_id)}, subjob_id)

    def run_if_lib(self):
        """
        Compatibility function - but library run mode is not available
        """
        raise NotImplementedError(
            "Library run mode is not implemented for script jobs."
        )

    def collect_logfiles(self):
        """
        Compatibility function - but no log files are being collected
        """
        pass

    @staticmethod
    def _executable_command(working_directory, script_path):
        """
        Internal function to generate the executable command, using either jupyter or python.

        Args:
            working_directory (str): working directory of the current job
            script_path (str): path to the script which should be executed in the working directory

        Returns:
            str: executable command
        """
        file_name = os.path.basename(script_path)
        path = os.path.join(working_directory, file_name)
        if file_name.endswith(".ipynb"):
            return (
                "jupyter nbconvert --ExecutePreprocessor.timeout=9999999 --to notebook --execute "
                + path
            )
        elif file_name.endswith(".py"):
            return "python " + path
        else:
            raise ValueError("Filename not recognized: {}".format(path))

    def _executable_activate_mpi(self):
        """
        Internal helper function to switch the executable to MPI mode
        """
        pass

    @staticmethod
    def _get_abs_path(path):
        """
        Internal function to convert a relative or absolute path to a normalized absolute path, using
        os.path.normpath, os.path.abspath and os.path.curdir.

        Args:
           path (str): relative or absolute path

        Returns:
            str: absolute path
        """
        return os.path.normpath(os.path.join(os.path.abspath(os.path.curdir), path))
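
A minimal usage sketch for ScriptJob, assuming a standard pyiron setup; the project, script, and input names are illustrative:

from pyiron import Project

pr = Project("script_demo")                              # illustrative project name
job = pr.create_job(pr.job_type.ScriptJob, "analysis")
job.script_path = "analysis.py"                          # hypothetical script; the setter converts it to an absolute path
job.input["n_steps"] = 100                               # stored under the "custom_dict" group in HDF5
job.run()                                                # write_input() copies the script, then "python <path>" executes it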