Python ImageDataset Examples

Programming Language: Python

Namespace/Package Name: deepchem.data

Class/Type: ImageDataset

Examples at hotexamples.com: 2

Python ImageDataset - 2 examples found. These are the top-rated real-world Python examples of deepchem.data.ImageDataset extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

ImageDataset(2)

Frequently Used Methods

ImageDataset (2)

Example #1

Show file

File: data_loader.py Project: heartcored98/deepchem

    def featurize(self,
                  input_files,
                  labels=None,
                  weights=None,
                  in_memory=False):
        """Featurizes image files.

        Parameters
        ----------
        input_files: list
          Each file in this list should either be of a supported image format
          (.png, .tif only for now) or of a compressed folder of image files
          (only .zip for now). Directories are also accepted; their contents
          are traversed level by level. A value that is not a list is treated
          as a one-element list.
        labels: optional (default None)
          Passed through unchanged as `y` of the returned dataset.
        weights: optional (default None)
          Passed through unchanged as `w` of the returned dataset.
        in_memory: bool
          If true, return in-memory NumpyDataset. Else return ImageDataset.

        Returns
        -------
        NumpyDataset or ImageDataset
          A NumpyDataset built from `self.load_img(image_files)` when
          `in_memory` is true, otherwise an ImageDataset built from the
          collected file paths. In both cases the file paths are used as ids.

        Raises
        ------
        ValueError
          If a path given directly, or found inside a directory, is neither a
          directory, a .zip archive, nor a .png/.tif image. Archive members
          with other extensions are skipped silently.
        """
        if not isinstance(input_files, list):
            input_files = [input_files]

        image_extensions = (".png", ".tif")
        image_files = []
        # Directories may be nested: the contents of each directory found on
        # this pass are queued in `remainder` and handled on the next pass.
        while input_files:
            remainder = []
            for input_file in input_files:
                extension = os.path.splitext(input_file)[1].lower()
                # TODO(rbharath): Add support for more extensions
                if os.path.isdir(input_file):
                    remainder.extend(
                        os.path.join(input_file, subfile)
                        for subfile in os.listdir(input_file))
                elif extension == ".zip":
                    # NOTE: the extraction directory is not removed here; the
                    # collected paths point into it and are handed to the
                    # returned dataset.
                    zip_dir = tempfile.mkdtemp()
                    # The context manager closes the archive even when
                    # extraction fails part-way through.
                    with zipfile.ZipFile(input_file, 'r') as zip_ref:
                        zip_ref.extractall(path=zip_dir)
                        member_names = zip_ref.namelist()
                    for name in member_names:
                        zip_file = os.path.join(zip_dir, name)
                        member_ext = os.path.splitext(zip_file)[1].lower()
                        if member_ext in image_extensions:
                            image_files.append(zip_file)
                elif extension in image_extensions:
                    image_files.append(input_file)
                else:
                    raise ValueError("Unsupported file format")
            input_files = remainder

        if in_memory:
            return NumpyDataset(self.load_img(image_files),
                                y=labels,
                                w=weights,
                                ids=image_files)
        return ImageDataset(image_files,
                            y=labels,
                            w=weights,
                            ids=image_files)

Example #2

Show file

File: data_loader.py Project: yingyingjin/deepchem

    def create_dataset(self,
                       inputs: Union[OneOrMany[str], Tuple[Any]],
                       data_dir: Optional[str] = None,
                       shard_size: Optional[int] = 8192,
                       in_memory: bool = False) -> Dataset:
        """Creates and returns a `Dataset` object by featurizing provided image files and labels/weights.

        Parameters
        ----------
        inputs: `Union[OneOrMany[str], Tuple[Any]]`
          The inputs provided should be one of the following

            - filename
            - list of filenames
            - Tuple (list of filenames, labels)
            - Tuple (list of filenames, labels, weights)

          Each file in a given list of filenames should either be of a
          supported image format (.png, .tif only for now) or of a compressed
          folder of image files (only .zip for now). Directories are also
          accepted; their contents are traversed level by level. If `labels`
          or `weights` are provided, they must correspond to the sorted order
          of all filenames provided, with one label/weight per file.
        data_dir: str, optional (default None)
          Directory to store featurized dataset. Only used when `in_memory`
          is true.
        shard_size: int, optional (default 8192)
          Shard size when loading data. Only used when `in_memory` is true
          and `data_dir` is given.
        in_memory: bool, optional (default False)
          If true, return in-memory NumpyDataset (or a DiskDataset when
          `data_dir` is given). Else return ImageDataset.

        Returns
        -------
        Dataset
          A `Dataset` object containing a featurized representation of data
          from `input_files`, `labels`, and `weights`.

        Raises
        ------
        ValueError
          If `inputs` is a tuple whose length is not 1, 2, or 3, or if a path
          given directly, or found inside a directory, is neither a
          directory, a .zip archive, nor a .png/.tif image. Archive members
          with other extensions are skipped silently.
        """
        labels, weights = None, None
        if isinstance(inputs, tuple):
            if len(inputs) == 1:
                input_files = inputs[0]
            elif len(inputs) == 2:
                input_files, labels = inputs
            elif len(inputs) == 3:
                input_files, labels, weights = inputs
            else:
                raise ValueError("Input must be a tuple of length 1, 2, or 3")
        else:
            input_files = inputs
        # A bare filename (given directly or as the first tuple element) is
        # normalized to a one-element list.
        if isinstance(input_files, str):
            input_files = [input_files]

        image_extensions = (".png", ".tif")
        image_files = []
        # Directories may be nested: the contents of each directory found on
        # this pass are queued in `remainder` and handled on the next pass.
        while len(input_files) > 0:
            remainder = []
            for input_file in input_files:
                extension = os.path.splitext(input_file)[1].lower()
                # TODO(rbharath): Add support for more extensions
                if os.path.isdir(input_file):
                    remainder.extend(
                        os.path.join(input_file, subfile)
                        for subfile in os.listdir(input_file))
                elif extension == ".zip":
                    # NOTE: the extraction directory is not removed here; the
                    # collected paths point into it and are handed to the
                    # returned dataset.
                    zip_dir = tempfile.mkdtemp()
                    # The context manager closes the archive even when
                    # extraction fails part-way through.
                    with zipfile.ZipFile(input_file, 'r') as zip_ref:
                        zip_ref.extractall(path=zip_dir)
                        member_names = zip_ref.namelist()
                    for name in member_names:
                        zip_file = os.path.join(zip_dir, name)
                        member_ext = os.path.splitext(zip_file)[1].lower()
                        if member_ext in image_extensions:
                            image_files.append(zip_file)
                elif extension in image_extensions:
                    image_files.append(input_file)
                else:
                    raise ValueError("Unsupported file format")
            input_files = remainder

        # Sort image files so labels/weights line up with a stable order.
        image_files = sorted(image_files)

        if in_memory:
            if data_dir is None:
                return NumpyDataset(load_image_files(image_files),
                                    y=labels,
                                    w=weights,
                                    ids=image_files)
            else:
                dataset = DiskDataset.from_numpy(load_image_files(image_files),
                                                 y=labels,
                                                 w=weights,
                                                 ids=image_files,
                                                 tasks=self.tasks,
                                                 data_dir=data_dir)
                if shard_size is not None:
                    dataset.reshard(shard_size)
                return dataset
        else:
            return ImageDataset(image_files,
                                y=labels,
                                w=weights,
                                ids=image_files)