Esempio n. 1
0
    def __init__(self, data_dir, json_file, transform=None):
        """
        Read a dataset description from a JSON file and remember the transform.

        The file that gets parsed is ``os.path.join(data_dir, json_file)``. Its parsed
        content is stored in ``self.json_dataset``; ``transform`` is only stored here
        (as ``self.transform``), it is not applied by the constructor.

        :param data_dir: the base directory where the data is located
        :type data_dir: str
        :param json_file: the name of the json file containing the data; it is joined onto
            ``data_dir`` (an absolute path makes ``os.path.join`` discard ``data_dir``)
        :type json_file: str
        :param transform: a transform object (can be the result of a composition of transforms)
        :type transform: callable

        .. code-block:: python

            from eisen.datasets import JsonDataset
            dset = JsonDataset(
                data_dir='/abs/path/to/data',
                json_file='/abs/path/to/file.json',
                transform=transform
            )

        <json>
        [
            {"name": "json_file", "type": "string", "value": ""}
        ]
        </json>
        """
        self.json_dataset = read_json_from_file(os.path.join(data_dir, json_file))
        self.transform = transform
Esempio n. 2
0
    def __init__(self, data_dir, json_file, phase, transform=None):
        """
        Read an MSD dataset description and select the entries of one phase.

        The JSON file at ``os.path.join(data_dir, json_file)`` is parsed. The value stored
        under ``phase`` becomes ``self.json_dataset``; everything that remains after the
        ``"training"`` and ``"test"`` keys are removed is kept in ``self.attributes``.
        For the ``"test"`` phase each entry is wrapped as ``{"image": entry}``.

        :param data_dir: the base directory where the data is located (dataset location after unzipping)
        :type data_dir: str
        :param json_file: the name of the json file describing the MSD dataset
        :type json_file: str
        :param phase: training or test phase as per MSD dataset convention (look at MSD json file);
            it is used directly as a key into the parsed JSON
        :type phase: string
        :param transform: a transform object (can be the result of a composition of transforms)
        :type transform: callable

        .. code-block:: python

            from eisen.datasets import MSDDataset

            dataset = MSDDataset(
                data_dir='/abs/path/to/data',
                json_file='/path/to/dataset.json',
                phase='training',
                transform=transform,
            )

        <json>
        [
            {"name": "json_file", "type": "string", "value": ""},
            {"name": "phase", "type": "string", "value": ["training", "test"]}
        ]
        </json>
        """
        description = read_json_from_file(os.path.join(data_dir, json_file))

        # Grab the requested phase before the phase lists are stripped below.
        self.json_dataset = description[phase]

        # Whatever is left once both phase lists are gone is kept as dataset attributes.
        for split in ("training", "test"):
            description.pop(split, None)

        if phase == "test":
            # test images are stored as list of filenames instead of dictionaries. Need to convert that.
            self.json_dataset = [{"image": filename} for filename in self.json_dataset]

        self.attributes = description
        self.transform = transform
    def __init__(
        self,
        data_dir,
        json_file,
        aws_id=None,
        aws_secret=None,
        transform=None,
    ):
        """
        Read a dataset description from a JSON file obtained through S3.

        An S3 client is created and stored in ``self.s3_client``, a fresh temporary
        directory is created and stored in ``self.tempdir``, and the JSON file is
        retrieved with ``get_file_from_s3`` before being parsed into ``self.json_dataset``.

        :param data_dir: base location of the data; joined with ``json_file`` via ``os.path.join``
        :type data_dir: str
        :param json_file: the name of the json file containing the data
        :type json_file: str
        :param aws_id: forwarded to ``boto3.client`` as ``aws_access_key_id``; when left as ``None``
            boto3 is documented to fall back to its own configured credentials
        :type aws_id: str
        :param aws_secret: forwarded to ``boto3.client`` as ``aws_secret_access_key``
        :type aws_secret: str
        :param transform: a transform object; only stored here as ``self.transform``
        :type transform: callable
        """
        self.s3_client = boto3.client(
            "s3", aws_access_key_id=aws_id, aws_secret_access_key=aws_secret
        )

        # mkdtemp() leaves deletion to its caller; this constructor does not remove the directory.
        self.tempdir = tempfile.mkdtemp()

        remote_json = os.path.join(data_dir, json_file)
        # NOTE(review): get_file_from_s3 presumably downloads into self.tempdir and returns
        # the local file path -- confirm against its definition.
        local_json = get_file_from_s3(self.s3_client, remote_json, self.tempdir)

        self.json_dataset = read_json_from_file(local_json)
        self.transform = transform
    def __init__(
        self,
        data_dir,
        json_file,
        phase,
        aws_id=None,
        aws_secret=None,
        transform=None,
    ):
        """
        Read an MSD dataset description obtained through S3 and select one phase.

        An S3 client is created and stored in ``self.s3_client``, a fresh temporary
        directory is created and stored in ``self.tempdir``, and the JSON file is
        retrieved with ``get_file_from_s3`` and parsed. The value stored under ``phase``
        becomes ``self.json_dataset``; everything that remains after the ``'training'``
        and ``'test'`` keys are removed is kept in ``self.attributes``. For the ``'test'``
        phase each entry is wrapped as ``{'image': entry}``.

        :param data_dir: base location of the data; joined with ``json_file`` via ``os.path.join``
        :type data_dir: str
        :param json_file: the name of the json file describing the MSD dataset
        :type json_file: str
        :param phase: phase to load; it is used directly as a key into the parsed JSON
        :type phase: str
        :param aws_id: forwarded to ``boto3.client`` as ``aws_access_key_id``; when left as ``None``
            boto3 is documented to fall back to its own configured credentials
        :type aws_id: str
        :param aws_secret: forwarded to ``boto3.client`` as ``aws_secret_access_key``
        :type aws_secret: str
        :param transform: a transform object; only stored here as ``self.transform``
        :type transform: callable
        """
        self.s3_client = boto3.client(
            "s3", aws_access_key_id=aws_id, aws_secret_access_key=aws_secret
        )

        # mkdtemp() leaves deletion to its caller; this constructor does not remove the directory.
        self.tempdir = tempfile.mkdtemp()

        remote_json = os.path.join(data_dir, json_file)
        # NOTE(review): get_file_from_s3 presumably downloads into self.tempdir and returns
        # the local file path -- confirm against its definition.
        local_json = get_file_from_s3(self.s3_client, remote_json, self.tempdir)

        description = read_json_from_file(local_json)

        # Grab the requested phase before the phase lists are stripped below.
        self.json_dataset = description[phase]

        # Whatever is left once both phase lists are gone is kept as dataset attributes.
        for split in ("training", "test"):
            description.pop(split, None)

        if phase == "test":
            # test images are stored as list of filenames instead of dictionaries. Need to convert that.
            self.json_dataset = [{"image": filename} for filename in self.json_dataset]

        self.attributes = description
        self.transform = transform