コード例 #1
0
 def __init__(self, root, mode, seq_len):
     """Load a pickled multi-digit MNIST sequence dataset from disk.

     Args:
         root: directory containing the pickle files
         mode: either 'train' or 'valid'
         seq_len: number of leading time steps kept from the image sequence

     Raises:
         AssertionError: if mode is not 'train' or 'valid'.
     """
     assert mode in ['train', 'valid'], 'Invalid dataset mode'
     Dataset.__init__(self)
     # Pick the pickle filename for the requested split.
     path = {
         'train': 'seq_mnist_train.pickle',
         'valid': 'seq_mnist_validation.pickle'
     }[mode]
     # Path to the pickle file
     path = os.path.join(root, path)
     # Load dataset; latin1 encoding for pickles written under Python 2.
     with open(path, 'rb') as f:
         dataset = pickle.load(f, encoding='latin1')

     # (T, N, H, W). NOTE(review): only imgs is truncated to seq_len while
     # coords keeps its full T — presumably sliced downstream; confirm.
     self.imgs = dataset['imgs'][:seq_len]
     # (N, 2), numbers in the digits
     self.labels = dataset['labels']
     # (T, N, 2, 4), the last dimension being [x, y, w, h]
     self.coords = dataset['coords']
     # (1, N, 3), bool
     self.nums = dataset['nums']
     # (1, N, 3) -> (1, N): count of digits per sample
     self.nums = np.sum(self.nums, axis=-1)
コード例 #2
0
    def __init__(self,
                 data_paths,
                 input_transform=None,
                 target_transform=None,
                 cache=False,
                 data_root='/'):
        """Generic paired input/target file dataset.

        Args:
            data_paths: list of lists, [[str_path_to_input, str_path_to_label], [...]],
                relative to data_root.
            input_transform: optional transform applied to loaded inputs.
            target_transform: optional transform applied to loaded targets.
            cache: if True, loaded samples are memoized in cache_dict;
                keep False for large datasets.
            data_root: base directory prepended to the relative paths.
        """
        Dataset.__init__(self)

        # Allows easier path concatenation
        if not isinstance(data_root, Path):
            data_root = Path(data_root)

        self.data_root = data_root
        # Sorted so sample order is deterministic across runs.
        self.data_paths = sorted(data_paths)
        self.input_transform = input_transform
        self.target_transform = target_transform

        # Maps item kind -> (loader method, transform). load_input/load_target
        # are expected to be provided by this class or a subclass.
        self.data_loader_dict = {
            'input': (self.load_input, self.input_transform),
            'target': (self.load_target, self.target_transform)
        }

        # For large dataset, do not cache
        self.cache = cache
        self.cache_dict = defaultdict(dict)
        # Order in which the items of one sample are loaded.
        self.loading_key_order = ['input', 'target']
コード例 #3
0
    def __init__(self, config, transforms, train=True, datakeys=None):
        """Image-path dataset configured from a config object.

        Args:
            config: config with datapath and reconstr_dim attributes.
            transforms: transforms applied to loaded samples.
            train: whether this is the training split (affects logging only here).
            datakeys: keys of the data items to return; defaults to ["images"].

        Raises:
            AssertionError: if config.datapath is not a directory or no
                image paths were read.
        """
        Dataset.__init__(self)
        LoggingParent.__init__(self)
        # Bug fix: the original only assigned self.datakeys in the None case,
        # silently discarding a caller-supplied datakeys list.
        self.datakeys = ["images"] if datakeys is None else datakeys
        if not path.isdir(config.datapath):
            self.basepath = None
        else:
            self.basepath = config.datapath

        self.spatial_size = config.reconstr_dim
        self.transforms = transforms
        self.train = train

        self.datadict = {"img_path": []}
        self._read_data()
        # Freeze the collected lists as numpy arrays for cheap indexing.
        self.datadict = {
            key: np.asarray(self.datadict[key])
            for key in self.datadict
        }

        # Fail fast on a missing data directory or an empty dataset.
        assert self.basepath is not None
        assert self.datadict["img_path"].shape[0] > 0

        self._output_dict = {"images": self._get_img}

        self.logger.info(
            f'Constructed {self.__class__.__name__} in {"train" if self.train else "test"}-mode; dataset consists of {self.__len__()} samples.'
        )
コード例 #4
0
 def __init__(self, dataframe, sequences, transform=None):
     """NIfTI dataset over a patient dataframe and a list of sequence names."""
     Dataset.__init__(self)
     NiftiDataset.__init__(self)
     # Patients are identified by the dataframe index.
     self.transform = transform
     self.sequences = sequences
     self.df = dataframe
     self.patients = dataframe.index.values
コード例 #5
0
    def __init__(self, objs, atts, gqa_data_path, dset, with_atts, att_categories):
        """GQA objects/attributes dataset backed by an LMDB descriptor store.

        Args:
            objs: object vocabulary.
            atts: attribute vocabulary.
            gqa_data_path: JSON cache of the (obj, att) items; rebuilt from
                dset and written there if the file is missing.
            dset: source dataset used to build the cache.
            with_atts: whether attribute supervision is enabled.
            att_categories: optional dict mapping category -> list of attributes.
        """
        Dataset.__init__(self)
        self.objs = objs
        self.atts = atts
        self.with_atts = with_atts
        # NOTE(review): gqa_descriptors_file is a module-level name, not a
        # parameter — confirm it is defined where this class lives.
        self.gqa_env = lmdb.open(gqa_descriptors_file, subdir=False, readonly=True, lock=False, readahead=False,
                                 meminit=False)
        self.gqa_txn = self.gqa_env.begin(write=False)
        self.gqa_curs = self.gqa_txn.cursor()
        if os.path.isfile(gqa_data_path):
            # Bug fix: json.load(open(...)) leaked the file handle; use a
            # context manager so the file is closed deterministically.
            with open(gqa_data_path) as in_f:
                self.gqa_data = list(json.load(in_f).items())
        else:
            # Build the cache once and persist it for later runs.
            gqa_data_dict = get_objs_and_atts_datasets(self.objs, self.atts, dset)
            with open(gqa_data_path, 'w') as out_f:
                json.dump(gqa_data_dict, out_f, indent=2)
            self.gqa_data = list(gqa_data_dict.items())

        self.categorize_atts = False
        if att_categories is not None:
            self.categorize_atts = True
            self.att_categories = att_categories
            # Inverse index: attribute -> (category, position within category).
            self.att_to_category = \
                {x: (key, idx) for key, value in att_categories.items() for idx, x in enumerate(value)}

        # Normalized distribution over label counts.
        self.num_labels_distribution = [x / sum(CC_NUM_LABELS_ORDERED) for x in CC_NUM_LABELS_ORDERED]
コード例 #6
0
    def __init__(
        self,
        img_tensor_paths=None,
        heatmap_infos=None,
        label_name_to_value=LABEL_NAME_TO_VALUE,
        is_sigma_fixed=IS_SIGMA_FIXED,
        sigma_fixed=SIGMA_FIXED,
        sigma_scale=SIGMA_SCALE,
        heatmap_types=HEATMAP_TYPES_HANDLED,
        heatmap_labels=HEATMAP_LABELS,
    ):
        """Image-tensor/heatmap dataset; None arguments use module defaults."""
        Dataset.__init__(self)
        # Resolve list defaults here rather than in the signature, so the
        # module-level defaults are never shared mutably between instances.
        img_tensor_paths = IMG_000_PATHS if img_tensor_paths is None else img_tensor_paths
        heatmap_infos = HEATMAPS_000_INFOS if heatmap_infos is None else heatmap_infos
        # Every image must be paired with exactly one heatmap descriptor.
        assert len(img_tensor_paths) == len(heatmap_infos)

        self.img_tensor_paths = img_tensor_paths
        self.heatmap_infos = heatmap_infos
        self.label_name_to_value = label_name_to_value
        self.is_sigma_fixed = is_sigma_fixed
        self.sigma_fixed = sigma_fixed
        self.sigma_scale = sigma_scale
        self.heatmap_types = heatmap_types
        self.heatmap_labels = heatmap_labels
コード例 #7
0
 def __init__(self, index_tensor, data_tensor, target_tensor, sensitive_tensor):
     """Tensor dataset of (index, data, target, sensitive) columns.

     All four tensors must agree on the first (sample) dimension.

     Raises:
         AssertionError: if any tensor's size(0) differs from the others.
     """
     Dataset.__init__(self)
     # Robustness fix: validate every column, not only data/target —
     # consistent with the sibling tensor datasets in this file.
     assert index_tensor.size(0) == data_tensor.size(0)
     assert data_tensor.size(0) == target_tensor.size(0)
     assert target_tensor.size(0) == sensitive_tensor.size(0)
     self.data_tensor = data_tensor
     self.target_tensor = target_tensor
     self.sensitive_tensor = sensitive_tensor
     self.index_tensor = index_tensor
コード例 #8
0
 def __init__(self, split, transform=None, texture_dataset=None):
     """Texture-description image dataset for one split."""
     Dataset.__init__(self)
     # Fall back to a freshly built description dataset when none is given.
     self.dataset = (TextureDescriptionData(phid_format=None)
                     if texture_dataset is None else texture_dataset)
     self.split = split
     self.transform = transform
コード例 #9
0
 def __init__(
         self,
         datadir,  # JSON format
         batch_size,
         feature2idx,
         qual_features,
         binary_features,
         quant_features,
         dimred_dict,
         labelcol,
         label2idx,
         assigned_partitions=None,
         interested_partitions=None,
         h5dir=None,
         filename_fmt='data_{0:09d}.h5',
         device='cpu'):
     """HDF5-backed batched dataset built from JSON data in datadir.

     Bug fix: interested_partitions previously defaulted to a shared
     mutable list ([]); it now defaults to None and is normalized here,
     which is backward compatible for all callers.
     """
     Dataset.__init__(self)
     if interested_partitions is None:
         interested_partitions = []
     if h5dir is None:
         # Keep the TemporaryDirectory object alive on self so the
         # directory is not removed when the constructor returns.
         self.h5tempdir = tempfile.TemporaryDirectory()
         h5dir = self.h5tempdir.name
     self.store_parameter(h5dir, batch_size, feature2idx, qual_features,
                          binary_features, quant_features, dimred_dict,
                          labelcol, label2idx, assigned_partitions,
                          interested_partitions, filename_fmt, device)
     self.reshuffle_batch = False
     self.datadir = datadir
     self.prepare_h5_files()
コード例 #10
0
    def __init__(self,
                 img_dir=FACADE_ROT_IMAGES_TENSORS_DIR,
                 add_targets_fn=None,
                 img_to_num_rot=None,
                 caching=False,
                 init_caching=False,
                 device=None):
        """Dataset of pre-rotated facade image tensors stored on disk.

        Args:
            img_dir: directory with per-(image, rotation) tensor files.
            add_targets_fn: optional callable producing auxiliary targets.
            img_to_num_rot: per-image rotation counts; defaults to
                create_img_to_num_rot(NUM_IMAGES, NUM_ROTATIONS).
            caching: cache samples lazily as they are first read.
            init_caching: eagerly load and cache every (image, rotation) pair.
            device: target device; mutually exclusive with caching since
                cached tensors would otherwise pin GPU memory.

        Raises:
            AssertionError: if device is set together with caching, or any
                expected file is missing.
        """
        Dataset.__init__(self)
        self.dir_path = img_dir
        self.aux_targets_fn = add_targets_fn
        if img_to_num_rot is None:
            img_to_num_rot = create_img_to_num_rot(NUM_IMAGES, NUM_ROTATIONS)
        self.img_to_num_rot = img_to_num_rot
        self.cached_images = None
        self.device = device
        assert not (device is not None and
                    (init_caching
                     or caching)), 'cannot cache on GPU -> GPU_RAM'

        # checking all files exist (both the image and its non-image
        # counterpart, via the is_img flag of get_filename)
        for idx, num_rot in enumerate(self.img_to_num_rot):
            for rot_idx in range(num_rot):
                for is_img in [True, False]:
                    fname = self.get_filename(idx, rot_idx, is_img)
                    assert os.path.isfile(
                        fname), 'file ({}) does not exist'.format(fname)

        if caching or init_caching:
            self.cached_images = dict()
            if init_caching:
                # Eagerly populate the cache with every (image, rotation)
                # pair. NOTE(review): __len__ is called explicitly on
                # FacadeRandomRotDataset, bypassing subclass overrides —
                # presumably intentional; confirm.
                for img_idx in tqdm(
                        list(range(FacadeRandomRotDataset.__len__(self)))):
                    for rot_idx in range(NUM_ROTATIONS):
                        img, lbl = self.get_rot_item(img_idx, rot_idx)
                        self.cached_images[(img_idx, rot_idx)] = (img, lbl)
コード例 #11
0
 def __init__(self,
              data_type,
              year,
              datadir,
              batch_size,
              im_processor,
              cfg,
              processes=5,
              shuffle=True,
              dst_size=None):
     """COCO detection dataset wrapper built on ImageDataset.

     Reads instances_<data_type><year>.json and indexes image names.
     """
     self.imdb_name = f'{data_type}{year}'
     ImageDataset.__init__(self, 'coco_' + self.imdb_name, datadir,
                           batch_size, im_processor, cfg, processes,
                           shuffle, dst_size)
     Dataset.__init__(self)
     anno_file = f'instances_{data_type}{year}.json'
     anno_path = os.path.join(datadir, 'data', 'annotations', anno_file)
     self.coco = COCO(annotation_file=anno_path)
     self.year = str(year)
     self._load_class_ids()
     self._image_ids = self._get_image_ids()
     # Annotation loading dominates startup time, so report its duration.
     print('load annotations and image_names')
     start = time.time()
     self._annotations, self._image_names = self._load_annotation()
     print('done, time=%5.2f' % (time.time() - start))
     self._image__indexes = np.arange(len(self._image_names))
コード例 #12
0
    def __init__(self, opt, aligned=True):
        """Paired/unpaired 3-D image-patch dataset driven by an option object.

        Args:
            opt: options object; accessed both as attributes (opt.isTrain,
                opt.resizeA, opt.datapath) and via subscription
                (opt["normalization"], opt.network["..."]) — NOTE(review):
                mixed access styles suggest a dict-like config class; confirm.
            aligned: whether source/target patches are spatially aligned.
        """
        Dataset.__init__(self)
        # Image lists are filled later (e.g. by a load method).
        self.imgA = []
        self.imgB = []
        self.size_in = opt.network["input_patch_size"]
        self.opt = opt
        self.filenamesA = None
        self.filenamesB = None
        if opt.isTrain:  # only need a name when training
            self.name = opt.name
        self.batch_size = opt.network["batch_size"]
        self.resizeA = opt.resizeA
        self.netG = opt.network["netG"]
        self.model = opt.network["model"]
        # Per-sample shift bookkeeping.
        self.shift_dict = {}

        # only for stn and when opt.stn_adjust_image use
        self.stn_adjust_dict = {}

        # Default: no upscaling, output patch matches input patch.
        self.up_scale = (1, 1, 1)
        self.size_out = self.size_in
        self.aligned = aligned

        # Resolve the normalization functions by name from the
        # preprocessing module (configurable normalization strategy).
        module_name = "aics_transfer_function.util.preprocessing"
        norm_module = importlib.import_module(module_name)
        func_name_src = self.opt["normalization"]["source"]["method"]
        self.source_norm = getattr(norm_module, func_name_src)
        self.source_norm_param = self.opt["normalization"]["source"]["params"]
        # Target normalization is optional: only set up when a target
        # datapath is configured.
        if "target" in self.opt.datapath and self.opt.datapath[
                "target"] is not None:
            func_name_tar = self.opt["normalization"]["target"]["method"]
            self.target_norm = getattr(norm_module, func_name_tar)
            self.target_norm_param = self.opt["normalization"]["target"][
                "params"]
コード例 #13
0
    def __init__(self, index_tensor, x, f):
        """Tensor dataset of (index, input x, value f) triples.

        All three tensors must share the same first (sample) dimension.
        """
        Dataset.__init__(self)
        n_samples = index_tensor.size(0)
        assert n_samples == x.size(0)
        assert x.size(0) == f.size(0)

        self.index_tensor = index_tensor
        self.x = x
        self.f = f
コード例 #14
0
 def __init__(
     self,
     phase: str,
     data_root: str = "/data/yjwa/sparse_torch/MinkowskiEngine/nnbar_overlay",
 ):
     """Sparse nnbar-overlay dataset for the given phase."""
     Dataset.__init__(self)
     self.phase = phase
     # load_data returns (coordinates, values, labels) for the phase.
     self.w_xy, self.w_val, self.label = self.load_data(data_root, phase)
コード例 #15
0
    def __init__(
        self,
        data_dir,
        normalize_images=True,
        split=None,
        return_mesh=False,
        voxel_size=32,
        num_samples=5000,
        sample_online=False,
        in_memory=False,
        return_id_str=False,
        input_views=[0, 6, 7],
    ):
        """Multi-view mesh/voxel dataset indexed from a summary.json manifest.

        Args:
            data_dir: root directory containing summary.json and
                <synset>/<model>/ subdirectories.
            normalize_images: passed to get_transform.
            split: optional dict synset_id -> (list of model ids, or dict
                keyed by model id); restricts which models are included.
            return_mesh: whether meshes are returned by the loader.
            voxel_size: voxel grid resolution.
            num_samples: number of surface samples per model.
            sample_online: sample points at load time instead of from disk;
                requires return_mesh.
            in_memory: preload samples.pt per model into mid_to_samples.
            return_id_str: whether id strings are returned with samples.
            input_views: image view indices to load per model.
                NOTE(review): mutable default list — it is only stored, never
                mutated here, but callers share the same object.

        Raises:
            ValueError: if sample_online is set without return_mesh.
        """
        # call the PyTorch Dataset interface in this way
        # since the immediate parent is MeshVoxDataset
        Dataset.__init__(self)
        if not return_mesh and sample_online:
            raise ValueError("Cannot sample online without returning mesh")

        self.data_dir = data_dir
        self.return_mesh = return_mesh
        self.voxel_size = voxel_size
        self.num_samples = num_samples
        self.sample_online = sample_online
        self.return_id_str = return_id_str

        self.synset_ids = []
        self.model_ids = []
        self.mid_to_samples = {}
        # TODO: get the image ids from parameters
        self.image_ids = input_views

        self.transform = self.get_transform(normalize_images)

        # summary.json maps synset id -> {model id -> image count}.
        summary_json = os.path.join(data_dir, "summary.json")
        with open(summary_json, "r") as f:
            summary = json.load(f)
            for sid in summary:
                logger.info("Starting synset %s" % sid)
                allowed_mids = None
                if split is not None:
                    if sid not in split:
                        logger.info("Skipping synset %s" % sid)
                        continue
                    elif isinstance(split[sid], list):
                        allowed_mids = set(split[sid])
                    # NOTE(review): this checks `split` (always a dict here),
                    # not `split[sid]`; it works because the list case was
                    # handled above, but `isinstance(split[sid], dict)` was
                    # presumably intended — confirm before changing.
                    elif isinstance(split, dict):
                        allowed_mids = set(split[sid].keys())
                for mid, num_imgs in summary[sid].items():
                    if allowed_mids is not None and mid not in allowed_mids:
                        continue
                    # Preload sampled points when not sampling online.
                    if not sample_online and in_memory:
                        samples_path = os.path.join(data_dir, sid, mid,
                                                    "samples.pt")
                        samples = torch.load(samples_path)
                        self.mid_to_samples[mid] = samples
                    self.synset_ids.append(sid)
                    self.model_ids.append(mid)
コード例 #16
0
ファイル: open_image.py プロジェクト: Ttsoccer/yolo2-pytorch
 def __init__(self, data_type, datadir, batch_size, im_processor, cfg, processes=5, shuffle=True, dst_size=None):
     """OpenImage detection dataset wrapper built on ImageDataset."""
     self.imdb_name = f'OpenImage_{data_type}'
     self.data_type = data_type
     ImageDataset.__init__(self, self.imdb_name, datadir, batch_size, im_processor, cfg, processes, shuffle, dst_size)
     Dataset.__init__(self)
     # Annotations live under json_data/<split>_annotation.json.
     anno_file = f'{data_type}_annotation.json'
     anno_path = os.path.join(datadir, 'json_data', anno_file)
     self._classes = cfg.label_names
     self._annotations, self._image_names = self._load_annotation(anno_path)
     self._image__indexes = np.arange(len(self._image_names))
コード例 #17
0
 def __init__(self, xmls_folder, height, img_folder, config):
     """XML-annotated image dataset; pads each target row to max_output."""
     Dataset.__init__(self)
     BaseDataset.__init__(self, xmls_folder, height, img_folder, config)
     # Zero-pad every raw target to a fixed length, then stack into one
     # (num_samples, max_output) float tensor.
     padded_rows = []
     for raw in self.raw_Y:
         row = torch.zeros(self.max_output)
         row[:len(raw)] = torch.tensor(raw, dtype=torch.float32)
         padded_rows.append(row)
     self.Y = torch.stack(padded_rows)
コード例 #18
0
    def __init__(self, index_tensor, x, g, g_label=None):
        """Tensor dataset of (index, x, g) with an optional g_label column.

        All tensors must agree on the first (sample) dimension; g_label is
        checked only when provided.

        Raises:
            AssertionError: on mismatched first dimensions.
        """
        Dataset.__init__(self)
        assert index_tensor.size(0) == x.size(0)
        assert x.size(0) == g.size(0)
        # Bug fix: the original asserted on g_label unconditionally, which
        # raised AttributeError whenever the default g_label=None was used.
        if g_label is not None:
            assert x.size(0) == g_label.size(0)

        self.index_tensor = index_tensor
        self.x = x
        self.g = g
        self.g_label = g_label
コード例 #19
0
 def __init__(self, dataframe, sequences, transform=None, preprocess_config=None):
     """NIfTI dataset with an optional preprocessing config.

     Args:
         dataframe: patient dataframe; its index identifies patients.
         sequences: list of sequence names to load per patient.
         transform: optional transform applied to samples.
         preprocess_config: optional preprocessing configuration file.
     """
     Dataset.__init__(self)
     NiftiDataset.__init__(self)
     self.df = dataframe
     self.sequences = sequences
     self.patients = self.df.index.values
     # Removed leftover debug prints of self.df['seg'][0..2]: they spammed
     # stdout and crashed on dataframes with fewer than three rows.
     self.transform = transform
     self.config_file = preprocess_config
コード例 #20
0
ファイル: pointnet.py プロジェクト: AlexeyGB/MinkowskiEngine
 def __init__(
     self,
     phase: str,
     data_root: str = "modelnet40h5",
     num_points=2048,
 ):
     """ModelNet40 HDF5 point-cloud dataset for one split."""
     Dataset.__init__(self)
     # Any split name other than val/test maps to the training data.
     if phase in ("val", "test"):
         phase = "test"
     else:
         phase = "train"
     self.phase = phase
     self.num_points = num_points
     self.data, self.label = self.load_data(data_root, phase)
コード例 #21
0
    def __init__(self,
                 dataset,
                 dataset_path,
                 training,
                 validation,
                 transform=None,
                 downsample_training=False):
        """Kitti stereo-pair path index.

        Parameters
        ----------
        dataset: str
            Kitti2012 or kitti2015
        dataset_path: str
            Kitti dataset path
        training: bool
            Loads training images
        validation: bool
            Loads validation data
        transform: torchvision.transforms
            Transform to be applied to all pairs
        downsample_training: bool
            Downsample during training. Some networks don't need big images
            to converge faster

        Returns
        -------
        None
        """
        Dataset.__init__(self)

        self.dataset = dataset
        self.dataset_path = dataset_path
        self.training = training
        self.validation = validation
        self.transform = transform
        self.downsample_training = downsample_training

        # lt.dataloader returns (train_left, train_right, train_disp,
        # test_left, test_right, test_disp) path lists.
        tr_l, tr_r, tr_l_disp, test_l, test_r, test_l_disp = lt.dataloader(
            dataset_path)

        self.l_im_paths = []
        self.r_im_paths = []
        self.l_disp_paths = []

        # Either split (or both) can be selected; paths are concatenated.
        if self.training:
            self.l_im_paths.extend(tr_l)
            self.r_im_paths.extend(tr_r)
            self.l_disp_paths.extend(tr_l_disp)
        if self.validation:
            self.l_im_paths.extend(test_l)
            self.r_im_paths.extend(test_r)
            self.l_disp_paths.extend(test_l_disp)
コード例 #22
0
 def __init__(self, base_dir, config,):
     """Image-folder dataset over jpg/jpeg/png files in base_dir."""
     Dataset.__init__(self)
     LoggingParent.__init__(self)
     self.logger.info(f"Initialize GoogleImgDataset with basepath {base_dir}")
     self.config = config
     # Collect regular files with a recognized image extension.
     candidates = glob(path.join(base_dir, "*"))
     img_paths = [
         p for p in candidates
         if path.isfile(p) and p.endswith(("jpg", "jpeg", "png"))
     ]
     self.datadict = {"img_path": np.asarray(img_paths)}
     # Map pixel values from [0, 1] to [-1, 1].
     self.transforms = tt.Compose([
         tt.ToTensor(),
         tt.Lambda(lambda x: (x * 2.0) - 1.0),
     ])
     self.logger.info(f"Initialized Dataset with {self.__len__()} images")
コード例 #23
0
    def __init__(self, file, pipeline=None):  # csv file and pipeline object
        """Load a tab-separated CSV file into long tensors.

        Args:
            file: path to the tab-separated file.
            pipeline: optional sequence of preprocessing callables applied to
                each instance in order (default: no preprocessing).
                Bug fix: previously a mutable default list ([]); now None,
                normalized below — backward compatible for all callers.
        """
        Dataset.__init__(self)
        if pipeline is None:
            pipeline = []
        data = []
        with open(file, "r", encoding='utf-8') as f:
            # list of splitted lines : line is also list
            lines = csv.reader(f, delimiter='\t', quotechar=None)
            for instance in self.get_instances(lines): # instance : tuple of fields
                for proc in pipeline: # a bunch of pre-processing
                    instance = proc(instance)
                data.append(instance)

        # To Tensors: one long tensor per field, across all instances.
        self.tensors = [torch.tensor(x, dtype=torch.long) for x in zip(*data)]
コード例 #24
0
    def __init__(self, mode='train', transform=None, preload=False, name=None, data=None, mask_threshold=0):
        """Dataset loaded by name/mode, or wrapping pre-loaded data."""
        Dataset.__init__(self)
        self.mode = mode
        self.transform = transform
        # The dataset name defaults to the mode string.
        self.name = mode if name is None else name

        self.mask_threshold = mask_threshold
        # Use the supplied data when given; otherwise load from disk.
        if data is not None:
            self.data = data
        else:
            self.data = load_data(self.name, self.mode, preload, self.mask_threshold)
コード例 #25
0
 def __init__(self,
              dataframe,
              sequences,
              transform=None,
              brainmask=True,
              segmentation=True):
     """NIfTI dataset with optional brainmask and segmentation loading."""
     Dataset.__init__(self)
     NiftiDataset.__init__(self)
     # Patients are identified by the dataframe index.
     self.df = dataframe
     self.patients = dataframe.index.values
     self.sequences = sequences
     self.transform = transform
     self.brainmask = brainmask
     self.segmentation = segmentation
コード例 #26
0
 def __init__(
     self,
     phase: str,
     data_root: str = "modelnet40h5",
     translation_max: float = 0.25,
     num_points=2048,
 ):
     """ModelNet40 dataset with random-translation augmentation."""
     Dataset.__init__(self)
     # Fetch the HDF5 files if they are not present yet.
     download_modelnet40_dataset()
     # Collapse val/test into the "test" split; everything else is train.
     if phase in ("val", "test"):
         phase = "test"
     else:
         phase = "train"
     self.phase = phase
     self.num_points = num_points
     self.transform = CoordinateTransformation(trans=translation_max)
     self.data, self.label = self.load_data(data_root, phase)
コード例 #27
0
    def __init__(self,
                 imdb_name,
                 datadir,
                 batch_size,
                 im_processor,
                 processes=3,
                 shuffle=True,
                 dst_size=None,
                 classes=None,
                 n_classes=None):
        """PASCAL VOC detection dataset built on ImageDataset.

        Args:
            imdb_name: name of the form '<prefix>_<year>_<image_set>'
                (split on '_' below).
            datadir: directory containing VOCdevkit<year>.
            batch_size: batch size passed to ImageDataset.
            im_processor: image processing callable passed to ImageDataset.
            processes: worker process count for ImageDataset.
            shuffle: whether ImageDataset shuffles samples.
            dst_size: target size passed to ImageDataset.
            classes: optional class-name tuple; defaults to the 20 VOC classes.
            n_classes: optional truncation of the class list to its first n.

        Raises:
            AssertionError: if the VOCdevkit or VOC data path is missing.
        """
        ImageDataset.__init__(self, imdb_name, datadir, batch_size,
                              im_processor, processes, shuffle, dst_size)
        Dataset.__init__(self)
        # imdb_name encodes year and image set: e.g. 'voc_2007_trainval'.
        meta = imdb_name.split('_')
        self._year = meta[1]
        self._image_set = meta[2]
        self._devkit_path = os.path.join(datadir,
                                         'VOCdevkit{}'.format(self._year))
        self._data_path = os.path.join(self._devkit_path,
                                       'VOC{}'.format(self._year))
        assert os.path.exists(
            self._devkit_path), 'VOCdevkit path does not exist: {}'.format(
                self._devkit_path)
        assert os.path.exists(
            self._data_path), 'Path does not exist: {}'.format(self._data_path)

        # Default to the standard 20 PASCAL VOC categories.
        if classes is None:
            self._classes = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
                             'bus', 'car', 'cat', 'chair', 'cow',
                             'diningtable', 'dog', 'horse', 'motorbike',
                             'person', 'pottedplant', 'sheep', 'sofa', 'train',
                             'tvmonitor')
        else:
            self._classes = classes

        if n_classes is not None:
            self._classes = self._classes[:n_classes]

        # Map class name -> contiguous index. NOTE(review): self.classes /
        # self.num_classes are presumably properties defined elsewhere in
        # this class — confirm.
        self._class_to_ind = dict(
            list(zip(self.classes, list(range(self.num_classes)))))
        self._image_ext = '.jpg'

        # Random salt used to build unique competition ids for eval output.
        self._salt = str(uuid.uuid4())
        self._comp_id = 'comp4'

        # PASCAL specific config options
        self.config = {'cleanup': True, 'use_salt': True}

        self.load_dataset()
コード例 #28
0
    def __init__(self, opt):
        """Audio dataset listing files from a split file, plus image transforms.

        Args:
            opt: options object with splitPath and mode attributes; the split
                file <splitPath>/<mode>.txt lists one audio file per line.
        """
        Dataset.__init__(self)

        self.opt = opt

        # Load the audio file list for this split.
        with open(os.path.join(opt.splitPath, opt.mode + ".txt"),
                  'r') as cur_f:
            audio_files = cur_f.readlines()
        # Bug fix: strip the trailing newline instead of blindly chopping the
        # last character, which corrupted a final line lacking a newline.
        self.audios = [audio_file.rstrip('\n') for audio_file in audio_files]

        # Standard ImageNet normalization for the vision branch.
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        vision_transform_list = [transforms.ToTensor(), normalize]
        self.vision_transform = transforms.Compose(vision_transform_list)
コード例 #29
0
 def __init__(self,
              data_root,
              max_dataset_size=float('inf'),
              is_train=True,
              tumor_threshold=0.2,
              is_3d=False):
     """Tumor tensor dataset rooted at data_root/train or data_root/test."""
     Dataset.__init__(self)
     self.tumor_threshold = tumor_threshold
     # Split directory depends on the train/test flag.
     subdir = 'train' if is_train else 'test'
     self.dir = os.path.join(data_root, subdir)
     self.is_3d = is_3d
     self.dataset = self.make_dataset_from_tensor(self.dir, max_dataset_size)
     self.size = len(self.dataset)
コード例 #30
0
 def __init__(self, path):
     """Load a genre-classification CSV, shuffle it, and split features/labels.

     The genre-name column is mapped to integer ids 0-9 and the last 200
     rows are excluded (held out) from the tensors built here.

     Args:
         path: path to the comma-separated data file.
     """
     Dataset.__init__(self)
     self.path = path
     self.dataset = pd.read_csv(path, delimiter=",")
     # Shuffle rows once at load time.
     self.dataset = self.dataset.sample(frac=1)
     # Map genre names to integer class ids in a single pass (replaces ten
     # chained replace calls; same result).
     genre_to_id = {
         'blues': 0, 'classical': 1, 'country': 2, 'disco': 3, 'hiphop': 4,
         'jazz': 5, 'metal': 6, 'pop': 7, 'reggae': 8, 'rock': 9,
     }
     self.dataset.replace(to_replace=genre_to_id, inplace=True)
     # Bug fix: DataFrame.as_matrix() was removed in pandas 0.25;
     # to_numpy() is the supported equivalent.
     self.data = self.dataset.iloc[:-200, :-1].to_numpy()
     self.label = self.dataset.iloc[:-200, -1:].to_numpy()
コード例 #31
0
    def __init__(self, config):
        """Configurable dataset: loader settings and transforms come from config."""
        Dataset.__init__(self)
        Configurable.__init__(self, config)

        # Dataloader parameters.
        cfg = self.config
        self.shuffle = cfg.shuffle
        self.batch_size = cfg.batch_size
        self.num_workers = cfg.num_workers
        self.pin_memory = cfg.pin_memory

        # Take labels from config only when the class has not set them already.
        if not self.labels and cfg.labels:
            self.labels = cfg.labels

        # Transformations for the image and for the target, respectively.
        self.transform = self.load_transforms(cfg.transforms)
        self.target_transform = self.load_transforms(cfg.target_transforms)