def test():
    if test_set is None:
        return

    start_time = time.time()
    t = test_set.start_test()

    test_loss = []

    with torch.no_grad():
        for data in tqdm(test_loader):
            data = {
                k: v.to(device) if torch.is_tensor(v) else v
                for k, v in data.items()
            }
            if hasattr(dataset, "prepare"):
                data = dataset.prepare(data)

            net_out = run_model(data)
            test_set.verify_result(t, data, net_out)

            test_loss.append(
                dataset.loss(net_out, data["output"]).data.item())
        avg_loss = sum(test_loss) / len(test_loss)
        perplexity = math.exp(avg_loss)
        test_loss_plot.add_point(epoch, avg_loss)
        if epoch > 5:  # Perplexity is immensely high in the beginning
            ppl_plot.add_point(epoch, perplexity)

    test_set.show_test_results(epoch, t)
    print("Test done in %gs" % (time.time() - start_time))
Example #2
def _load_dataset(
    path,
    mmap_mode=None,
    as_torch_tensors=True
) -> Tuple[torch_geometric.data.Data, Dict[str, torch.Tensor]]:
    data = np.load(path, mmap_mode=mmap_mode)
    data_class = pickle.loads(data['_data_class'])

    data_dict = {}
    slices_dict = {}

    if as_torch_tensors:

        def convert_tensor(x):
            return torch.from_numpy(x)
    else:

        def convert_tensor(x):
            return x

    for k, v in data.items():
        if k == '_data_class':
            continue

        group, key = k.split('_', 1)

        if group == 'data':
            data_dict[key] = convert_tensor(v)
        elif group == 'slices':
            slices_dict[key] = convert_tensor(v)
        else:
            raise ValueError('Unknown key prefix {} for key {}'.format(
                group, k))

    return data_class.from_dict(data_dict), slices_dict
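A minimal usage sketch, assuming the archive was written by a matching save routine (the path is hypothetical):

# Hypothetical usage: load a collated dataset plus its slice index.
data, slices = _load_dataset('processed/dataset.npz', as_torch_tensors=True)
print(type(data).__name__, {k: v.shape for k, v in slices.items()})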
def _get_text_dim(self, data):
    for k, v in data.items():
        for vv in v:
            x_text = torch.Tensor(vv['text_embedding_{}'.format(
                self.text)])
            if len(x_text.size()):
                return x_text.shape
Example #4
def filter_data(data, threshold):
    filtered_data = {}
    for k, v in data.items():
        if len(v) > threshold:
            filtered_data[k] = v
    logger.info('root cause numbers: %d' % len(filtered_data))
    return filtered_data
Example #5
def load_android_examples(android_data, test=False):
    '''yields data in the form of
    (query id, first positive id, list of remaining candidate ids)'''

    neg_file = 'android_data/dev.neg.txt'
    pos_file = 'android_data/dev.pos.txt'
    if test:
        neg_file = 'android_data/test.neg.txt'
        pos_file = 'android_data/test.pos.txt'
    d = {}

    with open(pos_file) as f:
        for line in f:
            query, compare = line.split()
            if query not in d:
                d[query] = []
            d[query].append(compare)

    with open(neg_file) as f:
        for line in f:
            query, compare = line.split()
            d[query].append(compare)

    for q, c in d.items():
        yield (q, c[0], c[1:])
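A short usage sketch; note the android_data parameter is never read by the body above, which loads its hard-coded paths:

# Hypothetical usage: inspect the candidate pool built for each dev query.
for query, first, rest in load_android_examples(android_data=None):
    print(query, 'first candidate:', first, 'plus', len(rest), 'more')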
def extract_embeddings(data_path: typing.AnyStr) -> typing.Dict:
    embeddings_backup_path = data_path + "embeddings.pkl"
    if not os.path.isfile(embeddings_backup_path):
        crops_backup_path = data_path + "crops.pkl"
        if not os.path.isfile(crops_backup_path):
            data = extract_crop_data(data_path)
            with open(crops_backup_path, "wb") as fd:
                pickle.dump(data, fd)
        else:
            with open(crops_backup_path, "rb") as fd:
                data = pickle.load(fd)
        resnet50_imnet = torchvision.models.resnet50(pretrained=True).eval()
        imnet_transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225]),
        ])
        with torch.no_grad():
            for seq_name, seq_data in data.items():
                for frame_idx, frame_data in seq_data.items():
                    crop = cv.cvtColor(frame_data["CROP"], cv.COLOR_BGR2RGB)
                    tensor = torch.unsqueeze(imnet_transforms(crop), 0)
                    logits = torch.squeeze(resnet50_imnet(tensor), 0).numpy()
                    embed = compute_resnet_embedding_without_fc(
                        tensor, resnet50_imnet)
                    embed = torch.squeeze(embed, 0).numpy()
                    frame_data["imnet_logits"], frame_data[
                        "imnet_embed"] = logits, embed
        with open(embeddings_backup_path, "wb") as fd:
            pickle.dump(data, fd)
    else:
        with open(embeddings_backup_path, "rb") as fd:
            data = pickle.load(fd)
    return data
Example #7
def _examples_to_batch(
        self, data: Dict[ColumnName,
                         OneorMore[ArrayLike]]) -> ProcessedBatch:
    """
    Converts examples in a dataset to model inputs by using the fields to transform
    the inputs to tensors. Override in subclass to add custom behavior.
    """
    in_data = {}
    tgt_data = {}
    for k, batch in data.items():
        fld = self.dataset.fields[k]
        if isinstance(fld, (tuple, list)):
            for f, v in zip(fld, batch):
                data_dict = tgt_data if f.is_target else in_data
                if k not in data_dict:
                    data_dict[k] = []
                data_dict[k].append(
                    f.transform_batch(v,
                                      device=self.device,
                                      train=self.dataset.train))
        else:
            tsr = fld.transform_batch(batch,
                                      device=self.device,
                                      train=self.dataset.train)
            # add to the input or target dict depending on the field
            if fld.is_target:
                tgt_data[k] = tsr
            else:
                in_data[k] = tsr
    return in_data, tgt_data
def get_loaders(file_path, batch_size, ratio):

    with open(file_path, "br") as r:
        data = pickle.load(r)

    src_lang = PhonemeVocab()
    trg_lang = WordVocab()
    src = []
    trg = []
    for k, v in data.items():
        trg_lang.add_word(k)
        src += v
        trg += [k for _ in range(len(v))]

    # shuffle data
    d = list(zip(src, trg))
    random.shuffle(d)
    src, trg = zip(*d)
    src, trg = list(src), list(trg)

    r_range = get_ratio_range(len(src), ratio)
    data_loaders = []
    for r in range(len(ratio) - 1):
        sub_src = src[r_range[r]:r_range[r + 1]]
        sub_trg = trg[r_range[r]:r_range[r + 1]]
        dataset = DomainDataset(sub_src, sub_trg, src_lang.sent2idx,
                                trg_lang.word2idx)
        data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                                  batch_size=batch_size[r],
                                                  shuffle=True,
                                                  drop_last=True,
                                                  collate_fn=collate_fn)
        data_loaders.append(data_loader)
    return data_loaders, src_lang, trg_lang
Example #9
def get_video_names_and_annotations(data, subset):
    video_names = []
    annotations = []

    for key, value in data.items():
        this_subset = value['subset']
        if this_subset == subset:

            if subset == 'testing':
                video_names.append('test/{}'.format(key))
            elif subset == 'train':
                st = int(value['annotations']['segment'][0])
                end = int(value['annotations']['segment'][1])
                label = value['annotations']['label'].replace(' ', '_')
                video_names.append('{}/{}_{}_{}'.format(
                    label, key,
                    str(st).zfill(6),
                    str(end).zfill(6)))
                annotations.append(value['annotations'])
            else:
                label = value['annotations']['label'].replace(' ', '_')
                video_names.append('{}/{}'.format(label, key))
                annotations.append(value['annotations'])

    return video_names, annotations
def main():
    # prefix is the base url; appending a WordNet id (wnid) gives the full
    # image-list url for that synset
    prefix = 'http://www.image-net.org/api/text/imagenet.synset.geturls?wnid='
    with open('./imagenet_class_info.json') as file:
        data = json.load(file)

    class_folder_path = os.path.join(args.data_root, args.main_class)
    if not os.path.isdir(class_folder_path):
        os.makedirs(class_folder_path)

    wnids = []
    for k, v in data.items():
        if v['class_name'] in args.subclass_list:
            wnids.append(k)
    print(wnids)
    for wnid in wnids:
        the_list_url = prefix + wnid
        resp = requests.get(the_list_url)
        urls = [url.decode('utf-8') for url in resp.content.splitlines()]
        num_images = 0

        for url in urls:
            if num_images >= args.images_per_subclass:
                print(num_images)
                break
            num_images += get_image(url, class_folder_path)
Example #11
def __iter__(self):
    for data in super(FixedDatasetLoader, self).__iter__():
        if isinstance(data, (list, tuple)):
            assert len(data) == len(self.field_names)
            yield dict(zip(self.field_names, data))
        else:
            yield {self.rename_dict.get(k, k): v for k, v in data.items()}
Example #12
def __init__(self, data_json_path, batch_size, max_length_in, max_length_out,
             num_batches=0):
    # From: espnet/src/asr/asr_utils.py: make_batchset()
    """
    Args:
        data_json_path: espnet json format file.
        num_batches: for debugging. Use only num_batches minibatches, not all.
    """
    super(AudioDataset, self).__init__()
    with open(data_json_path, 'rb') as f:
        data = json.load(f)['utts']
    # sort by input length (long to short)
    sorted_data = sorted(data.items(), key=lambda item: int(
        item[1]['input'][0]['shape'][0]), reverse=True)
    # shrink the batch size as input and output lengths grow
    minibatch = []
    start = 0
    while True:
        ilen = int(sorted_data[start][1]['input'][0]['shape'][0])
        olen = int(sorted_data[start][1]['output'][0]['shape'][0])
        factor = max(int(ilen / max_length_in), int(olen / max_length_out))
        # if ilen = 1000 and max_length_in = 800, then b = batch_size / 2;
        # max(1, .) avoids b = 0
        b = max(1, int(batch_size / (1 + factor)))
        end = min(len(sorted_data), start + b)
        minibatch.append(sorted_data[start:end])
        if end == len(sorted_data):
            break
        start = end
    if num_batches > 0:
        minibatch = minibatch[:num_batches]
    self.minibatch = minibatch
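As a quick check of the batch-size arithmetic above, here is the same formula evaluated on made-up lengths (all numbers hypothetical):

batch_size, max_length_in, max_length_out = 32, 800, 150
ilen, olen = 1000, 120   # one long input utterance, short output
factor = max(int(ilen / max_length_in), int(olen / max_length_out))  # -> 1
b = max(1, int(batch_size / (1 + factor)))  # -> 16: long inputs halve the batch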
Example #13
def log_write(fid, data, step=0):
    try:
        import tensorflow as tf  # noqa: F401 -- only to detect availability
        fid, summary, sess, var = fid
        feed_dict = {}
        for k in var.keys():
            if k in data.keys():
                feed_dict[var[k]] = data[k]
            else:
                feed_dict[var[k]] = 0
        summary = sess.run(summary, feed_dict=feed_dict)
        fid.add_summary(summary, step)
    except ModuleNotFoundError:
        try:
            # tensorboardX-style writer
            fid.add_scalars('train', data, step)
        except AttributeError:
            # plain file handle: write one tab-separated line per step
            s = 'Step: %d\t' % step
            for i, (k, v) in enumerate(data.items()):
                try:
                    v = float(v)
                except (TypeError, ValueError):
                    continue
                s += '%s:%3.5f' % (k, v)
                s += '\t' if i != len(data) - 1 else '\n'
            fid.write(s)
            fid.flush()
Example #14
    def __getitem__(self, index):
        data = self.data_list[index]
        data = image_label(data, input_size=self.data_shape, n=self.n, m=self.m)

        # keep the pre-transform image size, then optionally transform
        shape = (data['img'].shape[0], data['img'].shape[1])
        img = data['img']
        if self.transform:
            img = self.transform(img)

        data['img'] = img
        data['shape'] = shape

        if len(self.filter_keys):
            return {k: v for k, v in data.items() if k not in self.filter_keys}
        return data
Example #15
    def load_next_buffer(self):
        """ Loads next buffer """
        self._buffer_fnames = self._files[
            self._buffer_index:self._buffer_index + self._buffer_size]

        self._buffer_index += self._buffer_size
        self._buffer_index = self._buffer_index % len(self._files)
        self._buffer = []
        self._cum_size = [0]

        # progress bar
        pbar = tqdm(total=len(self._buffer_fnames),
                    bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} {postfix}')
        pbar.set_description("Loading file buffer ...")

        for f in self._buffer_fnames:
            with np.load(f) as data:
                self._buffer += [{k: np.copy(v) for k, v in data.items()}]
                self._cum_size += [
                    self._cum_size[-1] +
                    self._data_per_sequence(data['rewards'].shape[0])
                ]
                pbar.set_description_str('data_len{}'.format(
                    data['rewards'].shape[0]))
            pbar.update(1)
        pbar.close()
Example #16
def validate_imagenet(val_loader, model, args):
    batch_time = AverageMeter('Time', ':6.3f')
    top1 = AverageMeter('Real Acc@1', ':6.2f')
    top5 = AverageMeter('Real Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader), [batch_time, top1, top5], prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, data in enumerate(val_loader):
            if args.gpu is not None or torch.cuda.is_available():
                data = {k: v.cuda(args.gpu, non_blocking=True) for k, v in data.items()}

            # compute output
            out = model(data['ims'])

            # measure accuracy and record loss
            acc1, acc5 = accuracy(out['avg_preds'], data['labels'], topk=(1, 5))
            top1.update(acc1[0], data['labels'].size(0))
            top5.update(acc5[0], data['labels'].size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # logging
            if i % args.print_freq == 0:
                progress.display(i)

    print(f'* Real: Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}')

    return {'acc1/1_real': top1.avg,
            'acc5/1_real': top5.avg}
Example #17
def to_cpu(data):
    if isinstance(data, dict):
        for k, v in data.items():
            data[k] = to_cpu(v)
        return data
    if isinstance(data, torch.Tensor):
        return data.cpu()
    return data
Example #18
def write_element(descriptor, data, name):
    if isinstance(data, torch.Tensor):
        descriptor.create_dataset(name, data=data)
        return
    assert isinstance(data, dict), name
    subgroup = descriptor.create_group(name)
    for k, v in data.items():
        write_element(subgroup, v, k)
Example #19
def to_device(data):
    if isinstance(data, dict):
        for k, v in data.items():
            data[k] = v.to(device=device)
    else:
        for idx, item in enumerate(data):
            data[idx] = item.to(device=device)
    return data
# defined as a classmethod on its Example-style container class
@classmethod
def fromdict(cls, data, fields):
    ex = cls()
    for key, val in data.items():
        if key in fields:
            name, field = fields[key]
            if field is not None:
                setattr(ex, name, field.preprocess(val))
    return ex
Example #21
    def __init__(self, data, uk_data, set_type, config):
        super().__init__()
        self.uk_data = uk_data
        print(set_type, set(list(self.uk_data.values())))

        c = Counter(list(self.uk_data.values()))

        print(c)
        self.audio_files = list(data.keys())
        self.set_type = set_type
        self.audio_labels = list(data.values())
        print(np.bincount(np.array(self.audio_labels)))
        config["bg_noise_files"] = list(
            filter(lambda x: x.endswith("wav"),
                   config.get("bg_noise_files", [])))
        self.bg_noise_audio = [
            librosa.core.load(file, sr=16000)[0]
            for file in config["bg_noise_files"]
        ]
        self.unknown_prob = config["unknown_prob"]
        self.silence_prob = config["silence_prob"]
        self.noise_prob = config["noise_prob"]
        self.n_dct = config["n_dct_filters"]
        self.input_length = config["input_length"]
        self.timeshift_ms = config["timeshift_ms"]
        self.filters = librosa.filters.dct(config["n_dct_filters"],
                                           config["n_mels"])
        self.n_mels = config["n_mels"]
        self._audio_cache = SimpleCache(config["cache_size"])
        self._file_cache = SimpleCache(config["cache_size"])

        n_unk = len(list(filter(lambda x: x == 1, self.audio_labels)))
        print(set_type, self.unknown_prob)
        self.uk_audio_files = [k for (k, v) in data.items() if v == 1]
        self.n_unknown = int(
            self.unknown_prob *
            (len(self.audio_labels) - len(self.uk_audio_files)))

        #shuffle(self.uk_audio_files)
        self.uk_index = 0
        self.n_silence = int(
            self.silence_prob *
            (len(self.audio_labels) - len(self.uk_audio_files)))

        self.audio_files = [k for (k, v) in data.items() if v != 1]
        self.audio_labels = [v for (k, v) in data.items() if v != 1]
Example #22
def get_rc_data(data, rc_lists):
    new_data = {}
    count = 0
    for k, v in data.items():
        if k in rc_lists:
            new_data[k] = v
            count += len(v)
    logger.info('Remained data numbers: %d' % count)
    return new_data
    def __init__(self, dict_path="data/data.pkl", val_size=50):
        with open(dict_path, 'rb') as f:
            data = pickle.load(f)

        data_items = list(data.items())
        random.shuffle(data_items)
        self.data_dict = dict(data_items)

        self.maxlen = 1500
Example #24
def read_object_labels(root, dataset, set):
    path_labels = os.path.join(root, 'VOCdevkit', dataset, 'ImageSets', 'Main')
    labeled_data = dict()
    num_classes = len(object_categories)

    for i in range(num_classes):
        file = os.path.join(path_labels, object_categories[i] + '_' + set + '.txt')
        data = read_image_label(file)

        if i == 0:
            for (name, label) in data.items():
                labels = np.zeros(num_classes)
                labels[i] = label
                labeled_data[name] = labels
        else:
            for (name, label) in data.items():
                labeled_data[name][i] = label

    return labeled_data
Example #25
def get_data(mode):
    with open('./lab5_dataset/' + mode + '.json', 'r') as f:
        data = json.load(f)
    if mode == 'train':
        return np.squeeze(list(data.items()))
    return data
Example #27
    def __permute_data__(self, data, node_idx, adj):
        out = copy.copy(data)
        for key, value in data.items():
            if data.is_node_attr(key):
                out[key] = value[node_idx]

        out.edge_index = None
        out.adj = adj

        return out
def collate_fn(batch):
    r"""Puts each data field into a tensor with outer dimension batch size"""

    flattened_batch = []
    for data in batch:
        num_examples = len(data['image'])
        for i in range(num_examples):
            flattened_batch.append({k: v[i] for k, v in data.items()})

    return default_collate(flattened_batch)
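A sketch of wiring this collate_fn into a loader (the dataset name is hypothetical; each item must be a dict of equal-length per-example fields):

# Hypothetical usage: flatten multi-example items before default batching.
loader = torch.utils.data.DataLoader(my_multi_example_dataset,
                                     batch_size=4,
                                     collate_fn=collate_fn)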
    def evaluate(self,
                 data: dict,
                 model: Model,
                 path_to_model: str = "") -> dict:
        """
        Evaluates the saved best model against train, val and test data
        :param data: a dictionary tuple containing the data loaders for train, val and test
        :param model: the model to be evaluated
        :param path_to_model: the path to the saved serialization of the best model
        :return: the eval of the model on train, val and test data, including metrics, gt and preds
        """
        model.evaluation_mode()

        if path_to_model != "":
            model.load(path_to_model)

        metrics, gt, preds = {}, {}, {}

        for set_type, dataloader in data.items():

            loss, accuracy, y_scores, y_true = [], [], [], []

            with torch.no_grad():

                for i, (x, y) in enumerate(dataloader):
                    x, y = x.float().to(self.__device), y.long().to(
                        self.__device)
                    o = model.predict(x).to(self.__device)

                    loss += [model.get_loss(o, y)]
                    accuracy += [self.batch_accuracy(o, y)]

                    y_scores += torch.exp(o).cpu().numpy().tolist()
                    y_true += y.cpu().numpy().tolist()

            y_scores, y_true = np.array(y_scores).reshape(
                (len(y_scores), 2)), np.array(y_true)
            y_pred = np.array(
                y_scores[:, 1] >= self.__optimal_roc_threshold(
                    y_true, y_scores[:, 1]),
                dtype=int)
            set_metrics = self.__compute_metrics(y_true,
                                                 y_pred,
                                                 y_1_scores=y_scores[:, 1])
            set_metrics["accuracy"], set_metrics["loss"] = np.mean(
                accuracy), np.mean(loss)

            print("\n {} metrics: \n".format(set_type.upper()))
            for metric, value in set_metrics.items():
                print(("\t - {} " + "".join(["."] * (15 - len(metric))) +
                       " : {:.4f}").format(metric, value))

            metrics[set_type], gt[set_type], preds[
                set_type] = set_metrics, y_true, y_pred

        return {"metrics": metrics, "gt": gt, "preds": preds}
Example #30
def validate_counterfactual(val_loader, model, args):
    batch_time = AverageMeter('Time', ':6.3f')
    top1_shape = AverageMeter('Shape Acc@1', ':6.2f')
    top5_shape = AverageMeter('Shape Acc@5', ':6.2f')
    top1_texture = AverageMeter('Texture Acc@1', ':6.2f')
    top5_texture = AverageMeter('Texture Acc@5', ':6.2f')
    top1_bg = AverageMeter('Bg Acc@1', ':6.2f')
    top5_bg = AverageMeter('Bg Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader),
                             [batch_time, top1_shape, top5_shape, top1_texture, top5_texture, top1_bg, top5_bg],
                             prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, data in enumerate(val_loader):
            if args.gpu is not None or torch.cuda.is_available():
                data = {k: v.cuda(args.gpu, non_blocking=True) for k, v in data.items()}

            # compute output
            out = model(data['ims'])

            # measure accuracy and record loss
            sz = len(data['ims'])
            acc1, acc5 = accuracy(out['shape_preds'], data['shape_labels'], topk=(1, 5))
            top1_shape.update(acc1[0], sz)
            top5_shape.update(acc5[0], sz)
            acc1, acc5 = accuracy(out['texture_preds'], data['texture_labels'], topk=(1, 5))
            top1_texture.update(acc1[0], sz)
            top5_texture.update(acc5[0], sz)
            acc1, acc5 = accuracy(out['bg_preds'], data['bg_labels'], topk=(1, 5))
            top1_bg.update(acc1[0], sz)
            top5_bg.update(acc5[0], sz)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # logging
            if i % args.print_freq == 0:
                progress.display(i)

    print(f'* Shape: Acc@1 {top1_shape.avg:.3f} Acc@5 {top5_shape.avg:.3f}')
    print(f'* Texture: Acc@1 {top1_texture.avg:.3f} Acc@5 {top5_texture.avg:.3f}')
    print(f'* BG: Acc@1 {top1_bg.avg:.3f} Acc@5 {top5_bg.avg:.3f}')

    return {'acc1/2_shape': top1_shape.avg,
            'acc1/3_texture': top1_texture.avg,
            'acc1/4_bg': top1_bg.avg,
            'acc5/2_shape': top5_shape.avg,
            'acc5/3_texture': top5_texture.avg,
            'acc5/4_bg': top5_bg.avg}
Example #31
def _to_device(data, device):
    if isinstance(data, torch.Tensor):
        return data.to(device)
    elif isinstance(data, (tuple, list)):
        return [_to_device(d, device) for d in data]
    elif isinstance(data, dict):
        return {k: _to_device(v, device) for k, v in data.items()}
    elif isinstance(data, np.ndarray):
        return torch.tensor(data).to(device)
    else:
        return data
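A brief usage sketch for the recursive helper above (tensor shapes and keys hypothetical):

# Hypothetical usage: move a nested batch onto the best available device.
dev = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch = {'ims': torch.zeros(2, 3, 8, 8), 'meta': [np.arange(2), 'id-0']}
batch = _to_device(batch, dev)  # tensors and arrays move; the string passes through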
Example #32
def _apply_to_data(data, func, unpack_dict=False):
    """Apply a function to data, trying to unpack different data
    types.

    """
    apply_ = partial(_apply_to_data, func=func, unpack_dict=unpack_dict)
    if isinstance(data, dict):
        if unpack_dict:
            return [apply_(v) for v in data.values()]
        return {k: apply_(v) for k, v in data.items()}
    elif isinstance(data, (list, tuple)):
        try:
            # e.g. list/tuple of arrays
            return [apply_(x) for x in data]
        except TypeError:
            return func(data)
    return func(data)
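For example, the unpacking above can be exercised with a shape probe over a nested batch (keys and shapes hypothetical):

# Hypothetical usage: report the shape of every array in a nested structure.
nested = {'img': np.zeros((4, 3, 32, 32)), 'targets': [np.zeros(4), np.zeros(4)]}
shapes = _apply_to_data(nested, lambda x: np.asarray(x).shape)
# -> {'img': (4, 3, 32, 32), 'targets': [(4,), (4,)]}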
Example #33
    def load_next_buffer(self):
        """ Loads next buffer """
        self._buffer_fnames = self._files[self._buffer_index:self._buffer_index + self._buffer_size]
        self._buffer_index += self._buffer_size
        self._buffer_index = self._buffer_index % len(self._files)
        self._buffer = []
        self._cum_size = [0]

        # progress bar
        pbar = tqdm(total=len(self._buffer_fnames),
                    bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} {postfix}')
        pbar.set_description("Loading file buffer ...")

        for f in self._buffer_fnames:
            with np.load(f) as data:
                self._buffer += [{k: np.copy(v) for k, v in data.items()}]
                self._cum_size += [self._cum_size[-1] +
                                   self._data_per_sequence(data['rewards'].shape[0])]
            pbar.update(1)
        pbar.close()