def get_chainer_dataset(self) -> "Tuple[TupleDataset, TupleDataset]":
    """Get the train/test split as chainer.datasets.TupleDataset pairs.

    Call this after executing self.build_data().
    """
    return (TupleDataset(self.x_train, self.y_train),
            TupleDataset(self.x_test, self.y_test))
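# A minimal usage sketch (not from the original source): `DataBuilder`
# and its build_data() body are hypothetical stand-ins showing the
# required call order for get_chainer_dataset() above.
import numpy as np
from chainer.datasets import TupleDataset

class DataBuilder:
    def build_data(self):
        # Stand-in for the real loading logic: populate the four arrays.
        self.x_train = np.random.rand(80, 4).astype(np.float32)
        self.y_train = np.random.randint(0, 3, 80).astype(np.int32)
        self.x_test = np.random.rand(20, 4).astype(np.float32)
        self.y_test = np.random.randint(0, 3, 20).astype(np.int32)

    def get_chainer_dataset(self):
        return (TupleDataset(self.x_train, self.y_train),
                TupleDataset(self.x_test, self.y_test))

builder = DataBuilder()
builder.build_data()  # must run before get_chainer_dataset()
train, test = builder.get_chainer_dataset()
assert len(train) == 80 and len(test) == 20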
def load_new_dataset(data_dir):
    train_dogs = glob(os.path.join(data_dir, 'train/dog/*.jpg'))
    train_cats = glob(os.path.join(data_dir, 'train/cat/*.jpg'))
    valid_dogs = glob(os.path.join(data_dir, 'validation/dog/*.jpg'))
    valid_cats = glob(os.path.join(data_dir, 'validation/cat/*.jpg'))
    test_dogs = glob(os.path.join(data_dir, 'test_v2/dog/*.jpg'))
    test_cats = glob(os.path.join(data_dir, 'test_v2/cat/*.jpg'))

    # Build the target labels
    train_label = {'cat': 1, 'dog': 0}
    df = pd.DataFrame({
        'file_path': train_cats + train_dogs + valid_dogs + valid_cats
        + test_dogs + test_cats,
    })
    # The label ('cat'/'dog') and split name are assumed to sit at fixed
    # depths in the file path.
    df['label'] = df['file_path'].str.split('/', expand=True)[5]
    df['dataset'] = df['file_path'].str.split('/', expand=True)[4]
    df['target'] = df['label'].replace(train_label)

    # Build the datasets
    train_df = df[df['dataset'] == 'train']
    valid_df = df[df['dataset'] == 'validation']
    test_df = df[df['dataset'] == 'test_v2']
    train = TupleDataset(train_df['file_path'].values,
                         train_df['target'].values.astype('int32'))
    valid = TupleDataset(valid_df['file_path'].values,
                         valid_df['target'].values.astype('int32'))
    test = TupleDataset(test_df['file_path'].values,
                        test_df['target'].values.astype('int32'))
    return train, valid, test
def create_dataset(path, vocab, ind_unks, size=-1):
    texts = []
    hypotheses = []
    labels = []
    with codecs.open(path, mode='r', encoding='utf-8') as fin:
        # Data has one JSON object per line
        for i, line in enumerate(fin):
            d = json.loads(line)
            t, h, label = _parse_single(d, vocab, ind_unks)
            if t is None:
                continue
            texts.append(t)
            hypotheses.append(h)
            labels.append(label)
    texts, texts_len = _pad_create(texts, np.int32)
    hypotheses, hypotheses_len = _pad_create(hypotheses, np.int32)
    labels = np.array(labels, dtype=np.int32)
    if size > 0:
        # Sample AFTER all data has been loaded, because there might be
        # bias in the data ordering.
        ind = np.random.permutation(len(labels))[:size]
        return TupleDataset(texts[ind], hypotheses[ind], texts_len[ind],
                            hypotheses_len[ind], labels[ind])
    else:
        return TupleDataset(texts, hypotheses, texts_len, hypotheses_len,
                            labels)
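# `_pad_create` is defined elsewhere; the sketch below is an assumption
# inferred from the call sites above: right-pad variable-length id
# sequences with zeros and return the padded array plus true lengths.
import numpy as np

def _pad_create(seqs, dtype):
    lengths = np.array([len(s) for s in seqs], dtype=np.int32)
    padded = np.zeros((len(seqs), int(lengths.max())), dtype=dtype)
    for i, s in enumerate(seqs):
        padded[i, :len(s)] = s
    return padded, lengths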
def select_data(dataset, n_train=100, n_test=100, n_dim=1, with_label=True,
                classes=None):
    """
    :param dataset: dataset getter, e.g. chainer.datasets.get_mnist
    :param n_train: nr of training examples per class
    :param n_test: nr of test examples per class
    :param n_dim: 1 or 3 (for convolutional input)
    :param with_label: whether or not to also provide labels
    :param classes: if not None, then it selects only those classes, e.g. [0, 1]
    :return:
    """
    train_data, test_data = dataset(ndim=n_dim, withlabel=True)

    for d in range(2):
        if d == 0:
            data = train_data._datasets[0]
            labels = train_data._datasets[1]
            n = n_train
        else:
            data = test_data._datasets[0]
            labels = test_data._datasets[1]
            n = n_test

        if not classes:
            # Use a sorted list, not a set: c is indexed below.
            c = sorted(set(labels))
        else:
            c = classes
        n_classes = len(c)

        for i in range(n_classes):
            lidx = np.where(labels == c[i])[0][:n]
            if i == 0:
                idx = lidx
            else:
                idx = np.hstack([idx, lidx])

        if with_label:
            L = np.concatenate(
                [i * np.ones(n) for i in np.arange(n_classes)]).astype('int32')
            if d == 0:
                train_data = TupleDataset(data[idx], L)
            else:
                test_data = TupleDataset(data[idx], L)
        else:
            if d == 0:
                train_data = data[idx]
            else:
                test_data = data[idx]

    return train_data, test_data
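# Example call (a sketch, not from the original source): select_data
# expects a Chainer dataset getter such as chainer.datasets.get_mnist,
# which accepts the ndim and withlabel keyword arguments used above.
import chainer

train, test = select_data(chainer.datasets.get_mnist,
                          n_train=100, n_test=100,
                          n_dim=1, with_label=True, classes=[0, 1])
print(len(train), len(test))  # 200 200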
def setUp(self):
    np.random.shuffle(self.labels_0)
    np.random.shuffle(self.labels_1)
    self.dataset_0 = TupleDataset(
        np.random.uniform(size=(N,) + self.img_shape), self.labels_0)
    self.dataset_1 = TupleDataset(
        np.random.uniform(size=(N,) + self.img_shape), self.labels_1)
    self.n_class = np.max((self.labels_0, self.labels_1)) + 1
def svhn_small():
    _get_svhn()
    dir_name = os.path.join('dataset', 'svhn')
    # SVHN .mat files store images as HWCN; transpose to NCHW and shift
    # the 1-10 labels down to 0-9.
    train = scipy.io.loadmat(os.path.join(dir_name, 'train.mat'))
    train = TupleDataset(train['X'].transpose(3, 2, 0, 1).astype(np.float32),
                         train['y'].flatten().astype(np.int32) - 1)
    test = scipy.io.loadmat(os.path.join(dir_name, 'test.mat'))
    test = TupleDataset(test['X'].transpose(3, 2, 0, 1).astype(np.float32),
                        test['y'].flatten().astype(np.int32) - 1)
    return train, test
def load_dataset():
    train = h5py.File(path.join(ROOT_PATH, 'dataset/General100_train.hdf5'), 'r')
    test = h5py.File(path.join(ROOT_PATH, 'dataset/Set14_test.hdf5'), 'r')
    # Scale pixel values to [0, 1]
    train_x = np.array(train['x_data']) / 255
    train_y = np.array(train['y_data']) / 255
    test_x = np.array(test['x_data']) / 255
    test_y = np.array(test['y_data']) / 255
    train = TupleDataset(train_x, train_y)
    test = TupleDataset(test_x, test_y)
    train = TransformDataset(train, transform)
    return train, test
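# The `transform` passed to TransformDataset above is defined elsewhere.
# A plausible sketch for paired low-/high-resolution patches: random
# flips applied to input and target together so they stay aligned.
# This is an assumption about the augmentation, not the original code.
import numpy as np

def transform(in_data):
    x, y = in_data
    if np.random.rand() < 0.5:  # horizontal flip (last axis is width)
        x = x[..., ::-1]
        y = y[..., ::-1]
    if np.random.rand() < 0.5:  # vertical flip
        x = x[..., ::-1, :]
        y = y[..., ::-1, :]
    return np.ascontiguousarray(x), np.ascontiguousarray(y)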
def get_mvmc_flatten(cam=None, tr_percent=0.5):
    if cam is None:
        cam = np.arange(6).tolist()
    url = 'https://www.dropbox.com/s/rofaov8tgqhh6jv/MVMC.npz'
    base_dir = get_dataset_directory('mvmc/')
    path = os.path.join(base_dir, 'mvmc.npz')
    if not os.path.isfile(path):
        download(url, path)
    data = np.load(path)
    X = data['X']
    y = data['y']

    # Turn label 3 into -1 for an empty view
    y = y.astype(np.int32)
    y[y == 3] = -1

    # Append the per-example max label as an extra last column
    last = np.max(y, 1)
    last = last[:, np.newaxis]
    y = np.hstack([y, last])

    ridx = np.random.permutation(range(len(X))).tolist()
    sidx = int(len(X) * tr_percent)
    Xtrain = X[ridx[:sidx]][:, cam]
    ytrain = y[ridx[:sidx]][:, cam]
    Xtest = X[ridx[sidx:]][:, cam]
    ytest = y[ridx[sidx:]][:, cam]

    # Split per camera: one float32 image array and one int32 label
    # array per view
    train_xs = Xtrain.transpose((1, 0, 2, 3, 4)).tolist()
    train_xs = [np.array(train_x).astype(np.float32) for train_x in train_xs]
    train_ys = ytrain.transpose((1, 0)).tolist()
    train_ys = [np.array(train_y).astype(np.int32) for train_y in train_ys]
    test_xs = Xtest.transpose((1, 0, 2, 3, 4)).tolist()
    test_xs = [np.array(test_x).astype(np.float32) for test_x in test_xs]
    test_ys = ytest.transpose((1, 0)).tolist()
    test_ys = [np.array(test_y).astype(np.int32) for test_y in test_ys]

    train = TupleDataset(*(train_xs + train_ys))
    test = TupleDataset(*(test_xs + test_ys))
    train = permute(train)
    test = permute(test)
    return train, test
def get_mvmc_flatten_eval(cam):
    url = 'https://www.dropbox.com/s/uk8c6iymy8nprc0/MVMC.npz'
    base_dir = get_dataset_directory('mvmc/')
    path = os.path.join(base_dir, 'mvmc.npz')
    if not os.path.isfile(path):
        download(url, path)
    data = np.load(path)
    X = data['X']
    y = data['y']

    # Turn label 3 into -1 for an empty view
    y = y.astype(np.int32)
    y[y == 3] = -1

    # Append the per-example max label as an extra last column
    last = np.max(y, 1)
    last = last[:, np.newaxis]
    y = np.hstack([y, last])

    # Fixed shuffle order so the evaluation split is reproducible
    ridx = [770, 723, 240, 21, 548, 440, 378, 192, 435, 792, 248, 784, 608, 676, 406, 353, 515, 709, 692, 303, 58, 565, 549, 82, 418, 825, 108, 562, 333, 226, 427, 431, 483, 165, 72, 386, 290, 186, 714, 740, 682, 218, 701, 417, 652, 352, 775, 60, 150, 404, 554, 823, 755, 232, 831, 221, 839, 167, 198, 567, 337, 238, 420, 400, 79, 242, 53, 474, 383, 684, 747, 537, 590, 389, 700, 423, 665, 377, 185, 301, 791, 434, 468, 231, 486, 820, 822, 798, 4, 403, 455, 233, 320, 817, 5, 407, 91, 56, 104, 151, 125, 415, 574, 316, 659, 387, 512, 661, 669, 155, 824, 518, 126, 587, 499, 205, 842, 725, 522, 342, 645, 612, 365, 65, 813, 399, 818, 38, 762, 644, 563, 463, 462, 350, 131, 343, 767, 370, 366, 630, 154, 675, 172, 270, 410, 175, 541, 478, 696, 295, 598, 766, 95, 306, 275, 286, 788, 105, 112, 210, 761, 207, 40, 48, 703, 450, 330, 493, 837, 245, 349, 732, 236, 182, 92, 201, 419, 90, 552, 19, 519, 672, 650, 662, 806, 272, 787, 73, 582, 132, 146, 100, 695, 603, 632, 76, 359, 251, 721, 102, 41, 239, 10, 393, 197, 89, 814, 664, 170, 558, 358, 163, 14, 843, 800, 797, 174, 346, 128, 203, 573, 259, 157, 261, 628, 6, 739, 241, 327, 553, 319, 835, 707, 188, 671, 534, 533, 602, 311, 785, 422, 712, 305, 528, 3, 466, 372, 827, 274, 318, 380, 145, 467, 647, 768, 144, 497, 196, 169, 481, 453, 447, 655, 635, 556, 249, 627, 752, 706, 193, 42, 836, 17, 140, 2, 413, 611, 428, 69, 288, 439, 815, 369, 348, 505, 677, 509, 718, 408, 591, 149, 200, 228, 795, 593, 566, 599, 116, 506, 215, 491, 502, 61, 500, 847, 651, 779, 620, 88, 622, 624, 414, 495, 34, 848, 487, 432, 595, 807, 78, 680, 545, 28, 759, 490, 294, 148, 62, 617, 656, 379, 489, 122, 597, 529, 778, 601, 688, 179, 543, 234, 322, 536, 171, 362, 840, 658, 763, 213, 583, 781, 260, 120, 492, 250, 516, 633, 336, 520, 32, 302, 660, 195, 30, 280, 194, 623, 217, 613, 621, 829, 314, 526, 335, 219, 461, 216, 638, 298, 782, 720, 646, 341, 152, 679, 9, 804, 25, 16, 609, 351, 331, 285, 284, 572, 446, 64, 310, 223, 173, 356, 426, 776, 367, 212, 224, 535, 398, 97, 396, 501, 81, 777, 717, 482, 594, 743, 550, 730, 523, 634, 110, 225, 266, 513, 291, 525, 130, 252, 328, 496, 542, 262, 115, 657, 87, 510, 846, 124, 111, 734, 774, 514, 488, 164, 540, 67, 683, 276, 312, 264, 12, 790, 809, 687, 576, 460, 208, 227, 786, 214, 689, 530, 394, 547, 237, 575, 158, 793, 589, 304, 765, 103, 637, 799, 833, 267, 796, 329, 22, 674, 570, 202, 607, 273, 719, 726, 639, 850, 409, 555, 246, 812, 849, 143, 18, 209, 39, 698, 577, 475, 255, 636, 15, 364, 485, 448, 473, 412, 697, 20, 728, 438, 578, 52, 129, 405, 610, 760, 470, 600, 268, 702, 35, 371, 421, 769, 168, 55, 653, 773, 7, 161, 810, 693, 166, 744, 385, 181, 464, 334, 616, 605, 24, 517, 841, 147, 59, 504, 524, 465, 243, 751, 457, 156, 71, 816, 74, 564, 772, 83, 265, 789, 724, 731, 384, 134, 640, 1, 584, 568, 592, 569, 381, 68, 844, 561, 794, 220, 402, 629, 33, 136, 299, 783, 98, 139, 47, 430, 325, 309, 199, 614, 27, 293, 531, 451, 459, 749, 507, 44, 388, 764, 802, 46, 176, 416, 93, 673, 382, 70, 729, 424, 803, 77, 159, 663, 292, 711, 780, 588, 355, 436, 753, 94, 184, 141, 667, 375, 705, 832, 49, 626, 138, 756, 750, 737, 449, 425,
            50, 80, 229, 123, 397, 106, 75, 376, 162, 137, 472, 296, 654, 694, 585, 354, 8, 811, 178, 643, 307, 317, 571, 315, 494, 269, 666, 187, 37, 704, 230, 452, 107, 222, 191, 579, 411, 287, 819, 648, 36, 771, 357, 443, 433, 521, 0, 681, 742, 401, 118, 360, 503, 13, 339, 189, 297, 722, 374, 31, 715, 135, 277, 758, 469, 757, 685, 395, 326, 670, 532, 690, 508, 109, 801, 99, 631, 142, 281, 43, 256, 838, 258, 373, 544, 313, 347, 713, 476, 527, 604, 283, 686, 480, 539, 429, 845, 581, 538, 153, 121, 253, 63, 748, 727, 235, 160, 247, 23, 477, 278, 641, 668, 66, 586, 323, 279, 805, 363, 437, 86, 391, 444, 180, 117, 557, 691, 625, 615, 289, 190, 821, 254, 546, 808, 11, 442, 204, 738, 211, 699, 282, 826, 456, 471, 26, 551, 361, 96, 710, 735, 271, 57, 458, 29, 332, 324, 338, 716, 114, 177, 619, 741, 308, 119, 618, 642, 830, 834, 445, 345, 733, 580, 560, 479, 828, 484, 606, 441, 708, 511, 113, 498, 51, 101, 45, 340, 85, 454, 390, 649, 754, 745, 392, 133, 596, 559, 244, 746, 321, 127, 678, 206, 263, 300, 257, 368, 84, 344, 54, 183, 736]

    tr_percent = 0.8
    sidx = int(len(X) * tr_percent)
    Xtrain = X[ridx[:sidx]][:, cam]
    ytrain = y[ridx[:sidx]][:, cam + [6]]
    Xtest = X[ridx[sidx:]][:, cam]
    ytest = y[ridx[sidx:]][:, cam + [6]]

    train_xs = Xtrain.transpose((1, 0, 2, 3, 4)).tolist()
    train_xs = [np.array(train_x).astype(np.float32) for train_x in train_xs]
    train_ys = ytrain.transpose((1, 0)).tolist()
    train_ys = [np.array(train_y).astype(np.int32) for train_y in train_ys]
    test_xs = Xtest.transpose((1, 0, 2, 3, 4)).tolist()
    test_xs = [np.array(test_x).astype(np.float32) for test_x in test_xs]
    test_ys = ytest.transpose((1, 0)).tolist()
    test_ys = [np.array(test_y).astype(np.int32) for test_y in test_ys]

    train = TupleDataset(*(train_xs + train_ys))
    test = TupleDataset(*(test_xs + test_ys))
    # train = permute(train)
    # test = permute(test)
    return train, test
def setUp(self):
    self.label_names = ('a', 'b', 'c')
    imgs = np.random.uniform(size=(1, 3, 2, 3))
    # There are labels for 'a' and 'b', but none for 'c'.
    pred_labels = np.array([[[1, 1, 1], [0, 0, 1]]])
    gt_labels = np.array([[[1, 0, 0], [0, -1, 1]]])

    self.iou_a = 1 / 3
    self.iou_b = 2 / 4
    self.pixel_accuracy = 3 / 5
    self.class_accuracy_a = 1 / 3
    self.class_accuracy_b = 2 / 2
    self.miou = np.mean((self.iou_a, self.iou_b))
    self.mean_class_accuracy = np.mean(
        (self.class_accuracy_a, self.class_accuracy_b))

    self.dataset = TupleDataset(imgs, gt_labels)
    self.link = _SemanticSegmentationStubLink(pred_labels)
    self.iterator = SerialIterator(
        self.dataset, 5, repeat=False, shuffle=False)
    self.evaluator = SemanticSegmentationEvaluator(
        self.iterator, self.link, self.label_names)
def act_and_trains(self, imgobj, target_angle):
    x = [self.phi(s) for s in [imgobj]]
    t = np.array([target_angle], np.float32)

    self.data.append(x[0])
    self.target_angles.append(t[0])
    # Keep a bounded replay buffer
    if len(self.data) > MAX_DATA:
        del self.data[0]
        del self.target_angles[0]

    dataset = TupleDataset(self.data, self.target_angles)
    train_iter = SerialIterator(
        dataset, batch_size=BATCH_SIZE, repeat=True, shuffle=True)
    train_batch = train_iter.next()
    x_train, t_train = chainer.dataset.concat_examples(train_batch, -1)

    y_train = self.net(x_train)
    loss_train = F.mean_squared_error(
        y_train, Variable(t_train.reshape(BATCH_SIZE, 1)))
    self.loss_list.append(loss_train.array)

    self.net.cleargrads()
    loss_train.backward()
    self.optimizer.update()

    self.count += 1
    self.results_train['loss'].append(loss_train.array)

    x_test = chainer.dataset.concat_examples(x, -1)
    with chainer.using_config('train', False), chainer.using_config(
            'enable_backprop', False):
        action_value = self.net(x_test)
    return action_value.data[0][0], loss_train.array
def create_data(n=3000):
    X = np.random.rand(n, 1).astype('float32')
    # Target at step i is X[i-1] + X[i]; the first target is 0.
    T = np.sum(np.hstack((X[0:-1], X[1:])), axis=1)
    T = np.hstack([0, T]).astype('float32')
    T = T.reshape([n, 1])
    return TupleDataset(X, T)
def make_tupledata_set_train(size=100):
    alphabet_list = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
    image_list = []
    answer_list = []

    def make_image_set():
        image_path_list = glob.glob(
            "F://notMnist_large/{0}/*".format(alphabet))
        count = 0
        _dataset = []
        # Read a few extra paths in case some images are unreadable
        for image_path in image_path_list[:size + 100]:
            try:
                _dataset.append(io.imread(image_path) / 255)
                count += 1
            except Exception:
                continue
            if count == size:
                break
        return _dataset

    def make_answer_set():
        return np.array([alphabet_list.index(alphabet)] * size)

    for alphabet in alphabet_list[:10]:
        image_list.extend(make_image_set())
        answer_list.extend(make_answer_set())

    return TupleDataset(np.array(image_list, dtype=np.float32),
                        np.array(answer_list))
def act_and_trains(self, imgobj, correct_action):
    x = [self.phi(s) for s in [imgobj]]
    t = np.array([correct_action], np.int32)

    dataset = TupleDataset(x, t)
    train_iter = SerialIterator(
        dataset, batch_size=BATCH_SIZE, repeat=True, shuffle=False)
    train_batch = train_iter.next()
    x_train, t_train = chainer.dataset.concat_examples(train_batch, -1)

    y_train = self.net(x_train)
    loss_train = F.softmax_cross_entropy(y_train, t_train)
    acc_train = F.accuracy(y_train, t_train)
    self.loss_list.append(loss_train.array)
    self.acc_list.append(acc_train.array)

    self.net.cleargrads()
    loss_train.backward()
    self.optimizer.update()

    self.count += 1
    self.results_train['loss'].append(loss_train.array)
    self.results_train['accuracy'].append(acc_train.array)

    action = np.argmax(y_train.array)
    self.accuracy = np.mean(self.acc_list)
    # print('iteration: {}, acc (train): {:.4f}, action: {}'.format(
    #     self.count, self.accuracy, action))
    return action
def generate_random_test_dataset(N=10000):
    import numpy as np
    from chainer.datasets import TupleDataset, split_dataset_random

    Y = np.random.randint(0, 10, N).astype(np.float32)
    X = []
    # Build a one-hot encoding of each label, tiled over a 24 x 96 grid
    for i in range(N):
        y = int(Y[i])
        a = []
        for j in range(24):
            b = []
            for k in range(96):
                c = np.zeros((10,))
                c[y] = 1
                b.append(c)
            a.append(b)
        X.append(a)
    X = np.array(X, np.float32)
    X = np.rollaxis(X, 3, 1)  # (N, 24, 96, 10) -> (N, 10, 24, 96)

    ds = TupleDataset(X, Y)
    train, val = split_dataset_random(ds, int(N * .9))
    return train, val, [], 10
def check_generic(comm, length, bs):
    assert bs > 0
    assert length > 0
    a = list(range(comm.rank, length, comm.size))
    b = list(range(comm.rank, length, comm.size))
    c = list(range(comm.rank, length, comm.size))

    model = ExampleModel()
    dataset = TupleDataset(a, b, c)
    iterator = SerialIterator(dataset, bs, shuffle=False, repeat=False)
    evaluator = GenericMultiNodeEvaluator(comm, iterator, model)

    results = evaluator(None)

    # Make expected answer
    iterator.reset()
    s = [
        [
            aa + bb + cc  # Same calculation as model
            for aa, bb, cc in batch
        ]
        for batch in iterator
    ]
    s = comm.gather_obj(s)

    if comm.rank == 0:
        # Flatten the list of lists gathered from all ranks
        expected = []
        for e in zip(*s):
            expected.extend(e)

        for e, r in zip(expected, results):
            chainer.testing.assert_allclose(e, r)
    else:
        assert results is None
def fit(self, X, y):
    train = TupleDataset(X)
    train_iter = SerialIterator(train, self.batchsize)
    updater = training.StandardUpdater(train_iter, self.optimizer,
                                       device=self.device)
    trainer = training.Trainer(updater, (self.n_epochs, 'epoch'),
                               out='out_' + str(self.device))

    # Set up logging, printing & saving
    keys = self.model.keys
    reports = ['epoch']
    reports += ['main/' + key for key in keys]
    trainer.extend(extensions.snapshot(), trigger=(1000, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
    trainer.extend(extensions.PrintReport(reports))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # If a previous model is detected, resume from it
    if self.resume:
        print("Loading from {}".format(self.resume))
        chainer.serializers.load_npz(self.resume, trainer)

    # Run the model
    trainer.run()
def test_cache_or_load_dataset(self):
    N = 10
    float_dataset = np.random.uniform(
        size=(N, 3, 10, 10)).astype(np.float32)
    int_dataset = np.random.uniform(size=(N,)).astype(np.int32)
    dataset = TupleDataset(float_dataset, int_dataset)

    temp_dir = tempfile.mkdtemp()
    fn = os.path.join(temp_dir, 'cache.dat')
    obtained = cache_or_load_dataset(fn, dataset)
    val_1, val_2 = obtained[1]
    expected_1, expected_2 = dataset[1]
    np.testing.assert_equal(val_1, expected_1)
    np.testing.assert_equal(val_2, expected_2)

    # Test loading from the cache
    dataset = cache_or_load_dataset(fn)
    self.assertEqual(len(dataset), N)
    for i in range(N):
        expected_1, expected_2 = dataset[i]
        val_1, val_2 = obtained[i]
        np.testing.assert_equal(val_1, expected_1)
        np.testing.assert_equal(val_2, expected_2)
def setUp(self):
    self.comm = create_communicator('naive')

    batchsize_per_process = 5
    batchsize = batchsize_per_process * self.comm.size
    if self.comm.rank == 0:
        bboxes = [
            generate_random_bbox(5, (256, 324), 24, 120)
            for _ in range(10)
        ]
        labels = [
            np.random.choice(np.arange(3, dtype=np.int32), size=(5,))
            for _ in range(10)
        ]
    else:
        bboxes = None
        labels = None
    initial_count = self.comm.rank * batchsize_per_process

    bboxes = self.comm.bcast_obj(bboxes)
    labels = self.comm.bcast_obj(labels)
    self.bboxes = bboxes
    self.labels = labels

    self.dataset = TupleDataset(
        np.random.uniform(size=(10, 3, 32, 48)), bboxes, labels)
    self.initial_count = initial_count
    self.batchsize = batchsize
def get_tuple_dataset(data):
    slice_array = data["inks"]
    label_array = data["labels"]
    length_array = [c[0] for c in data["shapes"]]
    # Pack (label, length) together so each example is (ink, (label, length))
    zipped = list(zip(label_array, length_array))
    dataset = TupleDataset(slice_array, zipped)
    return dataset
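# Access sketch (not from the original source): the second element of
# each example is itself a (label, length) tuple. The `data` dict below
# is a hypothetical stand-in with the keys used above.
import numpy as np

data = {
    "inks": np.random.rand(4, 100, 3).astype(np.float32),
    "labels": np.arange(4, dtype=np.int32),
    "shapes": [(100, 3)] * 4,
}
dataset = get_tuple_dataset(data)
ink, (label, length) = dataset[0]
print(ink.shape, label, length)  # (100, 3) 0 100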
def setUp(self):
    self.comm = create_communicator('naive')

    batchsize_per_process = 5
    batchsize = batchsize_per_process * self.comm.size
    if self.comm.rank == 0:
        masks = [
            np.random.uniform(size=(5, 32, 48)) > 0.5
            for _ in range(10)
        ]
        labels = [
            np.random.choice(np.arange(3, dtype=np.int32), size=(5,))
            for _ in range(10)
        ]
    else:
        masks = None
        labels = None
    initial_count = self.comm.rank * batchsize_per_process

    masks = self.comm.bcast_obj(masks)
    labels = self.comm.bcast_obj(labels)
    self.masks = masks
    self.labels = labels

    self.dataset = TupleDataset(
        np.random.uniform(size=(10, 3, 32, 48)), masks, labels)
    self.initial_count = initial_count
    self.batchsize = batchsize
def make_data(self, data, shuffle=False):
    # Split each trajectory into states and controls (xp is the array
    # module, numpy or cupy, per the usual Chainer convention)
    xs, us = data[:, :, :self.n_state], data[:, :, -self.n_ctrl:]
    x_inits = xs[:, 0]
    n_data = x_inits.shape[0]
    self.n_data = n_data
    ds = TupleDataset(x_inits, xs, us, xp.arange(0, n_data))
    loader = SerialIterator(ds, batch_size=self.n_batch, shuffle=shuffle)
    return ds, loader
def make(self, length_of_sequence):
    all_data = np.array(
        [math.sin(i * 2 * math.pi / self.steps_per_cycle)
         for i in range(self.steps_per_cycle)] * self.number_of_cycles)
    sequences = []
    t = []
    # Each example: a window of the sine wave and the value that follows it
    for i in range(len(all_data) - length_of_sequence):
        sequences.append(all_data[i:i + length_of_sequence])
        t.append(all_data[i + length_of_sequence])
    return TupleDataset(sequences, t)
def get_mvmc(cam=None, tr_percent=0.5):
    if cam is None:
        cam = np.arange(6)
    url = 'https://www.dropbox.com/s/rofaov8tgqhh6jv/MVMC.npz'
    base_dir = get_dataset_directory('mvmc/')
    path = os.path.join(base_dir, 'mvmc.npz')
    if not os.path.isfile(path):
        download(url, path)
    data = np.load(path)
    X = data['X']
    y = data['y']
    sidx = int(len(y) * tr_percent)
    train = TupleDataset(X[:sidx, cam], y[:sidx, cam])
    test = TupleDataset(X[sidx:, cam], y[sidx:, cam])
    return train, test
def setUp(self):
    # Load the iris dataset
    data, target = load_iris(return_X_y=True)
    self.train_x = data
    self.train_y = target
    self.label_dim = numpy.max(target) + 1
    X = data.astype(numpy.float32)
    y = target.astype(numpy.int32)
    self.dataset = TupleDataset(X, y)
def create_dataset(path, size=-1):
    """Create a dataset from MQ2007 data.

    .. warning::

        This creates a dataset whose labels are in the range [0, 1, 2].
        That is fine for the Permutation Probability Loss, but do not
        plug it into other loss functions.
    """
    data = collections.defaultdict(lambda: [[], []])
    with open(path, mode='r') as fin:
        # One relevance-scored query-document pair per line
        for i, line in enumerate(fin):
            q, r, v = _parse_single(line)
            if r not in {0, 1, 2}:
                raise DatasetParseError(
                    "L%d: Score must be 0, 1 or 2, but found %d" % (i, r))
            data[q][0].append(r)
            data[q][1].append(v)

    vectors = []
    scores = []
    for d in data.values():
        v = np.array(d[1], dtype=np.float32)
        s = np.array(d[0], dtype=np.float32)
        vectors.append(v)
        scores.append(s)

    # Zero-pad every query to the length of the longest one and record
    # the true number of documents per query.
    s = max(map(len, scores))
    vectors_pad = np.zeros((len(vectors), s, v.shape[-1]), dtype=np.float32)
    scores_pad = np.zeros((len(scores), s), dtype=np.float32)
    length = np.empty((len(scores),), dtype=np.int32)
    for i, (s, v) in enumerate(zip(scores, vectors)):
        vectors_pad[i, :len(v), :] = v
        scores_pad[i, :len(s)] = s
        length[i] = len(v)

    if size > 0:
        # Sample AFTER all data has been loaded, because there might be
        # bias in the data ordering.
        ind = np.random.permutation(len(vectors))[:size]
        return TupleDataset(vectors_pad[ind], scores_pad[ind], length[ind])
    else:
        return TupleDataset(vectors_pad, scores_pad, length)
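# `_parse_single` is defined elsewhere. MQ2007 ships in the
# LETOR/SVMlight-style format '<rel> qid:<id> 1:<v1> 2:<v2> ... #comment',
# so a plausible sketch of the parser (the (q, r, v) return order follows
# the call site above; the real helper may differ) is:
import numpy as np

def _parse_single(line):
    body = line.split('#', 1)[0].split()  # drop the trailing comment
    r = int(body[0])                      # relevance score
    q = body[1].split(':', 1)[1]          # 'qid:10' -> '10'
    v = np.array([float(tok.split(':', 1)[1]) for tok in body[2:]],
                 dtype=np.float32)
    return q, r, v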
def setUp(self):
    masks = np.random.uniform(size=(10, 5, 32, 48)) > 0.5
    labels = np.ones((10, 5), dtype=np.int32)
    self.dataset = TupleDataset(
        np.random.uniform(size=(10, 3, 32, 48)), masks, labels)
    self.link = _InstanceSegmentationStubLink(masks, labels)
    self.iterator = SerialIterator(
        self.dataset, 1, repeat=False, shuffle=False)
    self.evaluator = InstanceSegmentationCOCOEvaluator(
        self.iterator, self.link, label_names=('cls0', 'cls1', 'cls2'))
    self.expected_ap = 1
def setUp(self):
    self.trainer = mock.MagicMock()
    self.trainer.out = tempfile.mkdtemp()
    self.link = _RandomDetectionStubLink()
    self.dataset = TupleDataset(
        np.random.uniform(size=(10, 3, 32, 48)),
        np.random.uniform(size=(10, 5, 4)),
        np.random.randint(0, 19, size=(10, 5)))
    self.iterator = SerialIterator(
        self.dataset, 10, repeat=False, shuffle=False)
def create_dataset(figpath=None):
    np.random.seed(215)
    n = 1024
    # Two well-separated Gaussian blobs around (+20, +20) and (-20, -20)
    a = np.random.randn(n, 2) + 20 * np.ones((n, 2))
    b = np.random.randn(n, 2) - 20 * np.ones((n, 2))
    x = np.concatenate([a, b], axis=0)
    np.random.shuffle(x)
    if figpath is not None:
        sns.kdeplot(x[:, 0], x[:, 1])
        plt.savefig(figpath)
        plt.close()
    x = x.astype('f')
    return TupleDataset(x)