def get_chainer_dataset(self) -> "chainer.datasets.TupleDataset":
    """
    Get the train/test split as chainer.datasets.TupleDataset objects.
    Call this after executing self.build_data().
    """
    return (TupleDataset(self.x_train, self.y_train),
            TupleDataset(self.x_test, self.y_test))
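
A minimal usage sketch (`loader` is a hypothetical instance of the class this method belongs to; indexing a TupleDataset returns a tuple with the corresponding element of each underlying array):

loader.build_data()
train, test = loader.get_chainer_dataset()
x0, y0 = train[0]   # equivalent to (loader.x_train[0], loader.y_train[0])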
Code example #2
File: dataset.py Project: sjtu-cs222/Group-21
def create_dataset(path, size=-1):

    data = collections.defaultdict(lambda: [[], []])
    with open(path, mode='r') as fin:
        for i, line in enumerate(fin):
            q, r, v = _parse_single(line)
            if r not in {0, 1, 2}:
                raise DatasetParseError(
                    "L%d: Score must be 0, 1 or 2, but found %d" %
                    (i, r)
                )
            data[q][0].append(r)
            data[q][1].append(v)
    vectors = []
    scores = []
    for d in data.values():
        v = np.array(d[1], dtype=np.float32)
        s = np.array(d[0], dtype=np.float32)
        vectors.append(v)
        scores.append(s)
    s = max(map(len, scores))
    vectors_pad = np.zeros((len(vectors), s, v.shape[-1]), dtype=np.float32)
    scores_pad = np.zeros((len(scores), s), dtype=np.float32)
    length = np.empty((len(scores)), dtype=np.int32)
    for i, (s, v) in enumerate(zip(scores, vectors)):
        vectors_pad[i, :len(v), :] = v
        scores_pad[i, :len(s)] = s
        length[i] = len(v)

    if size > 0:
        ind = np.random.permutation(len(vectors))[:size]
        return TupleDataset(vectors_pad[ind], scores_pad[ind], length[ind])
    else:
        return TupleDataset(vectors_pad, scores_pad, length)
Code example #3
def load_new_dataset(data_dir):
    train_dogs = glob(os.path.join(data_dir, 'train/dog/*.jpg'))
    train_cats = glob(os.path.join(data_dir, 'train/cat/*.jpg'))
    valid_dogs = glob(os.path.join(data_dir, 'validation/dog/*.jpg'))
    valid_cats = glob(os.path.join(data_dir, 'validation/cat/*.jpg'))
    test_dogs = glob(os.path.join(data_dir, 'test_v2/dog/*.jpg'))
    test_cats = glob(os.path.join(data_dir, 'test_v2/cat/*.jpg'))

    # Create the training labels (class name -> integer target)
    train_label = {'cat': 1, 'dog': 0}

    df = pd.DataFrame({
        'file_path':
        train_cats + train_dogs + valid_dogs + valid_cats + test_dogs +
        test_cats,
    })
    df['label'] = df['file_path'].str.split('/', expand=True)[5]
    df['dataset'] = df['file_path'].str.split('/', expand=True)[4]
    df['target'] = df['label'].replace(train_label)

    # Create the train/validation/test datasets
    train_df = df[df['dataset'] == 'train']
    valid_df = df[df['dataset'] == 'validation']
    test_df = df[df['dataset'] == 'test_v2']

    train = TupleDataset(train_df['file_path'].values,
                         train_df['target'].values.astype('int32'))
    valid = TupleDataset(valid_df['file_path'].values,
                         valid_df['target'].values.astype('int32'))
    test = TupleDataset(test_df['file_path'].values,
                        test_df['target'].values.astype('int32'))

    return train, valid, test
Code example #4
def create_dataset(path, vocab, ind_unks, size=-1):
    texts = []
    hypotheses = []
    labels = []
    with codecs.open(path, mode='r', encoding='utf-8') as fin:
        # Data has one json per line
        for i, line in enumerate(fin):
            d = json.loads(line)
            t, h, l = _parse_single(d, vocab, ind_unks)
            if t is None:
                continue
            texts.append(t)
            hypotheses.append(h)
            labels.append(l)
    texts, texts_len = _pad_create(texts, np.int32)
    hypotheses, hypotheses_len = _pad_create(hypotheses, np.int32)
    labels = np.array(labels, dtype=np.int32)
    if size > 0:
        # Sample data AFTER all data has been loaded,
        # because there might be bias in the data ordering.
        ind = np.random.permutation(len(labels))[:size]
        return TupleDataset(texts[ind], hypotheses[ind], texts_len[ind],
                            hypotheses_len[ind], labels[ind])
    else:
        return TupleDataset(texts, hypotheses, texts_len, hypotheses_len,
                            labels)
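
The helpers _parse_single and _pad_create are not shown in this snippet. A hypothetical sketch of _pad_create, matching how it is used above (it must return a zero-padded 2-D array together with the original sequence lengths):

import numpy as np

def _pad_create(seqs, dtype):
    # hypothetical helper: zero-pad variable-length token-id sequences into a
    # single (n_examples, max_len) array and keep each sequence's true length
    lengths = np.array([len(s) for s in seqs], dtype=np.int32)
    padded = np.zeros((len(seqs), int(lengths.max())), dtype=dtype)
    for i, s in enumerate(seqs):
        padded[i, :len(s)] = s
    return padded, lengths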
Code example #5
def select_data(dataset,
                n_train=100,
                n_test=100,
                n_dim=1,
                with_label=True,
                classes=None):
    """

    :param n_train: nr of training examples per class
    :param n_test: nr of test examples per class
    :param n_dim: 1 or 3 (for convolutional input)
    :param with_label: whether or not to also provide labels
    :param classes: if not None, then it selects only those classes, e.g. [0, 1]
    :return:
    """

    train_data, test_data = dataset(ndim=n_dim, withlabel=True)

    for d in range(2):

        if d == 0:
            data = train_data._datasets[0]
            labels = train_data._datasets[1]
            n = n_train
        else:
            data = test_data._datasets[0]
            labels = test_data._datasets[1]
            n = n_test

        if not classes:
            c = sorted(set(labels))  # a set is not indexable; use a sorted list of the observed classes
        else:
            c = classes
        n_classes = len(c)

        for i in range(n_classes):
            lidx = np.where(labels == c[i])[0][:n]
            if i == 0:
                idx = lidx
            else:
                idx = np.hstack([idx, lidx])

        if with_label:
            L = np.concatenate([i * np.ones(n)
                                for i in np.arange(n_classes)]).astype('int32')

            if d == 0:
                train_data = TupleDataset(data[idx], L)
            else:
                test_data = TupleDataset(data[idx], L)
        else:
            if d == 0:
                train_data = data[idx]
            else:
                test_data = data[idx]

    return train_data, test_data
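
A hedged usage sketch, assuming the `dataset` argument is a Chainer dataset getter such as chainer.datasets.get_mnist (which accepts the ndim and withlabel keyword arguments used above):

from chainer.datasets import get_mnist

# 100 train / 50 test examples per class, restricted to digits 0 and 1
train, test = select_data(get_mnist, n_train=100, n_test=50, n_dim=1,
                          with_label=True, classes=[0, 1])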
Code example #6
    def setUp(self):
        np.random.shuffle(self.labels_0)
        np.random.shuffle(self.labels_1)

        self.dataset_0 = TupleDataset(
            np.random.uniform(size=(N, ) + self.img_shape), self.labels_0)
        self.dataset_1 = TupleDataset(
            np.random.uniform(size=(N, ) + self.img_shape), self.labels_1)
        self.n_class = np.max((self.labels_0, self.labels_1)) + 1
Code example #7
def svhn_small():
    _get_svhn()
    dir_name = os.path.join('dataset', 'svhn')
    train = scipy.io.loadmat(os.path.join(dir_name, 'train.mat'))
    train = TupleDataset(train['X'].transpose(3, 2, 0, 1).astype(np.float32),
                         train['y'].flatten().astype(np.int32) - 1)
    test = scipy.io.loadmat(os.path.join(dir_name, 'test.mat'))
    test = TupleDataset(test['X'].transpose(3, 2, 0, 1).astype(np.float32),
                        test['y'].flatten().astype(np.int32) - 1)
    return train, test
Code example #8
File: train_SRCNN.py Project: shimo8810/STVSR
def load_dataset():
    train = h5py.File(path.join(ROOT_PATH, 'dataset/General100_train.hdf5'), 'r')
    test = h5py.File(path.join(ROOT_PATH, 'dataset/Set14_test.hdf5'), 'r')

    train_x, train_y = np.array(train['x_data']) / 255, np.array(train['y_data']) / 255
    test_x, test_y = np.array(test['x_data']) / 255, np.array(test['y_data']) / 255

    train = TupleDataset(train_x, train_y)
    test = TupleDataset(test_x, test_y)

    train = TransformDataset(train, transform)

    return train, test
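
The `transform` callable passed to TransformDataset is not defined in this snippet. A hypothetical sketch, assuming each example is an (x, y) pair of float32 patch arrays, could apply the same random horizontal flip to input and target:

def transform(in_data):
    # hypothetical augmentation: TransformDataset calls this with one example
    # and expects the transformed example back
    x, y = in_data
    if np.random.rand() < 0.5:
        x = x[..., ::-1]  # flip along the last (width) axis
        y = y[..., ::-1]
    return x, y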
Code example #9
def get_mvmc_flatten(cam=None, tr_percent=0.5):
    if cam is None:
        cam = np.arange(6).tolist()

    url = 'https://www.dropbox.com/s/rofaov8tgqhh6jv/MVMC.npz'
    base_dir = get_dataset_directory('mvmc/')
    path = os.path.join(base_dir, 'mvmc.npz')

    if not os.path.isfile(path):
        download(url, path)

    data = np.load(path)
    X = data['X']
    y = data['y']

    # Turn label 3 into -1 to mark an empty view
    y = y.astype(np.int32)
    y[y == 3] = -1

    # Append each example's maximum label over all views as an extra column
    last = np.max(y, 1)
    last = last[:, np.newaxis]
    y = np.hstack([y, last])

    ridx = np.random.permutation(range(len(X))).tolist()

    sidx = int(len(X) * tr_percent)

    Xtrain = X[ridx[:sidx]][:, cam]
    ytrain = y[ridx[:sidx]][:, cam]

    Xtest = X[ridx[sidx:]][:, cam]
    ytest = y[ridx[sidx:]][:, cam]

    train_xs = Xtrain.transpose((1, 0, 2, 3, 4)).tolist()
    train_xs = [np.array(train_x).astype(np.float32) for train_x in train_xs]
    train_ys = ytrain.transpose((1, 0)).tolist()
    train_ys = [np.array(train_y).astype(np.int32) for train_y in train_ys]

    test_xs = Xtest.transpose((1, 0, 2, 3, 4)).tolist()
    test_xs = [np.array(test_x).astype(np.float32) for test_x in test_xs]
    test_ys = ytest.transpose((1, 0)).tolist()
    test_ys = [np.array(test_y).astype(np.int32) for test_y in test_ys]

    train = TupleDataset(*(train_xs + train_ys))
    test = TupleDataset(*(test_xs + test_ys))

    train = permute(train)
    test = permute(test)

    return train, test
Code example #10
File: datasets.py Project: yyuzhongpv/ddnn
def get_mvmc_flatten_eval(cam):
    
    url = 'https://www.dropbox.com/s/uk8c6iymy8nprc0/MVMC.npz'
    base_dir = get_dataset_directory('mvmc/')
    path = os.path.join(base_dir, 'mvmc.npz')

    if not os.path.isfile(path):
        download(url, path)

    data = np.load(path)
    X = data['X']
    y = data['y']
    
    # Turn label 3 into -1 to mark an empty view
    y = y.astype(np.int32)
    y[y == 3] = -1

    # Append each example's maximum label over all views as an extra column
    last = np.max(y, 1)
    last = last[:, np.newaxis]
    y = np.hstack([y, last])
    
    ridx = [770, 723, 240, 21, 548, 440, 378, 192, 435, 792, 248, 784, 608, 676, 406, 353, 515, 709, 692, 303, 58, 565, 549, 82, 418, 825, 108, 562, 333, 226, 427, 431, 483, 165, 72, 386, 290, 186, 714, 740, 682, 218, 701, 417, 652, 352, 775, 60, 150, 404, 554, 823, 755, 232, 831, 221, 839, 167, 198, 567, 337, 238, 420, 400, 79, 242, 53, 474, 383, 684, 747, 537, 590, 389, 700, 423, 665, 377, 185, 301, 791, 434, 468, 231, 486, 820, 822, 798, 4, 403, 455, 233, 320, 817, 5, 407, 91, 56, 104, 151, 125, 415, 574, 316, 659, 387, 512, 661, 669, 155, 824, 518, 126, 587, 499, 205, 842, 725, 522, 342, 645, 612, 365, 65, 813, 399, 818, 38, 762, 644, 563, 463, 462, 350, 131, 343, 767, 370, 366, 630, 154, 675, 172, 270, 410, 175, 541, 478, 696, 295, 598, 766, 95, 306, 275, 286, 788, 105, 112, 210, 761, 207, 40, 48, 703, 450, 330, 493, 837, 245, 349, 732, 236, 182, 92, 201, 419, 90, 552, 19, 519, 672, 650, 662, 806, 272, 787, 73, 582, 132, 146, 100, 695, 603, 632, 76, 359, 251, 721, 102, 41, 239, 10, 393, 197, 89, 814, 664, 170, 558, 358, 163, 14, 843, 800, 797, 174, 346, 128, 203, 573, 259, 157, 261, 628, 6, 739, 241, 327, 553, 319, 835, 707, 188, 671, 534, 533, 602, 311, 785, 422, 712, 305, 528, 3, 466, 372, 827, 274, 318, 380, 145, 467, 647, 768, 144, 497, 196, 169, 481, 453, 447, 655, 635, 556, 249, 627, 752, 706, 193, 42, 836, 17, 140, 2, 413, 611, 428, 69, 288, 439, 815, 369, 348, 505, 677, 509, 718, 408, 591, 149, 200, 228, 795, 593, 566, 599, 116, 506, 215, 491, 502, 61, 500, 847, 651, 779, 620, 88, 622, 624, 414, 495, 34, 848, 487, 432, 595, 807, 78, 680, 545, 28, 759, 490, 294, 148, 62, 617, 656, 379, 489, 122, 597, 529, 778, 601, 688, 179, 543, 234, 322, 536, 171, 362, 840, 658, 763, 213, 583, 781, 260, 120, 492, 250, 516, 633, 336, 520, 32, 302, 660, 195, 30, 280, 194, 623, 217, 613, 621, 829, 314, 526, 335, 219, 461, 216, 638, 298, 782, 720, 646, 341, 152, 679, 9, 804, 25, 16, 609, 351, 331, 285, 284, 572, 446, 64, 310, 223, 173, 356, 426, 776, 367, 212, 224, 535, 398, 97, 396, 501, 81, 777, 717, 482, 594, 743, 550, 730, 523, 634, 110, 225, 266, 513, 291, 525, 130, 252, 328, 496, 542, 262, 115, 657, 87, 510, 846, 124, 111, 734, 774, 514, 488, 164, 540, 67, 683, 276, 312, 264, 12, 790, 809, 687, 576, 460, 208, 227, 786, 214, 689, 530, 394, 547, 237, 575, 158, 793, 589, 304, 765, 103, 637, 799, 833, 267, 796, 329, 22, 674, 570, 202, 607, 273, 719, 726, 639, 850, 409, 555, 246, 812, 849, 143, 18, 209, 39, 698, 577, 475, 255, 636, 15, 364, 485, 448, 473, 412, 697, 20, 728, 438, 578, 52, 129, 405, 610, 760, 470, 600, 268, 702, 35, 371, 421, 769, 168, 55, 653, 773, 7, 161, 810, 693, 166, 744, 385, 181, 464, 334, 616, 605, 24, 517, 841, 147, 59, 504, 524, 465, 243, 751, 457, 156, 71, 816, 74, 564, 772, 83, 265, 789, 724, 731, 384, 134, 640, 1, 584, 568, 592, 569, 381, 68, 844, 561, 794, 220, 402, 629, 33, 136, 299, 783, 98, 139, 47, 430, 325, 309, 199, 614, 27, 293, 531, 451, 459, 749, 507, 44, 388, 764, 802, 46, 176, 416, 93, 673, 382, 70, 729, 424, 803, 77, 159, 663, 292, 711, 780, 588, 355, 436, 753, 94, 184, 141, 667, 375, 705, 832, 49, 626, 138, 756, 750, 737, 449, 425, 50, 80, 229, 123, 397, 106, 75, 376, 162, 137, 472, 296, 654, 694, 585, 354, 8, 811, 178, 643, 307, 317, 571, 315, 494, 269, 666, 187, 37, 704, 230, 452, 107, 222, 191, 579, 411, 287, 819, 648, 36, 771, 357, 443, 433, 521, 0, 681, 742, 401, 118, 360, 503, 13, 339, 189, 297, 722, 374, 31, 715, 135, 277, 758, 469, 757, 685, 395, 326, 670, 532, 690, 508, 109, 801, 99, 631, 142, 281, 43, 256, 838, 258, 373, 544, 313, 347, 713, 
476, 527, 604, 283, 686, 480, 539, 429, 845, 581, 538, 153, 121, 253, 63, 748, 727, 235, 160, 247, 23, 477, 278, 641, 668, 66, 586, 323, 279, 805, 363, 437, 86, 391, 444, 180, 117, 557, 691, 625, 615, 289, 190, 821, 254, 546, 808, 11, 442, 204, 738, 211, 699, 282, 826, 456, 471, 26, 551, 361, 96, 710, 735, 271, 57, 458, 29, 332, 324, 338, 716, 114, 177, 619, 741, 308, 119, 618, 642, 830, 834, 445, 345, 733, 580, 560, 479, 828, 484, 606, 441, 708, 511, 113, 498, 51, 101, 45, 340, 85, 454, 390, 649, 754, 745, 392, 133, 596, 559, 244, 746, 321, 127, 678, 206, 263, 300, 257, 368, 84, 344, 54, 183, 736]
    tr_percent = 0.8
    
    sidx = int(len(X)*tr_percent)
    
    Xtrain = X[ridx[:sidx]][:,cam]
    ytrain = y[ridx[:sidx]][:,cam+[6]]
    
    Xtest = X[ridx[sidx:]][:,cam]
    ytest = y[ridx[sidx:]][:,cam+[6]]
        
    train_xs = Xtrain.transpose((1,0,2,3,4)).tolist()
    train_xs = [np.array(train_x).astype(np.float32) for train_x in train_xs]
    train_ys = ytrain.transpose((1,0)).tolist()
    train_ys = [np.array(train_y).astype(np.int32) for train_y in train_ys]
    
    test_xs = Xtest.transpose((1,0,2,3,4)).tolist()
    test_xs = [np.array(test_x).astype(np.float32) for test_x in test_xs]
    test_ys = ytest.transpose((1,0)).tolist()
    test_ys = [np.array(test_y).astype(np.int32) for test_y in test_ys]
    
    train = TupleDataset(*(train_xs + train_ys))
    test = TupleDataset(*(test_xs + test_ys))
    
    #train = permute(train)
    #test = permute(test)
    
    return train, test
Code example #11
    def setUp(self):
        self.label_names = ('a', 'b', 'c')
        imgs = np.random.uniform(size=(1, 3, 2, 3))
        # There are labels for 'a' and 'b', but none for 'c'.
        pred_labels = np.array([[[1, 1, 1], [0, 0, 1]]])
        gt_labels = np.array([[[1, 0, 0], [0, -1, 1]]])

        self.iou_a = 1 / 3
        self.iou_b = 2 / 4
        self.pixel_accuracy = 3 / 5
        self.class_accuracy_a = 1 / 3
        self.class_accuracy_b = 2 / 2
        self.miou = np.mean((self.iou_a, self.iou_b))
        self.mean_class_accuracy = np.mean(
            (self.class_accuracy_a, self.class_accuracy_b))

        self.dataset = TupleDataset(imgs, gt_labels)
        self.link = _SemanticSegmentationStubLink(pred_labels)
        self.iterator = SerialIterator(self.dataset,
                                       5,
                                       repeat=False,
                                       shuffle=False)
        self.evaluator = SemanticSegmentationEvaluator(self.iterator,
                                                       self.link,
                                                       self.label_names)
Code example #12
    def act_and_trains(self, imgobj, target_angle):
        x = [self.phi(s) for s in [imgobj]]
        t = np.array([target_angle], np.float32)
        self.data.append(x[0])
        self.target_angles.append(t[0])
        if len(self.data) > MAX_DATA:
            del self.data[0]
            del self.target_angles[0]
        dataset = TupleDataset(self.data, self.target_angles)
        train_iter = SerialIterator(dataset,
                                    batch_size=BATCH_SIZE,
                                    repeat=True,
                                    shuffle=True)
        train_batch = train_iter.next()
        x_train, t_train = chainer.dataset.concat_examples(train_batch, -1)

        y_train = self.net(x_train)
        loss_train = F.mean_squared_error(
            y_train, Variable(t_train.reshape(BATCH_SIZE, 1)))

        self.loss_list.append(loss_train.array)

        self.net.cleargrads()
        loss_train.backward()
        self.optimizer.update()

        self.count += 1

        self.results_train['loss'].append(loss_train.array)
        x_test = chainer.dataset.concat_examples(x, -1)
        with chainer.using_config('train', False), chainer.using_config(
                'enable_backprop', False):
            action_value = self.net(x_test)
        return action_value.data[0][0], loss_train.array
Code example #13
def create_data(n=3000):
    X = np.random.rand(n, 1).astype('float32')
    T = np.sum(np.hstack((X[0:-1], X[1:])), axis=1)
    T = np.hstack([0, T[0:]]).astype('float32')
    T = T.reshape([n, 1])

    return TupleDataset(X, T)
Code example #14
def make_tupledata_set_train(size=100):
    alphabet_list = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
    image_list = []
    answer_list = []

    def make_image_set():
        image_path_list = glob.glob(
            "F://notMnist_large/{0}/*".format(alphabet))
        count = 0
        _dataset = []
        for image_path in image_path_list[:size + 100]:
            try:
                _dataset.append(io.imread(image_path) / 255)
                count += 1
            except Exception:
                # skip images that cannot be read
                continue
            if count == size:
                break
        return _dataset

    def make_answer_set():
        return np.array([alphabet_list.index(alphabet)] * size)

    for alphabet in alphabet_list[:10]:
        image_list.extend(make_image_set())
        answer_list.extend(make_answer_set())

    return TupleDataset(np.array(image_list, dtype=np.float32),
                        np.array(answer_list))
Code example #15
    def act_and_trains(self, imgobj, correct_action):
        x = [self.phi(s) for s in [imgobj]]
        t = np.array([correct_action], np.int32)
        dataset = TupleDataset(x, t)
        train_iter = SerialIterator(dataset,
                                    batch_size=BATCH_SIZE,
                                    repeat=True,
                                    shuffle=False)
        train_batch = train_iter.next()
        x_train, t_train = chainer.dataset.concat_examples(train_batch, -1)

        y_train = self.net(x_train)

        loss_train = F.softmax_cross_entropy(y_train, t_train)
        acc_train = F.accuracy(y_train, t_train)

        self.loss_list.append(loss_train.array)
        self.acc_list.append(acc_train.array)

        self.net.cleargrads()
        loss_train.backward()
        self.optimizer.update()

        self.count += 1

        self.results_train['loss'].append(loss_train.array)
        self.results_train['accuracy'].append(acc_train.array)

        action = np.argmax(y_train.array)
        self.accuracy = np.mean(self.acc_list)
        # print('iteration: {}, acc (train): {:.4f}, action: {}'.format(self.count, self.accuracy, action))

        return action
Code example #16
File: experiment.py Project: svoss/masters-thesis
def generate_random_test_dataset(N=10000):
    import numpy as np
    Y = np.random.randint(0,10,N)
    Y = Y.astype(np.float32)
    X = []
    for i in range(0, N):  # range() so the snippet also runs under Python 3
        y = int(Y[i])
        a = []
        for j in range(0, 24):
            b = []
            for k in range(0, 96):
                c = np.zeros((10,))
                c[y] = 1
                b.append(c)
            a.append(b)
        X.append(a)

    X = np.array(X, np.float32)
    X = np.rollaxis(X, 3, 1)
    Y = Y.astype(np.float32)
    from chainer.datasets import TupleDataset, split_dataset_random
    ds = TupleDataset(X, Y)
    train, val = split_dataset_random(ds, int(N * .9))

    return train, val, [], 10 
Code example #17
def check_generic(comm, length, bs):
    assert bs > 0
    assert length > 0
    a = list(range(comm.rank, length, comm.size))
    b = list(range(comm.rank, length, comm.size))
    c = list(range(comm.rank, length, comm.size))

    model = ExampleModel()
    dataset = TupleDataset(a, b, c)
    iterator = SerialIterator(dataset, bs, shuffle=False, repeat=False)
    evaluator = GenericMultiNodeEvaluator(comm, iterator, model)

    results = evaluator(None)

    # Make expected answer
    iterator.reset()
    s = [
        [
            aa + bb + cc  # Same calculation as model
            for aa, bb, cc in batch
        ] for batch in iterator
    ]
    s = comm.gather_obj(s)

    if comm.rank == 0:
        # flatten list of lists gathered
        expected = []
        for e in zip(*s):
            expected.extend(e)

        for e, r in zip(expected, results):
            chainer.testing.assert_allclose(e, r)

    else:
        assert results is None
Code example #18
    def fit(self, X, y):
        train = TupleDataset(X)
        train_iter = SerialIterator(train, self.batchsize)
        updater = training.StandardUpdater(train_iter,
                                           self.optimizer,
                                           device=self.device)
        trainer = training.Trainer(updater, (self.n_epochs, 'epoch'),
                                   out='out_' + str(self.device))

        # Setup logging, printing & saving
        keys = self.model.keys
        reports = ['epoch']
        reports += ['main/' + key for key in keys]
        trainer.extend(extensions.snapshot(), trigger=(1000, 'epoch'))
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
        trainer.extend(extensions.PrintReport(reports))
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # If previous model detected, resume
        if self.resume:
            print("Loading from {}".format(self.resume))
            chainer.serializers.load_npz(self.resume, trainer)

        # Run the model
        trainer.run()
Code example #19
    def test_cache_or_load_dataset(self):
        N = 10
        float_dataset = np.random.uniform(size=(N, 3, 10,
                                                10)).astype(np.float32)
        int_dataset = np.random.uniform(size=(N, )).astype(np.int32)
        dataset = TupleDataset(float_dataset, int_dataset)

        temp_dir = tempfile.mkdtemp()
        fn = os.path.join(temp_dir, 'cache.dat')
        obtained = cache_or_load_dataset(fn, dataset)

        val_1, val_2 = obtained[1]
        expected_1, expected_2 = dataset[1]

        np.testing.assert_equal(val_1, expected_1)
        np.testing.assert_equal(val_2, expected_2)

        # test loading
        dataset = cache_or_load_dataset(fn)
        self.assertEqual(len(dataset), N)
        for i in range(N):
            expected_1, expected_2 = dataset[i]
            val_1, val_2 = obtained[i]
            np.testing.assert_equal(val_1, expected_1)
            np.testing.assert_equal(val_2, expected_2)
Code example #20
    def setUp(self):
        self.comm = create_communicator('naive')

        batchsize_per_process = 5
        batchsize = batchsize_per_process * self.comm.size
        if self.comm.rank == 0:
            bboxes = [
                generate_random_bbox(5, (256, 324), 24, 120) for _ in range(10)
            ]
            labels = [
                np.random.choice(np.arange(3, dtype=np.int32), size=(5, ))
                for _ in range(10)
            ]
        else:
            bboxes = None
            labels = None
        initial_count = self.comm.rank * batchsize_per_process

        bboxes = self.comm.bcast_obj(bboxes)
        labels = self.comm.bcast_obj(labels)
        self.bboxes = bboxes
        self.labels = labels

        self.dataset = TupleDataset(np.random.uniform(size=(10, 3, 32, 48)),
                                    bboxes, labels)
        self.initial_count = initial_count
        self.batchsize = batchsize
Code example #21
def get_tuple_dataset(data):
    slice_array = data["inks"]
    label_array = data["labels"]
    length_array = [c[0] for c in data["shapes"]]
    zipped = list(zip(label_array, length_array))
    dataset = TupleDataset(slice_array, zipped)
    return dataset
Code example #22
    def setUp(self):
        self.comm = create_communicator('naive')

        batchsize_per_process = 5
        batchsize = batchsize_per_process * self.comm.size
        if self.comm.rank == 0:
            masks = [
                np.random.uniform(size=(5, 32, 48)) > 0.5 for _ in range(10)
            ]
            labels = [
                np.random.choice(np.arange(3, dtype=np.int32), size=(5, ))
                for _ in range(10)
            ]
        else:
            masks = None
            labels = None
        initial_count = self.comm.rank * batchsize_per_process

        masks = self.comm.bcast_obj(masks)
        labels = self.comm.bcast_obj(labels)
        self.masks = masks
        self.labels = labels

        self.dataset = TupleDataset(np.random.uniform(size=(10, 3, 32, 48)),
                                    masks, labels)
        self.initial_count = initial_count
        self.batchsize = batchsize
Code example #23
 def make_data(self, data, shuffle=False):
     xs, us = data[:, :, :self.n_state], data[:, :, -self.n_ctrl:]
     x_inits = xs[:, 0]
     n_data = x_inits.shape[0]
     self.n_data = n_data
     ds = TupleDataset(x_inits, xs, us, xp.arange(0, n_data))
     loader = SerialIterator(ds, batch_size=self.n_batch, shuffle=shuffle)
     return ds, loader
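
Note that `xp` is not defined inside this snippet; in Chainer code it conventionally aliases numpy or cupy, so the surrounding module presumably contains something like:

import numpy as xp  # or: import cupy as xp when the data lives on the GPU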
Code example #24
    def make(self, length_of_sequence):
        all_data = np.array([math.sin(i * 2 * math.pi/self.steps_per_cycle) for i in range(self.steps_per_cycle)] * self.number_of_cycles)

        sequences = []
        t = []
        for i in range(len(all_data) - length_of_sequence):
            sequences.append(all_data[i:i+length_of_sequence])
            t.append(all_data[i+length_of_sequence])
        return TupleDataset(sequences, t)
Code example #25
def get_mvmc(cam=None, tr_percent=0.5):
    if cam is None:
        cam = np.arange(6)

    url = 'https://www.dropbox.com/s/rofaov8tgqhh6jv/MVMC.npz'
    base_dir = get_dataset_directory('mvmc/')
    path = os.path.join(base_dir, 'mvmc.npz')

    if not os.path.isfile(path):
        download(url, path)

    data = np.load(path)
    X = data['X']
    y = data['y']
    sidx = int(len(y) * tr_percent)
    train = TupleDataset(X[:sidx, cam], y[:sidx, cam])
    test = TupleDataset(X[sidx:, cam], y[sidx:, cam])
    return train, test
Code example #26
    def setUp(self):
        # Load the iris dataset
        data, target = load_iris(return_X_y=True)
        self.train_x = data
        self.train_y = target
        self.label_dim = numpy.max(target) + 1

        X = data.astype(numpy.float32)
        y = target.astype(numpy.int32)
        self.dataset = TupleDataset(X, y)
Code example #27
def create_dataset(path, size=-1):
    """
    Create dataset from MQ2007 data.

    .. warning:: It will create dataset with label in range [0, 1, 2]
        It should be no problem for Permutation Probability Loss
        but do not plug in other loss function.
    """
    data = collections.defaultdict(lambda: [[], []])
    with open(path, mode='r') as fin:
        # Data has one json per line
        for i, line in enumerate(fin):
            q, r, v = _parse_single(line)
            if r not in {0, 1, 2}:
                raise DatasetParseError(
                    "L%d: Score must be 0, 1 or 2, but found %d" % (i, r))
            data[q][0].append(r)
            data[q][1].append(v)
    vectors = []
    scores = []
    for d in data.values():
        v = np.array(d[1], dtype=np.float32)
        s = np.array(d[0], dtype=np.float32)
        vectors.append(v)
        scores.append(s)
    s = max(map(len, scores))
    vectors_pad = np.zeros((len(vectors), s, v.shape[-1]), dtype=np.float32)
    scores_pad = np.zeros((len(scores), s), dtype=np.float32)
    length = np.empty((len(scores)), dtype=np.int32)
    for i, (s, v) in enumerate(zip(scores, vectors)):
        vectors_pad[i, :len(v), :] = v
        scores_pad[i, :len(s)] = s
        length[i] = len(v)

    if size > 0:
        # Sample data AFTER all data has been loaded,
        # because there might be bias in the data ordering.
        ind = np.random.permutation(len(vectors))[:size]
        return TupleDataset(vectors_pad[ind], scores_pad[ind], length[ind])
    else:
        return TupleDataset(vectors_pad, scores_pad, length)
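
A hedged usage sketch (the path below is hypothetical): each item of the returned TupleDataset is a (padded_vectors, padded_scores, length) triple, where rows beyond `length` are zero padding:

dataset = create_dataset('data/mq2007/train.txt', size=1000)  # hypothetical path
vectors, scores, length = dataset[0]
# vectors: (max_docs, feature_dim) float32; scores: (max_docs,) float32
# only the first `length` entries correspond to real documents; the rest are padding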
Code example #28
 def setUp(self):
     masks = np.random.uniform(size=(10, 5, 32, 48)) > 0.5
     labels = np.ones((10, 5), dtype=np.int32)
     self.dataset = TupleDataset(
         np.random.uniform(size=(10, 3, 32, 48)),
         masks, labels)
     self.link = _InstanceSegmentationStubLink(masks, labels)
     self.iterator = SerialIterator(
         self.dataset, 1, repeat=False, shuffle=False)
     self.evaluator = InstanceSegmentationCOCOEvaluator(
         self.iterator, self.link, label_names=('cls0', 'cls1', 'cls2'))
     self.expected_ap = 1
Code example #29
    def setUp(self):
        self.trainer = mock.MagicMock()
        self.trainer.out = tempfile.mkdtemp()

        self.link = _RandomDetectionStubLink()
        self.dataset = TupleDataset(np.random.uniform(size=(10, 3, 32, 48)),
                                    np.random.uniform(size=(10, 5, 4)),
                                    np.random.randint(0, 19, size=(10, 5)))
        self.iterator = SerialIterator(self.dataset,
                                       10,
                                       repeat=False,
                                       shuffle=False)
Code example #30
def create_dataset(figpath=None):
    np.random.seed(215)
    n = 1024
    a = np.random.randn(n, 2) + 20 * np.ones((n, 2))
    b = np.random.randn(n, 2) - 20 * np.ones((n, 2))
    x = np.concatenate([a, b], axis=0)
    np.random.shuffle(x)
    if figpath is not None:
        sns.kdeplot(x[:, 0], x[:, 1])
        plt.savefig(figpath)
        plt.close()
    x = x.astype('f')
    return TupleDataset(x)