Example 1
import os
import tarfile

import numpy as np

import file_manager  # project-local helpers: create_dirname_if_not_exist, download, unpickle


def _prepare_cifar10_data():
    data_path = '/home/huwenp/Dataset/CIFAR/'
    url = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
    file_manager.create_dirname_if_not_exist(data_path)
    file_name = os.path.basename(url)
    full_path = os.path.join(data_path, file_name)
    folder = os.path.join(data_path, 'cifar-10-batches-py')
    # Download and extract the archive only if it has not been unpacked yet.
    if not os.path.isdir(folder):
        file_manager.download(url, data_path)
        with tarfile.open(full_path) as f:
            f.extractall(path=data_path)
    # Load the five CIFAR-10 training batches and stack them into single arrays.
    train_x = []
    train_y = []
    for i in range(1, 6):
        file_path = os.path.join(folder, 'data_batch_{0:d}'.format(i))
        data_dict = file_manager.unpickle(file_path)
        train_x.append(data_dict['data'])
        train_y.append(data_dict['labels'])
    train_x = np.concatenate(train_x) / 255.0
    train_y = np.concatenate(train_y)
    # Normalize each image to unit L2 norm, then mean-center it and add a
    # small positive offset.
    pos = 0.006
    train_x = train_x / np.linalg.norm(train_x, axis=1, keepdims=True)
    train_x = train_x - np.expand_dims(np.mean(train_x, 1), 1) + pos

    # Apply the same scaling and normalization to the test batch.
    data_dict = file_manager.unpickle(os.path.join(folder, 'test_batch'))
    test_y = np.array(data_dict['labels'])
    test_x = data_dict['data'] / 255.0
    test_x = test_x / np.linalg.norm(test_x, axis=1, keepdims=True)
    test_x = test_x - np.expand_dims(np.mean(test_x, 1), 1) + pos
    # Reshape flat rows into (N, 3, 1024): one row of 1024 pixels per channel.
    train_x = train_x.reshape((train_x.shape[0], 3, -1))
    test_x = test_x.reshape((test_x.shape[0], 3, -1))

    return train_x, train_y, test_x, test_y
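Every example here relies on a project-local file_manager module that is not shown. A hypothetical sketch of the three helpers these snippets call, with the names taken from the calls above (the 'latin1' pickle encoding is an assumption for the Python 2 pickles the CIFAR site distributes):

import os
import pickle
import urllib.request


def create_dirname_if_not_exist(path):
    # Create the directory (and any parents) if it does not already exist.
    os.makedirs(path, exist_ok=True)


def download(url, data_path):
    # Save the file named in the URL into data_path.
    file_name = os.path.basename(url)
    urllib.request.urlretrieve(url, os.path.join(data_path, file_name))


def unpickle(file_path):
    # The CIFAR batches are pickled dicts written by Python 2; 'latin1'
    # lets Python 3 read them.
    with open(file_path, 'rb') as f:
        return pickle.load(f, encoding='latin1')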
Example 2
def _prepare_imagenet_test_data(self):
    # Load the ImageNet validation pickle ('val_data') and scale pixels to [0, 1].
    data_dict = file_manager.unpickle(
        os.path.join(self.data_path, 'val_data'))
    test_x = data_dict['data'] / 255.0
    test_y = np.array(data_dict['labels'])
    # Reshape flat rows into (N, 3, 32, 32); a .transpose([0, 2, 3, 1]) would
    # give channels-last instead.
    test_x = test_x.reshape((test_x.shape[0], 3, 32, 32))
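The method above leaves test_x / test_y as NumPy arrays; a minimal sketch of wrapping arrays of that shape for batched evaluation with the standard PyTorch utilities (the placeholder data and batch size are illustrative):

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

# Placeholder arrays standing in for test_x (N, 3, 32, 32) and test_y (N,).
test_x = np.zeros((8, 3, 32, 32))
test_y = np.zeros(8, dtype=np.int64)

dataset = TensorDataset(torch.from_numpy(test_x).float(),
                        torch.from_numpy(test_y))
loader = DataLoader(dataset, batch_size=4, shuffle=False)

for images, labels in loader:
    # images: (4, 3, 32, 32) float tensor, labels: (4,) int64 tensor
    pass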
Example 3
def _make_Negative_small_data():
    # Assumes module-level data_path, data_path_s, negatives_small and the
    # file_manager / _select_pOn_data helpers defined elsewhere in the module.
    train_x = []
    train_y = []
    for i in range(1, 11):
        file_path = os.path.join(data_path, 'train_data_batch_{0:d}'.format(i))
        data_dict = file_manager.unpickle(file_path)
        train_x.append(data_dict['data'])
        train_y.append(data_dict['labels'])
    train_x = np.concatenate(train_x) / 255.0
    train_y = np.concatenate(train_y)

    # Keep only the samples whose labels belong to the negatives_small set.
    label = _select_pOn_data(train_y, negatives_small)
    train_x_ = train_x[label, :]
    # Write the selected samples to disk in chunks of at most max_num rows.
    max_num = 20000
    for i in range(10):
        length = train_x_.shape[0]
        if length > max_num:
            torch.save(train_x_[:max_num, :], data_path_s +
                       'imagenet_batch_' + str(i) + '.pt')
            train_x_ = train_x_[max_num:, :]
        else:
            torch.save(train_x_, data_path_s +
                       'imagenet_batch_' + str(i) + '.pt')
            train_x_ = None
        if train_x_ is None:
            break
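The function above only writes the chunks; a sketch of the matching reader, assuming the imagenet_batch_*.pt files produced by _make_Negative_small_data sit in a single directory (load_imagenet_chunks and chunk_dir are illustrative names):

import glob
import os

import torch


def load_imagenet_chunks(chunk_dir):
    # Gather the saved chunks; lexical sort is fine here because at most ten
    # chunks (indices 0-9) are ever written.
    chunk_paths = sorted(glob.glob(os.path.join(chunk_dir, 'imagenet_batch_*.pt')))
    # Each chunk is a pickled NumPy array; newer PyTorch releases may need
    # torch.load(..., weights_only=False) to unpickle it.
    chunks = [torch.load(p) for p in chunk_paths]
    return torch.cat([torch.as_tensor(c) for c in chunks], dim=0)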
Example 4
def _prepare_cifar50_data():
    data_path = '/home/huwenp/Dataset/CIFAR/'
    url = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
    file_manager.create_dirname_if_not_exist(data_path)
    file_name = os.path.basename(url)
    full_path = os.path.join(data_path, file_name)
    # Note: the archive extracts to 'cifar-10-batches-py'; the .pt feature
    # batches read below come from a separate 'cifar-10-batches-py-feature'
    # folder that is not part of that archive.
    folder = os.path.join(data_path, 'cifar-10-batches-py-feature')
    if not os.path.isdir(folder):
        file_manager.download(url, data_path)
        with tarfile.open(full_path) as f:
            f.extractall(path=data_path)
    train_x = []
    train_y = []
    for i in range(0, 5):
        file_path = os.path.join(folder, 'traindata_batch_{0:d}.pt'.format(i))
        data_dict = file_manager.unpickle(file_path)
        train_x.append(data_dict['data'])
        train_y.append(data_dict['labels'])
    train_x = np.concatenate(train_x) / 255.0
    train_y = np.concatenate(train_y)

    data_dict = file_manager.unpickle(
        os.path.join(folder, 'testdata_batch_0.pt'))
    test_x = data_dict['data'] / 255.0
    test_y = np.array(data_dict['labels'])

    # Reshape flat rows into (N, 3, 32, 32); a .transpose([0, 2, 3, 1]) would
    # give channels-last instead.
    train_x = train_x.reshape((train_x.shape[0], 3, 32, 32))
    test_x = test_x.reshape((test_x.shape[0], 3, 32, 32))

    # train_y = _binarize(train_y)
    # test_y = _binarize(test_y)
    return train_x, train_y, test_x, test_y
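The commented-out calls at the end reference a _binarize helper that is not shown anywhere in these examples; purely as a hypothetical reading, it could be a one-vs-rest relabeling (the positive class index below is an assumption):

import numpy as np


def _binarize(labels, positive_class=0):
    # Hypothetical: map the chosen class to +1 and every other class to -1.
    labels = np.asarray(labels)
    return np.where(labels == positive_class, 1, -1)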
Example 5
def _prepare_imagenet_data_all(data_path_t):
    # Load the ten 32x32 ImageNet training batches from data_path_t and stack them.
    train_x = []
    train_y = []
    for i in range(1, 11):
        file_path = os.path.join(data_path_t, 'train_data_batch_{0:d}'.format(i))
        data_dict = file_manager.unpickle(file_path)
        train_x.append(data_dict['data'])
        train_y.append(data_dict['labels'])
    train_x = np.concatenate(train_x) / 255.0
    train_y = np.concatenate(train_y)

    data_dict = file_manager.unpickle(
        os.path.join(data_path_t, 'val_data'))
    test_x = data_dict['data'] / 255.0
    test_y = np.array(data_dict['labels'])
    # Reshape flat rows into (N, 3, 32, 32); a .transpose([0, 2, 3, 1]) would
    # give channels-last instead.
    train_x = train_x.reshape((train_x.shape[0], 3, 32, 32))
    test_x = test_x.reshape((test_x.shape[0], 3, 32, 32))

    return train_x, train_y, test_x, test_y
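The snippet above only scales pixels to [0, 1]; a common follow-up (not something this example does) is per-channel standardization of the (N, 3, 32, 32) arrays. A minimal sketch, assuming the statistics are computed on the training split only:

import numpy as np


def standardize_per_channel(train_x, test_x):
    # Per-channel mean/std over the training images, applied to both splits.
    mean = train_x.mean(axis=(0, 2, 3), keepdims=True)  # shape (1, 3, 1, 1)
    std = train_x.std(axis=(0, 2, 3), keepdims=True) + 1e-8
    return (train_x - mean) / std, (test_x - mean) / std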
Example 6
def _make_other_data():
    # Assumes module-level data_path, data_path_ot and the file_manager /
    # _select_data helpers defined elsewhere in the module.
    train_x = []
    train_y = []
    for i in range(1, 11):
        file_path = os.path.join(data_path, 'train_data_batch_{0:d}'.format(i))
        data_dict = file_manager.unpickle(file_path)
        train_x.append(data_dict['data'])
        train_y.append(data_dict['labels'])
    train_x = np.concatenate(train_x) / 255.0
    train_y = np.concatenate(train_y)

    # Keep only the samples picked by _select_data, then write them to disk
    # as {'data', 'labels'} dicts in chunks of at most max_num rows.
    label = _select_data(train_y)
    train_x = train_x[label, :]
    train_y = train_y[label]
    max_num = 5000
    for i in range(100000000):  # effectively unbounded; the loop breaks when done
        length = train_x.shape[0]
        if length > max_num:
            data_save = {'data': train_x[:max_num, :], 'labels': train_y[:max_num]}
            torch.save(data_save, data_path_ot +
                       'imagenet_batch_' + str(i) + '.pt')
            train_x = train_x[max_num:, :]
            train_y = train_y[max_num:]
        else:
            data_save = {'data': train_x, 'labels': train_y}
            torch.save(data_save, data_path_ot +
                       'imagenet_batch_' + str(i) + '.pt')
            train_x = None
        if train_x is None:
            break
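Both _select_pOn_data (Example 3) and _select_data above filter samples by label, but neither helper appears in these examples. A hypothetical sketch of such a selector, returning a mask usable as train_x[label, :]:

import numpy as np


def _select_by_labels(labels, wanted_labels):
    # Hypothetical: boolean mask that is True where the label is in wanted_labels.
    labels = np.asarray(labels)
    return np.isin(labels, wanted_labels)

Under that assumption, label = _select_by_labels(train_y, negatives_small) would reproduce the indexing pattern used in Examples 3 and 6.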