Example #1
import pickle as pkl

import numpy as np

# NORBDataset and the data helpers (process_data, normalize_data,
# apply_normalization) are assumed to be provided elsewhere in the project;
# illustrative sketches of the data helpers appear after this function.
def process_images(names, out_loc, mean=None, sd=None):
    print('Names: ', names)
    dataset = NORBDataset(dataset_root='/dfs/scratch1/thomasat/datasets/norb', names=names)

    Xs = []
    Ys = []

    print('Dataset names: ', dataset.data.keys())

    for name in names:
        X, Y = process_data(dataset.data[name])
        print('X,Y shape: ', X.shape, Y.shape)
        Xs.append(X)
        Ys.append(Y)

    X = np.vstack(Xs)
    Y = np.vstack(Ys)

    # Shuffle
    idx = np.arange(X.shape[0])
    np.random.shuffle(idx)
    X = X[idx, :]
    Y = Y[idx, :]

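    # Compute normalization statistics on the first call (the training split)
    # and reuse the passed-in mean/sd on later calls (e.g. the test split).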
    if mean is None and sd is None:
        X, mean, sd = normalize_data(X)
        print('X, Y: ', X.shape, Y.shape)
    else:
        X = apply_normalization(X, mean, sd)

    # Save
    data_dict = {'X': X, 'Y': Y}

    pkl.dump(data_dict, open(out_loc, 'wb'), protocol=2)

    return mean, sd

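# The helpers used in this example (process_data, normalize_data,
# apply_normalization) are not shown here. The sketches below are assumptions
# inferred from how they are called: process_data splits a raw array into
# features and one-hot labels, and normalize_data / apply_normalization
# standardize features with stored statistics.
def process_data(data):
    from sklearn.preprocessing import OneHotEncoder

    # Assumed layout: last column is the class label, the rest are features.
    X = data[:, :-1]
    Y = data[:, -1].reshape(-1, 1)
    enc = OneHotEncoder()
    Y = enc.fit_transform(Y).todense()
    return X, Y


def normalize_data(X):
    # Per-feature standardization; return the statistics so they can be
    # reused on held-out data.
    mean = X.mean(axis=0)
    sd = X.std(axis=0) + 1e-8  # guard against zero-variance features
    return (X - mean) / sd, mean, sd


def apply_normalization(X, mean, sd):
    # Apply the training-set statistics to new data.
    return (X - mean) / sd
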

train_loc = '/dfs/scratch1/thomasat/datasets/convex/convex_train.amat'
test_loc = '/dfs/scratch1/thomasat/datasets/convex/50k/convex_test.amat'
train_out = '/dfs/scratch1/thomasat/datasets/convex/train_normalized'
test_out = '/dfs/scratch1/thomasat/datasets/convex/test_normalized'

train_data = np.genfromtxt(train_loc)
train_X, train_Y = process_data(train_data)

test_data = np.genfromtxt(test_loc)
test_X, test_Y = process_data(test_data)

# Normalize
train_X, mean, sd = normalize_data(train_X)
test_X = apply_normalization(test_X, mean, sd)

# Save
print('test_X, test_Y shape: ', test_X.shape, test_Y.shape)
print('train_X, train_Y shape: ', train_X.shape, train_Y.shape)
train = {'X': train_X, 'Y': train_Y}
test = {'X': test_X, 'Y': test_Y}

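# protocol=2 is presumably used so the pickles can also be loaded from Python 2.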
pkl.dump(train, open(train_out, 'wb'), protocol=2)
pkl.dump(test, open(test_out, 'wb'), protocol=2)
print('Saved train to: ', train_out)
print('Saved test to: ', test_out)
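
# A possible NORB driver, mirroring the convex pipeline above: normalize the
# training split first, then reuse its mean/sd for the test split. The split
# names and output paths below are only illustrative.
norb_train_out = '/dfs/scratch1/thomasat/datasets/norb/train_normalized'
norb_test_out = '/dfs/scratch1/thomasat/datasets/norb/test_normalized'

mean, sd = process_images(['train'], norb_train_out)
process_images(['test'], norb_test_out, mean=mean, sd=sd)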