import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split


def process_data(X, y=None, test_size=0.20, dummies=False):
    if y is None:
        y = np.ones(X.shape[0])

    len_ = X.shape[0]
    X = prepare_dataset(X)

    if dummies:
        # One-hot encode the labels.
        y = pd.get_dummies(y).values

    shape_ = list(X.shape[1:])

    # Flatten each sample for the split, then restore the original feature shape below.
    X_train, X_test, y_train, y_test = train_test_split(
        X.flatten().reshape(len_, -1), y, test_size=test_size, random_state=4891)

    X_train = X_train.reshape([X_train.shape[0]] + shape_)
    X_test = X_test.reshape([X_test.shape[0]] + shape_)

    print('Training dataset shape: ', X_train.shape)
    print('Validation dataset shape: ', X_test.shape)

    train_dataset = Dataset(X_train, y_train)
    test_dataset = Dataset(X_test, y_test)

    return train_dataset, test_dataset
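
A possible usage sketch for the function above (illustration only): it assumes prepare_dataset returns an array-like that keeps the leading sample dimension and that the project's Dataset class is in scope; the synthetic arrays, shapes, and class count are hypothetical.

# Hypothetical example call with synthetic data.
import numpy as np

X_demo = np.random.rand(100, 8, 8)      # 100 made-up samples of 8x8 features
y_demo = np.random.randint(0, 3, 100)   # 3 made-up classes
train_ds, test_ds = process_data(X_demo, y_demo, test_size=0.25, dummies=True)
# If prepare_dataset preserves shapes, train_ds.x is roughly (75, 8, 8) and test_ds.x (25, 8, 8).
print(train_ds.x.shape, test_ds.x.shape)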
Example #2
import random

import dask.array as da
import dask.dataframe as dd
from sklearn.model_selection import train_test_split


def process_data(X, y=None, test_size=0.20, dummies=False):
    if y is None:
        y = da.ones(X.shape[0])

    len_ = X.shape[0]
    X = prepare_dataset(X)

    if dummies:
        # One-hot encode the labels.
        y = dd.get_dummies(y)

    shape_ = list(X.shape[1:])

    # Flatten each sample for the split, then restore the original feature shape below.
    X_train, X_test, y_train, y_test = train_test_split(
        X.flatten().reshape(len_, -1), y, test_size=test_size, random_state=4891)
    
    X_train = X_train.reshape([X_train.shape[0]] + shape_)
    X_test = X_test.reshape([X_test.shape[0]] + shape_)
     
    print('Training dataset shape: ', X_train.shape)
    print('Validation dataset shape: ', X_test.shape)

    train_dataset = Dataset(X_train, y_train)
    test_dataset = Dataset(X_test, y_test)
    
    # Keep a small reference set: 10 randomly chosen training samples per class.
    samples = list()
    for _ in range(10):
        for y_uniq in da.unique(train_dataset.labels):
            class_x = train_dataset.x[train_dataset.labels == y_uniq]
            samples.append(class_x[random.randint(0, len(class_x) - 1)])

    train_dataset.samples = da.array(samples)
    return train_dataset, test_dataset
Example #3
import time

import numpy as np
from tensorflow.examples.tutorials.mnist import input_data


def load_MNIST():
    data_path = '../data/MNIST_data'
    data = input_data.read_data_sets(data_path, one_hot=False)
    x_train_aux = data.train.images
    x_test = data.test.images
    data_dim = data.train.images.shape[1]
    n_train = data.train.images.shape[0]

    # Hold out 20% of the original training images for validation.
    train_size = int(n_train * 0.8)
    valid_size = n_train - train_size
    x_valid, x_train = merge_datasets(x_train_aux, data_dim, train_size,
                                      valid_size)
    print('Data loaded. ',
          time.localtime().tm_hour, ':',
          time.localtime().tm_min, 'h')
    # logs.write('\tData loaded ' + str(time.localtime().tm_hour) +':' + str(time.localtime().tm_min) + 'h\n')

    # Reshape the flat 784-vectors into 28x28x1 images.
    x_train = np.reshape(x_train, [-1, 28, 28, 1])
    x_valid = np.reshape(x_valid, [-1, 28, 28, 1])
    x_test = np.reshape(x_test, [-1, 28, 28, 1])

    train_dataset = Dataset(x_train, data.train.labels)
    valid_dataset = Dataset(x_valid, data.train.labels)
    test_dataset = Dataset(x_test, data.test.labels)

    print('Train Data: ', train_dataset.x.shape)
    print('Valid Data: ', valid_dataset.x.shape)
    print('Test Data: ', test_dataset.x.shape)

    return train_dataset, valid_dataset, test_dataset
Example #4
import numpy as np
from sklearn.model_selection import train_test_split


def process_data(X, y, test_size=None):
    if test_size is None:
        test_size = 0.20
    len_ = X.shape[0]
    X_train, X_test, y_train, y_test = train_test_split(
        X.flatten().reshape(len_, -1), y, test_size=test_size, random_state=4891)

    print(X_train.shape, X_test.shape)
    # Infer the side length, assuming square RGB images (d * d * 3 features per sample).
    d = int(np.sqrt(X_train.shape[1] / 3))

    X_train = np.reshape(X_train, [-1, d, d, 3])
    X_test = np.reshape(X_test, [-1, d, d, 3])

    train_dataset = Dataset(X_train, y_train)
    test_dataset = Dataset(X_test, y_test)

    return train_dataset, test_dataset
Example #5
import numpy as np
from scipy.io import loadmat


def load_FREY():
    data_path = '../data/frey_rawface.mat'
    mat = loadmat(data_path)
    data = mat['ff']
    data = np.transpose(data)  # [num_images, dimension]
    data = np.array(data, dtype=np.float32)
    # Min-max normalize each image to the [0, 1] range.
    for i in range(data.shape[0]):
        min_value = np.min(data[i, :])
        max_value = np.max(data[i, :])
        num = (data[i, :] - min_value)
        den = (max_value - min_value)
        data[i, :] = num / den

    data_dim = data.shape[1]
    num_images = data.shape[0]
    # 80% train, 10% validation, remainder test.
    train_size = int(num_images * 0.8)
    valid_size = int(num_images * 0.1)
    test_size = num_images - train_size - valid_size

    x_train = data[:train_size]
    x_valid = data[train_size:(train_size + valid_size)]
    x_test = data[(train_size + valid_size):]

    # Frey face images are 28x20 grayscale.
    x_train = np.reshape(x_train, [-1, 28, 20, 1])
    x_valid = np.reshape(x_valid, [-1, 28, 20, 1])
    x_test = np.reshape(x_test, [-1, 28, 20, 1])

    # The dataset is unlabeled; use all-zero placeholder labels.
    x_train_labels = np.zeros(x_train.shape[0])
    x_valid_labels = np.zeros(x_valid.shape[0])
    x_test_labels = np.zeros(x_test.shape[0])

    train_dataset = Dataset(x_train, x_train_labels)
    valid_dataset = Dataset(x_valid, x_valid_labels)
    test_dataset = Dataset(x_test, x_test_labels)

    print('Train Data: ', train_dataset.x.shape)
    print('Valid Data: ', valid_dataset.x.shape)
    print('Test Data: ', test_dataset.x.shape)

    return train_dataset, valid_dataset, test_dataset
Example #6
import numpy as np


def prepare_dataset(X, y=None):
    if y is None:
        y = np.zeros(X.shape[0])
    return Dataset(X, y)
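
The Dataset container used throughout these examples is never defined here; a minimal stand-in consistent with how the snippets use it (constructed as Dataset(x, labels), read through .x and .labels, and given a .samples attribute in Example #2) could look like the sketch below. This is an assumption, not the project's actual class.

class Dataset:
    """Assumed minimal container mirroring the attributes the snippets rely on."""

    def __init__(self, x, labels):
        self.x = x            # feature array, shape [num_samples, ...]
        self.labels = labels  # label array, shape [num_samples]
        self.samples = None   # optionally set later to per-class reference samples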