Ejemplo n.º 1
0
def pca_layer(X, energy, eps):
    """
    Build an AffineLayerPre that applies a PCA fitted on X.

    X - examples handed to pylearn_pca.pca_from_examples; X.shape[1] is
        reported as the total number of components and X.dtype is the dtype
        of both layer parameters.
    energy - forwarded to pca_from_examples as max_energy_fraction.
    eps - added to the eigenvalues before the square root is taken.

    Returns AffineLayerPre(W, offset) where W is the eigenvector matrix
    divided by sqrt(eigvals + eps).
    """
    import pylearn_pca
    decomposition, X_centered = pylearn_pca.pca_from_examples(
        X=X, max_energy_fraction=energy)
    variances, components = decomposition

    # Difference between the first centered example and the first raw one;
    # presumably the negated training mean -- confirm against
    # pca_from_examples.
    offset = X_centered[0] - X[0]

    scale = np.sqrt(variances + eps)
    projection = components / scale
    n_kept = projection.shape[1]
    n_total = X.shape[1]
    print('PCA kept %i of %i components' % (n_kept, n_total))

    weights = projection.astype(X.dtype)
    bias = offset.astype(X.dtype)
    return AffineLayerPre(weights, bias)
Ejemplo n.º 2
0
def pca_layer(X, energy, eps):
    """
    Return an AffineLayerPre built from the PCA of X.

    X - examples passed to pylearn_pca.pca_from_examples.  Its dtype is used
        for the layer parameters and X.shape[1] is printed as the total
        number of components.
    energy - passed through as max_energy_fraction.
    eps - added to each eigenvalue before the square root.
    """
    import pylearn_pca
    pca_kwargs = dict(X=X, max_energy_fraction=energy)
    (lambdas, basis), centered = pylearn_pca.pca_from_examples(**pca_kwargs)

    # First centered row minus first raw row; presumably equals minus the
    # training mean -- verify against pca_from_examples.
    shift = centered[0] - X[0]

    # Scale each eigenvector by the inverse square root of its (eps-padded)
    # eigenvalue.
    W = basis / np.sqrt(lambdas + eps)
    counts = (W.shape[1], X.shape[1])
    print('PCA kept %i of %i components' % counts)

    return AffineLayerPre(W.astype(X.dtype), shift.astype(X.dtype))
Ejemplo n.º 3
0
def zca_layer(X, energy, eps):
    """
    Return a pair of layers whose output when filtering X will be X's ZCA.

    X - examples passed to pylearn_pca.pca_from_examples; X.dtype is the
        dtype of every layer parameter.
    energy - retain at least this much energy with the principal components
             (forwarded as max_energy_fraction).
    eps - add this to the eigenvalues when computing PCA responses to
          prevent division-by-zero and suppress weak components in the PCA
          representation.

    Returns [l0, l1]: an AffineLayerPre holding the scaled eigenvectors and
    the centering offset, followed by a ClipLayer holding the transposed
    eigenvectors and a scalar 0.
    """
    import pylearn_pca
    decomposition, X_centered = pylearn_pca.pca_from_examples(
        X=X, max_energy_fraction=energy)
    variances, components = decomposition

    # First centered example minus first raw example; presumably the negated
    # training mean -- confirm against pca_from_examples.
    offset = X_centered[0] - X[0]
    projection = components / np.sqrt(variances + eps)

    n_kept, n_total = projection.shape[1], X.shape[1]
    print('ZCA kept %i of %i components' % (n_kept, n_total))

    forward = AffineLayerPre(
        projection.astype(X.dtype),
        offset.astype(X.dtype))
    # Transposed eigenvectors map the PCA responses back to input space.
    back_weights = components.T.copy().astype(X.dtype)
    zero = np.asarray(0, dtype=X.dtype)
    backward = ClipLayer(back_weights, zero)
    return [forward, backward]
Ejemplo n.º 4
0
def zca_layer(X, energy, eps):
    """
    Build the two layers that together compute the ZCA of X.

    X - examples passed to pylearn_pca.pca_from_examples; layer parameters
        are cast to X.dtype.
    energy - retain at least this much energy with the principal components
             (passed as max_energy_fraction).
    eps - added to the eigenvalues when computing PCA responses, to prevent
          division-by-zero and suppress weak components in the PCA
          representation.

    Returns a two-element list: [AffineLayerPre, ClipLayer].
    """
    import pylearn_pca
    pca_kwargs = dict(X=X, max_energy_fraction=energy)
    (lambdas, basis), centered = pylearn_pca.pca_from_examples(**pca_kwargs)

    # First centered row minus first raw row; presumably minus the training
    # mean -- verify against pca_from_examples.
    shift = centered[0] - X[0]
    whitener = basis / np.sqrt(lambdas + eps)
    counts = (whitener.shape[1], X.shape[1])
    print('ZCA kept %i of %i components' % counts)

    layers = []
    layers.append(
        AffineLayerPre(whitener.astype(X.dtype), shift.astype(X.dtype)))
    # Second stage: multiply by the transposed eigenvectors.
    layers.append(
        ClipLayer(basis.T.copy().astype(X.dtype),
                  np.asarray(0, dtype=X.dtype)))
    return layers
Ejemplo n.º 5
0
def preprocess_data(config, ctrl):
    """
    Load the configured dataset, preprocess its inputs, and wrap the
    train/valid/test splits in Theano shared variables.

    config - mapping read for 'dataset_name' (passed to json_call) and
             'preprocessing', itself a mapping with a 'kind' key.  The 'zca'
             kind additionally reads config['preprocessing']['energy'].
    ctrl - object whose info() method receives progress messages.

    Preprocessing kinds:
      'zca'       - whiten with a PCA fitted on the training inputs, map
                    back to input space, cast to float32 and clip to [0, 1].
      'raw'       - leave the inputs untouched.
      'pca',
      'normalize' - recognised but not implemented.

    Returns (dataset, (s_X_train, s_y_train), (s_X_valid, s_y_valid),
    (s_X_test, s_y_test)).

    Raises NotImplementedError for 'pca' and 'normalize', and ValueError for
    any other unrecognised kind.
    """
    dataset = json_call(config['dataset_name'])
    train, valid, test = classification_train_valid_test(dataset)
    X_train, y_train = numpy.asarray(train[0]), numpy.asarray(train[1])
    X_valid, y_valid = numpy.asarray(valid[0]), numpy.asarray(valid[1])
    X_test, y_test = numpy.asarray(test[0]), numpy.asarray(test[1])

    kind = config['preprocessing']['kind']

    if kind in ('pca', 'normalize'):
        # TODO: implement these on top of X_train/X_valid/X_test.  The
        # earlier bodies were stale cut-and-paste code sitting after the
        # raise (never executed) and have been removed.
        raise NotImplementedError('rewrite since cut and paste')

    if kind == 'zca':
        (eigvals, eigvecs), centered_trainset = pylearn_pca.pca_from_examples(
            X=X_train,
            max_energy_fraction=config['preprocessing']['energy'])
        # Recover the offset that pca_from_examples subtracted, using the
        # first example as a probe.
        eigmean = X_train[0] - centered_trainset[0]

        def whiten(X):
            # Whiten in PCA space, then project back to input space.
            X = pylearn_pca.pca_whiten((eigvals, eigvecs), X - eigmean)
            X = pylearn_pca.pca_whiten_inverse((eigvals, eigvecs), X) + eigmean
            X = X.astype('float32')
            X_min = X.min()
            X_max = X.max()
            ctrl.info('ZCA min:%f max:%f' % (X_min, X_max))
            if X_min < 0 or X_max > 1.0:
                ctrl.info('ZCA clamping return value to (0, 1) interval')
                X = numpy.clip(X, 0, 1, out=X)
            return X

        # Training statistics are applied to all three splits.
        X_train, X_valid, X_test = [
            whiten(X) for X in (X_train, X_valid, X_test)]
    elif kind != 'raw':
        raise ValueError('unrecognized preprocessing', kind)

    # Explicit name/array table instead of a locals() lookup; order matches
    # the original log output (all X splits, then all y splits).
    summary = (
        ('X_train', X_train), ('X_valid', X_valid), ('X_test', X_test),
        ('y_train', y_train), ('y_valid', y_valid), ('y_test', y_test),
    )
    for varname, var in summary:
        ctrl.info('%s shape=%s max=%f min=%f' % (
            varname, var.shape, var.max(), var.min()))

    s_X_train = theano.shared(X_train)
    s_y_train = theano.shared(y_train)
    s_X_valid = theano.shared(X_valid)
    s_y_valid = theano.shared(y_valid)
    s_X_test = theano.shared(X_test)
    s_y_test = theano.shared(y_test)

    return (dataset,
            (s_X_train, s_y_train),
            (s_X_valid, s_y_valid),
            (s_X_test, s_y_test))
def preprocess_data(config, ctrl):
    """
    Load the configured dataset, preprocess its inputs, and wrap the
    train/valid/test splits in Theano shared variables.

    config - mapping read for 'dataset_name' (passed to json_call) and
             'preprocessing', itself a mapping with a 'kind' key.  The 'zca'
             kind additionally reads config['preprocessing']['energy'].
    ctrl - object whose info() method receives progress messages.

    Preprocessing kinds:
      'zca'       - whiten with a PCA fitted on the training inputs, map
                    back to input space, cast to float32 and clip to [0, 1].
      'raw'       - leave the inputs untouched.
      'pca',
      'normalize' - recognised but not implemented.

    Returns (dataset, (s_X_train, s_y_train), (s_X_valid, s_y_valid),
    (s_X_test, s_y_test)).

    Raises NotImplementedError for 'pca' and 'normalize', and ValueError for
    any other unrecognised kind.
    """
    dataset = json_call(config['dataset_name'])
    train, valid, test = classification_train_valid_test(dataset)
    X_train, y_train = numpy.asarray(train[0]), numpy.asarray(train[1])
    X_valid, y_valid = numpy.asarray(valid[0]), numpy.asarray(valid[1])
    X_test, y_test = numpy.asarray(test[0]), numpy.asarray(test[1])

    kind = config['preprocessing']['kind']

    if kind in ('pca', 'normalize'):
        # TODO: implement these on top of X_train/X_valid/X_test.  The
        # earlier bodies were stale cut-and-paste code sitting after the
        # raise (never executed) and have been removed.
        raise NotImplementedError('rewrite since cut and paste')

    if kind == 'zca':
        (eigvals, eigvecs), centered_trainset = pylearn_pca.pca_from_examples(
            X=X_train, max_energy_fraction=config['preprocessing']['energy'])
        # Recover the offset that pca_from_examples subtracted, using the
        # first example as a probe.
        eigmean = X_train[0] - centered_trainset[0]

        def whiten(X):
            # Whiten in PCA space, then project back to input space.
            X = pylearn_pca.pca_whiten((eigvals, eigvecs), X - eigmean)
            X = pylearn_pca.pca_whiten_inverse((eigvals, eigvecs), X) + eigmean
            X = X.astype('float32')
            X_min = X.min()
            X_max = X.max()
            ctrl.info('ZCA min:%f max:%f' % (X_min, X_max))
            if X_min < 0 or X_max > 1.0:
                ctrl.info('ZCA clamping return value to (0, 1) interval')
                X = numpy.clip(X, 0, 1, out=X)
            return X

        # Training statistics are applied to all three splits.
        X_train, X_valid, X_test = [
            whiten(X) for X in (X_train, X_valid, X_test)
        ]
    elif kind != 'raw':
        raise ValueError('unrecognized preprocessing', kind)

    # Explicit name/array table instead of a locals() lookup; order matches
    # the original log output (all X splits, then all y splits).
    summary = (
        ('X_train', X_train), ('X_valid', X_valid), ('X_test', X_test),
        ('y_train', y_train), ('y_valid', y_valid), ('y_test', y_test),
    )
    for varname, var in summary:
        ctrl.info('%s shape=%s max=%f min=%f' %
                  (varname, var.shape, var.max(), var.min()))

    s_X_train = theano.shared(X_train)
    s_y_train = theano.shared(y_train)
    s_X_valid = theano.shared(X_valid)
    s_y_valid = theano.shared(y_valid)
    s_X_test = theano.shared(X_test)
    s_y_test = theano.shared(y_test)

    return (dataset, (s_X_train, s_y_train), (s_X_valid, s_y_valid),
            (s_X_test, s_y_test))