Example #1
0
def whiten(data,
           V,
           m0,
           s0,
           nprincomps,
           batchsize=500,
           use_gpu=True,
           verbose=True):
    """Normalize ``data`` batch-wise and project it onto leading rows of ``V``.

    Every batch of rows is contrast-normalized (per-row mean removed, then
    divided by the per-row standard deviation plus the batch mean of those
    standard deviations), shifted by ``m0``, scaled by ``s0`` and finally
    multiplied by ``V[:nprincomps].T``.

    Args:
        data: 2-D NumPy array with one case per row.  It is converted to
            float32 with ``astype`` and only that converted array is
            modified.
        V: matrix whose first ``nprincomps`` rows are used for the projection.
        m0: value(s) subtracted from every row after contrast normalization.
        s0: value(s) every row is divided by after ``m0`` was subtracted.
        nprincomps: number of rows of ``V`` to project onto.
        batchsize: number of rows processed per batch.
        use_gpu: if true, use the ``theano_*`` helpers instead of NumPy.
        verbose: unused here; kept for interface compatibility.

    Returns:
        float32 array of shape ``(ncases, nprincomps)``.
    """
    data = data.astype(np.float32)
    ncases = data.shape[0]
    # Loop-invariant projection matrix.
    projection = V[:nprincomps].T

    data_white = np.zeros((ncases, nprincomps), dtype=np.float32)
    # Stepping through the start offsets avoids computing a batch count with
    # "/", which becomes a float (and breaks range()) under true division.
    for start in range(0, ncases, batchsize):
        end = min(start + batchsize, ncases)
        if use_gpu:
            # NOTE(review): the theano_* helpers are defined elsewhere;
            # presumably they mirror the NumPy branch below -- confirm.
            data[start:end] = theano_subtract_m1(data[start:end])
            data[start:end] = theano_divide_s1(data[start:end])
            data[start:end] = theano_subtract_row(data[start:end], m0)
            data[start:end] = theano_divide_row(data[start:end], s0)
            data_white[start:end] = theano_dot(data[start:end], projection)
        else:
            # Basic slicing gives a view, so in-place ops update ``data``.
            batch = data[start:end]
            batch -= batch.mean(1)[:, None]
            s1 = batch.std(1)[:, None]
            # The offset is the mean std of *this batch*, so the result
            # depends on how the rows are split into batches.
            batch /= s1 + s1.mean()
            batch -= m0
            batch /= s0
            data_white[start:end] = np.dot(batch, projection)
    return data_white
Example #2
0
def whitenX(data, V, m0, s0, nprincomps, batchsize=500, contrast_norm=True, dataset_norm=True, use_gpu=True, verbose=True):
    """Project normalized ``data`` onto ``+V`` and ``-V`` and zero the negatives.

    The first ``nprincomps`` rows of ``V`` are stacked with their negation,
    giving ``2 * nprincomps`` projection directions.  Each batch of rows is
    optionally contrast-normalized (per-row mean removed, then divided by the
    per-row standard deviation plus the batch mean of those deviations) and
    optionally shifted by ``m0`` and scaled by ``s0`` before being projected.

    Args:
        data: 2-D NumPy array with one case per row.  It is converted to
            float32 with ``astype`` and only that converted array is
            modified.
        V: matrix whose first ``nprincomps`` rows are used for the projection.
        m0: value(s) subtracted from every row when ``dataset_norm`` is true.
        s0: value(s) every row is divided by when ``dataset_norm`` is true.
        nprincomps: number of rows of ``V`` to use.
        batchsize: number of rows processed per batch.
        contrast_norm: apply the per-row normalization.
        dataset_norm: apply the ``m0`` / ``s0`` normalization.
        use_gpu: if true, use the ``theano_*`` helpers instead of NumPy.
        verbose: unused here; kept for interface compatibility.

    Returns:
        Array of shape ``(ncases, 2 * nprincomps)`` in which every
        non-positive projection has been multiplied by zero.
    """
    data = data.astype(np.float32)
    ncases = data.shape[0]

    V = V[:nprincomps]
    V = np.concatenate((V, -1 * V), 0).T

    data_white = np.zeros((ncases, nprincomps * 2), dtype=np.float32)

    # Stepping through the start offsets avoids computing a batch count with
    # "/", which becomes a float (and breaks range()) under true division.
    for start in range(0, ncases, batchsize):
        end = min(start + batchsize, ncases)
        if use_gpu:
            # NOTE(review): the theano_* helpers are defined elsewhere;
            # presumably they mirror the NumPy branch below -- confirm.
            if contrast_norm:
                data[start:end] = theano_subtract_m1(data[start:end])
                data[start:end] = theano_divide_s1(data[start:end])
            if dataset_norm:
                data[start:end] = theano_subtract_row(data[start:end], m0)
                data[start:end] = theano_divide_row(data[start:end], s0)
            data_white[start:end] = theano_dot(data[start:end], V)
        else:
            # Basic slicing gives a view, so in-place ops update ``data``.
            batch = data[start:end]
            if contrast_norm:
                batch -= batch.mean(1)[:, None]
                s1 = batch.std(1)[:, None]
                # The offset is the mean std of *this batch*, so the result
                # depends on how the rows are split into batches.
                batch /= s1 + s1.mean()
            if dataset_norm:
                batch -= m0
                batch /= s0
            data_white[start:end] = np.dot(batch, V)
    # Rectify: the boolean mask multiplies non-positive entries by zero.
    return (data_white > 0.) * data_white
Example #3
0
def pca(data, whiten=True, use_gpu=True, batchsize=100, contrast_norm=True, dataset_norm=True, verbose=True):
    """Normalize ``data`` batch-wise and derive PCA parameters from it.

    Steps, each performed over batches of ``batchsize`` rows:

    1. Optional contrast normalization: per-row mean removed, then each row
       divided by its standard deviation plus the batch mean of those
       deviations.
    2. Optional dataset normalization: subtract ``m0`` (from
       ``compute_mean0_batchwise``), then divide by ``s0`` (from
       ``compute_std0_batchwise``, increased by its own mean).
    3. Covariance matrix via ``compute_covmat_batchwise`` followed by
       ``_get_pca_params_from_covmat`` or, when ``whiten`` is false,
       ``_get_pca_nowhite_params_from_covmat``.

    Args:
        data: 2-D NumPy array with one case per row.  It is converted to
            float32 with ``astype`` and only that converted array is
            modified.
        whiten: selects which ``_get_pca_*`` helper computes the parameters.
        use_gpu: if true, use the ``theano_*`` helpers instead of NumPy; it
            is also forwarded to the ``compute_*`` helpers.
        batchsize: number of rows processed per batch.
        contrast_norm: apply step 1.
        dataset_norm: apply step 2.
        verbose: print progress messages; also forwarded to the helpers.

    Returns:
        Tuple ``(V, W, m0, s0, var_fracs)``.  ``V``, ``W`` and ``var_fracs``
        come straight from the ``_get_pca_*`` helper (defined elsewhere --
        see it for their exact meaning).  ``m0`` / ``s0`` are the values used
        in step 2, or ``0`` / ``1`` when ``dataset_norm`` is false.
    """
    data = data.astype(np.float32)
    ncases = data.shape[0]
    # (start, end) row bounds of every batch; the last one may be shorter.
    # Built from a stepped range so no "/" batch count is needed (that would
    # be a float, and break range(), under true division).
    batches = [(start, min(start + batchsize, ncases))
               for start in range(0, ncases, batchsize)]

    # contrast normalization
    if contrast_norm:
        if verbose:
            # Only announce the GPU when it is actually used.
            if use_gpu:
                print('using gpu')
            print('performing contrast normalization')
        for start, end in batches:
            if use_gpu:
                data[start:end] = theano_subtract_m1(data[start:end])
                data[start:end] = theano_divide_s1(data[start:end])
            else:
                data[start:end] -= data[start:end].mean(1)[:, None]
                s1 = data[start:end].std(1)[:, None]
                data[start:end] /= s1 + s1.mean()

    # normalization over dataset
    m0 = 0
    s0 = 1
    if dataset_norm:
        if verbose:
            print('performing normalization over dataset')
        m0 = compute_mean0_batchwise(data, batchsize=batchsize, use_gpu=use_gpu, verbose=verbose)
        for start, end in batches:
            if use_gpu:
                data[start:end] = theano_subtract_row(data[start:end], m0)
            else:
                data[start:end] -= m0

        # The std must be computed on the already centered data.
        s0 = compute_std0_batchwise(data, batchsize=batchsize, use_gpu=use_gpu, verbose=verbose)
        s0 += s0.mean()
        for start, end in batches:
            if use_gpu:
                data[start:end] = theano_divide_row(data[start:end], s0)
            else:
                data[start:end] /= s0

    if verbose:
        print('computing covariance matrix')
    covmat = compute_covmat_batchwise(data, use_gpu=use_gpu, batchsize=batchsize, verbose=verbose)
    if verbose:
        print('performing eigenvalue decomposition')
    if whiten:
        V, W, var_fracs = _get_pca_params_from_covmat(covmat, verbose=verbose)
    else:
        V, W, var_fracs = _get_pca_nowhite_params_from_covmat(covmat, verbose=verbose)

    return V, W, m0, s0, var_fracs
Example #4
0
def pca(data, use_gpu=True, batchsize=100, verbose=True):
    """Normalize ``data`` batch-wise and derive PCA parameters from it.

    Steps, each performed over batches of ``batchsize`` rows:

    1. Contrast normalization: per-row mean removed, then each row divided
       by its standard deviation plus the batch mean of those deviations.
    2. Dataset normalization: subtract ``m0`` (from
       ``compute_mean0_batchwise``), then divide by ``s0`` (from
       ``compute_std0_batchwise``, increased by its own mean).
    3. Covariance matrix via ``compute_covmat_batchwise`` followed by
       ``_get_pca_params_from_covmat``.

    Args:
        data: 2-D NumPy array with one case per row.  It is converted to
            float32 with ``astype`` and only that converted array is
            modified.
        use_gpu: if true, use the ``theano_*`` helpers instead of NumPy; it
            is also forwarded to the ``compute_*`` helpers.
        batchsize: number of rows processed per batch.
        verbose: print progress messages; also forwarded to the helpers.

    Returns:
        Tuple ``(V, W, m0, s0, var_fracs)``.  ``V``, ``W`` and ``var_fracs``
        come straight from ``_get_pca_params_from_covmat`` (defined
        elsewhere -- see it for their exact meaning); ``m0`` and ``s0`` are
        the values used in step 2.
    """
    data = data.astype(np.float32)
    ncases = data.shape[0]
    # (start, end) row bounds of every batch; the last one may be shorter.
    # Built from a stepped range so no "/" batch count is needed (that would
    # be a float, and break range(), under true division).
    batches = [(start, min(start + batchsize, ncases))
               for start in range(0, ncases, batchsize)]

    # contrast normalization
    if verbose:
        # Only announce the GPU when it is actually used.
        if use_gpu:
            print('using gpu')
        print('performing contrast normalization')
    for start, end in batches:
        if use_gpu:
            data[start:end] = theano_subtract_m1(data[start:end])
            data[start:end] = theano_divide_s1(data[start:end])
        else:
            data[start:end] -= data[start:end].mean(1)[:, None]
            s1 = data[start:end].std(1)[:, None]
            data[start:end] /= s1 + s1.mean()

    # normalization over dataset
    if verbose:
        print('performing normalization over dataset')
    m0 = compute_mean0_batchwise(data,
                                 batchsize=batchsize,
                                 use_gpu=use_gpu,
                                 verbose=verbose)
    for start, end in batches:
        if use_gpu:
            data[start:end] = theano_subtract_row(data[start:end], m0)
        else:
            data[start:end] -= m0

    # The std must be computed on the already centered data.
    s0 = compute_std0_batchwise(data,
                                batchsize=batchsize,
                                use_gpu=use_gpu,
                                verbose=verbose)
    s0 += s0.mean()
    for start, end in batches:
        if use_gpu:
            data[start:end] = theano_divide_row(data[start:end], s0)
        else:
            data[start:end] /= s0

    if verbose:
        print('computing covariance matrix')
    covmat = compute_covmat_batchwise(data,
                                      use_gpu=use_gpu,
                                      batchsize=batchsize,
                                      verbose=verbose)
    if verbose:
        print('performing eigenvalue decomposition')
    V, W, var_fracs = _get_pca_params_from_covmat(covmat, verbose=verbose)
    return V, W, m0, s0, var_fracs
def whitenX(data,
            V,
            m0,
            s0,
            nprincomps,
            batchsize=500,
            contrast_norm=True,
            dataset_norm=True,
            use_gpu=True,
            verbose=True):
    """Project normalized ``data`` onto ``+V`` and ``-V`` and zero the negatives.

    The first ``nprincomps`` rows of ``V`` are stacked with their negation,
    giving ``2 * nprincomps`` projection directions.  Each batch of rows is
    optionally contrast-normalized (per-row mean removed, then divided by the
    per-row standard deviation plus the batch mean of those deviations) and
    optionally shifted by ``m0`` and scaled by ``s0`` before being projected.

    Args:
        data: 2-D NumPy array with one case per row.  It is converted to
            float32 with ``astype`` and only that converted array is
            modified.
        V: matrix whose first ``nprincomps`` rows are used for the projection.
        m0: value(s) subtracted from every row when ``dataset_norm`` is true.
        s0: value(s) every row is divided by when ``dataset_norm`` is true.
        nprincomps: number of rows of ``V`` to use.
        batchsize: number of rows processed per batch.
        contrast_norm: apply the per-row normalization.
        dataset_norm: apply the ``m0`` / ``s0`` normalization.
        use_gpu: if true, use the ``theano_*`` helpers instead of NumPy.
        verbose: unused here; kept for interface compatibility.

    Returns:
        Array of shape ``(ncases, 2 * nprincomps)`` in which every
        non-positive projection has been multiplied by zero.
    """
    data = data.astype(np.float32)
    ncases = data.shape[0]

    V = V[:nprincomps]
    V = np.concatenate((V, -1 * V), 0).T

    data_white = np.zeros((ncases, nprincomps * 2), dtype=np.float32)

    # Stepping through the start offsets avoids computing a batch count with
    # "/", which becomes a float (and breaks range()) under true division.
    for start in range(0, ncases, batchsize):
        end = min(start + batchsize, ncases)
        if use_gpu:
            # NOTE(review): the theano_* helpers are defined elsewhere;
            # presumably they mirror the NumPy branch below -- confirm.
            if contrast_norm:
                data[start:end] = theano_subtract_m1(data[start:end])
                data[start:end] = theano_divide_s1(data[start:end])
            if dataset_norm:
                data[start:end] = theano_subtract_row(data[start:end], m0)
                data[start:end] = theano_divide_row(data[start:end], s0)
            data_white[start:end] = theano_dot(data[start:end], V)
        else:
            # Basic slicing gives a view, so in-place ops update ``data``.
            batch = data[start:end]
            if contrast_norm:
                batch -= batch.mean(1)[:, None]
                s1 = batch.std(1)[:, None]
                # The offset is the mean std of *this batch*, so the result
                # depends on how the rows are split into batches.
                batch /= s1 + s1.mean()
            if dataset_norm:
                batch -= m0
                batch /= s0
            data_white[start:end] = np.dot(batch, V)
    # Rectify: the boolean mask multiplies non-positive entries by zero.
    return (data_white > 0.) * data_white
Example #6
0
def whiten(data, V, m0, s0, nprincomps, batchsize=500, use_gpu=True, verbose=True):
    """Normalize ``data`` batch-wise and project it onto leading rows of ``V``.

    Every batch of rows is contrast-normalized (per-row mean removed, then
    divided by the per-row standard deviation plus the batch mean of those
    standard deviations), shifted by ``m0``, scaled by ``s0`` and finally
    multiplied by ``V[:nprincomps].T``.

    Args:
        data: 2-D NumPy array with one case per row.  It is converted to
            float32 with ``astype`` and only that converted array is
            modified.
        V: matrix whose first ``nprincomps`` rows are used for the projection.
        m0: value(s) subtracted from every row after contrast normalization.
        s0: value(s) every row is divided by after ``m0`` was subtracted.
        nprincomps: number of rows of ``V`` to project onto.
        batchsize: number of rows processed per batch.
        use_gpu: if true, use the ``theano_*`` helpers instead of NumPy.
        verbose: unused here; kept for interface compatibility.

    Returns:
        float32 array of shape ``(ncases, nprincomps)``.
    """
    data = data.astype(np.float32)
    ncases = data.shape[0]
    # Loop-invariant projection matrix.
    projection = V[:nprincomps].T

    data_white = np.zeros((ncases, nprincomps), dtype=np.float32)
    # Stepping through the start offsets avoids computing a batch count with
    # "/", which becomes a float (and breaks range()) under true division.
    for start in range(0, ncases, batchsize):
        end = min(start + batchsize, ncases)
        if use_gpu:
            # NOTE(review): the theano_* helpers are defined elsewhere;
            # presumably they mirror the NumPy branch below -- confirm.
            data[start:end] = theano_subtract_m1(data[start:end])
            data[start:end] = theano_divide_s1(data[start:end])
            data[start:end] = theano_subtract_row(data[start:end], m0)
            data[start:end] = theano_divide_row(data[start:end], s0)
            data_white[start:end] = theano_dot(data[start:end], projection)
        else:
            # Basic slicing gives a view, so in-place ops update ``data``.
            batch = data[start:end]
            batch -= batch.mean(1)[:, None]
            s1 = batch.std(1)[:, None]
            # The offset is the mean std of *this batch*, so the result
            # depends on how the rows are split into batches.
            batch /= s1 + s1.mean()
            batch -= m0
            batch /= s0
            data_white[start:end] = np.dot(batch, projection)
    return data_white