def whiten(data, V, m0, s0, nprincomps, batchsize=500, use_gpu=True, verbose=True):
    """Project `data` onto the first `nprincomps` PCA components, batchwise.

    Each batch of rows is contrast-normalized (subtract the per-row mean,
    divide by the per-row std plus the batch-mean std), then normalized with
    the dataset statistics `m0`/`s0`, and finally projected with
    `V[:nprincomps]`.

    Parameters
    ----------
    data : array, shape (ncases, ndim)
        Input cases; cast to a float32 copy, so the caller's array is not
        modified.
    V : array
        PCA basis; rows are components (as returned by `pca`).
    m0, s0 : arrays broadcastable over rows
        Dataset mean and std (as returned by `pca`).
    nprincomps : int
        Number of leading components to keep.
    batchsize : int
        Rows processed per batch.
    use_gpu : bool
        If True, use the theano_* helpers; otherwise pure NumPy.
    verbose : bool
        Unused here; kept for interface compatibility with the other
        batchwise functions in this file.

    Returns
    -------
    data_white : float32 array, shape (ncases, nprincomps)
    """
    data = data.astype(np.float32)
    ncases = data.shape[0]
    # Floor division: plain `/` on ints floored in Python 2 but yields a
    # float in Python 3, which breaks range() below.
    nbatches = (ncases - 1) // batchsize + 1
    data_white = np.zeros((ncases, nprincomps), dtype=np.float32)
    for bidx in range(nbatches):
        start = bidx * batchsize
        end = min((bidx + 1) * batchsize, ncases)
        if use_gpu:
            data[start:end] = theano_subtract_m1(data[start:end])
            data[start:end] = theano_divide_s1(data[start:end])
            data[start:end] = theano_subtract_row(data[start:end], m0)
            data[start:end] = theano_divide_row(data[start:end], s0)
            data_white[start:end] = theano_dot(data[start:end], V[:nprincomps].T)
        else:
            data[start:end] -= data[start:end].mean(1)[:, None]
            s1 = data[start:end].std(1)[:, None]
            # Adding the batch-mean std keeps the division well-behaved for
            # near-constant rows (s1 ~ 0).
            data[start:end] /= s1 + s1.mean()
            data[start:end] -= m0
            data[start:end] /= s0
            data_white[start:end] = np.dot(data[start:end], V[:nprincomps].T)
    return data_white
def whitenX(data, V, m0, s0, nprincomps, batchsize=500, contrast_norm=True, dataset_norm=True, use_gpu=True, verbose=True):
    """Batchwise PCA projection onto +/- components with a ReLU output.

    Like `whiten`, but projects onto `[V[:nprincomps]; -V[:nprincomps]]`
    (2*nprincomps columns) and zeroes out negative responses, so each
    component contributes a non-negative "on"/"off" pair.

    NOTE(review): an identical `whitenX` is defined again later in this
    file and overrides this definition at import time.

    Parameters
    ----------
    data : array, shape (ncases, ndim) -- cast to a float32 copy.
    V : array -- PCA basis; rows are components.
    m0, s0 : dataset mean / std, broadcastable over rows.
    nprincomps : int -- leading components kept (output has 2x columns).
    batchsize : int -- rows per batch.
    contrast_norm : bool -- apply per-row mean/std normalization.
    dataset_norm : bool -- apply m0/s0 normalization.
    use_gpu : bool -- theano_* helpers vs. pure NumPy.
    verbose : bool -- unused here; kept for interface compatibility.

    Returns
    -------
    float32 array, shape (ncases, 2 * nprincomps), elementwise >= 0.
    """
    data = data.astype(np.float32)
    ncases = data.shape[0]
    # `//` keeps the batch count an int under Python 3 as well.
    nbatches = (ncases - 1) // batchsize + 1
    V = V[:nprincomps]
    # Stack the negated components so both signs of each response survive
    # the ReLU at the end.
    V = np.concatenate((V, -1 * V), 0).T
    data_white = np.zeros((ncases, nprincomps * 2), dtype=np.float32)
    for bidx in range(nbatches):
        start = bidx * batchsize
        end = min((bidx + 1) * batchsize, ncases)
        if use_gpu:
            if contrast_norm:
                data[start:end] = theano_subtract_m1(data[start:end])
                data[start:end] = theano_divide_s1(data[start:end])
            if dataset_norm:
                data[start:end] = theano_subtract_row(data[start:end], m0)
                data[start:end] = theano_divide_row(data[start:end], s0)
            data_white[start:end] = theano_dot(data[start:end], V)
        else:
            if contrast_norm:
                data[start:end] -= data[start:end].mean(1)[:, None]
                s1 = data[start:end].std(1)[:, None]
                data[start:end] /= s1 + s1.mean()
            if dataset_norm:
                data[start:end] -= m0
                data[start:end] /= s0
            data_white[start:end] = np.dot(data[start:end], V)
    # ReLU: keep only positive responses.
    return (data_white > 0.) * data_white
def pca(data, whiten=True, use_gpu=True, batchsize=100, contrast_norm=True, dataset_norm=True, verbose=True):
    """Fit PCA parameters on `data`, batchwise, with optional normalization.

    Optionally contrast-normalizes each row in place (on a float32 copy),
    optionally normalizes over the dataset (mean m0, std s0 computed
    batchwise), then builds the covariance matrix and eigendecomposes it.

    NOTE(review): the parameter `whiten` shadows the module-level `whiten`
    function inside this body; the name is kept for caller compatibility.
    NOTE(review): a second, simpler `pca` is defined later in this file and
    overrides this definition at import time.

    Parameters
    ----------
    data : array, shape (ncases, ndim) -- cast to a float32 copy.
    whiten : bool -- if True, return whitening PCA params, else plain PCA.
    use_gpu : bool -- theano_* helpers vs. pure NumPy.
    batchsize : int -- rows per batch.
    contrast_norm : bool -- per-row mean/std normalization.
    dataset_norm : bool -- dataset mean/std normalization.
    verbose : bool -- print progress messages.

    Returns
    -------
    V, W, m0, s0, var_fracs
        Forward/backward PCA bases, dataset mean/std (0/1 when
        dataset_norm is off), and per-component variance fractions.
    """
    data = data.astype(np.float32)
    ncases = data.shape[0]
    # `//` keeps the batch count an int under Python 3 as well.
    nbatches = (ncases - 1) // batchsize + 1

    # contrast normalization
    if contrast_norm:
        if verbose:
            if use_gpu:
                # Bug fix: this message used to print regardless of use_gpu.
                print('using gpu')
            print('performing contrast normalization')
        for bidx in range(nbatches):
            start = bidx * batchsize
            end = min((bidx + 1) * batchsize, ncases)
            if use_gpu:
                data[start:end] = theano_subtract_m1(data[start:end])
                data[start:end] = theano_divide_s1(data[start:end])
            else:
                data[start:end] -= data[start:end].mean(1)[:, None]
                s1 = data[start:end].std(1)[:, None]
                data[start:end] /= s1 + s1.mean()

    # normalization over dataset (identity if disabled)
    m0 = 0
    s0 = 1
    if dataset_norm:
        if verbose:
            print('performing normalization over dataset')
        m0 = compute_mean0_batchwise(data, batchsize=batchsize, use_gpu=use_gpu, verbose=verbose)
        for bidx in range(nbatches):
            start = bidx * batchsize
            end = min((bidx + 1) * batchsize, ncases)
            if use_gpu:
                data[start:end] = theano_subtract_row(data[start:end], m0)
            else:
                data[start:end] -= m0
        s0 = compute_std0_batchwise(data, batchsize=batchsize, use_gpu=use_gpu, verbose=verbose)
        # Regularize tiny stds the same way as the per-row normalization.
        s0 += s0.mean()
        for bidx in range(nbatches):
            start = bidx * batchsize
            end = min((bidx + 1) * batchsize, ncases)
            if use_gpu:
                data[start:end] = theano_divide_row(data[start:end], s0)
            else:
                data[start:end] /= s0

    if verbose:
        print('computing covariance matrix')
    covmat = compute_covmat_batchwise(data, use_gpu=use_gpu, batchsize=batchsize, verbose=verbose)
    if verbose:
        print('performing eigenvalue decomposition')
    if whiten:
        V, W, var_fracs = _get_pca_params_from_covmat(covmat, verbose=verbose)
    else:
        V, W, var_fracs = _get_pca_nowhite_params_from_covmat(covmat, verbose=verbose)
    return V, W, m0, s0, var_fracs
def pca(data, use_gpu=True, batchsize=100, verbose=True):
    """Fit whitening-PCA parameters on `data`, batchwise.

    Contrast-normalizes each row in place (on a float32 copy), normalizes
    over the dataset (mean m0, std s0 computed batchwise), then builds the
    covariance matrix and eigendecomposes it.

    NOTE(review): this redefines `pca` and overrides the earlier, more
    general definition (which had whiten/contrast_norm/dataset_norm flags).

    Parameters
    ----------
    data : array, shape (ncases, ndim) -- cast to a float32 copy.
    use_gpu : bool -- theano_* helpers vs. pure NumPy.
    batchsize : int -- rows per batch.
    verbose : bool -- print progress messages.

    Returns
    -------
    V, W, m0, s0, var_fracs
        Forward/backward PCA bases, dataset mean/std, and per-component
        variance fractions.
    """
    data = data.astype(np.float32)
    ncases = data.shape[0]
    # `//` keeps the batch count an int under Python 3 as well.
    nbatches = (ncases - 1) // batchsize + 1

    # contrast normalization
    if verbose:
        if use_gpu:
            # Bug fix: this message used to print regardless of use_gpu.
            print('using gpu')
        print('performing contrast normalization')
    for bidx in range(nbatches):
        start = bidx * batchsize
        end = min((bidx + 1) * batchsize, ncases)
        if use_gpu:
            data[start:end] = theano_subtract_m1(data[start:end])
            data[start:end] = theano_divide_s1(data[start:end])
        else:
            data[start:end] -= data[start:end].mean(1)[:, None]
            s1 = data[start:end].std(1)[:, None]
            data[start:end] /= s1 + s1.mean()

    # normalization over dataset
    if verbose:
        print('performing normalization over dataset')
    m0 = compute_mean0_batchwise(data, batchsize=batchsize, use_gpu=use_gpu, verbose=verbose)
    for bidx in range(nbatches):
        start = bidx * batchsize
        end = min((bidx + 1) * batchsize, ncases)
        if use_gpu:
            data[start:end] = theano_subtract_row(data[start:end], m0)
        else:
            data[start:end] -= m0
    s0 = compute_std0_batchwise(data, batchsize=batchsize, use_gpu=use_gpu, verbose=verbose)
    # Regularize tiny stds the same way as the per-row normalization.
    s0 += s0.mean()
    for bidx in range(nbatches):
        start = bidx * batchsize
        end = min((bidx + 1) * batchsize, ncases)
        if use_gpu:
            data[start:end] = theano_divide_row(data[start:end], s0)
        else:
            data[start:end] /= s0

    if verbose:
        print('computing covariance matrix')
    covmat = compute_covmat_batchwise(data, use_gpu=use_gpu, batchsize=batchsize, verbose=verbose)
    if verbose:
        print('performing eigenvalue decomposition')
    V, W, var_fracs = _get_pca_params_from_covmat(covmat, verbose=verbose)
    return V, W, m0, s0, var_fracs
def whitenX(data, V, m0, s0, nprincomps, batchsize=500, contrast_norm=True, dataset_norm=True, use_gpu=True, verbose=True):
    """Batchwise PCA projection onto +/- components, rectified to >= 0.

    Projects each (optionally normalized) batch onto the stacked basis
    `[V[:nprincomps]; -V[:nprincomps]]` and zeroes negative responses, so
    every component yields a non-negative "on"/"off" feature pair.

    NOTE(review): this is an exact duplicate of the `whitenX` defined
    earlier in this file; being later, this definition wins at import time.

    Parameters
    ----------
    data : array, shape (ncases, ndim) -- cast to a float32 copy; the
        caller's array is not modified.
    V : array -- PCA basis; rows are components.
    m0, s0 : dataset mean / std, broadcastable over rows.
    nprincomps : int -- leading components kept (output has 2x columns).
    batchsize : int -- rows per batch.
    contrast_norm : bool -- per-row mean/std normalization.
    dataset_norm : bool -- m0/s0 normalization.
    use_gpu : bool -- theano_* helpers vs. pure NumPy.
    verbose : bool -- unused here; kept for interface compatibility.

    Returns
    -------
    float32 array, shape (ncases, 2 * nprincomps), elementwise >= 0.
    """
    data = data.astype(np.float32)
    ncases = data.shape[0]
    # Floor division: plain `/` floored on Python 2 ints but produces a
    # float on Python 3, breaking range() below.
    nbatches = (ncases - 1) // batchsize + 1
    V = V[:nprincomps]
    # Append negated components so both response signs survive the ReLU.
    V = np.concatenate((V, -1 * V), 0).T
    data_white = np.zeros((ncases, nprincomps * 2), dtype=np.float32)
    for bidx in range(nbatches):
        start = bidx * batchsize
        end = min((bidx + 1) * batchsize, ncases)
        if use_gpu:
            if contrast_norm:
                data[start:end] = theano_subtract_m1(data[start:end])
                data[start:end] = theano_divide_s1(data[start:end])
            if dataset_norm:
                data[start:end] = theano_subtract_row(data[start:end], m0)
                data[start:end] = theano_divide_row(data[start:end], s0)
            data_white[start:end] = theano_dot(data[start:end], V)
        else:
            if contrast_norm:
                data[start:end] -= data[start:end].mean(1)[:, None]
                s1 = data[start:end].std(1)[:, None]
                data[start:end] /= s1 + s1.mean()
            if dataset_norm:
                data[start:end] -= m0
                data[start:end] /= s0
            data_white[start:end] = np.dot(data[start:end], V)
    # Rectify: negative responses are zeroed.
    return (data_white > 0.) * data_white