def cycle_pairs(inputs, btsz, **kwargs):
    """Yield aligned minibatch pairs of size btsz as garrays."""
    p0, p1 = inputs[0], inputs[1]
    bg, end = _cycle(p0, btsz)
    for idx, idx_p1 in izip(bg, end):
        yield (garray(p0[idx:idx_p1]), garray(p1[idx:idx_p1]))
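# A self-contained sketch of the same paired-minibatch idea in plain numpy
# (assumption: _cycle yields parallel lists of batch start/end indices; this
# illustrative standalone version uses explicit ranges instead).
import numpy as np

def cycle_pairs_np(p0, p1, btsz):
    # yield aligned slices of at most btsz rows from both arrays
    for start in range(0, len(p0), btsz):
        stop = min(start + btsz, len(p0))
        yield p0[start:stop], p1[start:stop]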
def setup_training_data(params, midi_dir, verbose=False):
    '''
    Load and set up the training data.

    T is the maximum lag used for computing the frame size.
    '''
    # load training data
    sequential_data, sequential_labels, num_labels = load_data(midi_dir)
    T = max(params['Tv'], params['Th'])  # max look-behind
    # convert sequences into subsequences of length T+1
    subseq_data, subseq_labels = frame_subseqs(T + 1, sequential_data,
                                               sequential_labels)
    subseq_data *= params['vis_scale']  # put training data at the correct scale
    training_data = subseq_to_frames(subseq_data)
    Nl = params['Nl']
    training_labels = compute_binary_labels(subseq_to_frames(subseq_labels), Nl)
    input_training_data = gp.concatenate(
        (gp.garray(training_data), gp.garray(training_labels)), axis=1)
    return input_training_data
def buildDBN(layerSizes, scales, fanOuts, outputActFunct, realValuedVis,
             useReLU=False, uniforms=None, dropouts=None):
    '''
    layerSizes is a tuple of (visible input, hidden 1..n, visible output).
    '''
    # Extract tuples of RBM-style layer shapes
    shapes = [(layerSizes[i - 1], layerSizes[i])
              for i in range(1, len(layerSizes))]
    assert len(scales) == len(shapes) == len(fanOuts)
    if uniforms is None:
        uniforms = [False for s in shapes]
    assert len(scales) == len(uniforms)
    # Biases for layers excluding the visible input - a list of row vectors
    initialBiases = [gnp.garray(0 * num.random.rand(1, layerSizes[i]))
                     for i in range(1, len(layerSizes))]
    # Biases for layers excluding the visible output
    initialGenBiases = [gnp.garray(0 * num.random.rand(1, layerSizes[i]))
                        for i in range(len(layerSizes) - 1)]
    # "Fan outs" is a slightly misleading term: it caps the number of nonzero
    # incoming links per unit (depending on which way round one views the
    # network).
    initialWeights = [gnp.garray(initWeightMatrix(shapes[i], scales[i],
                                                  fanOuts[i], uniforms[i]))
                      for i in range(len(shapes))]
    net = DBN(initialWeights, initialBiases, initialGenBiases, outputActFunct,
              realValuedVis, useReLU)
    if dropouts is None:
        net.dropouts = [0.0] * len(shapes)
    else:
        net.dropouts = dropouts
    return net
def SGD(self, train_data, train_targs, eta=0.1, tau=10., lambda_w=0.1,
        epochs=10, mbsz=5, test_data=gnp.garray(0), test_targs=gnp.garray(0)):
    num_iter = int(np.ceil(len(train_data) * 1.0 / mbsz))
    print 'num_iter/epoch=%d' % num_iter
    for epoch in range(epochs):
        print 'epoch=%d' % epoch
        st = time.clock()
        eta_e = eta * tau / (tau + epoch)  # decaying learning rate
        n_err = 0
        for i in xrange(num_iter):
            #data, targs = self.sample_mini_batch(train_data, train_targs, mbsz)
            data, targs = self.choose_mini_batch(train_data, train_targs,
                                                 mbsz, i)
            n_err += self.update_mini_batch(data, targs, eta_e, lambda_w, mbsz)
        et = time.clock()
        print 'one epoch takes %f secs' % (et - st)
        if test_data is not None:  # avoid elementwise garray comparison
            v_err = self.classification_error(test_data, test_targs)
            print 'n_err_train = %d, n_err_validation = %d, train error = %f, validation error = %f' % (
                n_err, v_err, n_err * 1. / train_data.shape[0],
                v_err * 1. / test_data.shape[0])
def bench_gnp():
    # NOTE: at n = 40000 each float32 matrix is ~6.4 GB, so this benchmark
    # needs a very large device; reduce n for smaller GPUs.
    n = 40000
    a = np.random.uniform(low=0., high=1., size=(n, n)).astype(np.float32)
    b = np.random.uniform(low=0., high=1., size=(n, n)).astype(np.float32)
    ga = gpu.garray(a)
    gb = gpu.garray(b)
    ga = ga.dot(gb)
def backward(self, Y, preds, acts, words, X):
    """
    Backward pass through the network
    """
    batchsize = preds.shape[0]

    # Compute part of df/dR
    Ix = gpu.garray(preds[:, :-1] - Y) / batchsize
    delta = gpu.dot(acts.T, Ix)
    dR = delta[:-1, :] + self.gamma_r * self.R
    db = delta[-1, :]
    dR = dR.as_numpy_array()

    # Compute df/dC and word inputs for df/dR
    Ix = gpu.dot(Ix, self.R.T)
    dC = gpu.zeros(np.shape(self.C))
    for i in range(self.context):
        delta = gpu.dot(words[:, :, i].T, Ix)
        dC[i, :, :] = delta + self.gamma_c * self.C[i, :, :]
        delta = gpu.dot(Ix, self.C[i, :, :].T)
        delta = delta.as_numpy_array()
        for j in range(X.shape[0]):
            dR[:, X[j, i]] = dR[:, X[j, i]] + delta.T[:, j]

    self.dR = gpu.garray(dR)
    self.db = db
    self.dC = dC
def compute_MT(A, M, grid_2_id, id_2_grid):
    print 'computing MT'
    MT = np.zeros((1584, 1584))
    sortlist = defaultdict(list)
    for i in id_2_grid:
        for j in id_2_grid:
            if i == j:
                continue
            x = id_2_grid[i]
            y = id_2_grid[j]
            (lat1, lon1, lat_length, lon_length) = gh._decode_c2i(x)
            (lat2, lon2, lat_length, lon_length) = gh._decode_c2i(y)
            # bucket cell pairs by Manhattan distance on the grid
            sortlist[abs(lat1 - lat2) + abs(lon1 - lon2)].append([i, j])
    Mpow = gnp.garray(np.eye(1584))
    M = gnp.garray(M)
    # visit distances in increasing order so Mpow tracks successive powers of M
    for i in sorted(sortlist):
        Mpow = Mpow.dot(M)
        Lde = int(i * 0.2)
        Mtemp = Mpow.dot(A[Lde])
        for x in sortlist[i]:
            MT[x[0], x[1]] = Mtemp[x[0], x[1]]
    return MT
def __init__(self, n_visible, n_hidden=None, vistype='sigmoid',
             hidtype='sigmoid', W=None, hbias=None, vbias=None,
             batch_size=128):
    # initialize parameters
    self.SIZE_LIMIT = 80000000  # the size of the largest gpu array
    self.vistype = vistype
    self.hidtype = hidtype
    self.batch_size = batch_size
    self.n_visible = n_visible
    if n_hidden is None:
        n_hidden = self.n_visible
    self.n_hidden = n_hidden
    n = self.n_visible * self.n_hidden + self.n_hidden
    bound = 2.38 / np.sqrt(n)
    if W is None:
        W = np.zeros((self.n_visible, self.n_hidden))
        for i in range(self.n_visible):
            for j in range(self.n_hidden):
                W[i, j] = np.random.uniform(-bound, bound)
    W = gp.garray(W)
    self.W = W
    if vbias is None:
        vbias = gp.zeros(self.n_visible)
    else:
        vbias = gp.garray(vbias)
    self.vbias = vbias
    if hbias is None:
        hbias = np.zeros((self.n_hidden,))
        for i in range(self.n_hidden):
            hbias[i] = np.random.uniform(-bound, bound)
    hbias = gp.garray(hbias)
    self.hbias = hbias
    # initialize updates
    self.wu_vh = gp.zeros((self.n_visible, self.n_hidden))
    self.wu_v = gp.zeros(self.n_visible)
    self.wu_h = gp.zeros(self.n_hidden)
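# The double loops above draw one uniform sample per entry; a vectorized
# numpy equivalent of the same initialization scheme (same bound), offered
# as an illustrative sketch rather than the class API:
import numpy as np

def init_rbm_params(n_visible, n_hidden):
    n = n_visible * n_hidden + n_hidden
    bound = 2.38 / np.sqrt(n)
    W = np.random.uniform(-bound, bound, size=(n_visible, n_hidden))
    hbias = np.random.uniform(-bound, bound, size=n_hidden)
    return W, hbias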
def rec_to_gpu(rec):
    if rec.dtype.names is not None:
        # structured array: cast every field to float32, then view it as 2-d
        rec = rec.astype([(k, np.float32) for k in rec.dtype.names])
        return gpu.garray(rec.view((np.float32, len(rec.dtype.names))))
    else:
        return gpu.garray(rec.astype(np.float32))
def trte_split(X, Y, tr_frac):
    """Split the data in X/Y into training and testing portions."""
    if gp.is_garray(X):
        X = X.as_numpy_array()
    else:
        X = np.array(X)
    if gp.is_garray(Y):
        Y = Y.as_numpy_array()
    else:
        Y = np.array(Y)
    obs_count = X.shape[0]
    tr_count = int(round(tr_frac * obs_count))
    te_count = obs_count - tr_count
    Xtr = np.zeros((tr_count, X.shape[1]))
    Ytr = np.zeros((tr_count, Y.shape[1]))
    Xte = np.zeros((te_count, X.shape[1]))
    Yte = np.zeros((te_count, Y.shape[1]))
    idx = npr.permutation(range(obs_count))
    # Basic manual iteration
    for i in range(obs_count):
        if i < tr_count:
            Xtr[i, :] = X[idx[i], :]
            Ytr[i, :] = Y[idx[i], :]
        else:
            Xte[(i - tr_count), :] = X[idx[i], :]
            Yte[(i - tr_count), :] = Y[idx[i], :]
    return [gp.garray(Xtr), gp.garray(Ytr), gp.garray(Xte), gp.garray(Yte)]
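# The manual copy loop in trte_split is equivalent to fancy indexing on a
# permutation; a compact numpy sketch of the same split (illustrative, not
# the source API):
import numpy as np

def trte_split_np(X, Y, tr_frac):
    idx = np.random.permutation(X.shape[0])
    tr = int(round(tr_frac * X.shape[0]))
    return X[idx[:tr]], Y[idx[:tr]], X[idx[tr:]], Y[idx[tr:]]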
def _load_model_from_stream(self, f):
    has_input, self.in_dim, self.out_dim, nonlin_id = struct.unpack(
        'iiii', f.read(4 * 4))
    self.has_input = has_input == 1
    if not self.has_input:
        self.in_dim = None
    self.nonlin = layer.get_nonlin_from_type_id(nonlin_id)
    if self.has_input:
        self.W_ih = gnp.garray(
            np.fromstring(f.read(self.in_dim * self.out_dim * 4),
                          dtype=np.float32).reshape(self.in_dim,
                                                    self.out_dim))
        self.dW_ih = self.W_ih * 0
    self.W_hh = gnp.garray(
        np.fromstring(f.read(self.out_dim * self.out_dim * 4),
                      dtype=np.float32).reshape(self.out_dim, self.out_dim))
    self.b = gnp.garray(
        np.fromstring(f.read(self.out_dim * 4), dtype=np.float32))
    self.dW_hh = self.W_hh * 0
    self.db = self.b * 0  # zero-initialized gradient buffer
    self._update_param_size()
def _set_param_from_vec(self, v, is_noiseless=False):
    if self.has_input:
        self.W_ih = gnp.garray(v[:self.W_ih.size].reshape(self.W_ih.shape))
    self.W_hh = gnp.garray(
        v[-self.W_hh.size - self.b.size:-self.b.size].reshape(
            self.W_hh.shape))
    self.b = gnp.garray(v[-self.b.size:])
def test_loss(loss, weight=1):
    print 'Testing loss <%s>, weight=%g' % (loss.get_name(), weight)
    loss.set_weight(weight)

    sx, sy = 3, 4
    x = gnp.randn(sx, sy)
    t = gnp.randn(sx, sy)
    if loss.target_should_be_one_hot():
        new_t = np.zeros(t.shape)
        new_t[np.arange(t.shape[0]), t.argmax(axis=1)] = 1
        t = gnp.garray(new_t)
    elif loss.target_should_be_normalized():
        t = t - t.min(axis=1)[:, gnp.newaxis] + 1
        t /= t.sum(axis=1)[:, gnp.newaxis]
    elif loss.target_should_be_hinge():
        new_t = -np.ones(t.shape)
        new_t[np.arange(t.shape[0]), t.argmax(axis=1)] = 1
        t = gnp.garray(new_t)
    loss.load_target(t)

    def f(w):
        return loss.compute_loss_and_grad(gnp.garray(w.reshape(sx, sy)))[0]

    fdiff_grad = finite_difference_gradient(f, x.asarray().ravel())
    backprop_grad = loss.compute_loss_and_grad(
        x, compute_grad=True)[1].asarray().ravel()

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                                backprop_grad, ' Backpropagation Gradient')
    print ''
    return test_passed
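# test_loss relies on a finite_difference_gradient helper that is not shown
# here; a minimal central-difference version, under the assumption that f
# maps a flat numpy vector to a scalar (illustrative):
import numpy as np

def finite_difference_gradient_np(f, x, eps=1e-6):
    grad = np.zeros_like(x)
    for i in range(x.size):
        e = np.zeros_like(x)
        e[i] = eps
        grad[i] = (f(x + e) - f(x - e)) / (2 * eps)
    return grad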
def vecToStack(self, vec):
    start = 0
    sizes = [self.inputDim] + self.layerSizes + [self.outputDim]
    for n, m, i in zip(sizes[:-1], sizes[1:], range(len(sizes) - 1)):
        self.stack[i] = [
            gp.garray(np.reshape(np.array(vec[start:start + m * n]), (m, n))),
            gp.garray(np.reshape(np.array(vec[start + m * n:
                                              start + m * (n + 1)]), (m, 1)))]
        start += m * (n + 1)
def costAndGradSFO(self, stack, datums):
    """
    Wrapper function used for the SFO optimizer.
    """
    N = len(datums)
    cost = 0.
    grad = [[gp.zeros(w.shape), gp.zeros(b.shape)] for w, b in self.stack]

    # Push stack to device
    self.stack = [[gp.garray(w), gp.garray(b)] for w, b in stack]

    for datum in datums:
        data = gp.garray(self.data_dict[datum])
        labels = np.array(self.alis[datum], dtype=np.int32)
        costSingle, gradSingle, skip = self.costAndGrad(data, labels)
        if skip:
            print "LOGGING SKIP"  # TODO what to do here?
            N -= 1
            continue
        grad = [[gs[0] + g[0], gs[1] + g[1]]
                for gs, g in zip(gradSingle, grad)]
        cost += costSingle

    # Have to force GC on the gpu... gnumpy lameness
    gp.free_reuse_cache()

    # Pull gradient from device
    grad = [[((1. / N) * gw).as_numpy_array(),
             ((1. / N) * gb).as_numpy_array()] for gw, gb in grad]
    cost *= 1. / N
    return cost, grad
def totalLoss(self, minibatchStream, lossFuncts):
    totalCases = 0
    sumLosses = num.zeros((1 + len(lossFuncts),))
    if isinstance(self.outputActFunct, LinearMasked):
        for inpMB, targMB, targMaskMB in minibatchStream:
            inputBatch = inpMB if isinstance(inpMB, gnp.garray) \
                else gnp.garray(inpMB)
            targetBatch = targMB if isinstance(targMB, gnp.garray) \
                else gnp.garray(targMB)
            targetMaskBatch = targMaskMB if isinstance(targMaskMB, gnp.garray) \
                else gnp.garray(targMaskMB)
            outputActs = self.fprop(inputBatch)
            sumLosses[0] += self.outputActFunct.error(
                targetBatch, self.state[-1], targetMaskBatch, outputActs)
            for j, f in enumerate(lossFuncts):
                sumLosses[j + 1] += f(targetBatch, outputActs, targetMaskBatch)
            totalCases += inpMB.shape[0]
    else:
        for inpMB, targMB in minibatchStream:
            inputBatch = inpMB if isinstance(inpMB, gnp.garray) \
                else gnp.garray(inpMB)
            targetBatch = targMB if isinstance(targMB, gnp.garray) \
                else gnp.garray(targMB)
            outputActs = self.fpropDropout(inputBatch)
            sumLosses[0] += self.outputActFunct.error(
                targetBatch, self.state[-1], outputActs)
            for j, f in enumerate(lossFuncts):
                sumLosses[j + 1] += f(targetBatch, outputActs)
            totalCases += inpMB.shape[0]
    return sumLosses / float(totalCases)
def fgrad(w):
    if self.num_layers == 0:
        Wtemp = self.output.W
        self.output.W = gnp.garray(w.reshape(Wtemp.shape))
    else:
        Wtemp = self.layer[0].W
        self.layer[0].W = gnp.garray(w.reshape(Wtemp.shape))
    self._forward(self.train_data.X[:ncases, :])
    Z = self.train_data.T[:ncases]
    Z = self.output.act_type.label_vec_to_mat(Z, self.train_data.K)
    self.output.loss(Z)
    self.output.gradient()
    dLdXabove = self.output.dLdXtop
    for i in range(self.num_layers - 1, -1, -1):
        self.layer[i].gradient(dLdXabove)
        dLdXabove = self.layer[i].dLdXbelow
    if self.num_layers == 0:
        grad_w = self.output.dLdW
    else:
        grad_w = self.layer[0].dLdW
    if self.num_layers == 0:
        self.output.W = Wtemp
    else:
        self.layer[0].W = Wtemp
    return grad_w.reshape(np.prod(grad_w.shape)).asarray() / Z.shape[0]
def step(self, inputBatch, targetBatch, learnRates, momentum, L2Costs,
         useDropout=False, targetMaskBatch=None):
    mbsz = inputBatch.shape[0]
    inputBatch = inputBatch if isinstance(inputBatch, gnp.garray) \
        else gnp.garray(inputBatch)
    if targetMaskBatch is None:
        targetBatch = targetBatch if isinstance(targetBatch, gnp.garray) \
            else gnp.garray(targetBatch)
        errSignals, outputActs, error = self.fpropBprop(
            inputBatch, targetBatch, useDropout)
    else:
        targetMaskBatch = targetMaskBatch \
            if isinstance(targetMaskBatch, gnp.garray) \
            else gnp.garray(targetMaskBatch)
        errSignals, outputActs, error = self.fpropBprop(
            inputBatch, targetBatch, useDropout, targetMaskBatch)
    factor = 1 - momentum if not self.nestCompare else 1.0
    self.scaleDerivs(momentum)
    for i, (WGrad, biasGrad) in enumerate(
            self.gradients(self.state, errSignals)):
        self.WGrads[i] += learnRates[i] * factor * (
            WGrad / mbsz - L2Costs[i] * self.weights[i])
        self.biasGrads[i] += (learnRates[i] * factor / mbsz) * biasGrad
    self.applyUpdates(self.weights, self.biases, self.weights, self.biases,
                      self.WGrads, self.biasGrads)
    self.constrainWeights()
    return error, outputActs
def backprop(self):
    self.timer_logger('backprop', time.time())
    self.results['grads'] = []
    self.results['bias_grads'] = []
    if self.problem == 'classification':
        # assumes softmax + cross entropy, whose gradients cancel out to
        # give: error = y - t
        self.results['error'] = self.results['current'] - gpu.garray(
            self.util.create_t_dataset(self.batch_y))
    else:
        # assumes a linear unit + squared error cost, whose gradients also
        # cancel out to give: error = y - t
        self.results['error'] = (self.results['current'] -
                                 gpu.garray(self.batch_y))

    for pair in self.results['activations']:
        activation = pair[0]
        weight = pair[1]
        gradient = self.activation_gradient(activation)
        self.results['grads'].insert(
            0, gpu.dot(activation.T, self.results['error']))
        self.results['bias_grads'].insert(
            0, gpu.dot(gpu.ones((1, self.results['error'].shape[0])),
                       self.results['error']))
        self.results['error'] = gpu.dot(self.results['error'],
                                        weight.T) * gradient
    self.timer_logger('backprop', time.time())
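# backprop above uses the identity that, for softmax outputs under a
# cross-entropy loss, the output-layer error signal collapses to y - t.
# A quick numerical check of that identity on one example (illustrative):
import numpy as np

def check_softmax_xent_gradient(eps=1e-6):
    z = np.array([1.0, 2.0, 0.5])       # pre-softmax logits
    t = np.array([0.0, 1.0, 0.0])       # one-hot target
    y = np.exp(z) / np.exp(z).sum()     # softmax output
    grad = np.zeros_like(z)
    for i in range(z.size):
        zp = z.copy()
        zp[i] += eps
        yp = np.exp(zp) / np.exp(zp).sum()
        # forward difference of L(z) = -sum(t * log(softmax(z)))
        grad[i] = ((t * np.log(y)).sum() - (t * np.log(yp)).sum()) / eps
    assert np.allclose(grad, y - t, atol=1e-4)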
def __init__(self):
    self.u = Util()
    gpu.board_id_to_use = 1
    print 'USE GPU' + str(gpu.board_id_to_use)
    gpu.expensive_check_probability = 0

    b = batch_creator()
    path_train = '/home/tim/development/train.csv'
    path_test = '/home/tim/development/test_X.csv'
    batch_size = 100
    set_sizes = [1.00, 0.00, 0.00]
    data = b.create_batches([path_train, path_test], [0, -1], set_sizes,
                            batch_size, standardize=False)

    self.data = gpu.garray(data[0][0] / 255.)
    self.v_original = None
    #self.w = gpu.garray(self.u.create_sparse_weight(784, 800))
    self.w = gpu.garray(np.random.randn(784, 800)) * 0.1
    self.bias_h = gpu.zeros((1, 800))
    self.bias_v = gpu.zeros((1, 784))
    self.w_updt = gpu.zeros((784, 800))
    self.bias_h_updt = gpu.zeros((1, 800))
    self.bias_v_updt = gpu.zeros((1, 784))
    self.h = gpu.zeros((100, 800))
    self.v = gpu.zeros((100, 784))
    self.time_interval = 0
def exact_samples(rbm, num, batch_units=10, show_progress=False):
    scores = get_scores(rbm, batch_units=batch_units).as_numpy_array()
    scores -= np.logaddexp.reduce(scores.ravel())
    p = np.exp(scores)

    prefix_len = rbm.nhid - batch_units
    prefixes = combinations_array(prefix_len).as_numpy_array()
    postfixes = combinations_array(batch_units).as_numpy_array()

    p_row = p.sum(1)
    p_row /= p_row.sum()
    cond_p_col = p / p_row[:, nax]
    # keep np.random.multinomial from choking because the sum is greater than 1
    cond_p_col *= (1. - 1e-8)

    vis = np.zeros((num, rbm.nvis))
    hid = np.zeros((num, rbm.nhid))
    with misc.gnumpy_conversion_check('allow'):
        rows = np.random.multinomial(1, p_row, size=num).argmax(1)
        cols = np.array([np.random.multinomial(1, cond_p_col[row, :]).argmax()
                         for row in rows])
        hid = np.hstack([prefixes[rows, :], postfixes[cols, :]])
        vis = np.random.binomial(1, gnp.logistic(rbm.vis_inputs(hid)))

    return binary_rbms.RBMState(gnp.garray(vis), gnp.garray(hid))
def stepNesterov(self, inputBatch, targetBatch, learnRates, momentum,
                 L2Costs, useDropout=False):
    mbsz = inputBatch.shape[0]
    inputBatch = inputBatch if isinstance(inputBatch, gnp.garray) \
        else gnp.garray(inputBatch)
    targetBatch = targetBatch if isinstance(targetBatch, gnp.garray) \
        else gnp.garray(targetBatch)

    curWeights = [w.copy() for w in self.weights]
    curBiases = [b.copy() for b in self.biases]
    self.scaleDerivs(momentum)
    # take the momentum "look-ahead" step before computing gradients
    self.applyUpdates(self.weights, self.biases, curWeights, curBiases,
                      self.WGrads, self.biasGrads)

    nodeSensitivity, outputActs, error = self.fpropBprop(
        inputBatch, targetBatch, useDropout)

    for i, (WGrad, biasGrad) in enumerate(
            self.gradients(self.state, nodeSensitivity)):
        self.WGrads[i] += learnRates[i] * (WGrad / mbsz -
                                           L2Costs[i] * self.weights[i])
        self.biasGrads[i] += (learnRates[i] / mbsz) * biasGrad

    self.applyUpdates(self.weights, self.biases, curWeights, curBiases,
                      self.WGrads, self.biasGrads)
    self.constrainWeights()
    return error, outputActs
def __init__(self, vis, target_moments, compute_after=DEFAULT_LOG_PROB_AFTER,
             num_particles=100, num_steps=1000, binarize=True):
    if binarize and vis is not None:
        try:
            vis = gnp.garray(np.random.binomial(1, vis))
        except:
            # vis may be a garray, which np.random.binomial cannot consume
            vis = gnp.garray(np.random.binomial(1, vis.as_numpy_array()))
    self.vis = vis
    self.target_moments = target_moments
    self.compute_after = compute_after
    if target_moments is not None:
        self.base_rate_moments = target_moments.full_base_rate_moments()
    self.num_particles = num_particles
    self.num_steps = num_steps
    self.avg_rbm = None
    if target_moments is not None:
        nhid = self.base_rate_moments.expect_hid.size
        self.exact = (nhid <= 20)
    else:
        self.exact = False
    self.count = 0
def mlpSingleOutput1Layer_costfunc(x, *args):
    inputSize, l1Size, lambda_hidden, inputs, targets = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    inputs = gpu.garray(inputs)
    targets = gpu.garray(targets)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1],
                                  (l1Size, inputSize + 1)))
    theta_output = gpu.garray(reshape(x[num_weights_L1:shape(x)[0]],
                                      (1, l1Size + 1)))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones((1, numCases)),
                                            hidden_activation_L1), axis=0)
    #hidden_activation_L1 = hidden_activation_L1 * dropout_prob
    hidden_sum_output = gpu.dot(theta_output, hidden_activation_L1)
    outputs = hidden_sum_output.logistic()
    output_target_diff = (outputs - targets) ** 2
    regularized_penalty_output = theta_output[:, 1:shape(theta_output)[1]]
    regularized_penalty_output = (regularized_penalty_output *
                                  regularized_penalty_output)
    regularized_penalty_L1 = theta_L1[:, 1:shape(theta_L1)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    cost = gpu.sum(output_target_diff) / (2 * numCases) + \
        0.5 * lambda_hidden * (gpu.sum(regularized_penalty_L1) +
                               gpu.sum(regularized_penalty_output))
    print 'Multilayer Perceptron Cost:', cost
    del inputs
    del theta_L1
    del hidden_sum_L1
    del hidden_activation_L1
    del regularized_penalty_output
    del regularized_penalty_L1
    gpu.free_reuse_cache()
    return cost
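# mlpSingleOutput1Layer_costfunc takes a flat parameter vector plus an args
# tuple, which is the calling convention scipy-style optimizers expect. A
# hedged usage sketch (variable names below are assumptions, not from the
# source; approx_grad is used because this function returns only the cost):
#
#   from scipy.optimize import fmin_l_bfgs_b
#   args = (inputSize, l1Size, lambda_hidden, inputs, targets)
#   x0 = 0.01 * np.random.randn(l1Size * (inputSize + 1) + l1Size + 1)
#   x_opt, cost, info = fmin_l_bfgs_b(mlpSingleOutput1Layer_costfunc, x0,
#                                     args=args, approx_grad=True)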
def buildDBN(layerSizes, scales, fanOuts, outputActFunct, realValuedVis,
             useReLU=False, uniforms=None):
    shapes = [(layerSizes[i - 1], layerSizes[i])
              for i in range(1, len(layerSizes))]
    assert len(scales) == len(shapes) == len(fanOuts)
    if uniforms is None:
        uniforms = [False for s in shapes]
    assert len(scales) == len(uniforms)
    initialBiases = [gnp.garray(0 * num.random.rand(1, layerSizes[i]))
                     for i in range(1, len(layerSizes))]
    initialGenBiases = [gnp.garray(0 * num.random.rand(1, layerSizes[i]))
                        for i in range(len(layerSizes) - 1)]
    initialWeights = [gnp.garray(initWeightMatrix(shapes[i], scales[i],
                                                  fanOuts[i], uniforms[i]))
                      for i in range(len(shapes))]
    net = DBN(initialWeights, initialBiases, initialGenBiases, outputActFunct,
              realValuedVis, useReLU)
    return net
def check_against_exact():
    with misc.gnumpy_conversion_check('allow'):
        rbm = test_tractable.random_rbm(NVIS, NHID)
        G, s = tractable.exact_fisher_information(rbm, return_mean=True,
                                                  batch_units=2)
        rw = fisher.RegressionWeights.from_maximum_likelihood(G, NVIS, NHID)

        G, s = gnp.garray(G), gnp.garray(s)
        S = G + np.outer(s, s)

        m_unary = s[:NVIS + NHID]
        S_unary = S[:NVIS + NHID, :NVIS + NHID]
        m_pair = gnp.zeros((NVIS, NHID, 3))
        S_pair = gnp.zeros((NVIS, NHID, 3, 3))
        for i in range(NVIS):
            for j in range(NHID):
                vis_idx = i
                hid_idx = NVIS + j
                vishid_idx = NVIS + NHID + NHID * i + j
                idxs = np.array([vis_idx, hid_idx, vishid_idx])
                m_pair[i, j, :] = s[idxs]
                S_pair[i, j, :] = S[idxs[:, nax], idxs[nax, :]]

        stats = fang.Statistics(m_unary, S_unary, m_pair, S_pair)
        beta, sigma_sq = stats.compute_regression_weights()
        assert np.allclose(beta, rw.beta)
        assert np.allclose(sigma_sq, rw.sigma_sq)

        Sigma = stats.unary_covariance()
        assert np.max(np.abs(Sigma - G[:NVIS + NHID, :NVIS + NHID])) < 1e-6
def costfunc_gpu_ReLU(x, *args):
    (num_input, num_hidden, num_output, inputs, lambda_val, sparsityParam,
     beta) = args
    num_weights1 = (num_input + 1) * num_hidden
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    weights1 = x[0:num_weights1].reshape((num_hidden, num_input + 1))
    weights2 = x[num_weights1:shape(x)[0]].reshape((num_output,
                                                    num_hidden + 1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1, nData)), inputs), axis=0)
    hidden_sum = gpu.dot(weights1, data)
    hidden_activation = gpu.log(1 + hidden_sum.exp())  # softplus
    p_avg = gpu.sum(hidden_activation, axis=1) / nData
    hidden_activation = gpu.concatenate((gpu.ones((1, nData)),
                                         hidden_activation), axis=0)
    output = gpu.dot(weights2, hidden_activation)
    regularized_penalty1 = weights1[:, 1:shape(weights1)[1]]
    regularized_penalty2 = weights2[:, 1:shape(weights2)[1]]
    regularized_penalty1 = regularized_penalty1 * regularized_penalty1
    regularized_penalty2 = regularized_penalty2 * regularized_penalty2
    output_target_diff = (output - inputs) * (output - inputs)
    KL = gpu.sum(sparsityParam * gpu.log(sparsityParam / p_avg) +
                 (1 - sparsityParam) *
                 gpu.log((1 - sparsityParam) / (1 - p_avg)))
    cost = gpu.sum(output_target_diff) / (2 * nData) + \
        0.5 * lambda_val * (gpu.sum(regularized_penalty1) +
                            gpu.sum(regularized_penalty2)) + beta * KL
    print 'ReLU Linear Decoder Cost: ', cost
    return cost
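# gpu.log(1 + hidden_sum.exp()) above is the softplus function, a smooth
# surrogate for ReLU. The direct form overflows for large inputs; a
# numerically stable numpy version for reference (illustrative):
import numpy as np

def softplus(x):
    # log(1 + exp(x)) without overflow for large positive x
    return np.maximum(x, 0) + np.log1p(np.exp(-np.abs(x)))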
def epoch_update(self, input_batch, output_batch):
    this_batch_size = input_batch.shape[0]
    if not isinstance(input_batch, gnp.garray):
        input_batch = gnp.garray(input_batch)
    if not isinstance(output_batch, gnp.garray):
        output_batch = gnp.garray(output_batch)
    error_residual, output_result, error = self.feed_back(input_batch,
                                                          output_batch)
    for i, (w_grad, b_grad) in enumerate(self.gradients(self.state,
                                                        error_residual)):
        # accumulate squared gradients for AdaGrad-style step-size scaling
        self.weight_grads_l2_norm[i] += (w_grad / this_batch_size -
                                         self.l2 * self.weights[i]) ** 2
        self.bias_gradis_l2_norm[i] += (b_grad / this_batch_size) ** 2
        w_factor = 1 / gnp.sqrt(self.weight_grads_l2_norm[i])
        b_factor = 1 / gnp.sqrt(self.bias_gradis_l2_norm[i])
        self.weight_grads[i] = self.learning_rate * w_factor * (
            w_grad / this_batch_size - self.l2 * self.weights[i])
        self.bias_grads[i] = (self.learning_rate * b_factor /
                              this_batch_size) * b_grad
    for i in range(len(self.weights)):
        self.weights[i] += self.weight_grads[i]
        self.biases[i] += self.bias_grads[i]
    return error, output_result
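# epoch_update implements an AdaGrad-style rule: each parameter's step size
# shrinks with its accumulated squared gradient. The core update in
# isolation (plain numpy; the eps term is an assumption added to guard
# against division by zero, which the method above omits):
import numpy as np

def adagrad_step(w, grad, accum, lr=0.01, eps=1e-8):
    accum = accum + grad ** 2
    w = w + lr * grad / (np.sqrt(accum) + eps)
    return w, accum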
def _load_from_stream(self, f):
    self._param_id, layer_dim = struct.unpack('ii', f.read(4 * 2))
    self.gamma = gnp.garray(np.fromstring(f.read(layer_dim * 4),
                                          dtype=np.float32))
    self.beta = gnp.garray(np.fromstring(f.read(layer_dim * 4),
                                         dtype=np.float32))
    self.param_size = self.gamma.size + self.beta.size
    self.gamma_grad = gnp.zeros(self.gamma.size)
    self.beta_grad = gnp.zeros(self.beta.size)
def vector_weights(self, Wm=gp.garray(())):
    """Return the weights in Wm or self.W, vectorized."""
    if Wm.size == 0:
        Wm = self.W
    if not gp.is_garray(Wm):
        Wm = gp.garray(Wm)
    Wv = Wm.reshape((Wm.size, 1))
    return Wv
def fobos_nn(self, w):
    nu = self.tau * self.lr
    u, s, vt = linalg.svd(w, full_matrices=0, compute_uv=1)
    # soft-threshold the singular values by nu (nuclear-norm prox step)
    sdash = np.maximum(s - nu, 0)
    sdashzeros = np.diag(sdash)
    return gnp.dot(gnp.garray(u),
                   gnp.dot(gnp.garray(sdashzeros),
                           gnp.garray(vt))).as_numpy_array(), s
def __init__(self, W, hbias, n_hidden, hidtype):
    self.W = gp.garray(W)
    # convert 1d arrays to 2d
    if len(hbias.shape) == 1:
        hbias = hbias.reshape((hbias.shape[0], 1))
    self.hbias = gp.garray(hbias)
    self.n_hidden = n_hidden
    self.hidtype = hidtype
def fobos_nn(self, w):
    nu = self.tau * self.lr
    u, s, vt = randomized_svd(w, w.shape[0])
    sdash = np.maximum(s - nu, 0)
    sdashtemp = np.diag(sdash)
    # pad the thresholded singular values out to u's shape
    sdashzeros = np.zeros(u.shape, dtype=np.float)
    sdashzeros[:sdashtemp.shape[0], :sdashtemp.shape[1]] = sdashtemp
    return gnp.dot(gnp.garray(u),
                   gnp.dot(gnp.garray(sdashzeros),
                           gnp.garray(vt))).as_numpy_array(), s
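# Both fobos_nn variants above compute the proximal operator of the nuclear
# norm: soft-threshold the singular values by nu and reconstruct the matrix.
# A compact numpy version of that operator (illustrative):
import numpy as np

def svd_soft_threshold(w, nu):
    u, s, vt = np.linalg.svd(w, full_matrices=False)
    s_shrunk = np.maximum(s - nu, 0)
    return u.dot(np.diag(s_shrunk)).dot(vt), s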
def compute_kernel_transformation(self, x_base, x_new):
    x_base = x_base if isinstance(x_base, gnp.garray) else gnp.garray(x_base)
    x_new = x_new if isinstance(x_new, gnp.garray) else gnp.garray(x_new)
    xx = x_new.dot(x_base.T)
    xx_base = (x_base ** 2).sum(axis=1)
    xx_new = (x_new ** 2).sum(axis=1)
    return gnp.exp(-1.0 / (2 * self.sigma ** 2) *
                   (-2 * xx + xx_base + xx_new[:, gnp.newaxis]))
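# The RBF kernel above expands the pairwise squared distance as
# ||a - b||^2 = a.a - 2 a.b + b.b to avoid explicit loops; the same
# computation in plain numpy (illustrative):
import numpy as np

def rbf_kernel_np(x_base, x_new, sigma):
    sq_dists = ((x_new ** 2).sum(axis=1)[:, None]
                - 2 * x_new.dot(x_base.T)
                + (x_base ** 2).sum(axis=1)[None, :])
    return np.exp(-sq_dists / (2 * sigma ** 2))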
def compute_kernel_transformation(self, x_base, x_new):
    x_base = x_base if isinstance(x_base, gnp.garray) else gnp.garray(x_base)
    x_new = x_new if isinstance(x_new, gnp.garray) else gnp.garray(x_new)
    base_norm = (x_base ** 2).sum(axis=1)
    new_norm = (x_new ** 2).sum(axis=1)
    return x_new.dot(x_base.T) / (base_norm + new_norm[:, gnp.newaxis])
def grad_costfunc_gpu_ReLU(x, *args):
    (num_input, num_hidden, num_output, inputs, lambda_val, sparsityParam,
     beta) = args
    num_weights1 = (num_input + 1) * num_hidden
    num_weights2 = (num_hidden + 1) * num_output
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    weights1 = x[0:num_weights1].reshape((num_hidden, num_input + 1))
    weights2 = x[num_weights1:shape(x)[0]].reshape((num_output,
                                                    num_hidden + 1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1, nData)), inputs), axis=0)
    hidden_sum = gpu.dot(weights1, data)
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum)) * (hidden_sum > 0)
    hidden_activation = hidden_sum * relu_mask_hidden1
    hidden_derivative = relu_mask_hidden1
    hidden_activation = gpu.concatenate((gpu.ones((1, nData)),
                                         hidden_activation), axis=0)
    hidden_derivative = gpu.concatenate((gpu.ones((1, nData)),
                                         hidden_derivative), axis=0)
    outputs = gpu.dot(weights2, hidden_activation)
    weights1_grad = gpu.zeros(shape(weights1))
    weights2_grad = gpu.zeros(shape(weights2))
    p = outputs - inputs
    weights2_grad += gpu.dot(
        p, gpu.garray(transpose(hidden_activation.as_numpy_array())))
    q_temp = gpu.dot(gpu.garray(transpose(weights2.as_numpy_array())), p)
    q = q_temp * hidden_derivative
    delta2 = gpu.dot(q, gpu.garray(transpose(data.as_numpy_array())))
    weights1_grad += delta2[1:shape(delta2)[0], :]
    weights1_grad = weights1_grad / nData
    weights2_grad = weights2_grad / nData
    weights1_grad[:, 1:shape(weights1_grad)[1]] = \
        weights1_grad[:, 1:shape(weights1_grad)[1]] + \
        weights1[:, 1:shape(weights1)[1]] * lambda_val
    weights2_grad[:, 1:shape(weights2_grad)[1]] = \
        weights2_grad[:, 1:shape(weights2_grad)[1]] + \
        weights2[:, 1:shape(weights2)[1]] * lambda_val
    weights1_grad = weights1_grad.reshape(num_weights1)
    weights2_grad = weights2_grad.reshape(num_weights2)
    del x
    del inputs
    del data
    del p
    del q_temp
    del q
    del delta2
    del hidden_sum
    del hidden_activation
    del weights1
    del weights2
    gpu.free_reuse_cache()
    return hstack((weights1_grad.as_numpy_array(),
                   weights2_grad.as_numpy_array()))
def test_gnumpy():
    n = 10000
    for i in range(10):
        a = np.random.uniform(low=0., high=1., size=(n, n)).astype(np.float32)
        b = np.random.uniform(low=0., high=1., size=(n, n)).astype(np.float32)
        ga = gpu.garray(a)
        gb = gpu.garray(b)
        ga = ga.dot(gb)
def _load_from_stream(self, f):
    if struct.unpack('i', f.read(4))[0] == 1:
        self.prev = Preprocessor.load_from_stream(f)
    else:
        self.prev = None
    D = struct.unpack('i', f.read(4))[0]
    self.avg = gnp.garray(np.fromstring(f.read(4 * D), dtype=np.float32))
    self.m = gnp.garray(np.fromstring(f.read(4 * D * D),
                                      dtype=np.float32).reshape(D, D))
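# The stream format read above is: one int flag for a chained preprocessor,
# one int dimension D, then D float32 values (avg) followed by D*D float32
# values (m). A matching writer sketch under those assumptions (it does not
# handle serializing a chained preprocessor):
import struct
import numpy as np

def save_to_stream(f, avg, m):
    f.write(struct.pack('i', 0))          # no chained preprocessor
    f.write(struct.pack('i', avg.size))   # dimension D
    f.write(avg.astype(np.float32).tostring())
    f.write(m.astype(np.float32).tostring())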
def forward(self):
    """
    Perform a forward pass to calculate the activation (objective)
    """
    numExamples = self.output_port.getOutput().shape[0]
    # binary cross-entropy, averaged over the batch
    self.objective = -gpu.sum(
        gpu.garray(self.target_port.getOutput()) *
        gpu.log(gpu.garray(self.output_port.getOutput())))
    self.objective += -gpu.sum(
        (1.0 - self.target_port.getOutput()) *
        gpu.log(1.000001 - self.output_port.getOutput()))
    self.objective /= numExamples
def test_symmetric():
    v = gnp.garray(np.random.uniform(size=(N, NVIS)))
    h = gnp.garray(np.random.uniform(size=(N, NHID)))
    stats = fang.Statistics.from_activations(v, h)
    with misc.gnumpy_conversion_check('allow'):
        assert np.allclose(stats.S_unary, stats.S_unary.T)
        assert np.allclose(stats.S_pair,
                           stats.S_pair.as_numpy_array().swapaxes(2, 3))
def glog_l_grad_stoch(self, Wmat, coin, st):
    grad = np.zeros((self.dim1, self.dim2), dtype=np.float)
    ggrad = gnp.garray(grad)
    gWmat = gnp.garray(Wmat)
    for s in range(st):
        n = coin[s]
        dif = gnp.outer(gnp.garray(self.Xi[n][0].T),
                        gnp.garray(self.Xi[n][2])) - \
            gnp.outer(gnp.garray(self.Xi[n][1].T),
                      gnp.garray(self.Xi[n][2]))
        margin = gnp.dot(gnp.garray(self.Xi[n][0]),
                         gnp.dot(gWmat, gnp.garray(self.Xi[n][2].T))) - \
            gnp.dot(gnp.garray(self.Xi[n][1]),
                    gnp.dot(gWmat, gnp.garray(self.Xi[n][2].T)))
        ggrad = ggrad + self.Y[n] * dif * \
            logistic(-self.Y[n] * margin.as_numpy_array())[0, 0]
    return ggrad.as_numpy_array()
def init_params(self, embed_map, count_dict, L):
    """
    Initializes embeddings and context matrices
    """
    prng = RandomState(self.seed)

    # Pre-trained word embedding matrix
    if embed_map is not None:
        R = np.zeros((self.K, self.V))
        for i in range(self.V):
            word = count_dict[i]
            if word in embed_map:
                R[:, i] = embed_map[word]
            else:
                R[:, i] = embed_map['*UNKNOWN*']
        R = gpu.garray(R)
    else:
        r = np.sqrt(6) / np.sqrt(self.K + self.V + 1)
        R = prng.rand(self.K, self.V) * 2 * r - r
        R = gpu.garray(R)
    bw = gpu.zeros((1, self.V))

    # Context
    C = 0.01 * prng.randn(self.context, self.K, self.K)
    C = gpu.garray(C)

    # Image context
    M = 0.01 * prng.randn(self.h, self.K)
    M = gpu.garray(M)

    # Hidden layer
    r = np.sqrt(6) / np.sqrt(self.D + self.h + 1)
    J = prng.rand(self.D, self.h) * 2 * r - r
    J = gpu.garray(J)
    bj = gpu.zeros((1, self.h))

    # Initial deltas used for SGD
    deltaR = gpu.zeros(np.shape(R))
    deltaC = gpu.zeros(np.shape(C))
    deltaB = gpu.zeros(np.shape(bw))
    deltaM = gpu.zeros(np.shape(M))
    deltaJ = gpu.zeros(np.shape(J))
    deltaBj = gpu.zeros(np.shape(bj))

    self.R = R
    self.C = C
    self.bw = bw
    self.M = M
    self.J = J
    self.bj = bj
    self.deltaR = deltaR
    self.deltaC = deltaC
    self.deltaB = deltaB
    self.deltaM = deltaM
    self.deltaJ = deltaJ
    self.deltaBj = deltaBj
def compute_kernel_transformation(self, x_base, x_new):
    x_base = x_base if isinstance(x_base, gnp.garray) else gnp.garray(x_base)
    x_new = x_new if isinstance(x_new, gnp.garray) else gnp.garray(x_new)
    xx = x_new.dot(x_base.T)
    xx_base = (x_base ** 2).sum(axis=1)
    xx_new = (x_new ** 2).sum(axis=1)
    # pairwise squared Euclidean distances between new and base points
    return -2 * xx + xx_base + xx_new[:, gnp.newaxis]
def load_trained_rbm(fname):
    """Load a previously trained RBM"""
    if fname[-2:] == 'pk':
        return storage.load(fname)
    elif fname[-3:] == 'mat':
        vars = scipy.io.loadmat(fname)
        return binary_rbms.RBM(gnp.garray(vars['visbiases'].ravel()),
                               gnp.garray(vars['hidbiases'].ravel()),
                               gnp.garray(vars['vishid']))
    else:
        raise RuntimeError('Unknown format: {}'.format(fname))
def _load_from_stream(self, f):
    self._param_id, self.in_dim, self.out_dim, self.dropout = \
        struct.unpack('iiif', f.read(4 * 4))
    self.W = gnp.garray(np.fromstring(f.read(self.in_dim * self.out_dim * 4),
                                      dtype=np.float32).reshape(
                                          self.in_dim, self.out_dim))
    self.b = gnp.garray(np.fromstring(f.read(self.out_dim * 4),
                                      dtype=np.float32))
    self.W_grad = self.W * 0
    self.b_grad = self.b * 0
    self.param_size = self.W.size + self.b.size
def matrix_weights(self, Wv=gp.garray(())):
    """Return the weights in Wv, or self.W, matrized."""
    if Wv.size == 0:
        Wm = self.Wm
    else:
        if not gp.is_garray(Wv):
            Wv = gp.garray(Wv)
        if Wv.size != self.weight_count():
            raise Exception('Wrong-sized Wv.')
        Wm = Wv.reshape((self.dim_output, self.dim_input))
    return Wm
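# vector_weights and matrix_weights are inverse reshapes between a column
# vector and a (dim_output, dim_input) matrix; a plain numpy round-trip
# demonstrating the invariant (illustrative):
import numpy as np

Wm = np.arange(6.0).reshape(2, 3)       # dim_output=2, dim_input=3
Wv = Wm.reshape((Wm.size, 1))           # vectorized form
assert np.array_equal(Wv.reshape(2, 3), Wm)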