def trainOneBatch(self, img, txt, epoch, imgcost, txtcost, diffcost=1.0):
    img = gp.as_garray(img)
    txt = gp.as_garray(txt)
    if self.debug:
        self.gradientCheck(img, txt)
        sys.exit(0)
    # Forward both modality pathways to the top (shared) layer.
    ia = self.isae.forward2Top(img, training=True)
    ta = self.tsae.forward2Top(txt, training=True)
    # Update the image pathway unless it is frozen or has zero cost weight.
    if not self.fix_img_path and (imgcost > 0 or diffcost > 0):
        g, irecloss = self.getSinglePathGrad(self.isae, ia, ta,
                                             imgcost, diffcost)
        self.isae.updateParams(epoch, g, self.isae.ae)
    else:
        irecloss = 0
    # Likewise for the text pathway.
    if not self.fix_txt_path and (txtcost > 0 or diffcost > 0):
        g, trecloss = self.getSinglePathGrad(self.tsae, ta, ia,
                                             txtcost, diffcost)
        self.tsae.updateParams(epoch, g, self.tsae.ae)
    else:
        trecloss = 0
    # Collect per-layer difference losses and sparsity statistics.
    perf = [irecloss, trecloss]
    for i in range(1, self.depth):
        perf.append(self.getDiffLoss(ia[i], ta[i]))
    a = ia[1:self.depth] + ta[1:self.depth]
    ae = self.isae.ae[1:] + self.tsae.ae[1:]
    for i in range(len(a)):
        perf.append(ae[i].computeSparsity(a[i]))
    return np.array(perf)
def forward_prop(self, X=None, T=10, h_init=None, **kwargs):
    """
    options:
    - X can be None when there is no input; in that case T must be specified.
    - If X is not None, T is ignored and taken from X.
    - An optional h_init can be given to feed into the first hidden state
      activation.
    """
    if X is not None and self.has_input:
        X = gnp.as_garray(X)
        self.X = X
        T = X.shape[0]
        self.A = X.dot(self.W_ih) + self.b
    else:
        self.X = None
        self.A = self.b.tile((T, 1))
    self.H = gnp.empty((T, self.out_dim))
    if h_init is not None:
        self.h_init = gnp.as_garray(h_init)
        self.A[0] += self.h_init.reshape(1, -1).dot(self.W_hh)
    else:
        self.h_init = None
    self.H[0] = self.nonlin.forward_prop(self.A[0])
    for t in range(1, T):
        self.A[t] += self.H[t - 1].reshape(1, -1).dot(self.W_hh)
        self.H[t] = self.nonlin.forward_prop(self.A[t])
    return self.H
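# A minimal numpy sketch of the same recurrence, useful as a reference when
# checking forward_prop. The names (W_ih, W_hh, b) mirror the attributes
# above; the tanh nonlinearity is an assumption, the actual layer uses
# whatever self.nonlin is.
import numpy as np

def rnn_forward_reference(X, W_ih, W_hh, b):
    T = X.shape[0]
    H = np.zeros((T, W_hh.shape[0]))
    A = X.dot(W_ih) + b               # input contribution for every step
    H[0] = np.tanh(A[0])              # no recurrent term at t = 0
    for t in range(1, T):
        H[t] = np.tanh(A[t] + H[t - 1].dot(W_hh))
    return H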
def reverse(self, processed_x):
    processed_x = gnp.as_garray(processed_x)
    sqrcov = gnp.as_garray(self.sqrcov)
    x = processed_x.dot(sqrcov) + self.avg
    if self.prev:
        return self.prev.reverse(x)
    else:
        return x
def train(self, x):
    if self.prev:
        x = self.prev.process(x)
    x = gnp.as_garray(x)
    self.avg = x.mean(axis=0)
    cov = (x - self.avg).T.dot(x - self.avg) / x.shape[0]
    cov = gnp.as_numpy_array(cov)
    # Regularize the covariance before factorizing so the Cholesky
    # decomposition and the inverse are numerically stable.
    self.sqrcov = la.cholesky(cov + np.eye(cov.shape[0]) * 1e-5)
    self.m = gnp.as_garray(la.inv(self.sqrcov + np.eye(x.shape[1]) * 1e-5))
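# A hedged round-trip check for the whitening pair above, in plain numpy.
# It assumes the forward step is (x - avg).dot(m), which is what `reverse`
# undoes with sqrcov; `la` here is numpy.linalg (the original may use scipy,
# whose cholesky returns the upper factor instead of the lower one).
import numpy as np
import numpy.linalg as la

rng = np.random.RandomState(0)
x = rng.randn(1000, 8)
avg = x.mean(axis=0)
cov = (x - avg).T.dot(x - avg) / x.shape[0]
sqrcov = la.cholesky(cov + np.eye(8) * 1e-5)
m = la.inv(sqrcov)

z = (x - avg).dot(m)                  # whiten
x_rec = z.dot(sqrcov) + avg           # reverse, as in `reverse` above
assert np.allclose(x_rec, x, atol=1e-8)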
def _dist_euclidean(X, Y):
    """
    d_ij = ||x_i - y_j||_2, computed via the expansion
    ||x_i - y_j||^2 = ||x_i||^2 + ||y_j||^2 - 2 x_i . y_j
    (a small constant is added under the square root for numerical stability).
    """
    X = gnp.as_garray(X)
    Y = gnp.as_garray(Y)
    X_diag = (X * X).sum(axis=1)
    Y_diag = (Y * Y).sum(axis=1)
    return gnp.sqrt(-2 * X.dot(Y.T) + X_diag.reshape(-1, 1)
                    + Y_diag.reshape(1, -1) + 1e-3)
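# A quick numpy cross-check of the distance expansion used above: the
# vectorized form should agree with an explicit pairwise loop (up to the
# 1e-3 stabilizer, which is omitted here).
import numpy as np

rng = np.random.RandomState(1)
X, Y = rng.randn(5, 3), rng.randn(4, 3)
fast = np.sqrt(-2 * X.dot(Y.T)
               + (X * X).sum(axis=1).reshape(-1, 1)
               + (Y * Y).sum(axis=1).reshape(1, -1))
slow = np.array([[np.linalg.norm(x - y) for y in Y] for x in X])
assert np.allclose(fast, slow, atol=1e-6)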
def setUp(self):
    self.theta = gpu.as_garray(
        [[0.040281, -0.034031, 0.075200, 0.071569],
         [0.013256, 0.092686, -0.070016, 0.093055]])
    self.theta2 = gpu.as_garray(
        [[0.1150530, 0.1013294, -0.0686610],
         [-0.0459608, 0.0020356, -0.0995257],
         [0.0948434, 0.0686487, 0.0481420]])
    self.theta3 = gpu.as_garray(
        [[0.1007928, 0.1168322, -0.0497762, -0.0658923],
         [-0.0841614, -0.0378504, -0.0918123, 0.0031022]])
def inference(self, imgpath, txtpath, statpath=None):
    """Map input features into latent features; normalize the image
    features first if statpath is available."""
    imgData = gp.garray(np.load(imgpath))
    txtData = gp.garray(np.load(txtpath))
    if statpath:
        stat = np.load(statpath)
        mean = gp.as_garray(stat['mean'])
        std = gp.as_garray(stat['std'])
        imgData -= mean
        imgData /= std
    imgcode, txtcode = self.getReps(imgData, txtData)
    return imgcode, txtcode
def prepareStat(self, path):
    path = path.rstrip('/ ')
    stat_file = path + '_stat.npz'
    if os.path.exists(stat_file):
        stat = np.load(stat_file)
        self.mean = gp.as_garray(stat['mean'])
        self.std = gp.as_garray(stat['std'])
    else:
        self.mean, self.std = self.computeStat()
        np.savez(stat_file,
                 mean=self.mean.as_numpy_array(),
                 std=self.std.as_numpy_array())
def finetune(self, train_xy, batch_size, learning_rate, momentum):
    (train_set_x, train_set_y) = train_xy
    train_set_x = gnp.as_garray(train_set_x)
    train_set_y = gnp.as_garray(train_set_y)
    self.feedforward(train_set_x)
    self.backpropagation(train_set_y)
    self.gradient_update(batch_size, learning_rate, momentum)
    self.errors = gnp.sum((self.final_layer_output - train_set_y) ** 2, axis=1)
    return self.errors.as_numpy_array()
def load_data(self, x_train, t_train, x_val=None, t_val=None):
    # dtype=object keeps variable-length sequences as an array of garrays
    # (np.object is deprecated in modern numpy).
    x_train = np.array([gnp.as_garray(x) for x in x_train], dtype=object)
    t_train = np.array([gnp.as_garray(t) for t in t_train], dtype=object)
    if x_val is not None and t_val is not None:
        x_val = np.array([gnp.as_garray(x) for x in x_val], dtype=object)
        t_val = np.array([gnp.as_garray(t) for t in t_val], dtype=object)
    super(SequenceLearner, self).load_data(x_train, t_train,
                                           x_val=x_val, t_val=t_val)
def initWeights(self):
    if self.initType == 'zeroMean':
        r0 = -self.initRange / 2.0
        r1 = self.initRange / 2.0
    elif self.initType == 'positive':
        r0 = 0.0
        r1 = self.initRange
    else:
        raise Exception('Unknown initialization type: ' + self.initType)
    if self.bias:
        if self.biasInitConst >= 0.0:
            # The last row of W holds the bias, initialized to a constant.
            self.W = np.concatenate(
                (self.random.uniform(r0, r1, (self.inputDim, self.outputDim)),
                 np.ones((1, self.outputDim)) * self.biasInitConst),
                axis=0)
        else:
            self.W = self.random.uniform(
                r0, r1, (self.inputDim + 1, self.outputDim))
    else:
        # Use the same (r0, r1) range so initType is respected here too.
        self.W = self.random.uniform(r0, r1, (self.inputDim, self.outputDim))
    if self.gpu:
        self.W = gpu.as_garray(self.W.astype('float32'))
def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
    pred = gnp.as_garray(pred)
    # Numerically stable softmax: shift by the per-row max before exp.
    y = gnp.exp(pred - pred.max(axis=1)[:, gnp.newaxis])
    y = y / y.sum(axis=1)[:, gnp.newaxis]
    # Cross-entropy loss and its gradient w.r.t. pred, which is y - target.
    return -(self.target * gnp.log(y + _SMALL_CONSTANT)).sum(), y - self.target
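# A plain-numpy reference for the stable softmax cross-entropy above, with a
# central-difference check of the analytic gradient y - target. The eps value
# stands in for _SMALL_CONSTANT, whose exact value is an assumption here.
import numpy as np

def softmax_xent(pred, target, eps=1e-20):
    y = np.exp(pred - pred.max(axis=1, keepdims=True))
    y /= y.sum(axis=1, keepdims=True)
    return -(target * np.log(y + eps)).sum(), y - target

rng = np.random.RandomState(0)
pred = rng.randn(3, 4)
target = np.eye(4)[rng.randint(4, size=3)]
loss, grad = softmax_xent(pred, target)
num = np.zeros_like(pred)
h = 1e-5
for i in range(pred.shape[0]):
    for j in range(pred.shape[1]):
        p, m = pred.copy(), pred.copy()
        p[i, j] += h
        m[i, j] -= h
        num[i, j] = (softmax_xent(p, target)[0]
                     - softmax_xent(m, target)[0]) / (2 * h)
assert np.allclose(num, grad, atol=1e-6)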
def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
    pred = gnp.as_garray(pred)
    # Squared hinge loss: sum of max(0, 1 - pred * target)^2.
    M = 1 - pred * self.target
    loss = (((M > 0) * M) ** 2).sum()
    grad = -2 * ((M > 0) * self.target * M) if compute_grad \
        else gnp.zeros(pred.shape)
    return loss, grad
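# The same squared hinge in numpy: loss = sum max(0, 1 - p*t)^2 and
# d/dp = -2 t max(0, 1 - p*t), matching the grad expression above.
import numpy as np

def sq_hinge(pred, target):
    m = np.maximum(0.0, 1.0 - pred * target)
    return (m ** 2).sum(), -2.0 * target * m

loss, grad = sq_hinge(np.array([[0.5]]), np.array([[1.0]]))
assert loss == 0.25 and grad[0, 0] == -1.0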
def forward(self, X):
    self.X = X
    N = X.shape[0]          # number of examples
    T = X.shape[1]          # timespan
    S = self.windowSize     # window size
    D = self.numChannels    # channels
    F = self.numFilters     # number of filters
    # im2col: gather every length-S window so the convolution becomes
    # a single matrix multiply.
    Z = np.zeros((N, T - S + 1, S, D), X.dtype)
    for i in range(T - S + 1):
        Z[:, i, :, :] = X[:, i:i + S, :]
    Z = Z.reshape(N * (T - S + 1), S * D)
    if self.gpu:
        Z = gpu.as_garray(Z.astype('float32'))
        Y = gpu.dot(Z, self.W)
        Y = gpu.as_numpy_array(Y)
    else:
        Y = np.dot(Z, self.W)
    Y = Y.reshape(N, T - S + 1, F)
    self.Z = Z
    return Y
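# A naive reference for the im2col convolution above: each output step is the
# flattened window times the filter bank. Useful for verifying the reshape
# bookkeeping; shapes follow the comments in forward().
import numpy as np

rng = np.random.RandomState(2)
N, T, D, S, F = 2, 7, 3, 4, 5
X = rng.randn(N, T, D)
W = rng.randn(S * D, F)

# im2col path
Z = np.stack([X[:, i:i + S, :] for i in range(T - S + 1)], axis=1)
Y_fast = Z.reshape(N * (T - S + 1), S * D).dot(W).reshape(N, T - S + 1, F)

# explicit loop
Y_slow = np.zeros((N, T - S + 1, F))
for n in range(N):
    for i in range(T - S + 1):
        Y_slow[n, i] = X[n, i:i + S, :].reshape(-1).dot(W)
assert np.allclose(Y_fast, Y_slow)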
def recover_patches_from_responses(self, resp):
    R = gnp.as_garray(resp)
    # Normalize responses to sum to one, then reconstruct each patch as a
    # convex combination of the codebook centers.
    R /= (R.sum(axis=1).reshape(-1, 1) + 1e-10)
    P = R.dot(self.C)
    if self.prep is not None:
        P = self.prep.reverse(P)
    return P.asarray()
def recover_patches_from_responses(self, resp, hard_assignment=False):
    if hard_assignment:
        # Each patch is reconstructed as its single strongest codebook center.
        P = self.C[resp.argmax(axis=1)]
    else:
        P = gnp.as_garray(resp).dot(self.C)
    if self.prep is not None:
        P = self.prep.reverse(P)
    return P.asarray()
def reload(self, _pt_params):
    """Reload pretrained parameters into this layer's parameter vector."""
    if self.p is None:
        self.p = gzeros(self.size)
    pt_params = gpu.as_garray(_pt_params)
    self.prep_layer(pt_params)
    del pt_params
def compute_patch_responses(self, patches):
    if self.prep is not None:
        patches = self.prep.process(patches)
    patches = gnp.as_garray(patches)
    D = self.f_dist(patches, self.C).asarray().astype(np.float64)
    # Triangle-style activation: the response is how much closer a patch is
    # to a center than the batch-average distance to that center, clipped
    # at zero.
    d_avg = D.mean(axis=0)
    R = d_avg.reshape(1, -1) - D
    return R * (R > 0)
def compute_patch_responses(self, patches):
    if self.prep is not None:
        patches = self.prep.process(patches)
    patches = gnp.as_garray(patches)
    D = self.f_dist(patches, self.C).asarray().astype(np.float64)
    # Hard assignment: a one-hot response at the closest codebook center.
    idx = D.argmin(axis=1)
    R = np.zeros((patches.shape[0], self.n_oc), dtype=np.float32)
    R[np.arange(R.shape[0]), idx] = 1
    return R
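# A compact numpy sketch of the two coding schemes above (soft triangle-style
# and hard one-hot assignment) against a toy codebook C; f_dist is assumed to
# be the Euclidean distance from _dist_euclidean.
import numpy as np

rng = np.random.RandomState(3)
patches = rng.randn(6, 2)
C = rng.randn(4, 2)                   # toy codebook with 4 centers
D = np.sqrt(((patches[:, None, :] - C[None, :, :]) ** 2).sum(-1))

soft = np.maximum(0.0, D.mean(axis=0, keepdims=True) - D)   # triangle-style
hard = np.zeros_like(D)
hard[np.arange(D.shape[0]), D.argmin(axis=1)] = 1           # one-hot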
def extractValidationReps(self, dat, output_path):
    """
    Extract representations of the input data (i.e., the top-layer
    vectors) and save them to disk at output_path. dat may be given as
    the option name for the input data path, as read from the config.
    """
    dat = gp.as_garray(dat)
    reps = self.getReps(dat)
    np.save(output_path, reps)
def initWithPCA(self):
    train_data_path = self.readField(self.config, self.name, "train_data")
    train_data_files = train_data_path.split(',')
    train_data_list = [
        self.loadDirOrFile(path) for path in train_data_files
    ]
    train_data = np.concatenate(train_data_list, axis=1)
    # Encoder weights: the leading principal directions of the training data.
    u, s, v = np.linalg.svd(train_data.T, full_matrices=True)
    bases = u[:, 0:self.hDim]
    self.W1 = gp.as_garray(bases)
    # Decoder weights: the least-squares solution mapping codes back to data.
    x = np.dot(train_data, self.W1.as_numpy_array())
    w = np.dot(np.dot(np.linalg.inv(np.dot(x.T, x)), x.T), train_data)
    # self.W2 = gp.as_garray(bases.T)
    self.W2 = gp.as_garray(w)
    self.initUpdate()
    self.initHyperParam(self.config, self.name)
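# The decoder above is the normal-equations solution of min_w ||x w - data||^2;
# np.linalg.lstsq computes the same thing more stably. A toy check with
# illustrative names:
import numpy as np

rng = np.random.RandomState(4)
data = rng.randn(50, 10)
u, s, v = np.linalg.svd(data.T, full_matrices=True)
W1 = u[:, :4]                          # encoder: top-4 principal directions
x = data.dot(W1)                       # codes
w_normal = np.linalg.inv(x.T.dot(x)).dot(x.T).dot(data)
w_lstsq = np.linalg.lstsq(x, data, rcond=None)[0]
assert np.allclose(w_normal, w_lstsq, atol=1e-8)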
def parameter_prediction(self, test_set_x):
    test_set_x = gnp.as_garray(test_set_x)
    current_activations = test_set_x
    # Hidden layers use tanh; the output layer is linear.
    for i in range(self.n_layers):
        current_activations = gnp.tanh(
            gnp.dot(current_activations, self.W_params[i]) + self.b_params[i])
    final_layer_output = (gnp.dot(current_activations,
                                  self.W_params[self.n_layers])
                          + self.b_params[self.n_layers])
    return final_layer_output.as_numpy_array()
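# A numpy reference for the forward pass above; Ws and bs stand in for
# self.W_params and self.b_params (n_layers tanh hidden matrices followed
# by one linear output matrix).
import numpy as np

def mlp_forward(x, Ws, bs):
    h = x
    for W, b in zip(Ws[:-1], bs[:-1]):
        h = np.tanh(h.dot(W) + b)
    return h.dot(Ws[-1]) + bs[-1]      # linear output layer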
def __init__(self,
             outputDim,
             activeFn,
             inputNames=None,
             initRange=1.0,
             bias=True,
             biasInitConst=-1.0,
             initSeed=2,
             needInit=True,
             initWeights=0,
             initType='zeroMean',
             learningRate=0.0,
             learningRateAnnealConst=0.0,
             momentum=0.0,
             deltaMomentum=0.0,
             weightClip=0.0,
             gradientClip=0.0,
             weightRegConst=0.0,
             outputdEdX=True,
             defaultValue=0.0,
             gpu=use_gpu,
             name=None):
    Stage.__init__(self,
                   name=name,
                   inputNames=inputNames,
                   outputDim=outputDim,
                   defaultValue=defaultValue,
                   learningRate=learningRate,
                   learningRateAnnealConst=learningRateAnnealConst,
                   momentum=momentum,
                   deltaMomentum=deltaMomentum,
                   weightClip=weightClip,
                   gradientClip=gradientClip,
                   weightRegConst=weightRegConst,
                   gpu=gpu,
                   outputdEdX=outputdEdX)
    self.bias = bias
    self.activeFn = activeFn
    self.inputDim = None
    self.random = np.random.RandomState(initSeed)
    if not needInit:
        if self.gpu:
            self.W = gnp.as_garray(initWeights)
        else:
            self.W = initWeights
    else:
        # Lazily initialize the weights when the first batch arrives.
        self.W = None
    self.initRange = initRange
    self.biasInitConst = biasInitConst
    self.initType = initType
    self.X = 0
    self.Y = 0
def array(x, dtype=None, **kwargs):
    """numpy.array-like constructor that dispatches between garray and
    ndarray based on the requested dtype."""
    if gnp.is_garray(x):
        if dtype is gpu_float32:
            return x
        else:
            return np.array(gnp.as_numpy_array(x), dtype=dtype, **kwargs)
    else:
        if dtype is gpu_float32:
            return gnp.as_garray(np.array(x, **kwargs))
        else:
            return np.array(x, dtype=dtype, **kwargs)
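# Usage sketch for the dispatching constructor above. gpu_float32 is the
# module's sentinel dtype for "give me a garray"; everything else falls
# through to an ordinary numpy array.
a_cpu = array([[1.0, 2.0]], dtype=np.float64)    # numpy ndarray
a_gpu = array([[1.0, 2.0]], dtype=gpu_float32)   # gnumpy garray
a_back = array(a_gpu)                            # garray -> ndarray copy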
def forward(self, X):
    if self.gpu:
        self.X = []
        self.X.append(gpu.as_garray(X[0].astype('float32')))
        self.X.append(gpu.as_garray(X[1].astype('float32')))
        if len(X) == 2:
            Y = self.beta * gpu.sum(self.X[0] * self.X[1], axis=self.sumAxis)
        elif len(X) == 3:
            self.X.append(gpu.as_garray(X[2].astype('float32')))
            self.Z = gpu.sum(self.X[0] * self.X[1], axis=self.sumAxis)
            Y = self.X[2] * self.Z
        Y = Y.as_numpy_array(dtype='float32')
    else:
        self.X = X
        if len(self.X) == 2:
            Y = self.beta * np.sum(self.X[0] * self.X[1], axis=self.sumAxis)
        elif len(self.X) == 3:
            self.Z = np.sum(self.X[0] * self.X[1], axis=self.sumAxis)
            Y = self.X[2] * self.Z
    return Y
def gradient_check(self, X, y, dweights):
    EPSILON = g.as_garray(1e-4)
    ERRORTHRESHOLD = g.as_garray(1e-2)
    g.GNUMPY_CPU_PRECISION = 64
    g.acceptable_number_types = "no nans or infs"
    for ind in range(len(self.weights)):
        w, b = self.weights[ind]
        dw, db = dweights[ind]
        # Check the bias gradients with a central difference.
        for i in range(len(b)):
            b[i] += EPSILON
            fw = self.predict_proba(X)
            op = self.f_score(y, fw)
            b[i] -= 2 * EPSILON
            fw = self.predict_proba(X)
            om = self.f_score(y, fw)
            b[i] += EPSILON
            rs = (g.as_garray(op) - g.as_garray(om)) / (EPSILON * 2.0) \
                / g.as_garray(len(X))
            if g.abs(rs - g.as_garray(db[i])) > ERRORTHRESHOLD:
                print(ind, i, rs, db[i], type(rs), type(db))
                assert False
        # Check the weight gradients the same way.
        for i in range(w.shape[0]):
            for j in range(w.shape[1]):
                w[i, j] += EPSILON
                fw = self.predict_proba(X)
                op = self.f_score(y, fw)
                w[i, j] -= 2 * EPSILON
                fw = self.predict_proba(X)
                om = self.f_score(y, fw)
                w[i, j] += EPSILON
                rs = (g.as_garray(op) - g.as_garray(om)) / (EPSILON * 2.0) \
                    / g.as_garray(len(X))
                if g.abs(rs - g.as_garray(dw[i, j])) > ERRORTHRESHOLD:
                    print(ind, i, j, rs, dw[i, j], type(w), type(dw))
                    assert False
    print("gradient_check passed")
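# A compact numpy version of the same central-difference test, written for a
# generic zero-argument loss() callable that reads w in place; the method
# above does the equivalent entry by entry on the live model.
import numpy as np

def numeric_grad(loss, w, h=1e-4):
    g_num = np.zeros_like(w)
    it = np.nditer(w, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        orig = w[idx]
        w[idx] = orig + h
        lp = loss()
        w[idx] = orig - h
        lm = loss()
        w[idx] = orig                 # restore the original entry
        g_num[idx] = (lp - lm) / (2 * h)
        it.iternext()
    return g_num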
def cast_array_to_local_type(arr):
    """Given an array (HDF5, numpy, gnumpy), return an array that matches the
    current theano configuration. That is, if the current device is the GPU,
    make it a gnumpy.garray. If the current theano.config.floatX does not
    match the dtype of arr, return an array that does."""
    res = arr
    if GPU and not isinstance(arr, gp.garray):
        warnings.warn('Implicitly converting numpy.ndarray to gnumpy.garray')
        res = gp.as_garray(res)
    elif isinstance(arr, np.ndarray) and arr.dtype != theano.config.floatX:
        res = arr.astype(theano.config.floatX)
    return res
def backward(self, dEdY):
    # Need to generalize, but for now assume the attention model.
    dEdX = []
    if self.gpu:
        if len(self.X) == 2:
            dEdY = dEdY.reshape(dEdY.shape[0], 1, dEdY.shape[1])
            dEdY = gpu.as_garray(dEdY)
            dEdX1 = self.beta * gpu.sum(dEdY * self.X[1], axis=2)
            dEdX2 = self.beta * dEdY * self.X[0]
            dEdX.append(dEdX1.as_numpy_array(dtype='float32'))
            dEdX.append(dEdX2.as_numpy_array(dtype='float32'))
        elif len(self.X) == 3:
            dEdY = gpu.as_garray(dEdY)
            dEdY2 = dEdY.reshape(dEdY.shape[0], 1, dEdY.shape[1])
            dEdY2 = gpu.as_garray(dEdY2)
            dEdX1 = self.X[2] * gpu.sum(dEdY2 * self.X[1], axis=2)
            dEdX2 = (self.X[2].reshape(self.X[2].shape[0], 1, 1)
                     * dEdY2 * self.X[0])
            dEdX3 = gpu.sum(dEdY * self.Z,
                            axis=-1).reshape(self.X[2].shape[0], 1)
            dEdX.append(dEdX1.as_numpy_array(dtype='float32'))
            dEdX.append(dEdX2.as_numpy_array(dtype='float32'))
            dEdX.append(dEdX3.as_numpy_array(dtype='float32'))
    else:
        if len(self.X) == 2:
            dEdY = dEdY.reshape(dEdY.shape[0], 1, dEdY.shape[1])
            dEdX.append(self.beta * np.sum(dEdY * self.X[1], axis=2))
            dEdX.append(self.beta * dEdY * self.X[0])
        elif len(self.X) == 3:
            dEdY2 = dEdY.reshape(dEdY.shape[0], 1, dEdY.shape[1])
            dEdX.append(self.X[2] * np.sum(dEdY2 * self.X[1], axis=2))
            dEdX.append(self.X[2].reshape(self.X[2].shape[0], 1, 1)
                        * dEdY2 * self.X[0])
            dEdX.append(
                np.sum(dEdY * self.Z, axis=-1).reshape(self.X[2].shape[0], 1))
    return dEdX
def __init__(self,
             numChannels,
             windowSize,
             numFilters,
             inputNames=None,
             initRange=1.0,
             initSeed=2,
             needInit=True,
             initWeights=None,
             learningRate=0.0,
             learningRateAnnealConst=0.0,
             momentum=0.0,
             deltaMomentum=0.0,
             weightClip=0.0,
             gradientClip=0.0,
             weightRegConst=0.0,
             defaultValue=0.0,
             outputdEdX=True,
             gpu=use_gpu,
             name=None):
    Stage.__init__(self,
                   name=name,
                   inputNames=inputNames,
                   outputDim=numFilters,
                   defaultValue=defaultValue,
                   learningRate=learningRate,
                   learningRateAnnealConst=learningRateAnnealConst,
                   momentum=momentum,
                   deltaMomentum=deltaMomentum,
                   weightClip=weightClip,
                   gradientClip=gradientClip,
                   weightRegConst=weightRegConst,
                   gpu=gpu,
                   outputdEdX=outputdEdX)
    self.numFilters = numFilters
    self.numChannels = numChannels
    self.windowSize = windowSize
    self.random = np.random.RandomState(initSeed)
    if needInit:
        self.W = self.random.uniform(
            -initRange / 2.0, initRange / 2.0,
            (self.windowSize * self.numChannels, self.numFilters))
    else:
        self.W = initWeights
    if self.gpu:
        self.W = gnp.as_garray(self.W.astype('float32'))
    self.X = 0
    self.Y = 0
def backward(self, dEdY):
    dEdZ = self.activeFn.backward(dEdY, self.Y, 0)
    if self.gpu:
        gdEdZ = gpu.as_garray(dEdZ.astype('float32'))
        self.dEdW = gpu.dot(self.X.transpose(), gdEdZ)
        if self.bias:
            # Skip the bias row when propagating the gradient to the input.
            dEdX = gpu.dot(gdEdZ, self.W[:-1, :].transpose())
        else:
            dEdX = gpu.dot(gdEdZ, self.W.transpose())
        dEdX = gpu.as_numpy_array(dEdX)
    else:
        self.dEdW = np.dot(self.X.transpose(), dEdZ)
        if self.bias:
            dEdX = np.dot(dEdZ, self.W[:-1, :].transpose())
        else:
            dEdX = np.dot(dEdZ, self.W.transpose())
    return dEdX if self.outputdEdX else None