def lecun_lcn(self, X, kernel_size=7, threshold=1e-4, use_divisor=False):
    """
    Yann LeCun's local contrast normalization.

    Original code in Theano by: Guillaume Desjardins

    X is indexed as a 4D symbolic tensor whose last two axes are filtered.
    The kernel_size x kernel_size filter returned by ``gaussian_filter`` is
    convolved with X and the cropped result is subtracted from X. When
    ``use_divisor`` is true, the centered tensor is additionally divided by
    a local norm computed from X, floored by its per-map mean and by
    ``threshold``.

    Returns the normalized symbolic tensor.
    """
    kernel_dims = (1, 1, kernel_size, kernel_size)
    kernel = gaussian_filter(kernel_size).reshape(kernel_dims)
    kernel = shared(_asarray(kernel, dtype=floatX), borrow=True)

    # 'full' convolution enlarges the two filtered axes; the same crop is
    # applied to every convolution output below.
    half = int(floor(kernel_size / 2.))
    centre = (slice(None), slice(None),
              slice(half, -half), slice(half, -half))

    def smooth(tensor):
        blurred = conv2d(tensor, filters=kernel, filter_shape=kernel_dims,
                         border_mode='full')
        return blurred[centre]

    # For each pixel, remove mean of kernel_size x kernel_size neighborhood
    normalized = X - smooth(X)
    if not use_divisor:
        return normalized

    # Scale down norm of kernel_size x kernel_size patch
    local_norm = T.sqrt(smooth(T.sqr(T.abs_(X))))
    map_mean = local_norm.mean(axis=[2, 3])
    scale = T.largest(map_mean.dimshuffle(0, 1, 'x', 'x'), local_norm)
    scale = T.maximum(scale, threshold)
    return normalized / scale
def lcn_3d_input(data, kernel_shape, n_maps):
    """
    Apply local contrast normalization to a 5D array and return the result.

    The whole graph is built on the symbolic input ``x`` and evaluated once
    on ``data``; ``data`` itself is only used for its shape and as the
    function argument, so it is not embedded in the graph as a constant.

    :param data: [examples, depth, filters, height, width]
    :param kernel_shape: sequence of three ints, indexed as kernel_shape[0],
        kernel_shape[1], kernel_shape[2] (the filter extents along axes 1, 3
        and 4 of data)
    :param n_maps: int, filter extent along axis 2 and tiling factor of the
        convolution output along that axis
    :return: new_x: [examples, depth, filters, height, width]
    """
    # create symbolic variable for the input data
    ftensor5 = T.TensorType('float32', [False] * 5)
    x = ftensor5()

    # create 3d filter that spans across all channels / feature maps
    # todo: kernel is not really in 3d; need 3d implementation instead of
    #       2d repeated across third dimension
    # todo: alternative is to keep 2d kernel and extend short range given
    #       data size in z-plane; change first kernel_sh.
    filter_shape = (1, kernel_shape[0], n_maps,
                    kernel_shape[1], kernel_shape[2])
    filters = np.resize(gaussian_filter(kernel_shape[1]), filter_shape)
    filters = filters / np.sum(filters)
    filters = sharedX(filters)

    # convolve filter with input signal
    convolution_out = conv3d(
        signals=x,
        filters=filters,
        signals_shape=data.shape,
        filters_shape=filter_shape,
        border_mode='valid'
    )

    # half-widths of the filter along the cropped axes; the 'valid' output
    # is written back into the interior of a full-size tensor
    mid_0 = int(np.floor(kernel_shape[0] / 2.))
    mid_1 = int(np.floor(kernel_shape[1] / 2.))
    mid_2 = int(np.floor(kernel_shape[2] / 2.))
    interior = (slice(None), slice(mid_0, -mid_0), slice(None),
                slice(mid_1, -mid_1), slice(mid_2, -mid_2))

    # for each pixel, remove the filtered neighborhood mean
    mean = T.tile(convolution_out, (1, 1, n_maps, 1, 1))
    padded_mean = T.set_subtensor(T.zeros_like(x)[interior], mean)
    centered_data = x - padded_mean

    # scale down norm of the patch if norm is bigger than 1
    sum_sqr_xx = conv3d(signals=T.sqr(x), filters=filters)
    denominator = T.tile(T.sqrt(sum_sqr_xx), (1, 1, n_maps, 1, 1))
    padded_denominator = T.set_subtensor(
        T.ones_like(x)[interior], denominator
    )
    per_img_mean = padded_denominator.mean(axis=[1, 2, 3, 4])
    divisor = T.largest(
        per_img_mean.dimshuffle(0, 'x', 'x', 'x', 'x'),
        padded_denominator
    )
    new_x = centered_data / T.maximum(1., divisor)

    # compile theano function
    f = theano.function([x], new_x)
    return f(data)
def lecun_lcn(self, X, kernel_size=7, threshold=1e-4, use_divisor=True):
    """
    Yann LeCun's local contrast normalization (Keras-backend variant).

    Original code in Theano by: Guillaume Desjardins

    The filter from ``self.gaussian_filter`` is applied to X with
    ``border_mode='same'`` and the result is subtracted from X. When
    ``use_divisor`` is true, the centered tensor is divided by a local norm
    of the centered values, floored by its mean over axes 2 and 3 and by
    ``threshold``.
    """
    kernel_dims = (1, 1, kernel_size, kernel_size)
    kernel = K.variable(
        self.gaussian_filter(kernel_size).reshape(kernel_dims))

    def smooth(tensor):
        return K.conv2d(tensor, kernel, filter_shape=kernel_dims,
                        border_mode='same')

    # For each pixel, remove mean of kernel_size x kernel_size neighborhood
    centered = X - smooth(X)
    if not use_divisor:
        return centered

    # Scale down norm of kernel_size x kernel_size patch
    local_norm = T.sqrt(smooth(K.pow(K.abs(centered), 2)))
    map_mean = local_norm.mean(axis=[2, 3])
    scale = T.largest(map_mean.dimshuffle(0, 1, 'x', 'x'), local_norm)
    scale = T.maximum(scale, threshold)
    return centered / scale
def LCN(data, kernel_shape):
    """
    Build the local-contrast-normalized version of a symbolic 4D tensor.

    ``data`` is convolved ('full' mode) with the kernel_shape x kernel_shape
    filter from ``gaussian_filter``; the cropped result is subtracted, and
    the difference is divided by max(1, local norm floored by its mean over
    axes 2 and 3). The result is squeezed and index 0 of axis 1 is taken
    before returning.
    """
    kernel_dims = (1, 1, kernel_shape, kernel_shape)
    kernel = sharedX(gaussian_filter(kernel_shape).reshape(kernel_dims))
    half = int(np.floor(kernel_shape / 2.))
    centre = (slice(None), slice(None),
              slice(half, -half), slice(half, -half))

    # For each pixel, remove mean of the filter neighborhood
    local_mean = conv2d(data, filters=kernel, border_mode='full')
    centered = data - local_mean[centre]

    # Scale down norm of the patch if norm is bigger than 1
    energy = conv2d(T.sqr(data), filters=kernel, border_mode='full')
    local_norm = T.sqrt(energy[centre])
    map_mean = local_norm.mean(axis=[2, 3])
    scale = T.largest(map_mean.dimshuffle(0, 1, 'x', 'x'), local_norm)
    normalized = centered / T.maximum(1., scale)

    # remove broadcastable dimension
    normalized = T.extra_ops.squeeze(normalized)
    # TODO: check whether this forced squeeze is good
    return normalized[:, 0, :, :]
def apply(self, dataset, can_fit=True):
    """
    Normalize the dataset's design matrix in place with a 9x9 box filter.

    The design matrix is reshaped to ``(len(x),) + self.img_shape``, the
    9x9 neighborhood mean is subtracted, the result is divided by
    max(1, sqrt of the 9x9 sum of squares), flattened back to a matrix and
    stored with ``dataset.set_design_matrix``. ``can_fit`` is accepted but
    not used here.
    """
    design = dataset.get_design_matrix()
    denseX = T.matrix(dtype=design.dtype)
    batch_shape = (len(design),) + self.img_shape
    images = denseX.reshape(batch_shape)

    patch_shape = (1, 1, 9, 9)
    ones_patch = T.ones(patch_shape, dtype=design.dtype)
    interior = (slice(None), slice(None), slice(4, -4), slice(4, -4))

    # For each pixel, remove mean of 9x9 neighborhood
    local_mean = conv.conv2d(input=images,
                             filters=ones_patch / (9. * 9.),
                             image_shape=batch_shape,
                             filter_shape=patch_shape,
                             border_mode='full')
    centered = images - local_mean[interior]

    # Scale down norm of 9x9 patch if norm is bigger than 1
    energy = conv.conv2d(input=centered ** 2,
                         filters=ones_patch,
                         image_shape=batch_shape,
                         filter_shape=patch_shape,
                         border_mode='full')
    local_norm = T.sqrt(energy[interior]).reshape(images.shape)
    normalized = T.flatten(centered / T.largest(1.0, local_norm), outdim=2)

    f = theano.function([denseX], normalized)
    dataset.set_design_matrix(f(design))
def _lcn(image, im_shape, fmaps, pool_depth, width, sigma):
    """
    Build a local-contrast-normalized version of a symbolic 4D image.

    The filters come from ``_lcn_filters(fmaps, pool_depth, width, sigma)``.
    The filtered image is subtracted from ``image``; the difference is then
    divided by the filtered root of its square, floored by its mean over
    axes 2 and 3, plus 1e-6. The result is cast to ``theano.config.floatX``.
    """
    import theano
    import theano.tensor as T
    from theano.tensor.nnet import conv

    kernel = _lcn_filters(fmaps, pool_depth, width, sigma)
    kernel_dims = kernel.shape
    pad = width // 2
    centre = (slice(None), slice(None),
              slice(pad, -pad), slice(pad, -pad))

    def blur(tensor):
        full = conv.conv2d(input=tensor, filters=kernel,
                           image_shape=im_shape, filter_shape=kernel_dims,
                           border_mode='full')
        return full[centre]

    centered = image - blur(image)
    local_norm = T.sqrt(blur(T.sqr(centered)))
    map_mean = local_norm.mean(axis=[2, 3])
    scale = T.largest(map_mean.dimshuffle(0, 1, 'x', 'x'), local_norm) + 1e-6
    return T.cast(centered / scale, theano.config.floatX)
def setup_theano(self):
    """
    Create the symbolic variables and compile the scoring/gradient functions.

    Sets ``self.vocab_mat``, ``self.sample``, ``self.corrupt_sample``,
    ``self.grad``, ``self.grad_fn`` and ``self.exec_fn``. The cost is
    ``largest(0, 1 - sum(f(sample)) + sum(f(corrupt_sample)))`` where ``f``
    is ``self.transform_function`` applied to the output of
    ``self.wordvec_transform``.
    """
    self.vocab_mat = T.fmatrix('vocab')
    self.sample = T.fmatrix('sample')
    bias = T.fvector('b')
    weights = T.fmatrix('W')

    def transformed(sample):
        embedded = self.wordvec_transform(sample, self.vocab_mat)
        return self.transform_function(weights, bias, embedded)

    f = transformed(self.sample)
    score = T.sum(f)

    self.corrupt_sample = T.fmatrix('corrupt-sample')
    corrupt_score = T.sum(transformed(self.corrupt_sample))

    cost = T.largest(0, 1 - score + corrupt_score)
    params = [bias, weights, self.vocab_mat]
    self.grad = theano.grad(cost, params)
    self.grad_fn = theano.function(
        [self.sample, self.corrupt_sample] + params,
        self.grad,
        allow_input_downcast=True)
    self.exec_fn = theano.function(
        [self.sample] + params, f, allow_input_downcast=True)
def lecun_lcn(self, X, kernel_size=9, threshold=1e-4, use_divisor=True,
              border=False):
    """
    Yann LeCun's local contrast normalization.

    Original code in Theano by: Guillaume Desjardins

    X is indexed as a 4D symbolic tensor whose last two axes are filtered
    with the kernel_size x kernel_size filter from ``gaussian_filter``.

    :param border: when true, X is padded on axes 2 and 3 by repeating its
        edge rows/columns and convolved in 'valid' mode; otherwise a 'full'
        convolution is cropped.
    :param use_divisor: when true, divide the centered tensor by a local
        norm floored by its per-map mean and by ``threshold``.
    :return: the normalized symbolic tensor.
    """
    filter_shape = (1, 1, kernel_size, kernel_size)
    filters = gaussian_filter(kernel_size).reshape(filter_shape)
    filters = shared(_asarray(filters, dtype=floatX), borrow=True)
    mid = int(floor(kernel_size / 2.))

    if border:
        # Integer division: the pad width is used as a repeat count and must
        # stay an int even when `/` is true division.
        r = (kernel_size - 1) // 2
        up = X[:, :, 0:1, :].repeat(r, axis=2)
        down = X[:, :, -1:, :].repeat(r, axis=2)
        X_ = T.concatenate([up, X, down], axis=2)
        left = X_[:, :, :, 0:1].repeat(r, axis=3)
        right = X_[:, :, :, -1:].repeat(r, axis=3)
        X_ = T.concatenate([left, X_, right], axis=3)
        convout = conv2d(X_, filters=filters, filter_shape=filter_shape,
                         border_mode='valid')
        centered_X = X - convout
    else:
        convout = conv2d(X, filters=filters, filter_shape=filter_shape,
                         border_mode='full')
        # For each pixel, remove mean of kernel_size x kernel_size
        # neighborhood
        centered_X = X - convout[:, :, mid:-mid, mid:-mid]

    if not use_divisor:
        return centered_X

    # Scale down norm of kernel_size x kernel_size patch
    sum_sqr_XX = conv2d(T.sqr(X), filters=filters,
                        filter_shape=filter_shape, border_mode='full')
    sum_sqr_XX = sum_sqr_XX[:, :, mid:-mid, mid:-mid]
    sum_sqr_XX = T.maximum(sum_sqr_XX, threshold)
    denom = T.sqrt(sum_sqr_XX)
    per_img_mean = denom.mean(axis=[2, 3])
    divisor = T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom)
    divisor = T.maximum(divisor, threshold)
    return centered_X / divisor
def lecun_lcn(self, X, kernel_size=7, threshold=1e-4, use_divisor=False):
    """
    Yann LeCun's local contrast normalization.

    Original code in Theano by: Guillaume Desjardins

    Subtracts from X its cropped 'full' convolution with the filter from
    ``gaussian_filter``. With ``use_divisor`` the centered tensor is also
    divided by a local norm of the centered values, floored by its mean
    over axes 2 and 3 and by ``threshold``.
    """
    kernel_dims = (1, 1, kernel_size, kernel_size)
    weights = gaussian_filter(kernel_size).reshape(kernel_dims)
    weights = shared(_asarray(weights, dtype=floatX), borrow=True)
    half = int(floor(kernel_size / 2.))

    # For each pixel, remove mean of kernel_size x kernel_size neighborhood
    blurred = conv2d(X, filters=weights, filter_shape=kernel_dims,
                     border_mode='full')
    result = X - blurred[:, :, half:-half, half:-half]

    if use_divisor:
        # Scale down norm of kernel_size x kernel_size patch
        energy = conv2d(T.sqr(T.abs_(result)), filters=weights,
                        filter_shape=kernel_dims, border_mode='full')
        local_norm = T.sqrt(energy[:, :, half:-half, half:-half])
        floor_per_map = local_norm.mean(axis=[2, 3]).dimshuffle(
            0, 1, 'x', 'x')
        scale = T.maximum(T.largest(floor_per_map, local_norm), threshold)
        result = result / scale

    return result
def __init__(self):
    """
    Create the symbolic variables and compile ``exec_fn`` and ``grad_fn``.

    The real and the corrupted sample are scored with the same activation,
    ``1 / (1 + exp(-(W * (x . vocab_mat + b))))``, so that the ranking cost
    ``largest(0, 1 - sum(f) + sum(f_c))`` compares like with like. The
    expression is the one ``exec_fn`` has always returned.
    """
    self.configured = False
    self.vocab_mat = T.fmatrix('vocab')
    # x has size num_samples x (window_size * vec_width)
    self.x = T.fmatrix('x')
    b = T.fvector('b')
    W = T.fmatrix('W')

    def activation(sample):
        # Shared by both samples so their parenthesization cannot drift.
        return 1 / (1 + T.exp(-(W * (sample.dot(self.vocab_mat) + b))))

    f = activation(self.x)
    s = T.sum(f)
    self.exec_fn = theano.function([self.x, b, W, self.vocab_mat], f,
                                   allow_input_downcast=True)

    self.x_c = T.fmatrix('x_c')
    f_c = activation(self.x_c)
    s_c = T.sum(f_c)

    J = T.largest(0, 1 - s + s_c)
    self.grad = theano.grad(J, [b, W, self.vocab_mat])
    self.grad_fn = theano.function(
        [self.x, self.x_c, b, W, self.vocab_mat],
        self.grad,
        allow_input_downcast=True)
def apply(self, dataset, can_fit=True):
    """
    Normalize the dataset's design matrix in place with a 9x9 filter.

    The design matrix is reshaped to ``(len(x),) + self.img_shape``; the
    filter from ``gaussian_filter_9x9`` is used both to remove the local
    mean and to compute the local norm, which is floored by its per-map
    mean before dividing. The flattened result is stored with
    ``dataset.set_design_matrix``. ``can_fit`` is accepted but not used.
    """
    design = dataset.get_design_matrix()
    denseX = T.matrix(dtype=design.dtype)
    batch_shape = (len(design),) + self.img_shape
    images = denseX.reshape(batch_shape)

    kernel_dims = (1, 1, 9, 9)
    kernel = gaussian_filter_9x9().reshape(kernel_dims)

    def blur(tensor):
        full = conv.conv2d(input=tensor, filters=kernel,
                           image_shape=batch_shape,
                           filter_shape=kernel_dims,
                           border_mode='full')
        return full[:, :, 4:-4, 4:-4]

    # For each pixel, remove mean of 9x9 neighborhood
    centered = images - blur(images)

    # Scale down norm of 9x9 patch
    local_norm = T.sqrt(blur(centered ** 2))
    map_mean = T.mean(T.flatten(local_norm, outdim=3), axis=2)
    scale = T.largest(map_mean.dimshuffle((0, 1, 'x', 'x')), local_norm)
    normalized = T.flatten(centered / scale, outdim=2)

    f = theano.function([denseX], normalized)
    dataset.set_design_matrix(f(design))
def __init__(self, word_vec_width, batch_size, num_hidden,
             learning_rate=0.1):
    """
    Store the hyper-parameters and compile ``exec_fn`` and ``grad_fn``.

    Both the real and the corrupted one-hot input are scored with the same
    activation, ``1 / (1 + exp(-(W * (onehot . vocab_mat + b))))``, which is
    the expression ``exec_fn`` has always returned; the ranking cost is
    ``largest(0, 1 - sum(f) + sum(f_c))``.
    """
    self.num_hidden = num_hidden
    self.learning_rate = learning_rate
    self.word_vec_width = word_vec_width
    self.batch_size = batch_size

    self.vocab_mat = T.fmatrix('vocab')
    self.word_onehot = T.fmatrix('word_onehot')
    b = T.fvector('b')
    W = T.fmatrix('W')

    def activation(onehot):
        # One definition for both inputs keeps the two scores consistent.
        return 1 / (1 + T.exp(-(W * (onehot.dot(self.vocab_mat) + b))))

    f = activation(self.word_onehot)
    s = T.sum(f)
    self.exec_fn = theano.function(
        [self.word_onehot, b, W, self.vocab_mat],
        f,
        allow_input_downcast=True)

    self.word_onehot_c = T.fmatrix('word_onehot_c')
    f_c = activation(self.word_onehot_c)
    s_c = T.sum(f_c)

    J = T.largest(0, 1 - s + s_c)
    self.grad = theano.grad(J, [b, W, self.vocab_mat])
    self.grad_fn = theano.function(
        [self.word_onehot, self.word_onehot_c, b, W, self.vocab_mat],
        self.grad,
        allow_input_downcast=True)
def lecun_lcn(input, img_shape, kernel_shape, threshold=1e-4):
    """
    Yann LeCun's local contrast normalization.

    Original code in Theano by: Guillaume Desjardins

    ``input`` is reshaped to (n, rows, cols, 1), normalized with the
    kernel_shape x kernel_shape filter from ``gaussian_filter`` through a
    ``Conv2D`` transformer, and the result is returned flattened to three
    dimensions. The divisor is floored by ``threshold``.
    """
    input = input.reshape(input.shape[0], input.shape[1], input.shape[2], 1)
    n_examples = len(input)

    X = tensor.matrix(dtype=input.dtype)
    X = X.reshape((n_examples, img_shape[0], img_shape[1], 1))

    kernel_dims = (1, 1, kernel_shape, kernel_shape)
    kernel = sharedX(gaussian_filter(kernel_shape).reshape(kernel_dims))
    input_space = Conv2DSpace(shape=img_shape, num_channels=1)
    half = int(numpy.floor(kernel_shape / 2.0))

    def blur(symbolic):
        # A fresh transformer per convolution, cropped back after 'full'.
        transformer = Conv2D(filters=kernel, batch_size=n_examples,
                             input_space=input_space, border_mode="full")
        return transformer.lmul(symbolic)[:, half:-half, half:-half, :]

    # For each pixel, remove mean of the filter neighborhood
    centered = X - blur(X)

    # Scale down norm of the patch
    local_norm = tensor.sqrt(blur(X ** 2))
    img_mean = local_norm.mean(axis=[1, 2])
    scale = tensor.largest(img_mean.dimshuffle(0, "x", "x", 1), local_norm)
    scale = tensor.maximum(scale, threshold)

    normalized = tensor.flatten(centered / scale, outdim=3)
    f = function([X], normalized)
    return f(input)
def LCNinput(data, kernel_shape):
    """
    Compile and run local contrast normalization on a float32 4D array.

    The local mean (cropped 'full' convolution with the filter from
    ``gaussian_filter``) is subtracted and the result is divided by
    max(1, local norm floored by its mean over axes 2 and 3).
    """
    X = T.ftensor4()
    kernel_dims = (1, 1, kernel_shape, kernel_shape)
    kernel = sharedX(gaussian_filter(kernel_shape).reshape(kernel_dims))
    half = int(np.floor(kernel_shape / 2.))

    def blur(tensor):
        full = conv2d(tensor, filters=kernel, border_mode='full')
        return full[:, :, half:-half, half:-half]

    # For each pixel, remove mean of the filter neighborhood
    centered = X - blur(X)

    # Scale down norm of the patch if norm is bigger than 1
    local_norm = T.sqrt(blur(T.sqr(X)))
    map_mean = local_norm.mean(axis=[2, 3])
    scale = T.largest(map_mean.dimshuffle(0, 1, 'x', 'x'), local_norm)
    normalized = centered / T.maximum(1., scale)

    f = theano.function([X], normalized)
    return f(data)
def lecun_lcn(input, img_shape, kernel_shape):
    """
    Local contrast normalization of a batch of single-channel images.

    ``input`` is reshaped to (n, rows, cols, 1); the local mean computed
    with a ``Conv2D`` transformer is subtracted and the result is divided
    by the local norm floored by its per-image mean. No additional
    threshold is applied to the divisor. Returns the result flattened to
    three dimensions.
    """
    input = input.reshape(input.shape[0], input.shape[1], input.shape[2], 1)
    batch = len(input)

    X = T.matrix(dtype=input.dtype)
    X = X.reshape((batch, img_shape[0], img_shape[1], 1))

    kernel_dims = (1, 1, kernel_shape, kernel_shape)
    kernel = sharedX(gaussian_filter(kernel_shape).reshape(kernel_dims))
    input_space = Conv2DSpace(shape=img_shape, num_channels=1)
    half = int(np.floor(kernel_shape / 2.))
    crop = (slice(None), slice(half, -half), slice(half, -half), slice(None))

    # For each pixel, remove mean of the filter neighborhood
    mean_op = Conv2D(filters=kernel, batch_size=batch,
                     input_space=input_space, border_mode='full')
    centered = X - mean_op.lmul(X)[crop]

    # Scale down norm of the patch
    norm_op = Conv2D(filters=kernel, batch_size=batch,
                     input_space=input_space, border_mode='full')
    local_norm = T.sqrt(norm_op.lmul(X ** 2)[crop])
    img_mean = local_norm.mean(axis=[1, 2])
    scale = T.largest(img_mean.dimshuffle(0, 'x', 'x', 1), local_norm)

    normalized = T.flatten(centered / scale, outdim=3)
    f = function([X], normalized)
    return f(input)
def get_train(U_Ot, U_R, lenW, n_facts):
    """
    Build and compile a Theano training function over U_Ot and U_R.

    The compiled function takes ``[r_t, gamma, L, V] + m + f`` as inputs
    (``m`` holds ``n_facts + 1`` int scalars, ``f`` holds ``n_facts`` int
    scalars), returns ``[cost]`` and updates ``U_Ot`` and ``U_R`` by
    subtracting ``alpha`` times the gradient of the cost. ``alpha`` and
    ``zeros`` are not defined in this function and must come from the
    enclosing scope.

    Every cost term has the form ``largest(gamma +/- score, 0)``.
    """
    # The phi_* helpers place row x_t of L (flattened) in one lenW-wide slot
    # of a vector of length 3*lenW + 3 and zero-fill the rest.
    def phi_x1(x_t, L):
        return T.concatenate([L[x_t].reshape((-1,)), zeros((2*lenW,)), zeros((3,))], axis=0)
    def phi_x2(x_t, L):
        return T.concatenate([zeros((lenW,)), L[x_t].reshape((-1,)), zeros((lenW,)), zeros((3,))], axis=0)
    def phi_y(x_t, L):
        return T.concatenate([zeros((2*lenW,)), L[x_t].reshape((-1,)), zeros((3,))], axis=0)
    # phi_t fills only the last three entries with the 0/1 outcomes of the
    # pairwise "less than" comparisons of its index arguments.
    def phi_t(x_t, y_t, yp_t, L):
        return T.concatenate([zeros(3*lenW,), T.stack(T.switch(T.lt(x_t,y_t), 1, 0), T.switch(T.lt(x_t,yp_t), 1, 0), T.switch(T.lt(y_t,yp_t), 1, 0))], axis=0)
    # Score summed over the entries of xs; the first entry (t == 0) uses
    # phi_x1 and every later entry uses phi_x2.
    def s_Ot(xs, y_t, yp_t, L):
        result, updates = theano.scan(
            lambda x_t, t: T.dot(T.dot(T.switch(T.eq(t, 0), phi_x1(x_t, L).reshape((1,-1)), phi_x2(x_t, L).reshape((1,-1))), U_Ot.T),
                                 T.dot(U_Ot, (phi_y(y_t, L) - phi_y(yp_t, L) + phi_t(x_t, y_t, yp_t, L)))),
            sequences=[xs, T.arange(T.shape(xs)[0])])
        return result.sum()
    # Same structure as s_Ot but with U_R and a candidate row taken from V.
    def sR(xs, y_t, L, V):
        result, updates = theano.scan(
            lambda x_t, t: T.dot(T.dot(T.switch(T.eq(t, 0), phi_x1(x_t, L).reshape((1,-1)), phi_x2(x_t, L).reshape((1,-1))), U_R.T),
                                 T.dot(U_R, phi_y(y_t, V))),
            sequences=[xs, T.arange(T.shape(xs)[0])])
        return result.sum()

    x_t = T.iscalar('x_t')
    m = [x_t] + [T.iscalar('m_o%d' % i) for i in xrange(n_facts)]
    f = [T.iscalar('f%d_t' % i) for i in xrange(n_facts)]
    r_t = T.iscalar('r_t')
    gamma = T.scalar('gamma')
    L = T.fmatrix('L') # list of messages
    V = T.fmatrix('V') # vocab
    r_args = T.stack(*m)

    # Two scans per fact, each iterating over the rows of L by index t.
    # NOTE(review): T.shape(L) is the whole shape vector, so
    # T.eq(t, T.shape(L)-1) compares t against every dimension of L;
    # presumably T.shape(L)[0]-1 was intended -- confirm before changing.
    cost_arr = [0] * 2 * (len(m)-1)
    updates_arr = [0] * 2 * (len(m)-1)
    for i in xrange(len(m)-1):
        cost_arr[2*i], updates_arr[2*i] = theano.scan(
            lambda f_bar, t: T.switch(T.or_(T.eq(t, f[i]), T.eq(t, T.shape(L)-1)), 0, T.largest(gamma - s_Ot(T.stack(*m[:i+1]), f[i], t, L), 0)),
            sequences=[L, T.arange(T.shape(L)[0])])
        cost_arr[2*i+1], updates_arr[2*i+1] = theano.scan(
            lambda f_bar, t: T.switch(T.or_(T.eq(t, f[i]), T.eq(t, T.shape(L)-1)), 0, T.largest(gamma + s_Ot(T.stack(*m[:i+1]), t, f[i], L), 0)),
            sequences=[L, T.arange(T.shape(L)[0])])

    # Response cost: iterate over the rows of V, skipping the index r_t.
    cost1, u1 = theano.scan(
        lambda r_bar, t: T.switch(T.eq(r_t, t), 0, T.largest(gamma - sR(r_args, r_t, L, V) + sR(r_args, t, L, V), 0)),
        sequences=[V, T.arange(T.shape(V)[0])])

    cost = cost1.sum()
    for c in cost_arr:
        cost += c.sum()

    g_uo, g_ur = T.grad(cost, [U_Ot, U_R])

    # The scan update dictionaries (updates_arr, u1) are not passed on; only
    # the two parameter updates below are applied.
    train = theano.function(
        inputs=[r_t, gamma, L, V] + m + f,
        outputs=[cost],
        updates=[(U_Ot, U_Ot-alpha*g_uo), (U_R, U_R-alpha*g_ur)])
    return train
def update(s, prev_diff, v, reward, tps):
    """
    Compute one state's backed-up value and the running maximum change.

    For state ``s`` the largest of ``dot(tps[s, a, :], reward + discount * v)``
    over the ``n_actions`` actions is taken (``n_actions`` and ``discount``
    come from the enclosing scope).

    Returns ``((diff, new_v), {})`` where ``new_v`` is a zero tensor shaped
    like ``v`` with entry ``s`` set to that maximum, and ``diff`` is the
    larger of ``prev_diff`` and ``|v[s] - max|``.
    """
    max_v = float("-inf")
    v_template = T.zeros_like(v)
    for a in range(n_actions):
        tp = tps[s, a, :]
        max_v = T.largest(max_v, T.dot(tp, reward + discount * v))
    new_diff = abs(v[s] - max_v)
    # The comparison is symbolic, so it has to be selected inside the graph;
    # a Python `if` is evaluated once while the graph is being built and
    # cannot depend on the runtime values.
    diff = T.switch(T.lt(prev_diff, new_diff), new_diff, prev_diff)
    return (diff, T.set_subtensor(v_template[s], max_v)), {}
def update(s, prev_diff, v, reward, tps):
    """
    Back up the value of state ``s`` and track the largest change so far.

    The new value is the largest of
    ``dot(tps[s, a, :], reward + discount * v)`` over ``range(n_actions)``;
    ``n_actions`` and ``discount`` are read from the enclosing scope.

    Returns ``((diff, new_v), {})``: ``diff`` is ``new_diff`` where
    ``prev_diff < new_diff`` and ``prev_diff`` otherwise, and ``new_v`` is
    zeros like ``v`` with entry ``s`` set to the new value.
    """
    best = float("-inf")
    for a in range(n_actions):
        transition = tps[s, a, :]
        best = T.largest(best, T.dot(transition, reward + discount * v))

    new_diff = abs(v[s] - best)
    # Select symbolically: branching on T.lt(...) with a Python `if` is
    # decided at graph-construction time, not from the compared values.
    diff = T.switch(T.lt(prev_diff, new_diff), new_diff, prev_diff)

    v_template = T.zeros_like(v)
    return (diff, T.set_subtensor(v_template[s], best)), {}
def _mmd2_and_ratio(K_XX, K_XY, K_YY, unit_diagonal=False, biased=False,
                    min_var_est=_eps):
    """
    Return ``(mmd2, ratio)`` for the given kernel matrices.

    ``mmd2`` and a variance estimate are obtained from
    ``_mmd2_and_variance``; ``ratio`` is ``mmd2`` divided by the square
    root of the variance estimate, which is floored at ``min_var_est``.
    """
    estimates = _mmd2_and_variance(K_XX, K_XY, K_YY,
                                   unit_diagonal=unit_diagonal,
                                   biased=biased)
    mmd2, var_est = estimates
    floored_var = T.largest(var_est, min_var_est)
    return mmd2, mmd2 / T.sqrt(floored_var)
def predict_odim(Lmm, Amm, beta_sp, hyp, X_sp, x):
    """
    Return ``(mean, variance)`` of the prediction at ``x`` for one output.

    ``hyp`` is split at ``idims + 1`` into the hyper-parameters handed to
    ``cov.Sum`` together with ``self.covs`` (``idims``, ``self`` and ``cov``
    come from the enclosing scope). The variance is clipped at zero and
    1e-3 is added.
    """
    split_hyp = (hyp[:idims+1], hyp[idims+1])
    kern = partial(cov.Sum, split_hyp, self.covs)

    cross = kern(x, X_sp).flatten()
    mean = cross.dot(beta_sp)

    proj_L = solve_lower_triangular(Lmm, cross)
    proj_A = solve_lower_triangular(Amm, Lmm.T.dot(cross))

    prior_var = kern(x, all_pairs=False)
    variance = prior_var + (-(proj_L.dot(proj_L) + proj_A.dot(proj_A)))
    # Keep the variance non-negative and away from zero.
    variance = tt.largest(variance, 0.0) + 1e-3
    return mean, variance
def apply(self, X):
    """
    Return the local-contrast-normalized version of X (same format as X).

    ``self.apply_lin`` followed by ``nnfuns.relu`` provides the filtered
    tensors; the crop width is half of ``self.filter_size[0]``. The divisor
    is max(1, local norm floored by its mean over axes 2 and 3).
    """
    half = int(np.floor(self.filter_size[0] / 2.))  # middle value
    centre = (slice(None), slice(None),
              slice(half, -half), slice(half, -half))

    def filtered(tensor):
        # full convolution, cropped back to the shape of X
        return nnfuns.relu(self.apply_lin(tensor))[centre]

    # for each pixel remove mean of the filter neighbourhood
    centered = X - filtered(X)

    local_norm = T.sqrt(filtered(centered ** 2))
    map_mean = local_norm.mean(axis=[2, 3])
    scale = T.largest(map_mean.dimshuffle(0, 1, 'x', 'x'), local_norm)
    return centered / T.maximum(1., scale)
def apply(self, X):
    """
    Local contrast normalization of X; the output has the format of X.

    The neighbourhood mean is ``relu(self.apply_lin(X))`` cropped by half
    of ``self.filter_size[0]`` on axes 2 and 3. The centered tensor is
    divided by max(1, d) where d is the local norm floored by its mean
    over axes 2 and 3.
    """
    pad = int(np.floor(self.filter_size[0] / 2.))  # middle value

    # full convolution; for each pixel remove mean of the neighbourhood
    smoothed = nnfuns.relu(self.apply_lin(X))
    residual = X - smoothed[:, :, pad:-pad, pad:-pad]  # same shape as X

    energy = nnfuns.relu(self.apply_lin(residual ** 2))
    norm = T.sqrt(energy[:, :, pad:-pad, pad:-pad])
    norm_floor = norm.mean(axis=[2, 3]).dimshuffle(0, 1, 'x', 'x')
    divisor = T.maximum(1., T.largest(norm_floor, norm))

    return residual / divisor  # same format as input
def lecun_lcn(self, X, kernel_size=9, threshold=1e-4, use_divisor=True,
              border=False):
    """
    Yann LeCun's local contrast normalization.

    Original code in Theano by: Guillaume Desjardins

    :param X: symbolic tensor indexed with four axes; axes 2 and 3 are
        filtered.
    :param kernel_size: side length of the filter from ``gaussian_filter``.
    :param threshold: lower bound applied to the summed squares and to the
        divisor.
    :param use_divisor: when false only the local mean is removed.
    :param border: when true, X is edge-padded and convolved in 'valid'
        mode instead of cropping a 'full' convolution.
    :return: the normalized symbolic tensor.
    """
    filter_shape = (1, 1, kernel_size, kernel_size)
    filters = gaussian_filter(kernel_size).reshape(filter_shape)
    filters = shared(_asarray(filters, dtype=floatX), borrow=True)
    mid = int(floor(kernel_size / 2.))

    def crop(full):
        return full[:, :, mid:-mid, mid:-mid]

    if border:
        # The pad width is a repeat count, so it must be an int; `//` keeps
        # it one regardless of the division mode in effect.
        r = (kernel_size - 1) // 2
        up = X[:, :, 0:1, :].repeat(r, axis=2)
        down = X[:, :, -1:, :].repeat(r, axis=2)
        X_ = T.concatenate([up, X, down], axis=2)
        left = X_[:, :, :, 0:1].repeat(r, axis=3)
        right = X_[:, :, :, -1:].repeat(r, axis=3)
        X_ = T.concatenate([left, X_, right], axis=3)
        convout = conv2d(X_, filters=filters, filter_shape=filter_shape,
                         border_mode='valid')
        centered_X = X - convout
    else:
        # For each pixel, remove mean of kernel_size x kernel_size
        # neighborhood
        convout = conv2d(X, filters=filters, filter_shape=filter_shape,
                         border_mode='full')
        centered_X = X - crop(convout)

    if not use_divisor:
        return centered_X

    # Scale down norm of kernel_size x kernel_size patch
    sum_sqr_XX = crop(conv2d(T.sqr(X), filters=filters,
                             filter_shape=filter_shape, border_mode='full'))
    sum_sqr_XX = T.maximum(sum_sqr_XX, threshold)
    denom = T.sqrt(sum_sqr_XX)
    per_img_mean = denom.mean(axis=[2, 3])
    divisor = T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom)
    divisor = T.maximum(divisor, threshold)
    return centered_X / divisor
def gen_fcn(batch_size, img_shape, kernel_size, data_type='float32',
            threshold=1e-4):
    """
    Generate a theano function doing lecun lcn for a given setting.

    Modified from lecun_lcn in pylearn2.datasets.preprocessing.

    Currently data_type can only be float32; otherwise an error is
    reported saying input and kernel should be the same type and the
    kernel type is float32.

    The returned function takes a (batch_size, rows, cols, 1) array and
    returns the normalized images flattened to three dimensions.
    """
    X = tensor.matrix(dtype=data_type)
    X = X.reshape((batch_size, img_shape[0], img_shape[1], 1))

    kernel_dims = (1, 1, kernel_size, kernel_size)
    kernel = sharedX(gaussian_filter(kernel_size).reshape(kernel_dims))
    input_space = Conv2DSpace(shape=img_shape, num_channels=1)
    half = int(np.floor(kernel_size / 2.))

    def blur(symbolic):
        op = Conv2D(filters=kernel, batch_size=batch_size,
                    input_space=input_space, border_mode='full')
        return op.lmul(symbolic)[:, half:-half, half:-half, :]

    # For each pixel, remove mean of the filter neighborhood
    centered = X - blur(X)

    # Scale down norm of the patch
    local_norm = tensor.sqrt(blur(X ** 2))
    img_mean = local_norm.mean(axis=[1, 2])
    scale = tensor.largest(img_mean.dimshuffle(0, 'x', 'x', 1), local_norm)
    scale = tensor.maximum(scale, threshold)

    normalized = tensor.flatten(centered / scale, outdim=3)
    return function([X], normalized)
def make_network(input_p, input_q, dim, criterion='mmd', biased=True,
                 streaming_est=False, linear_kernel=False, log_sigma=0,
                 hotelling_reg=0, opt_log=True, batchsize=None,
                 net_version='nothing'):
    """
    Build the two-sample network and its objective.

    The statistic function is selected by
    ``(criterion, linear_kernel, streaming_est)``; unsupported combinations
    raise ``ValueError``. The objective is the negated statistic (or the
    negated log of the statistic floored at 1e-6 when ``opt_log``) plus the
    regularizer returned by the chosen ``net_versions`` entry.

    Returns ``(mmd2_pq, obj, rep_p, net_p, net_q, log_sigma)``;
    ``log_sigma`` is a shared variable, or ``None`` for a linear kernel.
    """
    in_p = lasagne.layers.InputLayer(shape=(batchsize, dim),
                                     input_var=input_p)
    in_q = lasagne.layers.InputLayer(shape=(batchsize, dim),
                                     input_var=input_q)
    net_p, net_q, reg = net_versions[net_version](in_p, in_q)
    rep_p, rep_q = lasagne.layers.get_output([net_p, net_q])

    # keys: (criterion, linear kernel, streaming)
    choices = {
        ('mmd', False, False): mmd.rbf_mmd2,
        ('mmd', False, True): mmd.rbf_mmd2_streaming,
        ('mmd', True, False): mmd.linear_mmd2,
        ('ratio', False, False): mmd.rbf_mmd2_and_ratio,
        ('ratio', False, True): mmd.rbf_mmd2_streaming_and_ratio,
        ('ratio', True, False): mmd.linear_mmd2_and_ratio,
        ('hotelling', True, False): mmd.linear_mmd2_and_hotelling,
    }
    try:
        fn = choices[criterion, linear_kernel, streaming_est]
    except KeyError:
        kernel_desc = "linear kernel" if linear_kernel else "rbf kernel"
        stream_desc = "streaming" if streaming_est else "not streaming"
        raise ValueError("Bad parameter combo: criterion = {}, {}, {}".format(
            criterion, kernel_desc, stream_desc))

    kwargs = {}
    if linear_kernel:
        log_sigma = None
    else:
        log_sigma = theano.shared(make_floatX(log_sigma), name='log_sigma')
        kwargs['sigma'] = T.exp(log_sigma)
    if not streaming_est:
        kwargs['biased'] = biased
    if criterion == 'hotelling':
        kwargs['reg'] = hotelling_reg

    mmd2_pq, stat = fn(rep_p, rep_q, **kwargs)
    if opt_log:
        score = T.log(T.largest(stat, 1e-6))
    else:
        score = stat
    obj = -score + reg
    return mmd2_pq, obj, rep_p, net_p, net_q, log_sigma
def rbf_mmd2_streaming_and_ratio(X, Y, sigma=0):
    """
    Streaming estimate of MMD^2 with an RBF kernel, and its ratio statistic.

    Consecutive rows of X and Y are paired (rows 0/1, 2/3, ...) and one
    ``h`` value is computed per pair; ``mmd2`` is their mean. The variance
    of ``h`` is estimated from the squared differences of consecutive ``h``
    values, floored at ``_eps`` before taking the ratio.

    :param sigma: RBF bandwidth; must be non-zero, since
        ``gamma = 1 / (2 * sigma**2)``.
    :return: ``(mmd2, ratio)``
    """
    # n = (T.smallest(X.shape[0], Y.shape[0]) // 2) * 2
    n = (X.shape[0] // 2) * 2
    # Float literal so the result does not depend on the division mode.
    gamma = 1.0 / (2 * sigma ** 2)

    def rbf(A, B):
        return T.exp(-gamma * ((A - B) ** 2).sum(axis=1))

    h_bits = (rbf(X[:n:2], X[1:n:2]) + rbf(Y[:n:2], Y[1:n:2])
              - rbf(X[:n:2], Y[1:n:2]) - rbf(X[1:n:2], Y[:n:2]))
    mmd2 = h_bits.mean()

    # variance is 1/2 E_{v, v'} (h(v) - h(v'))^2
    # estimate with even, odd diffs
    # h_bits has n / 2 entries, so the even length must be derived from
    # h_bits itself; deriving it from n leaves the two slices with different
    # lengths whenever n / 2 is odd.
    m = (h_bits.shape[0] // 2) * 2
    approx_var = 0.5 * ((h_bits[:m:2] - h_bits[1:m:2]) ** 2).mean()
    ratio = mmd2 / T.sqrt(T.largest(approx_var, _eps))
    return mmd2, ratio
def lecun_lcn(input, img_shape, kernel_shape, threshold=1e-4):
    """
    Yann LeCun's local contrast normalization

    Original code in Theano by: Guillaume Desjardins

    Parameters
    ----------
    input : array
        Reshaped to (input.shape[0], input.shape[1], input.shape[2], 1)
        before processing.
    img_shape : sequence
        img_shape[0] and img_shape[1] are used as the image rows and
        columns.
    kernel_shape : int
        Side length of the filter obtained from ``gaussian_filter``.
    threshold : float
        Lower bound applied to the divisor.

    Returns
    -------
    The normalized images, flattened to three dimensions.
    """
    input = input.reshape((input.shape[0], input.shape[1],
                           input.shape[2], 1))
    count = len(input)

    X = tensor.matrix(dtype=input.dtype)
    X = X.reshape((count, img_shape[0], img_shape[1], 1))

    kernel_dims = (1, 1, kernel_shape, kernel_shape)
    kernel = sharedX(gaussian_filter(kernel_shape).reshape(kernel_dims))
    input_space = Conv2DSpace(shape=img_shape, num_channels=1)
    half = int(numpy.floor(kernel_shape / 2.))
    crop = (slice(None), slice(half, -half), slice(half, -half), slice(None))

    # For each pixel, remove mean of the filter neighborhood
    mean_op = Conv2D(filters=kernel, batch_size=count,
                     input_space=input_space, border_mode='full')
    centered = X - mean_op.lmul(X)[crop]

    # Scale down norm of the patch
    norm_op = Conv2D(filters=kernel, batch_size=count,
                     input_space=input_space, border_mode='full')
    local_norm = tensor.sqrt(norm_op.lmul(X ** 2)[crop])
    img_mean = local_norm.mean(axis=[1, 2])
    scale = tensor.largest(img_mean.dimshuffle(0, 'x', 'x', 1), local_norm)
    scale = tensor.maximum(scale, threshold)

    normalized = tensor.flatten(centered / scale, outdim=3)
    f = function([X], normalized)
    return f(input)
def leaky_beta_asymmetric_fixation_2b(o, t, o2, f1, f2, v, alpha, beta, d,
                                      omega, tau_p, tau_n, theta):
    """
    Forgetful beta model with asymmetric updating and continuous fixation
    weighting of value
    ----------------------------------------------------------------------

    Identical to the second fixation model but with weighting updated
    continuously rather than downweighting the least fixated option.

    Args:
        o: Trial outcome
        t: Time (not used)
        o2: Outcome of the other stimulus
        f1: Fixation duration proportion for this stimulus
        f2: Fixation duration proportion for the other stimulus
        v: Previous trial value estimate (not used)
        alpha: Starting alpha
        beta: Starting beta
        d: Decay (forgetting) rate
        omega: Weight of the other stimulus outcome
        tau_p: Positive update weight
        tau_n: Negative update weight
        theta: Weighting on fixation-dependent bonus to alpha

    Returns:
        Mean: Estimated probability on the current trial (mean of beta
            distribution)
        Alpha: Alpha value on current trial
        Beta: Beta value on current trial
        Var: Variance of beta distribution
    """
    retain = 1 - d
    floor_value = T.power(0.1, 10)

    # alpha gains a bonus only when this stimulus was fixated longer
    fixation_bonus = T.largest(0, f1 - f2) * theta
    alpha = (retain * alpha + (o * tau_p) + (omega * f2 * o2)
             + fixation_bonus)
    beta = retain * beta + ((1 - o) * tau_n) + (omega * f2 * (1 - o2))

    # keep both parameters strictly positive
    alpha = T.maximum(floor_value, alpha)
    beta = T.maximum(T.power(0.1, 10), beta)

    total = alpha + beta
    value = alpha / total
    var = (alpha * beta) / (T.pow(alpha + beta, 2) * (total + 1))

    return (value, alpha, beta, var)
def lecun_lcn(input, kernel_size=9, threshold=1e-4, use_divisor=False):
    """
    Yann LeCun's local contrast normalization

    Original code in Theano by: Guillaume Desjardins

    :param input: array reshaped to (n, 1, input.shape[1], input.shape[2])
        and cast to floatX before processing
    :param kernel_size: side length of the filter from ``gaussian_filter``
    :param threshold: lower bound applied to the divisor
    :param use_divisor: when true, also divide by the local norm
    :return: the normalized images, flattened to three dimensions
    """
    batch_shape = (input.shape[0], 1, input.shape[1], input.shape[2])
    input = input.reshape(batch_shape).astype(floatX)

    X = T.tensor4(dtype=floatX)

    kernel_dims = (1, 1, kernel_size, kernel_size)
    kernel = gaussian_filter(kernel_size).reshape(kernel_dims)
    kernel = shared(_asarray(kernel, dtype=floatX), borrow=True)

    def blur(tensor):
        # 'half' mode keeps the two filtered axes at their input size
        return conv2d(input=tensor, filters=kernel,
                      input_shape=input.shape, filter_shape=kernel_dims,
                      border_mode='half')

    result = X - blur(X)

    if use_divisor:
        # Scale down norm of kernel_size x kernel_size patch
        local_norm = T.sqrt(blur(T.sqr(T.abs_(result))))
        map_mean = local_norm.mean(axis=[2, 3])
        scale = T.largest(map_mean.dimshuffle(0, 1, 'x', 'x'), local_norm)
        scale = T.maximum(scale, threshold)
        result = result / scale

    result = result.dimshuffle(0, 2, 3, 1).flatten(ndim=3)
    f = function([X], result)
    return f(input)
def lecun_lcn(input, img_shape, kernel_shape, threshold=1e-4):
    """
    Yann LeCun's local contrast normalization

    This is performed per-colorchannel!!!

    http://yann.lecun.com/exdb/publis/pdf/jarrett-iccv-09.pdf

    ``input`` is reshaped to (n, 1, rows, cols). The local mean is removed
    and the result is divided by the local norm, floored by its mean over
    the two spatial axes (2 and 3) and by ``threshold``. Returns the
    normalized 4D array.
    """
    input = input.reshape((input.shape[0], 1,
                           input.shape[1], input.shape[2]))
    X = T.matrix(dtype=input.dtype)
    X = X.reshape((len(input), 1, img_shape[0], img_shape[1]))

    filter_shape = (1, 1, kernel_shape, kernel_shape)
    filters = theano.shared(
        gaussian_filter(kernel_shape).reshape(filter_shape))
    image_shape = (input.shape[0], 1, img_shape[0], img_shape[1])

    convout = conv.conv2d(input=X,
                          filters=filters,
                          image_shape=image_shape,
                          filter_shape=filter_shape,
                          border_mode='full')

    # For each pixel, remove mean of the filter neighborhood
    mid = int(np.floor(kernel_shape / 2.))
    centered_X = X - convout[:, :, mid:-mid, mid:-mid]

    # Scale down norm of the patch
    sum_sqr_XX = conv.conv2d(input=T.sqr(X),
                             filters=filters,
                             image_shape=image_shape,
                             filter_shape=filter_shape,
                             border_mode='full')
    denom = T.sqrt(sum_sqr_XX[:, :, mid:-mid, mid:-mid])

    # The layout is (batch, channel, row, col): reduce over the spatial
    # axes so the mean has shape (batch, channel), which is what the
    # dimshuffle pattern below expects.
    per_img_mean = T.mean(denom, axis=(2, 3))
    divisor = T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom)
    divisor = T.maximum(divisor, threshold)

    new_X = centered_X / divisor

    f = theano.function([X], new_X)
    return f(input)
def make_lecun_lcn(input_shape, img_shape, kernel_shape, threshold=1e-4):
    """
    lecun local contrast normalization

    :param input_shape: (batch_size, stack_size, nb_row, nb_col)
    :param img_shape: (nb_row, nb_col) image dimensions
    :param kernel_shape: kernel shape of image eg: 9x9
    :param threshold: threshold to allow enhance of edges
    :return: theano function that computes the local contrast normalized
        image; its output has the channel axis moved last and is flattened
        to three dimensions
    """
    X = T.matrix(dtype=theano.config.floatX)
    X = X.reshape(input_shape)

    filter_shape = (1, 1, kernel_shape, kernel_shape)
    filters = gaussian_filter(kernel_shape).reshape(filter_shape)
    image_shape = (input_shape[0], 1, img_shape[0], img_shape[1])

    convout = conv.conv2d(input=X,
                          filters=filters,
                          image_shape=image_shape,
                          filter_shape=filter_shape,
                          border_mode='full')

    # For each pixel, remove mean of the filter neighborhood
    mid = int(np.floor(kernel_shape / 2.))
    centered_X = X - convout[:, :, mid:-mid, mid:-mid]

    # Scale down norm of the patch
    sum_sqr_XX = conv.conv2d(input=centered_X ** 2,
                             filters=filters,
                             image_shape=image_shape,
                             filter_shape=filter_shape,
                             border_mode='full')
    denom = T.sqrt(sum_sqr_XX[:, :, mid:-mid, mid:-mid])

    # The tensor is (batch, stack, row, col), so the per-image mean is
    # taken over the spatial axes 2 and 3 and broadcast back over them.
    per_img_mean = denom.mean(axis=[2, 3])
    divisor = T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom)
    divisor = T.maximum(divisor, threshold)

    new_X = centered_X / divisor
    new_X = new_X.dimshuffle(0, 2, 3, 1)
    new_X = new_X.flatten(ndim=3)

    f = theano.function([X], new_X)
    return f
def lecun_lcn(input, img_shape, kernel_shape, threshold=1e-4):
    """
    Yann LeCun's local contrast normalization (performed per colour channel).

    Reference: http://yann.lecun.com/exdb/publis/pdf/jarrett-iccv-09.pdf

    :param input: numpy array indexed as (n_images, rows, cols); reshaped
        to (n_images, 1, rows, cols) before evaluation
    :param img_shape: (rows, cols) of one image
    :param kernel_shape: int, side length of the square Gaussian kernel
    :param threshold: lower bound applied to the divisor
    :return: result of evaluating the compiled Theano graph on ``input``
    """
    input = input.reshape((input.shape[0], 1, input.shape[1], input.shape[2]))
    conv_image_shape = (input.shape[0], 1, img_shape[0], img_shape[1])

    X = T.matrix(dtype=input.dtype)
    X = X.reshape((len(input), 1, img_shape[0], img_shape[1]))

    filter_shape = (1, 1, kernel_shape, kernel_shape)
    filters = theano.shared(gaussian_filter(kernel_shape).reshape(filter_shape))

    # Weighted neighbourhood mean via a 'full' convolution.
    convout = conv.conv2d(input=X,
                          filters=filters,
                          image_shape=conv_image_shape,
                          filter_shape=filter_shape,
                          border_mode='full')

    # Crop the 'full' output back to the image size and subtract it.
    mid = int(np.floor(kernel_shape / 2.))
    centered_X = X - convout[:, :, mid:-mid, mid:-mid]

    # Weighted neighbourhood sum of squares for the divisive step.
    sum_sqr_XX = conv.conv2d(input=T.sqr(X),
                             filters=filters,
                             image_shape=conv_image_shape,
                             filter_shape=filter_shape,
                             border_mode='full')
    denom = T.sqrt(sum_sqr_XX[:, :, mid:-mid, mid:-mid])

    # Average over the spatial axes (2, 3) so that per_img_mean is
    # (batch, channel).  The old axis=(1, 2) produced (batch, cols), which
    # dimshuffle(0, 1, 'x', 'x') then expanded into a wrongly shaped divisor.
    per_img_mean = T.mean(denom, axis=(2, 3))
    divisor = T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom)
    divisor = T.maximum(divisor, threshold)

    new_X = centered_X / divisor

    f = theano.function([X], new_X)
    return f(input)
def lcn_lacombe(data, kernel_shape, n_maps):
    """
    Local contrast normalization with one Gaussian kernel spanning all maps.

    :param data: symbolic 4D tensor; presumably laid out as
        (examples, maps, rows, cols) -- confirm with callers
    :param kernel_shape: int, side length of the square kernel
    :param n_maps: int, number of feature maps the kernel spans
    :return: symbolic tensor holding the normalized data
    """
    half = int(np.floor(kernel_shape / 2.))

    # One (1, n_maps, k, k) kernel: the 2D Gaussian repeated over the maps
    # and rescaled so that all weights sum to one.
    # todo: don't scale as this makes input much smaller than weights
    kernel_dims = (1, n_maps, kernel_shape, kernel_shape)
    kernel = np.resize(gaussian_filter(kernel_shape), kernel_dims)
    kernel = sharedX(kernel / np.sum(kernel))

    tile_over_maps = (1, n_maps, 1, 1)

    # Subtractive step: the 'full' convolution yields a single output map;
    # crop it to the input size and repeat it for every feature map.
    local_mean = conv2d(data, filters=kernel, border_mode='full')
    local_mean = local_mean[:, :, half:-half, half:-half]
    centered = data - T.tile(local_mean, tile_over_maps)

    # Divisive step: local norm, floored by its per-example mean and by 1.
    local_sq = conv2d(T.sqr(data), filters=kernel, border_mode='full')
    local_norm = T.sqrt(local_sq[:, :, half:-half, half:-half])
    mean_norm = local_norm.mean(axis=[1, 2, 3]).dimshuffle(0, 'x', 'x', 'x')
    scale = T.largest(mean_norm, T.tile(local_norm, tile_over_maps))

    return centered / T.maximum(1., scale)
def lecun_lcn(input, img_shape, kernel_shape, threshold=1e-4):
    """
    Yann LeCun's local contrast normalization.

    Original code in Theano by: Guillaume Desjardins

    :param input: numpy array holding a batch of single-channel images;
        it is reshaped to (n_images, 1, rows, cols)
    :param img_shape: (rows, cols) of one image
    :param kernel_shape: int, side length of the square Gaussian kernel
    :param threshold: lower bound applied to the divisor
    :return: result of evaluating the compiled Theano graph on ``input``;
        the graph moves the channel axis last and flattens to 3 dimensions
    """
    input = input.reshape(input.shape[0], 1, img_shape[0], img_shape[1])
    conv_image_shape = (input.shape[0], 1, img_shape[0], img_shape[1])

    X = T.matrix(dtype=theano.config.floatX)
    X = X.reshape(input.shape)

    filter_shape = (1, 1, kernel_shape, kernel_shape)
    filters = gaussian_filter(kernel_shape).reshape(filter_shape)
    mid = int(np.floor(kernel_shape / 2.))

    def _local_weighted_sum(tensor):
        # 'full' convolution, then crop back to the image size.
        full = conv.conv2d(input=tensor, filters=filters,
                           image_shape=conv_image_shape,
                           filter_shape=filter_shape,
                           border_mode='full')
        return full[:, :, mid:-mid, mid:-mid]

    # Subtractive step: remove the Gaussian-weighted neighbourhood mean.
    centered_X = X - _local_weighted_sum(X)

    # Divisive step: local standard deviation of the centered signal.
    denom = T.sqrt(_local_weighted_sum(centered_X ** 2))
    # Per-image mean = mean over the spatial axes (2, 3).  The previous
    # axis=[1, 2] with dimshuffle(0, 'x', 'x', 1) averaged over channel
    # and rows only, i.e. produced one mean per image *column*.
    per_img_mean = denom.mean(axis=[2, 3])
    divisor = T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom)
    divisor = T.maximum(divisor, threshold)

    new_X = centered_X / divisor
    # Move the channel axis last, then collapse to 3 dimensions.
    new_X = new_X.dimshuffle(0, 2, 3, 1)
    new_X = new_X.flatten(ndim=3)

    f = theano.function([X], new_X)
    return f(input)
def lcn_sublayer(image, image_shape, fmaps, pool_depth, width, sigma): """ """ print image_shape print fmaps, pool_depth, width, sigma border = width//2 filters = lcn_filters(fmaps, pool_depth, width, sigma) filter_shape = filters.shape blurred_mean = conv.conv2d(input=image, filters=filters, image_shape=image_shape, filter_shape=filter_shape, border_mode='full') image -= blurred_mean[:, :, border:-border, border:-border] image_sqr = T.sqr(image) blurred_sqr = conv.conv2d(input=image_sqr, filters=filters, image_shape=image_shape, filter_shape=filter_shape, border_mode='full') div = T.sqrt(blurred_sqr[:, :, border:-border, border:-border]) fm_mean = div.mean(axis=[2, 3]) div = T.largest(fm_mean.dimshuffle(0, 1, 'x', 'x'), div) + 1e-6 image = image/div return T.cast(image, theano.config.floatX)
def lecun_lcn( X, kernel_size=7, threshold = 1e-4, use_divisor=False):
    """
    Symbolic local contrast normalization with an optional divisive step.

    :param X: symbolic 4D tensor to normalize
    :param kernel_size: int, side length of the square Gaussian kernel
    :param threshold: lower bound on the divisor (divisive step only)
    :param use_divisor: when False only the local mean is subtracted
    :return: symbolic normalized tensor
    """
    kernel_dims = (1, 1, kernel_size, kernel_size)
    half = int(floor(kernel_size/2.))

    weights = gaussian_filter(kernel_size).reshape(kernel_dims)
    weights = np.array(asarray(weights, dtype='float32'))
    kernel = theano.shared(weights, borrow=True)

    # Subtractive step: 'full' convolution cropped back to the input size.
    local_mean = theano.tensor.nnet.conv2d(X, filters=kernel,
                                           filter_shape=kernel_dims,
                                           border_mode='full')
    centered = X - local_mean[:,:,half:-half,half:-half]

    if not use_divisor:
        return centered

    # Divisive step: local norm of X, floored by its per-map spatial mean
    # and by `threshold`.
    # NOTE(review): this call uses the bare `conv2d` name while the one
    # above uses theano.tensor.nnet.conv2d -- confirm they are the same.
    local_sq = conv2d(T.sqr(T.abs_(X)), filters=kernel,
                      filter_shape=kernel_dims, border_mode='full')
    local_norm = T.sqrt(local_sq[:,:,half:-half,half:-half])
    mean_norm = local_norm.mean(axis=[2,3]).dimshuffle(0,1,'x','x')
    scale = T.maximum(T.largest(mean_norm, local_norm), threshold)

    return centered / scale
def lecun_lcn_batch(input, kernel_shape=9, threshold=1e-4):
    """
    Local contrast normalization of a 4D numpy batch, evaluated eagerly.

    The last axis of ``input`` is folded into the batch axis so that every
    (axis-1, axis-2) plane is normalized on its own; the result is folded
    back to the layout of ``input``.

    :param input: 4D array; presumably (batch, rows, cols, channels) --
        confirm with callers
    :param kernel_shape: int, side length of the square Gaussian kernel
    :param threshold: lower bound applied to the divisor
    :return: numpy array with the same axis order as ``input``
    """
    input = np.float64(input)

    # Last axis first, then merge it with the batch axis: one single-channel
    # image per (batch element, last-axis index) pair.
    planes = input.transpose(3, 0, 1, 2)
    d0, d1, d2, d3 = (input.shape[0], input.shape[1],
                      input.shape[2], input.shape[3])
    stacked = planes.reshape((d0 * d3, 1, d1, d2))

    kernel_dims = (1, 1, kernel_shape, kernel_shape)
    weights = gaussian_filter(kernel_shape).reshape(kernel_dims)
    kernel = theano.shared(
        theano._asarray(weights, dtype=theano.config.floatX), borrow=True)
    half = int(np.floor(kernel_shape / 2.))

    def _blur(tensor):
        # 'full' convolution cropped back to the image size.
        full = conv.conv2d(input=tensor, filters=kernel,
                           filter_shape=kernel_dims, border_mode='full')
        return full[:, :, half:-half, half:-half]

    # Subtractive step.
    centered = stacked - _blur(stacked)

    # Divisive step: local std, floored by its per-image mean and threshold.
    local_std = T.sqrt(_blur(centered**2))
    mean_std = local_std.mean(axis=[2, 3]).dimshuffle(0, 1, 'x', 'x')
    scale = T.maximum(T.largest(mean_std, local_std), threshold)

    normalized = (centered / scale).eval()

    # Undo the folding: split the merged axis and move it back to the end.
    return normalized.reshape((d3, d0, d1, d2)).transpose(1, 2, 3, 0)
def lcn_lacombe(data, kernel_shape, n_maps):
    """
    Normalize local contrast using a Gaussian kernel shared by all maps.

    :param data: symbolic 4D tensor; presumably
        (examples, maps, rows, cols) -- confirm with callers
    :param kernel_shape: int, side length of the square kernel
    :param n_maps: int, number of feature maps covered by the kernel
    :return: symbolic tensor with the normalized data
    """
    # Kernel of shape (1, n_maps, k, k), normalized to unit total weight.
    # todo: don't scale as this makes input much smaller than weights
    shape_4d = (1, n_maps, kernel_shape, kernel_shape)
    weights = np.resize(gaussian_filter(kernel_shape), shape_4d)
    weights = weights / np.sum(weights)
    weights = sharedX(weights)

    crop = int(np.floor(kernel_shape / 2.))
    reps = (1, n_maps, 1, 1)

    def _smooth(tensor):
        # 'full' convolution (single output map) cropped to the input size.
        out = conv2d(tensor, filters=weights, border_mode='full')
        return out[:, :, crop:-crop, crop:-crop]

    # Remove the weighted neighbourhood mean from every feature map.
    neighbourhood_mean = _smooth(data)
    centered_X = data - T.tile(neighbourhood_mean, reps)

    # Scale down by the local norm, never by less than its per-example
    # mean and never by less than 1.
    denom = T.sqrt(_smooth(T.sqr(data)))
    per_example = denom.mean(axis=[1, 2, 3])
    divisor = T.largest(per_example.dimshuffle(0, 'x', 'x', 'x'),
                        T.tile(denom, reps))
    divisor = T.maximum(1., divisor)

    return centered_X / divisor
def main():
    """Fit a hierarchical working-memory capacity model.

    Fetches the Rouder et al. (2008) trial data, collapses it into one
    row of hit / false-alarm counts per subject and set size, builds the
    PyMC3 model, samples from it and saves the trace plot.
    """
    # load the data
    host = "https://raw.githubusercontent.com/PerceptionCognitionLab/"
    path = "data0/master/wmPNAS2008/lk2clean.csv"
    df = pd.read_csv(urlopen(host + path), index_col=0)

    def _counts(subj, set_size, trials):
        # One "binomial" row: hits out of change trials, false alarms out
        # of no-change trials.
        change = trials.ischange
        same = 1 - trials.ischange
        return {
            "subj": subj,
            "M": set_size,
            "H": trials[change.astype(bool)].resp.sum(),
            "D": change.sum(),
            "F": trials[same.astype(bool)].resp.sum(),
            "S": same.sum(),
        }

    data = pd.DataFrame(
        [_counts(subj, N, _df) for (subj, N), _df in df.groupby(["sub", "N"])]
    )
    subjects = data.subj.unique()

    # design matrix mapping per-subject parameters onto the rows of data
    X = np.asarray(dmatrix("0 + C(subj)", data))

    with pm.Model():

        def _per_row(names, transform):
            # Non-centered hierarchical parameter: group mean, per-subject
            # offsets and a group scale; the transformed per-subject value
            # is expanded to one entry per data row.
            mu_name, delta_name, sigma_name, raw_name, out_name = names
            mu = pm.Cauchy(name=mu_name, alpha=0, beta=5)
            de = pm.Normal(name=delta_name, mu=0, sigma=1,
                           shape=len(subjects))
            si = pm.HalfCauchy(name=sigma_name, beta=5)
            raw = pm.Deterministic(raw_name, mu + de * si)
            per_subject = pm.Deterministic(out_name, transform(raw))
            return pm.math.dot(X, per_subject)

        # capacity (clipped at zero)
        k = _per_row(
            (r"$\mu_{(\kappa)}$", r"$\delta_{\kappa)}$",
             r"$\sigma_{(\kappa)}$", r"$\kappa$", r"$k$"),
            lambda v: tt.largest(v, tt.zeros(len(subjects))),
        )

        # guesses "same"
        g = _per_row(
            (r"$\mu_{(\gamma)}$", r"$\delta_{\gamma)}$",
             r"$\sigma_{(\gamma)}$", r"$\gamma$", r"$g$"),
            pm.math.sigmoid,
        )

        # does not lapse
        z = _per_row(
            (r"$\mu_{(\zeta)}$", r"$\delta_{\zeta)}$",
             r"$\sigma_{(\zeta)}$", r"$\zeta$", r"$z$"),
            pm.math.sigmoid,
        )

        # probabilities
        q = tt.smallest(k / data.M, tt.ones(len(data)))
        lapse_guess = (1 - z) * g
        miss_guess = z * (1 - q) * g
        h = lapse_guess + z * q + miss_guess
        f = lapse_guess + miss_guess

        # responses
        pm.Binomial(name="$H$", p=h, n=data.D, observed=data.H)
        pm.Binomial(name="$F$", p=f, n=data.S, observed=data.F)

        # sample and plot
        trace = pm.sample(draws=5000, tune=2000, chains=2)
        pm.traceplot(trace, compact=True)
        plt.savefig("../../assets/images/wm-cap.png", bbox_inches=0,
                    transparent=True)
def create_train(self, lenW, n_facts):
    """
    Compile the Theano training and scoring functions of the model.

    Sets ``self.train_model`` (one SGD step on ``self.U_Ot`` / ``self.U_R``
    returning ``[cost]``), ``self.sR`` and ``self.s_Ot``.

    :param lenW: length of one embedded row; each feature vector built
        here has length 3 * lenW + 3
    :param n_facts: number of supporting facts per training example
    """
    ONE = theano.shared(np.float32(1))
    ZERO = theano.shared(np.float32(0))

    # Look closely at these three functions: they simply pad everything to
    # the same length.
    def phi_x1(x_t, L):  # from equation (5)
        return T.concatenate([L[x_t].reshape((-1,)), zeros((2*lenW,)), zeros((3,))], axis=0)

    def phi_x2(x_t, L):  # from equation (5)
        return T.concatenate([zeros((lenW,)), L[x_t].reshape((-1,)), zeros((lenW,)), zeros((3,))], axis=0)

    def phi_y(x_t, L):  # from equation (5)
        return T.concatenate([zeros((2*lenW,)), L[x_t].reshape((-1,)), zeros((3,))], axis=0)

    def phi_t(x_t, y_t, yp_t, L):  # from equation (10); shows what yp_t means
        return T.concatenate([zeros(3*lenW,), T.stack(T.switch(T.lt(x_t,y_t), ONE, ZERO), T.switch(T.lt(x_t,yp_t), ONE, ZERO), T.switch(T.lt(y_t,yp_t), ONE, ZERO))], axis=0)

    def s_Ot(xs, y_t, yp_t, L):  # s_{O_t} in the paper, equation (3)
        # The first element of xs (t == 0) is embedded with phi_x1, every
        # later one with phi_x2; the per-element scores are summed.
        result, updates = theano.scan(
            lambda x_t, t: T.dot(T.dot(T.switch(T.eq(t, 0), phi_x1(x_t, L).reshape((1,-1)), phi_x2(x_t, L).reshape((1,-1))), self.U_Ot.T),
                                 T.dot(self.U_Ot, (phi_y(y_t, L) - phi_y(yp_t, L) + phi_t(x_t, y_t, yp_t, L)))),
            sequences=[xs, T.arange(T.shape(xs)[0])])
        return result.sum()

    def sR(xs, y_t, L, V):  # s_R in the paper, equation (4)
        result, updates = theano.scan(
            lambda x_t, t: T.dot(T.dot(T.switch(T.eq(t, 0), phi_x1(x_t, L).reshape((1,-1)), phi_x2(x_t, L).reshape((1,-1))), self.U_R.T),
                                 T.dot(self.U_R, phi_y(y_t, V))),
            sequences=[xs, T.arange(T.shape(xs)[0])])
        return result.sum()

    x_t = T.iscalar('x_t')
    y_t = T.iscalar('y_t')
    yp_t = T.iscalar('yp_t')
    xs = T.ivector('xs')
    m = [x_t] + [T.iscalar('m_o%d' % i) for i in xrange(n_facts)]  # m is presumably the memory
    f = [T.iscalar('f%d_t' % i) for i in xrange(n_facts)]  # from equations (6)(7)
    r_t = T.iscalar('r_t')  # from equation (8)
    gamma = T.scalar('gamma')
    L = T.fmatrix('L')  # list of messages: several vectors, one per sentence
    V = T.fmatrix('V')  # vocab: several vectors, one per word
    r_args = T.stack(*m)

    # Two margin (hinge) terms per supporting fact, stored at indices
    # 2*i and 2*i + 1.
    cost_arr = [0] * 2 * (len(m)-1)
    for i in xrange(len(m)-1):
        # The lambdas read the loop variable i; scan builds the graph
        # immediately, so each one sees the current value of i.
        cost_arr[2*i], _ = theano.scan(  # this is equation (6)
            lambda f_bar, t: T.switch(T.or_(T.eq(t, f[i]), T.eq(t, T.shape(L)[0]-1)), 0,
                                      T.largest(gamma - s_Ot(T.stack(*m[:i+1]),  # T.stack(*m[:i+1]) should be the memories so far
                                                             f[i], t, L), 0)),
            sequences=[L, T.arange(T.shape(L)[0])])
        cost_arr[2*i] /= T.shape(L)[0]
        cost_arr[2*i+1], _ = theano.scan(  # this is equation (7)
            lambda f_bar, t: T.switch(T.or_(T.eq(t, f[i]), T.eq(t, T.shape(L)[0]-1)), 0,
                                      T.largest(gamma + s_Ot(T.stack(*m[:i+1]),  # T.stack(*m[:i+1]) should be the memories so far
                                                             t, f[i], L), 0)),
            sequences=[L, T.arange(T.shape(L)[0])])
        cost_arr[2*i+1] /= T.shape(L)[0]

    cost1, _ = theano.scan(  # this is equation (8)
        lambda r_bar, t: T.switch(T.eq(r_t, t), 0,
                                  T.largest(gamma - sR(r_args, r_t, L, V) + sR(r_args, t, L, V), 0)),
        sequences=[V, T.arange(T.shape(V)[0])])
    cost1 /= T.shape(V)[0]

    cost = cost1.sum()
    for c in cost_arr:
        cost += c.sum()

    updates = sgd(cost, [self.U_Ot, self.U_R], learning_rate=self.lr)

    # print([r_t, gamma, L, V] + m + f)  # in practice [r_t, gamma, L, V, x_t, m_o0, f0_t]
    self.train_model = theano.function(
        inputs=[r_t, gamma, L, V] + m + f,  # in practice [r_t, gamma, L, V, x_t, m_o0, f0_t]
        outputs=[cost],
        updates=updates)
    # theano.printing.pydotprint(self.train_model, outfile="./test.png",
    #                            var_with_name_simple=True)

    self.sR = theano.function([xs, y_t, L, V], sR(xs, y_t, L, V))
    self.s_Ot = theano.function([xs, y_t, yp_t, L], s_Ot(xs, y_t, yp_t, L))
def create_train(self, lenW, n_facts):
    """
    Compile the Theano training and scoring functions of the model.

    Sets ``self.train_model`` (one SGD step on ``self.U_Ot`` / ``self.U_R``
    returning ``[cost]``), ``self.sR`` and ``self.s_Ot``.

    :param lenW: length of one embedded row; each feature vector built
        here has length 3 * lenW + 3
    :param n_facts: number of supporting facts per training example
    """
    ONE = theano.shared(np.float32(1))
    ZERO = theano.shared(np.float32(0))

    # Look closely at these three functions: they simply pad everything to
    # the same length.
    def phi_x1(x_t, L):  # from equation (5)
        return T.concatenate(
            [L[x_t].reshape((-1, )), zeros((2 * lenW, )), zeros((3, ))],
            axis=0)

    def phi_x2(x_t, L):  # from equation (5)
        return T.concatenate([
            zeros((lenW, )), L[x_t].reshape((-1, )), zeros((lenW, )),
            zeros((3, ))
        ], axis=0)

    def phi_y(x_t, L):  # from equation (5)
        return T.concatenate(
            [zeros((2 * lenW, )), L[x_t].reshape((-1, )), zeros((3, ))],
            axis=0)

    def phi_t(x_t, y_t, yp_t, L):  # from equation (10); shows what yp_t means
        return T.concatenate([
            zeros(3 * lenW, ),
            T.stack(T.switch(T.lt(x_t, y_t), ONE, ZERO),
                    T.switch(T.lt(x_t, yp_t), ONE, ZERO),
                    T.switch(T.lt(y_t, yp_t), ONE, ZERO))
        ], axis=0)

    def s_Ot(xs, y_t, yp_t, L):  # s_{O_t} in the paper, equation (3)
        # The first element of xs (t == 0) is embedded with phi_x1, every
        # later one with phi_x2; the per-element scores are summed.
        result, updates = theano.scan(
            lambda x_t, t: T.dot(
                T.dot(
                    T.switch(T.eq(t, 0), phi_x1(x_t, L).reshape((1, -1)),
                             phi_x2(x_t, L).reshape(
                                 (1, -1))), self.U_Ot.T),
                T.dot(self.U_Ot, (phi_y(y_t, L) - phi_y(yp_t, L) + phi_t(
                    x_t, y_t, yp_t, L)))),
            sequences=[xs, T.arange(T.shape(xs)[0])])
        return result.sum()

    def sR(xs, y_t, L, V):  # s_R in the paper, equation (4)
        result, updates = theano.scan(
            lambda x_t, t: T.dot(
                T.dot(
                    T.switch(T.eq(t, 0), phi_x1(x_t, L).reshape((1, -1)),
                             phi_x2(x_t, L).reshape((1, -1))), self.U_R.T),
                T.dot(self.U_R, phi_y(y_t, V))),
            sequences=[xs, T.arange(T.shape(xs)[0])])
        return result.sum()

    x_t = T.iscalar('x_t')
    y_t = T.iscalar('y_t')
    yp_t = T.iscalar('yp_t')
    xs = T.ivector('xs')
    m = [x_t] + [T.iscalar('m_o%d' % i)
                 for i in xrange(n_facts)]  # m is presumably the memory
    f = [T.iscalar('f%d_t' % i)
         for i in xrange(n_facts)]  # from equations (6)(7)
    r_t = T.iscalar('r_t')  # from equation (8)
    gamma = T.scalar('gamma')
    L = T.fmatrix('L')  # list of messages: several vectors, one per sentence
    V = T.fmatrix('V')  # vocab: several vectors, one per word
    r_args = T.stack(*m)

    # Two margin (hinge) terms per supporting fact, stored at indices
    # 2 * i and 2 * i + 1.
    cost_arr = [0] * 2 * (len(m) - 1)
    for i in xrange(len(m) - 1):
        # The lambdas read the loop variable i; scan builds the graph
        # immediately, so each one sees the current value of i.
        cost_arr[2 * i], _ = theano.scan(  # this is equation (6)
            lambda f_bar, t: T.switch(
                T.or_(T.eq(t, f[i]), T.eq(t, T.shape(L)[0] - 1)), 0,
                T.largest(
                    gamma - s_Ot(
                        T.stack(*m[:i + 1]),  # T.stack(*m[:i+1]) should be the memories so far
                        f[i],
                        t,
                        L),
                    0)),
            sequences=[L, T.arange(T.shape(L)[0])])
        cost_arr[2 * i] /= T.shape(L)[0]
        cost_arr[2 * i + 1], _ = theano.scan(  # this is equation (7)
            lambda f_bar, t: T.switch(
                T.or_(T.eq(t, f[i]), T.eq(t, T.shape(L)[0] - 1)), 0,
                T.largest(
                    gamma + s_Ot(
                        T.stack(*m[:i + 1]),  # T.stack(*m[:i+1]) should be the memories so far
                        t,
                        f[i],
                        L),
                    0)),
            sequences=[L, T.arange(T.shape(L)[0])])
        cost_arr[2 * i + 1] /= T.shape(L)[0]

    cost1, _ = theano.scan(  # this is equation (8)
        lambda r_bar, t: T.switch(
            T.eq(r_t, t), 0,
            T.largest(gamma - sR(r_args, r_t, L, V) + sR(r_args, t, L, V),
                      0)),
        sequences=[V, T.arange(T.shape(V)[0])])
    cost1 /= T.shape(V)[0]

    cost = cost1.sum()
    for c in cost_arr:
        cost += c.sum()

    updates = sgd(cost, [self.U_Ot, self.U_R], learning_rate=self.lr)

    # print([r_t, gamma, L, V] + m + f)  # in practice [r_t, gamma, L, V, x_t, m_o0, f0_t]
    self.train_model = theano.function(
        inputs=[r_t, gamma, L, V] + m + f,  # in practice [r_t, gamma, L, V, x_t, m_o0, f0_t]
        outputs=[cost],
        updates=updates)
    # theano.printing.pydotprint(self.train_model, outfile="./test.png",
    #                            var_with_name_simple=True)

    self.sR = theano.function([xs, y_t, L, V], sR(xs, y_t, L, V))
    self.s_Ot = theano.function([xs, y_t, yp_t, L], s_Ot(xs, y_t, yp_t, L))
def create_train(self, lenW, n_facts):
    """
    Build and compile the training / scoring Theano functions.

    After the call ``self.train_model`` performs one SGD update of
    ``self.U_Ot`` and ``self.U_R`` and returns ``[cost]``; ``self.sR`` and
    ``self.s_Ot`` expose the two scoring functions.

    :param lenW: length of one embedded row; every feature vector built
        here has length 3 * lenW + 3
    :param n_facts: number of supporting facts per training example
    """
    ONE = theano.shared(np.float32(1))
    ZERO = theano.shared(np.float32(0))

    # Feature maps: each places a row of the given matrix into one of
    # three lenW-wide slots, followed by three trailing entries.
    def phi_x1(x_t, L):
        row = L[x_t].reshape((-1,))
        return T.concatenate([row, zeros((2*lenW,)), zeros((3,))], axis=0)

    def phi_x2(x_t, L):
        row = L[x_t].reshape((-1,))
        return T.concatenate([zeros((lenW,)), row, zeros((lenW,)), zeros((3,))], axis=0)

    def phi_y(x_t, L):
        row = L[x_t].reshape((-1,))
        return T.concatenate([zeros((2*lenW,)), row, zeros((3,))], axis=0)

    def phi_t(x_t, y_t, yp_t, L):
        # Three indicator features comparing the indices pairwise.
        flags = T.stack(T.switch(T.lt(x_t,y_t), ONE, ZERO),
                        T.switch(T.lt(x_t,yp_t), ONE, ZERO),
                        T.switch(T.lt(y_t,yp_t), ONE, ZERO))
        return T.concatenate([zeros(3*lenW,), flags], axis=0)

    def _embed_x(x_t, t, L):
        # First sequence element uses slot one, later elements slot two.
        return T.switch(T.eq(t, 0),
                        phi_x1(x_t, L).reshape((1,-1)),
                        phi_x2(x_t, L).reshape((1,-1)))

    def s_Ot(xs, y_t, yp_t, L):
        def _step(x_t, t):
            left = T.dot(_embed_x(x_t, t, L), self.U_Ot.T)
            right = T.dot(self.U_Ot, (phi_y(y_t, L) - phi_y(yp_t, L) + phi_t(x_t, y_t, yp_t, L)))
            return T.dot(left, right)

        result, updates = theano.scan(
            _step, sequences=[xs, T.arange(T.shape(xs)[0])])
        return result.sum()

    def sR(xs, y_t, L, V):
        def _step(x_t, t):
            left = T.dot(_embed_x(x_t, t, L), self.U_R.T)
            right = T.dot(self.U_R, phi_y(y_t, V))
            return T.dot(left, right)

        result, updates = theano.scan(
            _step, sequences=[xs, T.arange(T.shape(xs)[0])])
        return result.sum()

    # Symbolic inputs.
    x_t = T.iscalar('x_t')
    y_t = T.iscalar('y_t')
    yp_t = T.iscalar('yp_t')
    xs = T.ivector('xs')
    m = [x_t] + [T.iscalar('m_o%d' % i) for i in xrange(n_facts)]
    f = [T.iscalar('f%d_t' % i) for i in xrange(n_facts)]
    r_t = T.iscalar('r_t')
    gamma = T.scalar('gamma')
    L = T.fmatrix('L')  # list of messages
    V = T.fmatrix('V')  # vocab

    r_args = T.stack(*m)
    n_rows_L = T.arange(T.shape(L)[0])

    # Two margin terms per supporting fact, appended in the order
    # (fact i scored first, fact i scored second).
    cost_arr = []
    for i in xrange(n_facts):
        def _skip(t):
            # No penalty for the true fact or for the last row of L.
            return T.or_(T.eq(t, f[i]), T.eq(t, T.shape(L)[0]-1))

        def _true_first(f_bar, t):
            margin = T.largest(gamma - s_Ot(T.stack(*m[:i+1]), f[i], t, L), 0)
            return T.switch(_skip(t), 0, margin)

        def _true_second(f_bar, t):
            margin = T.largest(gamma + s_Ot(T.stack(*m[:i+1]), t, f[i], L), 0)
            return T.switch(_skip(t), 0, margin)

        # scan calls the step functions right away, so they see this
        # iteration's value of i.
        first, _ = theano.scan(_true_first, sequences=[L, n_rows_L])
        cost_arr.append(first / T.shape(L)[0])
        second, _ = theano.scan(_true_second, sequences=[L, n_rows_L])
        cost_arr.append(second / T.shape(L)[0])

    def _response_margin(r_bar, t):
        margin = T.largest(gamma - sR(r_args, r_t, L, V) + sR(r_args, t, L, V), 0)
        return T.switch(T.eq(r_t, t), 0, margin)

    cost1, _ = theano.scan(
        _response_margin, sequences=[V, T.arange(T.shape(V)[0])])
    cost1 = cost1 / T.shape(V)[0]

    cost = cost1.sum()
    for c in cost_arr:
        cost = cost + c.sum()

    updates = sgd(cost, [self.U_Ot, self.U_R], learning_rate=self.lr)

    self.train_model = theano.function(
        inputs=[r_t, gamma, L, V] + m + f,
        outputs=[cost],
        updates=updates)

    self.sR = theano.function([xs, y_t, L, V], sR(xs, y_t, L, V))
    self.s_Ot = theano.function([xs, y_t, yp_t, L], s_Ot(xs, y_t, yp_t, L))
def __init__(self, X, image_shape, threshold=1e-4, radius=9, use_divisor=True):
    """
    Build the symbolic graph of a local contrast normalization layer.

    :type X: theano.tensor.dtensor4
    :param X: symbolic image tensor, of shape image_shape

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width)

    :type threshold: double
    :param threshold: lower bound on the divisor, avoids division by zero

    :type radius: int
    :param radius: side length of the Gaussian filter patch (default 9x9)

    :type use_divisor: Boolean
    :param use_divisor: whether or not to apply divisive normalization

    The result is stored in ``self.output``; the shared filter bank in
    ``self.filters``.
    """
    # A single Gaussian filter spanning every input feature map.
    kernel_dims = (1, image_shape[1], radius, radius)
    self.filters = theano.shared(self.gaussian_filter(kernel_dims), borrow=True)

    half = int(numpy.floor(kernel_dims[2] / 2.))

    def _weighted_average(tensor):
        # 'full' convolution cropped back to the image size.
        full = conv.conv2d(
            input=tensor,
            filters=self.filters,
            image_shape=image_shape,
            filter_shape=kernel_dims,
            border_mode='full'
        )
        return full[:, :, half:-half, half:-half]

    # Subtractive step: the single averaged map is made broadcastable over
    # the feature-map axis before being removed from X.
    centered = X - T.addbroadcast(_weighted_average(X), 1)
    self.output = centered

    if not use_divisor:
        return

    # Divisive step: the weighted average of the squared centered signal
    # gives the local variance; its root is the local standard deviation,
    # floored by its per-image mean and by `threshold`.
    local_std = T.sqrt(_weighted_average(T.sqr(centered)))
    mean_std = local_std.mean(axis=[2, 3])
    scale = T.largest(mean_std.dimshuffle(0, 1, 'x', 'x'), local_std)

    self.output = centered / T.maximum(T.addbroadcast(scale, 1), threshold)
filters = sharedX(gaussian_filter(kernel_shape).reshape(filter_shape)) X = X.dimshuffle(0, 3, 1, 2) convout = conv2d(X, filters=filters, border_mode='full') # For each pixel, remove mean of 9x9 neighborhood mid = int(np.floor(kernel_shape / 2.)) centered_X = X - convout[:, :, mid:-mid, mid:-mid] # Scale down norm of 9x9 patch if norm is bigger than 1 sum_sqr_XX = conv2d(T.sqr(X), filters=filters, border_mode='full') denom = T.sqrt(sum_sqr_XX[:, :, mid:-mid, mid:-mid]) per_img_mean = denom.mean(axis=[2, 3]) divisor = T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom) new_X = centered_X / T.maximum(1., divisor) new_X = new_X.dimshuffle(0, 2, 3, 1) from theano import function f = function([orig_X], new_X) j = 0 for path in paths: if j % 100 == 0: print j try: raw_path = path path = base + '/' + path
def create_train(self, lenW, n_facts):
    """
    Compile the Theano training and scoring functions of the model.

    Sets ``self.train_model`` (one SGD step on ``self.U_Ot`` / ``self.U_R``
    returning ``[cost]``), ``self.sR`` and ``self.s_Ot``.

    :param lenW: length of one embedded row; each feature vector built
        here has length 3 * lenW + 3
    :param n_facts: number of supporting facts per training example
    """
    ONE = theano.shared(np.float32(1))
    ZERO = theano.shared(np.float32(0))

    def phi_x1(x_t, L):
        # Handles the case where x in phi_x is the question, i.e. comes
        # from the actual input x.
        return T.concatenate(
            [L[x_t].reshape((-1, )), zeros((2 * lenW, )), zeros((3, ))],
            axis=0
        )
        # A -1 in reshape means that dimension is inferred; reshape((-1,))
        # in these three phi functions flattens to 1-D.  It is of little
        # use here, since the question is a single sentence.
        # The vector for x_t in the vector space model is placed in the
        # first lenW slot.
        # Returns a long vector of length 3*lenW + 3.

    def phi_x2(x_t, L):
        # Handles the case where x in phi_x is not the question but a
        # memory, i.e. comes from the supporting memories.
        return T.concatenate(
            [
                zeros((lenW, )), L[x_t].reshape((-1, )), zeros((lenW, )),
                zeros((3, ))
            ],
            axis=0)
        # Returns length 3*lenW + 3; differs from phi_x1 only in which
        # slot is filled (see page 3 of the paper).

    def phi_y(x_t, L):
        return T.concatenate(
            [zeros((2 * lenW, )), L[x_t].reshape((-1, )), zeros((3, ))],
            axis=0)
        # Placed in the third lenW slot.

    def phi_t(x_t, y_t, yp_t, L):
        return T.concatenate([
            zeros(3 * lenW, ),
            T.stack(T.switch(T.lt(x_t, y_t), ONE, ZERO),
                    T.switch(T.lt(x_t, yp_t), ONE, ZERO),
                    T.switch(T.lt(y_t, yp_t), ONE, ZERO))
        ], axis=0)
        # These are the trailing 3 of 3*lenW + 3.
        # lt(a, b): a < b.  All arguments here are ids; a smaller id means
        # it was written to memory earlier (is older), which sets a 1.

    def s_Ot(xs, y_t, yp_t, L):
        result, updates = theano.scan(lambda x_t, t: T.dot(
            T.dot(
                T.switch(T.eq(t, 0), phi_x1(x_t, L).reshape((1, -1)),
                         phi_x2(x_t, L).reshape((1, -1))), self.U_Ot.T),
            T.dot(self.U_Ot, (phi_y(y_t, L) - phi_y(yp_t, L) + phi_t(
                x_t, y_t, yp_t, L)))),
                                      sequences=[
                                          xs, T.arange(T.shape(xs)[0])
                                      ])
        # T.eq(t, 0): t is the id at position 0, i.e. the question.
        # y_t is the correct fact.
        # Sums over all facts; this corresponds to footnote 3 on page 3 of
        # the paper and relies on the linearity of the vector space model.
        # The first n facts are passed in, s_o is computed for each of
        # them separately, and the results are accumulated.
        return result.sum(
        )

    def sR(xs, y_t, L, V):
        result, updates = theano.scan(
            lambda x_t, t: T.dot(
                T.dot(
                    T.switch(T.eq(t, 0), phi_x1(x_t, L).reshape((1, -1)),
                             phi_x2(x_t, L).reshape((1, -1))), self.U_R.T),
                T.dot(self.U_R, phi_y(y_t, V))),
            sequences=[xs, T.arange(T.shape(xs)[0])])
        return result.sum()

    x_t = T.iscalar('x_t')
    y_t = T.iscalar('y_t')
    yp_t = T.iscalar('yp_t')
    xs = T.ivector('xs')
    m = [x_t] + [T.iscalar('m_o%d' % i)
                 for i in xrange(n_facts)]  # x_t is used to feed in the id
    f = [T.iscalar('f%d_t' % i) for i in xrange(n_facts)
         ]  # m and f are alike: both are the correct facts (mo1 and mo2 in the original paper)
    r_t = T.iscalar(
        'r_t')  # self.H[line['answer']], the correct answer; used for the R part
    gamma = T.scalar('gamma')
    L = T.fmatrix(
        'L'
    )  # list of messages: memory_list, a VSM matrix of the memories; one row per memory, lenW columns (since it is a VSM)
    V = T.fmatrix('V')  # vocab: self.V, a VSM matrix over the vocabulary
    r_args = T.stack(
        *m)  # Stacks m together; unlike concatenate this adds one dimension.  It seems unnecessary here.

    cost_arr = [0] * 2 * (len(m) - 1)
    for i in xrange(len(m) -
                    1):  # len(m)-1 is the number of facts (2 in the original paper)
        cost_arr[2 * i], _ = theano.scan(
            lambda f_bar, t: T.switch(
                T.or_(T.eq(t, f[i]), T.eq(t, T.shape(L)[0] - 1)), 0,
                T.largest(gamma - s_Ot(T.stack(*m[:i + 1]), f[i], t, L), 0)
            ),
            sequences=[L, T.arange(T.shape(L)[0])
                       ])
        # Here f[i] is the i-th fact and t is the wrong answer (randomly
        # generated in principle; here it simply loops in order).
        # T.eq(t, f[i]) means t hit the fact; T.eq(t, T.shape(L)[0]-1)
        # means t is the last sentence (the question).  Both return 0;
        # otherwise the `largest` part is returned.
        # See the formula on p14 of the paper: its sum excludes the case
        # where the fact is hit (t != f).  The question does not need to
        # enter the computation either.
        # m[:i+1] passes in the first i facts plus one id.
        # f_bar is unused.
        # T.switch(T.or_(T.eq(t, f[i]), T.eq(t, T.shape(L)[0] - 1)), 0,
        #     T.largest(gamma - s_Ot(T.stack(*m[:i + 1]), f[i], t, L), 0))
        # rewritten as an equivalent if/else:
        # if ((t == f[i])) | (t == T.shape(L)[0] - 1){
        #     return 0
        # }else{
        #     return T.largest(gamma - s_Ot(T.stack(*m[: i + 1]), f[i], t, L), 0)
        # }
        cost_arr[2 * i] /= T.shape(L)[0]  # this division does not appear in the original paper
        cost_arr[2 * i + 1], _ = theano.scan(lambda f_bar, t: T.switch(
            T.or_(T.eq(t, f[i]), T.eq(t, T.shape(L)[0] - 1)), 0,
            T.largest(gamma + s_Ot(T.stack(*m[:i + 1]), t, f[i], L), 0)),
                                             sequences=[
                                                 L,
                                                 T.arange(T.shape(L)[0])
                                             ])
        cost_arr[2 * i + 1] /= T.shape(L)[0]
        # The author does something interesting here: cost_arr has twice
        # as many entries as there are facts; even indices hold the
        # "minus" part of the formula and odd indices the "plus" part.

    cost1, _ = theano.scan(lambda r_bar, t: T.switch(
        T.eq(r_t, t), 0,
        T.largest(gamma - sR(r_args, r_t, L, V) + sR(r_args, t, L, V), 0)),
                           sequences=[V, T.arange(T.shape(V)[0])])
    cost1 /= T.shape(V)[0]
    # The latter part of the formula.

    cost = cost1.sum()
    for c in cost_arr:
        cost += c.sum()

    updates = sgd(cost, [self.U_Ot, self.U_R], learning_rate=self.lr)

    self.train_model = theano.function(inputs=[r_t, gamma, L, V] + m + f,
                                       outputs=[cost],
                                       updates=updates)

    self.sR = theano.function([xs, y_t, L, V], sR(xs, y_t, L, V))
    self.s_Ot = theano.function([xs, y_t, yp_t, L], s_Ot(xs, y_t, yp_t, L))
def __init__(self, input_units, hidden_units,gamma,alpha):
    """
    Create the two embedding matrices and compile all Theano functions.

    ``self.U_O`` and ``self.U_R`` have shape (hidden_units, 3 * input_units)
    and are read as three column blocks of width input_units.

    :param input_units: width of one column block of the embeddings
    :param hidden_units: number of rows of the embedding matrices
    :param gamma: margin used in the three ranking costs
    :param alpha: step size of the gradient update in ``self.train``

    Compiled functions: ``getFact1``, ``getFact2``, ``getAnswer``,
    ``train`` and ``computeCost``.
    """
    V = input_units
    n_cols = input_units * 3
    n_rows = hidden_units

    def _init_embedding():
        # Uniform in [-1, 1), scaled by 0.2, stored as float32.
        values = np.random.uniform(-1.0, 1.0,(n_rows, n_cols)) * 0.2
        return theano.shared(values.astype(np.float32))

    self.U_O = _init_embedding()
    self.U_R = _init_embedding()

    # Symbolic inputs: a true item (vector) and its candidates (matrix).
    f1 = T.ivector("f1")
    _f1 = T.imatrix("_f1")
    f2 = T.ivector("f2")
    _f2 = T.imatrix("_f2")
    r = T.ivector("r")
    _r = T.imatrix("_r")
    x = T.ivector("x")
    m = T.imatrix("m") #memory
    v = T.imatrix("v") #vocabulary

    def _scorer(U, x_columns):
        # Score = <U[:, x_columns] . x, U[:, 2V:] . y>
        def score(x, y):
            x_emb = T.dot(U[:, x_columns], x)
            y_emb = T.dot(U[:, 2*V:], y)
            return T.dot(x_emb.T, y_emb)
        return score

    first_block = slice(None, V)
    second_block = slice(V, 2*V)
    S_O = _scorer(self.U_O, first_block)
    S_O_f = _scorer(self.U_O, second_block)
    S_R = _scorer(self.U_R, first_block)
    S_R_f = _scorer(self.U_R, second_block)

    # Margin ranking costs, one scan per candidate set.
    def _cost_fact1(f_bar):
        return T.largest(gamma - S_O(x,f1) + S_O(x,f_bar), 0)

    def _cost_fact2(f_bar):
        return T.largest(gamma - S_O(x,f2) - S_O_f(f1,f2) + S_O(x,f_bar) + S_O_f(f1,f_bar), 0)

    def _cost_answer(r_bar):
        return T.largest(gamma - S_R(x,r) - S_R_f(f1,r) - S_R_f(f2,r) + S_R(x,r_bar) + S_R_f(f1,r_bar) + S_R_f(f2,r_bar), 0)

    cost1,_ = theano.scan(_cost_fact1, sequences = [_f1])
    cost2,_ = theano.scan(_cost_fact2, sequences = [_f2])
    cost3,_ = theano.scan(_cost_answer, sequences = [_r])

    # Inference: index of the best-scoring memory / vocabulary entry.
    self.getFact1 = theano.function(
        inputs= [x, m],
        outputs= T.argmax(S_O(x,m))
    )
    self.getFact2 = theano.function(
        inputs= [x, f1, m],
        outputs= T.argmax(S_O(x,m) + S_O_f(f1,m))
    )
    self.getAnswer = theano.function(
        inputs= [x, f1, f2, v],
        outputs= T.argmax(S_R(x,v) + S_R_f(f1,v) + S_R_f(f2,v))
    )

    # Training: plain gradient descent on the summed costs.
    cost = cost1.sum() + cost2.sum() + cost3.sum()
    grad_o, grad_r = T.grad(cost, [self.U_O,self.U_R])
    train_inputs = [x, f1, _f1, f2, _f2, r, _r]

    self.train = theano.function(
        inputs=train_inputs,
        outputs=[cost],
        updates=[(self.U_O, self.U_O - alpha*grad_o),
                 (self.U_R,self.U_R - alpha*grad_r)]
    )
    self.computeCost = theano.function(
        inputs=list(train_inputs),
        outputs=[cost]
    )
# Scoring function: U . tanh(E1 . B . E2^T + A . E1E2)
bilinearTrue = E1Temp.dot(BTemp).dot(T.transpose(E2Temp))
score = UTemp.dot(T.tanh(bilinearTrue + ATemp.dot(E1E2Temp)))
scoringFunction = theano.function(
    [ATemp, BTemp, UTemp, E1Temp, E2Temp, E1E2Temp], score)

# Loss function.
# Score of the corrupted triplet (E2 replaced by EC), needed for the loss.
bilinearCorrupted = E1Temp.dot(BTemp).dot(T.transpose(ECTemp))
scoreCorrupted = UTemp.dot(T.tanh(bilinearCorrupted + ATemp.dot(E1ECTemp)))

# Margin ranking loss plus the averaged squared-norm regulariser on A, B, U.
regulariser = regparam * (
    T.sum(ATemp**2) + T.sum(BTemp**2) + T.sum(UTemp**2)) / 3
loss = T.largest(0, (1 - score + scoreCorrupted + regulariser))
lossFunction = theano.function(
    [ATemp, BTemp, UTemp, E1Temp, E2Temp, E1E2Temp, ECTemp, E1ECTemp],
    loss)

# Gradients of the summed loss with respect to each parameter.
dA, dB, dU, dE1, dE2, dEC = T.grad(
    T.sum(loss), [ATemp, BTemp, UTemp, E1Temp, E2Temp, ECTemp])

#Definition of function to return gradients
def __init__(self, rng, input, filter_shape, filter_stride, filter_pad, image_shape, pool_stride,poolsize=(2, 2), normalistation=False, pooling=False):
    """
    Allocate a LeNetConvPoolLayer with shared variable internal parameters.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.dtensor4
    :param input: symbolic image tensor, of shape image_shape

    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num input feature maps,
                          filter height, filter width)

    :param filter_stride: int, convolution stride used in both directions

    :param filter_pad: int, zero padding of the convolution; 0 selects the
                       unpadded code path

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width)

    :param pool_stride: int, pooling stride used in both directions

    :type poolsize: tuple or list of length 2
    :param poolsize: the downsampling (pooling) factor (#rows, #cols)

    :param normalistation: must be falsy; normalisation is not implemented

    :param pooling: whether to apply max pooling after the convolution

    :raises NotImplementedError: if ``normalistation`` is requested
    """
    print(image_shape[1])
    print(filter_shape[1])
    assert image_shape[1] == filter_shape[1]

    if normalistation:
        # This branch used to leave `normalised` unbound, so the layer
        # crashed with a NameError further down; fail explicitly instead.
        raise NotImplementedError(
            "normalistation=True is not implemented for this layer")

    self.input = input
    self.filter_shape = filter_shape
    self.image_shape = image_shape

    # There are "num input feature maps * filter height * filter width"
    # inputs to each hidden unit.
    fan_in = numpy.prod(filter_shape[1:])

    # Zero-mean Gaussian weights (std 0.01) rescaled by sqrt(fan_in / 2).
    # Float division on purpose: under Python 2 integer division rounded
    # fan_in / 2 down, which gives 0 (and infinite weights) for fan_in == 1.
    self.W = theano.shared(
        numpy.asarray(
            rng.normal(scale=0.01, size=filter_shape) / numpy.sqrt(fan_in / 2.),
            dtype=theano.config.floatX
        ),
        borrow=True
    )

    # The bias is a 1D tensor -- one bias per output feature map --
    # initialized to the constant 0.
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)
    print(input.shape)

    # Convolve input feature maps with filters.
    stride = (filter_stride, filter_stride)
    if filter_pad > 0:
        conv_out = T.nnet.conv2d(
            input=input,
            filters=self.W,
            filter_shape=filter_shape,
            image_shape=image_shape,
            subsample=stride,
            border_mode=filter_pad
        )
    else:
        conv_out = conv.conv2d(
            input=input,
            filters=self.W,
            filter_shape=filter_shape,
            image_shape=image_shape,
            subsample=stride
        )

    # Downsample each feature map individually, using maxpooling.
    if pooling:
        pooled_out = downsample.max_pool_2d(
            input=conv_out,
            ds=poolsize,
            ignore_border=True,
            st=(pool_stride, pool_stride)
        )
    else:
        pooled_out = conv_out

    normalised = pooled_out

    # Add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
    # thus be broadcasted across mini-batches and feature map
    # width & height.
    self.preOutput = normalised + self.b.dimshuffle('x', 0, 'x', 'x')

    # Rectified linear activation.
    self.output = T.largest(0, self.preOutput)
    print(self.output)

    # Store parameters of this layer and their L1 / L2 penalties.
    self.params = [self.W, self.b]
    self.L1 = abs(self.W).sum()
    self.L2 = (self.W**2).sum()
def __init__(self, X, image_shape, threshold=1e-4, radius=9, use_divisor=True):
    """
    Allocate an LCN (local contrast normalization) layer.

    The normalized tensor is stored in ``self.output`` and the shared
    filter bank in ``self.filters``.

    :type X: theano.tensor.dtensor4
    :param X: symbolic image tensor, of shape image_shape

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width)

    :type threshold: double
    :param threshold: lower bound applied to the divisor, used to avoid
                      division by zero

    :type radius: int
    :param radius: side length of the square Gaussian filter patch
                   (default 9x9)

    :type use_divisor: Boolean
    :param use_divisor: whether or not to apply divisive normalization
    """
    # A single output filter that spans every input feature map.
    filter_shape = (1, image_shape[1], radius, radius)
    self.filters = theano.shared(self.gaussian_filter(filter_shape), borrow=True)

    # A 'full' convolution pads each spatial axis by (radius - 1), so crop
    # the result back to the spatial extent of X.  The stop index is given
    # explicitly (mid + size) instead of the former ``mid:-mid``: that form
    # collapses to an empty slice when mid == 0 (radius == 1) and comes out
    # one pixel short for even radii.  For odd radii >= 3 both are identical.
    mid = int(numpy.floor(filter_shape[2] / 2.))
    rows = slice(mid, mid + X.shape[2])
    cols = slice(mid, mid + X.shape[3])

    # Compute the Gaussian weighted average by means of convolution
    convout = conv.conv2d(input=X,
                          filters=self.filters,
                          image_shape=image_shape,
                          filter_shape=filter_shape,
                          border_mode='full')

    # Subtractive step: the convolution yields one feature map, so mark
    # that axis broadcastable and subtract it from every map of X.
    centered_X = X - T.addbroadcast(convout[:, :, rows, cols], 1)

    # Boolean marks whether or not to perform divisive step
    if use_divisor:
        # Note that the local variances can be computed by using the
        # centered_X tensor. If we convolve this with the mean filter, that
        # should give us the variance at each point. We simply take the
        # square root to get our denominator.

        # Compute variances
        sum_sqr_XX = conv.conv2d(input=T.sqr(centered_X),
                                 filters=self.filters,
                                 image_shape=image_shape,
                                 filter_shape=filter_shape,
                                 border_mode='full')

        # Take square root to get local standard deviation
        denom = T.sqrt(sum_sqr_XX[:, :, rows, cols])

        # Never divide by less than the mean standard deviation of the image.
        per_img_mean = denom.mean(axis=[2, 3])
        divisor = T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom)

        # Divisive step, with the divisor floored at `threshold`
        new_X = centered_X / T.maximum(T.addbroadcast(divisor, 1), threshold)
    else:
        new_X = centered_X

    self.output = new_X
def gated_se(self, y):
    """
    Gate-weighted squared error between ``self.output`` and ``y``.

    Both tensors are indexed on four axes, and the first and last two
    entries along the fourth axis are dropped before comparison.  Each
    squared difference is weighted by the element-wise larger of the two
    values, and the weighted sum is divided by the sum of the weights.

    :param y: symbolic target tensor, indexed like ``self.output``
    :return: symbolic scalar holding the normalized weighted squared error
    """
    prediction = self.output[:, :, :, 2:-2]
    target = y[:, :, :, 2:-2]

    # The gate is the larger of prediction and target at each position.
    weights = T.largest(prediction, target)
    weighted_sq_err = weights * (prediction - target) ** 2

    return T.sum(weighted_sq_err) / T.sum(weights)
def create_train(self, lenW, n_facts):
    """
    Build the symbolic training graph and compile the Theano functions.

    Three compiled functions are stored on the instance:

    * ``self.train_model`` -- takes ``[r_t, gamma, L, V]`` followed by the
      ``1 + n_facts`` index scalars in ``m`` and the ``n_facts`` index
      scalars in ``f``; returns ``[cost]`` and applies the ``sgd`` updates
      to ``self.U_Ot`` and ``self.U_R`` with learning rate ``self.lr``.
    * ``self.sR`` -- scores ``(xs, y_t, L, V)`` using ``self.U_R``.
    * ``self.s_Ot`` -- scores ``(xs, y_t, yp_t, L)`` using ``self.U_Ot``.

    :param lenW: length of each zero-padding segment in the feature vectors
                 (presumably the width of a row of ``L`` / ``V`` -- confirm
                 against the caller)
    :param n_facts: number of supporting-fact index inputs to create
    """
    one = theano.shared(np.float32(1))
    zero = theano.shared(np.float32(0))

    def flat_row(table, idx):
        # Row `idx` of `table`, flattened to a vector.
        return table[idx].reshape((-1, ))

    def phi_x1(idx, table):
        # Row placed in the first segment, zeros elsewhere.
        return T.concatenate(
            [flat_row(table, idx), zeros((2 * lenW, )), zeros((3, ))],
            axis=0)

    def phi_x2(idx, table):
        # Row placed in the second segment, zeros elsewhere.
        return T.concatenate(
            [zeros((lenW, )), flat_row(table, idx), zeros((lenW, )),
             zeros((3, ))],
            axis=0)

    def phi_y(idx, table):
        # Row placed in the third segment, zeros elsewhere.
        return T.concatenate(
            [zeros((2 * lenW, )), flat_row(table, idx), zeros((3, ))],
            axis=0)

    def phi_t(idx, y_idx, yp_idx, table):
        # Only the three trailing slots are populated: pairwise "less than"
        # indicators between the three indices.  `table` is unused.
        order_flags = T.stack(T.switch(T.lt(idx, y_idx), one, zero),
                              T.switch(T.lt(idx, yp_idx), one, zero),
                              T.switch(T.lt(y_idx, yp_idx), one, zero))
        return T.concatenate([zeros(3 * lenW), order_flags], axis=0)

    def phi_x(idx, pos, table):
        # The first element of the sequence (pos == 0) uses phi_x1, every
        # later one phi_x2; both are shaped as row vectors.
        is_first = T.eq(pos, 0)
        as_first = phi_x1(idx, table).reshape((1, -1))
        as_later = phi_x2(idx, table).reshape((1, -1))
        return T.switch(is_first, as_first, as_later)

    def score_memory(seq, y_idx, yp_idx, table):
        # Sum over the sequence of
        # phi_x . U_Ot^T . U_Ot . (phi_y(y) - phi_y(y') + phi_t).
        def step(idx, pos):
            left = T.dot(phi_x(idx, pos, table), self.U_Ot.T)
            right = T.dot(
                self.U_Ot,
                (phi_y(y_idx, table) - phi_y(yp_idx, table)
                 + phi_t(idx, y_idx, yp_idx, table)))
            return T.dot(left, right)

        per_step, _ = theano.scan(
            step, sequences=[seq, T.arange(T.shape(seq)[0])])
        return per_step.sum()

    def score_response(seq, y_idx, table, vocab):
        # Sum over the sequence of phi_x . U_R^T . U_R . phi_y(y), with
        # phi_y looked up in `vocab`.
        def step(idx, pos):
            left = T.dot(phi_x(idx, pos, table), self.U_R.T)
            right = T.dot(self.U_R, phi_y(y_idx, vocab))
            return T.dot(left, right)

        per_step, _ = theano.scan(
            step, sequences=[seq, T.arange(T.shape(seq)[0])])
        return per_step.sum()

    # Symbolic inputs (the variable name strings are part of the graph).
    x_t = T.iscalar('x_t')
    y_t = T.iscalar('y_t')
    yp_t = T.iscalar('yp_t')
    xs = T.ivector('xs')
    m = [x_t] + [T.iscalar('m_o%d' % i) for i in xrange(n_facts)]
    f = [T.iscalar('f%d_t' % i) for i in xrange(n_facts)]
    r_t = T.iscalar('r_t')
    gamma = T.scalar('gamma')
    L = T.fmatrix('L')  # list of messages
    V = T.fmatrix('V')  # vocab
    r_args = T.stack(*m)

    # Two margin terms per fact, kept in the order (2k, 2k + 1).
    fact_costs = []
    for k, fact in enumerate(f):
        # The step functions below close over `k` and `fact`; this is safe
        # because theano.scan calls them right away, inside this iteration.

        def fact_first(f_bar, t):
            # Candidate `t` is skipped when it equals the fact index or the
            # last row of L.
            skip = T.or_(T.eq(t, fact), T.eq(t, T.shape(L)[0] - 1))
            return T.switch(
                skip, 0,
                T.largest(
                    gamma - score_memory(T.stack(*m[:k + 1]), fact, t, L),
                    0))

        def fact_second(f_bar, t):
            # Same skip rule, with the fact and candidate swapped.
            skip = T.or_(T.eq(t, fact), T.eq(t, T.shape(L)[0] - 1))
            return T.switch(
                skip, 0,
                T.largest(
                    gamma + score_memory(T.stack(*m[:k + 1]), t, fact, L),
                    0))

        hinge_a, _ = theano.scan(
            fact_first, sequences=[L, T.arange(T.shape(L)[0])])
        hinge_a /= T.shape(L)[0]
        fact_costs.append(hinge_a)

        hinge_b, _ = theano.scan(
            fact_second, sequences=[L, T.arange(T.shape(L)[0])])
        hinge_b /= T.shape(L)[0]
        fact_costs.append(hinge_b)

    def response_margin(r_bar, t):
        # No penalty for the target row itself.
        return T.switch(
            T.eq(r_t, t), 0,
            T.largest(
                gamma - score_response(r_args, r_t, L, V)
                + score_response(r_args, t, L, V),
                0))

    response_cost, _ = theano.scan(
        response_margin, sequences=[V, T.arange(T.shape(V)[0])])
    response_cost /= T.shape(V)[0]

    cost = response_cost.sum()
    for term in fact_costs:
        cost = cost + term.sum()

    updates = sgd(cost, [self.U_Ot, self.U_R], learning_rate=self.lr)

    self.train_model = theano.function(inputs=[r_t, gamma, L, V] + m + f,
                                       outputs=[cost],
                                       updates=updates)
    self.sR = theano.function([xs, y_t, L, V],
                              score_response(xs, y_t, L, V))
    self.s_Ot = theano.function([xs, y_t, yp_t, L],
                                score_memory(xs, y_t, yp_t, L))