def step(x_t, y_t, h_tm1, Wx, Wh, bh, Wy, by, lr, switch):
    h_t = relu(T.dot(x_t, Wx) + T.dot(h_tm1, Wh) + bh)
    yo_t = relu(T.dot(h_t, Wy) + by)
    updates = OrderedDict()
    # Train the RNN: backprop (loss + DNI output)
    loss = T.mean(T.square(yo_t - y_t))
    dni_out = self.dni.output(h_t)
    for param in self.params:
        dlossdparam = T.grad(loss, param)
        dniJ = T.Lop(h_t, param, dni_out, disconnected_inputs='ignore')
        updates[param] = param - lr * T.switch(T.gt(switch, 0),
                                               dlossdparam + dniJ,
                                               dlossdparam)
    # Update the DNI (from the last step)
    # re-calculate the DNI prediction from the last step
    # note: can't be passed through scan or T.grad won't work
    dni_out_old = self.dni.output(h_tm1)
    # dni_target: current loss backprop'ed + new dni backprop'ed
    dni_target = T.grad(loss, h_tm1) \
                 + T.Lop(h_t, h_tm1, dni_out)
    dni_error = T.sum(T.square(dni_out_old - dni_target))
    for param in self.dni.params:
        gparam = T.grad(dni_error, param)
        updates[param] = param - lr * gparam
    return [h_t, loss, dni_error], updates
def model(X, params, featMaps, pieces, pDropConv, pDropHidden):
    lnum = 0  # conv: (32, 32) pool: (16, 16)
    layer = conv2d(X, params[lnum][0], border_mode='half') + \
            params[lnum][1].dimshuffle('x', 0, 'x', 'x')
    layer = maxout(layer, featMaps[lnum], pieces[lnum])
    layer = pool_2d(layer, (2, 2), st=(2, 2), ignore_border=False, mode='max')
    layer = basicUtils.dropout(layer, pDropConv)
    lnum += 1  # conv: (16, 16) pool: (8, 8)
    layer = conv2d(layer, params[lnum][0], border_mode='half') + \
            params[lnum][1].dimshuffle('x', 0, 'x', 'x')
    layer = maxout(layer, featMaps[lnum], pieces[lnum])
    layer = pool_2d(layer, (2, 2), st=(2, 2), ignore_border=False, mode='max')
    layer = basicUtils.dropout(layer, pDropConv)
    lnum += 1  # conv: (8, 8) pool: (4, 4)
    layer = conv2d(layer, params[lnum][0], border_mode='half') + \
            params[lnum][1].dimshuffle('x', 0, 'x', 'x')
    layer = maxout(layer, featMaps[lnum], pieces[lnum])
    layer = pool_2d(layer, (2, 2), st=(2, 2), ignore_border=False, mode='max')
    layer = basicUtils.dropout(layer, pDropConv)
    lnum += 1
    layer = T.flatten(layer, outdim=2)
    layer = T.dot(layer, params[lnum][0]) + params[lnum][1].dimshuffle('x', 0)
    layer = relu(layer, alpha=0)
    layer = basicUtils.dropout(layer, pDropHidden)
    lnum += 1
    layer = T.dot(layer, params[lnum][0]) + params[lnum][1].dimshuffle('x', 0)
    layer = relu(layer, alpha=0)
    layer = basicUtils.dropout(layer, pDropHidden)
    lnum += 1
    return softmax(T.dot(layer, params[lnum][0]) + params[lnum][1].dimshuffle('x', 0))  # training produces NaN if the softmax from nnet is used
def metaOp1(i, j, X, w1, w2, b1, b2):
    # (n,1,r,c)**(16,1,3,3)=(n,16,r,c)
    hiddens = conv2d(X[:, j, :, :, :], w1[i, j, :, :, :, :], border_mode='half') + b1[i, j, :, :, :, :]
    hiddens = relu(hiddens, alpha=0)
    # (n,16,r,c)**(1,16,1,1)=(n,1,r,c)
    outputs = conv2d(hiddens, w2[i, j, :, :, :, :], border_mode='valid') + b2[i, j, :, :, :, :]
    outputs = relu(outputs, alpha=0)
    return outputs
def __init__(self, rng, input, filter_shape, W=None, b=None, stride=(1, 1), layer_index=0):
    self.layername = 'Conv' + str(layer_index)
    self.input = input
    # num input feature maps * filter height * filter width
    fan_in = numpy.prod(filter_shape[1:])
    # num output feature maps * filter height * filter width
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]))
    # W
    if W is None:
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(numpy.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                                             dtype=theano.config.floatX),
                               borrow=True)
    else:
        self.W = theano.shared(value=W.astype(theano.config.floatX), borrow=True)
    self.W.name = self.layername + '#W'
    # b
    if b is None:
        self.b = theano.shared(numpy.zeros((filter_shape[0],), dtype=theano.config.floatX), borrow=True)
    else:
        self.b = theano.shared(value=b.astype(theano.config.floatX), borrow=True)
    self.b.name = self.layername + '#b'
    # batch size * num feature maps * feature map height * width
    conv_out = conv2d(input=input, filters=self.W, filter_shape=filter_shape,
                      border_mode='half', subsample=stride)
    self.output = relu(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
    # prepared for the last 3 FC layers
    valid_conv_out = conv2d(input=input, filters=self.W, filter_shape=filter_shape, subsample=stride)
    self.nopad_output = relu(valid_conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
    # store parameters of this layer
    self.params = [self.W, self.b]
def build_network(self, layers):
    """
    Method building the network
    :param layers: List of layer width
    :return: None
    """
    if debug:
        print("Creating Neural Network")
        print("=======================")
        print("Input layer neurons: " + str(layers[0]))
        for i in range(1, len(layers) - 1):
            print("Hidden layer #" + str(i) + " neurons: " + str(layers[i]))
        print("Output layer neurons: " + str(layers[-1]))
        print("")
    # Define the input variable and the expected values
    ipt = T.fmatrix("input")
    expected = T.fmatrix("expected")
    # Variables holding the weights, activations and noises
    weights = []
    activations = []
    # Build the layers
    for i in range(1, len(layers)):
        weight = theano.shared(np.asarray(np.random.randn(*(layers[i - 1], layers[i])) * 0.01))
        weight.name = "Weight " + str(i)
        weights.append(weight)
        if i == 1:
            activation = Tann.relu(T.dot(ipt, weights[-1]))
        elif i == (len(layers) - 1):
            activation = Tann.softmax(T.dot(activations[-1], weights[-1]))
        else:
            activation = Tann.relu(T.dot(activations[-1], weights[-1]))
        activation.name = "Activation " + str(i)
        activations.append(activation)
    # Build params list
    params = []
    for i in range(len(weights)):
        params.append(weights[i])
    # Error and backprop
    error = T.sum((activations[-1] - expected) ** 2)
    updates = self.rmsprop(error, params)
    # Define the train function
    self.train = theano.function(inputs=[ipt, expected], outputs=error, updates=updates,
                                 allow_input_downcast=True)
    # Define the predict function
    output = T.argmax(activations[-1], axis=1)
    self.predict = theano.function(inputs=[ipt], outputs=output, allow_input_downcast=True)
def __init__(self, input, params=None, rng=np.random.RandomState(), zsize=100):
    self.input = input
    h_input = input
    h = FullyConnected(input=h_input,
                       n_in=zsize,
                       n_out=4 * 4 * 1024,
                       W=params[0] if params is not None else None,
                       b=params[1] if params is not None else None,
                       rng=rng)
    h_out = relu(batchnorm(h.output.reshape((input.shape[0], 1024, 4, 4))))
    conv1 = ConvLayer(h_out, 4, 8, 1024, 512, rng=rng,
                      W=params[2] if params is not None else None)
    conv1_out = relu(batchnorm(conv1.output))
    conv2 = ConvLayer(conv1_out, 8, 16, 512, 256, rng=rng,
                      W=params[3] if params is not None else None)
    conv2_out = relu(batchnorm(conv2.output))
    conv3 = ConvLayer(conv2_out, 16, 32, 256, 128, rng=rng,
                      W=params[4] if params is not None else None)
    conv3_out = relu(batchnorm(conv3.output))
    conv4 = ConvLayer(conv3_out, 32, 64, 128, 3, rng=rng,
                      W=params[5] if params is not None else None)
    conv4_out = T.tanh(conv4.output)
    self.output = conv4_out
    self.params = h.params + conv1.params + conv2.params + \
                  conv3.params + conv4.params
def model(X, w1, w2, w3, w4):
    l1 = relu(conv2d(X, w1, border_mode='full'))
    l2 = relu(conv2d(l1, w2, border_mode='valid'))
    l3 = relu(conv2d(l2, w3, border_mode='full'))
    l4 = conv2d(l3, w4, border_mode='valid')
    output = l2_norm_layer(l4)
    return output
def recurrence1(wrut, wrct, urx_pre1, cpt_pre1):
    # ResNet-style update
    ur_t = relu(T.dot(wrut, urx_pre1) + urx_pre1)        # (d, )
    cp_t = relu(T.dot(cpt_pre1, wrct) + cpt_pre1)        # (size, d)
    # attention: compute the context vector
    e_t = T.dot(tanh(T.dot(wa2, ur_t) + T.dot(cp_t, wa3)), wa1)
    a_t = hsoftmax(e_t)                                  # (size, )
    c_t = T.sum(cp_t * a_t.dimshuffle(0, 'x'), axis=0)   # (d, )
    return [ur_t, cp_t, c_t]
def apply(self, output_length, A, B, padding):
    A_, garbage = self.GRU_A.apply(padding, states=A)
    WA_ = self.W.apply(A_)  # output_length x batch_size x output_dim
    B_, garbage = self.GRU_B.apply(WA_, states=B)  # batch_size x output_length x output_dim
    B_ = B_.swapaxes(0, 1)
    fc1_r = relu(self.fc1.apply(B_))
    fc2_r = relu(self.fc2.apply(fc1_r))
    return fc2_r
def model(X, params, pDropHidden1, pDropHidden2):
    lnum = 0
    layer = T.dot(X, params[lnum][0]) + params[lnum][1].dimshuffle('x', 0)
    layer = relu(layer, alpha=0)
    layer = basicUtils.dropout(layer, pDropHidden1)
    lnum += 1
    layer = T.dot(layer, params[lnum][0]) + params[lnum][1].dimshuffle('x', 0)
    layer = relu(layer, alpha=0)
    layer = basicUtils.dropout(layer, pDropHidden2)
    lnum += 1
    return softmax(T.dot(layer, params[lnum][0]) + params[lnum][1].dimshuffle('x', 0))  # training produces NaN if the softmax from nnet is used
def fully_layer(params, input, results, nCategories=101, nout=512, weights_path=None):
    trng = RandomStreams(SEED)
    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))
    ninput = tensor.prod(input.shape[1:])
    denselayer1 = tensor.dot(input, params['fc1_w']) + params['fc1_b']
    denselayer1 = relu(denselayer1)
    denselayer2 = tensor.dot(denselayer1, params['fc2_w']) + params['fc2_b']
    denselayer2 = relu(denselayer2)
    results['fc1'] = denselayer1
    results['fc2'] = denselayer2
    return params, results
def modelFlow(X, params):
    lconv1 = relu(conv2d(X, params[0][0], border_mode='full') +
                  params[0][1].dimshuffle('x', 0, 'x', 'x'))
    lds1 = pool_2d(lconv1, (2, 2))
    lconv2 = relu(conv2d(lds1, params[1][0]) +
                  params[1][1].dimshuffle('x', 0, 'x', 'x'))
    lds2 = pool_2d(lconv2, (2, 2))
    lconv3 = relu(conv2d(lds2, params[2][0]) +
                  params[2][1].dimshuffle('x', 0, 'x', 'x'))
    lds3 = pool_2d(lconv3, (2, 2))
    return X, lconv1, lds1, lconv2, lds2, lconv3, lds3
def activation(X, X_test, input_shape, activation_type='relu'):
    if activation_type == 'relu':
        output = relu(X)
        output_test = relu(X_test)
    elif activation_type == 'sigmoid':
        output = theano.tensor.nnet.sigmoid(X)
        output_test = theano.tensor.nnet.sigmoid(X_test)
    else:
        raise Exception('this non linearity does not exist: %s' % activation_type)
    return output, output_test, [], input_shape
def nin2(X, param, shape):
    w1, w2 = param
    map0 = []
    for i in xrange(shape[0]):
        map1 = []
        for j in xrange(shape[1]):
            Xj = X[:, j, :, :].dimshuffle(0, 'x', 1, 2)
            w1ij = w1[i, j, :, :, :].dimshuffle(0, 'x', 1, 2)
            w2ij = w2[i, j, :].dimshuffle('x', 0, 'x', 'x')
            tmp = conv2d(Xj, w1ij, border_mode='valid')
            tmp = relu(tmp, alpha=0)
            map1.append(conv2d(tmp, w2ij, border_mode='valid'))
        map0.append(relu(T.sum(map1, axis=0), alpha=0))
    return T.concatenate(map0, axis=1)
def activation(X, X_test, input_shape, activation_type='relu'):
    if activation_type == 'relu':
        output = relu(X)
        output_test = relu(X_test)
    elif activation_type == 'sigmoid':
        output = tensor.nnet.sigmoid(X)
        output_test = tensor.nnet.sigmoid(X_test)
    else:
        raise Exception('this non linearity does not exist: %s' % activation_type)
    return output, output_test, [], input_shape
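# Usage sketch (not from the snippets above; names and the (None, 64) shape are
# illustrative assumptions). It assumes activation() and relu are in scope as
# defined above, and compiles the 'relu' branch into a Theano function.
import theano
import theano.tensor as T

X_train_sym = T.matrix('X_train_sym')
X_test_sym = T.matrix('X_test_sym')
out, out_test, extra_params, shape = activation(X_train_sym, X_test_sym,
                                                input_shape=(None, 64),
                                                activation_type='relu')
relu_fn = theano.function([X_train_sym], out)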
def recurrence1(wrut, wrct, urx_pre1, cpt_pre1):
    # ResNet-style update
    ur_t = relu(T.dot(wrut, urx_pre1.T).T + urx_pre1)    # (batch_size, d)
    cp_t = relu(T.dot(cpt_pre1, wrct) + cpt_pre1)        # (batch_size, set_size, d)
    # attention: compute the context vector
    ur_t_emb = T.dot(wa2, ur_t.T).T.dimshuffle(0, 'x', 1)
    e_t = T.dot(tanh(ur_t_emb + T.dot(cp_t, wa3)), wa1)  # shape=(batch_size, set_size)
    a_t = softmax(e_t)
    c_t = T.sum(cp_t * a_t.dimshuffle(0, 1, 'x'), axis=1)
    # (batch_size, d), (batch_size, set_size, d), (batch_size, d)
    return [ur_t, cp_t, c_t]
def layer(self, x, w, b, dropout=False):
    m = T.dot(x, w) + b
    h = nnet.relu(m)
    if dropout:
        return self.dropout(h)
    else:
        return h
def model(X, w_h, w_h2, w_o, s_h, s_h2, p_use_input, p_use_hidden):
    # X = dropout(X, p_drop_input)
    h = PRelu(T.dot(X, w_h), s_h)
    # h = dropout(h, p_drop_hidden)
    h2 = relu(T.dot(h, w_h2), s_h2)
    # h2 = dropout(h2, p_drop_hidden)
    py_x = softmax(T.dot(h2, w_o))
    return h, h2, py_x
def metaOp(i, j, X, w1, w2, b1, b2):
    # (n,1,r,c)**(16,1,3,3)=(n,16,r,c)
    hiddens = conv2d(X[:, j, :, :, :], w1[i, j, :, :, :, :], border_mode='half') + b1[i, j, :, :, :, :]
    hiddens = relu(hiddens, alpha=0)  # the relu activation must be applied inside the meta-op
    # return conv2d(hiddens, w2[i, j, :, :, :, :], border_mode='valid') + b2[i, j, :, :, :, :]
    # (n,16,r,c)**(1,16,1,1)=(n,1,r,c)
    outputs = conv2d(hiddens, w2[i, j, :, :, :, :], border_mode='valid') + b2[i, j, :, :, :, :]
    return T.nnet.relu(outputs)
def model(X, prams, pDropConv, pDropHidden):
    lconv1 = relu(conv2d(X, prams[0][0], border_mode='full') +
                  prams[0][1].dimshuffle('x', 0, 'x', 'x'))
    lds1 = pool_2d(lconv1, (2, 2))
    lds1 = basicUtils.dropout(lds1, pDropConv)
    lconv2 = relu(conv2d(lds1, prams[1][0]) +
                  prams[1][1].dimshuffle('x', 0, 'x', 'x'))
    lds2 = pool_2d(lconv2, (2, 2))
    lds2 = basicUtils.dropout(lds2, pDropConv)
    lconv3 = relu(conv2d(lds2, prams[2][0]) +
                  prams[2][1].dimshuffle('x', 0, 'x', 'x'))
    lds3 = pool_2d(lconv3, (2, 2))
    lds3 = basicUtils.dropout(lds3, pDropConv)
    lflat = T.flatten(lds3, outdim=2)
    lfull = relu(T.dot(lflat, prams[3][0]) + prams[3][1])
    lfull = basicUtils.dropout(lfull, pDropHidden)
    return softmax(T.dot(lfull, prams[4][0]) + prams[4][1])  # training produces NaN if the softmax from nnet is used
def optimizer(self):
    if not hasattr(self, '_optimizer'):
        df = self.fvector('A') - self.fvector('B')
        phi = df / (1 + tn.relu(df.norm(2) - 1))
        y = tt.dot(self.samples, phi)
        p = tt.sum(tt.switch(y < 0, 1., 0.))
        q = tt.sum(tt.switch(y > 0, 1., 0.))
        if not hasattr(self, 'avg_case'):
            obj = tt.minimum(tt.sum(1. - tt.exp(-tn.relu(y))),
                             tt.sum(1. - tt.exp(-tn.relu(-y))))
        else:
            obj = p * tt.sum(1. - tt.exp(-tn.relu(y))) + q * tt.sum(1. - tt.exp(-tn.relu(-y)))
        variables = [self.x0]
        for robot in self.robots:
            variables += [robot.x[0]] + robot.u
        for human in self.human.values():
            variables += human.u
        self._optimizer = Maximizer(obj, variables)
    return self._optimizer
def step(x_t, y_t, h_tmT, Wx, Wh, bh, Wy, by, lr, switch):
    # manually build the graph for the inner loop...
    # passing correct h_tm1 is impossible in nested scans
    yo_t = []
    h_tm1 = h_tmT
    for t in range(self.steps):
        h_t = relu(T.dot(x_t[t], Wx) + T.dot(h_tm1, Wh) + bh)
        yo_t.append(relu(T.dot(h_t, Wy) + by))
        h_tm1 = h_t
    updates = OrderedDict()
    # Train the RNN: backprop (loss + DNI output)
    loss = T.mean(T.square(yo_t - y_t))
    dni_out = self.dni.output(h_t)
    for param in self.params:
        dlossdparam = T.grad(loss, param)
        dniJ = T.Lop(h_t, param, dni_out, disconnected_inputs='ignore')
        updates[param] = param - lr * T.switch(T.gt(switch, 0),
                                               dlossdparam + dniJ,
                                               dlossdparam)
    # Update the DNI (from the last step)
    # re-calculate the DNI prediction from the last step
    # note: can't be passed through scan or T.grad won't work
    dni_out_old = self.dni.output(h_tmT)
    # dni_target: current loss backprop'ed + new dni backprop'ed
    dni_target = T.grad(loss, h_tmT) \
                 + T.Lop(h_t, h_tmT, dni_out)
    dni_error = T.sum(T.square(dni_out_old - dni_target))
    for param in self.dni.params:
        gparam = T.grad(dni_error, param)
        updates[param] = param - lr * gparam
    return [h_t, loss, dni_error], updates
def __step(img, prev_bbox, state, timestep):
    conv1 = conv2d(img, conv1_filters, subsample=(conv1_stride, conv1_stride), border_mode='half')
    act1 = NN.relu(conv1)
    flat1 = TT.reshape(act1, (-1, conv1_output_dim))
    gru_in = TT.concatenate([flat1, prev_bbox], axis=1)
    gru_z = NN.sigmoid(TT.dot(gru_in, Wz) + TT.dot(state, Uz) + bz)
    gru_r = NN.sigmoid(TT.dot(gru_in, Wr) + TT.dot(state, Ur) + br)
    gru_h_ = TT.tanh(TT.dot(gru_in, Wg) + TT.dot(gru_r * state, Ug) + bg)
    gru_h = (1 - gru_z) * state + gru_z * gru_h_
    bbox = TT.tanh(TT.dot(gru_h, W_fc2) + b_fc2)
    bbox_cx = ((bbox[:, 2] + bbox[:, 0]) / 2 + 1) / 2 * img_row
    bbox_cy = ((bbox[:, 3] + bbox[:, 1]) / 2 + 1) / 2 * img_col
    bbox_w = TT.abs_(bbox[:, 2] - bbox[:, 0]) / 2 * img_row
    bbox_h = TT.abs_(bbox[:, 3] - bbox[:, 1]) / 2 * img_col
    x = TT.arange(img_row, dtype=T.config.floatX)
    y = TT.arange(img_col, dtype=T.config.floatX)
    mx = TT.maximum(TT.minimum(-TT.abs_(x.dimshuffle('x', 0) - bbox_cx.dimshuffle(0, 'x')) + bbox_w.dimshuffle(0, 'x') / 2., 1), 1e-4)
    my = TT.maximum(TT.minimum(-TT.abs_(y.dimshuffle('x', 0) - bbox_cy.dimshuffle(0, 'x')) + bbox_h.dimshuffle(0, 'x') / 2., 1), 1e-4)
    bbox_mask = mx.dimshuffle(0, 1, 'x') * my.dimshuffle(0, 'x', 1)
    new_cls1_f = cls_f
    new_cls1_b = cls_b
    mask = act1 * bbox_mask.dimshuffle(0, 'x', 1, 2)
    new_featmaps = TG.disconnected_grad(TT.set_subtensor(featmaps[:, timestep], mask))
    new_featmaps.name = 'new_featmaps'
    new_probmaps = TG.disconnected_grad(TT.set_subtensor(probmaps[:, timestep], bbox_mask))
    new_probmaps.name = 'new_probmaps'
    train_featmaps = TG.disconnected_grad(new_featmaps[:, :timestep + 1].reshape(
        ((timestep + 1) * batch_size, conv1_nr_filters, img_row, img_col)))
    train_featmaps.name = 'train_featmaps'
    train_probmaps = TG.disconnected_grad(new_probmaps[:, :timestep + 1])
    train_probmaps.name = 'train_probmaps'
    for _ in range(0, 5):
        train_convmaps = conv2d(train_featmaps, new_cls1_f, subsample=(cls1_stride, cls1_stride),
                                border_mode='half').reshape((batch_size, timestep + 1, batch_size, img_row, img_col))
        train_convmaps.name = 'train_convmaps'
        train_convmaps_selected = train_convmaps[TT.arange(batch_size).repeat(timestep + 1),
                                                 TT.tile(TT.arange(timestep + 1), batch_size),
                                                 TT.arange(batch_size).repeat(timestep + 1)].reshape(
            (batch_size, timestep + 1, img_row, img_col))
        train_convmaps_selected.name = 'train_convmaps_selected'
        train_predmaps = NN.sigmoid(train_convmaps_selected + new_cls1_b.dimshuffle(0, 'x', 'x', 'x'))
        train_loss = NN.binary_crossentropy(train_predmaps, train_probmaps).mean()
        train_grad_cls1_f, train_grad_cls1_b = T.grad(train_loss, [new_cls1_f, new_cls1_b])
        new_cls1_f -= train_grad_cls1_f * 0.1
        new_cls1_b -= train_grad_cls1_b * 0.1
    return (bbox, gru_h, timestep + 1, mask, bbox_mask), {cls_f: TG.disconnected_grad(new_cls1_f),
                                                          cls_b: TG.disconnected_grad(new_cls1_b),
                                                          featmaps: TG.disconnected_grad(new_featmaps),
                                                          probmaps: TG.disconnected_grad(new_probmaps)}
def Interm_Distribution(layers, data):
    weights_bias = []
    mean_matrix = []
    std_matrix = []
    for layer in layers:
        if type(layer) is not Dropout:
            weights_bias.append(layer.get_weights())
    m, n = np.array(weights_bias).shape
    previous_LayerOutput = data
    for i in range(m):
        LayerOutput = relu(np.dot(previous_LayerOutput, weights_bias[i][0]) + np.array(weights_bias[i][1]))
        previous_LayerOutput = LayerOutput
        mean_matrix.append(np.mean(LayerOutput, axis=0))
        std_matrix.append(np.std(LayerOutput, axis=0))
    return mean_matrix, std_matrix
def old_main():
    x, h, lstm_out, params, tparams = construct_lstm_1d()
    import pdb; pdb.set_trace()
    x = tensor.matrix('x', dtype='float32')
    h = tensor.vector('h', dtype='float32')
    rng = numpy.random.RandomState(0)
    ndim = 3
    W = theano.shared(rng.randn(ndim, ndim).astype(numpy.float32), name="W", borrow=True)
    b = theano.shared(rng.randn(ndim).astype(numpy.float32), name="b", borrow=True)
    components, updates = theano.scan(fn=lambda x, h: nnet.relu(tensor.dot(W, h) + x + b),
                                      outputs_info=h,
                                      sequences=x)
    calculate_hiddens = theano.function(inputs=[x, h], outputs=[components])
    ntimes = 39
    x = rng.randn(ntimes, ndim).astype(numpy.float32)
    h = rng.randn(ndim).astype(numpy.float32)
    hs = calculate_hiddens(x, h)
def mlp_forward_prop(num_registers, num_layers, gates, registers, params):
    """Run forward propagation on the register machine (one step)."""
    debug = {}
    # Extract 0th component from all registers.
    last_layer = registers[:, :, 0]
    debug['input'] = last_layer
    # Propagate forward to hidden layers.
    idx = 0
    for i in range(num_layers):
        W, b, idx = take(params, idx)
        last_layer = relu(last_layer.dot(W) + b)
        debug['hidden-%d' % i] = last_layer
    # Propagate forward to gate coefficient outputs.
    # In the result list, each result is a list of
    # coefficients, as gates may have 0, 1, or 2 inputs.
    controller_coefficients = []
    for i, gate in enumerate(gates):
        coeffs = []
        for j in range(gate.arity):
            W, b, idx = take(params, idx)
            layer = softmax(last_layer.dot(W) + b)
            coeffs.append(layer)
            debug['coeff-gate-%d/%d' % (i, j)] = layer
        controller_coefficients.append(coeffs)
    # Forward propagate to new register value coefficients.
    for i in range(num_registers):
        W, b, idx = take(params, idx)
        coeffs = softmax(last_layer.dot(W) + b)
        controller_coefficients.append(coeffs)
        debug['coeff-reg-%d' % i] = coeffs
    # Forward propagate to generate willingness to complete.
    W, b, idx = take(params, idx)
    complete = sigmoid(last_layer.dot(W) + b)
    debug['complete'] = complete
    return debug, controller_coefficients, complete
def __init__(self, rng, input, filter_shape, image_shape, poolsize=(4, 3)):
    assert image_shape[1] == filter_shape[1]
    self.input = input
    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) //
               numpy.prod(poolsize))
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    self.W = theano.shared(
        numpy.asarray(
            rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=theano.config.floatX
        ),
        borrow=True
    )
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)
    conv_out = conv2d(
        input=input,
        filters=self.W,
        filter_shape=filter_shape,
        input_shape=image_shape
    )
    pooled_out = pool_2d(
        input=conv_out,
        ds=poolsize,
        ignore_border=False
    )
    self.output = relu(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
    self.params = [self.W, self.b]
    self.input = input
def set_output(self):
    shuffled_output = self._prev_layer.output.dimshuffle(0, 2, 1, 3, 4)
    pooled_output = pool.pool_3d(shuffled_output,
                                 ds=self._pool_size,
                                 ignore_border=True,
                                 padding=self._padding,
                                 mode='average_inc_pad')
    reshape_pooled = tensor.reshape(pooled_output, (self._input_shape[0], self._input_shape[2]))
    output1 = tensor.dot(reshape_pooled, self.W1.val)
    if self.bias:
        output1 += self.b1.val
    output1 = relu(output1)
    output2 = tensor.dot(output1, self.W2.val)
    if self.bias:
        output2 += self.b2.val
    output2 = sigmoid(output2)
    se_out = tensor.reshape(output2, (self._input_shape[0], 1, self._input_shape[2], 1, 1))
    self._output = self._prev_layer.output * se_out
def model_prediction_zero(model_dict, cell_data, classification=True):
    # Apply cell model
    cell_model = model_dict["cell_n_hidden"]
    cell_input = cell_data
    for l in xrange(len(cell_model)):
        # cell_input = prelu(
        #     T.dot(cell_input, cell_model[l].W) + cell_model[l].b,
        #     cell_model[l].alpha
        # )
        cell_input = relu(
            T.dot(cell_input, cell_model[l].W) + cell_model[l].b,
            cell_model[l].alpha)
    # NO FUSION #
    # Combine to multiplicative fusion layer
    input = cell_input
    # Finally, apply linearity/non-linearity
    if classification == True:
        log_model = model_dict["logistic"]
        log_layer = T.nnet.softmax(T.dot(input, log_model.W) + log_model.b)
        prediction = T.argmax(log_layer, axis=1)
        prediction = prediction.eval()
    else:
        lin_model = model_dict["linear"]
        lin_layer = T.dot(input, lin_model.W) + lin_model.b
        prediction = lin_layer[:, 0]
        prediction = prediction.eval()
    # Return prediction
    return prediction
def __init__(self, rng, input, n_in, n_out, W=None, b=None, layer_index=0):
    self.layername = 'Softmax' + str(layer_index)
    self.input = input
    # W
    if W is None:
        W_bound = numpy.sqrt(6. / (n_in + n_out))
        self.W = theano.shared(numpy.asarray(rng.uniform(low=-W_bound, high=W_bound, size=(n_in, n_out)),
                                             dtype=theano.config.floatX),
                               borrow=True)
    else:
        self.W = theano.shared(value=W.astype(theano.config.floatX), borrow=True)
    self.W.name = self.layername + '#W'
    # b
    if b is None:
        self.b = theano.shared(numpy.zeros((n_out,), dtype=theano.config.floatX), borrow=True)
    else:
        self.b = theano.shared(value=b.astype(theano.config.floatX), borrow=True)
    self.b.name = self.layername + '#b'
    output = relu(T.dot(input, self.W) + self.b)
    self.softmax_output = softmax(output)
    self.pred = self.softmax_output.argmax(axis=1)
    # store parameters of this layer
    self.params = [self.W, self.b]
def set_output(self):
    pooled_output = pool.pool_2d(self._prev_layer.output,
                                 ds=self._pool_size,
                                 ignore_border=True,
                                 padding=self._padding,
                                 mode='average_inc_pad')
    # reshape_pooled = tensor.reshape(pooled_output, (self._input_shape[0], self._input_shape[1]))
    reshape_pooled = pooled_output.flatten(2)
    output1 = tensor.dot(reshape_pooled, self.W1.val)
    if self.bias:
        output1 += self.b1.val
    output1 = relu(output1)
    output2 = tensor.dot(output1, self.W2.val)
    if self.bias:
        output2 += self.b2.val
    output2 = sigmoid(output2)
    print(output2.shape)
    se_output = tensor.reshape(output2, [self._input_shape[0], self._input_shape[1], 1, 1])
    self._output = self._prev_layer.output * se_output
def __init__(self, inpt, filter_shape, image_shape):
    '''Make HexConvLayer with internal params.'''
    assert filter_shape[2] == filter_shape[3]
    assert filter_shape[2] % 2 == 1
    assert image_shape[1] == filter_shape[1]
    fan_in = np.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * np.prod(filter_shape[2:]))
    W_bound = np.sqrt(6. / (fan_in + fan_out))
    self.W = theano.shared(np.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                                      dtype=theano.config.floatX),
                           borrow=True)
    # elig trace
    self.W_e = theano.shared(
        value=np.zeros(filter_shape, dtype=theano.config.floatX),
        borrow=True,
    )
    b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)
    b_e_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
    self.b_e = theano.shared(value=b_e_values, borrow=True)
    conv_out = conv2d(input=inpt,
                      filters=self.W,
                      filter_shape=filter_shape,
                      input_shape=image_shape,
                      border_mode='half')
    self.output = relu(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
    # cleanup
    self.params = [self.W, self.b]
    self.eligs = [self.W_e, self.b_e]
    self.inpt = inpt
def build(self):
    # convolve input feature maps with filters
    conv_out = conv2d(input=self.input,
                      filters=self.W,
                      filter_shape=self.filter_shape,
                      image_shape=self.image_shape)
    if self.non_linear == 'tanh':
        conv_out_tanh = T.tanh(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.output = downsample.max_pool_2d(input=conv_out_tanh, ds=self.pool_size, ignore_border=True)
    elif self.non_linear == 'relu':
        conv_out_relu = relu(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.output = downsample.max_pool_2d(input=conv_out_relu, ds=self.pool_size, ignore_border=True)
    else:
        pooled_out = downsample.max_pool_2d(input=conv_out, ds=self.pool_size, ignore_border=True)
        self.output = pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')
def metaOp2(i, X, w3, b3):
    # (n,32,r,c)**(2,32,1,1)=(n,2,r,c)
    hiddens = conv2d(X[i, :, :, :, :], w3[i, :, :, :, :], border_mode='valid') + b3[i, :, :, :, :]
    hiddens = relu(hiddens, alpha=0)
    return hiddens
def conv1t1(X, param):
    wconv, bconv = param
    layer = conv2d(X, wconv, border_mode='valid') + bconv.dimshuffle('x', 0, 'x', 'x')
    layer = relu(layer, alpha=0)
    return layer
def __init__(self, rng, input, filter_shape, image_shape, unpoolsize=(2, 2), switch=None,
             zero_pad=True, pad_bottom=False, pad_right=False, read_file=False,
             W_input=None, b_input=None, non_linearity=False, relu_param=0.1, sigmoid=False):
    assert image_shape[1] == filter_shape[1]
    self.input = input
    unpooled_out = self.unpool(input=input, ds=unpoolsize, switch=switch,
                               pad_bottom=pad_bottom, pad_right=pad_right)
    if zero_pad == True:
        input = unpooled_out.transpose(2, 0, 1, 3)
        input = T.concatenate([T.shape_padleft(T.zeros_like(input[0]), 1),
                               input,
                               T.shape_padleft(T.zeros_like(input[0]), 1)], axis=0)
        input = input.transpose(1, 2, 0, 3)
        input = input.transpose(3, 0, 1, 2)
        input = T.concatenate([T.shape_padleft(T.zeros_like(input[0]), 1),
                               input,
                               T.shape_padleft(T.zeros_like(input[0]), 1)], axis=0)
        input = input.transpose(1, 2, 3, 0)
    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(unpoolsize))
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    self.W = theano.shared(numpy.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                                         dtype=theano.config.floatX),
                           borrow=True)
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)
    if read_file == True:
        self.W = W_input
        self.b = b_input
    image_ht = image_shape[2] * unpoolsize[0] + 2
    image_wd = image_shape[3] * unpoolsize[1] + 2
    if pad_bottom == True:
        image_ht += 1
    if pad_right == True:
        image_wd += 1
    image_shape = (image_shape[0], image_shape[1], image_ht, image_wd)
    deconv_out = conv.conv2d(input=input,
                             filters=self.W,
                             filter_shape=filter_shape,
                             image_shape=image_shape,
                             border_mode='valid')
    self.output = (deconv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
    if non_linearity == True:
        if sigmoid == False:
            self.output = relu(self.output, relu_param)
        else:
            self.output = T.nnet.sigmoid(self.output)
    self.params = [self.W, self.b]
def getOutput(self, X):
    layer = conv2d(X, self.wconv, border_mode=self.mode, subsample=self.subsample) + \
            self.bconv.dimshuffle('x', 0, 'x', 'x')
    layer = relu(layer, alpha=0)
    return layer
filename = '%s.%s' % (filename, PARAM_EXTENSION)
with open(filename, 'w') as f:
    pickle.dump(data, f)

batchsize = 5

def dist(a, b):
    return ((a - b) * (a - b)).sum()

L = 1
ground_pics = lasagne.layers.get_output(SliceLayer(net['output'], indices=slice(0, batchsize), axis=0))
true_pics = lasagne.layers.get_output(SliceLayer(net['output'], indices=slice(batchsize, 2 * batchsize), axis=0))
false_pics = lasagne.layers.get_output(SliceLayer(net['output'], indices=slice(2 * batchsize, 3 * batchsize), axis=0))
loss = relu(L + dist(ground_pics, true_pics) - dist(ground_pics, false_pics))
loss = loss.mean()
ground_pics_val = lasagne.layers.get_output(SliceLayer(net['output'], indices=slice(0, batchsize), axis=0),
                                            deterministic=True)
true_pics_val = lasagne.layers.get_output(SliceLayer(net['output'], indices=slice(batchsize, 2 * batchsize), axis=0),
                                          deterministic=True)
false_pics_val = lasagne.layers.get_output(SliceLayer(net['output'], indices=slice(2 * batchsize, 3 * batchsize), axis=0),
                                           deterministic=True)
val_loss = relu(L + dist(ground_pics_val, true_pics_val) - dist(ground_pics_val, false_pics_val))
val_loss = val_loss.mean()
params = lasagne.layers.get_all_params(net['output'], trainable=True)
updates = update(loss, params, .0000001)
def layer(x, w):
    b = np.array([1], dtype=theano.config.floatX)
    new_x = T.concatenate([x, b])
    m = T.dot(w.T, new_x)
    h = nnet.relu(m)
    return h
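# Usage sketch (assumed, not part of the original snippet): compiling the
# layer() helper above. The 3-input / 2-unit sizes are arbitrary; w needs one
# extra row for the constant bias that layer() concatenates onto x.
import numpy as np
import theano
import theano.tensor as T
from theano.tensor import nnet  # layer() expects nnet in scope

x = T.vector('x')
w = theano.shared(np.random.randn(4, 2).astype(theano.config.floatX), name='w')  # (3 inputs + bias, 2 units)
f = theano.function([x], layer(x, w))
print(f(np.ones(3, dtype=theano.config.floatX)))  # two non-negative activations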
def step(x_t, h_tm1, Wx, Wh, Wy, bh, by):
    h_t = relu(T.dot(x_t, Wx) + T.dot(h_tm1, Wh) + bh)
    y_t = relu(T.dot(h_t, Wy) + by)
    return [h_t, y_t]
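# Usage sketch (an assumption, not from the original code): driving the step()
# function above with theano.scan to unroll a simple ReLU RNN. Dimensions
# (4 inputs, 8 hidden units, 3 outputs) are illustrative only.
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import relu  # step() expects relu in scope

n_in, n_hid, n_out = 4, 8, 3
floatX = theano.config.floatX
x = T.matrix('x')    # (time, n_in)
h0 = T.vector('h0')  # (n_hid,)
Wx = theano.shared(0.01 * np.random.randn(n_in, n_hid).astype(floatX), name='Wx')
Wh = theano.shared(0.01 * np.random.randn(n_hid, n_hid).astype(floatX), name='Wh')
Wy = theano.shared(0.01 * np.random.randn(n_hid, n_out).astype(floatX), name='Wy')
bh = theano.shared(np.zeros(n_hid, dtype=floatX), name='bh')
by = theano.shared(np.zeros(n_out, dtype=floatX), name='by')

# scan feeds x_t from `sequences`, h_tm1 from `outputs_info`, and the weights
# from `non_sequences`, which matches step()'s signature.
[h_seq, y_seq], _ = theano.scan(step,
                                sequences=x,
                                outputs_info=[h0, None],
                                non_sequences=[Wx, Wh, Wy, bh, by])
forward = theano.function([x, h0], y_seq)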
def fc(X, param):
    w, b = param
    layer = T.dot(X, w) + b.dimshuffle('x', 0)
    layer = relu(layer, alpha=0)
    return layer
world.avg_case = True
phis = []
'''db['W'] = W
db['N'] = N
db['S'] = S
db['P'] = P
db['method'] = method'''
if method == 2:
    f = vector(len(W))
    phi = f / tt.maximum(1., f.norm(2))
    A = matrix(0, len(W))
    y = tt.dot(A, phi)
    p = tt.sum(tt.switch(y < 0, 1., 0.))
    q = tt.sum(tt.switch(y > 0, 1., 0.))
    # obj = tt.minimum(tt.sum(1.-tt.exp(-tn.relu(y))), tt.sum(1.-tt.exp(-tn.relu(-y))))
    obj = p * tt.sum(1. - tt.exp(-tn.relu(y))) + q * tt.sum(1. - tt.exp(-tn.relu(-y)))
    optimizer = Maximizer(obj, [f])
if method == 5:
    cand_phis = []
    for i in range(50):
        x = np.random.normal(size=len(W))
        cand_phis.append(x / np.linalg.norm(x))
if method == 6:
    cand_phis = []
    for i in range(50):
        world.randomize()
        cand_phis.append(world.ndf)
if method == 5 or method == 6:
    f = tt.vector()
    A = matrix(0, len(W))
                          input_shape=X_shape,
                          filter_shape=W_shape,
                          border_mode='valid')
# NOTE:
# output_shape = (minibatch, 1, output_rows, output_columns)

# Pooling layer
pooled_out = pool_2d(input=conv_out_layer, ds=pool_size, ignore_border=True)
# NOTE:
# ignore_border ==> round down if convolution_output / pool_size is not int

# Implement the bias term and nonlinearity
b_conv = theano.shared(np.zeros(n_filters,), name='b_conv')
conv_out = relu(pooled_out + b_conv.dimshuffle('x', 0, 'x', 'x'))
conv_out_flat = conv_out.flatten(2)

# Fully-connected layers
n_full = 1 / (np.sqrt(n_inputs))
W1_full = theano.shared(n_full * rng.randn(n_inputs, n_hidden), name='W1_full')
b1_full = theano.shared(np.zeros(n_hidden), name='b1_full')
W2_full = theano.shared(n_full * rng.randn(n_hidden, n_outputs), name='W2_full')
b2_full = theano.shared(np.zeros(n_outputs), name='b2_full')
z1 = conv_out_flat.dot(W1_full) + b1_full
hidden = relu(z1)
z2 = hidden.dot(W2_full) + b2_full
output = T.nnet.softmax(z2)
prediction = np.argmax(output, axis=1)
crossent = T.nnet.categorical_crossentropy(output, y) / n_samples
def gap(X, param):
    wgap = param
    layer = conv2d(X, wgap, border_mode='valid')
    layer = relu(layer, alpha=0)
    layer = T.mean(layer, axis=(2, 3))
    return layer
def __init__(self, rng, input, filter_shape, image_shape, zero_pad=True, poolsize=(2, 2),
             read_file=False, W_input=None, b_input=None, relu_param=0.1):
    # print image_shape, filter_shape
    assert image_shape[1] == filter_shape[1]
    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize))
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    self.W = theano.shared(numpy.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                                         dtype=theano.config.floatX),
                           borrow=True)
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)
    if read_file == True:
        self.W = W_input
        self.b = b_input
    if zero_pad == True:
        input = input.transpose(2, 0, 1, 3)
        input = T.concatenate([T.shape_padleft(T.zeros_like(input[0]), 1),
                               input,
                               T.shape_padleft(T.zeros_like(input[0]), 1)], axis=0)
        input = input.transpose(1, 2, 0, 3)
        input = input.transpose(3, 0, 1, 2)
        input = T.concatenate([T.shape_padleft(T.zeros_like(input[0]), 1),
                               input,
                               T.shape_padleft(T.zeros_like(input[0]), 1)], axis=0)
        input = input.transpose(1, 2, 3, 0)
    self.input = input
    image_shape = (image_shape[0], image_shape[1], image_shape[2] + 2, image_shape[3] + 2)
    conv_out = conv.conv2d(input=self.input,
                           filters=self.W,
                           filter_shape=filter_shape,
                           image_shape=image_shape,
                           border_mode='valid')
    pooled_out = downsample.max_pool_2d(input=conv_out, ds=poolsize, ignore_border=True)
    self.switch = T.abs_(1 - T.sgn(T.abs_(conv_out - pooled_out.repeat(2, axis=2).repeat(2, axis=3))))
    self.output = relu(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'), relu_param)
    self.params = [self.W, self.b]
def gap(X, param):
    wgap, bgap = param
    layer = conv2d(X, wgap, border_mode='valid') + bgap.dimshuffle('x', 0, 'x', 'x')
    layer = relu(layer, alpha=0)
    layer = T.mean(layer, axis=(2, 3))
    return layer
def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
    """
    Allocate a LeNetConvPoolLayer with shared variable internal parameters.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.dtensor4
    :param input: symbolic image tensor, of shape image_shape

    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num input feature maps,
                          filter height, filter width)

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width)

    :type poolsize: tuple or list of length 2
    :param poolsize: the downsampling (pooling) factor (#rows, #cols)
    """
    assert image_shape[1] == filter_shape[1]
    self.input = input
    # there are "num input feature maps * filter height * filter width"
    # inputs to each hidden unit
    fan_in = numpy.prod(filter_shape[1:])
    # each unit in the lower layer receives a gradient from:
    # "num output feature maps * filter height * filter width" /
    # pooling size
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) //
               numpy.prod(poolsize))
    # initialize weights with random weights
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    self.W = theano.shared(numpy.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                                         dtype=theano.config.floatX),
                           borrow=True)
    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)
    # convolve input feature maps with filters
    conv_out = conv2d(input=input,
                      filters=self.W,
                      filter_shape=filter_shape,
                      input_shape=image_shape)
    # pool each feature map individually, using maxpooling
    pooled_out = pool.pool_2d(input=conv_out, ds=poolsize, ignore_border=True)
    # add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
    # thus be broadcasted across mini-batches and feature map
    # width & height
    self.output = relu(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
    # store parameters of this layer
    self.params = [self.W, self.b]
    # keep track of model input
    self.input = input
def __init__(self, rng, input, filter_shape, **kwargs):
    """
    Convolutional layer

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.dtensor4
    :param input: symbolic image tensor, of shape image_shape

    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num input feature maps,
                          filter height, filter width)

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width)

    :type poolsize: tuple or list of length 2
    :param poolsize: the downsampling (pooling) factor (#rows, #cols)
    """
    pad = kwargs.get('pad', 0)
    subsample = kwargs.get('subsample', (1, 1))
    self.W_learning_rate = kwargs.get('W_lr_mult', 0.01)
    self.W_decay_mult = kwargs.get('W_decay_mult', 0)
    self.b_learning_rate = kwargs.get('b_lr_mult', 0.01)
    self.b_decay_mult = kwargs.get('b_decay_mult', 0)
    self.input = input
    # there are "num input feature maps * filter height * filter width"
    # inputs to each hidden unit
    fan_in = numpy.prod(filter_shape[1:])
    # each unit in the lower layer receives a gradient from:
    # "num output feature maps * filter height * filter width"
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]))  # // numpy.prod(poolsize)
    # initialize weights with random weights
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    W_value = kwargs.get('W', numpy.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                                            dtype=theano.config.floatX))
    W_value = W_value.astype(theano.config.floatX)
    self.W = theano.shared(W_value, borrow=True)
    # the bias is a 1D tensor -- one bias per output feature map
    b_values = kwargs.get('b', numpy.zeros((filter_shape[0],), dtype=theano.config.floatX))
    b_values = b_values.astype(theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)
    # convolve input feature maps with filters
    conv_out = conv2d(input=input,
                      filters=self.W,
                      filter_shape=filter_shape,
                      border_mode=pad,
                      subsample=subsample)
    # add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
    # thus be broadcasted across mini-batches and feature map
    # width & height
    self.output = relu(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
    # store parameters of this layer
    self.params = [self.W, self.b]
    # keep track of model input
    self.input = input
def getOutput(self, X):
    layer = T.dot(X, self.whidden) + self.bhidden.dimshuffle('x', 0)
    layer = relu(layer, alpha=0)
    return layer
def metaOp(i, j, X, w1, w2, b1, b2):
    hiddens = conv2d(X[:, j, :, :, :], w1[i, j, :, :, :, :], border_mode='half') + b1[i, j, :, :, :, :]
    hiddens = T.nnet.relu(hiddens, alpha=0)
    outputs = conv2d(hiddens, w2[i, j, :, :, :, :], border_mode='valid') + b2[i, j, :, :, :, :]
    return relu(outputs)
def output(self, x):
    next_input = x
    for n in range(self.n_layers):
        next_input = relu(T.dot(next_input, self.W[n]) + self.b[n])
    return next_input
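# Usage sketch (assumed; the class that defines output() as a method is not
# shown above). _TinyMLP is a hypothetical stand-in exposing the n_layers, W,
# and b attributes that output() reads; the [10, 32, 5] sizes are arbitrary.
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import relu  # output() expects relu in scope

class _TinyMLP(object):
    def __init__(self, sizes):
        floatX = theano.config.floatX
        self.n_layers = len(sizes) - 1
        self.W = [theano.shared(0.01 * np.random.randn(a, b).astype(floatX))
                  for a, b in zip(sizes[:-1], sizes[1:])]
        self.b = [theano.shared(np.zeros(b, dtype=floatX)) for b in sizes[1:]]

x = T.matrix('x')
mlp = _TinyMLP([10, 32, 5])
y = output(mlp, x)  # call the forward pass above with mlp standing in for `self`
forward = theano.function([x], y)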
def layer(x, w):
    b = np.array([1], dtype=theano.config.floatX)
    new_x = T.concatenate([x, b])
    m = T.dot(w.T, new_x)  # theta1: 3x3 * x: 3x1 = 3x1 ;;; theta2: 1x4 * 4x1
    h = 2 * nnet.relu(m)
    return h
def symb_forward(self, symb_input):
    return relu(symb_input, self.alpha)
def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
    """
    Allocate a LeNetConvPoolLayer with shared variable internal parameters.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.dtensor4
    :param input: symbolic image tensor, of shape image_shape

    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num input feature maps,
                          filter height, filter width)

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width)

    :type poolsize: tuple or list of length 2
    :param poolsize: the downsampling (pooling) factor (#rows, #cols)
    """
    assert image_shape[1] == filter_shape[1]
    self.input = input
    pad = (2, 2)
    zero_padding = T.zeros(
        (image_shape[0], image_shape[1], image_shape[2] + (2 * pad[0]), image_shape[3] + (2 * pad[1])),
        dtype=theano.config.floatX,
    )
    zero_padding = T.set_subtensor(
        zero_padding[:, :, pad[0]:image_shape[2] + pad[0], pad[1]:image_shape[3] + pad[1]],
        input
    )
    image_shape = (image_shape[0], image_shape[1], image_shape[2] + (2 * pad[0]), image_shape[3] + (2 * pad[1]))
    # there are "num input feature maps * filter height * filter width"
    # inputs to each hidden unit
    fan_in = numpy.prod(filter_shape[1:])
    # each unit in the lower layer receives a gradient from:
    # "num output feature maps * filter height * filter width" /
    # pooling size
    fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) // numpy.prod(poolsize)
    # initialize weights with random weights
    W_bound = numpy.sqrt(6.0 / (fan_in + fan_out))
    self.W = theano.shared(
        numpy.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape), dtype=theano.config.floatX),
        borrow=True,
    )
    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)
    # convolve input feature maps with filters
    conv_out = conv2d(input=zero_padding, filters=self.W, filter_shape=filter_shape, image_shape=image_shape)
    non_linearized = relu(conv_out)
    # downsample each feature map individually, using maxpooling
    pooled_out = downsample.max_pool_2d(input=non_linearized, ds=poolsize, ignore_border=True)
    # add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
    # thus be broadcasted across mini-batches and feature map
    # width & height
    self.output = pooled_out + self.b.dimshuffle("x", 0, "x", "x")
    # store parameters of this layer
    self.params = [self.W, self.b]
    # keep track of model input
    self.input = input
def metaOp(i, j, X, w1, w2):
    hiddens = conv2d(X[:, j, :, :, :], w1[i, j, :, :, :, :], border_mode='valid')
    hiddens = relu(hiddens, alpha=0)
    outputs = conv2d(hiddens, w2[i, j, :, :, :, :], border_mode='valid')
    return relu(outputs)