def __init__(self, fin, h1, piece1, h2, piece2, outputs, lr, C,
             pDropHidden1=0.2, pDropHidden2=0.5):
    """Maxout MLP: two maxout hidden layers followed by a softmax output.

    `piece*` is the number of linear pieces per maxout unit (realized as
    extra hidden units pooled by cross-channel max pooling).
    """
    # Hyper-parameters.
    self.lr = lr
    self.C = C
    self.pDropHidden1 = pDropHidden1
    self.pDropHidden2 = pDropHidden2
    # All trainable parameters (connection weights and biases) in one list.
    self.params = []
    hiddens = []
    pieces = []
    # Maxout layers: each hidden layer allocates h * piece units.
    self.params.append(layerMLPParams((fin, h1 * piece1)))
    hiddens.append(h1)
    pieces.append(piece1)
    self.params.append(layerMLPParams((h1, h2 * piece2)))
    hiddens.append(h2)
    pieces.append(piece2)
    self.params.append(layerMLPParams((h2, outputs)))
    # Theano symbolic variables and expressions.
    self.X = T.matrix('X')
    self.Y = T.matrix('Y')
    # Training cost (with dropout).
    YDropProb = model(self.X, self.params, hiddens, pieces, pDropHidden1, pDropHidden2)
    self.trNeqs = basicUtils.neqs(YDropProb, self.Y)
    trCrossEntropy = categorical_crossentropy(YDropProb, self.Y)
    self.trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
    # Validation / test cost (dropout disabled).
    YFullProb = model(self.X, self.params, hiddens, pieces, 0., 0.)
    self.vateNeqs = basicUtils.neqs(YFullProb, self.Y)
    self.YPred = T.argmax(YFullProb, axis=1)
    vateCrossEntropy = categorical_crossentropy(YFullProb, self.Y)
    self.vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
def __init__(self, fin, h1, h2, outputs, lr, C, pDropHidden1=0.2, pDropHidden2=0.5):
    """Two-hidden-layer MLP with dropout and an L2-regularized cost."""
    # Hyper-parameters.
    self.lr = lr
    self.C = C
    self.pDropHidden1 = pDropHidden1
    self.pDropHidden2 = pDropHidden2
    # All trainable parameters (connection weights and biases).
    self.params = []
    self.params.append(layerMLPParams((fin, h1)))
    self.params.append(layerMLPParams((h1, h2)))
    self.params.append(layerMLPParams((h2, outputs)))
    # Theano symbolic variables and expressions.
    self.X = T.matrix('X')
    self.Y = T.matrix('Y')
    # Training cost (with dropout).
    YDropProb = model(self.X, self.params, pDropHidden1, pDropHidden2)
    self.trNeqs = basicUtils.neqs(YDropProb, self.Y)
    trCrossEntropy = categorical_crossentropy(YDropProb, self.Y)
    self.trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
    # Validation / test cost (dropout disabled).
    YFullProb = model(self.X, self.params, 0., 0.)
    self.vateNeqs = basicUtils.neqs(YFullProb, self.Y)
    self.YPred = T.argmax(YFullProb, axis=1)
    vateCrossEntropy = categorical_crossentropy(YFullProb, self.Y)
    self.vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
def __init__(self, fin, f1, nin1, f2, nin2, f3, nin3, expand, h1, outputs, lr, C,
             pDropConv=0.2, pDropHidden=0.5):
    """Network-in-Network CNN with a global-average-pooling classifier head."""
    # Hyper-parameters.
    self.lr = lr
    self.C = C
    self.pDropConv = pDropConv
    self.pDropHidden = pDropHidden
    # All trainable parameters (connection weights and biases).
    self.params = []
    self.paramsNIN = []
    self.paramsConv = []
    # NIN conv layers: w = (maps_out, maps_in, mlpconv_units, k_rows, k_cols),
    # b = (maps_out,); `expand` multiplies the output map count.
    self.paramsNIN.append(layerNINParams((f1, fin, nin1, 3, 3), expand))
    self.paramsNIN.append(layerNINParams((f2, f1 * expand, nin2, 3, 3), expand))
    self.paramsNIN.append(layerNINParams((f3, f2 * expand, nin3, 3, 3), expand))
    # 1x1 conv layers feeding global average pooling.
    self.paramsConv.append(layerConvParams((h1, f3 * expand, 1, 1)))
    self.paramsConv.append(layerConvParams((outputs, h1, 1, 1)))
    self.params = self.paramsNIN + self.paramsConv
    # Theano symbolic variables and expressions.
    self.X = T.tensor4('X')
    self.Y = T.matrix('Y')
    # Training cost (with dropout).
    YDropProb = model(self.X, self.params, pDropConv, pDropHidden)
    self.trNeqs = basicUtils.neqs(YDropProb, self.Y)
    trCrossEntropy = categorical_crossentropy(YDropProb, self.Y)
    self.trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
    # Validation / test cost (dropout disabled).
    YFullProb = model(self.X, self.params, 0., 0.)
    self.vateNeqs = basicUtils.neqs(YFullProb, self.Y)
    self.YPred = T.argmax(YFullProb, axis=1)
    vateCrossEntropy = categorical_crossentropy(YFullProb, self.Y)
    self.vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
def cost(self, targets, mask=None):
    """Summed categorical cross-entropy of self.p_y_given_x against targets.

    For 3-D predictions — assumed (time, batch, classes), TODO confirm — the
    sequence is flattened to 2-D and per-step losses are weighted by `mask`.
    For 2-D predictions, `mask` is ignored.
    """
    prediction = self.p_y_given_x
    if prediction.ndim == 3:
        rows = prediction.shape[0] * prediction.shape[1]
        prediction_flat = prediction.reshape((rows, prediction.shape[2]), ndim=2)
        ce = categorical_crossentropy(prediction_flat, targets.flatten()) * mask.flatten()
    else:
        ce = categorical_crossentropy(prediction, targets)
    return T.sum(ce)
def cost(self, p_y_given_x, targets, mask=None):
    """Summed categorical cross-entropy.

    3-D predictions are flattened to (steps, classes) and each step's loss
    is weighted by `mask`; 2-D predictions require mask to be None.
    """
    prediction = p_y_given_x
    if prediction.ndim == 3:
        rows = prediction.shape[0] * prediction.shape[1]
        flat_pred = prediction.reshape((rows, prediction.shape[2]), ndim=2)
        ce = categorical_crossentropy(flat_pred, targets.flatten()) * mask.flatten()
        return T.sum(ce)
    assert mask is None
    return T.sum(categorical_crossentropy(prediction, targets))
def cost_entry(self, targets, mask=None):
    """Per-entry cross-entropy: loss summed over time, one value per batch column.

    self.p_y_given_x has shape like (time, batch, classes), e.g. (9, 5, 24);
    the return has shape (batch,), e.g. (5,).
    """
    prediction = self.p_y_given_x
    if prediction.ndim == 3:
        rows = prediction.shape[0] * prediction.shape[1]
        # Flatten to (time * batch, classes), e.g. (45, 24).
        prediction_flat = prediction.reshape((rows, prediction.shape[2]), ndim=2)
        ce = categorical_crossentropy(prediction_flat, targets.flatten()) * mask.flatten()
    else:
        ce = categorical_crossentropy(prediction, targets)
    # Fold back to (time, batch) and sum over the time axis.
    ce_entry = ce.reshape((prediction.shape[0], prediction.shape[1]), ndim=2).sum(axis=0)
    return ce_entry
def __init__(self, fin, f1, piece1, f2, piece2, f3, piece3, h1, pieceh1, h2, pieceh2,
             outputs, lr, C, pDropConv=0.2, pDropHidden=0.5):
    """Maxout CNN (three conv+pool stages) followed by a maxout MLP classifier."""
    # Hyper-parameters.
    self.lr = lr
    self.C = C
    self.pDropConv = pDropConv
    self.pDropHidden = pDropHidden
    # All trainable parameters (connection weights and biases).
    self.params = []
    self.paramsCNN = []
    self.paramsMLP = []
    mapunits = []
    pieces = []
    # Conv layers: w = (maps_out, maps_in, kernel_rows, kernel_cols), b = (maps_out,).
    self.paramsCNN.append(layerCNNParams((f1 * piece1, fin, 3, 3)))  # conv: (32, 32) pool: (16, 16)
    mapunits.append(f1)
    pieces.append(piece1)
    self.paramsCNN.append(layerCNNParams((f2 * piece2, f1, 3, 3)))  # conv: (16, 16) pool: (8, 8)
    mapunits.append(f2)
    pieces.append(piece2)
    self.paramsCNN.append(layerCNNParams((f3 * piece3, f2, 3, 3)))  # conv: (8, 8) pool: (4, 4)
    mapunits.append(f3)
    pieces.append(piece3)
    # Fully connected maxout layers; the MLP input size is the flattened size
    # of the last conv layer (f3 maps of 4x4).
    self.paramsMLP.append(layerMLPParams((f3 * 4 * 4, h1 * pieceh1)))
    mapunits.append(h1)
    pieces.append(pieceh1)
    self.paramsMLP.append(layerMLPParams((h1, h2 * pieceh2)))
    mapunits.append(h2)
    pieces.append(pieceh2)
    self.paramsMLP.append(layerMLPParams((h2, outputs)))
    self.params = self.paramsCNN + self.paramsMLP
    # Theano symbolic variables and expressions.
    self.X = T.tensor4('X')
    self.Y = T.matrix('Y')
    # Training cost (with dropout).
    YDropProb = model(self.X, self.params, mapunits, pieces, pDropConv, pDropHidden)
    self.trNeqs = basicUtils.neqs(YDropProb, self.Y)
    trCrossEntropy = categorical_crossentropy(YDropProb, self.Y)
    self.trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
    # Validation / test cost (dropout disabled).
    YFullProb = model(self.X, self.params, mapunits, pieces, 0., 0.)
    self.vateNeqs = basicUtils.neqs(YFullProb, self.Y)
    self.YPred = T.argmax(YFullProb, axis=1)
    vateCrossEntropy = categorical_crossentropy(YFullProb, self.Y)
    self.vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
def categorical_crossentropy_segm(prediction_proba, targets):
    """Per-image categorical cross-entropy for segmentation outputs.

    Reshapes (batch, classes, H, W) probability maps into a flat
    (batch*H*W, classes) array, computes the cross-entropy, re-weights
    pixels by class occurrence, and sums over the spatial axes so the
    result has one value per image.
    """
    shape = T.shape(prediction_proba)
    pred_mod1 = T.transpose(prediction_proba, (0, 2, 3, 1))
    pred_mod = T.reshape(pred_mod1, (-1, shape[1]))
    if prediction_proba.ndim == targets.ndim:
        # One-hot targets: flatten the same way as the predictions.
        targ_mod1 = T.transpose(targets, (0, 2, 3, 1))
        targ_mod = T.reshape(targ_mod1, (-1, shape[1]))
    else:
        # Integer class targets.
        targ_mod = T.reshape(targets, (-1,))
    results = categorical_crossentropy(pred_mod, targ_mod)
    results = T.reshape(results, (shape[0], shape[2], shape[3]))

    # QUICK IMPLEMENTATION FOR TWO SPECIFIC CLASSES. NEEDS GENERALIZATION
    # Weights depending on class occurrence: (background, cars).
    weights = (1.02275, 44.9647)
    cars_indx, not_cars_indx = T.nonzero(targets), T.nonzero(T.eq(targets, 0))
    # BUG FIX: T.set_subtensor is functional -- it returns a new tensor and
    # does not modify `results` in place.  The original discarded the return
    # values, so the class re-weighting silently had no effect.
    results = T.set_subtensor(results[cars_indx],
                              results[cars_indx] * float32(weights[1]))
    results = T.set_subtensor(results[not_cars_indx],
                              results[not_cars_indx] * float32(weights[0]))
    return T.sum(results, axis=(1, 2))
def get_expression(model, train, img_in, label_in):
    """Build the conv-net graph; returns (loss, probs) and, when training, grads.

    Dropout layers (drop1..drop3, module-level) are applied only when
    `train` is truthy.
    """
    conv1 = relu(model.conv(img_in, name='conv1', shape=(32, 3, 3, 3, 1, 1)))
    conv2 = relu(model.conv(conv1, name='conv2', shape=(32, 32, 3, 3, 1, 1)))
    pool1 = model.pooling(conv2, name='pool1', shape=(2, 2))
    if train:
        pool1 = drop1.drop(pool1)

    conv3 = relu(model.conv(pool1, name='conv3', shape=(64, 32, 3, 3, 1, 1)))
    conv4 = relu(model.conv(conv3, name='conv4', shape=(64, 64, 3, 3, 1, 1)))
    pool2 = model.pooling(conv4, name='pool2', shape=(2, 2))
    if train:
        pool2 = drop2.drop(pool2)

    # Flatten for the fully connected head.
    pool2 = pool2.reshape((batch_size, -1))
    fc1 = relu(model.fc(pool2, name='fc1', shape=(4096, 512)))
    if train:
        fc1 = drop3.drop(fc1)
    fc2 = softmax(model.fc(fc1, name='fc2', shape=(512, 10)))

    loss = T.mean(NN.categorical_crossentropy(fc2, label_in))
    if not train:
        return loss, fc2
    grads = rmsprop(loss, model.get_params(), lr=var_lr, epsilon=var_lr ** 2,
                    return_norm=False)
    return loss, fc2, grads
def create_train_func(layers, lr=0.01):
    """Compile the adagrad training function for the sequence model."""
    # Input dims: batch, sequence, vocabulary.
    X = T.tensor3('X')
    X_batch = T.tensor3('X_batch')
    # Target class indices.
    y = T.ivector('y')
    y_batch = T.ivector('y_batch')

    y_hat = get_output(layers['l_out'], X, deterministic=False)
    train_loss = T.mean(categorical_crossentropy(y_hat, y), axis=0)

    params = get_all_params(layers['l_out'], trainable=True)
    updates = adagrad(train_loss, params, lr)

    return theano.function(
        inputs=[theano.In(X_batch), theano.In(y_batch)],
        outputs=train_loss,
        updates=updates,
        givens={X: X_batch, y: y_batch},
    )
def masked_softmax_cross_entropy(self, preds, labels, mask):
    """Softmax cross-entropy loss with masking."""
    loss = nnet.categorical_crossentropy(preds, labels)
    # Rescale the mask so its mean is 1; the masked mean then keeps the
    # same scale as an unmasked mean.
    mask = mask.astype('float32')
    mask /= T.mean(mask)
    return T.mean(loss * mask)
def sequence_categorical_crossentropy(prediction, targets, mask):
    """Masked categorical cross-entropy summed over a 3-D prediction sequence."""
    rows = prediction.shape[0] * prediction.shape[1]
    prediction_flat = prediction.reshape((rows, prediction.shape[2]), ndim=2)
    ce = categorical_crossentropy(prediction_flat, targets.flatten())
    return T.sum(ce * mask.flatten())
def sequence_categorical_crossentropy(prediction, targets, mask):
    """Cross-entropy over a flattened sequence, weighted by `mask` and summed."""
    flat_shape = ((prediction.shape[0] * prediction.shape[1]), prediction.shape[2])
    flat_pred = prediction.reshape(flat_shape, ndim=2)
    flat_targets = targets.flatten()
    flat_mask = mask.flatten()
    ce = categorical_crossentropy(flat_pred, flat_targets)
    return T.sum(ce * flat_mask)
def __init__(self, fin, f1, f2, f3, f4, f5, f6, h1, h2, outputs, lr, C,
             pDropConv=0.2, pDropHidden=0.5, batchSize=128):
    """CNN (three conv+pool stages) followed by a two-hidden-layer MLP."""
    # Hyper-parameters.
    self.lr = lr
    self.C = C
    self.pDropConv = pDropConv
    self.pDropHidden = pDropHidden
    self.batchSize = batchSize
    # All trainable parameters (connection weights and biases).
    self.params = []
    self.paramsCNN = []
    self.paramsMLP = []
    self.indices = []
    # Conv layers: w = (maps_out, maps_in, kernel_rows, kernel_cols), b = (maps_out,).
    inputShape = (batchSize, fin, 32, 32)
    layerShape = addConvLayer(inputShape, (f1, fin, 3, 3), self.paramsCNN,
                              self.indices, 'half', (1, 1))
    layerShape = addPoolLayer(layerShape, (2, 2), 'valid')
    layerShape = addConvLayer(layerShape, (f2, f1, 3, 3), self.paramsCNN,
                              self.indices, 'half', (1, 1))
    layerShape = addPoolLayer(layerShape, (2, 2), 'valid')
    layerShape = addConvLayer(layerShape, (f3, f2, 3, 3), self.paramsCNN,
                              self.indices, 'half', (1, 1))
    layerShape = addPoolLayer(layerShape, (2, 2), 'valid')
    # Fully connected layers; the MLP input size is the flattened spatial
    # size of the last conv layer.
    self.paramsMLP.append(layerMLPParams((f3 * np.prod(layerShape[-2:]), h1)))
    self.paramsMLP.append(layerMLPParams((h1, h2)))
    self.paramsMLP.append(layerMLPParams((h2, outputs)))
    self.params = self.paramsCNN + self.paramsMLP
    # Theano symbolic variables and expressions.
    self.X = T.tensor4('X')
    self.Y = T.matrix('Y')
    # Training cost (with dropout).
    YDropProb = model(self.X, self.params, self.indices, pDropConv, pDropHidden)
    self.trNeqs = basicUtils.neqs(YDropProb, self.Y)
    trCrossEntropy = categorical_crossentropy(YDropProb, self.Y)
    self.trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
    # Validation / test cost (dropout disabled).
    YFullProb = model(self.X, self.params, self.indices, 0., 0.)
    self.vateNeqs = basicUtils.neqs(YFullProb, self.Y)
    self.YPred = T.argmax(YFullProb, axis=1)
    vateCrossEntropy = categorical_crossentropy(YFullProb, self.Y)
    self.vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
def cross_entropy(yhat, y):
    """Mean categorical cross-entropy between predictions `yhat` and targets `y`.

    Handles one-hot targets (y.ndim == yhat.ndim) and integer class-index
    targets (y.ndim == yhat.ndim - 1); leading dimensions are flattened so
    arbitrary batch/sequence shapes are accepted.

    BUG FIXES vs. the original:
      * `T.reshape(-1, last_dim_len)` passed -1 as the tensor to reshape and
        never touched yhat/y; now the operands themselves are reshaped.
      * the final line referenced undefined names `yhatt`/`yt` (NameError).
      * `last_dim_len` is now taken from `yhat` (the class axis), which is
        also correct when `y` holds integer indices.
      * the index-target branch condition was `y.ndim == yhat.ndim + 1`,
        which contradicts flattening y to indices; it is now `- 1`.
    """
    last_dim_len = yhat.shape[-1]
    if y.ndim == yhat.ndim:
        # y is one-hot: flatten both to (samples, classes).
        yhat = yhat.reshape((-1, last_dim_len))
        y = y.reshape((-1, last_dim_len))
    elif y.ndim == yhat.ndim - 1:
        # y holds integer class indices.
        yhat = yhat.reshape((-1, last_dim_len))
        y = T.flatten(y)
    return T.mean(nnet.categorical_crossentropy(yhat, y))
def __init__(self, fin, f1, nin1, f2, nin2, f3, nin3, h1, outputs, lr, C,
             pDropConv=0.2, pDropHidden=0.5):
    """NIN CNN whose classifier head uses global average pooling (1x1 convs)."""
    # Hyper-parameters.
    self.lr = lr
    self.C = C
    self.pDropConv = pDropConv
    self.pDropHidden = pDropHidden
    # All trainable parameters (connection weights and biases).
    self.params = []
    self.paramsNIN = []
    self.paramsFCorConv = []
    # NIN conv layers: w = (maps_out, maps_in, mlpconv_units, k_rows, k_cols).
    inputShape = (32, 32)
    layerShape = addNINLayer(inputShape, (f1, fin, nin1, 3, 3), self.paramsNIN, 'half')
    layerShape = addPoolLayer(layerShape, (2, 2))
    layerShape = addNINLayer(layerShape, (f2, f1, nin2, 3, 3), self.paramsNIN, 'half')
    layerShape = addPoolLayer(layerShape, (2, 2))
    layerShape = addNINLayer(layerShape, (f3, f2, nin3, 3, 3), self.paramsNIN, 'half')
    layerShape = addPoolLayer(layerShape, (2, 2))
    # Alternative fully connected head (disabled in the original):
    #   self.paramsFCorGAP.append(layerMLPParams((f3 * np.prod(layerShape), h1)))
    #   self.paramsFCorGAP.append(layerMLPParams((h1, outputs)))
    # Global-average-pooling head via 1x1 convolutions.
    self.paramsFCorConv.append(layerConvParams((h1, f3, 1, 1)))
    self.paramsFCorConv.append(layerConvParams((outputs, h1, 1, 1)))
    self.params = self.paramsNIN + self.paramsFCorConv
    # Theano symbolic variables and expressions.
    self.X = T.tensor4('X')
    self.Y = T.matrix('Y')
    # Training cost (with dropout).
    YDropProb = model(self.X, self.params, pDropConv, pDropHidden)
    self.trNeqs = basicUtils.neqs(YDropProb, self.Y)
    trCrossEntropy = categorical_crossentropy(YDropProb, self.Y)
    self.trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
    # Validation / test cost (dropout disabled).
    YFullProb = model(self.X, self.params, 0., 0.)
    self.vateNeqs = basicUtils.neqs(YFullProb, self.Y)
    self.YPred = T.argmax(YFullProb, axis=1)
    vateCrossEntropy = categorical_crossentropy(YFullProb, self.Y)
    self.vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
def get_loss(self): """ The mean of the categorical cross-entropy tensor. Returns ------- theano expression The loss function. """ input = self.inputs[0] target = self.targets[0] return mean(nnet.categorical_crossentropy(input, target))
def __init__(self, n_inputs, n_hidden, n_outputs, **kwargs):
    """Feed-forward softmax classifier trained by gradient descent.

    `n_hidden` is a list of hidden-layer widths.  Keyword options override
    the defaults below (epochs, print_every, reg, alpha, batch,
    noise_scale, nonlin).
    """
    property_defaults = {
        'epochs': 1000,
        'print_every': 100,
        'reg': .001,  # BUG FIX: was '..001', which is a SyntaxError
        'alpha': .01,
        'batch': 0,
        'noise_scale': 1.0,
        'nonlin': T.nnet.relu,
    }
    for prop, default in property_defaults.items():
        setattr(self, prop, kwargs.get(prop, default))

    self.arch = [n_inputs] + n_hidden + [n_outputs]
    final = len(self.arch) - 1  # the true "number of layers"
    self.X = T.dmatrix('X')
    self.y = T.dmatrix('y')  # one-hot outputs

    # Construct layers: softmax on the last layer, self.nonlin elsewhere.
    layer_outputs, self.parameters, weights = [self.X], [], []
    for index, layer in enumerate(n_hidden + [n_outputs]):
        nonlin = T.nnet.softmax if index == final - 1 else self.nonlin
        layer = Layer(n_inputs=self.arch[index], n_nodes=self.arch[index + 1],
                      inputs=layer_outputs[index], layer=index + 1,
                      noise_scale=self.noise_scale, nonlin=nonlin)
        layer_outputs.append(layer.output)
        self.parameters.extend([layer.W, layer.b])
        weights.append(layer.W)

    # Expressions for building theano functions.
    output = layer_outputs[-1]
    prediction = np.argmax(output, axis=1)
    crossentropy = categorical_crossentropy(output, self.y).mean()
    regularization = self.reg * sum([(W ** 2).sum() for W in weights])
    cost = crossentropy + regularization

    # Plain gradient-descent updates.
    grads = T.grad(cost, self.parameters)
    updates = [(p, p - self.alpha * g) for p, g in zip(self.parameters, grads)]

    # Theano functions for gradient descent and model evaluation.
    self.epoch = theano.function(inputs=[self.X, self.y], outputs=[], updates=updates)
    self.count_cost = theano.function(inputs=[self.X, self.y], outputs=cost)
    self.predict = theano.function(inputs=[self.X], outputs=prediction)
def create_train_func(layers, rnn):
    """Compile the training function for a (possibly quantized) RNN model.

    `rnn` names the recurrent cell module: "LSTM", "GRU" or "Recurrent".
    """
    if rnn == "LSTM":
        import model.LSTM
        rnn = model.LSTM
    elif rnn == "GRU":
        import model.GRU
        rnn = model.GRU
    elif rnn == "Recurrent":
        import model.Recurrent
        rnn = model.Recurrent

    # Input dims: batch, sequence, vocabulary.
    X = T.tensor3('X')
    X_batch = T.tensor3('X_batch')
    # Target class indices.
    y = T.ivector('y')
    y_batch = T.ivector('y_batch')

    # NOTE(review): deterministic=True disables stochastic layers during
    # training -- confirm this is intentional.
    y_hat = lasagne.layers.get_output(layers['l_out'], X, deterministic=True)
    train_loss = T.mean(categorical_crossentropy(y_hat, y), axis=0)

    # Learning rate supplied as a symbolic input.
    lr = T.scalar(name='lr')

    # ML: if quantized, W updates. Cannot work
    W = lasagne.layers.get_all_params(layers['l_out'], binary=True)
    W_grads = rnn.compute_rnn_grads(train_loss, layers['l_out'])
    updates = lasagne.updates.adam(loss_or_grads=W_grads, params=W, learning_rate=lr)

    # ML: the default upgrade mode is ada
    # ML: lack of clipping
    params = lasagne.layers.get_all_params(layers['l_out'], trainable=True,
                                           binary=False)
    updates = OrderedDict(updates.items() + lasagne.updates.adam(
        loss_or_grads=train_loss, params=params, learning_rate=lr).items())

    return theano.function(
        inputs=[theano.In(X_batch), theano.In(y_batch), lr],
        outputs=train_loss,
        updates=updates,
        givens={X: X_batch, y: y_batch},
    )
def forward_pass(self, sentence, label):
    """Run the tree-structured LSTM forward over `sentence`.

    Builds the parse tree, computes cell/hidden states bottom-up (level
    order: leaves before parents), applies softmax at the root, and
    returns (error, prediction).
    """
    inpt_tree = self.mgr.get_tree(sentence)
    golden = label
    # Near-zero one-hot target: 1e-9 off-class, 1 for the gold class.
    one_hot_golden = np.ones(shape=(self.num_classes, 1)) * 1e-9
    one_hot_golden[golden] = 1

    stack = self.mgr.get_tree_stack(sentence)
    node_hidden = [np.zeros(shape=self.mem_dim)] * (len(stack) + 1)
    node_c = [np.zeros(shape=self.mem_dim)] * (len(stack) + 1)

    # Level-order traversal: leaves consume word vectors, internal nodes
    # compose their children's states.
    for node in stack:
        if node.is_leaf():
            x = self.mgr.get_glove_vec(node.word)
            node_c[node.idx] = self.leaf_i(x) * self.leaf_u(x)
            node_hidden[node.idx] = node_c[node.idx] * self.get_tanh(node_c[node.idx])
        else:
            child_l, child_r = node.get_child()
            h_r = node_hidden[child_r.idx]
            h_l = node_hidden[child_l.idx]
            node_c[node.idx] = (
                (self.composer_i(h_r, h_l) * self.composer_u(h_r, h_l))
                + (self.composer_f(h_r, h_l)
                   * self.combine_c(node_c[child_r.idx], node_c[child_l.idx])))
            node_hidden[node.idx] = (self.composer_o(h_r, h_l)
                                     * self.get_tanh(node_c[node.idx]))

    # Softmax over the root node's hidden state.
    pred = self.softmax(node_hidden[inpt_tree.root.idx])
    self.error = categorical_crossentropy(one_hot_golden, pred) + self.param_error
    return self.error, pred
def seq_cat_crossent(pred, targ, mask, normalize=False):
    """Masked sequence cross-entropy.

    dim 0 is time, dim 1 is batch, dim 2 is category.  When `normalize`
    is true the sum is divided by the total unmasked count; otherwise it
    is divided by the batch size only.
    """
    rows = pred.shape[0] * pred.shape[1]
    pred_flat = pred.reshape((rows, pred.shape[2]), ndim=2)
    mask_flat = mask.flatten()
    ce = categorical_crossentropy(pred_flat, targ.flatten())
    cost = T.sum(ce * mask_flat)
    if normalize:
        # Normalize by batch and length.
        return cost / T.sum(mask_flat)
    # Just normalize by batch size.
    return cost / pred.shape[1]
def cross_entropy(obj):
    """Attach cross-entropy loss and accuracy expressions to `obj`, in place."""
    # Clip probabilities away from 0/1 to keep log() finite.
    obj.out = T.clip(obj.out, _EPSILON, 1.0 - _EPSILON)
    obj.loss = nnet.categorical_crossentropy(obj.out, obj.y).mean()
    # Convert one-hot rows to serial class-index columns.
    obj.out = obj.out.argmax(axis=1)[:, None]
    obj.y = obj.y.argmax(axis=1)[:, None]
    # Classification accuracy for training batches and the validation set.
    n_correct = T.eq(obj.out, obj.y).sum().astype(theano.config.floatX)
    obj.train_acc = n_correct / obj.n_batch
    obj.valid_acc = n_correct / obj.x_test_arr.shape[0]
    return obj
def __call__(self, prediction_proba, targets):
    """Boosting-weighted per-image cross-entropy for segmentation maps."""
    if not self.boosting_weights:
        raise ValueError("Boosting residuals not set up")
    shape = T.shape(prediction_proba)
    # Flatten (batch, classes, H, W) to (batch*H*W, classes).
    flat_pred = T.reshape(T.transpose(prediction_proba, (0, 2, 3, 1)), (-1, shape[1]))
    if prediction_proba.ndim == targets.ndim:
        # One-hot targets: flatten the same way.
        flat_targ = T.reshape(T.transpose(targets, (0, 2, 3, 1)), (-1, shape[1]))
    else:
        # Integer class targets.
        flat_targ = T.reshape(targets, (-1,))
    results = categorical_crossentropy(flat_pred, flat_targ)
    # Weight each pixel's loss by its boosting residual.
    results *= self.boosting_weights[:results.shape[0]]
    results = T.reshape(results, (shape[0], shape[2], shape[3]))
    return T.sum(results, axis=(1, 2))
def __init__(self, filterShape, C):
    """Conv-ELM-style model: trained conv filters plus a separate output beta."""
    self.C = C
    # Randomly initialized convolution filters as a shared variable.
    filterRand = myUtils.elm.convfilterinit(filterShape)
    self.filterShared = theano.shared(floatX(filterRand), borrow=True)
    self.X = T.ftensor4()
    self.Y = T.fmatrix()
    # Forward feature map and its compiled function.
    self.forwardout = self._forward()
    self.forwardfn = theano.function([self.X], self.forwardout,
                                     allow_input_downcast=True)
    # Output weights (beta), held in a shared variable.
    self.sharedBeta = theano.shared(floatX(np.zeros((5760, 10))), borrow=True)
    # Softmax keeps the outputs constrained to (0, 1).
    predictout = softmax(self.forwardout.dot(self.sharedBeta))
    self.predictfn = theano.function([self.X], predictout,
                                     allow_input_downcast=True)
    # Training: cross-entropy + regularizer on the filters, SGD with momentum.
    crossentropy = categorical_crossentropy(predictout, self.Y)
    cost = T.mean(crossentropy) + basicUtils.regularizer([self.filterShared])
    updates = gradient.sgdm(cost, [self.filterShared])
    self.trainfn = theano.function([self.X, self.Y], cost, updates=updates,
                                   allow_input_downcast=True)
def test_asymptotic_32(): """ This test makes sure that our functions behave sensibly when huge values are present """ #TODO: consider adding the optimization of crossentropy into the current mode for the # purpose of running this test for dtype in 'float32', 'float64': if dtype == 'float32': x = tensor.fmatrix() x2 = tensor.fvector() else: x = tensor.dmatrix() x2 = tensor.dvector() y = tensor.lvector() c = categorical_crossentropy(softmax(x + x2), y) f = theano.function([x, y, x2], [c.sum(), tensor.grad(c.sum(), x)], mode='FAST_RUN') if 0: for i, n in enumerate(f.maker.env.toposort()): print i, n xval = numpy.zeros((5, 5), dtype=dtype) x2val = numpy.zeros(5, dtype=xval.dtype) for i in xrange(100): cval, gxval = f(xval, numpy.arange(5), x2val) xval -= 100.3 * gxval #print cval, gxval assert cval == 0 # no problem going to zero error #what about when x gets really big? xval = numpy.zeros((5, 5), dtype=dtype) x2val = numpy.zeros(5, dtype=xval.dtype) for i in xrange(100): cval, gxval = f(xval, numpy.arange(5), x2val) xval += 100000.3 * gxval #print cval, gxval assert cval > 61750000 assert gxval[0, 0] == -1.0 assert gxval[0, 1] == 0.25
def test_asymptotic_32(): """ This test makes sure that our functions behave sensibly when huge values are present """ #TODO: consider adding the optimization of crossentropy into the current # mode for the purpose of running this test for dtype in 'float32', 'float64': if dtype == 'float32': x = tensor.fmatrix() x2 = tensor.fvector() else: x = tensor.dmatrix() x2 = tensor.dvector() y = tensor.lvector() c = categorical_crossentropy(softmax(x + x2), y) f = theano.function([x, y, x2], [c.sum(), tensor.grad(c.sum(), x)], mode='FAST_RUN') if 0: for i, n in enumerate(f.maker.fgraph.toposort()): print i, n xval = numpy.zeros((5, 5), dtype=dtype).astype(dtype) x2val = numpy.zeros(5, dtype=xval.dtype).astype(dtype) for i in xrange(100): cval, gxval = f(xval, numpy.arange(5), x2val) xval -= 100.3 * gxval #print cval, gxval assert cval == 0 # no problem going to zero error #what about when x gets really big? xval = numpy.zeros((5, 5), dtype=dtype) x2val = numpy.zeros(5, dtype=xval.dtype) for i in xrange(100): cval, gxval = f(xval, numpy.arange(5), x2val) xval += 100000.3 * gxval #print cval, gxval assert cval > 61750000 assert gxval[0, 0] == -1.0 assert gxval[0, 1] == 0.25
def test11(): x = T.vector("x") x2 = T.matrix("x2") y = T.ivector("y") #z = T.vector("z") #z = T.nnet.softmax(x) z2 = categorical_crossentropy(x2, y) #fn = theano.function(inputs=[x], outputs=[z]) fn2 = theano.function(inputs=[x2, y], outputs=[z2]) x_in = [1, 2, 3, 4] x2_in = [ [1,2,3], [1,2,3], [1,2,3], [1,2,3] ] y_in = [1, 0, 1, 0] #print fn(x_in) print fn2(x2_in, y_in)
def create_vali_func(layers):
    """Compile the validation-loss function (deterministic, no updates)."""
    # Input dims: batch, sequence, vocabulary.
    X = T.tensor3('X')
    X_batch = T.tensor3('X_batch')
    # Target class indices.
    y = T.ivector('y')
    y_batch = T.ivector('y_batch')

    y_hat = lasagne.layers.get_output(layers['l_out'], X, deterministic=True)
    vali_loss = T.mean(categorical_crossentropy(y_hat, y), axis=0)

    return theano.function(
        inputs=[theano.In(X_batch), theano.In(y_batch)],
        outputs=vali_loss,
        updates=None,
        givens={X: X_batch, y: y_batch},
    )
params.append([w31, w32, b31, b32])
# Global-average-pooling parameters (1x1 convolutions).
wgap1 = initial.weightInitCNN3((h1, f3 * expand, 1, 1), 'wgap')
bgap1 = initial.biasInit((h1,), 'bgap')
params.append([wgap1, bgap1])
wgap2 = initial.weightInitCNN3((outputs, h1, 1, 1), 'wgap')
bgap2 = initial.biasInit((outputs,), 'bgap')
params.append([wgap2, bgap2])
# Theano symbolic variables and expressions.
X = T.tensor4('X')
Y = T.matrix('Y')
# Training cost (dropout probabilities 0.2 conv / 0.5 hidden).
YDropProb = model(X, params, 0.2, 0.5)
trNeqs = basicUtils.neqs(YDropProb, Y)
trCrossEntropy = categorical_crossentropy(YDropProb, Y)
trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(params))
# Validation / test cost (dropout disabled).
YFullProb = model(X, params, 0., 0.)
vateNeqs = basicUtils.neqs(YFullProb, Y)
YPred = T.argmax(YFullProb, axis=1)
vateCrossEntropy = categorical_crossentropy(YFullProb, Y)
vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(params))
# SGD with Nesterov momentum.
updates = gradient.sgdm(trCost, flatten(params), lr, nesterov=True)
train = function(
    inputs=[X, Y],
    outputs=[trCost, trNeqs],  # return fewer outputs to save time
    updates=updates,
    allow_input_downcast=True
)
lasagne.layers.set_all_param_values(net['prob'], model['param values'])
googlenet_features = lasagne.layers.get_output(net['pool5/7x7_s1'], X)

# Add a linear + softmax classifier on top of the GoogLeNet features.
W = theano.shared(
    numpy.random.uniform(low=-0.1, high=0.1, size=(1024, 10)).astype(numpy.float32),
    'linear_weights')
b = theano.shared(numpy.zeros(10).astype(numpy.float32))
all_parameters = [W, b]
output = tensor.dot(googlenet_features, W) + b
pred = tensor.nnet.softmax(output)

loss = categorical_crossentropy(pred, targets).mean()
loss.name = 'loss'
loss_test = categorical_crossentropy(pred, targets).mean()
# BUG FIX: the original set `loss.name = 'loss_test'`, overwriting the
# training loss's name and leaving `loss_test` unnamed.
loss_test.name = 'loss_test'
error = tensor.neq(tensor.argmax(pred, axis=1),
                   tensor.argmax(targets, axis=1)).mean()
error.name = 'error'
error_test = tensor.neq(tensor.argmax(pred, axis=1),
                        tensor.argmax(targets, axis=1)).mean()
# BUG FIX: same copy-paste slip for the test error's name.
error_test.name = 'error_test'

# construct update rule
learning_rate = 0.01
def cost_validation(self, net):
    """Mean categorical cross-entropy of this layer's output against net.y."""
    ce = categorical_crossentropy(self.output, net.y)
    return T.mean(ce)
train_stream = RandomHorizontalFlip(train_stream, which_sources=('features', ))

test_dataset = CIFAR10(('train', ), subset=slice_test)
test_stream = DataStream.default_stream(
    test_dataset,
    iteration_scheme=SequentialScheme(test_dataset.num_examples, batch_size))
test_stream = OneHotEncode(test_stream, which_sources=('targets', ))

X = tensor.ftensor4('features')
targets = tensor.fmatrix('targets')
output, output_test, all_parameters, acc_parameters = get_model(
    X, batch_size, (32, 32))

loss = categorical_crossentropy(output[:, :, 0, 0], targets).mean()
loss.name = 'loss'
loss_test = categorical_crossentropy(output_test[:, :, 0, 0], targets).mean()
# BUG FIX: the original set `loss.name = 'loss_test'`, which renamed the
# training loss and left `loss_test` unnamed.
loss_test.name = 'loss_test'

error = tensor.neq(tensor.argmax(output[:, :, 0, 0], axis=1),
                   tensor.argmax(targets, axis=1)).mean()
error.name = 'error'
error_test = tensor.neq(tensor.argmax(output_test[:, :, 0, 0], axis=1),
                        tensor.argmax(targets, axis=1)).mean()
error_test.name = 'error_test'

# construct update rule
learning_rate = 0.1
def cost_validation(self, net):
    """Validation cost: average cross-entropy of predictions vs. net.y."""
    losses = categorical_crossentropy(self.output, net.y)
    return losses.mean()
def __init__(self, i_size, h_size, o_size, weights=None):
    """LSTM with peephole connections; builds train / predict / sampling fns.

    weights: optional dict of pre-trained shared variables keyed by
    parameter name; fresh random weights are initialized when omitted.
    """
    if not weights:
        # Input gate.
        self.W_xi = _init_weights((i_size, h_size))
        self.W_hi = _init_weights((h_size, h_size))
        self.W_ci = _init_weights((h_size, h_size))
        self.b_i = _init_zero_vec(h_size)
        # Forget gate.
        self.W_xf = _init_weights((i_size, h_size))
        self.W_hf = _init_weights((h_size, h_size))
        self.W_cf = _init_weights((h_size, h_size))
        self.b_f = _init_zero_vec(h_size)
        # Cell candidate.
        self.W_xc = _init_weights((i_size, h_size))
        self.W_hc = _init_weights((h_size, h_size))
        self.b_c = _init_zero_vec(h_size)
        # Output gate.
        self.W_xo = _init_weights((i_size, h_size))
        self.W_ho = _init_weights((h_size, h_size))
        self.W_co = _init_weights((h_size, h_size))
        self.b_o = _init_zero_vec(h_size)
        # Hidden-to-output projection.
        self.W_hy = _init_weights((h_size, o_size))
        self.b_y = _init_zero_vec(o_size)
    else:
        for name in ('W_xi', 'W_hi', 'W_ci', 'b_i',
                     'W_xf', 'W_hf', 'W_cf', 'b_f',
                     'W_xc', 'W_hc', 'b_c',
                     'W_xo', 'W_ho', 'W_co', 'b_o',
                     'W_hy', 'b_y'):
            setattr(self, name, weights[name])

    # Canonical parameter ordering shared by scan, the single-step call and
    # the gradient-descent updates.
    all_params = [self.W_xi, self.W_hi, self.W_ci, self.b_i,
                  self.W_xf, self.W_hf, self.W_cf, self.b_f,
                  self.W_xc, self.W_hc, self.b_c,
                  self.W_xo, self.W_ho, self.W_co, self.b_o,
                  self.W_hy, self.b_y]

    S_h = _init_zero_vec(h_size)  # initial hidden state
    S_c = _init_zero_vec(h_size)  # initial cell state
    S_x = T.matrix()  # inputs
    Y = T.matrix()    # targets

    (S_h_r, S_c_r, S_y_r), _ = theano.scan(
        fn=_step,
        sequences=S_x,
        outputs_info=[S_h, S_c, None],
        non_sequences=all_params)

    cost = T.mean(categorical_crossentropy(softmax(S_y_r), Y))
    updates = _gradient_descent(cost, all_params)

    self.train = theano.function(inputs=[S_x, Y], outputs=cost,
                                 updates=updates, allow_input_downcast=True)
    self.predict = theano.function(inputs=[S_x], outputs=S_y_r,
                                   allow_input_downcast=True)

    # Single-step function with explicit state, used for sampling.
    S_h_v = T.vector()
    S_c_v = T.vector()
    S_h_s, S_c_s, S_y_s = _step(S_x, S_h_v, S_c_v, *all_params)
    self.sampling = theano.function(inputs=[S_x, S_h_v, S_c_v],
                                    outputs=[S_h_s, S_c_s, S_y_s],
                                    allow_input_downcast=True)
train_stream = OneHotEncode(train_stream, which_sources=('targets',)) train_stream = RandomHorizontalFlip(train_stream, which_sources=('features',)) test_dataset = CIFAR10(('train',), subset=slice_test) test_stream = DataStream.default_stream( test_dataset, iteration_scheme=SequentialScheme(test_dataset.num_examples, batch_size) ) test_stream = OneHotEncode(test_stream, which_sources=('targets',)) X = tensor.ftensor4('features') targets = tensor.fmatrix('targets') output, output_test, all_parameters, acc_parameters = get_model(X, batch_size, (32, 32)) loss = categorical_crossentropy(output[:,:,0,0], targets).mean() loss.name = 'loss' loss_test = categorical_crossentropy(output_test[:,:,0,0], targets).mean() loss.name = 'loss_test' error = tensor.neq(tensor.argmax(output[:,:,0,0], axis=1), tensor.argmax(targets, axis=1)).mean() error.name = 'error' error_test = tensor.neq(tensor.argmax(output_test[:,:,0,0], axis=1), tensor.argmax(targets, axis=1)).mean() error.name = 'error_test' # construct update rule learning_rate = 0.1 updates, updates_stats = [], [] for param in all_parameters:
def _cost(target_seq, pred_seq):
    """Sequence-averaged categorical cross-entropy.

    Predictions are clipped into (EPS, 1 - EPS) so the logarithm inside
    the cross-entropy never sees an exact 0 or 1.
    """
    safe_pred = tensor.clip(pred_seq, EPS, 1.0 - EPS)
    xent = categorical_crossentropy(coding_dist=safe_pred, true_dist=target_seq)
    return xent.mean(axis=0).mean(axis=0)
train_dataset_100, iteration_scheme=SequentialScheme(train_dataset_100.num_examples, batch_size) ) train_stream_100 = OneHotEncode100(train_stream_100, which_sources=('fine_labels',)) lr_cifar100 = learning_rate# * num_train_example/num_train_cifar100 ## build computational graph X = tensor.ftensor4('features') targets = tensor.fmatrix('targets') targets_100 = tensor.fmatrix('fine_labels') output_10, output_test_10, output_100, output_test_100, all_parameters, acc_parameters = get_model(X, batch_size, (32, 32)) loss = alpha * categorical_crossentropy(output_10[:,:,0,0], targets).mean() loss.name = 'loss' loss_100 = (1-alpha) * categorical_crossentropy(output_100[:,:,0,0], targets_100).mean() loss_100.name = 'loss_100' loss_test = categorical_crossentropy(output_test_10[:,:,0,0], targets).mean() loss.name = 'loss_test' loss_100_test = categorical_crossentropy(output_test_100[:,:,0,0], targets_100).mean() loss_100_test.name = 'loss_100_test' error = tensor.neq(tensor.argmax(output_10[:,:,0,0], axis=1), tensor.argmax(targets, axis=1)).mean() error.name = 'error' error_test = tensor.neq(tensor.argmax(output_test_10[:,:,0,0], axis=1), tensor.argmax(targets, axis=1)).mean()
y = T.imatrix('y') index = T.lscalar() # index to a [mini]batch layer0_input = x.reshape((batch_size, 1, img_rows, img_cols)) layer0 = ConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, 1, img_rows, img_cols), filter_shape=(32, 1, 3, 3), poolsize=(2, 2)) layer1_input = layer0.output.flatten(2) layer1 = HiddenLayer(rng, input=layer1_input, n_in=32 * 13 * 13, n_out=num_classes, activation=softmax) cost = T.mean(categorical_crossentropy(layer1.output, y)) acc = T.mean(T.eq(T.argmax(layer1.output, axis=1), T.argmax(y, axis=1))) params = layer1.params + layer0.params grads = T.grad(cost, params) updates = [(param_i, param_i - lr * grad_i) for param_i, grad_i in zip(params, grads)] train_model = theano.function( [index], [cost, acc], updates=updates, givens={ x: train_x[index * batch_size:(index + 1) * batch_size], y: train_y[index * batch_size:(index + 1) * batch_size] }) test_model = theano.function( [index], [cost, acc], givens={
def cost(self, net):
    """Mean categorical cross-entropy of the dropout output against net.y."""
    xent = categorical_crossentropy(self.output_dropout, net.y)
    return T.mean(xent)
def apply(self, facts, facts_mask, question, question_mask, y):
    """
    Build the symbolic episodic-memory QA graph and its three costs.

    layout: (10, 5) (10, 5) (13, 1) (13, 1) (1,)
    return: (rl_cost, sl_cost, decoder_cost)
    NOTE(review): the original docstring said "return: answer, cost", but the
    code returns the three cost expressions — confirmed from the return line.
    Side effects: extends self.params and sets self.exct_net, self.loc_net,
    self.decoder_cost, self.sl_cost, self.rl_cost.
    """
    # Embedding table shared by the fact memory and the question encoder.
    table = lookup_table(self.n_in, self.vocab_size)
    self.params += table.params
    memory = Memory(facts, facts_mask, self.vocab_size, self.n_in, self.n_grus, table=table)
    quest = Question(question, question_mask, self.vocab_size, self.n_in, self.n_grus, table=table)
    self.params += memory.params
    self.params += quest.params
    self.exct_net = Executor(self.n_qf, self.n_hts, self.n_label)
    self.params += self.exct_net.params
    self.loc_net = LocationNet(n_hids=self.n_lhids, n_layers=1, n_in=self.n_qf+self.n_hts)
    self.params += self.loc_net.params
    #init operations
    # mem = memory.output #Fact Memory (5,n_grus=4)
    que = quest.output #(1,n_grus=4)
    l_idx = 0       # memory slot read at step 0; symbolic argmax afterwards
    htm1 = None     # executor state; init_flag=True on the first step
    # Per-step collections of distributions, sampled actions and rewards.
    stops_dist = []
    answers_dist = []
    lts_dist = []
    stops = []
    answers = []
    lts = []
    rewards = []
    end_t = self.T-1
    # Fixed-length unrolled loop: each step reads a memory slot, advances the
    # executor, picks the next slot to attend to, and records reward signals.
    for t in xrange(self.T):
        sf, _ = memory.read(l_idx) #(1,n_grus=4)
        qf = T.concatenate([que, sf], axis = 1) #layout: (1, 2*n_grus=8)
        ht, stop_dist, answer_dist = self.exct_net.step_forward(qf, htm1, init_flag=(t==0))
        htm1 = ht
        lt_dist = self.loc_net.apply(que, ht, memory)
        l_idx = T.argmax(lt_dist) #hard attention
        #htm1, stop, answer, l_idx = _step(memory, l_idx, que, htm1)
        answer = T.argmax(answer_dist) #TODO: implement a real sampling
        terminal = self._terminate(stop_dist[0,0])
        # Clamp end_t to the first step at which termination fires.
        end_t = T.switch(terminal, T.minimum(t, end_t), end_t)
        reward = self.env.step(answer, terminal, y, t, end_t)
        stops_dist.append(stop_dist)
        answers_dist.append(answer_dist)
        lts_dist.append(lt_dist)
        stops.append(terminal)
        answers.append(answer)
        lts.append(l_idx)
        rewards.append(reward)
    # Fold per-step lists into single symbolic tensors.
    stops_dist = T.concatenate(stops_dist,axis=0)#ndim=2
    answers_dist = T.concatenate(answers_dist,axis=0)#ndim=2
    lts_dist = T.concatenate(lts_dist,axis=0)#ndim=2
    stops = T.stack(stops,axis=0)#ndim=1
    answers = T.stack(answers,axis=0)#ndim=1
    lts = T.stack(lts,axis=0)#ndim=1
    rewards = T.stack(rewards,axis=0)#ndim=1
    # rewards = theano.printing.Print('226 line reward:')(rewards)
    # stops = theano.printing.Print('227 line reward:')(stops)
    # Undiscounted return-to-go: returns[t] = sum of rewards from t onward.
    returns=[]
    for idx in xrange(self.T):
        returns.append(T.sum(rewards[idx:]))
    returns = T.stack(returns, axis=0) # ndim=1
    returns = theano.printing.Print('233 line returns:')(returns)  # debug print left enabled
    self.decoder_cost = memory.cost + quest.cost
    # answer_dist = theano.printing.Print('230 line answer_dist:')(answer_dist)
    # answer_dist here is the LAST step's distribution (loop variable escapes).
    y = theano.tensor.extra_ops.to_one_hot(y,answer_dist.shape[1])
    # y = theano.printing.Print('231 line y:')(y)
    # answers = theano.printing.Print('233 line answers:')(answers)
    # TODO: Now, final answer can't simply select the last one!
    self.sl_cost = T.mean(categorical_crossentropy(answer_dist, y))
    # REINFORCE-style losses: log-likelihood of taken actions weighted by returns.
    stop_cost=self.log_likelihood_sym(actions_var=stops, dist_info_vars={'prob': stops_dist},bernoulli=True) * returns
    answer_cost=self.log_likelihood_sym(actions_var=answers, dist_info_vars={'prob': answers_dist}) * returns
    lt_cost=self.log_likelihood_sym(actions_var=lts, dist_info_vars={'prob': lts_dist}) * returns
    self.rl_cost = -T.mean(stop_cost+answer_cost+lt_cost)
    #TODO: we need to improve this rl_cost to introduce anti-variance measures
    return self.rl_cost, self.sl_cost, self.decoder_cost
# scan loops through input sequence and applies step function to each time step (S_h_r, S_c_r, S_y_r), _ = theano.scan(fn=step, sequences=S_x, outputs_info=[S_h, S_c, None], non_sequences=[ W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xo, W_ho, W_co, b_o, W_hy, b_y ]) # END code inspired by Christian Herta # cost and gradient descent cost = T.mean(categorical_crossentropy(softmax(S_y_r), Y)) def gradient_descent(cost, weights, lr=0.05): grads = T.grad(cost=cost, wrt=weights) updates = [] for w, g in zip(weights, grads): updates.append([w, w - lr * g]) return updates updates = gradient_descent(cost, [ W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xo, W_ho, W_co, b_o, W_hy, b_y ])
def _cost(target_seq, pred_seq):
    """Categorical cross-entropy averaged over the two leading axes.

    Clips predictions into (EPS, 1 - EPS) before the log to avoid NaN/inf.
    """
    clipped = tensor.clip(pred_seq, EPS, 1.0 - EPS)
    per_step = categorical_crossentropy(coding_dist=clipped, true_dist=target_seq)
    return per_step.mean(axis=0).mean(axis=0)
# scan loops through input sequence and applies step function to each time step (S_h_r, S_c_r, S_y_r ), _ = theano.scan(fn = step, sequences = S_x, outputs_info = [S_h, S_c, None], non_sequences = [W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xo, W_ho, W_co, b_o, W_hy, b_y]) # END code inspired by Christian Herta # cost and gradient descent cost = T.mean(categorical_crossentropy(softmax(S_y_r), Y)) def gradient_descent(cost, weights, lr=0.05): grads = T.grad(cost=cost, wrt=weights) updates = [] for w, g in zip(weights, grads): updates.append([w, w - lr * g]) return updates updates = gradient_descent(cost, [W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xo, W_ho, W_co, b_o, W_hy, b_y])
# shapegap = (10, 4, 1, 1) # wgap = theano.shared(utils.floatX(np.arange(np.prod(shapegap)).reshape(shapegap)), borrow=True) shape21 = (5, 4, 5, 3, 3) shape22 = (5, 4, 5) w21 = theano.shared(basicUtils.floatX(np.arange(np.prod(shape21)).reshape(shape21)), borrow=True) w22 = theano.shared(basicUtils.floatX(np.arange(np.prod(shape22)).reshape(shape22)), borrow=True) shapegap = (10, 5, 1, 1) wgap = theano.shared(basicUtils.floatX(np.arange(np.prod(shapegap)).reshape(shapegap)), borrow=True) layer1 = nin1(X, [w11, w12]) layer1 = nin1(layer1, [w21, w22]) layer1 = gap(layer1, wgap) YDropProb1 = softmax(layer1) trNeqs = basicUtils.neqs(YDropProb1, Y) trCrossEntropy = categorical_crossentropy(YDropProb1, Y) trCost1 = T.mean(trCrossEntropy) updates1 = basicUtils.sgd(trCost1, [w11, w12, wgap], 0.001) f1 = theano.function([X, Y], trCost1, updates=updates1, allow_input_downcast=True) layer2 = nin2(X, [w11, w12], shape11) layer2 = nin2(layer2, [w21, w22], shape21) layer2 = gap(layer2, wgap) YDropProb2 = softmax(layer2) trNeqs = basicUtils.neqs(YDropProb2, Y) trCrossEntropy = categorical_crossentropy(YDropProb2, Y) trCost2 = T.mean(trCrossEntropy) updates2 = basicUtils.sgd(trCost2, [w11, w12, wgap], 0.001) f2 = theano.function([X, Y], trCost2, updates=updates2, allow_input_downcast=True) x = np.random.randint(0, 100, (500, 3, 10, 10))
def __init__(self, i_size, h_size, o_size, weights=None):
    """Build the LSTM graph and compile train/predict/sampling functions.

    i_size/h_size/o_size: input, hidden and output layer widths.
    weights: optional dict of previously trained shared variables keyed
    'W_xi', ..., 'b_y'; when falsy, all parameters are freshly initialised.
    """
    if not weights:
        # Input gate weights (input, hidden, peephole-cell) and bias.
        self.W_xi = _init_weights((i_size, h_size))
        self.W_hi = _init_weights((h_size, h_size))
        self.W_ci = _init_weights((h_size, h_size))
        self.b_i = _init_zero_vec(h_size)
        # Forget gate.
        self.W_xf = _init_weights((i_size, h_size))
        self.W_hf = _init_weights((h_size, h_size))
        self.W_cf = _init_weights((h_size, h_size))
        self.b_f = _init_zero_vec(h_size)
        # Cell candidate.
        self.W_xc = _init_weights((i_size, h_size))
        self.W_hc = _init_weights((h_size, h_size))
        self.b_c = _init_zero_vec(h_size)
        # Output gate.
        self.W_xo = _init_weights((i_size, h_size))
        self.W_ho = _init_weights((h_size, h_size))
        self.W_co = _init_weights((h_size, h_size))
        self.b_o = _init_zero_vec(h_size)
        # Hidden-to-output projection.
        self.W_hy = _init_weights((h_size, o_size))
        self.b_y = _init_zero_vec(o_size)
    else:
        # Restore previously trained parameters from the supplied dict.
        self.W_xi = weights['W_xi']
        self.W_hi = weights['W_hi']
        self.W_ci = weights['W_ci']
        self.b_i = weights['b_i']
        self.W_xf = weights['W_xf']
        self.W_hf = weights['W_hf']
        self.W_cf = weights['W_cf']
        self.b_f = weights['b_f']
        self.W_xc = weights['W_xc']
        self.W_hc = weights['W_hc']
        self.b_c = weights['b_c']
        self.W_xo = weights['W_xo']
        self.W_ho = weights['W_ho']
        self.W_co = weights['W_co']
        self.b_o = weights['b_o']
        self.W_hy = weights['W_hy']
        self.b_y = weights['b_y']
    S_h = _init_zero_vec(h_size) # init values for hidden units
    S_c = _init_zero_vec(h_size) # init values for cell units
    S_x = T.matrix() # inputs
    Y = T.matrix() # targets
    # Unroll the recurrence; outputs_info seeds hidden/cell state, None
    # collects the per-step output without feeding it back.
    (S_h_r, S_c_r, S_y_r), _ = theano.scan(fn = _step,
                                           sequences = S_x,
                                           outputs_info = [S_h, S_c, None],
                                           non_sequences = [self.W_xi, self.W_hi, self.W_ci, self.b_i, self.W_xf, self.W_hf, self.W_cf, self.b_f, self.W_xc, self.W_hc, self.b_c, self.W_xo, self.W_ho, self.W_co, self.b_o, self.W_hy, self.b_y])
    # Mean cross-entropy of softmax-normalised step outputs vs. targets.
    cost = T.mean(categorical_crossentropy(softmax(S_y_r), Y))
    updates = _gradient_descent(cost, [self.W_xi, self.W_hi, self.W_ci, self.b_i, self.W_xf, self.W_hf, self.W_cf, self.b_f, self.W_xc, self.W_hc, self.b_c, self.W_xo, self.W_ho, self.W_co, self.b_o, self.W_hy, self.b_y])
    self.train = theano.function(inputs=[S_x, Y], outputs=cost, updates=updates, allow_input_downcast=True)
    self.predict = theano.function(inputs=[S_x], outputs=S_y_r, allow_input_downcast=True)
    # Single-step graph for sampling; caller threads hidden/cell state.
    S_h_v = T.vector()
    S_c_v = T.vector()
    S_h_s, S_c_s, S_y_s = _step(S_x, S_h_v, S_c_v,
                                self.W_xi, self.W_hi, self.W_ci, self.b_i,
                                self.W_xf, self.W_hf, self.W_cf, self.b_f,
                                self.W_xc, self.W_hc, self.b_c,
                                self.W_xo, self.W_ho, self.W_co, self.b_o,
                                self.W_hy, self.b_y)
    self.sampling = theano.function(inputs = [S_x, S_h_v, S_c_v], outputs = [S_h_s, S_c_s, S_y_s], allow_input_downcast=True)
dtype=theano.config.floatX)) w_ho = theano.shared( np.array(np.random.normal(0, 0.1, (n_outputs, n_hidden)), dtype=theano.config.floatX)) b_ih = theano.shared(np.array(np.random.normal(0, 0.1, (n_hidden, 1)), dtype=theano.config.floatX), broadcastable=(False, True)) b_ho = theano.shared(np.array(np.random.normal(0, 0.1, (n_outputs, 1)), dtype=theano.config.floatX), broadcastable=(False, True)) # Forward pass h_hidden = nnet.sigmoid(T.dot(w_ih, inputdata) + b_ih) h_output = (T.dot(w_ho, h_hidden) + b_ho) out_softmax = nnet.softmax(h_output.T).T cost_expression = nnet.categorical_crossentropy(out_softmax.T, target.T).sum() accuracy_train = accuracy_calc(T.argmax(out_softmax, axis=0), T.argmax(target, axis=0)) # Backward pass deriv_cost_w_ho = T.grad(cost_expression, w_ho) / batchSize deriv_cost_w_ih = T.grad(cost_expression, w_ih) / batchSize deriv_cost_b_ho = T.grad(cost_expression, b_ho) / batchSize deriv_cost_b_ih = T.grad(cost_expression, b_ih) / batchSize updates = [(w_ho, w_ho - learningRate * deriv_cost_w_ho), (w_ih, w_ih - learningRate * deriv_cost_w_ih), (b_ho, b_ho - learningRate * deriv_cost_b_ho), (b_ih, b_ih - learningRate * deriv_cost_b_ih)]
def _cost(target_seq, prob_pred_seq):
    """Mean categorical cross-entropy averaged over axes 2, 0, 0.

    Probabilities are clipped into (EPS, 1 - EPS) so the log never
    receives an exact 0 or 1.
    """
    safe_probs = tensor.clip(prob_pred_seq, EPS, 1.0 - EPS)
    xent = categorical_crossentropy(safe_probs, target_seq)
    return xent.mean(axis=2).mean(axis=0).mean(axis=0)
def __init__(self, fin, f1, f2, f3, hiddens, outputs, lr=0.001, C=0.001, pDropConv=0.2, pDropHidden=0.5): self.params = [] # 所有需要优化的参数放入列表中,分别是连接权重和偏置 # 卷积层,w=(本层特征图个数,上层特征图个数,卷积核行数,卷积核列数),b=(本层特征图个数) # conv: (32, 32) = (32, 32) # pool: (32/2, 32/2) = (16, 16) wconv1 = initial.weightInit((f1, fin, 3, 3), 'wconv1') bconv1 = initial.biasInit((f1,), 'bconv1') self.params.append([wconv1, bconv1]) # conv: (16, 16) = (16, 16) # pool: (16/2, 16/2) = (8, 8) wconv2 = initial.weightInit((f2, f1, 3, 3), 'wconv2') bconv2 = initial.biasInit((f2,), 'bconv2') self.params.append([wconv2, bconv2]) # conv: (8, 8) = (8, 8) # pool: (8/2, 8/2) = (4, 4) wconv3 = initial.weightInit((f3, f2, 3, 3), 'wconv3') bconv3 = initial.biasInit((f3,), 'bconv3') self.params.append([wconv3, bconv3]) # 全连接层,需要计算卷积最后一层的神经元个数作为MLP的输入 wfull = initial.weightInit((f3 * 4 * 4, hiddens), 'wfull') bfull = initial.biasInit((hiddens,), 'bfull') self.params.append([wfull, bfull]) wout = initial.weightInit((hiddens, outputs), 'wout') bout = initial.biasInit((outputs,), 'bout') self.params.append([wout, bout]) # 定义 Theano 符号变量,并构建 Theano 表达式 X = T.tensor4('X') Y = T.matrix('Y') YDropProb = model(X, self.params, pDropConv, pDropHidden) YFullProb = model(X, self.params, 0., 0.) 
YPred = T.argmax(YFullProb, axis=1) # 训练集代价函数 trCrossEntropy = categorical_crossentropy(YDropProb, Y) trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params)) updates = gradient.rmsprop(trCost, flatten(self.params), lr=lr) # 测试验证集代价函数 vateCrossEntropy = categorical_crossentropy(YFullProb, Y) vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params)) # 编译函数 # 训练函数,输入训练集,输出训练损失和误差 self.train = function( inputs=[In(X, borrow=True, allow_downcast=True), In(Y, borrow=True, allow_downcast=True)], outputs=[Out(trCost, borrow=True), Out(basicUtils.neqs(YDropProb, Y), borrow=True)], # 减少返回参数节省时间 updates=updates, allow_input_downcast=True ) # 验证或测试函数,输入验证或测试集,输出损失和误差,不进行更新 self.valtest = function( inputs=[In(X, borrow=True, allow_downcast=True), In(Y, borrow=True, allow_downcast=True)], outputs=[Out(vateCost, borrow=True), Out(basicUtils.neqs(YFullProb, Y), borrow=True)], # 减少返回参数节省时间 allow_input_downcast=True ) # 预测函数,只输入X,输出预测结果 self.predict = function( inputs=[In(X, borrow=True, allow_downcast=True)], outputs=Out(YPred, borrow=True), allow_input_downcast=True )
def main(num_epochs=NUM_EPOCHS):
    """Build a 2-layer LSTM character model, then train it while periodically
    sampling text and checkpointing parameters to nets/epoch-*.pkl.

    num_epochs: number of passes (in units scaled by BATCH_SIZE) over the data.
    Relies on module-level globals: vocab_size, N_HIDDEN, GRAD_CLIP,
    LEARNING_RATE, SEQ_LENGTH, SEQ_OUT_LEN, BATCH_SIZE, PRINT_FREQ,
    data_size, generation_phrase, gen_data, ix_to_char.
    """
    print("Building network ...")
    l_in = lasagne.layers.InputLayer(shape=(None, None, vocab_size))
    l_forward_1 = lasagne.layers.LSTMLayer(
        l_in, N_HIDDEN, grad_clipping=GRAD_CLIP,
        nonlinearity=lasagne.nonlinearities.tanh)
    # only_return_final: the dense layer sees just the last time step.
    l_forward_2 = lasagne.layers.LSTMLayer(
        l_forward_1, N_HIDDEN, grad_clipping=GRAD_CLIP,
        nonlinearity=lasagne.nonlinearities.tanh,
        only_return_final=True)
    l_out = lasagne.layers.DenseLayer(l_forward_2, num_units=vocab_size, W=lasagne.init.Normal(), nonlinearity=softmax)
    target_values = T.ivector('target_output')
    network_output = lasagne.layers.get_output(l_out)
    # Integer targets: categorical_crossentropy treats them as 1-hot indices.
    cost = categorical_crossentropy(network_output, target_values).mean()
    all_params = lasagne.layers.get_all_params(l_out, trainable=True)
    print("Computing updates ...")
    updates = lasagne.updates.adagrad(cost, all_params, LEARNING_RATE)
    print("Compiling functions ...")
    train = theano.function([l_in.input_var, target_values], cost, updates=updates, allow_input_downcast=True)
    probs = theano.function([l_in.input_var], network_output, allow_input_downcast=True)

    def try_it_out(N=SEQ_OUT_LEN):
        """
        Generates output for the predefined generation_phrase.
        More general example of generation function is in the
        application script.
        """
        assert (len(generation_phrase) >= SEQ_LENGTH)
        sample_ix = []
        x, _ = gen_data(
            len(generation_phrase) - SEQ_LENGTH, 1, generation_phrase, False)
        for i in range(N):
            # Greedy decode: take the argmax character, then shift the
            # input window left by one and append the sampled one-hot.
            ix = np.argmax(probs(x).ravel())
            sample_ix.append(ix)
            x[:, 0:SEQ_LENGTH - 1, :] = x[:, 1:, :]
            x[:, SEQ_LENGTH - 1, :] = 0
            x[0, SEQ_LENGTH - 1, sample_ix[-1]] = 1.
        random_snippet = generation_phrase + ''.join(ix_to_char[ix] for ix in sample_ix)
        print("----\n %s \n----" % random_snippet)

    print("Training ...")
    print("Seed used for text generation is: " + generation_phrase)
    p = 0  # read position into the training data
    try:
        for it in range(int(data_size * num_epochs / BATCH_SIZE)):
            try_it_out()  # sample some text before each reporting period
            avg_cost = 0
            for _ in range(PRINT_FREQ):
                x, y = gen_data(p)
                p += SEQ_LENGTH + BATCH_SIZE - 1
                # Wrap back to the start when the next batch would overrun.
                if (p + BATCH_SIZE + SEQ_LENGTH >= data_size):
                    print('Carriage Return')
                    p = 0
                avg_cost += train(x, y)
            print("Epoch {} average loss = {}".format(
                it * 1.0 * PRINT_FREQ / data_size * BATCH_SIZE,
                avg_cost / PRINT_FREQ))
            # Checkpoint all parameter values after each reporting period.
            netname = 'epoch-{:.5f}.pkl' \
                .format(it * 1.0 * PRINT_FREQ / data_size * BATCH_SIZE)
            with open('nets/' + netname, 'wb') as f:
                pickle.dump(lasagne.layers.get_all_param_values(l_out), f)
    except KeyboardInterrupt:
        pass
def cost(self, net):
    """Cross-entropy cost of the dropout output with respect to net.y."""
    per_example = categorical_crossentropy(self.output_dropout, net.y)
    return T.mean(per_example)
wconv3 = initial.weightInit((f3, f2, 3, 3), 'wconv3') bconv3 = initial.biasInitCNN2((f3,), 'bconv3') prams.append([wconv3, bconv3]) # 全连接层,需要计算卷积最后一层的神经元个数作为MLP的输入 wfull = initial.weightInit2MLP((f3 * 3 * 3, hiddens), 'wfull') bfull = initial.biasInitCNN2((hiddens,), 'bfull') prams.append([wfull, bfull]) wout = initial.weightInit2MLP((hiddens, outputs), 'wout') bout = initial.biasInitCNN2((outputs,), 'bout') prams.append([wout, bout]) # 构建 Theano 表达式 YDropProb = model(X, prams, 0.2, 0.5) YFullProb = model(X, prams, 0., 0.) YPred = T.argmax(YFullProb, axis=1) crossEntropy = categorical_crossentropy(YDropProb, Y) cost = T.mean(crossEntropy) + C * basicUtils.regularizer(flatten(prams)) updates = gradient.rmsprop(cost, flatten(prams), lr=learningRate) # 编译函数 # 训练函数,输入训练集,输出测试误差 train = function( inputs=[In(X, borrow=True, allow_downcast=True), In(Y, borrow=True, allow_downcast=True)], outputs=Out(basicUtils.neqs(YDropProb, Y), borrow=True), # 减少返回参数节省时间 updates=updates, allow_input_downcast=True ) # 测试或验证函数,输入测试或验证集,输出测试或验证误差,不进行更新 test = function( inputs=[In(X, borrow=True, allow_downcast=True),
def _cost(target_seq, prob_pred_seq):
    """Categorical cross-entropy reduced to a scalar over axes 2, 0, 0.

    Predicted probabilities are clipped to (EPS, 1 - EPS) first so the
    internal log stays finite.
    """
    clipped = tensor.clip(prob_pred_seq, EPS, 1.0 - EPS)
    per_elem = categorical_crossentropy(clipped, target_seq)
    return per_elem.mean(axis=2).mean(axis=0).mean(axis=0)
iteration_scheme=SequentialScheme(train_dataset_100.num_examples, batch_size)) train_stream_100 = OneHotEncode100(train_stream_100, which_sources=('fine_labels', )) lr_cifar100 = learning_rate # * num_train_example/num_train_cifar100 ## build computational graph X = tensor.ftensor4('features') targets = tensor.fmatrix('targets') targets_100 = tensor.fmatrix('fine_labels') output_10, output_test_10, output_100, output_test_100, all_parameters, acc_parameters = get_model( X, batch_size, (32, 32)) loss = alpha * categorical_crossentropy(output_10[:, :, 0, 0], targets).mean() loss.name = 'loss' loss_100 = (1 - alpha) * categorical_crossentropy(output_100[:, :, 0, 0], targets_100).mean() loss_100.name = 'loss_100' loss_test = categorical_crossentropy(output_test_10[:, :, 0, 0], targets).mean() loss.name = 'loss_test' loss_100_test = categorical_crossentropy(output_test_100[:, :, 0, 0], targets_100).mean() loss_100_test.name = 'loss_100_test' error = tensor.neq(tensor.argmax(output_10[:, :, 0, 0], axis=1),
def nll(self, target):
    """Negative log-likelihood of this model's prediction under *target*.

    *target* may be a distribution or symbolic integer labels; integers
    are interpreted as 1-hot by categorical_crossentropy.
    """
    xent = nnet.categorical_crossentropy(self.output, target)
    return xent