Example 1
    def __init__(self, fin, h1, piece1, h2, piece2, outputs,
                 lr, C, pDropHidden1=0.2, pDropHidden2=0.5):
        # hyperparameters
        self.lr = lr
        self.C = C
        self.pDropHidden1 = pDropHidden1
        self.pDropHidden2 = pDropHidden2
        # put all parameters to be optimized (connection weights and biases) into a list
        self.params = []
        hiddens = []
        pieces = []
        # maxout layers: piece gives the number of segments of the piecewise-linear activation, i.e. the number of hidden sub-layers; dimensions match an ordinary MLP, using cross-channel max pooling
        self.params.append(layerMLPParams((fin, h1 * piece1)))
        hiddens.append(h1)
        pieces.append(piece1)
        self.params.append(layerMLPParams((h1, h2 * piece2)))
        hiddens.append(h2)
        pieces.append(piece2)
        self.params.append(layerMLPParams((h2, outputs)))

        # define Theano symbolic variables and build the Theano expressions
        self.X = T.matrix('X')
        self.Y = T.matrix('Y')
        # training-set cost function
        YDropProb = model(self.X, self.params, hiddens, pieces, pDropHidden1, pDropHidden2)
        self.trNeqs = basicUtils.neqs(YDropProb, self.Y)
        trCrossEntropy = categorical_crossentropy(YDropProb, self.Y)
        self.trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))

        # validation/test-set cost function
        YFullProb = model(self.X, self.params, hiddens, pieces, 0., 0.)
        self.vateNeqs = basicUtils.neqs(YFullProb, self.Y)
        self.YPred = T.argmax(YFullProb, axis=1)
        vateCrossEntropy = categorical_crossentropy(YFullProb, self.Y)
        self.vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
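The helpers used above (layerMLPParams, model, flatten, basicUtils.regularizer) are project-local and not shown. A minimal self-contained sketch of the same cost pattern, assuming plain Theano only and made-up shapes:

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import softmax, categorical_crossentropy

# a single softmax layer standing in for the project-local model(); sizes are illustrative
rng = np.random.RandomState(0)
W = theano.shared(rng.randn(20, 10).astype(theano.config.floatX), name='W')
b = theano.shared(np.zeros(10, dtype=theano.config.floatX), name='b')

X = T.matrix('X')
Y = T.matrix('Y')  # one-hot targets
prob = softmax(T.dot(X, W) + b)
C, lr = 0.001, 0.01
# mean cross-entropy plus an L2 penalty, as in trCost/vateCost above
cost = T.mean(categorical_crossentropy(prob, Y)) + C * T.sum(W ** 2)
grads = T.grad(cost, [W, b])
train = theano.function([X, Y], cost,
                        updates=[(p, p - lr * g) for p, g in zip([W, b], grads)],
                        allow_input_downcast=True)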
Example 2
    def __init__(self, fin, h1, h2, outputs,
                 lr, C, pDropHidden1=0.2, pDropHidden2=0.5):
        # hyperparameters
        self.lr = lr
        self.C = C
        self.pDropHidden1 = pDropHidden1
        self.pDropHidden2 = pDropHidden2
        # put all parameters to be optimized (connection weights and biases) into a list
        self.params = []
        # fully-connected layers; the neuron count of the last convolutional layer is needed as the MLP input size
        self.params.append(layerMLPParams((fin, h1)))
        self.params.append(layerMLPParams((h1, h2)))
        self.params.append(layerMLPParams((h2, outputs)))

        # define Theano symbolic variables and build the Theano expressions
        self.X = T.matrix('X')
        self.Y = T.matrix('Y')
        # training-set cost function
        YDropProb = model(self.X, self.params, pDropHidden1, pDropHidden2)
        self.trNeqs = basicUtils.neqs(YDropProb, self.Y)
        trCrossEntropy = categorical_crossentropy(YDropProb, self.Y)
        self.trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))

        # validation/test-set cost function
        YFullProb = model(self.X, self.params, 0., 0.)
        self.vateNeqs = basicUtils.neqs(YFullProb, self.Y)
        self.YPred = T.argmax(YFullProb, axis=1)
        vateCrossEntropy = categorical_crossentropy(YFullProb, self.Y)
        self.vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
Example 3
    def __init__(self, fin, f1, nin1, f2, nin2, f3, nin3, expand, h1, outputs,
                 lr, C, pDropConv=0.2, pDropHidden=0.5):
        # hyperparameters
        self.lr = lr
        self.C = C
        self.pDropConv = pDropConv
        self.pDropHidden = pDropHidden
        # put all parameters to be optimized (connection weights and biases) into a list
        self.params = []
        self.paramsNIN = []
        self.paramsConv = []
        # convolutional layers: w = (feature maps of this layer, feature maps of previous layer, kernel rows, kernel cols), b = (feature maps of this layer)
        self.paramsNIN.append(layerNINParams((f1, fin, nin1, 3, 3), expand))
        self.paramsNIN.append(layerNINParams((f2, f1 * expand, nin2, 3, 3), expand))
        self.paramsNIN.append(layerNINParams((f3, f2 * expand, nin3, 3, 3), expand))
        # global average pooling layers
        self.paramsConv.append(layerConvParams((h1, f3 * expand, 1, 1)))
        self.paramsConv.append(layerConvParams((outputs, h1, 1, 1)))
        self.params = self.paramsNIN + self.paramsConv

        # define Theano symbolic variables and build the Theano expressions
        self.X = T.tensor4('X')
        self.Y = T.matrix('Y')
        # training-set cost function
        YDropProb = model(self.X, self.params, pDropConv, pDropHidden)
        self.trNeqs = basicUtils.neqs(YDropProb, self.Y)
        trCrossEntropy = categorical_crossentropy(YDropProb, self.Y)
        self.trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))

        # validation/test-set cost function
        YFullProb = model(self.X, self.params, 0., 0.)
        self.vateNeqs = basicUtils.neqs(YFullProb, self.Y)
        self.YPred = T.argmax(YFullProb, axis=1)
        vateCrossEntropy = categorical_crossentropy(YFullProb, self.Y)
        self.vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
Example 4
    def cost(self, targets, mask=None):
        prediction = self.p_y_given_x

        if prediction.ndim == 3:
        # prediction = prediction.dimshuffle(1,2,0).flatten(2).dimshuffle(1,0)
            prediction_flat = prediction.reshape(((prediction.shape[0] *
                                                prediction.shape[1]),
                                                prediction.shape[2]), ndim=2)
            targets_flat = targets.flatten()
            mask_flat = mask.flatten()
            ce = categorical_crossentropy(prediction_flat, targets_flat) * mask_flat
        else:
            ce = categorical_crossentropy(prediction, targets)
        return T.sum(ce)
    def cost(self, p_y_given_x, targets, mask=None):
        prediction = p_y_given_x
        if prediction.ndim == 3:
            prediction_flat = prediction.reshape(((prediction.shape[0] *
                                                   prediction.shape[1]),
                                                  prediction.shape[2]), ndim=2)
            targets_flat = targets.flatten()
            mask_flat = mask.flatten()
            ce = categorical_crossentropy(prediction_flat, targets_flat) * mask_flat

            return T.sum(ce)

        assert mask is None
        ce = categorical_crossentropy(prediction, targets)
        return T.sum(ce)
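A minimal sketch of how a masked sequence cost of this shape can be evaluated, using plain Theano and illustrative dimensions (time=4, batch=3, classes=5):

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import categorical_crossentropy

pred = T.tensor3('pred')        # (time, batch, classes), rows sum to 1
targets = T.imatrix('targets')  # (time, batch) integer labels
mask = T.matrix('mask')         # (time, batch); 1 = real step, 0 = padding

# flatten time and batch into one axis, as in the cost() methods above
pred_flat = pred.reshape((pred.shape[0] * pred.shape[1], pred.shape[2]))
ce = categorical_crossentropy(pred_flat, targets.flatten()) * mask.flatten()
cost_fn = theano.function([pred, targets, mask], T.sum(ce),
                          allow_input_downcast=True)

p = np.full((4, 3, 5), 0.2)                        # uniform predictions
t = np.random.randint(0, 5, size=(4, 3)).astype('int32')
m = np.ones((4, 3))
print(cost_fn(p, t, m))                            # 12 * -log(0.2) for a fully unmasked batch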
Example 6
    def cost_entry(self, targets, mask=None):
        prediction = self.p_y_given_x  # (9,5,24)

        if prediction.ndim == 3:
            # prediction = prediction.dimshuffle(1,2,0).flatten(2).dimshuffle(1,0)
            prediction_flat = prediction.reshape(((prediction.shape[0] *
                                                   prediction.shape[1]),
                                                  prediction.shape[2]), ndim=2)  # (45,24)
            targets_flat = targets.flatten()
            mask_flat = mask.flatten()
            ce = categorical_crossentropy(prediction_flat, targets_flat) * mask_flat
        else:
            ce = categorical_crossentropy(prediction, targets)
        ce_entry = ce.reshape((prediction.shape[0], prediction.shape[1]), ndim=2).sum(axis=0)  # (5)
        return ce_entry
Example 7
    def __init__(self, fin, f1, piece1, f2, piece2, f3, piece3, h1, pieceh1, h2, pieceh2, outputs,
                 lr, C, pDropConv=0.2, pDropHidden=0.5):
        # hyperparameters
        self.lr = lr
        self.C = C
        self.pDropConv = pDropConv
        self.pDropHidden = pDropHidden
        # put all parameters to be optimized (connection weights and biases) into a list
        self.params = []
        self.paramsCNN = []
        self.paramsMLP = []
        mapunits = []
        pieces = []
        # convolutional layers: w = (feature maps of this layer, feature maps of previous layer, kernel rows, kernel cols), b = (feature maps of this layer)
        self.paramsCNN.append(layerCNNParams((f1 * piece1, fin, 3, 3)))  # conv: (32, 32) pool: (16, 16)
        mapunits.append(f1)
        pieces.append(piece1)
        self.paramsCNN.append(layerCNNParams((f2 * piece2, f1, 3, 3)))  # conv: (16, 16) pool: (8, 8)
        mapunits.append(f2)
        pieces.append(piece2)
        self.paramsCNN.append(layerCNNParams((f3 * piece3, f2, 3, 3)))  # conv: (8, 8) pool: (4, 4)
        mapunits.append(f3)
        pieces.append(piece3)
        # fully-connected layers; the neuron count of the last convolutional layer is needed as the MLP input size
        self.paramsMLP.append(layerMLPParams((f3 * 4 * 4, h1 * pieceh1)))
        mapunits.append(h1)
        pieces.append(pieceh1)
        self.paramsMLP.append(layerMLPParams((h1, h2 * pieceh2)))
        mapunits.append(h2)
        pieces.append(pieceh2)
        self.paramsMLP.append(layerMLPParams((h2, outputs)))
        self.params = self.paramsCNN + self.paramsMLP

        # define Theano symbolic variables and build the Theano expressions
        self.X = T.tensor4('X')
        self.Y = T.matrix('Y')
        # training-set cost function
        YDropProb = model(self.X, self.params, mapunits, pieces, pDropConv, pDropHidden)
        self.trNeqs = basicUtils.neqs(YDropProb, self.Y)
        trCrossEntropy = categorical_crossentropy(YDropProb, self.Y)
        self.trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))

        # validation/test-set cost function
        YFullProb = model(self.X, self.params, mapunits, pieces, 0., 0.)
        self.vateNeqs = basicUtils.neqs(YFullProb, self.Y)
        self.YPred = T.argmax(YFullProb, axis=1)
        vateCrossEntropy = categorical_crossentropy(YFullProb, self.Y)
        self.vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
Example 8
def categorical_crossentropy_segm(prediction_proba, targets):
    '''
    MODIFICATIONS:
        - reshape from image-size to array and back
    '''
    shape = T.shape(prediction_proba)
    pred_mod1 = T.transpose(prediction_proba, (0,2,3,1))
    pred_mod = T.reshape(pred_mod1, (-1,shape[1]))
    if prediction_proba.ndim == targets.ndim:
        targ_mod1 = T.transpose(targets,(0,2,3,1))
        targ_mod = T.reshape(targ_mod1,(-1,shape[1]))
    else:
        targ_mod = T.reshape(targets, (-1,))
    results = categorical_crossentropy(pred_mod, targ_mod)

    results = T.reshape(results, (shape[0],shape[2],shape[3]))

    # QUICK IMPLEMENTATION FOR TWO SPECIFIC CLASSES. NEEDS GENERALIZATION
    # Weights depending on class occurrence:
    weights = (1.02275, 44.9647)
    cars_indx, not_cars_indx = T.nonzero(targets), T.nonzero(T.eq(targets,0))
    results = T.set_subtensor(results[cars_indx], results[cars_indx]*float32(weights[1]))
    results = T.set_subtensor(results[not_cars_indx], results[not_cars_indx]*float32(weights[0]))


    return T.sum(results, axis=(1,2))
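Note that T.set_subtensor is functional: it returns a new tensor rather than modifying its argument, which is why its result is reassigned to results above. A tiny self-contained check of that behavior:

import numpy as np
import theano
import theano.tensor as T

v = T.vector('v')
doubled_head = T.set_subtensor(v[:2], v[:2] * 2.0)  # new tensor, v itself is untouched
f = theano.function([v], [v, doubled_head], allow_input_downcast=True)
print(f(np.arange(4.0)))   # [0, 1, 2, 3] and [0, 2, 2, 3]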
Example 9
def get_expression(model, train, img_in, label_in):
    conv1 = relu(model.conv(img_in, name='conv1', shape=(32, 3, 3, 3, 1, 1)))
    conv2 = relu(model.conv(conv1, name='conv2', shape=(32, 32, 3, 3, 1, 1)))
    pool1 = model.pooling(conv2, name='pool1', shape=(2, 2))
    if train:
        pool1 = drop1.drop(pool1)

    conv3 = relu(model.conv(pool1, name='conv3', shape=(64, 32, 3, 3, 1, 1)))
    conv4 = relu(model.conv(conv3, name='conv4', shape=(64, 64, 3, 3, 1, 1)))
    pool2 = model.pooling(conv4, name='pool2', shape=(2, 2))
    if train:
        pool2 = drop2.drop(pool2)

    pool2 = pool2.reshape((batch_size, -1))
    fc1 = relu(model.fc(pool2, name='fc1', shape=(4096, 512)))
    if train:
        fc1 = drop3.drop(fc1)
    fc2 = softmax(model.fc(fc1, name='fc2', shape=(512, 10)))

    loss = T.mean(NN.categorical_crossentropy(fc2, label_in))

    if train:
        grads = rmsprop(loss,
                        model.get_params(),
                        lr=var_lr,
                        epsilon=var_lr**2,
                        return_norm=False)
        return loss, fc2, grads
    else:
        return loss, fc2
Example 10
def create_train_func(layers, lr=0.01):
    # dims: batch, sequence, vocabulary
    X = T.tensor3('X')
    X_batch = T.tensor3('X_batch')

    # dims: target
    y = T.ivector('y')
    y_batch = T.ivector('y_batch')

    y_hat = get_output(layers['l_out'], X, deterministic=False)

    train_loss = T.mean(categorical_crossentropy(y_hat, y), axis=0)
    params = get_all_params(layers['l_out'], trainable=True)

    updates = adagrad(train_loss, params, lr)

    train_func = theano.function(
        inputs=[theano.In(X_batch), theano.In(y_batch)],
        outputs=train_loss,
        updates=updates,
        givens={
            X: X_batch,
            y: y_batch,
        },
    )

    return train_func
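A rough usage sketch for create_train_func, assuming a toy Lasagne network; the layer sizes and names here are invented for illustration and are not part of the original snippet:

import numpy as np
import lasagne

# hypothetical tiny classifier over (batch, sequence, vocabulary) inputs
l_in = lasagne.layers.InputLayer((None, 5, 12))
l_flat = lasagne.layers.FlattenLayer(l_in)
l_out = lasagne.layers.DenseLayer(l_flat, num_units=4,
                                  nonlinearity=lasagne.nonlinearities.softmax)

train_func = create_train_func({'l_out': l_out}, lr=0.01)
X_np = np.random.rand(8, 5, 12).astype('float32')
y_np = np.random.randint(0, 4, size=8).astype('int32')
print(train_func(X_np, y_np))   # scalar training loss for this batch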
Example 11
 def masked_softmax_cross_entropy(self, preds, labels, mask):
     """Softmax cross-entropy loss with masking."""
     loss = nnet.categorical_crossentropy(preds, labels)
     mask = mask.astype('float32')
     mask /= T.mean(mask)
     loss *= mask
     return T.mean(loss)
def sequence_categorical_crossentropy(prediction, targets, mask):
    prediction_flat = prediction.reshape(
        ((prediction.shape[0] * prediction.shape[1]), prediction.shape[2]),
        ndim=2)
    targets_flat = targets.flatten()
    mask_flat = mask.flatten()
    ce = categorical_crossentropy(prediction_flat, targets_flat)
    return T.sum(ce * mask_flat)
def sequence_categorical_crossentropy(prediction, targets, mask):
    prediction_flat = prediction.reshape(((prediction.shape[0] *
                                           prediction.shape[1]),
                                          prediction.shape[2]), ndim=2)
    targets_flat = targets.flatten()
    mask_flat = mask.flatten()
    ce = categorical_crossentropy(prediction_flat, targets_flat)
    return T.sum(ce * mask_flat)
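The two helpers above use different scalings: masked_softmax_cross_entropy divides the mask by its mean so that T.mean(loss) averages over the valid entries only, while sequence_categorical_crossentropy simply sums the masked losses. A small numeric sketch (plain numpy) of the first convention:

import numpy as np

loss = np.array([2.0, 4.0, 6.0, 8.0])
mask = np.array([1.0, 1.0, 0.0, 0.0])   # only the first two entries are valid
mask = mask / mask.mean()               # -> [2, 2, 0, 0]
print(np.mean(loss * mask))             # 3.0, the mean over the valid losses only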
Example 14
    def __init__(self, fin, f1, f2, f3, f4, f5, f6, h1, h2, outputs,
                 lr, C, pDropConv=0.2, pDropHidden=0.5, batchSize=128):
        # hyperparameters
        self.lr = lr
        self.C = C
        self.pDropConv = pDropConv
        self.pDropHidden = pDropHidden
        self.batchSize = batchSize
        # put all parameters to be optimized (connection weights and biases) into a list
        self.params = []
        self.paramsCNN = []
        self.paramsMLP = []
        self.indices = []
        # convolutional layers: w = (feature maps of this layer, feature maps of previous layer, kernel rows, kernel cols), b = (feature maps of this layer)
        inputShape = (batchSize, fin, 32, 32)
        layerShape = addConvLayer(inputShape, (f1, fin, 3, 3),
                                  self.paramsCNN, self.indices, 'half', (1, 1))
        layerShape = addPoolLayer(layerShape, (2, 2), 'valid')
        layerShape = addConvLayer(layerShape, (f2, f1, 3, 3),
                                  self.paramsCNN, self.indices, 'half', (1, 1))
        layerShape = addPoolLayer(layerShape, (2, 2), 'valid')
        layerShape = addConvLayer(layerShape, (f3, f2, 3, 3),
                                  self.paramsCNN, self.indices, 'half', (1, 1))
        layerShape = addPoolLayer(layerShape, (2, 2), 'valid')
        # fully-connected layers; the neuron count of the last convolutional layer is needed as the MLP input size
        self.paramsMLP.append(layerMLPParams((f3 * np.prod(layerShape[-2:]), h1)))
        self.paramsMLP.append(layerMLPParams((h1, h2)))
        self.paramsMLP.append(layerMLPParams((h2, outputs)))
        self.params = self.paramsCNN + self.paramsMLP

        # define Theano symbolic variables and build the Theano expressions
        self.X = T.tensor4('X')
        self.Y = T.matrix('Y')
        # training-set cost function
        YDropProb = model(self.X, self.params, self.indices, pDropConv, pDropHidden)
        self.trNeqs = basicUtils.neqs(YDropProb, self.Y)
        trCrossEntropy = categorical_crossentropy(YDropProb, self.Y)
        self.trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))

        # validation/test-set cost function
        YFullProb = model(self.X, self.params, self.indices, 0., 0.)
        self.vateNeqs = basicUtils.neqs(YFullProb, self.Y)
        self.YPred = T.argmax(YFullProb, axis=1)
        vateCrossEntropy = categorical_crossentropy(YFullProb, self.Y)
        self.vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
Example 15
def cross_entropy(yhat, y):
  last_dim_len = yhat.shape[-1]
  if y.ndim == yhat.ndim:
    # y is one-hot, same shape as yhat
    yhat = T.reshape(yhat, (-1, last_dim_len))
    y = T.reshape(y, (-1, last_dim_len))
  elif y.ndim == yhat.ndim - 1:
    # y holds integer class labels
    yhat = T.reshape(yhat, (-1, last_dim_len))
    y = T.flatten(y)
  return T.mean(nnet.categorical_crossentropy(yhat, y))
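With the reshape calls and the label branch repaired as above, a minimal usage sketch covering both the one-hot and the integer-label cases (shapes are illustrative, and the snippet's own imports of theano.tensor as T and nnet are assumed):

import numpy as np
import theano
import theano.tensor as T

yhat = T.matrix('yhat')     # (batch, classes) probabilities
y_hot = T.matrix('y_hot')   # one-hot targets, same shape
y_int = T.ivector('y_int')  # integer targets, one dimension less

f_hot = theano.function([yhat, y_hot], cross_entropy(yhat, y_hot),
                        allow_input_downcast=True)
f_int = theano.function([yhat, y_int], cross_entropy(yhat, y_int),
                        allow_input_downcast=True)

p = np.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]])
print(f_hot(p, np.eye(3)[[0, 1]]), f_int(p, np.array([0, 1], dtype='int32')))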
Example 16
def cross_entropy(yhat, y):
    last_dim_len = yhat.shape[-1]
    if y.ndim == yhat.ndim:
        # y is one-hot, same shape as yhat
        yhat = T.reshape(yhat, (-1, last_dim_len))
        y = T.reshape(y, (-1, last_dim_len))
    elif y.ndim == yhat.ndim - 1:
        # y holds integer class labels
        yhat = T.reshape(yhat, (-1, last_dim_len))
        y = T.flatten(y)
    return T.mean(nnet.categorical_crossentropy(yhat, y))
Example 17
    def __init__(self, fin, f1, nin1, f2, nin2, f3, nin3, h1, outputs,
                 lr, C, pDropConv=0.2, pDropHidden=0.5):
        # hyperparameters
        self.lr = lr
        self.C = C
        self.pDropConv = pDropConv
        self.pDropHidden = pDropHidden
        # put all parameters to be optimized (connection weights and biases) into a list
        self.params = []
        self.paramsNIN = []
        self.paramsFCorConv = []
        # convolutional layers: w = (feature maps of this layer, feature maps of previous layer, kernel rows, kernel cols), b = (feature maps of this layer)
        inputShape = (32, 32)
        layerShape = addNINLayer(inputShape, (f1, fin, nin1, 3, 3), self.paramsNIN, 'half')
        layerShape = addPoolLayer(layerShape, (2, 2))
        layerShape = addNINLayer(layerShape, (f2, f1, nin2, 3, 3), self.paramsNIN, 'half')
        layerShape = addPoolLayer(layerShape, (2, 2))
        layerShape = addNINLayer(layerShape, (f3, f2, nin3, 3, 3), self.paramsNIN, 'half')
        layerShape = addPoolLayer(layerShape, (2, 2))
        # fully-connected layers; the neuron count of the last convolutional layer is needed as the MLP input size
        # self.paramsFCorGAP.append(layerMLPParams((f3 * np.prod(layerShape), h1)))
        # self.paramsFCorGAP.append(layerMLPParams((h1, outputs)))
        # global average pooling layers
        self.paramsFCorConv.append(layerConvParams((h1, f3, 1, 1)))
        self.paramsFCorConv.append(layerConvParams((outputs, h1, 1, 1)))
        self.params = self.paramsNIN + self.paramsFCorConv

        # define Theano symbolic variables and build the Theano expressions
        self.X = T.tensor4('X')
        self.Y = T.matrix('Y')
        # training-set cost function
        YDropProb = model(self.X, self.params, pDropConv, pDropHidden)
        self.trNeqs = basicUtils.neqs(YDropProb, self.Y)
        trCrossEntropy = categorical_crossentropy(YDropProb, self.Y)
        self.trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))

        # validation/test-set cost function
        YFullProb = model(self.X, self.params, 0., 0.)
        self.vateNeqs = basicUtils.neqs(YFullProb, self.Y)
        self.YPred = T.argmax(YFullProb, axis=1)
        vateCrossEntropy = categorical_crossentropy(YFullProb, self.Y)
        self.vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
    def get_loss(self):
        """
        The mean of the categorical cross-entropy tensor.

        Returns
        -------
        theano expression
            The loss function.
        """
        input = self.inputs[0]
        target = self.targets[0]
        return mean(nnet.categorical_crossentropy(input, target))
    def get_loss(self):
        """
        The mean of the categorical cross-entropy tensor.

        Returns
        -------
        theano expression
            The loss function.
        """
        input = self.inputs[0]
        target = self.targets[0]
        return mean(nnet.categorical_crossentropy(input, target))
Example 20
    def __init__(self, n_inputs, n_hidden, n_outputs, **kwargs):
        property_defaults = {
            'epochs': 1000,
            'print_every': 100,
            'reg': .001,
            'alpha': .01,
            'batch': 0,
            'noise_scale': 1.0,
            'nonlin': T.nnet.relu
        }
        for (prop, default) in property_defaults.items():
            setattr(self, prop, kwargs.get(prop, default))

        self.arch = [n_inputs] + n_hidden + [n_outputs]
        final = len(self.arch) - 1  # the true "number of layers"

        self.X = T.dmatrix('X')
        self.y = T.dmatrix('y') # one-hot outputs

        # Construct layers
        layer_outputs,self.parameters,weights = [self.X],[],[]
        for index, layer in enumerate(n_hidden+[n_outputs]):
            nonlin = T.nnet.softmax if index == final-1 else self.nonlin
            layer = Layer(n_inputs=self.arch[index],
                          n_nodes=self.arch[index+1],
                          inputs=layer_outputs[index],
                          layer=index+1,
                          noise_scale=self.noise_scale,
                          nonlin=nonlin)
            layer_outputs.append(layer.output)
            self.parameters.extend([layer.W,layer.b])
            weights.append(layer.W)

        # Expressions for building theano functions
        output = layer_outputs[-1]
        prediction = np.argmax(output,axis=1)
        crossentropy = categorical_crossentropy(output,self.y).mean()
        regularization = self.reg * sum([(W**2).sum() for W in weights])
        cost = crossentropy + regularization

        # gradients
        grads = T.grad(cost,self.parameters)
        updates = [(p,p - self.alpha*g) for p,g in zip(self.parameters,
                                                       grads)]

        # build theano functions for gradient descent and model tuning
        self.epoch = theano.function(inputs = [self.X,self.y],
                                     outputs = [],
                                     updates = updates)
        self.count_cost = theano.function(inputs = [self.X,self.y],
                                          outputs = cost)
        self.predict = theano.function(inputs=[self.X],
                                       outputs=prediction)
Example 21
def create_train_func(layers, rnn):

    if rnn == "LSTM":
        import model.LSTM
        rnn = model.LSTM
    elif rnn == "GRU":
        import model.GRU
        rnn = model.GRU
    elif rnn == "Recurrent":
        import model.Recurrent
        rnn = model.Recurrent

    # dims: batch, sequence, vocabulary
    X = T.tensor3('X')
    X_batch = T.tensor3('X_batch')

    # dims: target
    y = T.ivector('y')
    y_batch = T.ivector('y_batch')

    y_hat = lasagne.layers.get_output(layers['l_out'], X, deterministic=True)

    train_loss = T.mean(categorical_crossentropy(y_hat, y), axis=0)

    # define lr
    lr = T.scalar(name='lr')
    # ML: if quantized, W updates. Cannot work
    W = lasagne.layers.get_all_params(layers['l_out'], binary=True)
    W_grads = rnn.compute_rnn_grads(train_loss, layers['l_out'])
    updates = lasagne.updates.adam(
        loss_or_grads=W_grads, params=W,
        learning_rate=lr)  # ML: the default upgrade mode is ada
    # ML: lack of clipping

    params = lasagne.layers.get_all_params(layers['l_out'],
                                           trainable=True,
                                           binary=False)
    updates = OrderedDict(updates.items() + lasagne.updates.adam(
        loss_or_grads=train_loss, params=params, learning_rate=lr).items())
    '''params = lasagne.layers.get_all_params(layers['l_out'], trainable=True)
    updates = lasagne.updates.adagrad(train_loss, params, lr)'''

    train_func = theano.function(
        inputs=[theano.In(X_batch), theano.In(y_batch), lr],
        outputs=train_loss,
        updates=updates,
        givens={
            X: X_batch,
            y: y_batch,
        },
    )

    return train_func
Example 22
 def forward_pass(self, sentence, label):
     """
     Given sentence, forward pass
     """
     inpt_tree = self.mgr.get_tree(sentence)
     golden = label
     one_hot_golden = np.ones(shape=(self.num_classes, 1)) * 1e-9
     one_hot_golden[golden] = 1
     stack = self.mgr.get_tree_stack(sentence)
     node_hidden = [np.zeros(shape=self.mem_dim)] * (len(stack) + 1)
     node_c = [np.zeros(shape=self.mem_dim)] * (len(stack) + 1)
     #level-order traversal
     for node in stack:
         if node.is_leaf():
             # print(node.word)
             x = self.mgr.get_glove_vec(node.word)
             node_c[node.idx] = self.leaf_i(x) * self.leaf_u(x)
             node_hidden[node.idx] = node_c[node.idx] * self.get_tanh(
                 node_c[node.idx])
             # node_hidden[node.idx] = self.leaf_o(x) * self.outer_activation(node_c[node.idx])
             # node_hidden[node.idx] = self.leaf_o(node_c[node.idx])
             # print(node_c[node.idx])
             # print(node_hidden[node.idx])
         else:
             child_l, child_r = node.get_child()
             node_c[node.idx] = (
                 (self.composer_i(node_hidden[child_r.idx],
                                  node_hidden[child_l.idx]) *
                  self.composer_u(node_hidden[child_r.idx],
                                  node_hidden[child_l.idx])) +
                 (self.composer_f(node_hidden[child_r.idx],
                                  node_hidden[child_l.idx]) *
                  self.combine_c(node_c[child_r.idx], node_c[child_l.idx])))
             node_hidden[node.idx] = (self.composer_o(
                 node_hidden[child_r.idx], node_hidden[child_l.idx]) *
                                      self.get_tanh(node_c[node.idx]))
             # node_c[node.idx]=((self.composer_i(node_c[child_r.idx],node_c[child_l.idx])*
             # self.composer_u(node_c[child_r.idx],node_c[child_l.idx]))+
             # (self.composer_f(node_c[child_r.idx],node_c[child_l.idx]) *
             # self.combine_c(node_c[child_r.idx],node_c[child_l.idx])))
             # node_hidden[node.idx]=(self.composer_o(node_c[child_r.idx],node_c[child_l.idx]))
     #apply softmax
     pred = self.softmax(node_hidden[inpt_tree.root.idx])
     # print("pred:{} \n golden:{}".format(pred,one_hot_golden))
     # pred = self.softmax(node_c[inpt_tree.root.idx])
     self.error = categorical_crossentropy(one_hot_golden,
                                           pred) + self.param_error
     # print(self.error)
     return self.error, pred
Example 23
def seq_cat_crossent(pred, targ, mask, normalize=False):
    # dim 0 is time, dim 1 is batch, dim 2 is category
    pred_flat = pred.reshape(((pred.shape[0] * pred.shape[1]), pred.shape[2]), ndim=2)
    targ_flat = targ.flatten()
    mask_flat = mask.flatten()
    ce = categorical_crossentropy(pred_flat, targ_flat)
    # normalize by batch size and seq length
    cost = T.sum(ce * mask_flat)
    if normalize:
        # normalize by batch and length
        cost = cost / T.sum(mask_flat)
    else:
        # just normalize by batch size
        cost = cost / pred.shape[1]
    return cost
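A small usage sketch of seq_cat_crossent above, assuming the same imports as the snippet; with normalize=True the masked sum is divided by the number of valid timesteps, otherwise only by the batch size:

import numpy as np
import theano
import theano.tensor as T

pred = T.tensor3('pred')    # (time, batch, classes)
targ = T.imatrix('targ')
mask = T.matrix('mask')

per_batch = theano.function([pred, targ, mask],
                            seq_cat_crossent(pred, targ, mask, normalize=False),
                            allow_input_downcast=True)
per_step = theano.function([pred, targ, mask],
                           seq_cat_crossent(pred, targ, mask, normalize=True),
                           allow_input_downcast=True)

p = np.full((4, 3, 5), 0.2)               # uniform predictions over 5 classes
t = np.zeros((4, 3), dtype='int32')
m = np.ones((4, 3))
print(per_batch(p, t, m), per_step(p, t, m))   # 4 * -log(0.2) vs -log(0.2)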
Example 24
def cross_entropy(obj):
    obj.out = T.clip(obj.out, _EPSILON, 1.0 - _EPSILON)
    obj.loss = nnet.categorical_crossentropy(obj.out, obj.y).mean()

    # one-hot to serial
    obj.out = obj.out.argmax(axis=1)[:, None]
    obj.y = obj.y.argmax(axis=1)[:, None]

    # classification accuracy
    obj.train_acc = (T.eq(obj.out, obj.y).sum().astype(theano.config.floatX) /
                     obj.n_batch)
    obj.valid_acc = (T.eq(obj.out, obj.y).sum().astype(theano.config.floatX) /
                     obj.x_test_arr.shape[0])

    return obj
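The T.clip above relies on a small module-level constant _EPSILON that is not shown. A minimal sketch of why the clip matters, with an assumed epsilon value:

import numpy as np
import theano
import theano.tensor as T
from theano.tensor import nnet

_EPSILON = 1e-7   # assumed value; the original module defines its own
p = T.matrix('p')
y = T.ivector('y')
safe_p = T.clip(p, _EPSILON, 1.0 - _EPSILON)   # avoid log(0) -> inf
loss = theano.function([p, y], nnet.categorical_crossentropy(safe_p, y).mean(),
                       allow_input_downcast=True)

degenerate = np.array([[1.0, 0.0], [0.0, 1.0]])            # true class gets p = 0
print(loss(degenerate, np.array([1, 0], dtype='int32')))   # ~ -log(1e-7), finite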
Example 25
    def __call__(self, prediction_proba, targets):
        if not self.boosting_weights:
            raise ValueError("Boosting residuals not set up")

        shape = T.shape(prediction_proba)
        pred_mod1 = T.transpose(prediction_proba, (0,2,3,1))
        pred_mod = T.reshape(pred_mod1, (-1,shape[1]))
        if prediction_proba.ndim == targets.ndim:
            targ_mod1 = T.transpose(targets,(0,2,3,1))
            targ_mod = T.reshape(targ_mod1,(-1,shape[1]))
        else:
            targ_mod = T.reshape(targets, (-1,))
        results = categorical_crossentropy(pred_mod, targ_mod)
        results *= self.boosting_weights[:results.shape[0]]
        results = T.reshape(results, (shape[0],shape[2],shape[3]))
        return T.sum(results, axis=(1,2))
Example 26
def seq_cat_crossent(pred, targ, mask, normalize=False):
    # dim 0 is time, dim 1 is batch, dim 2 is category
    pred_flat = pred.reshape(((pred.shape[0] * pred.shape[1]), pred.shape[2]),
                             ndim=2)
    targ_flat = targ.flatten()
    mask_flat = mask.flatten()
    ce = categorical_crossentropy(pred_flat, targ_flat)
    # normalize by batch size and seq length
    cost = T.sum(ce * mask_flat)
    if normalize:
        # normalize by batch and length
        cost = cost / T.sum(mask_flat)
    else:
        # just normalize by batch size
        cost = cost / pred.shape[1]
    return cost
Example 27
 def __init__(self, filterShape, C):
     self.C = C
     filterRand = myUtils.elm.convfilterinit(filterShape)
     self.filterShared = theano.shared(floatX(filterRand), borrow=True)
     self.X = T.ftensor4()
     self.Y = T.fmatrix()
     self.forwardout = self._forward()
     self.forwardfn = theano.function([self.X], self.forwardout, allow_input_downcast=True)
     self.sharedBeta = theano.shared(floatX(np.zeros((5760, 10))), borrow=True)
     predictout = self.forwardout.dot(self.sharedBeta)
     predictout = softmax(predictout)  # the output must go through softmax so it stays between 0 and 1
     self.predictfn = theano.function([self.X], predictout, allow_input_downcast=True)
     crossentropy = categorical_crossentropy(predictout, self.Y)
     cost = T.mean(crossentropy) + basicUtils.regularizer([self.filterShared])
     updates = gradient.sgdm(cost, [self.filterShared])
     self.trainfn = theano.function([self.X, self.Y], cost, updates=updates, allow_input_downcast=True)
Example 28
def test_asymptotic_32():
    """
    This test makes sure that our functions behave sensibly when huge values are present
    """

    #TODO: consider adding the optimization of crossentropy into the current mode for the
    # purpose of running this test

    for dtype in 'float32', 'float64':
        if dtype == 'float32':
            x = tensor.fmatrix()
            x2 = tensor.fvector()
        else:
            x = tensor.dmatrix()
            x2 = tensor.dvector()
        y = tensor.lvector()

        c = categorical_crossentropy(softmax(x + x2), y)
        f = theano.function([x, y, x2],
                            [c.sum(), tensor.grad(c.sum(), x)],
                            mode='FAST_RUN')
        if 0:
            for i, n in enumerate(f.maker.env.toposort()):
                print i, n

        xval = numpy.zeros((5, 5), dtype=dtype)
        x2val = numpy.zeros(5, dtype=xval.dtype)
        for i in xrange(100):

            cval, gxval = f(xval, numpy.arange(5), x2val)
            xval -= 100.3 * gxval
            #print cval, gxval
        assert cval == 0  # no problem going to zero error

        #what about when x gets really big?

        xval = numpy.zeros((5, 5), dtype=dtype)
        x2val = numpy.zeros(5, dtype=xval.dtype)
        for i in xrange(100):

            cval, gxval = f(xval, numpy.arange(5), x2val)
            xval += 100000.3 * gxval
            #print cval, gxval

        assert cval > 61750000
        assert gxval[0, 0] == -1.0
        assert gxval[0, 1] == 0.25
Example 29
def test_asymptotic_32():
    """
    This test makes sure that our functions behave sensibly when
    huge values are present
    """

    #TODO: consider adding the optimization of crossentropy into the current
    # mode for the purpose of running this test

    for dtype in 'float32', 'float64':
        if dtype == 'float32':
            x = tensor.fmatrix()
            x2 = tensor.fvector()
        else:
            x = tensor.dmatrix()
            x2 = tensor.dvector()
        y = tensor.lvector()

        c = categorical_crossentropy(softmax(x + x2), y)
        f = theano.function([x, y, x2], [c.sum(),
                            tensor.grad(c.sum(), x)], mode='FAST_RUN')
        if 0:
            for i, n in enumerate(f.maker.fgraph.toposort()):
                print i, n

        xval = numpy.zeros((5, 5), dtype=dtype).astype(dtype)
        x2val = numpy.zeros(5, dtype=xval.dtype).astype(dtype)
        for i in xrange(100):
            cval, gxval = f(xval, numpy.arange(5), x2val)
            xval -= 100.3 * gxval
            #print cval, gxval
        assert cval == 0  # no problem going to zero error

        #what about when x gets really big?

        xval = numpy.zeros((5, 5), dtype=dtype)
        x2val = numpy.zeros(5, dtype=xval.dtype)
        for i in xrange(100):

            cval, gxval = f(xval, numpy.arange(5), x2val)
            xval += 100000.3 * gxval
            #print cval, gxval

        assert cval > 61750000
        assert gxval[0, 0] == -1.0
        assert gxval[0, 1] == 0.25
Example 30
def test11():
    x = T.vector("x")
    x2 = T.matrix("x2")
    y = T.ivector("y")
    #z = T.vector("z")
    #z = T.nnet.softmax(x)
    z2 = categorical_crossentropy(x2, y)
    #fn = theano.function(inputs=[x], outputs=[z])
    fn2 = theano.function(inputs=[x2, y], outputs=[z2])
    x_in = [1, 2, 3, 4]
    x2_in = [
                [1,2,3],
                [1,2,3],
                [1,2,3],
                [1,2,3]
            ]
    y_in = [1, 0, 1, 0]
    #print fn(x_in)
    print fn2(x2_in, y_in)
Example 31
def create_vali_func(layers):
    # dims: batch, sequence, vocabulary
    X = T.tensor3('X')
    X_batch = T.tensor3('X_batch')

    # dims: target
    y = T.ivector('y')
    y_batch = T.ivector('y_batch')

    y_hat = lasagne.layers.get_output(layers['l_out'], X, deterministic=True)

    vali_loss = T.mean(categorical_crossentropy(y_hat, y), axis=0)

    vali_func = theano.function(
        inputs=[theano.In(X_batch), theano.In(y_batch)],
        outputs=vali_loss,
        updates=None,
        givens={
            X: X_batch,
            y: y_batch,
        },
    )

    return vali_func
Example 32
params.append([w31, w32, b31, b32])
# global average pooling
wgap1 = initial.weightInitCNN3((h1, f3 * expand, 1, 1), 'wgap')
bgap1 = initial.biasInit((h1,), 'bgap')
params.append([wgap1, bgap1])
wgap2 = initial.weightInitCNN3((outputs, h1, 1, 1), 'wgap')
bgap2 = initial.biasInit((outputs,), 'bgap')
params.append([wgap2, bgap2])

# define Theano symbolic variables and build the Theano expressions
X = T.tensor4('X')
Y = T.matrix('Y')
# training-set cost function
YDropProb = model(X, params, 0.2, 0.5)
trNeqs = basicUtils.neqs(YDropProb, Y)
trCrossEntropy = categorical_crossentropy(YDropProb, Y)
trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(params))

# validation/test-set cost function
YFullProb = model(X, params, 0., 0.)
vateNeqs = basicUtils.neqs(YFullProb, Y)
YPred = T.argmax(YFullProb, axis=1)
vateCrossEntropy = categorical_crossentropy(YFullProb, Y)
vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(params))
updates = gradient.sgdm(trCost, flatten(params), lr, nesterov=True)
train = function(
    inputs=[X, Y],
    outputs=[trCost, trNeqs],  # return fewer outputs to save time
    updates=updates,
    allow_input_downcast=True
)
Example 33
lasagne.layers.set_all_param_values(net['prob'], model['param values'])

googlenet_features = lasagne.layers.get_output(net['pool5/7x7_s1'], X)

# add a mlp on top of this
W = theano.shared(
    numpy.random.uniform(low=-0.1, high=0.1,
                         size=(1024, 10)).astype(numpy.float32),
    'linear_weights')
b = theano.shared(numpy.zeros(10).astype(numpy.float32))
all_parameters = [W, b]

output = tensor.dot(googlenet_features, W) + b
pred = tensor.nnet.softmax(output)

loss = categorical_crossentropy(pred, targets).mean()
loss.name = 'loss'

loss_test = categorical_crossentropy(pred, targets).mean()
loss_test.name = 'loss_test'

error = tensor.neq(tensor.argmax(pred, axis=1), tensor.argmax(targets,
                                                              axis=1)).mean()
error.name = 'error'

error_test = tensor.neq(tensor.argmax(pred, axis=1),
                        tensor.argmax(targets, axis=1)).mean()
error_test.name = 'error_test'

# construct update rule
learning_rate = 0.01
Example 34
 def cost_validation(self, net):
     return T.mean(categorical_crossentropy(self.output, net.y))
Example 35
train_stream = RandomHorizontalFlip(train_stream, which_sources=('features', ))

test_dataset = CIFAR10(('train', ), subset=slice_test)
test_stream = DataStream.default_stream(test_dataset,
                                        iteration_scheme=SequentialScheme(
                                            test_dataset.num_examples,
                                            batch_size))
test_stream = OneHotEncode(test_stream, which_sources=('targets', ))

X = tensor.ftensor4('features')
targets = tensor.fmatrix('targets')

output, output_test, all_parameters, acc_parameters = get_model(
    X, batch_size, (32, 32))

loss = categorical_crossentropy(output[:, :, 0, 0], targets).mean()
loss.name = 'loss'

loss_test = categorical_crossentropy(output_test[:, :, 0, 0], targets).mean()
loss_test.name = 'loss_test'

error = tensor.neq(tensor.argmax(output[:, :, 0, 0], axis=1),
                   tensor.argmax(targets, axis=1)).mean()
error.name = 'error'

error_test = tensor.neq(tensor.argmax(output_test[:, :, 0, 0], axis=1),
                        tensor.argmax(targets, axis=1)).mean()
error_test.name = 'error_test'

# construct update rule
learning_rate = 0.1
Example 36
 def cost_validation(self, net):
     return T.mean(categorical_crossentropy(self.output, net.y))
Example 37
    def __init__(self, i_size, h_size, o_size, weights=None):
        if not weights:
            self.W_xi = _init_weights((i_size, h_size))
            self.W_hi = _init_weights((h_size, h_size))
            self.W_ci = _init_weights((h_size, h_size))
            self.b_i = _init_zero_vec(h_size)

            self.W_xf = _init_weights((i_size, h_size))
            self.W_hf = _init_weights((h_size, h_size))
            self.W_cf = _init_weights((h_size, h_size))
            self.b_f = _init_zero_vec(h_size)

            self.W_xc = _init_weights((i_size, h_size))
            self.W_hc = _init_weights((h_size, h_size))
            self.b_c = _init_zero_vec(h_size)

            self.W_xo = _init_weights((i_size, h_size))
            self.W_ho = _init_weights((h_size, h_size))
            self.W_co = _init_weights((h_size, h_size))
            self.b_o = _init_zero_vec(h_size)

            self.W_hy = _init_weights((h_size, o_size))
            self.b_y = _init_zero_vec(o_size)
        else:
            self.W_xi = weights['W_xi']
            self.W_hi = weights['W_hi']
            self.W_ci = weights['W_ci']
            self.b_i = weights['b_i']

            self.W_xf = weights['W_xf']
            self.W_hf = weights['W_hf']
            self.W_cf = weights['W_cf']
            self.b_f = weights['b_f']

            self.W_xc = weights['W_xc']
            self.W_hc = weights['W_hc']
            self.b_c = weights['b_c']

            self.W_xo = weights['W_xo']
            self.W_ho = weights['W_ho']
            self.W_co = weights['W_co']
            self.b_o = weights['b_o']

            self.W_hy = weights['W_hy']
            self.b_y = weights['b_y']

        S_h = _init_zero_vec(h_size)  # init values for hidden units
        S_c = _init_zero_vec(h_size)  # init values for cell units

        S_x = T.matrix()  # inputs
        Y = T.matrix()  # targets

        (S_h_r, S_c_r, S_y_r), _ = theano.scan(
            fn=_step,
            sequences=S_x,
            outputs_info=[S_h, S_c, None],
            non_sequences=[
                self.W_xi, self.W_hi, self.W_ci, self.b_i, self.W_xf,
                self.W_hf, self.W_cf, self.b_f, self.W_xc, self.W_hc, self.b_c,
                self.W_xo, self.W_ho, self.W_co, self.b_o, self.W_hy, self.b_y
            ])

        cost = T.mean(categorical_crossentropy(softmax(S_y_r), Y))

        updates = _gradient_descent(cost, [
            self.W_xi, self.W_hi, self.W_ci, self.b_i, self.W_xf, self.W_hf,
            self.W_cf, self.b_f, self.W_xc, self.W_hc, self.b_c, self.W_xo,
            self.W_ho, self.W_co, self.b_o, self.W_hy, self.b_y
        ])

        self.train = theano.function(inputs=[S_x, Y],
                                     outputs=cost,
                                     updates=updates,
                                     allow_input_downcast=True)

        self.predict = theano.function(inputs=[S_x],
                                       outputs=S_y_r,
                                       allow_input_downcast=True)

        S_h_v = T.vector()
        S_c_v = T.vector()

        S_h_s, S_c_s, S_y_s = _step(S_x, S_h_v, S_c_v, self.W_xi, self.W_hi,
                                    self.W_ci, self.b_i, self.W_xf, self.W_hf,
                                    self.W_cf, self.b_f, self.W_xc, self.W_hc,
                                    self.b_c, self.W_xo, self.W_ho, self.W_co,
                                    self.b_o, self.W_hy, self.b_y)

        self.sampling = theano.function(inputs=[S_x, S_h_v, S_c_v],
                                        outputs=[S_h_s, S_c_s, S_y_s],
                                        allow_input_downcast=True)
Example 38
train_stream = OneHotEncode(train_stream, which_sources=('targets',))
train_stream = RandomHorizontalFlip(train_stream, which_sources=('features',))

test_dataset = CIFAR10(('train',), subset=slice_test)
test_stream = DataStream.default_stream(
    test_dataset,
    iteration_scheme=SequentialScheme(test_dataset.num_examples, batch_size)
)
test_stream = OneHotEncode(test_stream, which_sources=('targets',))

X = tensor.ftensor4('features')
targets = tensor.fmatrix('targets')

output, output_test, all_parameters, acc_parameters = get_model(X, batch_size, (32, 32))

loss = categorical_crossentropy(output[:,:,0,0], targets).mean()
loss.name = 'loss'

loss_test = categorical_crossentropy(output_test[:,:,0,0], targets).mean()
loss_test.name = 'loss_test'

error = tensor.neq(tensor.argmax(output[:,:,0,0], axis=1), tensor.argmax(targets, axis=1)).mean()
error.name = 'error'

error_test = tensor.neq(tensor.argmax(output_test[:,:,0,0], axis=1), tensor.argmax(targets, axis=1)).mean()
error_test.name = 'error_test'

# construct update rule
learning_rate = 0.1
updates, updates_stats = [], []
for param in all_parameters:
Example 39
 def _cost(target_seq, pred_seq):
     pred_seq = tensor.clip(pred_seq, EPS, 1.0 - EPS)
     cce = categorical_crossentropy(
         coding_dist=pred_seq,
         true_dist=target_seq).mean(axis=0).mean(axis=0)
     return cce
Example 40
    train_dataset_100,
    iteration_scheme=SequentialScheme(train_dataset_100.num_examples, batch_size)
)
train_stream_100 = OneHotEncode100(train_stream_100, which_sources=('fine_labels',))

lr_cifar100 = learning_rate# * num_train_example/num_train_cifar100

## build computational graph
X = tensor.ftensor4('features')
targets = tensor.fmatrix('targets')
targets_100 = tensor.fmatrix('fine_labels')

output_10, output_test_10, output_100, output_test_100, all_parameters, acc_parameters = get_model(X, batch_size, (32, 32))


loss = alpha * categorical_crossentropy(output_10[:,:,0,0], targets).mean()
loss.name = 'loss'

loss_100 = (1-alpha) * categorical_crossentropy(output_100[:,:,0,0], targets_100).mean()
loss_100.name = 'loss_100'

loss_test = categorical_crossentropy(output_test_10[:,:,0,0], targets).mean()
loss_test.name = 'loss_test'

loss_100_test = categorical_crossentropy(output_test_100[:,:,0,0], targets_100).mean()
loss_100_test.name = 'loss_100_test'

error = tensor.neq(tensor.argmax(output_10[:,:,0,0], axis=1), tensor.argmax(targets, axis=1)).mean()
error.name = 'error'

error_test = tensor.neq(tensor.argmax(output_test_10[:,:,0,0], axis=1), tensor.argmax(targets, axis=1)).mean()
Example 41
y = T.imatrix('y')
index = T.lscalar()  # index to a [mini]batch
layer0_input = x.reshape((batch_size, 1, img_rows, img_cols))
layer0 = ConvPoolLayer(rng,
                       input=layer0_input,
                       image_shape=(batch_size, 1, img_rows, img_cols),
                       filter_shape=(32, 1, 3, 3),
                       poolsize=(2, 2))
layer1_input = layer0.output.flatten(2)
layer1 = HiddenLayer(rng,
                     input=layer1_input,
                     n_in=32 * 13 * 13,
                     n_out=num_classes,
                     activation=softmax)

cost = T.mean(categorical_crossentropy(layer1.output, y))
acc = T.mean(T.eq(T.argmax(layer1.output, axis=1), T.argmax(y, axis=1)))
params = layer1.params + layer0.params
grads = T.grad(cost, params)
updates = [(param_i, param_i - lr * grad_i)
           for param_i, grad_i in zip(params, grads)]
train_model = theano.function(
    [index], [cost, acc],
    updates=updates,
    givens={
        x: train_x[index * batch_size:(index + 1) * batch_size],
        y: train_y[index * batch_size:(index + 1) * batch_size]
    })
test_model = theano.function(
    [index], [cost, acc],
    givens={
Example 42
 def cost(self, net):
     "Return the cross entropy cost function"
     return T.mean(categorical_crossentropy(self.output_dropout, net.y))
Example 43
    def apply(self, facts, facts_mask, question, question_mask, y):
        """
        layout: (10, 5) (10, 5) (13, 1) (13, 1) (1,)
        return: answer, cost
        """

        table = lookup_table(self.n_in, self.vocab_size)
        self.params += table.params

        memory = Memory(facts, facts_mask, self.vocab_size,
                                     self.n_in, self.n_grus, table=table)
        quest = Question(question, question_mask, self.vocab_size,
                                         self.n_in, self.n_grus, table=table)

        self.params += memory.params
        self.params += quest.params

        self.exct_net = Executor(self.n_qf, self.n_hts, self.n_label)
        self.params += self.exct_net.params

        self.loc_net = LocationNet(n_hids=self.n_lhids,n_layers=1,n_in=self.n_qf+self.n_hts)
        self.params += self.loc_net.params



        #init operations
        # mem = memory.output #Fact Memory (5,n_grus=4)
        que = quest.output #(1,n_grus=4)

        l_idx = 0
        htm1 = None

        stops_dist = []
        answers_dist = []
        lts_dist = []
        stops = []
        answers = []
        lts = []
        rewards = []
        end_t = self.T-1

        for t in xrange(self.T):
            sf, _ = memory.read(l_idx) #(1,n_grus=4)
            qf = T.concatenate([que, sf], axis = 1) #layout: (1, 2*n_grus=8)
            ht, stop_dist, answer_dist = self.exct_net.step_forward(qf, htm1, init_flag=(t==0))
            htm1 = ht
            lt_dist = self.loc_net.apply(que, ht, memory)
            l_idx = T.argmax(lt_dist) #hard attention
            #htm1, stop, answer, l_idx = _step(memory, l_idx, que, htm1)
            answer = T.argmax(answer_dist)

            #TODO: implement a real sampling
            terminal = self._terminate(stop_dist[0,0])
            end_t = T.switch(terminal, T.minimum(t, end_t), end_t)
            reward = self.env.step(answer, terminal, y, t, end_t)

            stops_dist.append(stop_dist)
            answers_dist.append(answer_dist)
            lts_dist.append(lt_dist)
            stops.append(terminal)
            answers.append(answer)
            lts.append(l_idx)
            rewards.append(reward)


        stops_dist = T.concatenate(stops_dist,axis=0)#ndim=2
        answers_dist = T.concatenate(answers_dist,axis=0)#ndim=2
        lts_dist = T.concatenate(lts_dist,axis=0)#ndim=2
        stops = T.stack(stops,axis=0)#ndim=1
        answers = T.stack(answers,axis=0)#ndim=1
        lts = T.stack(lts,axis=0)#ndim=1
        rewards = T.stack(rewards,axis=0)#ndim=1
        # rewards = theano.printing.Print('226 line reward:')(rewards)
        # stops = theano.printing.Print('227 line reward:')(stops)

        returns=[]
        for idx in xrange(self.T):
            returns.append(T.sum(rewards[idx:]))
        returns = T.stack(returns, axis=0)  # ndim=1
        returns = theano.printing.Print('233 line returns:')(returns)

        self.decoder_cost = memory.cost + quest.cost
        # answer_dist = theano.printing.Print('230 line answer_dist:')(answer_dist)
        y = theano.tensor.extra_ops.to_one_hot(y,answer_dist.shape[1])
        # y = theano.printing.Print('231 line y:')(y)
        # answers = theano.printing.Print('233 line answers:')(answers)
        # TODO: Now, final answer can't simply select the last one!
        self.sl_cost = T.mean(categorical_crossentropy(answer_dist, y))



        stop_cost=self.log_likelihood_sym(actions_var=stops, dist_info_vars={'prob': stops_dist},bernoulli=True) * returns
        answer_cost=self.log_likelihood_sym(actions_var=answers, dist_info_vars={'prob': answers_dist}) * returns
        lt_cost=self.log_likelihood_sym(actions_var=lts, dist_info_vars={'prob': lts_dist}) * returns

        self.rl_cost = -T.mean(stop_cost+answer_cost+lt_cost)
        #TODO: we need to improve this rl_cost to introduce anti-variance measures


        return self.rl_cost, self.sl_cost, self.decoder_cost
Example 44
# scan loops through input sequence and applies step function to each time step

(S_h_r, S_c_r, S_y_r), _ = theano.scan(fn=step,
                                       sequences=S_x,
                                       outputs_info=[S_h, S_c, None],
                                       non_sequences=[
                                           W_xi, W_hi, W_ci, b_i, W_xf, W_hf,
                                           W_cf, b_f, W_xc, W_hc, b_c, W_xo,
                                           W_ho, W_co, b_o, W_hy, b_y
                                       ])

# END code inspired by Christian Herta

# cost and gradient descent

cost = T.mean(categorical_crossentropy(softmax(S_y_r), Y))


def gradient_descent(cost, weights, lr=0.05):
    grads = T.grad(cost=cost, wrt=weights)
    updates = []
    for w, g in zip(weights, grads):
        updates.append([w, w - lr * g])
    return updates


updates = gradient_descent(cost, [
    W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xo, W_ho,
    W_co, b_o, W_hy, b_y
])
Example 45
 def _cost(target_seq, pred_seq):
     pred_seq = tensor.clip(pred_seq, EPS, 1.0 - EPS)
     cce = categorical_crossentropy(coding_dist=pred_seq, true_dist=target_seq).mean(axis=0).mean(axis=0)
     return cce
Example 46
# scan loops through input sequence and applies step function to each time step

(S_h_r, S_c_r, S_y_r ), _ = theano.scan(fn = step,
                                        sequences = S_x,
                                        outputs_info = [S_h, S_c, None],
                                        non_sequences = [W_xi, W_hi, W_ci, b_i, 
                                                         W_xf, W_hf, W_cf, b_f, 
                                                         W_xc, W_hc, b_c, 
                                                         W_xo, W_ho, W_co, b_o, 
                                                         W_hy, b_y])
                                                         
# END code inspired by Christian Herta

# cost and gradient descent

cost = T.mean(categorical_crossentropy(softmax(S_y_r), Y))

def gradient_descent(cost, weights, lr=0.05):
    grads = T.grad(cost=cost, wrt=weights)
    updates = []
    for w, g in zip(weights, grads):
        updates.append([w, w - lr * g])
    return updates

updates = gradient_descent(cost, 
                           [W_xi, W_hi, W_ci, b_i, 
                            W_xf, W_hf, W_cf, b_f, 
                            W_xc, W_hc, b_c, 
                            W_xo, W_ho, W_co, b_o, 
                            W_hy, b_y])
Example 47
# shapegap = (10, 4, 1, 1)
# wgap = theano.shared(utils.floatX(np.arange(np.prod(shapegap)).reshape(shapegap)), borrow=True)

shape21 = (5, 4, 5, 3, 3)
shape22 = (5, 4, 5)
w21 = theano.shared(basicUtils.floatX(np.arange(np.prod(shape21)).reshape(shape21)), borrow=True)
w22 = theano.shared(basicUtils.floatX(np.arange(np.prod(shape22)).reshape(shape22)), borrow=True)
shapegap = (10, 5, 1, 1)
wgap = theano.shared(basicUtils.floatX(np.arange(np.prod(shapegap)).reshape(shapegap)), borrow=True)

layer1 = nin1(X, [w11, w12])
layer1 = nin1(layer1, [w21, w22])
layer1 = gap(layer1, wgap)
YDropProb1 = softmax(layer1)
trNeqs = basicUtils.neqs(YDropProb1, Y)
trCrossEntropy = categorical_crossentropy(YDropProb1, Y)
trCost1 = T.mean(trCrossEntropy)
updates1 = basicUtils.sgd(trCost1, [w11, w12, wgap], 0.001)
f1 = theano.function([X, Y], trCost1, updates=updates1, allow_input_downcast=True)

layer2 = nin2(X, [w11, w12], shape11)
layer2 = nin2(layer2, [w21, w22], shape21)
layer2 = gap(layer2, wgap)
YDropProb2 = softmax(layer2)
trNeqs = basicUtils.neqs(YDropProb2, Y)
trCrossEntropy = categorical_crossentropy(YDropProb2, Y)
trCost2 = T.mean(trCrossEntropy)
updates2 = basicUtils.sgd(trCost2, [w11, w12, wgap], 0.001)
f2 = theano.function([X, Y], trCost2, updates=updates2, allow_input_downcast=True)

x = np.random.randint(0, 100, (500, 3, 10, 10))
Example 48
    def __init__(self, i_size, h_size, o_size, weights=None):
        if not weights:
            self.W_xi = _init_weights((i_size, h_size))
            self.W_hi = _init_weights((h_size, h_size))
            self.W_ci = _init_weights((h_size, h_size))
            self.b_i = _init_zero_vec(h_size)

            self.W_xf = _init_weights((i_size, h_size))
            self.W_hf = _init_weights((h_size, h_size))
            self.W_cf = _init_weights((h_size, h_size))
            self.b_f = _init_zero_vec(h_size)

            self.W_xc = _init_weights((i_size, h_size))
            self.W_hc = _init_weights((h_size, h_size))
            self.b_c = _init_zero_vec(h_size)

            self.W_xo = _init_weights((i_size, h_size))
            self.W_ho = _init_weights((h_size, h_size))
            self.W_co = _init_weights((h_size, h_size))
            self.b_o = _init_zero_vec(h_size)

            self.W_hy = _init_weights((h_size, o_size))
            self.b_y = _init_zero_vec(o_size)
        else:
            self.W_xi = weights['W_xi']
            self.W_hi = weights['W_hi']
            self.W_ci = weights['W_ci']
            self.b_i = weights['b_i']

            self.W_xf = weights['W_xf']
            self.W_hf = weights['W_hf']
            self.W_cf = weights['W_cf']
            self.b_f = weights['b_f']

            self.W_xc = weights['W_xc']
            self.W_hc = weights['W_hc']
            self.b_c = weights['b_c']

            self.W_xo = weights['W_xo']
            self.W_ho = weights['W_ho']
            self.W_co = weights['W_co']
            self.b_o = weights['b_o']

            self.W_hy = weights['W_hy']
            self.b_y = weights['b_y']

        S_h = _init_zero_vec(h_size) # init values for hidden units
        S_c = _init_zero_vec(h_size) # init values for cell units

        S_x = T.matrix() # inputs
        Y = T.matrix() # targets

        (S_h_r, S_c_r, S_y_r ), _ = theano.scan(fn = _step,
                                                sequences = S_x,
                                                outputs_info = [S_h, S_c, None],
                                                non_sequences = [self.W_xi, self.W_hi, self.W_ci, self.b_i,
                                                                 self.W_xf, self.W_hf, self.W_cf, self.b_f,
                                                                 self.W_xc, self.W_hc, self.b_c,
                                                                 self.W_xo, self.W_ho, self.W_co, self.b_o,
                                                                 self.W_hy, self.b_y])

        cost = T.mean(categorical_crossentropy(softmax(S_y_r), Y))

        updates = _gradient_descent(cost,
                                    [self.W_xi, self.W_hi, self.W_ci, self.b_i,
                                     self.W_xf, self.W_hf, self.W_cf, self.b_f,
                                     self.W_xc, self.W_hc, self.b_c,
                                     self.W_xo, self.W_ho, self.W_co, self.b_o,
                                     self.W_hy, self.b_y])

        self.train = theano.function(inputs=[S_x, Y],
                                     outputs=cost,
                                     updates=updates,
                                     allow_input_downcast=True)

        self.predict = theano.function(inputs=[S_x],
                                       outputs=S_y_r,
                                       allow_input_downcast=True)

        S_h_v = T.vector()
        S_c_v = T.vector()

        S_h_s, S_c_s, S_y_s = _step(S_x, S_h_v, S_c_v,
                                    self.W_xi, self.W_hi, self.W_ci, self.b_i,
                                    self.W_xf, self.W_hf, self.W_cf, self.b_f,
                                    self.W_xc, self.W_hc, self.b_c,
                                    self.W_xo, self.W_ho, self.W_co, self.b_o,
                                    self.W_hy, self.b_y)

        self.sampling = theano.function(inputs = [S_x, S_h_v, S_c_v],
                                        outputs = [S_h_s, S_c_s, S_y_s],
                                        allow_input_downcast=True)
Example 49
             dtype=theano.config.floatX))
w_ho = theano.shared(
    np.array(np.random.normal(0, 0.1, (n_outputs, n_hidden)),
             dtype=theano.config.floatX))
b_ih = theano.shared(np.array(np.random.normal(0, 0.1, (n_hidden, 1)),
                              dtype=theano.config.floatX),
                     broadcastable=(False, True))
b_ho = theano.shared(np.array(np.random.normal(0, 0.1, (n_outputs, 1)),
                              dtype=theano.config.floatX),
                     broadcastable=(False, True))

# Forward pass
h_hidden = nnet.sigmoid(T.dot(w_ih, inputdata) + b_ih)
h_output = (T.dot(w_ho, h_hidden) + b_ho)
out_softmax = nnet.softmax(h_output.T).T
cost_expression = nnet.categorical_crossentropy(out_softmax.T, target.T).sum()

accuracy_train = accuracy_calc(T.argmax(out_softmax, axis=0),
                               T.argmax(target, axis=0))

# Backward pass
deriv_cost_w_ho = T.grad(cost_expression, w_ho) / batchSize
deriv_cost_w_ih = T.grad(cost_expression, w_ih) / batchSize
deriv_cost_b_ho = T.grad(cost_expression, b_ho) / batchSize
deriv_cost_b_ih = T.grad(cost_expression, b_ih) / batchSize

updates = [(w_ho, w_ho - learningRate * deriv_cost_w_ho),
           (w_ih, w_ih - learningRate * deriv_cost_w_ih),
           (b_ho, b_ho - learningRate * deriv_cost_b_ho),
           (b_ih, b_ih - learningRate * deriv_cost_b_ih)]
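
The update list above implements plain mini-batch gradient descent, with each gradient divided by batchSize because the cost is the summed cross-entropy over the batch. A hedged sketch of how these pieces would typically be compiled and called, assuming inputdata and target are the symbolic matrices used in the forward pass (their definitions are cut off at the top of this snippet) and that batches are laid out column-wise, one example per column:

import theano

# Sketch only: cost_expression, accuracy_train, updates, inputdata and target
# all refer to the symbols defined in the snippet above.
train_step = theano.function(inputs=[inputdata, target],
                             outputs=[cost_expression, accuracy_train],
                             updates=updates,
                             allow_input_downcast=True)

# loss, acc = train_step(X_batch, Y_batch)
# X_batch: (input features, batchSize), Y_batch: (n_outputs, batchSize)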
Esempio n. 50
0
    def _cost(target_seq, prob_pred_seq):

        prob_pred_seq = tensor.clip(prob_pred_seq, EPS, 1.0 - EPS)
        cce = categorical_crossentropy(
            prob_pred_seq, target_seq).mean(axis=2).mean(axis=0).mean(axis=0)
        return cce
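
The clip keeps the predicted probabilities strictly inside (0, 1), so the logarithm inside the cross-entropy stays finite even for hard 0/1 predictions. The same idea as a tiny standalone NumPy check (all names here are illustrative):

import numpy as np

EPS = 1e-7

def clipped_categorical_crossentropy(p_pred, p_true):
    """Mean categorical cross-entropy with probability clipping."""
    p_pred = np.clip(p_pred, EPS, 1.0 - EPS)
    return float(np.mean(-np.sum(p_true * np.log(p_pred), axis=-1)))

# A prediction of exactly 0 for the true class still yields a finite cost:
print(clipped_categorical_crossentropy(np.array([[0.0, 1.0]]),
                                        np.array([[1.0, 0.0]])))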
Esempio n. 51
0
    def __init__(self, fin, f1, f2, f3, hiddens, outputs,
                 lr=0.001, C=0.001, pDropConv=0.2, pDropHidden=0.5):
        self.params = []  # collect every parameter to be optimized: connection weights and biases
        # convolution layers, w = (feature maps in this layer, feature maps in previous layer, kernel rows, kernel cols), b = (feature maps in this layer)
        # conv: (32, 32) = (32, 32)
        # pool: (32/2, 32/2) = (16, 16)
        wconv1 = initial.weightInit((f1, fin, 3, 3), 'wconv1')
        bconv1 = initial.biasInit((f1,), 'bconv1')
        self.params.append([wconv1, bconv1])
        # conv: (16, 16) = (16, 16)
        # pool: (16/2, 16/2) = (8, 8)
        wconv2 = initial.weightInit((f2, f1, 3, 3), 'wconv2')
        bconv2 = initial.biasInit((f2,), 'bconv2')
        self.params.append([wconv2, bconv2])
        # conv: (8, 8) = (8, 8)
        # pool: (8/2, 8/2) = (4, 4)
        wconv3 = initial.weightInit((f3, f2, 3, 3), 'wconv3')
        bconv3 = initial.biasInit((f3,), 'bconv3')
        self.params.append([wconv3, bconv3])
        # fully connected layer; the neuron count of the last convolution layer gives the MLP input size
        wfull = initial.weightInit((f3 * 4 * 4, hiddens), 'wfull')
        bfull = initial.biasInit((hiddens,), 'bfull')
        self.params.append([wfull, bfull])
        wout = initial.weightInit((hiddens, outputs), 'wout')
        bout = initial.biasInit((outputs,), 'bout')
        self.params.append([wout, bout])

        # define Theano symbolic variables and build the Theano expressions
        X = T.tensor4('X')
        Y = T.matrix('Y')
        YDropProb = model(X, self.params, pDropConv, pDropHidden)
        YFullProb = model(X, self.params, 0., 0.)
        YPred = T.argmax(YFullProb, axis=1)
        # training-set cost
        trCrossEntropy = categorical_crossentropy(YDropProb, Y)
        trCost = T.mean(trCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))
        updates = gradient.rmsprop(trCost, flatten(self.params), lr=lr)
        # validation/test-set cost
        vateCrossEntropy = categorical_crossentropy(YFullProb, Y)
        vateCost = T.mean(vateCrossEntropy) + C * basicUtils.regularizer(flatten(self.params))

        # compile the functions
        # training function: takes a training batch, returns the training loss and error count
        self.train = function(
            inputs=[In(X, borrow=True, allow_downcast=True),
                    In(Y, borrow=True, allow_downcast=True)],
            outputs=[Out(trCost, borrow=True),
                     Out(basicUtils.neqs(YDropProb, Y), borrow=True)],  # return fewer values to save time
            updates=updates,
            allow_input_downcast=True
        )
        # validation/test function: takes a validation or test batch, returns loss and error count, no parameter updates
        self.valtest = function(
            inputs=[In(X, borrow=True, allow_downcast=True),
                    In(Y, borrow=True, allow_downcast=True)],
            outputs=[Out(vateCost, borrow=True),
                     Out(basicUtils.neqs(YFullProb, Y), borrow=True)],  # return fewer values to save time
            allow_input_downcast=True
        )
        # prediction function: takes only X, returns the predicted labels
        self.predict = function(
            inputs=[In(X, borrow=True, allow_downcast=True)],
            outputs=Out(YPred, borrow=True),
            allow_input_downcast=True
        )
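
The f3 * 4 * 4 input size of the fully connected layer follows from the shape comments above: three 'same' convolutions, each followed by 2x2 pooling, take a 32x32 image down to 4x4. A one-line sanity check under that assumption:

# 32x32 input, three conv('same') + 2x2 max-pool stages: 32 -> 16 -> 8 -> 4
size = 32
for _ in range(3):
    size //= 2
assert size == 4  # the flattened feature vector therefore has f3 * 4 * 4 entries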
Esempio n. 52
0
def main(num_epochs=NUM_EPOCHS):
    print("Building network ...")

    l_in = lasagne.layers.InputLayer(shape=(None, None, vocab_size))

    l_forward_1 = lasagne.layers.LSTMLayer(
        l_in,
        N_HIDDEN,
        grad_clipping=GRAD_CLIP,
        nonlinearity=lasagne.nonlinearities.tanh)

    l_forward_2 = lasagne.layers.LSTMLayer(
        l_forward_1,
        N_HIDDEN,
        grad_clipping=GRAD_CLIP,
        nonlinearity=lasagne.nonlinearities.tanh,
        only_return_final=True)

    l_out = lasagne.layers.DenseLayer(l_forward_2,
                                      num_units=vocab_size,
                                      W=lasagne.init.Normal(),
                                      nonlinearity=softmax)
    target_values = T.ivector('target_output')
    network_output = lasagne.layers.get_output(l_out)
    cost = categorical_crossentropy(network_output, target_values).mean()
    all_params = lasagne.layers.get_all_params(l_out, trainable=True)

    print("Computing updates ...")
    updates = lasagne.updates.adagrad(cost, all_params, LEARNING_RATE)

    print("Compiling functions ...")
    train = theano.function([l_in.input_var, target_values],
                            cost,
                            updates=updates,
                            allow_input_downcast=True)

    probs = theano.function([l_in.input_var],
                            network_output,
                            allow_input_downcast=True)

    def try_it_out(N=SEQ_OUT_LEN):
        """
        Generates output for the predefined generation_phrase.
        More general example of generation function is in the application
        script.
        """
        assert (len(generation_phrase) >= SEQ_LENGTH)
        sample_ix = []
        x, _ = gen_data(
            len(generation_phrase) - SEQ_LENGTH, 1, generation_phrase, False)

        for i in range(N):
            ix = np.argmax(probs(x).ravel())
            sample_ix.append(ix)
            x[:, 0:SEQ_LENGTH - 1, :] = x[:, 1:, :]
            x[:, SEQ_LENGTH - 1, :] = 0
            x[0, SEQ_LENGTH - 1, sample_ix[-1]] = 1.

        random_snippet = generation_phrase + ''.join(ix_to_char[ix]
                                                     for ix in sample_ix)
        print("----\n %s \n----" % random_snippet)

    print("Training ...")
    print("Seed used for text generation is: " + generation_phrase)
    p = 0
    try:
        for it in range(int(data_size * num_epochs / BATCH_SIZE)):
            try_it_out()
            avg_cost = 0
            for _ in range(PRINT_FREQ):
                x, y = gen_data(p)
                p += SEQ_LENGTH + BATCH_SIZE - 1
                if (p + BATCH_SIZE + SEQ_LENGTH >= data_size):
                    print('Carriage Return')
                    p = 0

                avg_cost += train(x, y)
            print("Epoch {} average loss = {}".format(
                it * 1.0 * PRINT_FREQ / data_size * BATCH_SIZE,
                avg_cost / PRINT_FREQ))
            netname = 'epoch-{:.5f}.pkl' \
                .format(it * 1.0 * PRINT_FREQ / data_size * BATCH_SIZE)
            with open('nets/' + netname, 'wb') as f:
                pickle.dump(lasagne.layers.get_all_param_values(l_out), f)

    except KeyboardInterrupt:
        pass
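
Each generation step in try_it_out above slides the one-hot input window one character to the left and writes the newly sampled character into the last slot before querying the network again. The same sliding-window update in isolation, with illustrative sizes:

import numpy as np

SEQ_LENGTH, vocab = 5, 8                 # illustrative sizes
x = np.zeros((1, SEQ_LENGTH, vocab))     # batch of one window of one-hot characters
new_ix = 3                               # index of the character just sampled

x[:, 0:SEQ_LENGTH - 1, :] = x[:, 1:, :]  # drop the oldest character
x[:, SEQ_LENGTH - 1, :] = 0              # clear the last time step
x[0, SEQ_LENGTH - 1, new_ix] = 1.        # write the new character as a one-hot vector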
Esempio n. 53
0
 def cost(self, net):
     "Return the cross entropy cost function"
     return T.mean(categorical_crossentropy(self.output_dropout, net.y))
Esempio n. 54
0
wconv3 = initial.weightInit((f3, f2, 3, 3), 'wconv3')
bconv3 = initial.biasInitCNN2((f3,), 'bconv3')
prams.append([wconv3, bconv3])
# fully connected layer; the neuron count of the last convolution layer gives the MLP input size
wfull = initial.weightInit2MLP((f3 * 3 * 3, hiddens), 'wfull')
bfull = initial.biasInitCNN2((hiddens,), 'bfull')
prams.append([wfull, bfull])
wout = initial.weightInit2MLP((hiddens, outputs), 'wout')
bout = initial.biasInitCNN2((outputs,), 'bout')
prams.append([wout, bout])

# build the Theano expressions
YDropProb = model(X, prams, 0.2, 0.5)
YFullProb = model(X, prams, 0., 0.)
YPred = T.argmax(YFullProb, axis=1)
crossEntropy = categorical_crossentropy(YDropProb, Y)
cost = T.mean(crossEntropy) + C * basicUtils.regularizer(flatten(prams))
updates = gradient.rmsprop(cost, flatten(prams), lr=learningRate)

# compile the functions
# training function: takes a training batch, returns the training error count
train = function(
    inputs=[In(X, borrow=True, allow_downcast=True),
            In(Y, borrow=True, allow_downcast=True)],
    outputs=Out(basicUtils.neqs(YDropProb, Y), borrow=True),  # return fewer values to save time
    updates=updates,
    allow_input_downcast=True
)
# test/validation function: takes a test or validation batch, returns the error count, no parameter updates
test = function(
    inputs=[In(X, borrow=True, allow_downcast=True),
Esempio n. 55
0
    def _cost(target_seq, prob_pred_seq):

        prob_pred_seq = tensor.clip(prob_pred_seq, EPS, 1.0 - EPS)
        cce = categorical_crossentropy(prob_pred_seq, target_seq).mean(axis=2).mean(axis=0).mean(axis=0)
        return cce
Esempio n. 56
0
    iteration_scheme=SequentialScheme(train_dataset_100.num_examples,
                                      batch_size))
train_stream_100 = OneHotEncode100(train_stream_100,
                                   which_sources=('fine_labels', ))

lr_cifar100 = learning_rate  # * num_train_example/num_train_cifar100

## build computational graph
X = tensor.ftensor4('features')
targets = tensor.fmatrix('targets')
targets_100 = tensor.fmatrix('fine_labels')

output_10, output_test_10, output_100, output_test_100, all_parameters, acc_parameters = get_model(
    X, batch_size, (32, 32))

loss = alpha * categorical_crossentropy(output_10[:, :, 0, 0], targets).mean()
loss.name = 'loss'

loss_100 = (1 - alpha) * categorical_crossentropy(output_100[:, :, 0, 0],
                                                  targets_100).mean()
loss_100.name = 'loss_100'

loss_test = categorical_crossentropy(output_test_10[:, :, 0, 0],
                                     targets).mean()
loss_test.name = 'loss_test'

loss_100_test = categorical_crossentropy(output_test_100[:, :, 0, 0],
                                         targets_100).mean()
loss_100_test.name = 'loss_100_test'

error = tensor.neq(tensor.argmax(output_10[:, :, 0, 0], axis=1),
Esempio n. 57
0
 def nll(self, target):
     """Return the negative log-likelihood of the prediction of this model under a given
     target distribution.  Passing symbolic integers here means 1-hot.
     WRITEME
     """
     return nnet.categorical_crossentropy(self.output, target)
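
As the docstring notes, Theano's categorical_crossentropy accepts the target either as a matrix of distributions (for example one-hot rows) or as a vector of integer class indices. A minimal sketch comparing the two call forms; all variable names below are illustrative:

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import categorical_crossentropy

p = T.matrix('p')             # predicted class probabilities, one row per example
y_dist = T.matrix('y_dist')   # targets as distributions (e.g. one-hot rows)
y_idx = T.ivector('y_idx')    # targets as integer class indices

ce_dist = theano.function([p, y_dist], categorical_crossentropy(p, y_dist),
                          allow_input_downcast=True)
ce_idx = theano.function([p, y_idx], categorical_crossentropy(p, y_idx),
                         allow_input_downcast=True)

probs = np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.8, 0.1]])
onehot = np.array([[1., 0., 0.],
                   [0., 1., 0.]])
idx = np.array([0, 1], dtype='int32')

# Both target encodings give the same per-example cross-entropy.
print(ce_dist(probs, onehot))
print(ce_idx(probs, idx))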