Code Example #1
File: layers.py Project: aled1027/Syntaur
 def errors(self, y, mean = False):
     if not self.CONNECTED:
         raise RuntimeError("Asked to compute errors, but I'm not connected atm")
     if mean:
         return T.mean(T.neq(self.y_pred, y))
     else:
         return T.neq(self.y_pred, y)
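
A minimal standalone sketch (hypothetical, not part of the Syntaur project) of the zero-one error idiom used in this and most of the examples below: T.neq yields a 0/1 vector of mistakes, and T.mean turns it into an error rate.

import numpy as np
import theano
import theano.tensor as T

# hypothetical demo: compare predicted and true class indices
y_pred = T.ivector('y_pred')
y_true = T.ivector('y_true')
error_rate = T.mean(T.neq(y_pred, y_true))   # fraction of mismatches
error_fn = theano.function([y_pred, y_true], error_rate)

print(error_fn(np.array([0, 1, 2, 1], dtype='int32'),
               np.array([0, 1, 1, 1], dtype='int32')))   # -> 0.25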
Code Example #2
File: lang_model.py Project: piergiaj/generate-image
    def step(self, y_m, yb_m, hf, cf, hb, cb):
        # y_m/yb_m are what shape? should be batch_size (x 1)
        print y_m.ndim
        # one-hot encode y,yb (NEED TO SAVE PREVIOUS VALUES FOR MASKING!!!)
        y = to_one_hot(y_m, self.bs, self.K)
        yb = to_one_hot(yb_m, self.bs, self.K)

        # get forward and backward inputs values
        y_f_in = self.forward_in.run(y)
        y_b_in = self.backward_in.run(yb)
        
        # run forward and backward LSTMs
        hf_t,cf_t = self.forward_lstm.run(y_f_in, hf, cf)
        hb_t,cb_t = self.backward_lstm.run(y_b_in, hb, cb)

        # but only if y/yb is not 0 (apply mask)
        mask_y = y_m.reshape((self.bs, 1))#.repeat(self.m//2, axis=1) # these lines *shouldn't* be needed...
        mask_yb = yb_m.reshape((self.bs, 1))#.repeat(self.m//2, axis=1)
        hf = T.switch(T.neq(mask_y, 0), hf_t, hf)
        cf = T.switch(T.neq(mask_y, 0), cf_t, cf)
        # and backward
        hb = T.switch(T.neq(mask_yb, 0), hb_t, hb)
        cb = T.switch(T.neq(mask_yb, 0), cb_t, cb)

        # return the new values
        return hf,cf,hb,cb
Code Example #3
File: ber.py Project: Alexis211/transpose_features
def ber(y, pred):
    a = (tensor.neq(y, 1) * tensor.neq(pred, 1)).sum()
    b = (tensor.neq(y, 1) * tensor.eq(pred, 1)).sum()
    c = (tensor.eq(y, 1) * tensor.neq(pred, 1)).sum()
    d = (tensor.eq(y, 1) * tensor.eq(pred, 1)).sum()
    [a, b, c, d] = [tensor.cast(x, dtype=theano.config.floatX) for x in [a, b, c, d]]
    return (b / (a + b) + c / (c + d)) / numpy.float32(2)
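
A hypothetical usage sketch for the `ber` (balanced error rate) function above, which averages the false-positive and false-negative rates for the label 1:

import numpy
import theano
from theano import tensor

y = tensor.ivector('y')
pred = tensor.ivector('pred')
ber_fn = theano.function([y, pred], ber(y, pred))

# one false positive out of two negatives, no false negatives -> (0.5 + 0) / 2
print(ber_fn(numpy.array([0, 0, 1, 1], dtype='int32'),
             numpy.array([0, 1, 1, 1], dtype='int32')))   # -> 0.25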
Code Example #4
    def getRpRnTpTnForTrain0OrVal1(self, y, training0OrValidation1):
        # The returned list has (numberOfClasses)x4 integers: >numberOfRealPositives, numberOfRealNegatives, numberOfTruePredictedPositives, numberOfTruePredictedNegatives< for each class (incl background).
        # Order in the list is the natural order of the classes (ie class-0 RP,RN,TPP,TPN, class-1 RP,RN,TPP,TPN, class-2 RP,RN,TPP,TPN ...)
        # param y: y = T.itensor4('y'). Dimensions [batchSize, r, c, z]
        
        yPredToUse = self.y_pred_train if  training0OrValidation1 == 0 else self.y_pred_val
        checkDimsOfYpredAndYEqual(y, yPredToUse, "training" if training0OrValidation1 == 0 else "validation")
        
        returnedListWithNumberOfRpRnTpTnForEachClass = []
        
        for class_i in xrange(0, self._numberOfOutputClasses) :
            #Number of Real Positive, Real Negatives, True Predicted Positives and True Predicted Negatives are reported PER CLASS (first for WHOLE).
            tensorOneAtRealPos = T.eq(y, class_i)
            tensorOneAtRealNeg = T.neq(y, class_i)

            tensorOneAtPredictedPos = T.eq(yPredToUse, class_i)
            tensorOneAtPredictedNeg = T.neq(yPredToUse, class_i)
            tensorOneAtTruePos = T.and_(tensorOneAtRealPos,tensorOneAtPredictedPos)
            tensorOneAtTrueNeg = T.and_(tensorOneAtRealNeg,tensorOneAtPredictedNeg)
                    
            returnedListWithNumberOfRpRnTpTnForEachClass.append( T.sum(tensorOneAtRealPos) )
            returnedListWithNumberOfRpRnTpTnForEachClass.append( T.sum(tensorOneAtRealNeg) )
            returnedListWithNumberOfRpRnTpTnForEachClass.append( T.sum(tensorOneAtTruePos) )
            returnedListWithNumberOfRpRnTpTnForEachClass.append( T.sum(tensorOneAtTrueNeg) )
            
        return returnedListWithNumberOfRpRnTpTnForEachClass
Code Example #5
    def __init__(self, rng, batchsize, epochs=100, alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-08, l1_weight=0.0, l2_weight=0.1, cost='mse'):
        self.alpha = alpha
        self.beta1 = beta1
        self.beta2 = beta2
        self.eps = eps
        self.l1_weight = l1_weight
        self.l2_weight = l2_weight
        self.rng = rng
        self.theano_rng = RandomStreams(rng.randint(2 ** 30))
        self.epochs = epochs
        self.batchsize = batchsize

        # `cost` is always the cost that is minimised in supervised training;
        # the T.nonzero terms ensure that the cost is only computed for examples that have a label.
        #
        # Convention: we mark unlabelled examples with a vector of zeros in lieu of a one-hot vector
        if   cost == 'mse':
            self.y_pred = lambda network, x: network(x)
            self.error = lambda network, y_pred, y: T.zeros((1,))
            self.cost = lambda network, x, y: T.mean((network(x)[T.nonzero(y)] - y[T.nonzero(y)])**2)
        elif cost == 'binary_cross_entropy':
            self.y_pred = lambda network, x: network(x)
            self.cost   = lambda network, y_pred, y: T.nnet.binary_crossentropy(y_pred[T.nonzero(y)], y[T.nonzero(y)]).mean()
            # classification error
            self.error  = lambda network, y_pred, y: T.mean(T.neq(T.argmax(y_pred, axis=1), T.argmax(y, axis=1)))
        elif cost == 'cross_entropy':
            self.y_pred = lambda network, x: network(x)
            self.cost   = lambda network, y_pred, y: T.nnet.categorical_crossentropy(y_pred[T.nonzero(y)], y[T.nonzero(y)]).mean()
            # classification error
            self.error  = lambda network, y_pred, y: T.mean(T.neq(T.argmax(y_pred, axis=1), T.argmax(y, axis=1)))
        else:
            self.y_pred = lambda network, x: network(x)
            self.error = lambda network, y_pred, y: T.zeros((1,))
            self.cost = cost
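
For clarity, a hypothetical standalone demo of the all-zero-row convention above: indexing with T.nonzero(y) keeps only the entries of labelled rows, so unlabelled examples contribute nothing to the cost.

import numpy as np
import theano
import theano.tensor as T

y = T.matrix('y')          # one-hot targets; an all-zero row means "unlabelled"
p = T.matrix('p')          # network outputs
picked = p[T.nonzero(y)]   # output values at the labelled target positions only
pick_fn = theano.function([y, p], picked)

targets = np.array([[0., 1., 0.],    # labelled as class 1
                    [0., 0., 0.],    # unlabelled -> ignored
                    [1., 0., 0.]],   # labelled as class 0
                   dtype=theano.config.floatX)
outputs = np.array([[0.2, 0.7, 0.1],
                    [0.3, 0.3, 0.4],
                    [0.6, 0.2, 0.2]], dtype=theano.config.floatX)
print(pick_fn(targets, outputs))     # -> [0.7  0.6]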
Code Example #6
File: logistic_sgd.py Project: mommi84/DeepLearning
    def errors(self, y):
        """Return a float representing the number of errors in the minibatch
        over the total number of examples of the minibatch ; zero one
        loss over the size of the minibatch

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label
        """

        # check if y has same dimension of y_pred
        if y.ndim != self.y_pred.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type)
            )
        # check if y is of the correct datatype
        if y.dtype.startswith('int'):
            # the T.neq operator returns a vector of 0s and 1s, where 1
            # represents a mistake in prediction
            return T.mean(T.neq(self.y_pred, y))
        else:
            #raise NotImplementedError()
#            print y.shape[0]
#            for i in range(1, y.shape[0].eval()):
#                print('%f | %f' % (self.y_pred[i], y[i]))
            #print T.mean(T.neq(self.y_pred, y))
            #print self.y_pred.eval()
            return T.mean(T.neq(self.y_pred, y))
Code Example #7
File: pdnnet.py Project: petered/pdnn-test
def matrix_weight_grad_calculator(xs, es, kp_x, kd_x, kp_e, kd_e, shapes, epsilon=1e-7):
    """
    :param xs:
    :param es:
    :param kp_x:
    :param kd_x:
    :param kp_e:
    :param kd_e:
    :param shapes:
    :param epsilon:
    :return:
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    v1 = create_shared_variable(np.zeros((n_samples, n_in, n_out)))
    rx = kd_x/(kp_x+kd_x)
    re = kd_e/(kp_e+kd_e)
    xr = create_shared_variable(np.zeros((n_samples, n_in)))
    er = create_shared_variable(np.zeros((n_samples, n_out)))

    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    xr_decayed = xr*rx
    er_decayed = er*re
    spikes = tt.bitwise_or(x_spikes[:, :, None], e_spikes[:, None, :])
    v2 = xr_decayed[:, :, None]*er_decayed[:, None, :]
    dws = (spikes*(v2-v1))/(rx*re-1)
    new_xr = xr_decayed + xs/(kp_x+kd_x)
    new_er = er_decayed + es/(kp_e+kd_e)

    add_update(v1, tt.switch(spikes, new_xr[:, :, None]*new_er[:, None, :], v1))
    add_update(xr, new_xr)
    add_update(er, new_er)

    return dws.sum(axis=0)
Code Example #8
File: dbm.py Project: dnouri/pylearn2
    def get_monitoring_channels(self, model, X, Y = None):
        rval = OrderedDict()

        history = model.mf(X, return_history = True)
        q = history[-1]

        if self.supervised:
            assert Y is not None
            Y_hat = q[-1]
            true = T.argmax(Y,axis=1)
            pred = T.argmax(Y_hat, axis=1)

            #true = Print('true')(true)
            #pred = Print('pred')(pred)

            wrong = T.neq(true, pred)
            err = T.cast(wrong.mean(), X.dtype)
            rval['misclass'] = err

            if len(model.hidden_layers) > 1:
                q = model.mf(X, Y = Y)
                pen = model.hidden_layers[-2].upward_state(q[-2])
                Y_recons = model.hidden_layers[-1].mf_update(state_below = pen)
                pred = T.argmax(Y_recons, axis=1)
                wrong = T.neq(true, pred)

                rval['recons_misclass'] = T.cast(wrong.mean(), X.dtype)


        return rval
Code Example #9
File: metrics.py Project: XiongDuan/FC-DenseNet
def theano_metrics(y_pred, y_true, n_classes, void_labels):
    """
    Returns the intersection I and union U (to compute the jaccard I/U) and the accuracy.

    :param y_pred: tensor of predictions. shape  (b*0*1, c) with c = n_classes
    :param y_true: groundtruth, shape  (b,0,1) or (b,c,0,1) with c=1
    :param n_classes: int
    :param void_labels: list of indexes of void labels
    :return: return tensors I and U of size (n_classes), and scalar acc
    """

    # Put y_pred and y_true under the same shape
    y_true = T.flatten(y_true)
    y_pred = T.argmax(y_pred, axis=1)

    # We use not_void in case the prediction falls in the void class of the groundtruth
    for i in range(len(void_labels)):
        if i == 0:
            not_void = T.neq(y_true, void_labels[i])
        else:
            not_void = not_void * T.neq(y_true, void_labels[i])

    I = T.zeros(n_classes)
    U = T.zeros(n_classes)

    for i in range(n_classes):
        y_true_i = T.eq(y_true, i)
        y_pred_i = T.eq(y_pred, i)
        I = T.set_subtensor(I[i], T.sum(y_true_i * y_pred_i))
        U = T.set_subtensor(U[i], T.sum(T.or_(y_true_i, y_pred_i) * not_void))

    accuracy = T.sum(I) / T.sum(not_void)

    return I, U, accuracy
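
A hypothetical usage sketch: compile the metrics and derive the per-class Jaccard index as I / U, as the docstring suggests.

import numpy as np
import theano
import theano.tensor as T

y_pred = T.matrix('y_pred')    # (n_pixels, n_classes) class scores
y_true = T.ivector('y_true')   # (n_pixels,) flattened ground truth
I, U, acc = theano_metrics(y_pred, y_true, n_classes=3, void_labels=[2])
metrics_fn = theano.function([y_pred, y_true], [I, U, acc])

scores = np.array([[.9, .1, .0],
                   [.2, .8, .0],
                   [.6, .4, .0]], dtype=theano.config.floatX)
labels = np.array([0, 1, 2], dtype='int32')      # the last pixel is void
i, u, a = metrics_fn(scores, labels)
print(i / np.maximum(u, 1.), a)                  # per-class Jaccard and accuracy over non-void pixels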
Code Example #10
File: nnet.py Project: Pandoro/tools
 def errors(self, y):
     if y.dtype.startswith('int') and y.ndim == 3:
         mask = T.neq(y, -1)
         total = T.sum(mask, dtype='float32')
         return T.sum(T.neq(self.y_pred, y)*mask)/total
     else:
         raise NotImplementedError()
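
A hypothetical standalone sketch of the ignore-label masking idiom used above (label -1 marks entries that must not count towards the error); shown here in 2-D for brevity:

import numpy as np
import theano
import theano.tensor as T

y = T.imatrix('y')              # ground truth, -1 = ignore
y_pred = T.imatrix('y_pred')    # predictions
mask = T.neq(y, -1)
total = T.sum(mask, dtype='float32')
masked_error = T.sum(T.neq(y_pred, y) * mask) / total
error_fn = theano.function([y, y_pred], masked_error)

truth = np.array([[0, 1, -1],
                  [2, -1, -1]], dtype='int32')
preds = np.array([[0, 2, 0],
                  [2, 1, 1]], dtype='int32')
print(error_fn(truth, preds))   # 1 mistake among 3 valid entries -> ~0.333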
Code Example #11
File: MLPCost.py Project: LeonBai/lisa_emotiw-1
 def __call__(self, model, X, Y, ** kwargs):
     if self.use_dropout:
         Y_hat = model.dropout_fprop(X, default_input_include_prob=self.default_input_include_prob,
                 input_include_probs=self.input_include_probs, default_input_scale=self.default_input_scale,
                 input_scales=self.input_scales
                 )
     else:
         Y_hat = model.fprop(X)
     
     if self.missing_target_value is not None:
         assert (self.cost_type == 'default')
         costMatrix = model.layers[-1].cost_matrix(Y, Y_hat)
         costMatrix *= T.neq(Y, self.missing_target_value)  # This sets to zero all elements where Y == -1
         cost = costMatrix.sum()/(T.neq(Y, -1).sum())
         cost = T.cast(cost, 'float32')
         #cost = model.cost_from_cost_matrix(costMatrix)
     else:
         if self.cost_type == 'default':
             cost = model.cost(Y, Y_hat)
         elif self.cost_type == 'nll':
             cost = (-Y * T.log(Y_hat)).sum(axis=1).mean()
         elif self.cost_type == 'crossentropy':
             cost = (-Y * T.log(Y_hat) - (1 - Y) \
                 * T.log(1 - Y_hat)).sum(axis=1).mean()
         else:
             raise NotImplementedError()
     return cost
Code Example #12
    def get_tagging_channels_from_state(self, state, target):

        missingValuesFilter = T.neq(target, -1)

        rval = OrderedDict()
        y_hat = state > 0.5
        y = target > 0.5
        wrong_bit = T.cast(T.neq(y, y_hat), state.dtype) * missingValuesFilter
        rval['mistagging'] = T.cast(wrong_bit.sum() / missingValuesFilter.sum(),
                                 state.dtype)

        y = T.cast(y, state.dtype)
        y_hat = T.cast(y_hat, state.dtype)
        tp = (y * y_hat * missingValuesFilter).sum()
        fp = ((1-y) * y_hat * missingValuesFilter).sum()
        precision = tp / T.maximum(1., tp + fp)
        recall = tp / T.maximum(1., (y * missingValuesFilter).sum())
        rval['precision'] = precision
        rval['recall'] = recall
        rval['f1'] = 2. * precision * recall / T.maximum(1, precision + recall)

        tp = (y * y_hat * missingValuesFilter).sum(axis=0)
        fp = ((1-y) * y_hat * missingValuesFilter).sum(axis=0)
        precision = tp / T.maximum(1., tp + fp)

        rval['per_output_precision.max'] = precision.max()
        rval['per_output_precision.mean'] = precision.mean()
        rval['per_output_precision.min'] = precision.min()

        recall = tp / T.maximum(1., (y * missingValuesFilter).sum(axis=0))

        rval['per_output_recall.max'] = recall.max()
        rval['per_output_recall.mean'] = recall.mean()
        rval['per_output_recall.min'] = recall.min()

        f1 = 2. * precision * recall / T.maximum(1, precision + recall)

        rval['per_output_f1.max'] = f1.max()
        rval['per_output_f1.mean'] = f1.mean()
        rval['per_output_f1.min'] = f1.min()
        
        # Add computation of the mean average precision
        from pylearn2_ECCV2014 import meanAvgPrec
        (rval['min_avg_prec'],
         rval['mean_avg_prec'],
         rval['max_avg_prec'],
         rval['mean_avg_prec_AnswerPhone'],
         rval['mean_avg_prec_DriveCar'],
         rval['mean_avg_prec_Eat'],
         rval['mean_avg_prec_FightPerson'],
         rval['mean_avg_prec_GetOutCar'],
         rval['mean_avg_prec_HandShake'],
         rval['mean_avg_prec_HugPerson'],
         rval['mean_avg_prec_Kiss'],
         rval['mean_avg_prec_Run'],
         rval['mean_avg_prec_SitDown'],
         rval['mean_avg_prec_SitUp'],
         rval['mean_avg_prec_StandUp']) = meanAvgPrec.meanAveragePrecisionTheano(target, state)

        return rval
Code Example #13
File: cnnLayerTypes.py Project: pliu007/deepmedic
    def multiclassRealPosAndNegAndTruePredPosNegTraining0OrValidation1(self, y, training0OrValidation1):
	"""
	The returned list has (numberOfClasses)x4 integers: >numberOfRealPositives, numberOfRealNegatives, numberOfTruePredictedPositives, numberOfTruePredictedNegatives< for each class (incl background).
	Order in the list is the natural order of the classes (ie class-0 RP,RN,TPP,TPN, class-1 RP,RN,TPP,TPN, class-2 RP,RN,TPP,TPN ...)
	"""
	returnedListWithNumberOfRpRnPpPnForEachClass = []

	for class_i in xrange(0, self.numberOfOutputClasses) :
		#Number of Real Positive, Real Negatives, True Predicted Positives and True Predicted Negatives are reported PER CLASS (first for WHOLE).
		vectorOneAtRealPositives = T.eq(y, class_i)
		vectorOneAtRealNegatives = T.neq(y, class_i)

		if training0OrValidation1 == 0 : #training:
			yPredToUse = self.y_pred
		else: #validation
			yPredToUse = self.y_pred_inference

		vectorOneAtPredictedPositives = T.eq(yPredToUse, class_i)
		vectorOneAtPredictedNegatives = T.neq(yPredToUse, class_i)
		vectorOneAtTruePredictedPositives = T.and_(vectorOneAtRealPositives,vectorOneAtPredictedPositives)
		vectorOneAtTruePredictedNegatives = T.and_(vectorOneAtRealNegatives,vectorOneAtPredictedNegatives)
		    
		returnedListWithNumberOfRpRnPpPnForEachClass.append( T.sum(vectorOneAtRealPositives) )
		returnedListWithNumberOfRpRnPpPnForEachClass.append( T.sum(vectorOneAtRealNegatives) )
		returnedListWithNumberOfRpRnPpPnForEachClass.append( T.sum(vectorOneAtTruePredictedPositives) )
		returnedListWithNumberOfRpRnPpPnForEachClass.append( T.sum(vectorOneAtTruePredictedNegatives) )

	return returnedListWithNumberOfRpRnPpPnForEachClass
Code Example #14
def trainer(X,Y,alpha,lr,predictions,updates,data,labels):
	data   = U.create_shared(data,  dtype=np.int8)
	labels = U.create_shared(labels,dtype=np.int8)
	index_start = T.lscalar('start')
	index_end   = T.lscalar('end')
	print "Compiling function..."
	train_model = theano.function(
			inputs  = [index_start,index_end,alpha,lr],
			outputs = T.mean(T.neq(T.argmax(predictions, axis=1), Y)),
			updates = updates,
			givens  = {
				X:   data[index_start:index_end],
				Y: labels[index_start:index_end]
			}
		)
	test_model = theano.function(
			inputs  = [index_start,index_end],
			outputs = T.mean(T.neq(T.argmax(predictions, axis=1), Y)),
			givens  = {
				X:   data[index_start:index_end],
				Y: labels[index_start:index_end]
			}
		)
	print "Done."
	return train_model,test_model
Code Example #15
        def each_loss(outpt, inpt):
            # y is the answer sequence after it has been padded with blanks
            blank = 26
            y_nblank = T.neq(inpt, blank)
            n = T.dot(y_nblank, y_nblank)  # true label length (number of non-blank symbols)
            N = 2 * n + 1  # length after blank padding, excluding the redundant trailing padding
            labels = inpt[:N]
            labels2 = T.concatenate((labels, [blank, blank]))
            sec_diag = T.neq(labels2[:-2], labels2[2:]) * T.eq(labels2[1:-1], blank)
            recurrence_relation = \
                T.eye(N) + \
                T.eye(N, k=1) + \
                T.eye(N, k=2) * sec_diag.dimshuffle((0, 'x'))

            pred_y = outpt[:, labels]

            fwd_pbblts, _ = theano.scan(
                lambda curr, accum: T.switch(T.eq(curr*T.dot(accum, recurrence_relation), 0.0),
                                             T.dot(accum, recurrence_relation)
                                             , curr*T.dot(accum, recurrence_relation)),
                sequences=[pred_y],
                outputs_info=[T.eye(N)[0]]
            )
            #return fwd_pbblts
            #liklihood = fwd_pbblts[0, 0]
            liklihood = fwd_pbblts[-1, -1] + fwd_pbblts[-1, -2]
            #liklihood = T.switch(T.lt(liklihood, 1e-35), 1e-35, liklihood)
            #loss = -T.log(T.cast(liklihood, "float32"))
            #loss = 10 * (liklihood - 1) * (liklihood - 100)
            loss = (T.le(liklihood, 1.0)*(10*(liklihood-1)*(liklihood-100)))+(T.gt(liklihood, 1.0)*(-T.log(T.cast(liklihood, "float32"))))
            return loss
Code Example #16
    def f1_score(self, y, labels=[0, 2]):
      """
      Mean F1 score between two classes (positive and negative as specified by the labels array).
      """
      y_tr = y
      y_pr = self.y_pred

      correct = T.eq(y_tr, y_pr)
      wrong = T.neq(y_tr, y_pr)

      label = labels[0]
      tp_neg = T.sum(correct * T.eq(y_tr, label))
      fp_neg = T.sum(wrong * T.eq(y_pr, label))
      fn_neg = T.sum(T.eq(y_tr, label) * T.neq(y_pr, label))
      tp_neg = T.cast(tp_neg, theano.config.floatX)
      prec_neg = tp_neg / T.maximum(1, tp_neg + fp_neg)
      recall_neg = tp_neg / T.maximum(1, tp_neg + fn_neg)
      f1_neg = 2. * prec_neg * recall_neg / T.maximum(1, prec_neg + recall_neg)

      label = labels[1]
      tp_pos = T.sum(correct * T.eq(y_tr, label))
      fp_pos = T.sum(wrong * T.eq(y_pr, label))
      fn_pos = T.sum(T.eq(y_tr, label) * T.neq(y_pr, label))
      tp_pos = T.cast(tp_pos, theano.config.floatX)
      prec_pos = tp_pos / T.maximum(1, tp_pos + fp_pos)
      recall_pos = tp_pos / T.maximum(1, tp_pos + fn_pos)
      f1_pos = 2. * prec_pos * recall_pos / T.maximum(1, prec_pos + recall_pos)

      return 0.5 * (f1_pos + f1_neg) * 100
Code Example #17
    def get_train(self, batchsize=None, testsize=None):
        sx = tt.tensor4()
        sy = tt.ivector()

        yc = self._propup(sx, batchsize, noise=False)
        if 1:
            cost = -tt.log(tt.nnet.softmax(yc))[tt.arange(sy.shape[0]), sy].mean()
        else:
            from hinge import multi_hinge_margin
            cost = multi_hinge_margin(yc, sy).mean()

        error = tt.neq(tt.argmax(yc, axis=1), sy).mean()

        # get updates
        params = self.params
        grads = dict(zip(params, theano.grad(cost, params)))
        updates = collections.OrderedDict()
        for layer in self.layers:
            updates.update(layer.updates(grads))

        train = theano.function(
            [sx, sy], [cost, error], updates=updates)

        # --- make test function
        y_pred = tt.argmax(self._propup(sx, testsize, noise=False), axis=1)
        error = tt.mean(tt.neq(y_pred, sy))
        test = theano.function([sx, sy], error)

        return train, test
Code Example #18
File: siamese_cbowUtils.py Project: zukki259/sandbox
  def get_output_for(self, input, **kwargs):
    '''
    The input is a batch of matrices of word vectors.
    The output is the sum of the word embeddings divided by the number of
    non-zero word embeddings in the input.

    The idea behind the normalisers is similar to that in the normal averageLayer
    '''

    # Sums of word embeddings (so the zero embeddings don't matter here)
    sums = input.sum(axis=2) 

    # Can we do this cheaper (as in, more efficient)?
    # NOTE that we explicitly cast the output of the last sum() to floatX
    # as otherwise Theano will cast the result of 'sums / normalizers' to
    # float64
    normalisers = T.neq((T.neq(input, 0.0)).sum(axis=3, dtype='int32'), 0.0).sum(axis=2, dtype='floatX').reshape((-1, self.iNrOfSentences, 1))
    
    averages = sums / normalisers

    if self.fGradientClippingBound is not None:
      averages = theano.gradient.grad_clip(averages,
                                           - self.fGradientClippingBound,
                                           self.fGradientClippingBound)

    return averages
Code Example #19
File: costs.py Project: BKJackson/Attentive_reader
def nll_simple(Y, Y_hat,
               cost_mask=None,
               cost_ent_mask=None,
               cost_ent_desc_mask=None):

    probs = Y_hat
    pred = TT.argmax(probs, axis=1).reshape(Y.shape)
    errors = TT.neq(pred, Y)
    ent_errors = None
    if cost_ent_mask is not None:
        pred_ent = TT.argmax(probs * cost_ent_mask.dimshuffle('x', 0),
                             axis=1).reshape(Y.shape)
        ent_errors = TT.neq(pred_ent, Y).mean()

    ent_desc_errors = None
    if cost_ent_desc_mask is not None:
        pred_desc_ent = TT.argmax(probs * cost_ent_desc_mask,
                             axis=1).reshape(Y.shape)
        ent_desc_errors = TT.neq(pred_desc_ent, Y).mean()

    LL = TT.log(_grab_probs(probs, Y) + 1e-8).reshape(Y.shape)

    if cost_mask is not None:
        total = cost_mask * LL
        errors = cost_mask * errors
        ncosts = TT.sum(cost_mask)
        mean_errors = TT.sum(errors) / (ncosts)
        ave = -TT.sum(total) / Y.shape[1]
    else:
        mean_errors = TT.mean(errors)
        ave = -TT.sum(LL) / Y.shape[0]
    return ave, mean_errors, ent_errors, ent_desc_errors
Code Example #20
File: ctc.py Project: CityU-HAN/CTC-LSTM
    def apply_log_domain(self, l, probs, l_len=None, probs_mask=None):
        # Does the same computation as apply, but alpha is in the log domain
        # This avoids numerical underflow issues that were not corrected in the previous version.

        def _log(a):
            return tensor.log(tensor.clip(a, 1e-12, 1e12))

        def _log_add(a, b):
            maximum = tensor.maximum(a, b)
            return (maximum + tensor.log1p(tensor.exp(a + b - 2 * maximum)))

        def _log_mul(a, b):
            return a + b

        # See comments above
        B = probs.shape[1]
        C = probs.shape[2]-1
        L = l.shape[0]
        S = 2*L+1
        
        l_blk = C * tensor.ones((S, B), dtype='int32')
        l_blk = tensor.set_subtensor(l_blk[1::2,:], l)
        l_blk = l_blk.T     # now l_blk is B x S

        alpha0 = tensor.concatenate([   tensor.ones((B, 1)),
                                        tensor.zeros((B, S-1))
                                    ], axis=1)
        alpha0 = _log(alpha0)

        l_blk_2 = tensor.concatenate([-tensor.ones((B,2)), l_blk[:,:-2]], axis=1)
        l_case2 = tensor.neq(l_blk, C) * tensor.neq(l_blk, l_blk_2)

        def recursion(p, p_mask, prev_alpha):
            prev_alpha_1 = tensor.concatenate([tensor.zeros((B,1)),prev_alpha[:,:-1]], axis=1)
            prev_alpha_2 = tensor.concatenate([tensor.zeros((B,2)),prev_alpha[:,:-2]], axis=1)

            alpha_bar1 = tensor.set_subtensor(prev_alpha[:,1:], _log_add(prev_alpha[:,1:],prev_alpha[:,:-1]))
            alpha_bar2 = tensor.set_subtensor(alpha_bar1[:,2:], _log_add(alpha_bar1[:,2:],prev_alpha[:,:-2]))

            alpha_bar = tensor.switch(l_case2, alpha_bar2, alpha_bar1)

            probs = _log(p[tensor.arange(B)[:,None].repeat(S,axis=1).flatten(), l_blk.flatten()].reshape((B,S)))
            next_alpha = _log_mul(alpha_bar, probs)
            next_alpha = tensor.switch(p_mask[:,None], next_alpha, prev_alpha)
            
            return next_alpha

        alpha, _ = scan(fn=recursion,
                             sequences=[probs, probs_mask],
                             outputs_info=[alpha0])

        last_alpha = alpha[-1]
        # last_alpha = theano.printing.Print('a-1')(last_alpha)

        prob = _log_add(last_alpha[tensor.arange(B), 2*l_len.astype('int32')-1],
                        last_alpha[tensor.arange(B), 2*l_len.astype('int32')])

        # return the negative log probability of the labellings
        return -prob
Code Example #21
def make_report(pars, trainer, data):
    data = h5.File('/nthome/maugust/thesis/train_val_test_crafted_real_int.hdf5','r')
    TX = data['test_set/test_set']
    TZ = data['test_labels/real_test_labels']
    TZ = one_hot(TZ,13)
    current_pars = trainer.model.parameters.data
    trainer.model.parameters.data[...] = trainer.best_pars

    n_wrong = 1 - T.eq(T.argmax(trainer.model.exprs['output'], axis=1),
                               T.argmax(trainer.model.exprs['target'], axis=1)).mean()
    f_n_wrong = trainer.model.function(['inpt', 'target'], n_wrong)

    f_pos = T.mean(T.neq(T.argmax(trainer.model.exprs['output'], axis=1),0) * T.eq(T.argmax(trainer.model.exprs['target'], axis=1), 0))
    f_f_pos = trainer.model.function(['inpt', 'target'], f_pos)

    f_neg = T.mean(T.eq(T.argmax(trainer.model.exprs['output'], axis=1),0) * T.neq(T.argmax(trainer.model.exprs['target'], axis=1), 0))
    f_f_neg = trainer.model.function(['inpt', 'target'], f_neg)


    emp_loss = f_n_wrong(TX,TZ)
    f_p = f_f_pos(TX,TZ)
    f_n = f_f_neg(TX,TZ)

    P_pos = np.argmax(trainer.model.predict(TX),axis=1)
    Z_pos = np.argmax(TZ, axis=1)

    neighbour_fails = .0
    relevant_fails = 0

    for i in np.arange(len(P_pos)):
        if P_pos[i] > 0 and Z_pos[i] > 0 and P_pos[i] != Z_pos[i]:
            relevant_fails += 1
            if is_neighbour(P_pos[i],Z_pos[i]):
                neighbour_fails += 1

    if relevant_fails > 0:
        neighbour_fails /= relevant_fails



    emp_loss_s = 'model achieved %f%% classification error on the test set' %emp_loss
    f_p_s = '\nmodel achieved %f%% false positives on the test set' %f_p
    f_n_s = '\nmodel achieved %f%% false negatives on the test set' %f_n
    neigh_s = '\nmodel achieved %f%% neighbour mispredictions on the test set' %neighbour_fails

    print emp_loss_s
    print f_p_s
    print f_n_s
    print neigh_s
    with open(os.path.join('.','eval_result.txt'),'w') as f:
        f.write(emp_loss_s)
        f.write(f_p_s)
        f.write(f_n_s)
        f.write(neigh_s)
    trainer.model.parameters.data[...] = current_pars

    return {'train_loss': trainer.score(*trainer.eval_data['train']),
            'val_loss': trainer.score(*trainer.eval_data['val']),
            'best_emp_test_loss': emp_loss}
Code Example #22
File: CNN.py Project: RaymondPaley/DRD
 def errors(self, y):
     if y.ndim != self.y_pred.ndim:
         raise TypeError("y should have the same shape as self.y_pred", ("y", y.type, "y_pred", self.y_pred.type))
     if y.dtype.startswith("int"):
         return T.mean(T.neq(self.y_pred, y))
     else:
         print ("!!! y should be of int type")
         return T.mean(T.neq(self.y_pred, T.cast(y, "int32")))
Code Example #23
File: NetworkOutputLayer.py Project: atuxhe/returnn
 def errors(self):
   """
   :rtype: theano.Variable
   """
   if self.y_data_flat.type == T.ivector().type:
     return self.norm * T.sum(T.neq(T.argmax(self.y_m[self.i], axis=-1), self.y_data_flat[self.i]))
   else:
     return self.norm * T.sum(T.neq(T.argmax(self.y_m[self.i], axis=-1), T.argmax(self.y_data_flat[self.i], axis=-1)))
Code Example #24
    def __init__(self, layer_sizes, n_samples, alpha, learning_rate, v_prior, batch_size, X_train, y_train, N_train, X_val, y_val, N_val):

        self.batch_size = batch_size
        self.N_train = N_train
        self.X_train = X_train
        self.y_train = y_train

        self.N_val = N_val
        self.X_val = X_val
        self.y_val = y_val

        # We create the network

        self.network = network.Network(layer_sizes, n_samples, v_prior, N_train)

        # index to a batch

        index = T.lscalar()  

        # We create the input and output variables. The input will be a minibatch replicated n_samples times

        self.x = T.matrix('x')
        self.y = T.vector('y', dtype = 'int32')

        # The logarithm of the values for the likelihood factors
        
        ll = self.network.log_likelihood_values(self.x, self.y)

        # The energy function for black-box alpha

        self.estimate_marginal_ll = -1.0 * N_train / (self.x.shape[ 0 ] * alpha) * \
            T.sum(LogSumExp(alpha * (ll - self.network.log_f_hat()), 0) + T.log(1.0 / n_samples)) - self.network.log_normalizer_q() + \
            self.network.log_Z_prior()

        # We create a theano function for updating q
        
        self.process_minibatch = theano.function([ index ], self.estimate_marginal_ll, \
            updates = adam(self.estimate_marginal_ll, self.network.params, learning_rate), \
            givens = { self.x: self.X_train[ index * batch_size: (index + 1) * batch_size ], \
            self.y: self.y_train[ index * batch_size: (index + 1) * batch_size ] })

        # We create a theano function for making predictions

        self.error_minibatch_train = theano.function([ index ],
            T.mean(T.neq(T.argmax((LogSumExp(self.network.output(self.x), 0) + T.log(1.0 / n_samples))[ 0, :, : ], axis = 1), self.y)),
            givens = { self.x: self.X_train[ index * batch_size: (index + 1) * batch_size ],
            self.y: self.y_train[ index * batch_size: (index + 1) * batch_size ] })

        self.error_minibatch_val = theano.function([ index ], 
            T.mean(T.neq(T.argmax((LogSumExp(self.network.output(self.x), 0) + T.log(1.0 / n_samples))[ 0, :, : ], axis = 1), self.y)),
            givens = { self.x: self.X_val[ index * batch_size: (index + 1) * batch_size ],
            self.y: self.y_val[ index * batch_size: (index + 1) * batch_size ] })

        self.ll_minibatch_val = theano.function([ index ], T.mean(LogSumExp(ll, 0) + T.log(1.0 / n_samples)), \
            givens = { self.x: self.X_val[ index * batch_size: (index + 1) * batch_size ], \
            self.y: self.y_val[ index * batch_size: (index + 1) * batch_size ] })

        self.network.update_randomness()
Code Example #25
File: NetworkOutputLayer.py Project: chagge/returnn
 def errors(self):
   """
   :rtype: theano.Variable
   """
   self.y_m = self.z.reshape((self.z.shape[0]*self.z.shape[1],self.z.shape[2]))
   if self.y_data_flat.type == T.ivector().type:
     return self.norm * T.sum(T.neq(T.argmax(self.y_m[self.i], axis=-1), self.y_data_flat[self.i]))
   else:
     return self.norm * T.sum(T.neq(T.argmax(self.y_m[self.i], axis=-1), T.argmax(self.y_data_flat[self.i], axis = -1)))
Code Example #26
    def get_tagging_channels_from_state(self, state, target):

        # Before using the state and targets, log them with the accumulator
        state = self.outputs_accumulator(state)
        target = self.targets_accumulator(target)

        missingValuesFilter = T.neq(target, -1)

        rval = OrderedDict()
        y_hat = state > 0.5
        y = target > 0.5
        wrong_bit = T.cast(T.neq(y, y_hat), state.dtype) * missingValuesFilter
        rval["mistagging"] = T.cast(wrong_bit.sum() / missingValuesFilter.sum(), state.dtype)

        y = T.cast(y, state.dtype)
        y_hat = T.cast(y_hat, state.dtype)
        tp = (y * y_hat * missingValuesFilter).sum()
        fp = ((1 - y) * y_hat * missingValuesFilter).sum()
        precision = tp / T.maximum(1.0, tp + fp)
        recall = tp / T.maximum(1.0, (y * missingValuesFilter).sum())
        rval["precision"] = precision
        rval["recall"] = recall
        rval["f1"] = 2.0 * precision * recall / T.maximum(1, precision + recall)

        tp = (y * y_hat * missingValuesFilter).sum(axis=0)
        fp = ((1 - y) * y_hat * missingValuesFilter).sum(axis=0)
        precision = tp / T.maximum(1.0, tp + fp)

        rval["per_output_precision.max"] = precision.max()
        rval["per_output_precision.mean"] = precision.mean()
        rval["per_output_precision.min"] = precision.min()

        recall = tp / T.maximum(1.0, (y * missingValuesFilter).sum(axis=0))

        rval["per_output_recall.max"] = recall.max()
        rval["per_output_recall.mean"] = recall.mean()
        rval["per_output_recall.min"] = recall.min()

        f1 = 2.0 * precision * recall / T.maximum(1, precision + recall)

        rval["per_output_f1.max"] = f1.max()
        rval["per_output_f1.mean"] = f1.mean()
        rval["per_output_f1.min"] = f1.min()

        # Define dummy channels with dummy values that will eventually receive
        # meanAvgPrec values from the TrainExtension that computes it
        for dummy in self.dummy_channels:
            rval[dummy] = f1.max()  # Use f1.max() because it's already been
            # computed so it costs nothing

        # Add computation of the mean average precision
        # from pylearn2_ICML2014 import meanAvgPrec
        # (rval['min_avg_prec'],
        # rval['mean_avg_prec'],
        # rval['max_avg_prec']) = meanAvgPrec.meanAveragePrecisionTheano(target, state)

        return rval
Code Example #27
File: classification.py Project: fdoperezi/santander
def jaccard_similarity(y_true, y_predicted):
    """
    y_true: tensor ({1, 0})
    y_predicted: tensor ({1, 0})
    note - we round predicted because float probabilities would not work
    """
    y_predicted = T.round(y_predicted).astype(theano.config.floatX)
    either_nonzero = T.or_(T.neq(y_true, 0), T.neq(y_predicted, 0))
    # intersection (both 1) over union (at least one 1)
    return T.and_(T.eq(y_true, y_predicted), either_nonzero).sum(axis=-1, dtype=theano.config.floatX) / either_nonzero.sum(axis=-1, dtype=theano.config.floatX)
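
A hypothetical quick check on toy binary vectors (intersection 1, union 3, so the expected similarity is 1/3):

import numpy as np
import theano
import theano.tensor as T

y_true = T.vector('y_true')
y_prob = T.vector('y_prob')
jaccard_fn = theano.function([y_true, y_prob], jaccard_similarity(y_true, y_prob))

print(jaccard_fn(np.array([1., 0., 1., 0.], dtype=theano.config.floatX),
                 np.array([.9, .8, .1, .2], dtype=theano.config.floatX)))  # -> ~0.333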
Code Example #28
def masked_categorical_accuracy(y_true, y_pred, mask):

    y_true = K.argmax(y_true, axis=-1)
    y_pred = K.argmax(y_pred, axis=-1)

    error = K.equal(y_true, y_pred)

    mask_template = T.and_(T.neq(y_true,  mask), T.neq(y_true, 0)).nonzero()

    return K.mean(error[mask_template])
Code Example #29
File: pdnnet.py Project: petered/pdnn-test
def past_weight_grad_step(xs, es, kp_x, kd_x, kp_e, kd_e, shape, dws=None):
    """
    Do an efficient update of the weights given the two spike-update.

    (This still runs FING SLOWLY!)

    :param xs: An (n_in) vector
    :param es: An (n_out) vector
    :param kp_x:
    :param kd_x:
    :param kp_e:
    :param kd_e:
    :param shape: (n_in, n_out)
    :return:
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_in, n_out = shape
    rx = kd_x/(kp_x+kd_x)
    re = kd_e/(kp_e+kd_e)

    tx_last = create_shared_variable(np.zeros(n_in)+1)
    te_last = create_shared_variable(np.zeros(n_out)+1)
    x_last = create_shared_variable(np.zeros(n_in))
    e_last = create_shared_variable(np.zeros(n_out))
    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    x_spike_ixs, = tt.nonzero(x_spikes)
    e_spike_ixs, = tt.nonzero(e_spikes)

    if dws is None:
        dws = tt.zeros(shape)

    t_last = tt.minimum(tx_last[x_spike_ixs, None], te_last)  # (n_x_spikes, n_out)
    dws = tt.inc_subtensor(dws[x_spike_ixs, :], x_last[x_spike_ixs, None]*e_last
        * rx**(tx_last[x_spike_ixs, None]-t_last)
        * re**(te_last[None, :]-t_last)
        * geoseries_sum(re*rx, t_end=t_last, t_start=1)
        )

    new_x_last = tt.set_subtensor(x_last[x_spike_ixs], x_last[x_spike_ixs]*rx**tx_last[x_spike_ixs]+ xs[x_spike_ixs]/as_floatx(kd_x))
    new_tx_last = tt.switch(x_spikes, 0, tx_last)

    t_last = tt.minimum(new_tx_last[:, None], te_last[e_spike_ixs])  # (n_in, n_e_spikes)
    dws = tt.inc_subtensor(dws[:, e_spike_ixs], new_x_last[:, None]*e_last[e_spike_ixs]
        * rx**(new_tx_last[:, None]-t_last)
        * re**(te_last[None, e_spike_ixs]-t_last)
        * geoseries_sum(re*rx, t_end=t_last, t_start=1)
        )

    add_update(x_last, new_x_last)
    add_update(e_last, tt.set_subtensor(e_last[e_spike_ixs], e_last[e_spike_ixs]*re**te_last[e_spike_ixs]+ es[e_spike_ixs]/as_floatx(kd_e)))
    add_update(tx_last, new_tx_last+1)
    add_update(te_last, tt.switch(e_spikes, 1, te_last+1))
    return dws
Code Example #30
File: logicnn_classes.py Project: arfu2016/logicnn
 def errors(self, y):
     # check if y has same dimension of y_pred
     if y.ndim != self.q_y_pred.ndim:
         raise TypeError('y should have the same shape as self.y_pred',
             ('y', y.type, 'y_pred', self.q_y_pred.type))
     # check if y is of the correct datatype
     if y.dtype.startswith('int'):
         # the T.neq operator returns a vector of 0s and 1s, where 1
         # represents a mistake in prediction
         return T.mean(T.neq(self.q_y_pred, y)), T.mean(T.neq(self.p_y_pred, y))
     else:
         raise NotImplementedError()
Code Example #31
    def __init__(self, state, data):
        self.rng = numpy.random.RandomState(state['seed'])
        self.srng = RandomStreams(self.rng.randint(1e5))
        self.data = data
        self.nin = int(data.xdim)
        self.state = state
        self.nout = int(data.ydim)

        #######################
        # 0. Training functions
        #######################
        self.x = TT.matrix('X')
        self.y = TT.ivector('y')
        self.layer0 = HiddenLayerStandard(self.rng,
                                          self.x,
                                          self.nin,
                                          eval(str(state['nhid'])),
                                          name='layer0')

        self.layer1 = SoftmaxLayerStandard(self.rng, self.layer0.output,
                                           eval(str(state['nhid'])), self.nout)

        self.params = []
        self.params += self.layer0.params
        self.params += self.layer1.params
        self.best_params = [(x.name, x.get_value()) for x in self.params]
        self.params_shape = [
            x.get_value(borrow=True).shape for x in self.params
        ]
        ##### PARAMS
        self.inputs = [self.x, self.y]
        inds = TT.constant(numpy.asarray(range(state['cbs']), dtype='int32'))
        cost = -TT.log(self.layer1.output)[inds, self.y]
        self.train_cost = TT.mean(cost)
        if state['matrix'] == 'KL':
            self.Gvs = lambda *args:\
                    TT.Lop(self.layer1.output,
                           self.params,
                           TT.Rop(self.layer1.output,
                                  self.params,
                                  args) / (self.layer1.output * state['mbs']))
        elif state['matrix'] == 'cov':
            self.Gvs = lambda *args:\
                    TT.Lop(cost,
                           self.params,
                           TT.Rop(cost,
                                  self.params,
                                  args) / (numpy.float32(state['mbs'])))

        pred = TT.argmax(self.layer1.output, axis=1)
        self.error = TT.mean(TT.neq(pred, self.y)) * 100.

        #########################
        # 1. Validation functions
        #########################
        givens = {}
        givens[self.x] = self.data._valid_x
        givens[self.y] = self.data._valid_y
        print("IMPS", [type(x) for x in givens])
        print("IMPS", [x.shape for x in givens])
        self.valid_eval_func = theano.function([],
                                               self.error,
                                               givens=givens,
                                               name='valid_eval_fn',
                                               profile=0)

        givens[self.x] = self.data._test_x
        givens[self.y] = self.data._test_y
        self.test_eval_func = theano.function([],
                                              self.error,
                                              givens=givens,
                                              name='test_fn',
                                              profile=0)
Code Example #32
def build_network():
    """Build network.

    Returns
    -------

    """

    import theano.tensor as t
    from collections import OrderedDict

    # alpha is the exponential moving average factor
    alpha = .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4
    print("epsilon = " + str(epsilon))

    # BinaryConnect
    binary = True
    print("binary = " + str(binary))
    stochastic = True
    print("stochastic = " + str(stochastic))
    # (-h,+h) are the two binary values
    # h = "Glorot"
    h = 1.
    print("h = " + str(h))
    # w_lr_scale = 1.
    # "Glorot" means we are using the coefficients from Glorot's paper
    w_lr_scale = "Glorot"
    print("w_lr_scale = " + str(w_lr_scale))

    # Prepare Theano variables for inputs and targets
    input_var = t.tensor4('inputs')
    target = t.matrix('targets')
    lr = t.scalar('lr', dtype=theano.config.floatX)

    cnn = lasagne.layers.InputLayer(shape=(None, 3, 32, 32),
                                    input_var=input_var)

    # 128C3-128C3-P2
    cnn = binary_connect.Conv2DLayer(
        cnn,
        binary=binary,
        stochastic=stochastic,
        H=h,
        W_LR_scale=w_lr_scale,
        num_filters=128,
        filter_size=(3, 3),
        pad=1,
        nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(
        cnn, nonlinearity=lasagne.nonlinearities.rectify)

    cnn = binary_connect.Conv2DLayer(
        cnn,
        binary=binary,
        stochastic=stochastic,
        H=h,
        W_LR_scale=w_lr_scale,
        num_filters=128,
        filter_size=(3, 3),
        pad=1,
        nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(
        cnn, nonlinearity=lasagne.nonlinearities.rectify)

    # 256C3-256C3-P2
    cnn = binary_connect.Conv2DLayer(
        cnn,
        binary=binary,
        stochastic=stochastic,
        H=h,
        W_LR_scale=w_lr_scale,
        num_filters=256,
        filter_size=(3, 3),
        pad=1,
        nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(
        cnn, nonlinearity=lasagne.nonlinearities.rectify)

    cnn = binary_connect.Conv2DLayer(
        cnn,
        binary=binary,
        stochastic=stochastic,
        H=h,
        W_LR_scale=w_lr_scale,
        num_filters=256,
        filter_size=(3, 3),
        pad=1,
        nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(
        cnn, nonlinearity=lasagne.nonlinearities.rectify)

    # 512C3-512C3-P2
    cnn = binary_connect.Conv2DLayer(
        cnn,
        binary=binary,
        stochastic=stochastic,
        H=h,
        W_LR_scale=w_lr_scale,
        num_filters=512,
        filter_size=(3, 3),
        pad=1,
        nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(
        cnn, nonlinearity=lasagne.nonlinearities.rectify)

    cnn = binary_connect.Conv2DLayer(
        cnn,
        binary=binary,
        stochastic=stochastic,
        H=h,
        W_LR_scale=w_lr_scale,
        num_filters=512,
        filter_size=(3, 3),
        pad=1,
        nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(
        cnn, nonlinearity=lasagne.nonlinearities.rectify)

    # 1024FP-1024FP-10FP
    cnn = binary_connect.DenseLayer(
        cnn,
        binary=binary,
        stochastic=stochastic,
        H=h,
        W_LR_scale=w_lr_scale,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=1024)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(
        cnn, nonlinearity=lasagne.nonlinearities.rectify)

    cnn = binary_connect.DenseLayer(
        cnn,
        binary=binary,
        stochastic=stochastic,
        H=h,
        W_LR_scale=w_lr_scale,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=1024)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(
        cnn, nonlinearity=lasagne.nonlinearities.rectify)

    cnn = binary_connect.DenseLayer(
        cnn,
        binary=binary,
        stochastic=stochastic,
        H=h,
        W_LR_scale=w_lr_scale,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=10)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(
        cnn, nonlinearity=lasagne.nonlinearities.identity)

    train_output = lasagne.layers.get_output(cnn, deterministic=False)

    # squared hinge loss
    loss = t.mean(t.sqr(t.maximum(0., 1. - target * train_output)))

    if binary:
        from itertools import chain
        # w updates
        w = lasagne.layers.get_all_params(cnn, binary=True)
        w_grads = binary_connect.compute_grads(loss, cnn)
        updates = lasagne.updates.adam(loss_or_grads=w_grads,
                                       params=w,
                                       learning_rate=lr)
        updates = binary_connect.clipping_scaling(updates, cnn)

        # other parameters updates
        params = lasagne.layers.get_all_params(cnn,
                                               trainable=True,
                                               binary=False)
        updates = OrderedDict(
            chain(
                updates.items(),
                lasagne.updates.adam(loss_or_grads=loss,
                                     params=params,
                                     learning_rate=lr).items()))

    else:
        params = lasagne.layers.get_all_params(cnn, trainable=True)
        updates = lasagne.updates.adam(loss_or_grads=loss,
                                       params=params,
                                       learning_rate=lr)

    test_output = lasagne.layers.get_output(cnn, deterministic=True)
    test_loss = t.mean(t.sqr(t.maximum(0., 1. - target * test_output)))
    test_err = t.mean(t.neq(t.argmax(test_output, axis=1),
                            t.argmax(target, axis=1)),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch
    # (by giving the updates dictionary)
    # and returning the corresponding training loss:
    train_fn = theano.function([input_var, target, lr],
                               loss,
                               updates=updates,
                               on_unused_input='ignore')

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target], [test_loss, test_err])

    return cnn, train_fn, val_fn
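
A hypothetical usage sketch for the returned functions (assumes the project's `lasagne`/`binary_connect` imports are available and `theano` is in scope; CIFAR-10-shaped batches, with targets one-hot encoded as +1/-1 rows for the squared hinge loss):

import numpy as np

cnn, train_fn, val_fn = build_network()

X_batch = np.random.randn(32, 3, 32, 32).astype(theano.config.floatX)
labels = np.random.randint(0, 10, size=32)
targets = -np.ones((32, 10), dtype=theano.config.floatX)
targets[np.arange(32), labels] = 1.

train_loss = train_fn(X_batch, targets, 0.001)     # learning rate is the third input
test_loss, test_err = val_fn(X_batch, targets)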
Code Example #33
# !/usr/bin/env python3
#########################
# BUILD FINE-TUNE MODEL #
#########################

print "\n\n... building fine-tune model -- contraction 1"
for imodel in model.models_stack:
    imodel.threshold = 0.
model_ft = model + LogisticRegression(
    hid_layer_sizes[-1], 10, npy_rng=npy_rng
)
model_ft.print_layer()

train_set_error_rate = theano.function(
    [],
    T.mean(T.neq(model_ft.models_stack[-1].predict(), train_y)),
    givens = {model_ft.varin : train_x},
)
test_set_error_rate = theano.function(
    [],
    T.mean(T.neq(model_ft.models_stack[-1].predict(), test_y)),
    givens = {model_ft.varin : test_x},
)
print "Done."

print "... training with conjugate gradient: minimize.py"
fun_cost = theano.function(
    [model_ft.varin, model_ft.models_stack[-1].vartruth],
    model_ft.models_stack[-1].cost() + model_ft.models_stack[-1].weightdecay(weightdecay)
)
def return_cost(test_params, input_x, truth_y):
Code Example #35
    def build_decoder(self,
                      hs,
                      x,
                      xmask=None,
                      y=None,
                      y_neg=None,
                      mode=EVALUATION,
                      prev_hd=None,
                      step_num=None):
        # Check parameter consistency
        if mode == Decoder.EVALUATION:
            assert not prev_hd
            assert y
        else:
            assert not y
            assert prev_hd

        # if mode == EVALUATION
        #   xd = (timesteps, batch_size, qdim)
        #
        # if mode != EVALUATION
        #   xd = (n_samples, dim)
        xd = self.approx_embedder(x)
        if not xmask:
            xmask = T.neq(x, self.eoq_sym)

        # we must zero out the </s> embedding
        # i.e. the embedding x_{-1} is the 0 vector
        # as well as hd_{-1} which will be reseted in the scan functions
        if xd.ndim != 3:
            assert mode != Decoder.EVALUATION  # So only in beam search
            xd = (xd.dimshuffle((1, 0)) * xmask).dimshuffle((1, 0))
        else:
            assert mode == Decoder.EVALUATION  # So only in evaluation
            xd = (xd.dimshuffle((2, 0, 1)) * xmask).dimshuffle((1, 2, 0))

        # Run the decoder
        if mode == Decoder.EVALUATION:
            hd_init = T.alloc(np.float32(0), x.shape[1], self.qdim)
        else:
            hd_init = prev_hd

        if self.query_step_type == "gated":
            f_dec = self.gated_step
            o_dec_info = [hd_init, None, None, None]
        else:
            f_dec = self.plain_step
            o_dec_info = [hd_init]

        # If the mode of the decoder is EVALUATION
        # then we evaluate by default all the sentence
        # xd - i.e. xd.ndim == 3, xd = (timesteps, batch_size, qdim)
        if mode == Decoder.EVALUATION:
            _res, _ = theano.scan(f_dec,
                              sequences=[xd, xmask, hs],\
                              outputs_info=o_dec_info)
        # else we evaluate only one step of the recurrence using the
        # previous hidden states and the previous computed hierarchical
        # states.
        else:
            _res = f_dec(xd, xmask, hs, prev_hd)

        if isinstance(_res, list) or isinstance(_res, tuple):
            hd = _res[0]
        else:
            hd = _res

        pre_activ = self.build_output_layer(hs, xd, hd)

        # EVALUATION  : Return target_probs + all the predicted ranks
        # target_probs.ndim == 3
        if mode == Decoder.EVALUATION:
            target_probs = GrabProbs(self.output_softmax(pre_activ), y)
            return target_probs, hd, _res
        # BEAM_SEARCH : Return output (the softmax layer) + the new hidden states
        elif mode == Decoder.BEAM_SEARCH:
            return self.output_softmax(pre_activ), hd
Code Example #36
File: cifar10_3r.py Project: vd114/galatea
                print 'no ndm'
            print min_informative_str(ipt)
    if found > 0:
        print type(node.op), found
        try:
            print '\t', type(node.op.scalar_op)
        except:
            pass

print count

test = CIFAR10(which_set='test', one_hot=True, gcn=55.)

yl = T.argmax(yb, axis=1)

mf1acc = 1. - T.neq(yl, T.argmax(ymf1, axis=1)).mean()
#mfnacc = 1.-T.neq(yl , T.argmax(mfny,axis=1)).mean()

batch_acc = function([Xb, yb], [mf1acc])


def accs():
    mf1_accs = []
    for i in xrange(10000 / batch_size):
        mf1_accs.append(
            batch_acc(
                test.get_topological_view(test.X[i * batch_size:(i + 1) *
                                                 batch_size, :]),
                test.y[i * batch_size:(i + 1) * batch_size, :])[0])
    return sum(mf1_accs) / float(len(mf1_accs))
Code Example #37
def fn(cycles, rng, sm, cum_dmg, sn_0, sn_c, sn_cutoff, fat, n_fat, n_c, m_1,
       m_2, fat_fact, n_0, m_0, n_cutoff, r_y, r_m,
       m_s_th):  # y is previous result
    """
        input function to the loop over all bins
    """
    cum_damage = np.float64(0)
    log10_sn_1 = (tt.log10(fat * fat_fact) +
                  (tt.log10(n_fat) - tt.log10(n_0)) / m_1).astype("float64")
    sn_1 = 10**log10_sn_1
    sn_0 = (10**(log10_sn_1 + tt.log10(n_0) / m_0)).astype("float64")
    sn_c = (10**(tt.log10(fat * fat_fact) -
                 (tt.log10(n_c) - tt.log10(n_fat)) / m_1)).astype("float64")
    sn_cutoff = (
        10**(tt.log10(sn_c) -
             (tt.log10(n_cutoff) - tt.log10(n_c)) / m_2)).astype("float64")
    life = 0
    log10_life = 0
    dmg_per_bin = 0
    s_factor_life_per_bin = 0
    s_factor_stress_per_bin = 0
    s_nb = ifelse(
        tt.neq(cycles, 0),
        ifelse(
            tt.le(cycles, n_0),
            (10**(log10_sn_1 + (tt.log10(n_0) - tt.log10(cycles)) / m_0)),
            ifelse(
                tt.le(cycles,
                      n_c), (10**(tt.log10(sn_c) +
                                  (tt.log10(n_c) - tt.log10(cycles)) / m_1)),
                10**(tt.log10(sn_c) -
                     (tt.log10(cycles) - tt.log10(n_c)) / m_2))), 0 * cycles)

    rng = ifelse(
        tt.neq(m_s_th, 0) & tt.neq(sm, 0),
        ifelse(tt.neq(sm, 0),
               apply_mean_stress_theory(m_s_th, sm, rng, sn_0, r_m, r_y), rng),
        rng)  # double check if 0 = False
    log10_life, life, dmg_per_bin, s_factor_life_per_bin = ifelse(
        tt.lt(sn_0, rng),
        [np.float64(-1),
         np.float64(0),
         np.float64(100),
         np.float64(0)],
        ifelse(
            tt.lt(sn_1, rng),
            [(tt.log10(n_0) - m_0 * (tt.log10(rng) - log10_sn_1)),
             (10**(tt.log10(n_0) - m_0 * (tt.log10(rng) - log10_sn_1))),
             np.float64(0),
             np.float64(0)],
            ifelse(
                tt.lt(sn_c, rng),
                [(tt.log10(n_c) - m_1 * (tt.log10(rng) - tt.log10(sn_c))),
                 (10**(tt.log10(n_c) - m_1 *
                       (tt.log10(rng) - tt.log10(sn_c)))),
                 np.float64(0),
                 np.float64(0)],
                ifelse(tt.lt(0, rng), [(tt.min([
                    tt.log10(n_cutoff),
                    tt.log10(n_c) + m_2 * (tt.log10(sn_c) - tt.log10(rng))
                ])),
                                       (10**tt.min([
                                           tt.log10(n_cutoff),
                                           tt.log10(n_c) + m_2 *
                                           (tt.log10(sn_c) - tt.log10(rng))
                                       ])).astype("float64"),
                                       np.float64(0),
                                       np.float64(0)],
                       [
                           tt.log10(n_cutoff).astype("float64"),
                           n_cutoff.astype("float64"),
                           np.float64(0),
                           np.float64(0)
                       ]))))

    dmg_per_bin = ifelse(tt.lt(0, life), cycles / life,
                         0 * life)  #_rst_dic_per_bin['life'] > 0:
    s_factor_life_per_bin = ifelse(tt.neq(dmg_per_bin, 0.), 1 / dmg_per_bin,
                                   np.float64(1))
    s_factor_stress_per_bin = tt.min([100.0, s_nb / tt.max([1, rng])])

    return dmg_per_bin, sn_0, sn_c, sn_cutoff
Code Example #38
File: RBM.py Project: mwong009/bigdataTO
    def errors(self, visibles, validate_terms):

        output_error = []
        self.output_prediction = []
        output_targets = {}

        for j, (W, v) in enumerate(zip(self.W_params, visibles)):
            if W.name in validate_terms:
                output_targets[W.name] = v
                visibles[j] = shared(np.zeros(v.shape.eval(),
                                              dtype=theano.config.floatX),
                                     name=W.name,
                                     borrow=True)

        gibbs_output = self.gibbs_vhv(visibles)
        gibbs_output = gibbs_output[-len(visibles):]

        for i, (W, v,
                s) in enumerate(zip(self.W_params, visibles, gibbs_output)):
            if W.name in validate_terms:
                visibles[i] = s

        for valid_term in validate_terms:

            energy, valid_feature = self.conditional_energy(
                visibles, validate_terms, valid_term)

            if valid_feature['type'] == 'category':
                # for categorical features (n, feature, category)
                probabilities = T.nnet.softmax(energy)
                y = T.argmax(output_targets[valid_term], axis=-1).flatten()
                p = T.argmax(probabilities, axis=-1)
                error = T.mean(T.neq(y, p))  # misclassification rate (1 - accuracy)

                self.output_prediction.extend([y])
                self.output_prediction.extend([p])

            elif valid_feature['type'] == 'scale':
                # for scale features (n, feature)
                norm = self.norms[valid_feature['name']]
                y = output_targets[valid_term].flatten() * norm
                y_out = T.nnet.softplus(energy).flatten() * norm
                error = T.sqrt(T.mean(T.sqr(y_out - y)))  # RMSE error

                self.output_prediction.extend([y])
                self.output_prediction.extend([y_out])

            elif valid_feature['type'] == 'binary':
                # for binary features (n, feature)
                y = output_targets[valid_term].flatten()
                prob = T.nnet.sigmoid(energy)
                p = T.ceil(prob * 3) - 2.
                error = T.mean(T.neq(p, y))  # misclassification rate (1 - accuracy)

                self.output_prediction.extend([y])
                self.output_prediction.extend([p])

            else:
                raise NotImplementedError()

            output_error.append(error)

        return output_error
コード例 #39
0
    def __init__(self, options, channel, data):
        self.rng = numpy.random.RandomState(options['seed'])
        self.srng = RandomStreams(self.rng.randint(1e5))
        self.nin = data['train_x'].shape[1]
        self.options = options
        if isinstance(options['hids'], list):
            self.hids = options['hids']
        else:
            self.hids = eval(str(options['hids']))
        self.nout = numpy.int32(numpy.max(data['train_y']) + 1)

        def gen_mat(nin, nout, name):
            # NOTE : assumes sigmoid
            self.rng = numpy.random.RandomState(123)
            if options['init'] == 'small':
                lim = numpy.sqrt(1. / nin)
                vals = self.rng.uniform(size=(nin, nout), low=-lim,
                                        high=lim).astype('float32')
            else:
                lim = numpy.sqrt(6. / (nin + nout))
                print 'Lim used to generate random numbers', lim
                vals = self.rng.uniform(size=(nin, nout), low=-lim,
                                        high=lim).astype('float32') * 4.
                try:
                    print 'Rank (',nin, ',', nout, '):', \
                            numpy.linalg.matrix_rank(vals)
                except:
                    pass
            var = theano.shared(vals, name=name)
            print_mem(name)
            return var

        def gen_vec(n, name):
            self.rng = numpy.random.RandomState(123)
            vals = self.rng.uniform(size=(n, ), low=-.0005,
                                    high=.0005).astype('float32') * 0.
            var = theano.shared(vals, name=name)
            print_mem(name)
            return var

        ##### PARAMS
        all_hids = [self.nin] + self.hids + [self.nout]
        activs = [TT.nnet.sigmoid] * len(self.hids) + [softmax]
        #activs = [TT.tanh] * len(self.hids) + [softmax]

        self.params = []
        self.cpu_params = []
        self.params_shape = []
        for idx, (in_dim, out_dim) in\
                    enumerate(zip(all_hids[:-1], all_hids[1:])):
            gpu_W = gen_mat(in_dim, out_dim, name='W%d' % idx)
            gpu_b = gen_vec(out_dim, name='b%d' % idx)
            self.params += [gpu_W, gpu_b]
            self.params_shape.append((in_dim, out_dim))
            self.params_shape.append((out_dim, ))

        self.x = TT.matrix('X')
        self.y = TT.ivector('y')
        self.inputs = [self.x, self.y]
        hid = self.x
        for idx, activ in zip(range(len(self.params) // 2), activs):
            W = self.params[idx * 2]
            b = self.params[idx * 2 + 1]
            preactiv = TT.dot(hid, W) + b
            hid = activ(preactiv)
        self.preactiv_out = preactiv
        batch_train_cost = -TT.log(hid)[
            TT.constant(numpy.asarray(range(options['cbs'])).astype('int32')),
            self.y]

        if options['type'] == 'gradCov':
            self.outs = [batch_train_cost]
            self.outs_operator = ['linear']
        elif options['type'] == 'leroux':
            self.outs = [batch_train_cost - batch_train_cost.mean()]
            self.outs_operator = ['linear']
        else:
            self.outs = [hid]
            self.outs_operator = ['softmax']

        self.gf_outs = [hid]
        self.gf_outs_operator = ['softmax']
        self.gc_outs = [batch_train_cost]
        self.gc_outs_operator = ['linear']

        self.train_cost = TT.mean(batch_train_cost)
        pred = TT.argmax(hid, axis=1)
        self.err = TT.mean(TT.neq(pred, self.y))
        self.valid_xdata = theano.shared(data['valid_x'],
                                         name='valid_xdata',
                                         borrow=True)
        self.test_xdata = theano.shared(data['test_x'],
                                        name='test_xdata',
                                        borrow=True)
        mode = gpu_mode
        self.valid_ydata = TT.cast(
            theano.shared(data['valid_y'], name='valid_ydata', borrow=True),
            'int32')
        self.test_ydata = TT.cast(
            theano.shared(data['test_y'], name='test_ydata', borrow=True),
            'int32')

        givens = {}
        givens[self.x] = self.valid_xdata
        givens[self.y] = self.valid_ydata

        self.valid_eval_func = theano.function([],
                                               self.err,
                                               givens=givens,
                                               name='valid_eval_fn',
                                               profile=options['profile'],
                                               mode=mode)

        givens[self.x] = self.test_xdata
        givens[self.y] = self.test_ydata
        self.test_eval_func = theano.function([],
                                              self.err,
                                              givens=givens,
                                              name='test_fn',
                                              profile=options['profile'],
                                              mode=mode)
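As a standalone illustration (not part of the class above) of the indexing trick used for batch_train_cost, the per-example negative log-likelihood can be read out of a softmax output with an arange over the batch dimension; the names below are hypothetical.

import theano
import theano.tensor as TT

probs = TT.matrix('probs')   # batch x n_classes softmax outputs
y = TT.ivector('y')          # integer class labels
nll = -TT.log(probs)[TT.arange(y.shape[0]), y]  # one value per example
nll_fn = theano.function([probs, y], nll, allow_input_downcast=True)
# nll_fn([[0.7, 0.3], [0.2, 0.8]], [0, 1]) -> [-log(0.7), -log(0.8)] ~ [0.357, 0.223]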
コード例 #40
0
ファイル: mnist.py プロジェクト: laurentiuduca/star-bnn
        W = lasagne.layers.get_all_params(mlp, binary=True)
        W_grads = binary_net.compute_grads(loss,mlp)
        updates = lasagne.updates.adam(loss_or_grads=W_grads, params=W, learning_rate=LR)
        updates = binary_net.clipping_scaling(updates,mlp)
        
        # other parameters updates
        params = lasagne.layers.get_all_params(mlp, trainable=True, binary=False)
        updates = OrderedDict(updates.items() + lasagne.updates.adam(loss_or_grads=loss, params=params, learning_rate=LR).items())
        
    else:
        params = lasagne.layers.get_all_params(mlp, trainable=True)
        updates = lasagne.updates.adam(loss_or_grads=loss, params=params, learning_rate=LR)

    test_output = lasagne.layers.get_output(mlp, deterministic=True)
    test_loss = T.mean(T.sqr(T.maximum(0.,1.-target*test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1), T.argmax(target, axis=1)),dtype=theano.config.floatX)
    
    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary) 
    # and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target], [test_loss, test_err])

    print('Training...')
    
    binary_net.train(
            train_fn,val_fn,
            mlp,
            batch_size,
            LR_start,LR_decay,
コード例 #41
0
    def __init__(self, rng, input, is_train, n_in, n_hidden, n_out, p=0.5, dropout=False, input_p=0.1): #, batch_size=20):

        #Need input dropout layer
        if input_p is not None:
            self.input_layer = drop(input, rng=rng, p=input_p)
            self.input_layer = T.switch(T.neq(is_train, 0), self.input_layer, input)
        else:
            self.input_layer=input

        param_to_scale = [] #To scale weights to square length of 15

        self.layer_0 = HiddenLayer(
            rng=rng,
            input=self.input_layer,
            n_in=n_in,
            n_out=n_hidden[0],
            activation=prelu,
            is_train=is_train,
            p=p,
            dropout=dropout
        )

        self.params = self.layer_0.params
        param_to_scale = param_to_scale + [self.layer_0.params[0]]

        #Add more layers accordingly
        layer_number = 1
        if len(n_hidden)>1:

            for layer in n_hidden[1:]:

                current_hidden_layer = HiddenLayer(
                                                    rng=rng,
                                                    input=getattr(self, "layer_" + str(layer_number-1)).output,
                                                    n_in=n_hidden[layer_number-1],
                                                    n_out=n_hidden[layer_number],
                                                    activation=prelu,
                                                    is_train=is_train,
                                                    p=p,
                                                    dropout=dropout
                                                )

                setattr(self, "layer_" + str(layer_number), current_hidden_layer)

                self.params = self.params + getattr(self, "layer_" + str(layer_number)).params

                param_to_scale = param_to_scale + [getattr(self, "layer_" + str(layer_number)).params[0]]

                layer_number = layer_number + 1


        # The logistic regression layer gets as input the hidden units
        # of the hidden layer

        self.linearRegressionLayer = LinearRegression(
            input=getattr(self, "layer_" + str(layer_number-1)).output,
            n_in=n_hidden[layer_number-1],
            n_out=n_out,
            rng=rng #,batch_size=batch_size
        )
        self.params = self.params + self.linearRegressionLayer.params

        #L1 and L2 regularization
        self.L1 = (
            abs(self.layer_0.W).sum() + abs(self.linearRegressionLayer.W).sum()
        )

        self.L2_sqr = (
            (self.layer_0.W ** 2).sum() + (self.linearRegressionLayer.W ** 2).sum()
        )
        #

        # self.negative_log_likelihood = (
        #     self.logRegressionLayer.negative_log_likelihood
        # )
        #
        # self.errors = self.logRegressionLayer.errors
        # self.pred = self.logRegressionLayer.pred
        # self.diff = self.logRegressionLayer.diff
        self.param_to_scale = param_to_scale
        self.errors = self.linearRegressionLayer.errors
        self.loss = self.linearRegressionLayer.loss
        self.NRMSE = self.linearRegressionLayer.NRMSE
        self.pred = self.linearRegressionLayer.pred

        self.input = input  # note: this is the raw input, different from self.input_layer
コード例 #42
0
    def ready(self):
        global total_encode_time 
        #say("in encoder ready: \n")
        #start_encode_time = time.time()
        generator = self.generator
        embedding_layer = self.embedding_layer
        args = self.args
        padding_id = embedding_layer.vocab_map["<padding>"]

        dropout = generator.dropout

        # len*batch
        x = generator.x
        z = generator.z_pred
        z = z.dimshuffle((0,1,"x"))

        # batch*nclasses
        y = self.y = T.fmatrix()

        n_d = args.hidden_dimension
        n_e = embedding_layer.n_d
        activation = get_activation_by_name(args.activation)

        layers = self.layers = [ ]
        depth = args.depth
        layer_type = args.layer.lower()
        for i in xrange(depth):
            if layer_type == "rcnn":
                l = ExtRCNN(
                        n_in = n_e if i == 0 else n_d,
                        n_out = n_d,
                        activation = activation,
                        order = args.order
                    )
            elif layer_type == "lstm":
                l = ExtLSTM(
                        n_in = n_e if i == 0 else n_d,
                        n_out = n_d,
                        activation = activation
                    )
            layers.append(l)

        # len * batch * 1
        masks = T.cast(T.neq(x, padding_id).dimshuffle((0,1,"x")) * z, theano.config.floatX)
        # batch * 1
        cnt_non_padding = T.sum(masks, axis=0) + 1e-8

        # len*batch*n_e
        embs = generator.word_embs

        pooling = args.pooling
        lst_states = [ ]
        h_prev = embs
        for l in layers:
            # len*batch*n_d
            h_next = l.forward_all(h_prev, z)
            if pooling:
                # batch * n_d
                masked_sum = T.sum(h_next * masks, axis=0)
                lst_states.append(masked_sum/cnt_non_padding) # mean pooling
            else:
                lst_states.append(h_next[-1]) # last state
            h_prev = apply_dropout(h_next, dropout)

        if args.use_all:
            size = depth * n_d
            # batch * size (i.e. n_d*depth)
            h_final = T.concatenate(lst_states, axis=1)
        else:
            size = n_d
            h_final = lst_states[-1]
        h_final = apply_dropout(h_final, dropout)

        output_layer = self.output_layer = Layer(
                n_in = size,
                n_out = self.nclasses,
                activation = sigmoid
            )

        # batch * nclasses
        preds = self.preds = output_layer.forward(h_final)

        # batch
        loss_mat = self.loss_mat = (preds-y)**2

        pred_diff = self.pred_diff = T.mean(T.max(preds, axis=1) - T.min(preds, axis=1))

        if args.aspect < 0:
            loss_vec = T.mean(loss_mat, axis=1)
        else:
            assert args.aspect < self.nclasses
            loss_vec = loss_mat[:,args.aspect]
        self.loss_vec = loss_vec

        zsum = generator.zsum
        zdiff = generator.zdiff
        logpz = generator.logpz

        coherent_factor = args.sparsity * args.coherent
        loss = self.loss = T.mean(loss_vec)
        sparsity_cost = self.sparsity_cost = T.mean(zsum) * args.sparsity + \
                                             T.mean(zdiff) * coherent_factor
        cost_vec = loss_vec + zsum * args.sparsity + zdiff * coherent_factor
        cost_logpz = T.mean(cost_vec * T.sum(logpz, axis=0))
        self.obj = T.mean(cost_vec)

        params = self.params = [ ]
        for l in layers + [ output_layer ]:
            for p in l.params:
                params.append(p)
        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                                        for x in params)
        say("total # parameters: {}\n".format(nparams))

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost = l2_cost + T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg
        self.l2_cost = l2_cost

        self.cost_g = cost_logpz * 10 + generator.l2_cost
        self.cost_e = loss * 10 + l2_cost
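The masking logic above (T.neq against the padding id, then mean pooling over the non-padded positions) can be shown in isolation. The snippet below is a self-contained sketch with assumed shapes and a hypothetical padding_id, not code from the project.

import theano
import theano.tensor as T

padding_id = 0                 # hypothetical padding index
x = T.imatrix('x')             # len * batch word ids
h = T.tensor3('h')             # len * batch * n_d hidden states
# 1 where the token is real, 0 where it is padding
masks = T.cast(T.neq(x, padding_id).dimshuffle((0, 1, 'x')), theano.config.floatX)
cnt_non_padding = T.sum(masks, axis=0) + 1e-8             # batch * 1
mean_pooled = T.sum(h * masks, axis=0) / cnt_non_padding  # batch * n_d
pool_fn = theano.function([x, h], mean_pooled)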
コード例 #43
0
 def not_equal(self, x, y):
     return T.neq(x, y)
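For reference, a minimal usage sketch of the wrapper above: T.neq compares elementwise and returns an int8 tensor of 0/1 values (the names are illustrative).

import theano
import theano.tensor as T

a = T.ivector('a')
b = T.ivector('b')
neq_fn = theano.function([a, b], T.neq(a, b), allow_input_downcast=True)
# neq_fn([1, 2, 3], [1, 0, 3]) -> array([0, 1, 0], dtype=int8)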
コード例 #44
0
ファイル: my_lenet_nn.py プロジェクト: yuchunchien/NNDL-HW3
    def __init__(self, rng, is_train, input, n_in, n_out, W=None, b=None,
                 activation=T.tanh, p=0.7):
        """
        Hidden unit activation is given by: activation(dot(input,W) + b)

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights
        
        :type is_train: theano.iscalar   
        :param is_train: indicator pseudo-boolean (int) for switching between training and prediction

        :type input: theano.tensor.dmatrix
        :param input: a symbolic tensor of shape (n_examples, n_in)

        :type n_in: int
        :param n_in: dimensionality of input

        :type n_out: int
        :param n_out: number of hidden units

        :type activation: theano.Op or function
        :param activation: Non linearity to be applied in the hidden
                           layer
                           
        :type p: float or double
        :param p: probability of NOT dropping out a unit   
        """
        self.input = input

        if W is None:
            W_values = numpy.asarray(
                rng.uniform(
                    low=-numpy.sqrt(6. / (n_in + n_out)),
                    high=numpy.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)
                ),
                dtype=theano.config.floatX
            )
            if activation == theano.tensor.nnet.sigmoid:
                W_values *= 4

            W = theano.shared(value=W_values, name='W', borrow=True)

        if b is None:
            b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name='b', borrow=True)

        
        self.W = W
        self.b = b

        lin_output = T.dot(input, self.W) + self.b
        
        output = activation(lin_output)
        
        # training output: randomly drop units (keep each with probability p)
        train_output = drop(output, p)

        # is_train is a pseudo-boolean theano variable for switching between training
        # and prediction; at prediction time the output is scaled by p so the expected
        # activation matches the dropout used during training
        self.output = T.switch(T.neq(is_train, 0), train_output, p * output)
        
        # parameters of the model
        self.params = [self.W, self.b]
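The drop() helper used above is not shown in this snippet. A minimal sketch of such a helper (an assumption, using a binomial keep-mask from Theano's MRG RandomStreams) could look like this; at prediction time the layer above scales by p instead of masking.

import theano
from theano.sandbox.rng_mrg import MRG_RandomStreams

def drop(layer_input, p=0.5, rng=None):
    # keep each unit with probability p; rng is an optional numpy RandomState used for seeding
    seed = rng.randint(999999) if rng is not None else 1234
    srng = MRG_RandomStreams(seed)
    mask = srng.binomial(n=1, p=p, size=layer_input.shape,
                         dtype=theano.config.floatX)
    return layer_input * mask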
コード例 #45
0
    def __init__(self, rng, params, cost_function='mse', optimizer=RMSprop):

        lr = params["lr"]
        n_lstm = params['n_hidden']
        n_out = params['n_output']
        batch_size = params["batch_size"]
        sequence_length = params["seq_length"]

        # symbolic minibatch inputs
        X = T.tensor3()  # batch of sequences of vectors
        Y = T.tensor3()  # batch of sequences of vectors
        is_train = T.iscalar(
            'is_train'
        )  # pseudo boolean for switching between training and prediction

        #CNN global parameters.
        subsample = (1, 1)
        p_1 = 0.5
        border_mode = "valid"
        cnn_batch_size = batch_size * sequence_length
        pool_size = (2, 2)

        #Layer1: conv2+pool+drop
        filter_shape = (64, 1, 9, 9)
        # input_shape = (samples, channels, rows, cols)
        input_shape = (cnn_batch_size, 1, 120, 60)
        input = X.reshape(input_shape)
        c1 = ConvLayer(rng,
                       input,
                       filter_shape,
                       input_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p1 = PoolLayer(c1.output,
                       pool_size=pool_size,
                       input_shape=c1.output_shape)
        dl1 = DropoutLayer(rng, input=p1.output, prob=p_1, is_train=is_train)
        retain_prob = 1. - p_1
        test_output = p1.output * retain_prob
        d1_output = T.switch(T.neq(is_train, 0), dl1.output, test_output)

        #Layer2: conv2+pool
        filter_shape = (128, p1.output_shape[1], 3, 3)
        c2 = ConvLayer(rng,
                       d1_output,
                       filter_shape,
                       p1.output_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p2 = PoolLayer(c2.output,
                       pool_size=pool_size,
                       input_shape=c2.output_shape)

        #Layer3: conv2+pool
        filter_shape = (128, p2.output_shape[1], 3, 3)
        c3 = ConvLayer(rng,
                       p2.output,
                       filter_shape,
                       p2.output_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p3 = PoolLayer(c3.output,
                       pool_size=pool_size,
                       input_shape=c3.output_shape)

        #Layer4: hidden
        n_in = reduce(lambda x, y: x * y, p3.output_shape[1:])
        x_flat = p3.output.flatten(2)
        h1 = HiddenLayer(rng, x_flat, n_in, 1024, activation=nn.relu)
        n_in = 1024
        rnn_input = h1.output.reshape((batch_size, sequence_length, n_in))

        #Layer5: gru
        self.n_in = n_in
        self.n_lstm = n_lstm
        self.n_out = n_out
        self.W_hy = init_weight((self.n_lstm, self.n_out),
                                rng=rng,
                                name='W_hy',
                                sample='glorot')
        self.b_y = init_bias(self.n_out, rng=rng, sample='zero')

        layer1 = LSTMLayer(rng, 0, self.n_in, self.n_lstm)
        layer2 = LSTMLayer(rng, 1, self.n_lstm, self.n_lstm)
        layer3 = LSTMLayer(rng, 2, self.n_lstm, self.n_lstm)

        self.params = layer1.params + layer2.params + layer3.params
        self.params.append(self.W_hy)
        self.params.append(self.b_y)

        def step_lstm(x_t, mask, h_tm1_1, c_tm1_1, h_tm1_2, c_tm1_2, h_tm1_3,
                      c_tm1_3):
            [h_t_1, c_t_1, y_t_1] = layer1.run(x_t, h_tm1_1, c_tm1_1)
            dl1 = DropoutLayer(rng,
                               input=y_t_1,
                               prob=0.5,
                               is_train=is_train,
                               mask=mask)
            [h_t_2, c_t_2, y_t_2] = layer2.run(dl1.output, h_tm1_2, c_tm1_2)
            [h_t_3, c_t_3, y_t_3] = layer3.run(y_t_2, h_tm1_3, c_tm1_3)
            y = T.dot(y_t_3, self.W_hy) + self.b_y
            return [h_t_1, c_t_1, h_t_2, c_t_2, h_t_3, c_t_3, y]

        h0_1 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # initial hidden state
        c0_1 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # initial cell state
        h0_2 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # initial hidden state
        c0_2 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # initial cell state
        h0_3 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # initial hidden state
        c0_3 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # initial cell state

        mask_shape = (sequence_length, batch_size, self.n_lstm)
        p_1 = 0.5
        mask = rng.binomial(size=mask_shape, p=p_1, dtype=X.dtype)

        # dimshuffle(1, 0, 2): (batch_size, sequence_length, n_in) ->
        # (sequence_length, batch_size, n_in), so scan iterates over time steps

        [h_t_1, c_t_1, h_t_2, c_t_2, h_t_3, c_t_3, y_vals], _ = theano.scan(
            fn=step_lstm,
            sequences=[rnn_input.dimshuffle(1, 0, 2), mask],
            outputs_info=[h0_1, c0_1, h0_2, c0_2, h0_3, c0_3, None])

        self.output = y_vals.dimshuffle(1, 0, 2)

        self.params = c1.params + c2.params + c3.params + h1.params + self.params

        cost = get_err_fn(self, cost_function, Y)
        _optimizer = optimizer(cost, self.params, lr=lr)
        self.train = theano.function(inputs=[X, Y, is_train],
                                     outputs=cost,
                                     updates=_optimizer.getUpdates(),
                                     allow_input_downcast=True)
        self.predictions = theano.function(inputs=[X, is_train],
                                           outputs=self.output,
                                           allow_input_downcast=True)
        self.n_param = count_params(self.params)
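The per-timestep dropout above precomputes a binomial mask and hands it to theano.scan as an extra sequence, so each step applies its own slice of the mask. A self-contained sketch of that pattern (assumed names, MRG RandomStreams instead of the constructor's rng) is:

import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams

srng = MRG_RandomStreams(1234)
seq = T.tensor3('seq')                                   # (time, batch, features)
mask = srng.binomial(size=seq.shape, p=0.5, dtype=seq.dtype)

def step(x_t, m_t, acc):
    return acc + x_t * m_t                               # apply this step's mask slice

out, _ = theano.scan(step, sequences=[seq, mask],
                     outputs_info=T.zeros_like(seq[0]))
f = theano.function([seq], out[-1])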
コード例 #46
0
def build_network():
    """

    Returns
    -------

    """

    input_var = t.tensor4('inputs')
    target = t.matrix('targets')

    net = {'input': InputLayer((None, 3, 299, 299), input_var=input_var)}

    net['conv'] = bn_conv(net['input'],
                          num_filters=32,
                          filter_size=3,
                          stride=2)
    net['conv_1'] = bn_conv(net['conv'], num_filters=32, filter_size=3)
    net['conv_2'] = bn_conv(net['conv_1'],
                            num_filters=64,
                            filter_size=3,
                            pad=1)
    net['pool'] = Pool2DLayer(net['conv_2'], pool_size=3, stride=2, mode='max')

    net['conv_3'] = bn_conv(net['pool'], num_filters=80, filter_size=1)

    net['conv_4'] = bn_conv(net['conv_3'], num_filters=192, filter_size=3)

    net['pool_1'] = Pool2DLayer(net['conv_4'],
                                pool_size=3,
                                stride=2,
                                mode='max')
    net['mixed/join'] = inception_a(net['pool_1'],
                                    nfilt=((64, ), (48, 64), (64, 96, 96),
                                           (32, )))
    net['mixed_1/join'] = inception_a(net['mixed/join'],
                                      nfilt=((64, ), (48, 64), (64, 96, 96),
                                             (64, )))

    net['mixed_2/join'] = inception_a(net['mixed_1/join'],
                                      nfilt=((64, ), (48, 64), (64, 96, 96),
                                             (64, )))

    net['mixed_3/join'] = inception_b(net['mixed_2/join'],
                                      nfilt=((384, ), (64, 96, 96)))

    net['mixed_4/join'] = inception_c(net['mixed_3/join'],
                                      nfilt=((192, ), (128, 128, 192),
                                             (128, 128, 128, 128,
                                              192), (192, )))

    net['mixed_5/join'] = inception_c(net['mixed_4/join'],
                                      nfilt=((192, ), (160, 160, 192),
                                             (160, 160, 160, 160,
                                              192), (192, )))

    net['mixed_6/join'] = inception_c(net['mixed_5/join'],
                                      nfilt=((192, ), (160, 160, 192),
                                             (160, 160, 160, 160,
                                              192), (192, )))

    net['mixed_7/join'] = inception_c(net['mixed_6/join'],
                                      nfilt=((192, ), (192, 192, 192),
                                             (192, 192, 192, 192,
                                              192), (192, )))

    net['mixed_8/join'] = inception_d(net['mixed_7/join'],
                                      nfilt=((192, 320), (192, 192, 192, 192)))

    net['mixed_9/join'] = inception_e(net['mixed_8/join'],
                                      nfilt=((320, ), (384, 384, 384),
                                             (448, 384, 384, 384), (192, )),
                                      pool_mode='average_exc_pad')

    net['mixed_10/join'] = inception_e(net['mixed_9/join'],
                                       nfilt=((320, ), (384, 384, 384),
                                              (448, 384, 384, 384), (192, )),
                                       pool_mode='max')

    net['pool3'] = GlobalPoolLayer(net['mixed_10/join'])

    net['softmax'] = DenseLayer(net['pool3'],
                                num_units=1008,
                                nonlinearity=softmax)

    test_output = lasagne.layers.get_output(net['softmax'], deterministic=True)
    test_loss = t.mean(t.sqr(t.maximum(0., 1. - target * test_output)))
    test_err = t.mean(t.neq(t.argmax(test_output, axis=1),
                            t.argmax(target, axis=1)),
                      dtype=theano.config.floatX)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target], [test_loss, test_err])

    return {'model': net['softmax'], 'val_fn': val_fn}
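Because the targets here are one-hot encoded, the validation error compares argmaxes of predictions and targets. In isolation (illustrative names only) that metric is:

import theano
import theano.tensor as t

predictions = t.matrix('predictions')   # batch x n_classes network outputs
targets = t.matrix('targets')           # batch x n_classes one-hot targets
err = t.mean(t.neq(t.argmax(predictions, axis=1), t.argmax(targets, axis=1)),
             dtype=theano.config.floatX)
err_fn = theano.function([predictions, targets], err, allow_input_downcast=True)
# err_fn([[0.9, 0.1], [0.4, 0.6]], [[1, 0], [1, 0]]) -> 0.5 (one of two rows misclassified)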
コード例 #47
0
def nlls(o, y):
    # masked negative log-likelihood: log-probability of the target class plus
    # log(1 - o) over all non-target outputs; samples labelled -1 are ignored
    # via the T.neq(y, -1) mask
    return -T.mean((T.log(o)[T.arange(y.shape[0]), y]
                    + T.sum(T.log(1 - o), axis=1)
                    - T.log(1 - o)[T.arange(y.shape[0]), y]) * T.neq(y, -1))
コード例 #48
0
ファイル: policy.py プロジェクト: Trece/gomoku
    def train(self,
              train_sets,
              valid_sets,
              test_sets,
              n_epochs=200,
              learning_rate=0.1):

        train_set_x, train_set_y = train_sets
        valid_set_x, valid_set_y = valid_sets
        test_set_x, test_set_y = test_sets

        n_train_batches = train_set_x.get_value(borrow=True).shape[0]
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_train_batches //= self.batch_size
        n_valid_batches //= self.batch_size
        n_test_batches //= self.batch_size

        cost = -T.mean(
            T.log(self.final_output[T.arange(self.y.shape[0]), self.y]))
        error = T.mean(T.neq(T.argmax(self.final_output, axis=1), self.y))

        # find all the parameters and update them using gradient descent
        params = self.params
        grads = T.grad(cost, params)
        updates = [(param_i, param_i - learning_rate * grad_i)
                   for param_i, grad_i in zip(params, grads)]

        index = self.index
        batch_size = self.batch_size
        x = self.x
        y = self.y

        test_model = theano.function(
            [index],
            error,
            givens={
                x: test_set_x[index * batch_size:(index + 1) * batch_size],
                y: test_set_y[index * batch_size:(index + 1) * batch_size]
            })

        validate_model = theano.function(
            [index],
            error,
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]
            })

        train_model = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]
            })

        print('... training')
        # early-stopping parameters
        patience = 10000  # look as this many examples regardless
        patience_increase = 2  # wait this much longer when a new best is found
        improvement_threshold = 0.995
        validation_frequency = min(n_train_batches, patience // 2)
        # go through this many minibatches before checking the network on the
        # validation set; in this case we check every epoch

        best_validation_loss = numpy.inf
        best_iter = 0
        test_score = 0.
        start_time = timeit.default_timer()

        epoch = 0
        done_looping = False

        while (epoch < n_epochs):
            epoch = epoch + 1
            for minibatch_index in range(n_train_batches):

                iter = (epoch - 1) * n_train_batches + minibatch_index

                if iter % 100 == 0:
                    print('training @ iter = ', iter, flush=True)
                cost_ij = train_model(minibatch_index)

                if (iter + 1) % validation_frequency == 0:

                    # compute zero-one loss on validation set
                    validation_losses = [
                        validate_model(i) for i in range(n_valid_batches)
                    ]
                    this_validation_loss = numpy.mean(validation_losses)
                    print('epoch {}, minibatch {}/{}, validation error {}%'.
                          format(epoch, minibatch_index + 1, n_train_batches,
                                 this_validation_loss * 100.))
                    with open('model_{}.mod'.format(iter), 'wb') as f:
                        pickle.dump(self.dump(), f)
                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:


                        if this_validation_loss < best_validation_loss *  \
                                improvement_threshold:
                            patience = max(patience, iter * patience_increase)

                            # save best validation score and iteration number
                            best_validation_loss = this_validation_loss
                            best_iter = iter

                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch {}, minibatch {}/{}, test error of '
                           'best model {}%').format(epoch, minibatch_index + 1,
                                                    n_train_batches,
                                                    test_score * 100.))
                    with open('test_{}.res'.format(iter), 'w') as f:
                        print(network.predict(test_set_x), file=f)

        end_time = timeit.default_timer()
        print('Optimization complete.')
        print('Best validation score of %f %% obtained at iteration %i, '
              'with test performance %f %%' %
              (best_validation_loss * 100., best_iter + 1, test_score * 100.))
        print(('The code for file ' + os.path.split(__file__)[1] +
               ' ran for %.2fm' % ((end_time - start_time) / 60.)),
              file=sys.stderr)
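The patience bookkeeping set up above (patience, patience_increase, improvement_threshold) follows the usual early-stopping recipe, although this loop itself trains for the full n_epochs. A plain-Python sketch of that recipe, with a hypothetical list of validation losses, is:

validation_losses_seen = [0.30, 0.25, 0.24, 0.26, 0.27, 0.28]  # hypothetical values

patience = 3                    # check at least this many validations
patience_increase = 2
improvement_threshold = 0.995   # relative improvement counted as significant
best_validation_loss = float('inf')

for check, this_loss in enumerate(validation_losses_seen):
    if this_loss < best_validation_loss:
        if this_loss < best_validation_loss * improvement_threshold:
            patience = max(patience, check * patience_increase)
        best_validation_loss = this_loss
    if patience <= check:
        break   # ran out of patience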
コード例 #49
0
ファイル: mlp.py プロジェクト: Rhoana/icon_archived
    def train(self, 
        offline=False, 
        data=None, 
        mean=None,
        std=None
        ):
        print 'mlp.train'

        def gradient_updates_momentum(cost, params, learning_rate, momentum):
            updates = []
            for param in params:
                param_update = theano.shared(param.get_value()*0., broadcastable=param.broadcastable)
                updates.append((param, param - learning_rate*param_update))
                updates.append((param_update, momentum*param_update + (1. - momentum)*T.grad(cost, param)))
            return updates

        patchSize = self.patchSize
        batchSize = self.batchSize
        learning_rate  = self.learning_rate
        momentum = self.momentum

        rng = numpy.random.RandomState(1234)

        tx, ty, vx, vy, reset = data.sample()
        train_samples  = len(ty)
        val_samples    = len(vy)
        train_set_x, train_set_y = shared_dataset((tx, ty), doCastLabels=True)

        if val_samples > 0:
            valid_set_x, valid_set_y = shared_dataset((vx, vy), doCastLabels=True)

        if reset:
            self.best_validation_loss = numpy.inf

        # compute number of minibatches for training, validation and testing
        n_train_batches = train_samples / batchSize
        n_valid_batches = val_samples / 1000 #batchSize

        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch

        # start-snippet-1
        x = self.x #T.matrix('x')   # the data is presented as rasterized images
        y = T.ivector('y')  # the labels are presented as 1D vector of
                            # [int] labels
        cost = self.cost(y)

        lr = T.scalar('learning_rate')
        m = T.scalar('momentum')

        learning_rate_shared = theano.shared(np.float32(learning_rate))
        momentum_shared = theano.shared(np.float32(momentum))

        print 'training data....'
        print 'n_train_batches:',n_train_batches
        print 'n_valid_batches:',n_valid_batches
        print 'train_samples:', train_samples
        print 'val_samples:', val_samples
        print 'best_validation:', self.best_validation_loss

        if val_samples > 0:
            validate_model = theano.function(
                [index],
                self.errors(y),
                givens={
                    x: valid_set_x[index * batchSize: (index + 1) * batchSize],
                    y: valid_set_y[index * batchSize: (index + 1) * batchSize]
                }
            )

        predict_samples = theano.function(
                [],
                outputs=T.neq(self.y_pred, y),
                givens={
                        x: train_set_x,
                        y: train_set_y,
                }
        )

        gparams = []
        for param in self.params:
            gparam = T.grad(cost, param)
            gparams.append(gparam)

        updates = gradient_updates_momentum(cost, self.params, lr, m)

        train_model = theano.function(inputs=[index], outputs=cost,
                updates=updates,
                givens={
                    x: train_set_x[index * batchSize:(index + 1) * batchSize],
                    y: train_set_y[index * batchSize:(index + 1) * batchSize],
                    lr: learning_rate_shared,
                    m: momentum_shared})

  
        ###############
        # TRAIN MODEL #
        ###############
        print '... training'
        validation_frequency = 1
        start_time = time.clock()

        minibatch_avg_costs = []
        iter = 0
        epoch = 0
        self.best_train_error = np.inf
        last_train_error = numpy.inf
        for minibatch_index in xrange(n_train_batches):
            if self.done:
                break

            train_cost = train_model(minibatch_index)
            minibatch_avg_costs.append( train_cost )

            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if n_valid_batches == 0:
                train_error = minibatch_avg_costs[-1].item(0)

                print minibatch_index, '-', train_error
                if train_error < self.best_train_error:
                    self.best_train_error = train_error
                    self.save()
                      

            if n_valid_batches > 0 and (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = np.array([validate_model(i) for i
                                     in xrange(n_valid_batches)])
                #this_validation_loss = numpy.sum(validation_losses) * 100.0 / val_samples
                this_validation_loss = numpy.mean(validation_losses*100.0)

                elapsed_time = time.clock() - start_time
 
                data.report_stats(
                    self.id,
                    elapsed_time, 
                    minibatch_index, 
                    this_validation_loss, 
                    minibatch_avg_costs[-1].item(0))

                # if we got the best validation score until now
                if this_validation_loss < self.best_validation_loss:
                    self.best_validation_loss = this_validation_loss
                    self.save()
                    print "New best score!"

        #if n_valid_batches == 0:
        #    self.save()

        if not self.offline:
            probs = predict_samples()
            data.p[ data.i_train ] = probs
            data.save_stats()
コード例 #50
0
    def build_encoder(self, x, xmask=None, **kwargs):
        one_step = False
        if len(kwargs):
            one_step = True

        # if x.ndim == 2 then
        # x = (n_steps, batch_size)
        if x.ndim == 2:
            batch_size = x.shape[1]
        # else x = (word_1, word_2, word_3, ...)
        # or x = (last_word_1, last_word_2, last_word_3, ..)
        # in this case batch_size is 1
        else:
            batch_size = 1

        # if it is not one_step then we initialize everything to 0
        if not one_step:
            h_0 = T.alloc(np.float32(0), batch_size, self.qdim)
            hr_0 = T.alloc(np.float32(0), batch_size, self.qdim)
            hs_0 = T.alloc(np.float32(0), batch_size, self.sdim)
        # in sampling mode (i.e. one step) we require the previous hidden states in kwargs
        else:
            # in this case x.ndim != 2
            assert x.ndim != 2
            assert 'prev_h' in kwargs
            assert 'prev_hr' in kwargs
            assert 'prev_hs' in kwargs
            h_0 = kwargs['prev_h']
            hr_0 = kwargs['prev_hr']
            hs_0 = kwargs['prev_hs']

        xe = self.approx_embedder(x)
        if xmask is None:
            xmask = T.neq(x, self.eoq_sym)

        # Gated Encoder
        if self.query_step_type == "gated":
            f_enc = self.gated_query_step
            o_enc_info = [h_0, hr_0, None, None, None]
        else:
            f_enc = self.plain_query_step
            o_enc_info = [h_0, hr_0]

        if self.session_step_type == "gated":
            f_hier = self.gated_session_step
            o_hier_info = [hs_0, None, None, None]
        else:
            f_hier = self.plain_session_step
            o_hier_info = [hs_0]

        # Run through all the sentence (encode everything)
        if not one_step:
            _res, _ = theano.scan(f_enc,
                                  sequences=[xe, xmask],
                                  outputs_info=o_enc_info)
        # Make just one step further
        else:
            _res = f_enc(xe, xmask, h_0, hr_0)
        # Get the hidden state sequence
        h = _res[0]
        hr = _res[1]
        # All hierarchical sentence
        # The hs sequence is based on the original mask
        if not one_step:
            _res, _ = theano.scan(f_hier,
                                  sequences=[h, xmask],
                                  outputs_info=o_hier_info)
        # Just one step further
        else:
            _res = f_hier(h, xmask, hs_0)

        if isinstance(_res, list) or isinstance(_res, tuple):
            hs = _res[0]
        else:
            hs = _res
        return (h, hr), hs, (_res[2], _res[3])
コード例 #51
0
ファイル: mlp.py プロジェクト: Rhoana/icon_archived
    def train_online(self, data):

        print 'train online...'
        def gradient_updates_momentum(cost, params, learning_rate, momentum):
            updates = []
            for param in params:
                param_update = theano.shared(param.get_value()*0., broadcastable=param.broadcastable)
                updates.append((param, param - learning_rate*param_update))
                updates.append((param_update, momentum*param_update + (1. - momentum)*T.grad(cost, param)))
            return updates

        # DATA INITIALIZATION
        d       = data.sample()
        train_x = d[0]
        train_y = d[1]
        valid_x = d[2]
        valid_y = d[3]
        reset   = d[4]

        if reset:
            self.best_validation_loss = numpy.inf

        train_samples = len(train_y)
        valid_samples = len(valid_y)

        print 'valid_samples:',valid_samples
        print 'train_samples:', train_samples

        if self.resample:
            self.lr_shared.set_value( np.float32(self.learning_rate) )
            self.m_shared.set_value( np.float32(self.momentum) )

        else:
            self.resample  = True
            self.y         = T.ivector('y')  # the labels are presented as 1D vector of [int] labels
            self.lr        = T.scalar('learning_rate')
            self.m         = T.scalar('momentum')

            self.lr_shared = theano.shared(np.float32(self.learning_rate))
            self.m_shared  = theano.shared(np.float32(self.momentum))



        index          =  T.lscalar()  # index to a [mini]batch
        x              = self.x
        y              = self.y
        lr             = self.lr
        m              = self.m
        lr_shared      = self.lr_shared
        m_shared       = self.m_shared
        patchSize      = self.patchSize
        batchSize      = self.batchSize
        train_set_x, train_set_y = shared_dataset((train_x, train_y), doCastLabels=True)

        if valid_samples > 0:
            valid_set_x, valid_set_y = shared_dataset((valid_x, valid_y), doCastLabels=True)

        # compute number of minibatches for training, validation 
        n_train_batches = train_samples / batchSize
        n_valid_batches = valid_samples / batchSize


        #BUILD THE MODEL
        cost = self.cost(y)

        if valid_samples > 0:
            validate_model = theano.function(
                [index],
                self.errors(y),
                givens={
                    x: valid_set_x[index * batchSize: (index + 1) * batchSize],
                    y: valid_set_y[index * batchSize: (index + 1) * batchSize]
                }
            )

        '''
        predict_samples = theano.function(
                inputs=[index],
                outputs=T.neq(self.y_pred, self.y),
                givens={
                        x: train_set_x[index * batchSize: (index + 1) * batchSize],
                        y: train_set_y[index * batchSize: (index + 1) * batchSize]
                }
        )
        '''
        predict_samples = theano.function(
                [],
                outputs=T.neq(self.y_pred, self.y),
                givens={
                        x: train_set_x,
                        y: train_set_y,
                }
        )


        gparams = []
        for param in self.params:
            gparam = T.grad(cost, param)
            gparams.append(gparam)

        updates = gradient_updates_momentum(cost, self.params, lr, m)

        train_model = theano.function(inputs=[index], outputs=cost,
                updates=updates,
                givens={
                    x: train_set_x[index * batchSize:(index + 1) * batchSize],
                    y: train_set_y[index * batchSize:(index + 1) * batchSize],
                    lr: lr_shared,
                    m: m_shared})


        # TRAIN THE MODEL
        print '... training'
        print 'self.best_validation_loss:', self.best_validation_loss
        best_iter = 0
        validation_frequency = 1

        start_time = time.clock()

        elapsed_time = 0
        iter = 0

        minibatch_avg_costs = []
        minibatch_index = 0


        #while (elapsed_time < self.trainTime)\
        #    and (minibatch_index<n_train_batches)\
        #    and (not self.done):
        while (minibatch_index<n_train_batches) and (not self.done):
            if (elapsed_time >= self.trainTime):
                break

            train_cost = train_model(minibatch_index)

            # test the trained samples against the target
            # values to measure the training performance
            i = minibatch_index

            '''
            probs = predict_samples(minibatch_index)
            #print 'probs:', probs.shape
            i_batch = data.i_train[ i * batchSize:(i+1)*batchSize ]
            data.p[ i_batch ] = probs
            '''

            '''
            good = np.where( probs == 0)[0]
            bad  = np.where( probs == 1)[0]
            print 'bad:', len(bad)
            print 'good:', len(good)
            #print probs
            '''
            #print '----->traincost:', type(train_cost), train_cost

            minibatch_avg_costs.append(train_cost)

            iter += 1
            #iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0 and valid_samples > 0:

                validation_losses = np.array([validate_model(i) for i in xrange(n_valid_batches)])
                this_validation_loss = numpy.sum(validation_losses) * 100.0 / valid_samples
                elapsed_time = time.clock() - start_time

                '''
                self.reportTrainingStats(elapsed_time,
                        minibatch_index,
                        this_validation_loss,
                        minibatch_avg_costs[-1].item(0))
                '''
                print this_validation_loss, '/', self.best_validation_loss
                data.add_validation_loss( this_validation_loss )

                # if we got the best validation score until now
                if this_validation_loss < self.best_validation_loss:
                    self.best_validation_loss = this_validation_loss
                    best_iter = iter

                    self.save()
                    print "New best score!"

            # advance to next mini batch
            minibatch_index += 1

            # update elapsed time
            elapsed_time = time.clock() - start_time

        if valid_samples == 0:
            self.save()

        probs = predict_samples()
        data.p[ data.i_train ] = probs

        elapsed_time = time.clock() - start_time
        msg = 'The code ran for'
        status = '%f seconds' % (elapsed_time)
        Utility.report_status( msg, status )
        print 'done...'
コード例 #52
0
 def _error_func(self, y):
     return 100 * T.mean(T.neq(T.argmax(y, axis=1), self.k))
コード例 #53
0
    def evaluate(self, train_set, test_set, shuffle_batch=True,
              epochs=25, lr_decay=0.95, sqr_norm_lim=9,labels=None,model=None):
        """
        Train a simple conv net
        sqr_norm_lim = s^2 in the paper
        lr_decay = adadelta decay parameter
        """    
        cost = self.negative_log_likelihood(self.y) 
        dropout_cost = self.dropout_negative_log_likelihood(self.y)
        # adadelta upgrades: dict of variable:delta
        grad_updates = self.sgd_updates_adadelta(dropout_cost, lr_decay, 1e-6, sqr_norm_lim)
        # shuffle dataset and assign to mini batches.
        # if dataset size is not a multiple of batch size, replicate 
        # extra data (at random)
        np.random.seed(3435)
        batch_size = self.batch_size
        if train_set.shape[0] % batch_size > 0:
            extra_data_num = batch_size - train_set.shape[0] % batch_size
            #extra_data = train_set[np.random.choice(train_set.shape[0], extra_data_num)]
            perm_set = np.random.permutation(train_set)   
            extra_data = perm_set[:extra_data_num]
            new_data = np.append(train_set, extra_data, axis=0)
        else:
            new_data = train_set
        
        shuffled_data = np.random.permutation(new_data) # Attardi
        n_batches     = shuffled_data.shape[0]/batch_size
        # divide train set into 80% train, 20% validation sets
        n_train_batches = int(np.round(n_batches*0.8))
        n_val_batches = n_batches - n_train_batches
        train_set = shuffled_data[:n_train_batches*batch_size,:]
        val_set   = shuffled_data[n_train_batches*batch_size:,:]     
        # push data to gpu        
        # the dataset has the format [word_indices,padding,user,label]
        train_set_x, train_set_y = shared_dataset(train_set[:,:-2], train_set[:,-1])  
        train_set_u = theano.shared(np.asarray(train_set[:,-2],dtype='int32'))      
        # val_set_x = val_set[:,:-2]
        # val_set_u = val_set[:,-2]
        # val_set_y = val_set[:,-1]
        val_set_x, val_set_y = shared_dataset(val_set[:,:-2], val_set[:,-1])
        val_set_u  = theano.shared(np.asarray(val_set[:,-2],dtype='int32'))      
        test_set_x = test_set[:,:-2]
        test_set_u = test_set[:,-2]
        test_set_y = test_set[:,-1]        
        batch_start = self.index * batch_size
        batch_end = batch_start + batch_size

        # compile Theano functions to get train/val/test errors
        
        
        test_y_pred = self.predict(test_set_x)
        test_error = T.mean(T.neq(test_y_pred, self.y))
        # errors on train set
        if self.Users is not None:
            train_model = theano.function([self.index], cost, updates=grad_updates,
                                      givens={
                                          self.x: train_set_x[batch_start:batch_end],
                                          self.y: train_set_y[batch_start:batch_end],
                                          self.u: train_set_u[batch_start:batch_end]
                                          },
                                      allow_input_downcast = True)

            train_error = theano.function([self.index], self.errors(self.y),
                                          givens={
                                              self.x: train_set_x[batch_start:batch_end],
                                              self.y: train_set_y[batch_start:batch_end],
                                              self.u: train_set_u[batch_start:batch_end]},
                                          allow_input_downcast=True)
            val_model = theano.function([self.index], self.errors(self.y),
                                    givens={
                                        self.x: val_set_x[batch_start:batch_end],
                                        self.y: val_set_y[batch_start:batch_end],        
                                        self.u: val_set_u[batch_start:batch_end]},
                                    allow_input_downcast=True)
            test_model = theano.function([self.x, self.u, self.y], test_error, allow_input_downcast=True)
        else:
            train_model = theano.function([self.index], cost, updates=grad_updates,
                                      givens={
                                          self.x: train_set_x[batch_start:batch_end],
                                          self.y: train_set_y[batch_start:batch_end]},
                                      allow_input_downcast = True)

            train_error = theano.function([self.index], self.errors(self.y),
                                          givens={
                                              self.x: train_set_x[batch_start:batch_end],
                                              self.y: train_set_y[batch_start:batch_end]},
                                          allow_input_downcast=True)

            val_model = theano.function([self.index], self.errors(self.y),
                                    givens={
                                        self.x: val_set_x[batch_start:batch_end],
                                        self.y: val_set_y[batch_start:batch_end]},
                                    allow_input_downcast=True)
            test_model = theano.function([self.x, self.y], test_error, allow_input_downcast=True)

        # start training over mini-batches
        print 'training...'        
        best_val_perf = 0
        test_perf = 0    
        patience = 5
        drops    = 0
        prev_val_perf = 0  
        for epoch in xrange(epochs):
            start_time = time.time()
            # FIXME: should permute whole set rather than minibatch indexes
            if shuffle_batch:
                for minibatch_index in np.random.permutation(range(n_train_batches)):
                    cost_epoch = train_model(minibatch_index)
                    self.set_zero(self.zero_vec) # CHECKME: Why?
            else:
                for minibatch_index in xrange(n_train_batches):
                    cost_epoch = train_model(minibatch_index)  
                    self.set_zero(self.zero_vec)
            train_losses = [train_error(i) for i in xrange(n_train_batches)]
            train_perf = 1 - np.mean(train_losses)
            val_losses = [val_model(i) for i in xrange(n_val_batches)]
            val_perf = 1 - np.mean(val_losses)     
            info = 'epoch: %i/%i (%.2f secs) train acc: %.2f %% | val acc: %.2f %%' % (
                epoch,epochs, time.time()-start_time, train_perf * 100., val_perf*100.)              
            # from ipdb import set_trace; set_trace()
            if val_perf > prev_val_perf:                
                drops=0
                if val_perf >= best_val_perf:
                    best_val_perf = val_perf
                    info+= " **"
                    if model:
                        # print "save model"
                        self.save(model)
                    if self.Users is not None:
                        test_loss = test_model(test_set_x, test_set_u, test_set_y)
                    else:
                        test_loss = test_model(test_set_x, test_set_y)
                    test_perf = 1 - test_loss         
            else: 
                drops+=1
            if drops >= patience:
                print "Ran out of patience..."
                break
            prev_val_perf = val_perf
            print info
        # set_trace() 
        return test_perf
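The compiled functions above all rely on the same Theano idiom: the whole dataset lives in shared variables and each function receives only a minibatch index, with `givens` substituting the corresponding slice. A minimal, self-contained sketch of that pattern follows; the toy data, the placeholder cost, and the `batch_start`/`batch_end` expressions are illustrative assumptions, not the model's actual definitions.

import numpy as np
import theano
import theano.tensor as T

batch_size = 10
# toy dataset kept on the device as shared variables
data_x = theano.shared(np.random.randn(100, 20).astype(theano.config.floatX))
data_y = theano.shared(np.random.randn(100).astype(theano.config.floatX))

index = T.lscalar('index')                  # minibatch index
batch_start = index * batch_size
batch_end = (index + 1) * batch_size

x = T.matrix('x')
y = T.vector('y')
w = theano.shared(np.zeros(20, dtype=theano.config.floatX))
cost = T.mean((T.dot(x, w) - y) ** 2)       # placeholder cost
updates = [(w, w - 0.01 * T.grad(cost, w))]

train_model = theano.function(
    [index], cost, updates=updates,
    givens={x: data_x[batch_start:batch_end],
            y: data_y[batch_start:batch_end]},
    allow_input_downcast=True)

for i in range(100 // batch_size):
    train_model(i)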
コード例 #54
0
    def __init__(self, state):
        Model.__init__(self)
        self.state = state

        # Compatibility towards older models
        self.__dict__.update(state)
        self.rng = numpy.random.RandomState(state['seed'])

        # Load dictionary
        raw_dict = cPickle.load(open(self.dictionary, 'r'))

        # Probabilities for each term in the corpus
        self.noise_probs = [
            x[2] for x in sorted(raw_dict, key=operator.itemgetter(1))
        ]
        self.noise_probs = numpy.array(self.noise_probs, dtype='float64')
        self.noise_probs /= numpy.sum(self.noise_probs)
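        # Smooth the unigram distribution by raising it to the 0.75 power and
        # renormalizing (word2vec-style): frequent tokens are drawn a bit less
        # often as noise samples, rare tokens a bit more often.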
        self.noise_probs = self.noise_probs**0.75
        self.noise_probs /= numpy.sum(self.noise_probs)

        self.t_noise_probs = theano.shared(self.noise_probs.astype('float32'),
                                           't_noise_probs')
        # Dictionaries to convert str to idx and vice-versa
        self.str_to_idx = dict([(tok, tok_id) for tok, tok_id, _ in raw_dict])
        self.idx_to_str = dict([(tok_id, tok)
                                for tok, tok_id, freq in raw_dict])

        if '</q>' not in self.str_to_idx \
           or '</s>' not in self.str_to_idx:
            raise Exception("Error, malformed dictionary!")

        # Number of words in the dictionary
        self.idim = len(self.str_to_idx)
        self.state['idim'] = self.idim

        logger.debug("Initializing encoder")
        self.encoder = Encoder(self.state, self.rng, self)
        logger.debug("Initializing decoder")
        self.decoder = Decoder(self.state, self.rng, self, self.encoder)

        # Init params
        self.params = self.encoder.params + self.decoder.params
        assert len(set(self.params)) == (len(self.encoder.params) +
                                         len(self.decoder.params))

        self.y_neg = T.itensor3('y_neg')
        self.x_data = T.imatrix('x_data')
        self.x_ranks = T.imatrix('x_ranks')
        self.x_cost_mask = T.matrix('cost_mask')
        self.x_max_length = T.iscalar('x_max_length')

        # The training is done with a trick: we prepend a special </q> token to the dialog
        # so that the first sentence can also be predicted, conditioned on the dialog-beginning token (</q>).
        self.aug_x_data = T.concatenate([
            T.alloc(np.int32(self.eoq_sym), 1, self.x_data.shape[1]),
            self.x_data
        ])
        training_x = self.aug_x_data[:self.x_max_length]
        training_y = self.aug_x_data[1:self.x_max_length + 1]
        training_ranks = self.x_ranks[:self.x_max_length - 1].flatten()
        training_ranks_mask = T.neq(training_ranks, 0).flatten()
        # Here we find the end-of-sentence tokens in the minibatch.
        training_hs_mask = T.neq(training_x, self.eoq_sym)
        training_x_cost_mask = self.x_cost_mask[:self.x_max_length].flatten()

        # Backward compatibility
        if 'decoder_bias_type' in self.state:
            logger.debug("Decoder bias type {}".format(self.decoder_bias_type))

        logger.info("Build encoder")
        (self.h, _), self.hs, (self.rs, self.us) = \
          self.encoder.build_encoder(training_x, xmask=training_hs_mask)

        logger.info("Build decoder (EVAL)")
        target_probs, self.hd, self.decoder_states = \
            self.decoder.build_decoder(self.hs, training_x, xmask=training_hs_mask, \
                                                                                y=training_y, mode=Decoder.EVALUATION)
        logger.info("Build rank predictor")
        self.predicted_ranks = self.decoder.build_rank_layer(self.hs)

        # Prediction cost and rank cost
        self.per_example_cost = -T.log2(target_probs).reshape(
            (self.x_max_length, self.x_data.shape[1]))
        self.rank_cost = T.sum(
            ((self.predicted_ranks[1:].flatten() - training_ranks)**2) *
            (training_ranks_mask)) / T.sum(training_ranks_mask)
        self.training_cost = T.sum(
            -T.log2(target_probs) * training_x_cost_mask) + np.float32(
                self.lambda_rank) * self.rank_cost
        self.updates = self.compute_updates(
            self.training_cost / training_x.shape[1], self.params)

        # Beam-search variables
        self.beam_source = T.lvector("beam_source")
        self.beam_hs = T.matrix("beam_hs")
        self.beam_step_num = T.lscalar("beam_step_num")
        self.beam_hd = T.matrix("beam_hd")
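The </q>-prepending trick above simply turns next-token prediction into a one-step shift between inputs and targets. A toy numpy illustration of the same slicing, with hypothetical token ids (eoq_sym = 1, x_max_length = 4), not the model's actual data:

import numpy as np

eoq_sym = 1                                               # hypothetical id for </q>
x_data = np.array([[5], [6], [7], [2]], dtype='int32')    # one dialog, shape (len, batch)
aug_x_data = np.concatenate(
    [np.array([[eoq_sym]], dtype='int32'), x_data])       # prepend </q>
x_max_length = 4
training_x = aug_x_data[:x_max_length]        # [1, 5, 6, 7]  inputs start at </q>
training_y = aug_x_data[1:x_max_length + 1]   # [5, 6, 7, 2]  targets are shifted by one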
コード例 #55
0
ファイル: mlp.py プロジェクト: lpigou/chalearn2014
 def errors(self, y):
     """ Errors over the total number of examples (in the minibatch) """
     return T.mean(T.neq(self.y_pred, y))
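A quick numeric check of this measure with made-up predictions and labels: T.neq marks each mismatch with 1, so the mean is the fraction of misclassified examples.

import numpy as np
import theano.tensor as T

y_pred = T.ivector('y_pred')
y = T.ivector('y')
err = T.mean(T.neq(y_pred, y))
print(err.eval({y_pred: np.array([1, 0, 2, 1], dtype='int32'),
                y:      np.array([1, 1, 2, 0], dtype='int32')}))   # -> 0.5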
コード例 #56
0
    def ready(self):
        global total_generate_time
        #say("in generator ready: \n")
        #start_generate_time = time.time()
        embedding_layer = self.embedding_layer
        args = self.args
        padding_id = embedding_layer.vocab_map["<padding>"]

        dropout = self.dropout = theano.shared(
                np.float64(args.dropout).astype(theano.config.floatX)
            )

        # len*batch
        x = self.x = T.imatrix()

        n_d = args.hidden_dimension
        n_e = embedding_layer.n_d
        activation = get_activation_by_name(args.activation)

        layers = self.layers = [ ]
        layer_type = args.layer.lower()
        for i in xrange(2):
            if layer_type == "rcnn":
                l = RCNN(
                        n_in = n_e,
                        n_out = n_d,
                        activation = activation,
                        order = args.order
                    )
            elif layer_type == "lstm":
                l = LSTM(
                        n_in = n_e,
                        n_out = n_d,
                        activation = activation
                    )

            layers.append(l)

        # len * batch
        #masks = T.cast(T.neq(x, padding_id), theano.config.floatX)
        masks = T.cast(T.neq(x, padding_id), theano.config.floatX ).dimshuffle((0,1,"x"))

        # (len*batch)*n_e
        embs = embedding_layer.forward(x.ravel())
        # len*batch*n_e
        embs = embs.reshape((x.shape[0], x.shape[1], n_e))
        embs = apply_dropout(embs, dropout)
        self.word_embs = embs

        flipped_embs = embs[::-1]

        # len*batch*n_d
        h1 = layers[0].forward_all(embs)
        h2 = layers[1].forward_all(flipped_embs)
        h_final = T.concatenate([h1, h2[::-1]], axis=2)
        h_final = apply_dropout(h_final, dropout)
        size = n_d * 2

        #size = n_e


        output_layer = self.output_layer = Layer(
                n_in = size,
                n_out = 1,
                activation = sigmoid
            )

        # len*batch*1 
        probs = output_layer.forward(h_final)
        #probs = output_layer.forward(embs)
        #probs1 = probs.reshape(x.shape)

        #probs_rev = output_layer.forward(flipped_embs)
        #probs1_rev = probs.reshape(x.shape)

        #probs = T.concatenate([probs1, probs1_rev[::-1]], axis=2)

        # len*batch
        probs2 = probs.reshape(x.shape)
        if self.args.seed is not None: self.MRG_rng = MRG_RandomStreams(self.args.seed)
        else: self.MRG_rng = MRG_RandomStreams()
        z_pred = self.z_pred = T.cast(self.MRG_rng.binomial(size=probs2.shape, p=probs2), theano.config.floatX) #"int8")

        # we are computing approximated gradient by sampling z;
        # so should mark sampled z not part of the gradient propagation path
        #
        z_pred = self.z_pred = theano.gradient.disconnected_grad(z_pred)
        #self.sample_updates = sample_updates
        print "z_pred", z_pred.ndim

        z2 = z_pred.dimshuffle((0,1,"x"))
        logpz = - T.nnet.binary_crossentropy(probs, z2) * masks
        logpz = self.logpz = logpz.reshape(x.shape)
        probs = self.probs = probs.reshape(x.shape)

        # batch
        z = z_pred
        self.zsum = T.sum(z, axis=0, dtype=theano.config.floatX)
        self.zdiff = T.sum(T.abs_(z[1:]-z[:-1]), axis=0, dtype=theano.config.floatX)

        params = self.params = [ ]
        for l in layers + [ output_layer ]:
            for p in l.params:
                params.append(p)
        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                                        for x in params)
        say("total # parameters: {}\n".format(nparams))

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost = l2_cost + T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg
        self.l2_cost = l2_cost
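The disconnected_grad call above is what keeps the sampled mask z out of the backpropagation path; the sampler is trained instead through the log-probability term logpz (a REINFORCE-style estimator). Below is a minimal sketch of that pattern on a single Bernoulli variable, assuming nothing beyond standard Theano; the reward is a made-up stand-in for the generator's actual downstream cost.

import numpy as np
import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams

p = theano.shared(np.asarray(0.7, dtype=theano.config.floatX))   # selection probability
rng = MRG_RandomStreams(1234)

z = T.cast(rng.binomial(size=(1,), p=p), theano.config.floatX)   # sample a 0/1 mask
z = theano.gradient.disconnected_grad(z)                         # sample is constant w.r.t. gradients

logpz = z * T.log(p) + (1.0 - z) * T.log(1.0 - p)   # log-probability of the drawn sample
reward = 2.0 * z                                     # stand-in for a downstream cost/reward
surrogate = T.sum(reward * logpz)                    # gradient reaches p only through logpz
grad_p = T.grad(surrogate, p)

sample_and_grad = theano.function([], [z, grad_p])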
コード例 #57
0
	def error(self):
		return T.mean(T.neq(self.pred, self.outputs))
コード例 #58
0
ファイル: parikh_nobias.py プロジェクト: vyraun/parikh_snli
def main(num_epochs=NEPOCH):
    print("Loading data ...")
    snli = SNLI(batch_size=BSIZE)
    train_batches = list(snli.train_minibatch_generator())
    dev_batches = list(snli.dev_minibatch_generator())
    test_batches = list(snli.test_minibatch_generator())
    W_word_embedding = snli.weight  # W shape: (# vocab size, WE_DIM)
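    # the raw matrix is immediately replaced below by its row-wise L2-normalized
    # version; the small constant guards against division by zero for all-zero rows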
    W_word_embedding = snli.weight / \
                       (numpy.linalg.norm(snli.weight, axis=1).reshape(snli.weight.shape[0], 1) + \
                        0.00001)
    del snli

    print("Building network ...")
    ########### input layers ###########
    # hypothesis
    input_var_h = T.TensorType('int32', [False, False])('hypothesis_vector')
    input_var_h.tag.test_value = numpy.hstack(
        (numpy.random.randint(1, 10000, (BSIZE, 18),
                              'int32'), numpy.zeros(
                                  (BSIZE, 6)).astype('int32')))
    l_in_h = lasagne.layers.InputLayer(shape=(BSIZE, None),
                                       input_var=input_var_h)

    input_mask_h = T.TensorType('int32', [False, False])('hypo_mask')
    input_mask_h.tag.test_value = numpy.hstack((numpy.ones(
        (BSIZE, 18), dtype='int32'), numpy.zeros((BSIZE, 6), dtype='int32')))
    input_mask_h.tag.test_value[1, 18:22] = 1
    l_mask_h = lasagne.layers.InputLayer(shape=(BSIZE, None),
                                         input_var=input_mask_h)

    # premise
    input_var_p = T.TensorType('int32', [False, False])('premise_vector')
    input_var_p.tag.test_value = numpy.hstack(
        (numpy.random.randint(1, 10000, (BSIZE, 16),
                              'int32'), numpy.zeros(
                                  (BSIZE, 3)).astype('int32')))
    l_in_p = lasagne.layers.InputLayer(shape=(BSIZE, None),
                                       input_var=input_var_p)

    input_mask_p = T.TensorType('int32', [False, False])('premise_mask')
    input_mask_p.tag.test_value = numpy.hstack((numpy.ones(
        (BSIZE, 16), dtype='int32'), numpy.zeros((BSIZE, 3), dtype='int32')))
    input_mask_p.tag.test_value[1, 16:18] = 1
    l_mask_p = lasagne.layers.InputLayer(shape=(BSIZE, None),
                                         input_var=input_mask_p)
    ###################################

    # output shape (BSIZE, None, WEDIM)
    l_hypo_embed = lasagne.layers.EmbeddingLayer(
        l_in_h,
        input_size=W_word_embedding.shape[0],
        output_size=W_word_embedding.shape[1],
        W=W_word_embedding)

    l_prem_embed = lasagne.layers.EmbeddingLayer(
        l_in_p,
        input_size=W_word_embedding.shape[0],
        output_size=W_word_embedding.shape[1],
        W=l_hypo_embed.W)

    # EMBEDING MAPPING: output shape (BSIZE, None, WEMAP)
    l_hypo_reduced_embed = DenseLayer3DInput(l_hypo_embed,
                                             num_units=WEMAP,
                                             b=None,
                                             nonlinearity=None)
    l_hypo_embed_dpout = lasagne.layers.DropoutLayer(l_hypo_reduced_embed,
                                                     p=DPOUT,
                                                     rescale=True)
    l_prem_reduced_embed = DenseLayer3DInput(l_prem_embed,
                                             num_units=WEMAP,
                                             W=l_hypo_reduced_embed.W,
                                             b=None,
                                             nonlinearity=None)
    l_prem_embed_dpout = lasagne.layers.DropoutLayer(l_prem_reduced_embed,
                                                     p=DPOUT,
                                                     rescale=True)

    # ATTEND
    l_hypo_embed_hid1 = DenseLayer3DInput(
        l_hypo_embed_dpout,
        num_units=EMBDHIDA,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)
    l_hypo_embed_hid1_dpout = lasagne.layers.DropoutLayer(l_hypo_embed_hid1,
                                                          p=DPOUT,
                                                          rescale=True)
    l_hypo_embed_hid2 = DenseLayer3DInput(
        l_hypo_embed_hid1_dpout,
        num_units=EMBDHIDB,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)

    l_prem_embed_hid1 = DenseLayer3DInput(
        l_prem_embed_dpout,
        num_units=EMBDHIDA,
        W=l_hypo_embed_hid1.W,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)
    l_prem_embed_hid1_dpout = lasagne.layers.DropoutLayer(l_prem_embed_hid1,
                                                          p=DPOUT,
                                                          rescale=True)
    l_prem_embed_hid2 = DenseLayer3DInput(
        l_prem_embed_hid1_dpout,
        num_units=EMBDHIDB,
        W=l_hypo_embed_hid2.W,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)

    # output dim: (BSIZE, NROWx, NROWy)
    l_e = ComputeEmbeddingPool([l_hypo_embed_hid1, l_prem_embed_hid2])
    # output dim: (BSIZE, NROWy, DIM)
    l_hypo_weighted = AttendOnEmbedding([l_hypo_reduced_embed, l_e],
                                        masks=[l_mask_h, l_mask_p],
                                        direction='col')
    # output dim: (BSIZE, NROWx, DIM)
    l_prem_weighted = AttendOnEmbedding([l_prem_reduced_embed, l_e],
                                        masks=[l_mask_h, l_mask_p],
                                        direction='row')

    # COMPARE
    # output dim: (BSIZE, NROW, 4*LSTMHID)
    l_hypo_premwtd = lasagne.layers.ConcatLayer(
        [l_hypo_reduced_embed, l_prem_weighted], axis=2)
    l_prem_hypowtd = lasagne.layers.ConcatLayer(
        [l_prem_reduced_embed, l_hypo_weighted], axis=2)

    l_hypo_premwtd_dpout = lasagne.layers.DropoutLayer(l_hypo_premwtd,
                                                       p=DPOUT,
                                                       rescale=True)
    l_hypo_comphid1 = DenseLayer3DInput(
        l_hypo_premwtd_dpout,
        num_units=COMPHIDA,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)

    l_hypo_comphid1_dpout = lasagne.layers.DropoutLayer(l_hypo_comphid1,
                                                        p=DPOUT,
                                                        rescale=True)
    l_hypo_comphid2 = DenseLayer3DInput(
        l_hypo_comphid1_dpout,
        num_units=COMPHIDB,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)

    l_prem_hypowtd_dpout = lasagne.layers.DropoutLayer(l_prem_hypowtd,
                                                       p=DPOUT,
                                                       rescale=True)
    l_prem_comphid1 = DenseLayer3DInput(
        l_prem_hypowtd_dpout,
        num_units=COMPHIDA,
        W=l_hypo_comphid1.W,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)
    l_prem_comphid1_dpout = lasagne.layers.DropoutLayer(l_prem_comphid1,
                                                        p=DPOUT,
                                                        rescale=True)
    l_prem_comphid2 = DenseLayer3DInput(
        l_prem_comphid1_dpout,
        num_units=COMPHIDB,
        W=l_hypo_comphid2.W,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)

    # AGGREGATE
    # output dim: (BSIZE, 4*LSTMHID)
    l_hypo_mean = MeanOverDim(l_hypo_comphid2, mask=l_mask_h, dim=1)
    l_prem_mean = MeanOverDim(l_prem_comphid2, mask=l_mask_p, dim=1)

    l_v1v2 = lasagne.layers.ConcatLayer([l_hypo_mean, l_prem_mean], axis=1)
    l_v1v2_dpout = lasagne.layers.DropoutLayer(l_v1v2, p=DPOUT, rescale=True)

    l_outhid1 = lasagne.layers.DenseLayer(
        l_v1v2_dpout,
        num_units=OUTHID,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)
    l_outhid1_dpout = lasagne.layers.DropoutLayer(l_outhid1,
                                                  p=DPOUT,
                                                  rescale=True)

    l_outhid2 = lasagne.layers.DenseLayer(
        l_outhid1_dpout,
        num_units=OUTHID,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)
    # l_outhid2_dpout = lasagne.layers.DropoutLayer(l_outhid2, p=DPOUT, rescale=True)

    l_output = lasagne.layers.DenseLayer(
        l_outhid2,
        num_units=3,
        b=None,
        nonlinearity=lasagne.nonlinearities.softmax)

    ########### target, cost, validation, etc. ##########
    target_values = T.ivector('target_output')
    target_values.tag.test_value = numpy.asarray([
        1,
    ] * BSIZE, dtype='int32')

    network_output = lasagne.layers.get_output(l_output)
    network_prediction = T.argmax(network_output, axis=1)
    error_rate = T.mean(T.neq(network_prediction, target_values))

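    # deterministic=True turns off dropout, so the *_clean quantities below are the
    # ones used when evaluating on dev/test data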
    network_output_clean = lasagne.layers.get_output(l_output,
                                                     deterministic=True)
    network_prediction_clean = T.argmax(network_output_clean, axis=1)
    error_rate_clean = T.mean(T.neq(network_prediction_clean, target_values))

    cost = T.mean(
        T.nnet.categorical_crossentropy(network_output, target_values))
    cost_clean = T.mean(
        T.nnet.categorical_crossentropy(network_output_clean, target_values))

    # Retrieve all parameters from the network
    all_params = lasagne.layers.get_all_params(l_output)
    if not UPDATEWE:
        all_params.remove(l_hypo_embed.W)

    numparams = sum(
        [numpy.prod(i) for i in [i.shape.eval() for i in all_params]])
    print("Number of params: {}\nName\t\t\tShape\t\t\tSize".format(numparams))
    print("-----------------------------------------------------------------")
    for item in all_params:
        print("{0:24}{1:24}{2}".format(item, item.shape.eval(),
                                       numpy.prod(item.shape.eval())))

    # if exist param file then load params
    look_for = 'params' + os.sep + 'params_' + filename + '.pkl'
    if os.path.isfile(look_for):
        print("Resuming from file: " + look_for)
        all_param_values = cPickle.load(open(look_for, 'rb'))
        for p, v in zip(all_params, all_param_values):
            p.set_value(v)

    # Compute SGD updates for training
    print("Computing updates ...")
    updates = lasagne.updates.adagrad(cost, all_params, LR)

    # Theano functions for training and computing cost
    print("Compiling functions ...")
    train = theano.function([
        l_in_h.input_var, l_mask_h.input_var, l_in_p.input_var,
        l_mask_p.input_var, target_values
    ], [cost, error_rate],
                            updates=updates)
    # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=False))
    compute_cost = theano.function([
        l_in_h.input_var, l_mask_h.input_var, l_in_p.input_var,
        l_mask_p.input_var, target_values
    ], [cost_clean, error_rate_clean])

    # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=False))

    def evaluate(mode):
        if mode == 'dev':
            data = dev_batches
        if mode == 'test':
            data = test_batches

        set_cost = 0.
        set_error_rate = 0.
        for batches_seen, (hypo, hm, premise, pm, truth) in enumerate(data, 1):
            _cost, _error = compute_cost(hypo, hm, premise, pm, truth)
            set_cost = (1.0 - 1.0 / batches_seen) * set_cost + \
                       1.0 / batches_seen * _cost
            set_error_rate = (1.0 - 1.0 / batches_seen) * set_error_rate + \
                             1.0 / batches_seen * _error

        return set_cost, set_error_rate

    print("Done. Evaluating scratch model ...")
    dev_set_cost, dev_set_error = evaluate('dev')
    print("BEFORE TRAINING: dev cost %f, error %f" %
          (dev_set_cost, dev_set_error))
    print("Training ...")
    try:
        for epoch in range(num_epochs):
            train_set_cost = 0.
            train_set_error = 0.
            start = time.time()

            for batches_seen, (hypo, hm, premise, pm,
                               truth) in enumerate(train_batches, 1):
                _cost, _error = train(hypo, hm, premise, pm, truth)
                train_set_cost = (1.0 - 1.0 / batches_seen) * train_set_cost + \
                                 1.0 / batches_seen * _cost
                train_set_error = (1.0 - 1.0 / batches_seen) * train_set_error + \
                                  1.0 / batches_seen * _error
                if (batches_seen * BSIZE) % 5000 == 0:
                    end = time.time()
                    print("Sample %d %.2fs, lr %.4f, train cost %f, error %f" %
                          (batches_seen * BSIZE, end - start, LR,
                           train_set_cost, train_set_error))
                    start = end

                if (batches_seen * BSIZE) % 100000 == 0:
                    dev_set_cost, dev_set_error = evaluate('dev')
                    print("***dev cost %f, error %f" %
                          (dev_set_cost, dev_set_error))

            # save parameters
            all_param_values = [p.get_value() for p in all_params]
            cPickle.dump(
                all_param_values,
                open('params' + os.sep + 'params_' + filename + '.pkl', 'wb'))

            dev_set_cost, dev_set_error = evaluate('dev')
            test_set_cost, test_set_error = evaluate('test')

            print("epoch %d, cost: train %f dev %f test %f;\n"
                  "         error train %f dev %f test %f" %
                  (epoch, train_set_cost, dev_set_cost, test_set_cost,
                   train_set_error, dev_set_error, test_set_error))
    except KeyboardInterrupt:
        pdb.set_trace()
        pass
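The evaluate loop above maintains an incremental mean rather than summing and dividing at the end: after the k-th batch, set_cost = (1 - 1/k) * set_cost + (1/k) * cost_k, which equals the plain average of the first k per-batch costs (exact when all batches have the same size). A standalone check of that recurrence with made-up numbers:

import numpy as np

batch_costs = [0.9, 0.7, 0.8, 0.6]        # hypothetical per-batch costs
running = 0.0
for k, c in enumerate(batch_costs, 1):
    running = (1.0 - 1.0 / k) * running + (1.0 / k) * c
print(running)                 # 0.75
print(np.mean(batch_costs))    # 0.75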
コード例 #59
0
    def run(self, y):
        # y comes in as shape batch X total_seq
        y = y.transpose([1,0])
        # y is of shape seq X batch and of type 'int'
        # y needs to be 1-hot encoded, but this is more
        # easily done in the step function

        # reverse each example of y along the time axis (not across the batch)
        y_rev = y[::-1, :]

        # get initial values for LSTMs
        hf, cf = self.forward_lstm.get_initial_hidden
        hb, cb = self.backward_lstm.get_initial_hidden

        # setup initial values for scan
        outputs_info = [dict(initial=hf, taps=[-1]), # hf
                        dict(initial=cf, taps=[-1]), # cf
                        dict(initial=hb, taps=[-1]), # hb
                        dict(initial=cb, taps=[-1])] # cb
                        
        # run LSTM loop
        [hf,cf,hb,cb], _ = theano.scan(fn=self.step,
                                       sequences=[y,y_rev],
                                       outputs_info=outputs_info,
                                       n_steps=self.N)

        # return forward and backward concatenated
        # this needs to be aligned so that [4,13,45,3,X, X, X]
        # and                              [0,0, 0, 3,45,13,4]
        # concatenate correctly to         [4/3,13/45,45/13,3/4,X,X,X]

        # stores, for each output slot, the index of the backward step that fills it
        b_indx = T.zeros((self.N, self.bs), int)
        # stores the next free slot per batch element
        c = T.zeros((self.bs,), int)
        # This loop creates an array that can be used to
        # map hb to hf with the proper alignment
        for i in range(self.N):
            # if this part of y_rev is 0, ignore
            # else, get the current index
            indx = T.switch(T.neq(y_rev[i,:], 0), i, 0)
            # set b_indx to be the current indx if this is
            # a valid part of the string
            b_indx = T.set_subtensor(b_indx[c,T.arange(self.bs)], indx)
            
            # increment those that were used
            inc = T.switch(T.neq(y_rev[i,:], 0), 1, 0)
            c  = c + inc
            
        # the magic that gets hb to align with hf
        # it takes hb, uses the aligning indices and grabs those on the
        # diagonal as the elements we are interested in. This results in
        # essentially "shifting" the first non-zero element of hb
        # to the front of the list, for each sample in the batch
        h_b_aligned = hb[b_indx][:,T.arange(self.bs),T.arange(self.bs)]
        # concatenate them together. Now everything is aligned, as it should be!
        h_lang = T.concatenate([hf, h_b_aligned], axis=2)

        # axis 0 -> N
        # axis 1 -> batch
        # axis 2 -> m
        return h_lang
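The alignment described in the comments above is easier to see on concrete data. Below is a numpy-only sketch with hypothetical token ids of how b_indx is built and why gathering with it moves the backward states for the real tokens to the front of each column; in the Theano code the same gather on the 3-D hb is written as hb[b_indx][:, T.arange(bs), T.arange(bs)].

import numpy as np

# toy batch: two sequences, zero-padded at the end, stored as (N, bs) like y above
y = np.array([[ 4,  7],
              [13,  9],
              [45,  0],
              [ 3,  0],
              [ 0,  0],
              [ 0,  0],
              [ 0,  0]])
y_rev = y[::-1]                       # padding now sits at the front of each column
N, bs = y.shape

b_indx = np.zeros((N, bs), int)       # which backward step feeds each output slot
c = np.zeros(bs, int)                 # next free slot per batch element
for i in range(N):
    real = y_rev[i] != 0              # is this backward step a real token?
    b_indx[c[real], np.where(real)[0]] = i
    c += real.astype(int)

hb = y_rev                            # pretend the backward states are just the token ids
aligned = hb[b_indx, np.arange(bs)]   # gather along both axes
print(aligned.T)
# [[ 3 45 13  4  0  0  0]
#  [ 9  7  0  0  0  0  0]]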
コード例 #60
0
X = T.matrix()
X.tag.test_value = np.zeros((100,784),dtype='float32')
Y = T.matrix()
Y.tag.test_value = np.zeros((100,10),dtype='float32')

Q = model.mf(V=X, Y=Y)

H2 = Q[-2][-1]

hid, pen, lab = model.hidden_layers

Y_hat = lab.mf_update(state_below = H2)

true = T.argmax(Y, axis=1)
pred = T.argmax(Y_hat, axis=1)
err = T.neq(true, pred)
err_count = err.sum()

errs = function([X,Y], err_count)

total = 0

dataset = MNIST(which_set = 'train', binarize=1, one_hot=True)

for i in xrange(0, 60000, 100):
    x = dataset.X[i:i+100,:].astype(X.dtype)
    assert x.shape == (100, 784)
    y = dataset.y[i:i+100,:].astype(Y.dtype)
    assert y.shape == (100, 10)
    total += errs(x, y)
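# total now holds the number of misclassified examples over the 60000 training
# cases; total / 60000. gives the overall training error rate.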