Example #1
    def build_loss(self,
                   rot_true,
                   rot_vect_pred,
                   rot_pred,
                   class_true,
                   class_pred,
                   mask_true, 
                   mask_pred):
        pos_mask = self.rectify_scores(class_true)

        cls_cross_entropy = \
            utils.cross_entropy(pos_mask, class_pred)
        
        rot_vect_true = self.rot_to_prob(rot_true)
        rot_cross_entropy = tf.reduce_mean(
                                utils.cross_entropy(
                                    rot_vect_true, 
                                    rot_vect_pred), 
                                axis=1) * pos_mask
        rot_error = tf.reduce_sum(tf.abs(rot_pred - 
                                      rot_true) * pos_mask) \
                    / (1e-6 + tf.reduce_sum(pos_mask))

        mask_cross_entropy = \
            utils.cross_entropy(mask_true, mask_pred)
  
        mask_cross_entropy = tf.reduce_mean(
                             mask_cross_entropy, [1, 2])
        mask_cross_entropy = mask_cross_entropy * pos_mask

        cls_loss = tf.reduce_mean(cls_cross_entropy)
        rot_loss = tf.reduce_mean(rot_cross_entropy)
        mask_loss = tf.reduce_mean(mask_cross_entropy)
     
        return cls_loss, rot_loss, mask_loss, rot_error
Example #2
    def test_cross_entropy(self):
        loss, dy = utils.cross_entropy(np.array([[0, 1]]), np.array([0]))
        self.assertTrue(np.allclose(loss, [[10]]))
        self.assertTrue(np.allclose(dy, [[-1, 1]]))

        loss, dy = utils.cross_entropy(np.array([[0, 1]]), np.array([1]))
        self.assertTrue(np.allclose(loss, [[0]]))
        self.assertTrue(np.allclose(dy, [[0, 0]]))

        loss, dy = utils.cross_entropy(np.array([[0.5, 0.5]]), np.array([1]))
        self.assertTrue(np.allclose(loss, [[-np.log(0.5)]]))
        self.assertTrue(np.allclose(dy, [[0.5, -0.5]]))
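The `utils.cross_entropy` exercised by this test is not shown here; a minimal NumPy sketch that satisfies the assertions above (softmax probabilities and integer labels in, a per-sample loss plus the gradient with respect to the logits out, with the log clipped at -10) could look like this:

import numpy as np

def cross_entropy(probs, labels, min_log=-10.0):
    # hypothetical helper matching the test: probs has shape (N, C), labels shape (N,)
    n = probs.shape[0]
    one_hot = np.zeros_like(probs)
    one_hot[np.arange(n), labels] = 1.0
    true_p = probs[np.arange(n), labels]
    # clip the log at min_log so a true-class probability of 0 gives a finite loss (here exactly 10)
    loss = -np.maximum(np.log(np.maximum(true_p, np.exp(min_log))), min_log)
    dy = probs - one_hot  # gradient with respect to the logits feeding the softmax
    return loss.reshape(-1, 1), dy
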
    def iteration(self, inputs, targets, state):
        caches = []
        loss = 0.0

        # ~ forward pass
        for x, y_true in zip(inputs, targets):
            y, state, cache = self.forward_pass(x, state)
            loss += u.cross_entropy(u.softmax(y), y_true)

            caches.append(cache)

        # updating loss
        loss /= inputs.shape[0]

        # ~ backward pass
        d_next = self.initial_state()
        grads = {k: np.zeros_like(v) for k, v in self.model.items()}

        for y_true, cache in reversed(list(zip(targets, caches))):
            grad, d_next = self.backward_pass(y_true, d_next, cache)

            # accumulating gradients
            for k in grads.keys():
                grads[k] += grad[k]

        # gradient clipping
        for k, v in grads.items():
            grads[k] = np.clip(v, -5., 5.)

        return grads, loss, state
 def loss_func(self):
     with tf.name_scope('Loss'):
         y_one_hot = tf.one_hot(self.y,
                                depth=self.conf.num_cls,
                                axis=4,
                                name='y_one_hot')
         if self.conf.loss_type == 'cross-entropy':
             with tf.name_scope('cross_entropy'):
                 loss = cross_entropy(y_one_hot, self.logits,
                                      self.conf.num_cls)
         elif self.conf.loss_type == 'dice':
             with tf.name_scope('dice_coefficient'):
                 loss = dice_coeff(y_one_hot, self.logits)
         with tf.name_scope('total'):
             if self.conf.use_reg:
                 with tf.name_scope('L2_loss'):
                     l2_loss = tf.reduce_sum(self.conf.lmbda * tf.stack([
                         tf.nn.l2_loss(v)
                         for v in tf.get_collection('weights')
                     ]))
                     self.total_loss = loss + l2_loss
             else:
                 self.total_loss = loss
             self.mean_loss, self.mean_loss_op = tf.metrics.mean(
                 self.total_loss)
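The `cross_entropy(y_one_hot, logits, num_cls)` helper called above lives elsewhere in that project; a hedged TF1-style sketch, assuming it flattens labels and logits to `(-1, num_cls)` and averages a softmax cross-entropy, might be:

import tensorflow as tf

def cross_entropy(y_one_hot, logits, num_cls):
    # hypothetical helper: flatten both tensors to (num_voxels, num_cls)
    # and average the per-voxel softmax cross-entropy
    flat_labels = tf.reshape(y_one_hot, [-1, num_cls])
    flat_logits = tf.reshape(logits, [-1, num_cls])
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=flat_labels,
                                                   logits=flat_logits))
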
Example #5
    def create_optimizer(self, optimizer=None):
        model = get_collection('model')
        inputs = get_collection('inputs')

        x, y, logits, probabilities, predictions = (
            inputs['x'],
            inputs['y'],
            model['logits'],
            model['probabilities'],
            model['predictions']
        )

        with tf.name_scope('metrics'):
            xe = cross_entropy(self.n_classes, logits=logits, labels=y)
            loss = tf.reduce_mean(xe, name='loss')
            targets = tf.argmax(y, axis=1, name='targets')
            match = tf.cast(tf.equal(predictions, targets), tf.float32)
            accuracy = tf.reduce_mean(match, name='accuracy')
        add_to_collection('metrics', loss, accuracy)

        with tf.name_scope('training'):
            if optimizer is None:
                optimizer = tf.train.GradientDescentOptimizer
            opt = optimizer(inputs['learning_rate'])
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                training_op = opt.minimize(loss)
        add_to_collection('training', training_op)
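`cross_entropy(self.n_classes, logits=..., labels=...)` here and in Example #9 takes the class count first; assuming it simply wraps the per-example softmax cross-entropy over one-hot labels, a sketch could be:

import tensorflow as tf

def cross_entropy(n_classes, logits, labels):
    # hypothetical: reshape one-hot labels to (batch, n_classes) and return
    # per-example losses; the caller applies tf.reduce_mean
    labels = tf.reshape(tf.cast(labels, tf.float32), [-1, n_classes])
    return tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits)
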
 def _compute_loss(self):
     """Compute the loss function"""
     loss = cross_entropy(logits=self.pred, labels=self.Y)
     loss += self.reg_lam / 2 * np.linalg.norm(self.W1)**2
     loss += self.reg_lam / 2 * np.linalg.norm(self.W2)**2
     loss += self.reg_lam / 2 * np.linalg.norm(self.W3)**2
     return loss
Example #7
 def forward(self, sample):
     logmasks, history_logsk = self.atten_net(sample)
     reconstruction_image = torch.zeros(sample.shape).to(sample.device)
     if self.train:
         loss1 = 0
         loss2 = 0
     for i in range(logmasks.shape[1]):
         logvar, mu, recon_img, logitrecon_mask = self.comp_net(sample, logmasks[:, i, :, :].unsqueeze(1))
         mask = torch.exp(logmasks[:, i, :, :].unsqueeze(1))
         if self.train:
             loss1 += torch.sum((mask * sample - mask * recon_img) ** 2 / 0.0225)
             loss2 += utils.normal_KL_div_loss(logvar, mu)
         if i == 0:
             loss3_l = logitrecon_mask
             recon_images = recon_img.unsqueeze(4)
         else:
             loss3_l = torch.cat((loss3_l, logitrecon_mask), 1)
             recon_images = torch.cat((recon_images, recon_img.unsqueeze(4)), 4)
         reconstruction_image += recon_img * mask
     logrecon_masks = torch.nn.functional.log_softmax(loss3_l, dim=1)
     if self.train:
         loss3 = utils.cross_entropy(logmasks, logrecon_masks)
         loss = loss1 + 0.25 * loss2 + 0.025 * loss3
     else:
         loss = loss1 = loss2 = loss3 = 0
     if not self.train:
         return reconstruction_image.detach(), logmasks.detach(), history_logsk.detach(), recon_images.detach()
     return reconstruction_image, logmasks, history_logsk, recon_images, loss1, loss2, loss3, loss
Example #8
def train_model(xtrain, ytrain, args):
    # simply randomly initialize W
    iters = args.iterations
    lr = args.lr
    lamb = args.lamb

    features = xtrain.shape[1]
    class_num = ytrain.shape[1]

    # randomly initialize parameter W
    W = np.random.rand(features + 1, class_num)

    for epoch in range(iters):
        running_loss = 0
        for i, data in enumerate(data_loader(xtrain, ytrain, args.batch_size),
                                 0):
            x, y = data
            b = np.ones((x.shape[0], 1))
            x = np.append(x, b, 1)
            y_ = softmax(np.dot(x, W))
            W += (np.dot(x.T, y - y_) + lamb * W) / x.shape[0] * lr
            running_loss += cross_entropy(y, y_)
            if i % 1000 == 999:  # print every 1000 mini-batches
                print('[%d, %5d] loss: %.6f' %
                      (epoch + 1, i + 1, running_loss / 1000))
                running_loss = 0.0
    return W
Example #9
    def create_optimizer(self, optimizer=None):
        model = get_collection('model')
        inputs = get_collection('inputs')

        alpha, l1_ratio = self.alpha, self.l1_ratio
        (x, y, class_weights, learning_rate, theta, logits, probabilities,
         predictions) = (inputs['x'], inputs['y'], inputs['class_weights'],
                         inputs['learning_rate'], model['theta'],
                         model['logits'], model['probabilities'],
                         model['predictions'])

        with tf.name_scope('metrics'):
            xe = cross_entropy(self.n_classes, logits=logits, labels=y)
            loss = tf.reduce_mean(xe, name='loss')
            weights = tf.reduce_sum(class_weights * y, axis=1)
            weighted_loss = tf.reduce_mean(xe * weights, name='weighted_loss')
            penalty = elastic_net(theta, l1_ratio=l1_ratio)
            penalized_loss = tf.add(weighted_loss,
                                    alpha * penalty,
                                    name='penalized_loss')
            targets = tf.argmax(y, axis=1, name='targets')
            match = tf.cast(tf.equal(predictions, targets), tf.float32)
            accuracy = tf.reduce_mean(match, name='accuracy')
        add_to_collection('metrics', loss, penalized_loss, accuracy)

        with tf.name_scope('training'):
            opt = tf.train.GradientDescentOptimizer(learning_rate)
            training_op = opt.minimize(penalized_loss)
        add_to_collection('training', training_op)
Example #10
def loss_and_gradients(x, y, params):
    """
    params: a list of the form [W, b, U, b_tag]

    returns:
        loss,[gW, gb, gU, gb_tag]

    loss: scalar
    gW: matrix, gradients of W
    gb: vector, gradients of b
    gU: matrix, gradients of U
    gb_tag: vector, gradients of b_tag
    """
    W, b, U, b_tag = params
    y_tag = softmax(classifier_output(x, params))

    loss = cross_entropy(y_tag, y)
    y_ = create_1_hot_vec(y, y_tag)

    z = np.array(x).dot(W) + b
    activation = np.tanh(z)

    gb_tag = y_tag - y_
    gU = gb_tag * activation.reshape(-1, 1)

    gb = (1 - np.power(activation, 2)) * gb_tag.dot(U.T)
    gW = gb * np.array(x).reshape(-1, 1)

    return loss, [gW, gb, gU, gb_tag]
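`cross_entropy(y_tag, y)` here and in the related `loss_and_gradients` variants below takes the softmax output and an integer label; a minimal NumPy sketch under that assumption:

import numpy as np

def cross_entropy(y_tag, y, eps=1e-12):
    # hypothetical: negative log-likelihood of the correct class,
    # where y_tag is a probability vector and y an integer label
    return -np.log(max(y_tag[y], eps))
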
 def evaluate(self, data):
     output = [self.feedforward(x) for x, y in data]
     cost = [utils.cross_entropy(o, d[1]) for o, d in zip(output, data)]
     results = [(np.argmax(self.feedforward(x)), np.argmax(y))
                for (x, y) in data]
     accuracy = sum(int(x == y) for (x, y) in results) / len(results)
     avg_cost = sum(cost) / len(cost)
     return (avg_cost, accuracy)
 def forward(self):
     logits = self.logits.get_data()
     labels = self.labels.get_data()
     pred = softmax(logits)
     res = cross_entropy(logits=pred, labels=labels)
     self.out.set_data_(res)
     self.pred.set_data_(pred)
     return self.out
def train(train_x, train_target, model):
    prediction = model.predict(train_x)
    loss = utils.cross_entropy(prediction, train_target)
    l1 = 0.0
    for param in model.parameters(flatten=True):
        l1 += np.abs(param)
    loss = loss + lambda1 * l1
    return loss
Example #14
def main():

    X, T = get_facialexpression(balance_ones=True)
    # X, T  = np.shuffle(X,T)
    label_map = [
        'Anger', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral'
    ]
    # klass =3  error_rate=0.0
    # klass =4  error_rate=0.0
    # klass =5  error_rate=0.0
    # klass =0
    klass = 4
    N, D = X.shape
    X = np.concatenate(
        (np.ones((N, 1)), X),
        axis=1,
    )
    T = T.astype(np.int32)
    X = X.astype(np.float32)
    #Fix for forecasting on one image
    T = class1detect(T, detect=klass)

    D += 1

    # params
    lr = 5e-7
    max_iteration = 150
    W = np.random.randn(D) / np.sqrt(D)
    cost = []
    error = []
    for i in xrange(max_iteration):
        Y = forward(W, X)
        cost.append(cross_entropy(T, Y))
        error.append(error_rate(T, Y))

        W += lr * X.T.dot(T - Y)

        if i % 5 == 0:
            print "i=%d\tcost=%.3f\terror=%.3f" % (i, cost[-1], error[-1])

    if i % 5 == 0:
        print "i=%d\tcost=%.3f\terror=%.3f" % (i, cost[-1], error[-1])

    print "Final weight:", W
    print T
    print np.round(Y)

    plt.title('logistic regression ' + label_map[klass])
    plt.xlabel('iterations')
    plt.ylabel('cross entropy')
    plt.plot(cost)
    plt.show()

    plt.title('logistic regression ' + label_map[klass])
    plt.xlabel('iterations')
    plt.ylabel('error rate')
    plt.plot(error)
    plt.show()
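In this script `cross_entropy(T, Y)` compares binary 0/1 targets against sigmoid outputs from `forward(W, X)`; a minimal NumPy sketch under that assumption:

import numpy as np

def cross_entropy(T, Y, eps=1e-12):
    # hypothetical binary cross-entropy over all samples, clipped to avoid log(0)
    Y = np.clip(Y, eps, 1 - eps)
    return -np.sum(T * np.log(Y) + (1 - T) * np.log(1 - Y))
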
    def fit(self,
            X,
            Y,
            learning_rate=1e-6,
            reg=0,
            epochs=12000,
            show_figure=False):
        X, Y = shuffle(X, Y)

        Xvalid, Yvalid = X[-1000:, :], Y[-1000:]
        X, Y = X[:-1000, :], Y[:-1000]

        K = len(set(Y))
        N, D = X.shape

        Yind_valid = np.zeros((1000, K), dtype=np.int32)
        Yind = np.zeros((N, K), dtype=np.int32)
        Yind_valid[np.arange(1000), Yvalid] = 1
        Yind[np.arange(N), Y] = 1

        self.W = np.random.randn(D, K) / np.sqrt(D + K)
        self.b = 0

        costs = []
        best_validation_error = 1
        for i in xrange(epochs):
            for j in xrange(N):
                xj = X[j, :].T
                yj = Y[j]

                yp = np.argmax((self.W.T).dot(xj), axis=0)

                # gradient descent step
                self.W[:, yj] += (xj + reg * self.W[:, yj])
                self.W[:, yp] -= (xj + reg * self.W[:, yp])
                # self.b -= learning_rate *((pY-Y).sum() 	+ reg*self.b)

                if i % 20 == 0:
                    import code
                    code.interact(local=dict(globals(), **locals()))
                    pYvalid = self.forward(Xvalid)
                    # c = sigmoid_cost(Yvalid, pYvalid)
                    c = cross_entropy(Yind_valid, pYvalid)
                    costs.append(c)
                    e = error_rate(Yvalid, pYvalid)
                    sys.stdout.write("i:%s\tcost:%.4f\terror:%.4f\t\r" %
                                     (format(i, '04d'), c, e))
                    sys.stdout.flush()
                    # print "i", i, "cost:", c, "error", e
                    if e < best_validation_error:
                        best_validation_error = e
        print "best_validation_error:", best_validation_error

        if show_figure:
            plt.plot(costs)
            plt.show()
Example #16
def compute_rnn_loss(yhat, y):
    l = len(y)
    loss = 0
    dy = [None] * l
    for t in range(l):
        pt = utils.softmax(yhat[t])
        losst, dy[t] = utils.cross_entropy(pt, y[t])
        loss += np.sum(losst)

    return loss, dy
Example #17
    def loss_fn(params):
        targets = inputs.pop("labels")
        token_mask = jnp.where(targets > 0, 1.0, 0.0)
        logits = model(**inputs,
                       train=True,
                       dropout_rng=dropout_rng,
                       params=params)[0]
        loss, normalizing_factor = cross_entropy(logits, targets, token_mask)

        return loss / normalizing_factor
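The `cross_entropy(logits, targets, token_mask)` used in this Flax-style loss returns a summed loss together with a normalizing factor; a hedged JAX sketch (names and shapes are assumptions) could be:

import jax
import jax.numpy as jnp

def cross_entropy(logits, targets, token_mask):
    # hypothetical: token-level negative log-likelihood, masked and summed,
    # plus the number of unmasked tokens as the normalizing factor
    log_probs = jax.nn.log_softmax(logits, axis=-1)
    nll = -jnp.take_along_axis(log_probs, targets[..., None], axis=-1)[..., 0]
    return jnp.sum(nll * token_mask), jnp.sum(token_mask)
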
Example #18
    def test_backward(self):
        config = {
            'dim_hidden' : 10
          , 'len' : 2
        }
        l = RNN(config)
        l.accept([26])
        x = [np.zeros([26])] * 2
        x[0][0] = 1.0
        x[1][1] = 1.0
         
        y = l.forward(x)


        dy = [None] * 2
        loss, dy[0] = utils.cross_entropy(utils.softmax(y[0]), np.array([0]))
        loss, dy[1] = utils.cross_entropy(utils.softmax(y[1]), np.array([1]))
        
        dW, dU, dV = l.backward(dy)
Example #19
    def fit(self,
            X,
            Y,
            learning_rate=1e-8,
            reg=1e-12,
            epochs=10000,
            show_fig=False):

        D = X.shape[1]  # number of features
        K = len(set(Y))  # number of classes

        X, Y = shuffle(X, Y)
        X_valid, Y_valid = X[-1000:], Y[-1000:]
        T_valid = one_hot_encoder(Y_valid)
        X, Y = X[:-1000], Y[:-1000]

        T = one_hot_encoder(Y)

        self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
        self.b1 = np.zeros(self.M)
        self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)
        self.b2 = np.zeros(K)

        costs = []
        best_validation_error = 1
        for epoch in range(epochs):
            Y_hat, Z = self.forward(X)

            # Weight updates ----------------------
            Y_hat_T = Y_hat - T
            self.W2 -= learning_rate * (Z.T.dot(Y_hat_T) + reg * self.W2)
            self.b2 -= learning_rate * (Y_hat_T.sum() + reg * self.b2)

            val = Y_hat_T.dot(self.W2.T) * (1 - Z * Z)  #tanh
            self.W1 -= learning_rate * (X.T.dot(val) + reg * self.W1)
            self.b1 -= learning_rate * (val.sum() + reg * self.b1)
            # -------------------------------------

            if epoch % 10 == 0:
                Y_hat_valid, _ = self.forward(X_valid)
                c = cross_entropy(T_valid, Y_hat_valid)
                costs.append(c)
                e = error_rate(Y_valid, np.argmax(Y_hat_valid, axis=1))
                print("epoch:", epoch, "cost:", c, "error:", e)
                if e < best_validation_error:
                    best_validation_error = e
        print("best_validation_error:", best_validation_error)

        if show_fig:
            plt.plot(costs)
            plt.title('Validation cost')

        print("Final train classification_rate:",
              self.score(Y, self.predict(Y_hat)))
Example #20
 def loss(self, g_gen, p_gen, x_logits, x, g_inf, p_inf, p_inf_x, M_prev):
     # Calculate loss function, separately for each component because you might want to reweight contributions later                
     # L_p_g is the squared error loss between the inferred grounded location and the grounded location retrieved from the inferred abstract location
     L_p_g = torch.sum(torch.stack(utils.squared_error(p_inf, p_gen), dim=0), dim=0)
     # L_p_x is the squared error loss between the inferred grounded location and the grounded location retrieved from sensory experience
     L_p_x = torch.sum(torch.stack(utils.squared_error(p_inf, p_inf_x), dim=0), dim=0) if self.hyper['use_p_inf'] else torch.zeros_like(L_p_g)
     # L_g is squared error loss between generated abstract location and inferred abstract location
     L_g = torch.sum(torch.stack(utils.squared_error(g_inf, g_gen), dim=0), dim=0)         
     # L_x is a cross-entropy loss between sensory experience and different model predictions. First get true labels from sensory experience
     labels = torch.argmax(x, 1)            
     # L_x_gen: losses generated by generative model from g_prev -> g -> p -> x
     L_x_gen = utils.cross_entropy(x_logits[2], labels)
     # L_x_g: Losses generated by generative model from g_inf -> p -> x
     L_x_g = utils.cross_entropy(x_logits[1], labels)
     # L_x_p: Losses generated by generative model from p_inf -> x
     L_x_p = utils.cross_entropy(x_logits[0], labels)
     # L_reg are regularisation losses, L_reg_g on L2 norm of g
     L_reg_g = torch.sum(torch.stack([torch.sum(g ** 2, dim=1) for g in g_inf], dim=0), dim=0)
     # And L_reg_p regularisation on L1 norm of p
     L_reg_p = torch.sum(torch.stack([torch.sum(torch.abs(p), dim=1) for p in p_inf], dim=0), dim=0)
     # Return total loss as list of losses, so you can possibly reweight them
     L = [L_p_g, L_p_x, L_x_gen, L_x_g, L_x_p, L_g, L_reg_g, L_reg_p]
     return L
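`utils.cross_entropy(x_logits[i], labels)` above pairs logits with integer labels while keeping the result per sample, so the terms can be reweighted later; under that assumption a minimal PyTorch sketch is:

import torch.nn.functional as F

def cross_entropy(logits, labels):
    # hypothetical wrapper: per-sample cross-entropy (no reduction),
    # so the caller can combine and reweight the individual loss terms
    return F.cross_entropy(logits, labels, reduction='none')
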
    def criterion(self,t,targets_old,outputs,targets):
        # TODO: warm-up of the new layer (paper reports that improves performance, but negligible)

        # Knowledge distillation loss for all previous tasks
        loss_dist=0
        for t_old in range(0,t):
            loss_dist+=utils.cross_entropy(outputs[t_old],targets_old[t_old],exp=1/self.T)

        # Cross entropy loss
        loss_ce=self.ce(outputs[t],targets)

        # We could add the weight decay regularization mentioned in the paper. However, this might not be fair/comparable to other approaches

        return loss_ce+self.lamb*loss_dist
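The distillation call `utils.cross_entropy(outputs[t_old], targets_old[t_old], exp=1/self.T)` takes two sets of logits plus a temperature-like exponent, which points to a soft-target cross-entropy in the style of Learning without Forgetting; one hedged PyTorch sketch of such a helper:

import torch

def cross_entropy(outputs, targets, exp=1.0, eps=1e-5):
    # hypothetical soft-target cross-entropy: softmax both inputs, sharpen or flatten
    # them with the exponent, renormalize, then average -sum(target * log(output))
    out = torch.nn.functional.softmax(outputs, dim=1)
    tar = torch.nn.functional.softmax(targets, dim=1)
    if exp != 1.0:
        out = out.pow(exp)
        out = out / out.sum(dim=1, keepdim=True)
        tar = tar.pow(exp)
        tar = tar / tar.sum(dim=1, keepdim=True)
    return -(tar * out.clamp(min=eps).log()).sum(dim=1).mean()
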
Example #22
    def total_loss(self, data):
        """
        Given test data, compute the model's loss.
        :param data: the validation or test dataset used for evaluation
        :return: the model's loss
        """
        loss = 0.0
        for x, y in data:
            a = self.predict(x)
            loss += utils.cross_entropy(a, y) / len(data)
        # add the L2 regularization term
        loss += 0.5 * (self.reg_lambda / len(data)) * sum(
            np.linalg.norm(w)**2 for w in self.weights)

        return loss
    def fit(self,
            X,
            Y,
            learning_rate=1e-8,
            reg=1e-12,
            epochs=10000,
            show_fig=False):

        D = X.shape[1]  # number of features
        K = len(set(Y))  # number of classes

        X, Y = shuffle(X, Y)
        X_valid, Y_valid = X[-1000:], Y[-1000:]
        T_valid = one_hot_encoder(Y_valid)
        X, Y = X[:-1000], Y[:-1000]

        T = one_hot_encoder(Y)

        self.W = np.random.randn(D, K) / np.sqrt(D)
        self.b = np.zeros(K)

        costs = []
        best_validation_error = 1
        for epoch in range(epochs):
            Y_hat = self.forward(X)

            self.W -= learning_rate * (self.dJ_dw(T, Y_hat, X) + reg * self.W)
            self.b -= learning_rate * (self.dJ_db(T, Y_hat) + reg * self.b)

            if epoch % 100 == 0:
                Y_hat_valid = self.forward(X_valid)
                c = cross_entropy(T_valid, Y_hat_valid)
                costs.append(c)
                e = error_rate(Y_valid, np.argmax(Y_hat_valid, axis=1))
                print("epoch:", epoch, "cost:", c, "error:", e)
                if e < best_validation_error:
                    best_validation_error = e
        print("best_validation_error:", best_validation_error)

        if show_fig:
            plt.plot(costs)
            plt.title('Validation cost')
            plt.show()
        print("Final train classification_rate:", self.score(X, Y))
Example #24
def loss_and_gradients(x, y, params):
    """
    Compute the loss and the gradients at point x with given parameters.
    y is a scalar indicating the correct label.

    returns:
        loss,[gW,gb]

    loss: scalar
    gW: matrix, gradients of W
    gb: vector, gradients of b
    """
    # we apply the softmax here for the loss calculation; at prediction time computing the probability is redundant
    y_tag = softmax(classifier_output(x, params))
    loss = cross_entropy(y_tag, y)
    y_ = create_1_hot_vec(y, y_tag)
    gb = y_tag - y_
    gW = gb * np.array(x).reshape(-1, 1)
    return loss, [gW, gb]
def main():
	user_action=3
	X, T  = get_ecommerce(user_action=user_action)
	# X, T  = np.shuffle(X,T)

	N, D  = X.shape 
	X 		= np.concatenate((np.ones((N,1)), X), axis=1, ) 
	T = T.astype(np.int32)
	X = X.astype(np.float32)
	D+=1

	# params
	lr = 5e-4
	max_iteration=1000
	W  		= np.random.randn(D) / np.sqrt(D)
	cost 	= []
	error = [] 
	for i in xrange(max_iteration):
		Y = forward(W, X)
		cost.append(cross_entropy(T,Y))
		error.append(error_rate(T,Y))

		W += lr*X.T.dot(T-Y)

		if i % 5 == 0:
			print "i=%d\tcost=%.3f\terror=%.3f" % (i,cost[-1],error[-1])

	if i % 5 == 0:
			print "i=%d\tcost=%.3f\terror=%.3f" % (i,cost[-1],error[-1])
					
	print "Final weight:", W 	

	plt.title('logistic regression user_action=%d' % (user_action))
	plt.xlabel('iterations')
	plt.ylabel('cross entropy')
	plt.plot(cost)
	plt.show()

	plt.title('logistic regression user_action=%d' % (user_action))
	plt.xlabel('iterations')
	plt.ylabel('error rate')
	plt.plot(error)
	plt.show()
Example #26
    def __init__(self, input_channels=3, n_classes=2):
        tf.reset_default_graph()
        self.n_classes = n_classes

        self.x = tf.placeholder(dtype=tf.float32, shape=[None, None, None, input_channels])
        self.y = tf.placeholder(dtype=tf.float32, shape=[None, None, None, self.n_classes])

        logits = model_build.build(self.x, self.n_classes)

        self.loss = self._get_loss(logits)
        self.cross_entropy = tf.reduce_mean(utils.cross_entropy(tf.reshape(self.y, [-1, self.n_classes]),
                                                                tf.reshape(utils.pixel_wise_softmax_2(logits),
                                                                           [-1, self.n_classes])))

        # compute the pixel-level cross-entropy loss
        self.predicter = utils.pixel_wise_softmax_2(logits)
        # whether each pixel's predicted class is correct
        self.correct_pred = tf.equal(tf.argmax(self.predicter, 3), tf.argmax(self.y, 3))
        # compute the accuracy over all classes
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32))
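Here `utils.cross_entropy` receives flattened one-hot labels and already-softmaxed predictions; a hedged TF1 sketch under that assumption (the caller averages the result with `tf.reduce_mean`):

import tensorflow as tf

def cross_entropy(y_flat, p_flat):
    # hypothetical: per-pixel cross-entropy on probabilities, clipped to avoid log(0)
    return -tf.reduce_sum(y_flat * tf.log(tf.clip_by_value(p_flat, 1e-10, 1.0)),
                          axis=1)
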
    def fit(self,
            X,
            Y,
            learning_rate=1e-6,
            reg=0,
            epochs=12000,
            show_figure=False):
        X, Y = shuffle(X, Y)
        Xvalid, Yvalid = X[-1000:, :], Y[-1000:]
        X, Y = X[:-1000, :], Y[:-1000]

        N, D = X.shape
        self.W = np.random.randn(D) / np.sqrt(D)
        self.b = 0

        costs = []
        best_validation_error = 1
        for i in xrange(epochs):
            pY = self.forward(X)
            # gradient descent step
            self.W -= learning_rate * (X.T.dot(pY - Y) + reg * self.W)
            self.b -= learning_rate * ((pY - Y).sum() + reg * self.b)

            if i % 20 == 0:
                pYvalid = self.forward(Xvalid)
                # c = sigmoid_cost(Yvalid, pYvalid)
                c = cross_entropy(Yvalid, pYvalid)
                costs.append(c)
                e = error_rate(Yvalid, pYvalid)
                sys.stdout.write("i:%s\tcost:%.4f\terror:%.4f\t\r" %
                                 (format(i, '04d'), c, e))
                sys.stdout.flush()
                # print "i", i, "cost:", c, "error", e
                if e < best_validation_error:
                    best_validation_error = e
        print "best_validation_error:", best_validation_error

        if show_figure:
            plt.plot(costs)
            plt.show()
Example #28
def loss_and_gradients(x, y, params):
    """
    params: a list as created by create_classifier(...)

    returns:
        loss,[gW1, gb1, gW2, gb2, ...]

    loss: scalar
    gW1: matrix, gradients of W1
    gb1: vector, gradients of b1
    gW2: matrix, gradients of W2
    gb2: vector, gradients of b2
    ...
    (of course, if we request a linear classifier (ie, params is of length 2),
    you should not have gW2 and gb2.)
    """
    grads = []
    y_tag = softmax(classifier_output(x, params))
    loss = cross_entropy(y_tag, y)
    y_ = create_1_hot_vec(y, y_tag)

    all_z = [x]
    all_activation = []
    for (W, b) in params:
        all_z.append(np.array(all_z[-1]).dot(W) + b)
        all_activation.append(np.tanh(all_z[-1]))

    gb = y_tag - y_
    gW = gb * all_activation[-2].reshape(-1, 1)
    grads.append([gW, gb])
    # We start from the second-to-last layer because for n layers there are n-1 remaining
    # gradient computations; the last layer was already handled before the loop
    for i, layer in enumerate(params[:-1][::-2]):
        cur_ind = len(params) - 2 - i
        gb = (1 - np.power(all_activation[cur_ind], 2)) * grads[-1][1].dot(
            params[cur_ind + 1][0].T)
        gW = gb * all_z[cur_ind].reshape(-1, 1)
        grads.append([gW, gb])

    return loss, grads
Example #29
    def fit(self, x_train, y_train, max_epochs, learning_rate=0.002):
        self.initialize()  # initialize all weights and biases of all layers
        x_axis = []
        y_axis = []
        for i in range(max_epochs):
            index = 0
            loss = 0
            # stochastic gradient descent with batch size = 1
            for x in x_train:
                self.layers[0].values = x
                self.layers[0].output = x
                y = y_train[index]
                self.forward_prop()
                self.back_prop(y, learning_rate)
                y_pred = self.layers[self.num_layers - 1].output
                loss += cross_entropy(y, y_pred)
                index += 1
                # print("in no. : %d, loss: %f" % (index, loss))
            x_axis.append(i)
            y_axis.append(loss)
            print("iter no. : %d, loss: %f" % (i, loss))

        return x_axis, y_axis
Example #30
    def backward(self, Y, cache):
        self._gradients = {
            key: [
                np.zeros_like(self._gradients[key][d])
                for d in range(len(self._gradients[key]))
            ]
            for key in self._gradients.keys()
        }
        (x, a, y_hat, dropout) = cache

        for t in reversed(range(self._cell_length)):
            self._loss += sum([
                cross_entropy(y_hat[t][b, :], Y[b, t])
                for b in range(self._batch_size)
            ]) / (self._cell_length * self._batch_size)
            dy = np.array([
                cross_entropy_d(y_hat[t][b, :], Y[b, t])
                for b in range(self._batch_size)
            ]) / (self._cell_length * self._batch_size)

            self._gradients['dW_ay'][0] += np.dot(x[self._depth_size][t].T, dy)
            self._gradients['db_y'][0] += dy.sum(axis=0)
            da = np.dot(dy, self._parameters['W_ay'][0].T)

            for d in reversed(range(self._depth_size)):
                da = (1 - a[d][t]**2) * (da * dropout[d][t] +
                                         self._gradients['da'][d])
                self._gradients['dW_xa'][d] += np.dot(x[d][t].T, da)
                self._gradients['dW_aa'][d] += np.dot(a[d][t - 1].T, da)
                self._gradients['db_a'][d] += da.sum(axis=0)
                self._gradients['da'][d] = np.dot(
                    da, self._parameters['W_aa'][d].T)
                da = np.dot(da, self._parameters['W_xa'][d].T)

        self._parameters['a'] = [
            a[d][self._cell_length - 1] for d in range(self._depth_size)
        ]
Example #31
	def sgd(self, data, iterations, learning_rate, initial_momentum, final_momentum, minibatch=10, annealing=None, max_epoch_without_improvement=30, early_stop=True):
		"""
		Performs a stochastic gradient descent (SGD) optimisation of the network.
		"""
		
		m = data.shape[1]
		data = data.T
		
		###########################################################################################
		# Initialisation of the weights and bias
		
		# Copy the weights and biases into a state vector theta
		weights = []
		biases = []
		for jj in range(self.mid * 2):
			weights.append(copy.copy(self.layers[jj].weights))
			biases.append(self.layers[jj].hidden_biases) 
			
		theta, indices, weights_shape, biases_shape = self._roll(weights, biases)
		del weights, biases
		
		###########################################################################################
		v_mom = 0
		best_cost = 1e8
		
		batch_indices = np.arange(m)
		n_minibatches = np.int(np.ceil(m/minibatch))
		
		gamma = initial_momentum
		
		for epoch in range(iterations):
			np.random.shuffle(batch_indices)			
			for ibatch in range(n_minibatches+1):
				ids = batch_indices[ibatch*minibatch:(ibatch+1)*minibatch]
				batch = data[:,ids]
				
				_, thetan = self.cost(theta, indices, weights_shape, biases_shape,
				0, 0, 0, batch, cost_fct='cross-entropy', log_cost=False)
				v_mom = gamma * v_mom + learning_rate * thetan
				theta -= v_mom
			
			actual = self.feedforward(data.T)
			cost = utils.cross_entropy(data.T, actual)
			print 'Epoch %4d/%4d:\t%e' % (epoch+1, iterations, cost)

			self.train_history.append(cost)
			
			if cost <= best_cost :
				best_cost = cost
				iter_best = epoch
				
			if epoch - iter_best > max_epoch_without_improvement :
				print 'STOP: %d epochs without improvement' % max_epoch_without_improvement
				break
			
			
			if annealing is not None:
				learning_rate /= (1. + float(epoch) / annealing)
				
			if epoch > 100:
				gamma = final_momentum
			else:
				gamma = initial_momentum + (final_momentum - initial_momentum) * utils.sigmoid(epoch - 50)
			print learning_rate, gamma
			
		###########################################################################################
		# Unroll the state vector and saves it to self.	
		for jj in range(self.mid * 2):
			w, b = self._unroll(theta, jj, indices, weights_shape, biases_shape)
			
			self.layers[jj].weights = w
			self.layers[jj].hidden_biases = b
			
		# We're done !
		self.is_trained = True