import os
import time

import numpy as np


def main(args):
    # Load the data
    (x_train, y_train), (x_test, y_test) = load_cifar(args.cifar_root)

    # Sample training examples at random
    train_num = x_train.shape[0]

    def next_batch(batch_size):
        idx = np.random.choice(train_num, batch_size)
        return x_train[idx], y_train[idx]

    # Network and optimizer
    vgg = VGG(image_size=32, name='vgg11')
    opt = RmsProp(vgg.weights, lr=args.lr, decay=1e-3)

    # Load weights
    if args.checkpoint:
        weights = load_weights(args.checkpoint)
        vgg.load_weights(weights)
        print("load weights done")

    # Evaluation only
    if args.eval_only:
        indices = np.random.choice(len(x_test), args.eval_num, replace=False)
        print('{} start evaluate'.format(
            time.asctime(time.localtime(time.time()))))
        acc = get_accuracy(vgg, x_test[indices], ys=y_test[indices])
        print('{} acc on test dataset is :{:.3f}'.format(
            time.asctime(time.localtime(time.time())), acc))
        return

    # Training
    num_steps = args.steps
    for step in range(num_steps):
        x, y_true = next_batch(args.batch_size)
        # Forward pass (np.float was removed in NumPy 1.24; use the builtin)
        y_predict = vgg.forward(x.astype(float))
        acc = np.mean(
            np.argmax(y_predict, axis=1) == np.argmax(y_true, axis=1))
        # Compute the loss and its gradient w.r.t. the logits
        loss, gradient = cross_entropy_loss(y_predict, y_true)
        # Backward pass
        vgg.backward(gradient)
        # Apply the parameter update
        opt.iterate(vgg)
        # Log progress
        print('{} step:{},loss:{:.4f},acc:{:.4f}'.format(
            time.asctime(time.localtime(time.time())), step, loss, acc))
        # Save weights periodically
        if step % 100 == 0:
            save_weights(
                os.path.join(args.save_dir,
                             'weights-{:03d}.pkl'.format(step)),
                vgg.weights)
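The loop above relies on `cross_entropy_loss` returning both the scalar loss and the gradient with respect to the logits. The project's own implementation is not shown here; below is a minimal NumPy sketch of that contract, assuming softmax followed by mean cross entropy over one-hot labels (the function name and the stability shift are my additions):

import numpy as np

def cross_entropy_loss_sketch(y_predict, y_true):
    # Softmax over the class axis, shifted for numerical stability
    shifted = y_predict - np.max(y_predict, axis=1, keepdims=True)
    probs = np.exp(shifted) / np.sum(np.exp(shifted), axis=1, keepdims=True)
    n = y_predict.shape[0]
    # Mean cross entropy over the batch; y_true is one-hot
    loss = -np.sum(y_true * np.log(probs + 1e-12)) / n
    # Gradient of the mean loss w.r.t. the logits
    gradient = (probs - y_true) / n
    return loss, gradient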
def backward(self, train_data, y_true):
    loss, self.gradients["A3"] = losses.cross_entropy_loss(
        self.nodes["A3"], y_true)
    self.gradients["W3"], self.gradients["B3"], self.gradients["Z2"] = \
        layer.fc_backward(self.gradients["A3"], self.Parameters["W3"],
                          self.nodes["Z2"])
    self.gradients["A2"] = activations.relu_backward(
        self.gradients["Z2"].T, self.nodes["A2"])
    self.gradients["W2"], self.gradients["B2"], self.gradients["Z1"] = \
        layer.fc_backward(self.gradients["A2"], self.Parameters["W2"],
                          self.nodes["Z1"])
    self.gradients["A1"] = activations.relu_backward(
        self.gradients["Z1"].T, self.nodes["A1"])
    # Gradient w.r.t. the flattened conv features; stored under its own key
    # so it no longer clobbers self.gradients["Z1"] computed above.
    self.gradients["W1"], self.gradients["B1"], self.gradients["X2"] = \
        layer.fc_backward(self.gradients["A1"], self.Parameters["W1"],
                          self.nodes["X2"])
    # Restore the pre-flatten shape (the batch size of 128 is hard-coded).
    self.gradients["X2"] = self.gradients["X2"].reshape((128, 16, 5, 5))
    self.gradients["Maxpool2"] = layer.max_pooling_backward(
        self.gradients["X2"], self.nodes["Conv2"], (2, 2))
    self.gradients["K2"], self.gradients["Kb2"], self.gradients["KZ2"] = \
        layer.conv_backward(self.gradients["Maxpool2"],
                            self.Parameters["K2"], self.nodes["Maxpool1"])
    self.gradients["Maxpool1"] = layer.max_pooling_backward(
        self.gradients["KZ2"], self.nodes["Conv1"], (2, 2))
    self.gradients["K1"], self.gradients["Kb1"], self.gradients["KZ1"] = \
        layer.conv_backward(self.gradients["Maxpool1"],
                            self.Parameters["K1"], train_data)
    return loss
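A hand-written backward pass like this (with its hard-coded reshape) is easy to get subtly wrong. A common safeguard is a finite-difference gradient check; the sketch below is generic, and all names in the usage comment are hypothetical:

import numpy as np

def grad_check(loss_fn, w, analytic_grad, eps=1e-5, n_samples=5):
    # Compare a few random entries of the analytic gradient against
    # central finite differences of the scalar loss loss_fn().
    for _ in range(n_samples):
        idx = tuple(np.random.randint(s) for s in w.shape)
        orig = w[idx]
        w[idx] = orig + eps
        loss_plus = loss_fn()
        w[idx] = orig - eps
        loss_minus = loss_fn()
        w[idx] = orig  # restore the original value
        numeric = (loss_plus - loss_minus) / (2 * eps)
        denom = max(1e-8, abs(numeric) + abs(analytic_grad[idx]))
        print(idx, numeric, analytic_grad[idx],
              abs(numeric - analytic_grad[idx]) / denom)

# Hypothetical usage: perturb K1 in place, re-run forward + loss
# grad_check(lambda: net.forward_loss(x, y), net.Parameters["K1"],
#            net.gradients["K1"])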
def testCrossEntropyLossAllWrongWithWeight(self):
    with self.test_session():
        logits = tf.constant([[10.0, 0.0, 0.0],
                              [0.0, 10.0, 0.0],
                              [0.0, 0.0, 10.0]])
        labels = tf.constant([[0, 0, 1],
                              [1, 0, 0],
                              [0, 1, 0]])
        loss = losses.cross_entropy_loss(logits, labels, weight=0.5)
        self.assertEqual(loss.op.name, 'CrossEntropyLoss/value')
        self.assertAlmostEqual(loss.eval(), 5.0, 3)
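The expected value follows from the margins: every example assigns the true class a softmax probability of 1/(e^10 + 2), so the per-example cross entropy is 10 + log(1 + 2e^-10) ≈ 10.0001; the batch mean scaled by weight=0.5 is ≈ 5.00005, equal to 5.0 at three decimal places. A quick NumPy check of that arithmetic:

import numpy as np

logits = np.array([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]])
labels = np.array([[0, 0, 1], [1, 0, 0], [0, 1, 0]])
probs = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
per_example = -np.sum(labels * np.log(probs), axis=1)  # each ~10.0001
print(0.5 * per_example.mean())  # ~5.00005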
def backward(self, train_data, y_true):
    loss, self.gradients["y"] = cross_entropy_loss(self.nurons["y"], y_true)
    self.gradients["W3"], self.gradients["b3"], self.gradients["z3_relu"] = \
        fc_backward(self.gradients["y"], self.weights["W3"],
                    self.nurons["z3_relu"])
    self.gradients["z3"] = relu_backward(self.gradients["z3_relu"],
                                         self.nurons["z3"])
    self.gradients["W2"], self.gradients["b2"], self.gradients["z2_relu"] = \
        fc_backward(self.gradients["z3"], self.weights["W2"],
                    self.nurons["z2_relu"])
    self.gradients["z2"] = relu_backward(self.gradients["z2_relu"],
                                         self.nurons["z2"])
    self.gradients["W1"], self.gradients["b1"], _ = \
        fc_backward(self.gradients["z2"], self.weights["W1"], train_data)
    return loss
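`relu_backward` here routes the upstream gradient through the ReLU mask computed from the stored pre-activations. A one-line sketch of that behavior (the real module's conventions may differ):

import numpy as np

def relu_backward_sketch(d_out, z):
    # Gradient passes only where the pre-activation was positive
    return d_out * (z > 0)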
def backward(self, train_data, y_true):
    loss, self.gradients["A3"] = losses.cross_entropy_loss(
        self.nodes["A3"], y_true)
    self.gradients["W3"], self.gradients["B3"], self.gradients["Z2"] = \
        layer.fc_backward(self.gradients["A3"], self.Parameters["W3"],
                          self.nodes["Z2"])
    self.gradients["A2"] = activations.relu_backward(
        self.gradients["Z2"].T, self.nodes["A2"])
    self.gradients["W2"], self.gradients["B2"], self.gradients["Z1"] = \
        layer.fc_backward(self.gradients["A2"], self.Parameters["W2"],
                          self.nodes["Z1"])
    self.gradients["A1"] = activations.relu_backward(
        self.gradients["Z1"].T, self.nodes["A1"])
    self.gradients["W1"], self.gradients["B1"], self.gradients["Z1"] = \
        layer.fc_backward(self.gradients["A1"], self.Parameters["W1"],
                          train_data)
    return loss
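The fully-connected backward passes above assume `fc_backward(upstream_grad, W, layer_input)` returns the weight, bias, and input gradients. The standard formulas are sketched below; the shape and transpose conventions are assumptions (the `.T` in the calling code suggests the module returns the input gradient transposed):

import numpy as np

def fc_backward_sketch(d_out, W, x):
    # d_out: (batch, out), W: (in, out), x: (batch, in)
    dW = np.dot(x.T, d_out)     # gradient w.r.t. the weights
    db = np.sum(d_out, axis=0)  # gradient w.r.t. the bias
    dx = np.dot(d_out, W.T)     # gradient passed to the previous layer
    return dW, db, dx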
def backward_pass(self, layer_activations, targets):
    '''
    Return the deltas for each layer of the network; deltas are as
    defined in Michael Nielsen's book.
    '''
    # Backward propagation: calculate the errors and gradients
    deltas = [None] * len(self.layer_sizes)
    # We assume the loss is always cross entropy and the last layer
    # is a softmax layer.
    deltas[-1] = losses.cross_entropy_loss(layer_activations[-1], targets,
                                           deriv=True)
    # Start the iteration from the second-to-last layer
    for layer in range(len(deltas) - 2, 0, -1):
        deltas[layer] = np.dot(
            deltas[layer + 1],
            self.weight_matrices[layer].T) * activation(
                layer_activations[layer], type=self.non_lins[layer],
                deriv=True)
    return deltas
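In Nielsen's notation this loop implements the backpropagation recursion; with this code's row-major convention, where `weight_matrices[layer]` connects layer $l$ to layer $l+1$, the output delta for softmax plus cross entropy and the hidden-layer recursion are

$$\delta^{L} = a^{L} - y, \qquad \delta^{l} = \big(\delta^{l+1} (W^{l})^{\top}\big) \odot \sigma'(a^{l}),$$

which is exactly `np.dot(deltas[layer + 1], self.weight_matrices[layer].T)` multiplied elementwise by the activation derivative.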
def make_train_op(self):
    masks = self.tf_placeholders['dir_masks']
    logits, pred_regs, mask_logits = self.make_rpn_op(training=True)
    labels = self.tf_placeholders['labels']
    gt_regs = self.tf_placeholders['bbox_regs']
    weights = self.tf_placeholders['weights']
    learning_rate = self.tf_placeholders['learning_rate']
    batch_size = self.cfg.batch_size
    clf_losses, reg_losses = [], []
    for i in self.block_layers:
        # Classification loss (object vs. background)
        logits_i = tf.reshape(logits[i], [batch_size, -1, 2])
        labels_i = tf.maximum(labels[i], 0)
        cross_entropy_loss = losses.cross_entropy_loss(
            logits=logits_i, labels=labels_i, weights=weights[i])
        # Regression loss
        reg_loss = losses.reg_loss(gt_regs[i], pred_regs[i], labels[i])
        reg_loss = self.cfg.reg_weight * reg_loss
        clf_losses.append(cross_entropy_loss)
        reg_losses.append(reg_loss)
    sem_seg_loss, dir_seg_loss = losses.mask_loss(mask_logits, masks)
    mask_loss = self.cfg.sem_seg_weight * tf.reduce_mean(sem_seg_loss) \
        + self.cfg.dir_seg_weight * tf.reduce_mean(dir_seg_loss)
    solver = tf.train.AdamOptimizer(learning_rate, epsilon=1e-8)
    # regular_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    # loss = cross_entropy_loss + reg_loss + tf.add_n(regular_losses)
    clf_loss = tf.add_n(clf_losses)
    reg_loss = tf.add_n(reg_losses)
    loss = clf_loss + reg_loss + mask_loss
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        self.train_op = solver.minimize(loss, global_step=self.global_step)
    self.loss_op = [clf_loss, reg_loss, mask_loss]
def score(self, x, target, metric='accuracy'):
    y = self.predict(x)
    if metric == 'accuracy':
        return accuracy(y, target)
    elif metric == 'loss':
        return losses.cross_entropy_loss(y, target)
    else:
        # Fail loudly instead of silently returning None
        raise ValueError('unknown metric: {}'.format(metric))
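Hypothetical usage, assuming a fitted model `clf` and held-out arrays `x_val` / `y_val`:

val_acc = clf.score(x_val, y_val)                  # default: accuracy
val_loss = clf.score(x_val, y_val, metric='loss')  # cross-entropy loss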
def pretrain(opts):
    # Dataset and iterator
    dataset = Dataset(opts.rawdata_path)
    dataset_train = dataset.get_dataset(opts)
    iterator = tf.data.Iterator.from_structure(dataset_train.output_types,
                                               dataset_train.output_shapes)
    v_a, v_b, label = iterator.get_next()
    # Network
    outputs, training = get_network(tf.concat((v_a, v_b), axis=0), opts)
    # Save and load
    saver = tf.train.Saver(var_list=tf.get_collection(
        tf.GraphKeys.GLOBAL_VARIABLES, scope=opts.network))
    # Loss
    loss, accuracy = cross_entropy_loss(outputs, label, opts)
    # Summary
    writer_train = tf.summary.FileWriter(
        os.path.join(opts.output_path, opts.time, 'logs'),
        tf.get_default_graph())
    summary_op = tf.summary.merge_all()
    # Optimizer
    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(opts.lr, global_step,
                                    dataset_train.length * 67, 0.1,
                                    staircase=True)
    train_op = tf.train.AdamOptimizer(learning_rate=lr).minimize(
        loss, global_step=global_step, colocate_gradients_with_ops=True)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    train_op = tf.group(update_ops + [train_op])
    # Main loop
    with tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                          allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())
        print('training loop start')
        # time.clock() was removed in Python 3.8; use perf_counter() instead
        start_train = time.perf_counter()
        for epoch in range(1, opts.epochs + 1):
            print('epoch: %d' % epoch)
            start_ep = time.perf_counter()
            # Train
            print('training')
            sess.run(iterator.make_initializer(dataset_train))
            while True:
                try:
                    summary_train, _ = sess.run([summary_op, train_op],
                                                feed_dict={training: True})
                    writer_train.add_summary(
                        summary_train,
                        tf.train.global_step(sess, global_step))
                except tf.errors.OutOfRangeError:
                    break
            print('step: %d' % tf.train.global_step(sess, global_step))
            # Save model
            if epoch % opts.save_freq == 0 or epoch == opts.epochs:
                print('save model')
                saver.save(
                    sess,
                    os.path.join(opts.output_path, opts.time,
                                 'pretrainmodel.ckpt'))
            # Epoch time is measured from start_ep, total time from
            # start_train (the original had these two swapped).
            print("epoch end, elapsed time: %ds, total time: %ds" %
                  (time.perf_counter() - start_ep,
                   time.perf_counter() - start_train))
        print('training loop end')
        writer_train.close()