def train(trainloader, valloader, testloader, optimizers, model, epoch=10):
    total_loss, total_acc = [], []
    for epoch_ in range(epoch):
        for step, (x, y) in enumerate(trainloader):
            with tf.GradientTape() as tape:
                # [b, 32, 32, 3] => [b, 100], forward pass
                logits = model(x)
                y_one_hot = tf.one_hot(y, depth=100)  # [b] => [b, 100]
                loss = tf.losses.categorical_crossentropy(y_one_hot, logits, from_logits=True)
                loss = tf.reduce_mean(loss)  # mean loss over the batch
            grads = tape.gradient(loss, model.trainable_variables)
            optimizers.apply_gradients(zip(grads, model.trainable_variables))
            if step % 100 == 0:
                total_loss.append(loss)
                print('epoch {}/{}'.format(epoch_, epoch), '------step:', step, '-----loss:', float(loss))
        # evaluate once per epoch; reset the counters before the loop,
        # not inside it, so they accumulate over all test batches
        acc, total = 0, 0
        for x, y in testloader:
            logits = model(x)
            prob = tf.nn.softmax(logits, axis=1)
            y_pred = tf.cast(tf.argmax(prob, axis=1), dtype=tf.int32)
            correct = tf.equal(y_pred, y)
            acc += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
            total += x.shape[0]
        print('evaluate acc', acc / total)
        total_acc.append(acc / total)
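# A minimal setup sketch (an assumption, not from the original) of the loaders,
# model, and optimizer that train() above expects. CIFAR-100 is inferred from the
# depth=100 one-hot and the [b, 32, 32, 3] shape comment; the architecture is a
# placeholder, and valloader goes unused by train() itself.
import tensorflow as tf
from tensorflow.keras import Sequential, layers, optimizers

(x, y), (x_test, y_test) = tf.keras.datasets.cifar100.load_data()
y, y_test = tf.squeeze(y, axis=1), tf.squeeze(y_test, axis=1)  # [b, 1] => [b]

def preprocess(x, y):
    return tf.cast(x, tf.float32) / 255., tf.cast(y, tf.int32)

trainloader = tf.data.Dataset.from_tensor_slices((x, y)).map(preprocess).shuffle(10000).batch(128)
testloader = tf.data.Dataset.from_tensor_slices((x_test, y_test)).map(preprocess).batch(128)

model = Sequential([
    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(100),  # raw logits; train() applies softmax itself
])
optimizer = optimizers.Adam(learning_rate=1e-3)
# no separate validation split in this sketch, so testloader stands in for valloader
train(trainloader, testloader, testloader, optimizer, model, epoch=10)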
def train_step(image_data, target):
    with tf.GradientTape() as tape:
        pred_result = model(image_data, training=True)
        giou_loss = conf_loss = prob_loss = 0
        # optimizing process
        for i in range(3):
            conv, pred = pred_result[i * 2], pred_result[i * 2 + 1]
            loss_items = compute_loss(pred, conv, *target[i], i)
            giou_loss += loss_items[0]
            conf_loss += loss_items[1]
            prob_loss += loss_items[2]
        total_loss = giou_loss + conf_loss + prob_loss
    gradients = tape.gradient(total_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    tf.print("=> STEP %4d lr: %.6f giou_loss: %4.2f conf_loss: %4.2f "
             "prob_loss: %4.2f total_loss: %4.2f" % (global_steps, optimizer.lr.numpy(),
                                                     giou_loss, conf_loss, prob_loss, total_loss))
    # update the learning rate: linear warmup, then cosine decay
    global_steps.assign_add(1)
    if global_steps < warmup_steps:
        lr = global_steps / warmup_steps * cfg.TRAIN.LR_INIT
    else:
        lr = cfg.TRAIN.LR_END + 0.5 * (cfg.TRAIN.LR_INIT - cfg.TRAIN.LR_END) * (
            (1 + tf.cos((global_steps - warmup_steps) / (total_steps - warmup_steps) * np.pi)))
    optimizer.lr.assign(lr.numpy())
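# A standalone sketch of the schedule train_step above applies: linear warmup to
# LR_INIT, then cosine decay to LR_END. The concrete values below are assumptions
# standing in for cfg.TRAIN.LR_INIT, cfg.TRAIN.LR_END, warmup_steps, and total_steps.
import numpy as np

LR_INIT, LR_END = 1e-3, 1e-6
warmup_steps, total_steps = 1000, 30000

def scheduled_lr(step):
    # linear warmup: 0 -> LR_INIT over the first warmup_steps steps
    if step < warmup_steps:
        return step / warmup_steps * LR_INIT
    # cosine decay: LR_INIT at step == warmup_steps, LR_END at step == total_steps
    return LR_END + 0.5 * (LR_INIT - LR_END) * (
        1 + np.cos((step - warmup_steps) / (total_steps - warmup_steps) * np.pi))

for s in (0, 500, 1000, 15000, 30000):
    print(s, scheduled_lr(s))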
def train_step(x_train):
    with tf.GradientTape() as tape:
        probabilities = model(x_train, training=True)
        loss = evaluate(probabilities)
    gradients_of_generator = tape.gradient(loss, model.trainable_variables)
    opt.apply_gradients(zip(gradients_of_generator, model.trainable_variables))
    return loss
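# A minimal runnable sketch (every name here is an assumption) exercising the
# train_step above: a toy model, an Adam optimizer bound to the snippet's `opt`
# name, and a placeholder evaluate() that scores the model's output probabilities.
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(8, activation='softmax')])
opt = tf.keras.optimizers.Adam(1e-3)

def evaluate(probabilities):
    # placeholder objective: mean negative log-probability of class 0
    return -tf.reduce_mean(tf.math.log(probabilities[:, 0] + 1e-8))

x_train = tf.random.normal([32, 16])
print(float(train_step(x_train)))  # the loss should fall over repeated calls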
def train_epoch(epoch):
    for step, (x, y) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            x = tf.reshape(x, (-1, 28 * 28))
            out = model(x)
            loss = tf.reduce_sum(tf.square(out - y) / x.shape[0])
        grads = tape.gradient(loss, model.trainable_variables)
        optimizers.apply_gradients(zip(grads, model.trainable_variables))
        if step % 100 == 0:
            print(epoch, step, 'loss', loss.numpy())
def main():
    for epoch in range(30):
        for step, (x, y) in enumerate(db):
            x = tf.reshape(x, [-1, 28 * 28])
            with tf.GradientTape() as tape:
                logits = model(x)
                y_onehot = tf.one_hot(y, depth=10)  # [b] => [b, 10]
                loss_mse = tf.reduce_mean(tf.losses.MSE(y_onehot, logits))
                loss_ce = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
                loss_ce = tf.reduce_mean(loss_ce)
            grads = tape.gradient(loss_ce, model.trainable_variables)
            optimizers.apply_gradients(zip(grads, model.trainable_variables))
            if step % 100 == 0:
                print(epoch, step, 'losses:', float(loss_ce), float(loss_mse))

        # test
        total_correct = 0
        total_num = 0
        for x, y in db_test:
            # x: [b, 28, 28] => [b, 784]
            # y: [b]
            x = tf.reshape(x, [-1, 28 * 28])
            # [b, 10]
            logits = model(x)
            # logits => prob
            prob = tf.nn.softmax(logits, axis=1)
            # [b, 10] => [b]
            pred = tf.argmax(prob, axis=1)
            pred = tf.cast(pred, dtype=tf.int32)
            # pred: [b]; y: [b]
            correct = tf.equal(pred, y)
            correct = tf.reduce_sum(tf.cast(correct, dtype=tf.int32))
            total_correct += int(correct)
            total_num += x.shape[0]
        acc = total_correct / total_num
        print(epoch, 'acc:', acc)
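# A setup sketch (an assumption, not from the original) for the db / db_test /
# model / optimizers names that the main() variants here rely on: the 28*28
# reshape and depth=10 one-hot point at MNIST-style data and a 10-way classifier.
import tensorflow as tf
from tensorflow.keras import Sequential, layers, optimizers

(x, y), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

def preprocess(x, y):
    return tf.cast(x, tf.float32) / 255., tf.cast(y, tf.int32)

db = tf.data.Dataset.from_tensor_slices((x, y)).map(preprocess).shuffle(10000).batch(128)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test)).map(preprocess).batch(128)

model = Sequential([
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(10),  # raw logits, matching from_logits=True in the loops
])
model.build(input_shape=[None, 28 * 28])
# rebinds the module name so the loops' `optimizers.apply_gradients` calls still work
optimizers = optimizers.Adam(learning_rate=1e-3)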
def main():
    for epoch in range(30):
        for step, (x, y) in enumerate(db):
            # x: [b, 28, 28]
            # y: [b]
            x = tf.reshape(x, [-1, 28 * 28])
            # record gradients to update the network
            with tf.GradientTape() as tape:
                # [b, 784] => [b, 10]: call the model directly for the forward pass
                logits = model(x)
                # one-hot encoding
                y_onehot = tf.one_hot(y, depth=10)
                loss_mse = tf.reduce_mean(tf.losses.MSE(y_onehot, logits))
                loss_ce = tf.reduce_mean(
                    tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True))
            # let the prebuilt optimizer update the parameters
            # (backpropagating the cross-entropy loss)
            grads = tape.gradient(loss_ce, model.trainable_variables)
            optimizers.apply_gradients(zip(grads, model.trainable_variables))
            if step % 100 == 0:
                print(epoch, step, 'loss', float(loss_ce), float(loss_mse))

        # test: forward pass only, to measure accuracy
        total_correct = 0
        total_num = 0
        for x, y in db_test:
            # x: [b, 28, 28]
            # y: [b]
            x = tf.reshape(x, [-1, 28 * 28])
            # no gradient descent here, so no GradientTape context is needed
            # [b, 784] => [b, 10]
            logits = model(x)
            # logits => probabilities, [b, 10]
            prob = tf.nn.softmax(logits, axis=1)
            # [b, 10] => [b]
            pred = tf.cast(tf.argmax(prob, axis=1), dtype=tf.int32)
            # pred: [b]; y: [b]
            correct = tf.equal(pred, y)
            correct = tf.reduce_sum(tf.cast(correct, dtype=tf.int32))
            total_correct += int(correct)
            total_num += x.shape[0]
        acc = total_correct / total_num
        print(epoch, 'test acc:', acc)
def main():
    for epoch in range(30):
        for step, (x, y) in enumerate(db):
            # x: [b, 28, 28]
            # y: [b]
            x = tf.reshape(x, [-1, 28 * 28])
            # record gradients to update the network
            with tf.GradientTape() as tape:
                # [b, 784] => [b, 10]: call the model directly for the forward pass
                logits = model(x)
                # one-hot encoding
                y_onehot = tf.one_hot(y, depth=10)
                loss = tf.reduce_mean(
                    tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True))
                # 2. feed the new loss value into the meter
                loss_meter.update_state(loss)
            # let the prebuilt optimizer update the parameters
            # (backpropagating the cross-entropy loss)
            grads = tape.gradient(loss, model.trainable_variables)
            optimizers.apply_gradients(zip(grads, model.trainable_variables))
            if step % 100 == 0:
                # 3. print the loss averaged since the last reset
                print(step, 'loss: ', loss_meter.result().numpy())
                # 4. clear the accumulated loss statistics
                loss_meter.reset_states()

        # test: forward pass only, to measure accuracy
        # reset the meter before reusing it
        acc_meter.reset_states()
        for x, y in db_test:
            # x: [b, 28, 28]
            # y: [b]
            x = tf.reshape(x, [-1, 28 * 28])
            # no gradient descent here, so no GradientTape context is needed
            # [b, 784] => [b, 10]
            logits = model(x)
            # logits => probabilities, [b, 10]
            prob = tf.nn.softmax(logits, axis=1)
            # [b, 10] => [b]
            pred = tf.cast(tf.argmax(prob, axis=1), dtype=tf.int32)
            # pred: [b]; y: [b]
            acc_meter.update_state(y, pred)
        print(epoch, 'test acc:', acc_meter.result().numpy())
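# A sketch (assumed, but consistent with the loop above) of the two Keras meters
# it uses: a running mean for the loss and an Accuracy metric for predictions.
from tensorflow.keras import metrics

loss_meter = metrics.Mean()     # averages every value passed to update_state()
acc_meter = metrics.Accuracy()  # compares integer predictions against labels

# typical cycle: accumulate, read, reset
loss_meter.update_state(0.7)
loss_meter.update_state(0.5)
print(loss_meter.result().numpy())  # 0.6
loss_meter.reset_states()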
def main():
    for epoch in range(30):
        for step, (x_, y_) in enumerate(db):
            # x_: [b, 28, 28] -> [b, 784]
            # y_: [b]
            x_ = tf.reshape(x_, [-1, 28 * 28])
            with tf.GradientTape() as tape:
                # [b, 784] -> [b, 10]
                logits = model(x_)
                y_onehot = tf.one_hot(y_, depth=10)
                loss_mse = tf.reduce_mean(tf.losses.MSE(y_onehot, logits))
                loss_cs = tf.reduce_mean(
                    tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True))
            grads = tape.gradient(loss_cs, model.trainable_variables)
            optimizers.apply_gradients(grads_and_vars=zip(grads, model.trainable_variables))
            if step % 100 == 0:
                print(epoch, step, 'loss: ', float(loss_mse), float(loss_cs))

        # test
        total_correct = 0
        total_num = 0
        for x, y in db_test:
            # x: [b, 28, 28] => [b, 784]
            # y: [b]
            x = tf.reshape(x, [-1, 28 * 28])
            # [b, 10]
            logits = model(x)
            # logits => prob, [b, 10]
            prob = tf.nn.softmax(logits, axis=1)
            # [b, 10] => [b], int64
            pred = tf.argmax(prob, axis=1)
            pred = tf.cast(pred, dtype=tf.int32)
            # pred: [b]; y: [b]
            # correct: [b], True: equal, False: not equal
            correct = tf.equal(pred, y)
            correct = tf.reduce_sum(tf.cast(correct, dtype=tf.int32))
            total_correct += int(correct)
            total_num += x.shape[0]
        acc = total_correct / total_num
        print(epoch, 'test acc:', acc)
import tensorflow as tf
from tensorflow.keras import Sequential, layers, optimizers, metrics

# xs, ys, and batch_size are assumed to be defined upstream (e.g. MNIST arrays)
xs = tf.convert_to_tensor(xs, dtype=tf.float32) / 255.
db = tf.data.Dataset.from_tensor_slices((xs, ys))
db = db.batch(batch_size).repeat(30)

model = Sequential([
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(10)
])
model.build(input_shape=(4, 28 * 28))
model.summary()

# name the instance 'optimizer' so it does not shadow the optimizers module
optimizer = optimizers.SGD(learning_rate=0.01)
acc_meter = metrics.Accuracy()

for step, (x, y) in enumerate(db):
    with tf.GradientTape() as tape:
        x = tf.reshape(x, (-1, 28 * 28))
        out = model(x)
        y_onehot = tf.one_hot(y, depth=10)
        loss = tf.square(out - y_onehot)
        loss = tf.reduce_sum(loss) / x.shape[0]
    # Accuracy.update_state expects (y_true, y_pred)
    acc_meter.update_state(y, tf.argmax(out, axis=1))
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    if step % 200 == 0:
        print(step, 'loss', float(loss), 'acc:', acc_meter.result().numpy())
        acc_meter.reset_states()
# %%
# build the gradient-recording context
with tf.GradientTape() as tape:
    # insert the channel dimension, => [b, 28, 28, 1]
    x = tf.expand_dims(x, axis=3)
    # forward pass, get the 10-class prediction distribution, [b, 28, 28, 1] => [b, 10]
    out = network(x)
    # one-hot encode the ground-truth labels, [b] => [b, 10]
    y_onehot = tf.one_hot(y, depth=10)
    # compute the cross-entropy loss, a scalar
    loss = criteon(y_onehot, out)
# automatically compute gradients
grads = tape.gradient(loss, network.trainable_variables)
# automatically update parameters
optimizers.apply_gradients(zip(grads, network.trainable_variables))

# %%
# count correct predictions and the total number of samples
correct, total = 0, 0
for x, y in db_test:  # iterate over all test-set samples
    # insert the channel dimension, => [b, 28, 28, 1]
    x = tf.expand_dims(x, axis=3)
    # forward pass, get the 10-class prediction distribution, [b, 28, 28, 1] => [b, 10]
    out = network(x)
    # strictly the flow is softmax first, then argmax, but softmax does not
    # change the relative ordering of elements, so it is omitted
    pred = tf.argmax(out, axis=-1)
    y = tf.cast(y, tf.int64)
    # accumulate the number of correct predictions
    correct += float(tf.reduce_sum(tf.cast(tf.equal(pred, y), tf.float32)))
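# %%
# A context sketch (an assumption, not from the original) of the network / criteon /
# optimizers definitions the two cells above run against, plus the shape of the
# enclosing loops; the CNN layers are placeholders.
import tensorflow as tf
from tensorflow.keras import Sequential, layers, losses, optimizers

network = Sequential([
    layers.Conv2D(6, kernel_size=3, strides=1, activation='relu'),
    layers.MaxPooling2D(pool_size=2, strides=2),
    layers.Flatten(),
    layers.Dense(10),  # raw logits, matching from_logits=True below
])
network.build(input_shape=(None, 28, 28, 1))
criteon = losses.CategoricalCrossentropy(from_logits=True)
# rebinds the module name so the cell's `optimizers.apply_gradients` call still works
optimizers = optimizers.Adam(learning_rate=1e-3)
# the training cell above sits inside loops like:
#   for epoch in range(epochs):
#       for step, (x, y) in enumerate(db_train):
#           ...  # gradient-tape cell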