def train(trainloader, valloader, testloader, optimizers, model, epoch=10):

    total_loss, total_acc = [], []
    for epoch_ in range(epoch):
        for step, (x, y) in enumerate(trainloader):
            with tf.GradientTape() as tape:
                # forward pass: [b, 32, 32, 3] => [b, 100]
                logits = model(x)
                y_one_hot = tf.one_hot(y, depth=100)  # [b] => [b, 100]
                # cross-entropy on logits, averaged over the batch
                loss = tf.losses.categorical_crossentropy(y_one_hot, logits, from_logits=True)
                loss = tf.reduce_mean(loss)

            grads = tape.gradient(loss, model.trainable_variables)
            optimizers.apply_gradients(zip(grads, model.trainable_variables))

            if step % 100 == 0:
                total_loss.append(float(loss))
                print('epoch {}/{}'.format(epoch_, epoch), '------step:', step, '-----loss:', float(loss))

        # evaluate on the test set; accumulate correct predictions across all batches
        acc, total = 0, 0
        for x, y in testloader:
            logits = model(x)
            prob = tf.nn.softmax(logits, axis=1)
            y_pred = tf.argmax(prob, axis=1)
            y_pred = tf.cast(y_pred, dtype=tf.int32)
            correct = tf.equal(y_pred, y)
            acc += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
            total += x.shape[0]

        print('evaluate acc', acc / total)
        total_acc.append(acc / total)
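
train() leaves the data loaders, model, and optimizer undefined. A minimal sketch of a setup it could be called with; the CIFAR-100 pipeline and the small network below are assumptions inferred from the [b, 32, 32, 3] input and the depth=100 one-hot encoding, not the original author's code:

import tensorflow as tf
from tensorflow.keras import layers, optimizers, Sequential

# CIFAR-100: x [b, 32, 32, 3] uint8, y [b, 1] -> [b] integer labels
(x, y), (x_test, y_test) = tf.keras.datasets.cifar100.load_data()
y, y_test = tf.squeeze(y, axis=1), tf.squeeze(y_test, axis=1)

def preprocess(x, y):
    return tf.cast(x, tf.float32) / 255., tf.cast(y, tf.int32)

trainloader = tf.data.Dataset.from_tensor_slices((x, y)).map(preprocess).shuffle(10000).batch(128)
testloader = tf.data.Dataset.from_tensor_slices((x_test, y_test)).map(preprocess).batch(128)
valloader = testloader  # placeholder: train() accepts a valloader but never reads it

model = Sequential([
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dense(100),  # 100 logits, one per CIFAR-100 class
])
model.build(input_shape=(None, 32, 32, 3))
optimizer = optimizers.Adam(learning_rate=1e-3)

train(trainloader, valloader, testloader, optimizer, model, epoch=10)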
Example 2
def train_step(image_data, target):
    with tf.GradientTape() as tape:
        pred_result = model(image_data, training=True)
        giou_loss = conf_loss = prob_loss = 0

        # sum the loss terms over the three detection scales
        for i in range(3):
            conv, pred = pred_result[i*2], pred_result[i*2+1]
            loss_items = compute_loss(pred, conv, *target[i], i)
            giou_loss += loss_items[0]
            conf_loss += loss_items[1]
            prob_loss += loss_items[2]

        total_loss = giou_loss + conf_loss + prob_loss

    # the gradient computation and optimizer step belong outside the tape context
    gradients = tape.gradient(total_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    tf.print("=> STEP %4d   lr: %.6f   giou_loss: %4.2f   conf_loss: %4.2f   "
             "prob_loss: %4.2f   total_loss: %4.2f" % (global_steps, optimizer.lr.numpy(),
                                                       giou_loss, conf_loss,
                                                       prob_loss, total_loss))

    # update the learning rate: linear warmup, then cosine decay towards LR_END
    global_steps.assign_add(1)
    if global_steps < warmup_steps:
        lr = global_steps / warmup_steps * cfg.TRAIN.LR_INIT
    else:
        lr = cfg.TRAIN.LR_END + 0.5 * (cfg.TRAIN.LR_INIT - cfg.TRAIN.LR_END) * (
            (1 + tf.cos((global_steps - warmup_steps) / (total_steps - warmup_steps) * np.pi))
        )
    optimizer.lr.assign(lr.numpy())
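
The learning-rate schedule at the end of train_step can be read as a standalone function: linear warmup to LR_INIT, then cosine decay down to LR_END. A minimal sketch, with illustrative values standing in for the cfg constants and step counters (assumptions, not the repository's configuration):

import numpy as np

def scheduled_lr(step, warmup_steps=1000, total_steps=30000,
                 lr_init=1e-3, lr_end=1e-6):
    # linear warmup from 0 to lr_init over the first warmup_steps steps
    if step < warmup_steps:
        return step / warmup_steps * lr_init
    # cosine decay from lr_init down to lr_end over the remaining steps
    progress = (step - warmup_steps) / (total_steps - warmup_steps)
    return lr_end + 0.5 * (lr_init - lr_end) * (1 + np.cos(progress * np.pi))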
Example 3
def train_step(x_train):

    with tf.GradientTape() as tape:
        probabilities = model(x_train, training=True)
        loss = evaluate(probabilities)

    gradients_of_generator = tape.gradient(loss, model.trainable_variables)
    opt.apply_gradients(zip(gradients_of_generator, model.trainable_variables))
    return loss
Example 4
def train_epoch(epoch):
    for step, (x, y) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            # flatten the images: [b, 28, 28] => [b, 784]
            x = tf.reshape(x, (-1, 28 * 28))
            out = model(x)
            # squared error, averaged over the batch
            loss = tf.reduce_sum(tf.square(out - y)) / x.shape[0]
        grads = tape.gradient(loss, model.trainable_variables)
        optimizers.apply_gradients(zip(grads, model.trainable_variables))

        if step % 100 == 0:
            print(epoch, step, 'loss', loss.numpy())
Example 5
def main():

    for epoch in range(30):
        for step, (x, y) in enumerate(db):
            x = tf.reshape(x, [-1, 28 * 28])

            with tf.GradientTape() as tape:
                logits = model(x)
                y_onehot = tf.one_hot(y, depth=10)
                loss_mse = tf.reduce_mean(tf.losses.MSE(y_onehot, logits))
                loss_ce = tf.losses.categorical_crossentropy(y_onehot,
                                                             logits,
                                                             from_logits=True)
                loss_ce = tf.reduce_mean(loss_ce)

            grads = tape.gradient(loss_ce, model.trainable_variables)
            optimizers.apply_gradients(zip(grads, model.trainable_variables))

            if step % 100 == 0:
                print(epoch, step, 'losses:', float(loss_ce), float(loss_mse))

        # test
        total_correct = 0
        total_num = 0
        for x, y in db_test:
            # x: [b, 784]
            # y:[10]
            x = tf.reshape(x, [-1, 28 * 28])

            # [b, 10]
            logits = model(x)
            # logits => prob
            prob = tf.nn.softmax(logits, axis=1)
            # [b,10] => [b]
            pred = tf.argmax(prob, axis=1)
            pred = tf.cast(pred, dtype=tf.int32)
            # pred:[b]
            # y:[b]
            # print(y.shape)
            # print(pred.shape)
            # exit()
            correct = tf.equal(pred, y)
            correct = tf.reduce_sum(tf.cast(correct, dtype=tf.int32))

            total_correct += int(correct)
            total_num += x.shape[0]

        acc = total_correct / total_num
        print(epoch, 'acc:', acc)
Example 6
def main():
    for epoch in range(30):
        for step, (x, y) in enumerate(db):
            # x:[b,28,28]
            # y:[b]
            x = tf.reshape(x, [-1, 28 * 28])

            # record the forward pass so the network can be updated from its gradients
            with tf.GradientTape() as tape:
                # [b, 784] => [b, 10]: calling the model runs the forward pass
                logits = model(x)
                # one-hot encode the labels
                y_onehot = tf.one_hot(y, depth=10)
                loss_mse = tf.reduce_mean(tf.losses.MSE(y_onehot, logits))
                loss_ce = tf.reduce_mean(
                    tf.losses.categorical_crossentropy(y_onehot,
                                                       logits,
                                                       from_logits=True))

            # backpropagate the cross-entropy loss and let the prebuilt optimizer update the weights
            grads = tape.gradient(loss_ce, model.trainable_variables)
            optimizers.apply_gradients(zip(grads, model.trainable_variables))

            if step % 100 == 0:
                print(epoch, step, 'loss', float(loss_ce), float(loss_mse))

        # test: run the forward pass only and measure accuracy
        total_correct = 0
        total_num = 0
        for x, y in db_test:
            # x:[b,28,28]
            # y:[b]
            x = tf.reshape(x, [-1, 28 * 28])
            # no gradient step is needed here, so no GradientTape context
            # [b,784]=>[b,10]
            logits = model(x)
            # logits => probabilities, [b, 10]
            prob = tf.nn.softmax(logits, axis=1)
            # [b,10] => [b]
            pred = tf.cast(tf.argmax(prob, axis=1), dtype=tf.int32)
            # pred:[b];y:[b]
            correct = tf.equal(pred, y)
            correct = tf.reduce_sum(tf.cast(correct, dtype=tf.int32))
            total_correct += int(correct)
            total_num += x.shape[0]

        acc = total_correct / total_num
        print(epoch, 'test acc:', acc)
Example 7
def main():
    for epoch in range(30):
        for step, (x, y) in enumerate(db):
            # x:[b,28,28]
            # y:[b]
            x = tf.reshape(x, [-1, 28 * 28])

            # record the forward pass so the network can be updated from its gradients
            with tf.GradientTape() as tape:
                # [b, 784] => [b, 10]: calling the model runs the forward pass
                logits = model(x)
                # one-hot encode the labels
                y_onehot = tf.one_hot(y, depth=10)
                loss = tf.reduce_mean(
                    tf.losses.categorical_crossentropy(y_onehot,
                                                       logits,
                                                       from_logits=True))
                # 2. accumulate the loss value in the meter
                loss_meter.update_state(loss)

            # backpropagate the cross-entropy loss and let the prebuilt optimizer update the weights
            grads = tape.gradient(loss, model.trainable_variables)
            optimizers.apply_gradients(zip(grads, model.trainable_variables))

            if step % 100 == 0:
                # 3. print the loss averaged since the last reset
                print(step, 'loss: ', loss_meter.result().numpy())
                # 4. clear the previously accumulated loss values
                loss_meter.reset_states()

        # test: run the forward pass only and measure accuracy
        # clear the accuracy meter before reusing it
        acc_meter.reset_states()
        for x, y in db_test:
            # x:[b,28,28]
            # y:[b]
            x = tf.reshape(x, [-1, 28 * 28])
            # no gradient step is needed here, so no GradientTape context
            # [b,784]=>[b,10]
            logits = model(x)
            # logits => probabilities, [b, 10]
            prob = tf.nn.softmax(logits, axis=1)
            # [b,10] => [b]
            pred = tf.cast(tf.argmax(prob, axis=1), dtype=tf.int32)
            # pred:[b];y:[b]
            acc_meter.update_state(y, pred)

        print(epoch, 'test acc:', acc_meter.result().numpy())
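
loss_meter and acc_meter in the loop above are Keras metric accumulators. Their construction is not part of the snippet; a minimal sketch of what it could look like (an assumption):

from tensorflow.keras import metrics

loss_meter = metrics.Mean()       # running average of the per-step losses
acc_meter = metrics.Accuracy()    # running accuracy over (label, prediction) pairs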
Example 8
def main():
    for epoch in range(30):
        for step, (x_, y_) in enumerate(db):
            # x_: [b, 28*28] -> [b, 784]
            # y_: [b]
            x_ = tf.reshape(x_, [-1, 28 * 28])

            with tf.GradientTape() as tape:
                # [b, 784] -> [b, 10]
                logits = model(x_)
                y_onehot = tf.one_hot(y_, depth=10)
                loss_mse = tf.reduce_mean(tf.losses.MSE(y_onehot, logits))
                loss_cs = tf.reduce_mean(tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True))
            grads = tape.gradient(loss_cs, model.trainable_variables)
            optimizers.apply_gradients(grads_and_vars=zip(grads, model.trainable_variables))

            if step % 100 == 0:
                print(epoch, step, 'loss: ', float(loss_mse), float(loss_cs))

        # test
        total_correct = 0
        total_num = 0
        for x, y in db_test:
            # x: [b, 28, 28] => [b, 784]
            # y: [b]
            x = tf.reshape(x, [-1, 28 * 28])
            # [b, 10]
            logits = model(x)
            # logits => prob, [b, 10]
            prob = tf.nn.softmax(logits, axis=1)
            # [b, 10] => [b], int64
            pred = tf.argmax(prob, axis=1)
            pred = tf.cast(pred, dtype=tf.int32)
            # pred:[b]
            # y: [b]
            # correct: [b], True: equal, False: not equal
            correct = tf.equal(pred, y)
            correct = tf.reduce_sum(tf.cast(correct, dtype=tf.int32))

            total_correct += int(correct)
            total_num += x.shape[0]

        acc = total_correct / total_num
        print(epoch, 'test acc:', acc)
Example 9
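This example starts from raw MNIST arrays xs and ys that are not shown. A minimal sketch of the assumed preamble; the imports, data loading, and batch_size value below are assumptions inferred from the code that follows:

import tensorflow as tf
from tensorflow.keras import layers, optimizers, metrics, Sequential

# xs: [60000, 28, 28] uint8 images, ys: [60000] integer labels
(xs, ys), _ = tf.keras.datasets.mnist.load_data()
ys = tf.convert_to_tensor(ys, dtype=tf.int32)
batch_size = 32
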
xs = tf.convert_to_tensor(xs, dtype=tf.float32) / 255.
db = tf.data.Dataset.from_tensor_slices((xs, ys))
db = db.batch(batch_size).repeat(30)

model = Sequential([
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(10)
])
model.build(input_shape=(4, 28 * 28))
model.summary()
optimizers = optimizers.SGD(learning_rate=0.01)  # rebinds the module name 'optimizers' to the optimizer instance
acc_meter = metrics.Accuracy()

for step, (x, y) in enumerate(db):
    with tf.GradientTape() as tap:
        x = tf.reshape(x, (-1, 28 * 28))
        out = model(x)
        y_onehot = tf.one_hot(y, depth=10)
        loss = tf.square(out - y_onehot)
        loss = tf.reduce_sum(loss) / x.shape[0]

    acc_meter.update_state(tf.argmax(out, axis=1), y)

    grads = tap.gradient(loss, model.trainable_variables)
    optimizers.apply_gradients(zip(grads, model.trainable_variables))

    if step % 200 == 0:
        print(step, 'loss', float(loss), 'acc:', acc_meter.result().numpy())
        acc_meter.reset_states()
Example 10
# %%
# set up the gradient-recording environment
with tf.GradientTape() as tape:
    # insert a channel dimension: => [b, 28, 28, 1]
    x = tf.expand_dims(x, axis=3)
    # forward pass to get the 10-class prediction distribution, [b, 28, 28, 1] => [b, 10]
    out = network(x)
    # one-hot encode the ground-truth labels, [b] => [b, 10]
    y_onehot = tf.one_hot(y, depth=10)
    # cross-entropy loss, reduced to a scalar
    loss = criteon(y_onehot, out)
# compute the gradients automatically
grads = tape.gradient(loss, network.trainable_variables)
# apply the parameter update automatically
optimizers.apply_gradients(zip(grads, network.trainable_variables))

# %%
# track the number of correct predictions and the total number of samples
correct, total = 0, 0
for x, y in db_test:  # iterate over all test-set samples
    # insert a channel dimension: => [b, 28, 28, 1]
    x = tf.expand_dims(x, axis=3)
    # forward pass to get the 10-class prediction distribution, [b, 28, 28, 1] => [b, 10]
    out = network(x)
    # strictly the logits should go through softmax before argmax,
    # but softmax preserves the relative ordering, so it can be skipped
    pred = tf.argmax(out, axis=-1)
    y = tf.cast(y, tf.int64)
    # count correct predictions and samples seen
    correct += float(tf.reduce_sum(tf.cast(tf.equal(pred, y), tf.float32)))
    total += x.shape[0]

# accuracy over the whole test set
print('test acc:', correct / total)
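
Example 10 is extracted from a larger script: network, criteon, optimizers, x, y, and db_test are defined elsewhere. A minimal sketch of the assumed model and loss setup; the architecture and hyperparameters below are illustrative assumptions, not the original code:

import tensorflow as tf
from tensorflow.keras import layers, losses, optimizers, Sequential

# small CNN producing 10 logits for the 10 MNIST classes
network = Sequential([
    layers.Conv2D(6, kernel_size=3, strides=1, activation='relu'),
    layers.MaxPooling2D(pool_size=2, strides=2),
    layers.Conv2D(16, kernel_size=3, strides=1, activation='relu'),
    layers.MaxPooling2D(pool_size=2, strides=2),
    layers.Flatten(),
    layers.Dense(120, activation='relu'),
    layers.Dense(84, activation='relu'),
    layers.Dense(10),
])
network.build(input_shape=(None, 28, 28, 1))

# cross-entropy on logits, matching criteon(y_onehot, out) above
criteon = losses.CategoricalCrossentropy(from_logits=True)
optimizers = optimizers.SGD(learning_rate=0.01)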