import math
import tensorflow as tf
from d2l import tensorflow as d2l


def __init__(self, net, train_iter, test_iter, num_epochs, device_name):
    self.timer = d2l.Timer()
    self.animator = d2l.Animator(
        xlabel='epoch', xlim=[0, num_epochs],
        legend=['train loss', 'train acc', 'test acc'])
    self.net = net
    self.train_iter = train_iter
    self.test_iter = test_iter
    self.num_epochs = num_epochs
    self.device_name = device_name
    # Per-epoch history of the metrics plotted by the animator
    self.loss_arr = []
    self.train_acc_arr = []
    self.test_acc_arr = []
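# The `__init__` above evidently belongs to a tf.keras.callbacks.Callback
# subclass whose class header and epoch hooks are not shown in this section.
# Below is a minimal sketch (an assumption, not the original implementation)
# of how the standard Keras hooks could feed the animator and the history
# lists; the metric keys 'loss' and 'accuracy' depend on how `net` is compiled.
def on_epoch_begin(self, epoch, logs=None):
    self.timer.start()

def on_epoch_end(self, epoch, logs=None):
    self.timer.stop()
    # Evaluate on the held-out iterator, then record and plot all three metrics
    test_acc = self.net.evaluate(
        self.test_iter, verbose=0, return_dict=True)['accuracy']
    metrics = (logs['loss'], logs['accuracy'], test_acc)
    self.animator.add(epoch + 1, metrics)
    self.loss_arr.append(logs['loss'])
    self.train_acc_arr.append(logs['accuracy'])
    self.test_acc_arr.append(test_acc)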
def train_epoch_ch8(model, train_iter, loss, updater, params, use_random_iter):
    """Train a model within one epoch (defined in Chapter 8)."""
    state, timer = None, d2l.Timer()
    metric = d2l.Accumulator(2)  # Sum of training loss, no. of tokens
    for X, Y in train_iter:
        if state is None or use_random_iter:
            # Initialize `state` when either it is the first iteration or
            # using random sampling
            state = model._init_state(batch_size=X.shape[0])
        with tf.GradientTape(persistent=True) as g:
            g.watch(params)
            y_hat, state = model(X, state)
            y = tf.reshape(Y, (-1))
            l = loss(y, y_hat)
        # Compute, clip, and apply gradients outside the tape context
        grads = g.gradient(l, params)
        grads = grad_clipping(grads, 0.5)
        updater.apply_gradients(zip(grads, params))
        # Keras loss by default returns the average loss in a batch
        # l_sum = l * float(tf.size(y).numpy()) if isinstance(
        #     loss, tf.keras.losses.Loss) else tf.reduce_sum(l)
        metric.add(l * tf.size(y).numpy(), tf.size(y).numpy())
    # Return perplexity and training speed (tokens per second)
    return math.exp(metric[0] / metric[1]), metric[1] / timer.stop()
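# `grad_clipping` is called above but not defined in this section. A minimal
# sketch of a compatible helper, assuming it rescales the whole gradient list
# by its global norm (the usual safeguard against exploding RNN gradients);
# the chapter's own definition may differ in detail.
def grad_clipping(grads, theta):
    """Rescale a list of gradients so their global norm is at most `theta`."""
    clipped, _ = tf.clip_by_global_norm(grads, theta)
    return clipped

# Illustrative per-epoch driver (names such as `num_epochs`, `model`, and
# `updater` are assumed to be set up elsewhere in the chapter):
# for epoch in range(num_epochs):
#     ppl, speed = train_epoch_ch8(
#         model, train_iter, loss, updater, params, use_random_iter=False)
#     print(f'epoch {epoch + 1}, perplexity {ppl:.1f}, {speed:.1f} tokens/sec')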