Example no. 1
0
def train_ch8(model, train_iter, vocab, lr, num_epochs, strategy, use_random_iter=False):
    """Train a model (defined in Chapter 8).

    Args:
        model: model exposing a `params` attribute and a
            `predict(vocab=..., prefix=..., num_tokens=...)` method.
        train_iter: iterator over training minibatches.
        vocab: vocabulary used to build prediction prefixes.
        lr: learning rate for the Adam optimizer.
        num_epochs: number of training epochs.
        strategy: tf.distribute strategy; loss and optimizer are created
            inside its scope.
        use_random_iter: whether minibatches are sampled randomly
            (forwarded to train_epoch_ch8).
    """
    params = model.params
    # Loss and optimizer must be constructed inside the distribution scope.
    with strategy.scope():
        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        updater = tf.keras.optimizers.Adam(lr, amsgrad=True)
    animator = d2l.Animator(xlabel='epoch', ylabel='perplexity',
                            legend=['train'], xlim=[10, num_epochs])
    predict = lambda prefix: model.predict(vocab=vocab, prefix=prefix, num_tokens=50)
    # Train, sampling some generated text every 10 epochs.
    for epoch in range(num_epochs):
        ppl, speed = train_epoch_ch8(
            model, train_iter, loss, updater, params, use_random_iter)
        if (epoch + 1) % 10 == 0:
            print(predict('time traveller'))
            animator.add(epoch + 1, [ppl])
    device = d2l.try_gpu()._device_name
    print(f'perplexity {ppl:.1f}, {speed:.1f} tokens/sec on {str(device)}')
    print(predict('time traveller'))
    print(predict('traveller'))


# BUG FIX: the driver code below was mistakenly indented inside train_ch8,
# making the function call itself unconditionally (infinite recursion).
# It belongs at module level; it expects `train_iter` and `vocab` to be
# defined by the surrounding notebook/script context.
device_name = d2l.try_gpu()._device_name
strategy = tf.distribute.OneDeviceStrategy(device_name)
num_epochs, num_hiddens, lr = 500, 512, 0.0001
model = RnnModelScrach(num_hiddens, len(vocab))
train_ch8(model, train_iter, vocab, lr, num_epochs, strategy)
def train_ch6(net_fn, train_iter, test_iter, num_epochs, lr,
              device=d2l.try_gpu()):
    """Train a model with a GPU (defined in Chapter 6).

    Args:
        net_fn: zero-argument factory returning an uncompiled Keras model.
        train_iter: training dataset (passed directly to `fit`).
        test_iter: evaluation dataset (consumed by TrainCallback).
        num_epochs: number of epochs to fit.
        lr: SGD learning rate.
        device: d2l device object; its `_device_name` selects placement.
            NOTE(review): the default is evaluated once at import time.

    Returns:
        Tuple (loss_arr, train_acc_arr, test_acc_arr, net) — the metric
        histories collected by TrainCallback plus the trained model.
    """
    # BUG FIX: the original hard-coded '/device:XLA_GPU:0' and silently
    # ignored the `device` argument; honor the caller's choice instead.
    device_name = device._device_name
    strategy = tf.distribute.OneDeviceStrategy(device_name)
    # Optimizer, loss, and model must all be created inside the scope.
    with strategy.scope():
        optimizer = tf.keras.optimizers.SGD(learning_rate=lr)
        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        net = net_fn()
        net.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    callback = TrainCallback(net, train_iter, test_iter, num_epochs,
                             device_name)
    net.fit(train_iter, epochs=num_epochs, verbose=0, callbacks=[callback])
    return callback.loss_arr, callback.train_acc_arr, callback.test_acc_arr, net
Example no. 3
0
def train_ch8(model, train_iter, vocab, num_hiddens, lr, num_epochs,
              use_random_iter=False):
    """Train a model (defined in Chapter 8)."""
    # Fresh parameters sized for this vocabulary and hidden dimension.
    params = get_params(len(vocab), num_hiddens)
    updater = tf.keras.optimizers.SGD(lr)
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    animator = d2l.Animator(xlabel='epoch', ylabel='perplexity',
                            legend=['train'], xlim=[1, num_epochs])

    def predict(prefix):
        # Generate 50 tokens continuing `prefix` with the current params.
        return predict_ch8(prefix, 50, model, vocab, params)

    # Training loop: log perplexity and a text sample every 10 epochs.
    for epoch in range(num_epochs):
        ppl, speed = train_epoch_ch8(
            model, train_iter, loss, updater, params, use_random_iter)
        if epoch % 10 == 0:
            print(predict('time traveller'))
            animator.add(epoch + 1, [ppl])
    device = d2l.try_gpu()._device_name
    print(f'perplexity {ppl:.1f}, {speed:.1f} tokens/sec on {str(device)}')
    print(predict('time traveller'))
    print(predict('traveller'))
Example no. 4
0
            """Filters = 256, Kernel size = 5x5, activation = relu"""
            tf.keras.layers.Conv2D(filters=256, kernel_size=5, padding='same',activation='relu'),
            """maxpooling layer of size 3 and stride 2"""        
            tf.keras.layers.MaxPool2D(pool_size=3, strides=2),
            """Use three successive convolutional layers and a smaller convolution window"""
            tf.keras.layers.Conv2D(filters=384, kernel_size=3, padding='same',activation='relu'),
            tf.keras.layers.Conv2D(filters=384, kernel_size=3, padding='same',activation='relu'),
            tf.keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',activation='relu'),
            tf.keras.layers.MaxPool2D(pool_size=3, strides=2),
            """Flatten out the output for dense layer"""
            tf.keras.layers.Flatten(),
            """Start MLP"""
            tf.keras.layers.Dense(4096, activation='relu'),
            tf.keras.layers.Dropout(0.5),
            tf.keras.layers.Dense(4096, activation='relu'),
            tf.keras.layers.Dropout(0.5),
            """For Fashion MNIST the output classes are 10"""
            tf.keras.layers.Dense(10)])

# IDIOM FIX: the bare triple-quoted strings here were no-op expression
# statements pretending to be comments; real comments are used instead.

# Construct a single-channel example with height and width 224 to observe
# the output shape produced by each layer of the network.
X = tf.random.uniform((1, 224, 224, 1))
for layer in net().layers:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)

# Train the model on Fashion-MNIST, resized to 224x224 to match the input.
batch_size = 128
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)
lr, num_epochs = 0.01, 10
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())