Example No. 1
def training_loop(n_steps=50, cutoff=0.05, output_dir="./model/"):
    train_gen, eval_gen, vocab_size = generate_data(cutoff)

    lr_schedule = trax.lr.warmup_and_rsqrt_decay(n_warmup_steps=1000,
                                                 max_value=0.01)

    train_task = training.TrainTask(
        # labeled data
        labeled_data=train_gen,
        # loss layer
        loss_layer=tl.CrossEntropyLoss(),
        # optimizer
        optimizer=trax.optimizers.Adam(0.01),
        # lr_schedule
        lr_schedule=lr_schedule,
        # n_steps
        n_steps_per_checkpoint=n_steps)

    eval_task = training.EvalTask(
        # labeled data
        labeled_data=eval_gen,
        # metrics
        metrics=[tl.CrossEntropyLoss(), tl.Accuracy()])

    loop = training.Loop(ReformerLM(vocab_size, 6, mode='train'),
                         train_task,
                         eval_tasks=[eval_task],
                         output_dir=output_dir)

    return loop
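
The helper above only builds the training Loop; a hedged usage sketch (assuming generate_data and ReformerLM are defined elsewhere in the same notebook, as this example does):

loop = training_loop(n_steps=50, cutoff=0.05, output_dir="./model/")
loop.run(n_steps=50)  # train for 50 steps; checkpoints follow n_steps_per_checkpoint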
Example No. 2
def training_loop(TransformerLM, train_gen, eval_gen, output_dir="./model"):
    output_dir = os.path.expanduser(output_dir)
    lr_schedule = trax.lr.warmup_and_rsqrt_decay(n_warmup_steps=1000,
                                                 max_value=0.01)

    # This sets up the loss function and the Adam optimizer used to fit the data efficiently
    train_task = training.TrainTask(labeled_data=train_gen,
                                    loss_layer=tl.CrossEntropyLoss(),
                                    optimizer=trax.optimizers.Adam(0.01),
                                    lr_schedule=lr_schedule,
                                    n_steps_per_checkpoint=10)
    # We evaluate on a held-out dataset to detect overfitting
    eval_task = training.EvalTask(
        labeled_data=eval_gen, metrics=[tl.CrossEntropyLoss(),
                                        tl.Accuracy()])

    loop = training.Loop(TransformerLM(d_model=512,
                                       d_ff=2048,
                                       n_layers=6,
                                       n_heads=8,
                                       mode='train'),
                         train_task,
                         eval_tasks=[eval_task],
                         output_dir=output_dir)

    return loop
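
For context, the train_gen and eval_gen passed to this helper are usually built with a trax.data pipeline. The sketch below follows the pattern from the Trax documentation; the dataset name, vocab file, and bucket sizes are illustrative assumptions rather than what this example actually used:

import trax

# Illustrative input pipeline; dataset, vocab and batch sizes are assumptions.
data_pipeline = trax.data.Serial(
    trax.data.TFDS('imdb_reviews', keys=('text', 'label'), train=True),
    trax.data.Tokenize(vocab_file='en_8k.subword', keys=[0]),
    trax.data.Shuffle(),
    trax.data.FilterByLength(max_length=2048, length_keys=[0]),
    trax.data.BucketByLength(boundaries=[32, 128, 512, 2048],
                             batch_sizes=[512, 128, 32, 8, 1],
                             length_keys=[0]),
    trax.data.AddLossWeights(),
)
train_gen = data_pipeline()  # generator of (inputs, targets, weights) batches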
Example No. 3
def _mnist_tasks(head=None):
    """Creates MNIST training and evaluation tasks.

  Args:
    head: Adaptor layer to put before loss and accuracy layers in the tasks.

  Returns:
    A pair (train_task, eval_task) consisting of the MNIST training task and the
    MNIST evaluation task using cross-entropy as loss and accuracy as metric.
  """
    loss = tl.CrossEntropyLoss()
    accuracy = tl.Accuracy()
    if head is not None:
        loss = tl.Serial(head, loss)
        accuracy = tl.Serial(head, accuracy)
    task = training.TrainTask(
        itertools.cycle(_mnist_dataset().train_stream(1)),
        loss,
        adam.Adam(0.001),
    )
    eval_task = training.EvalTask(
        itertools.cycle(_mnist_dataset().eval_stream(1)),
        [loss, accuracy],
        n_eval_batches=10,
        metric_names=['CrossEntropy', 'Accuracy'],
    )
    return (task, eval_task)
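
A hedged sketch of consuming the returned pair (mnist_model stands in for any Trax model, such as the MLP in the next example; depending on the Trax version the Loop keyword is eval_task or eval_tasks):

train_task, eval_task = _mnist_tasks()
loop = training.Loop(mnist_model,
                     train_task,
                     eval_tasks=[eval_task],
                     output_dir='./mnist_model/')
loop.run(n_steps=1000)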
Example No. 4
    def test_train_mnist(self):
        """Train MNIST model (almost) fully, to compare to other implementations.

    Evals for cross-entropy loss and accuracy are run every 50 steps;
    their values are visible in the test log.
    """
        mnist_model = tl.Serial(
            tl.Flatten(),
            tl.Dense(512),
            tl.Relu(),
            tl.Dense(512),
            tl.Relu(),
            tl.Dense(10),
            tl.LogSoftmax(),
        )
        task = training.TrainTask(
            itertools.cycle(_mnist_dataset().train_stream(1)),
            tl.CrossEntropyLoss(), adafactor.Adafactor(.02))
        eval_task = training.EvalTask(
            itertools.cycle(_mnist_dataset().eval_stream(1)),
            [tl.CrossEntropyLoss(), tl.Accuracy()],
            n_eval_batches=10)

        training_session = training.Loop(
            mnist_model,
            task,
            eval_task=eval_task,
            eval_at=lambda step_n: step_n % 50 == 0)

        training_session.run(n_steps=1000)
        self.assertEqual(training_session.current_step, 1000)
Example No. 5
 def test_names(self):
     layer = tl.L2Loss()
     self.assertEqual('L2Loss_in3', str(layer))
     layer = tl.Accuracy()
     self.assertEqual('Accuracy_in3', str(layer))
     layer = tl.SequenceAccuracy()
     self.assertEqual('SequenceAccuracy_in3', str(layer))
     layer = tl.CrossEntropyLoss()
     self.assertEqual('CrossEntropyLoss_in3', str(layer))
     layer = tl.CrossEntropySum()
     self.assertEqual('CrossEntropySum_in3', str(layer))
Example No. 6
def _mnist_tasks():
    task = training.TrainTask(
        itertools.cycle(_mnist_dataset().train_stream(1)),
        tl.CrossEntropyLoss(),
        adam.Adam(0.001),
    )
    eval_task = training.EvalTask(
        itertools.cycle(_mnist_dataset().eval_stream(1)),
        (tl.CrossEntropyLoss(), tl.Accuracy()),
        n_eval_batches=10,
        metric_names=('CrossEntropy', 'Accuracy'),
    )
    return (task, eval_task)
Example No. 7
    def test_accuracy_even_weights(self):
        layer = tl.Accuracy()
        weights = np.array([1., 1., 1.])
        targets = np.array([0, 1, 2])

        model_outputs = np.array([[.7, .2, .1, 0.], [.2, .7, .1, 0.],
                                  [.2, .1, .7, 0.]])
        accuracy = layer([model_outputs, targets, weights])
        self.assertEqual(accuracy, 1.0)

        model_outputs = np.array([[.2, .1, .7, 0.], [.2, .1, .7, 0.],
                                  [.2, .1, .7, 0.]])
        accuracy = layer([model_outputs, targets, weights])
        self.assertEqual(accuracy, 1 / 3)
Example No. 8
    def test_accuracy_binary_classifier(self):
        layer = tl.Accuracy(classifier=tl.ThresholdToBinary())
        targets = np.array([[0, 0, 1, 1], [1, 1, 1, 0]])
        weights = np.ones_like(targets)

        model_outputs = np.array([[.499, .500, .501, .502],
                                  [.503, .502, .501, .500]])
        accuracy = layer([model_outputs, targets, weights])
        self.assertEqual(accuracy, 1.0)

        model_outputs = np.array([[.498, .499, .500, .501],
                                  [.502, .501, .500, .499]])
        accuracy = layer([model_outputs, targets, weights])
        self.assertEqual(accuracy, .75)
Example No. 9
 def test_mnist(self) -> None:
     trainer = TraxTrainer()
     trainer.load_data('mnist', tfds_dir=TestMnist.tfds_dir)
     trainer.load_model(get_model, False, num_classes=10)
     training_session = trainer.train(
         epochs=self.epochs,
         model_dir=TestMnist.model_dir,
         metric_emit_freq=lambda step_n: step_n % 50 == 0,
         metrics=[tl.CrossEntropyLoss(),
                  tl.Accuracy()],
         loss=tl.CrossEntropyLoss(),
         optimizer=adafactor.Adafactor(.02),
         callbacks=None,
         save_directory=None)
     self.assertEqual(training_session.current_step, self.epochs)
Example No. 10
def train_model(model, data_generator, batch_size=32, max_length=64,
                lines=lines, eval_lines=eval_lines, n_steps=1,
                output_dir='model/'):
    """Function that trains the model

    Args:
        model (trax.layers.combinators.Serial): GRU model.
        data_generator (function): Data generator function.
        batch_size (int, optional): Number of lines per batch. Defaults to 32.
        max_length (int, optional): Maximum length allowed for a line to be processed. Defaults to 64.
        lines (list, optional): List of lines to use for training. Defaults to lines.
        eval_lines (list, optional): List of lines to use for evaluation. Defaults to eval_lines.
        n_steps (int, optional): Number of steps to train. Defaults to 1.
        output_dir (str, optional): Relative path of directory to save model. Defaults to "model/".

    Returns:
        trax.supervised.training.Loop: Training loop for the model.
    """
    
    ### START CODE HERE (Replace instances of 'None' with your code) ###
    bare_train_generator = data_generator(batch_size=batch_size, max_length=max_length, data_lines=lines)
    infinite_train_generator = itertools.cycle(bare_train_generator)
    
    bare_eval_generator = data_generator(batch_size=batch_size, max_length=max_length, data_lines=eval_lines)
    infinite_eval_generator = itertools.cycle(bare_eval_generator)
   
    train_task = training.TrainTask(
        labeled_data=infinite_train_generator, # Use infinite train data generator
        loss_layer=tl.CrossEntropyLoss(),   # Don't forget to instantiate this object
        optimizer=trax.optimizers.Adam(learning_rate=0.0005)     # Don't forget to add the learning rate parameter
    )

    eval_task = training.EvalTask(
        labeled_data=infinite_eval_generator,    # Use infinite eval data generator
        metrics=[tl.CrossEntropyLoss(), tl.Accuracy()], # Don't forget to instantiate these objects
        n_eval_batches=3      # For better evaluation accuracy in reasonable time
    )
    
    training_loop = training.Loop(model,
                                  train_task,
                                  eval_task=eval_task,
                                  output_dir=output_dir)

    training_loop.run(n_steps=n_steps)
    
    ### END CODE HERE ###
    
    # We return this because it contains a handle to the model, which has the weights etc.
    return training_loop
Example No. 11
def set_model(model, train_stream, eval_stream, output_dir):
    train_task = training.TrainTask(labeled_data=train_stream,
                                    loss_layer=tl.CrossEntropyLoss(),
                                    optimizer=trax.optimizers.Adam(.01),
                                    lr_schedule=trax.lr.warmup_and_rsqrt_decay(
                                        1000, .01),
                                    n_steps_per_checkpoint=10)

    eval_task = training.EvalTask(
        labeled_data=eval_stream,
        metrics=[tl.CrossEntropyLoss(), tl.Accuracy()])

    training_loop = training.Loop(model,
                                  train_task,
                                  eval_tasks=[eval_task],
                                  output_dir=output_dir)
    return training_loop
Example No. 12
def training_loop(ReformerLM, train_gen, eval_gen, output_dir="./model/"):
    """
    Args:
        ReformerLM:  the Reformer language model you are building
        train_gen (generator): train data generator.
        eval_gen (generator): Validation generator. 
        output_dir (string): Path to save the model output. Defaults to './model/'.

    Returns:
        trax.supervised.training.Loop: Training loop for the model.
    """

    # use the warmup_and_rsqrt_decay learning rate schedule
    lr_schedule = trax.lr.warmup_and_rsqrt_decay(n_warmup_steps=1000,
                                                 max_value=0.01)

    ### START CODE HERE (REPLACE INSTANCES OF 'None' WITH YOUR CODE) ###

    # define the train task
    train_task = training.TrainTask(
        # labeled data
        labeled_data=train_gen,
        # loss layer
        loss_layer=tl.CrossEntropyLoss(),
        # optimizer
        optimizer=trax.optimizers.Adam(0.01),
        # lr_schedule
        lr_schedule=lr_schedule,
        # n_steps
        n_steps_per_checkpoint=10
    )

    # define the eval task
    eval_task = training.EvalTask(
        # labeled data
        labeled_data=eval_gen,
        # metrics
        metrics=[tl.CrossEntropyLoss(), tl.Accuracy()]
    )

    ### END CODE HERE ###
    loop = training.Loop(ReformerLM(mode='train'),
                         train_task,
                         eval_tasks=[eval_task],
                         output_dir=output_dir)
    return loop
Example No. 13
def training_loop(TransformerLM,
                  train_gen,
                  eval_gen,
                  output_dir="~/model",
                  d_model=512,
                  d_ff=2048,
                  n_layers=6,
                  n_heads=8):
    """
    Input:
        TransformerLM (trax.layers.combinators.Serial): The model you are building.
        train_gen (generator): Training stream of data.
        eval_gen (generator): Evaluation stream of data.
        output_dir (str): Folder in which to save the model.

    Returns:
        trax.supervised.training.Loop: Training loop.
    """
    output_dir = os.path.expanduser(output_dir)  # expand '~' to the user's home directory
    lr_schedule = trax.lr.warmup_and_rsqrt_decay(n_warmup_steps=1000,
                                                 max_value=0.01)

    train_task = training.TrainTask(
        labeled_data=train_gen,
        loss_layer=tl.CrossEntropyLoss(),  # Loss function
        optimizer=trax.optimizers.Adam(
            0.01),  # Optimizer (Don't forget to set LR to 0.01)
        lr_schedule=lr_schedule,
        n_steps_per_checkpoint=10)

    eval_task = training.EvalTask(
        labeled_data=eval_gen,  # The evaluation generator
        metrics=[tl.CrossEntropyLoss(),
                 tl.Accuracy()]  # CrossEntropyLoss and Accuracy
    )

    loop = training.Loop(TransformerLM(d_model=d_model,
                                       d_ff=d_ff,
                                       n_layers=n_layers,
                                       n_heads=n_heads,
                                       mode='train'),
                         train_task,
                         eval_tasks=[eval_task],
                         output_dir=output_dir)
    return loop
Example No. 14
 def test_names(self):
     layer = tl.L2Loss()
     self.assertEqual('L2Loss_in3', str(layer))
     layer = tl.BinaryClassifier()
     self.assertEqual('BinaryClassifier', str(layer))
     layer = tl.MulticlassClassifier()
     self.assertEqual('MulticlassClassifier', str(layer))
     layer = tl.Accuracy()
     self.assertEqual('Accuracy_in3', str(layer))
     layer = tl.SequenceAccuracy()
     self.assertEqual('SequenceAccuracy_in3', str(layer))
     layer = tl.BinaryCrossEntropyLoss()
     self.assertEqual('BinaryCrossEntropyLoss_in3', str(layer))
     layer = tl.CrossEntropyLoss()
     self.assertEqual('CrossEntropyLoss_in3', str(layer))
     layer = tl.BinaryCrossEntropySum()
     self.assertEqual('BinaryCrossEntropySum_in3', str(layer))
     layer = tl.CrossEntropySum()
     self.assertEqual('CrossEntropySum_in3', str(layer))
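
The '_in3' suffix these assertions expect encodes the number of inputs each metric layer consumes (model outputs, targets, weights); a small hedged check using the same tl alias as the tests:

layer = tl.Accuracy()
print(layer.n_in)    # -> 3
print(str(layer))    # -> 'Accuracy_in3'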
Example No. 15
def train_model(NER,
                train_generator,
                eval_generator,
                train_steps=1,
                output_dir='model'):
    '''
    Input: 
        NER - the model you are building
        train_generator - The data generator for training examples
        eval_generator - The data generator for validation examples,
        train_steps - number of training steps
        output_dir - folder to save your model
    Output:
        training_loop - a trax supervised training Loop
    '''
    ### START CODE HERE (Replace instances of 'None' with your code) ###
    train_task = training.TrainTask(
        train_generator,  # A train data generator
        loss_layer=tl.CrossEntropyLoss(),  # A cross-entropy loss function
        optimizer=trax.optimizers.Adam(0.01),  # The adam optimizer
    )

    eval_task = training.EvalTask(
        labeled_data=eval_generator,  # A labeled data generator
        metrics=[tl.CrossEntropyLoss(), tl.Accuracy()
                 ],  # Evaluate with cross-entropy loss and accuracy
        n_eval_batches=10  # Number of batches to use on each evaluation
    )

    training_loop = training.Loop(
        NER,  # A model to train
        train_task,  # A train task
        eval_task=eval_task,  # The evaluation task
        output_dir=output_dir)  # The output directory

    # Train with train_steps
    training_loop.run(n_steps=train_steps)
    ### END CODE HERE ###
    return training_loop
Example No. 16

model = NMTAttn()
# print(model)

train_task = training.TrainTask(
    labeled_data=train_batch_data,
    loss_layer=tl.CrossEntropyLoss(),
    optimizer=trax.optimizers.Adam(0.01),
    lr_schedule=trax.lr.warmup_and_rsqrt_decay(1000, 0.01),
    n_steps_per_checkpoint=20,
)

eval_task = training.EvalTask(
    labeled_data=eval_batch_data,
    metrics=[tl.CrossEntropyLoss(), tl.Accuracy()],
)

output_dir = 'Nueral_Machine_Translation_With_Attention/output_dir/'
model_file_path = os.path.join(output_dir, "model.pkl.gz")
# Remove any old model checkpoint so training restarts from scratch.
if os.path.exists(model_file_path):
    os.remove(model_file_path)

# define the training loop
training_loop = training.Loop(NMTAttn(mode='train'),
                              train_task,
                              eval_tasks=[eval_task],
                              output_dir=output_dir)

training_loop.run(3)
Example No. 17
def DropLast():
    # Enclosing definition assumed from the Fn name below; the layer
    # keeps x and drops its second input u.
    def f(x, u):
        return x

    return tl.Fn('DropLast', f)


Latent_METRICS = {
    'next_state_loss':
    tl.Serial(tl.Select([0, 1, 9]),
              tl.WeightedCategoryCrossEntropy()),  # DropLast()),
    'recon_state_loss':
    tl.Serial(tl.Select([2, 3, 10]), tl.WeightedCategoryCrossEntropy()),
    'recon_action_loss':
    tl.Serial(tl.Select([4, 5, 11]), tl.WeightedCategoryCrossEntropy()),
    'next_state_accuracy':
    tl.Serial(tl.Select([0, 1, 9]), tl.Accuracy()),  # DropLast()),
    'recon_state_accuracy':
    tl.Serial(tl.Select([2, 3, 10]), tl.Accuracy()),
    'recon_action_accuracy':
    tl.Serial(tl.Select([4, 5, 11]), tl.Accuracy()),
    'next_state_sequence_accuracy':
    tl.Serial(tl.Select([0, 1, 9]), tl.SequenceAccuracy()),  # DropLast()),
    'recon_state_sequence_accuracy':
    tl.Serial(tl.Select([2, 3, 10]), tl.SequenceAccuracy()),
    'recon_action_sequence_accuracy':
    tl.Serial(tl.Select([4, 5, 11]), tl.SequenceAccuracy()),
    # 'neg_log_perplexity': Serial(WeightedCategoryCrossEntropy(),
    #                                 Negate()),
    # 'weights_per_batch_per_core': Serial(tl.Drop(), Drop(), Sum()),
}
Example No. 18
        'model_state',  # Auxiliary state of the model.
    ])

OptState = collections.namedtuple(
    '_OptState',
    [
        'weights',  # Model weights.
        'slots',  # Per-parameter optimizer state, e.g. gradient moments.
        'opt_params',  # Optimizer (hyper)parameters, e.g. learning rate, momentum.
    ])

_DEFAULT_METRICS = {
    'loss':
    tl.Serial(tl.LogSoftmax(), tl.CrossEntropyLoss()),
    'accuracy':
    tl.Accuracy(),
    'sequence_accuracy':
    tl.SequenceAccuracy(),
    'neg_log_perplexity':
    tl.Serial(tl.LogSoftmax(), tl.CrossEntropyLoss(), tl.Negate()),
    'weights_per_batch_per_core':
    tl.Serial(tl.Drop(), tl.Drop(), tl.Sum()),
}


class Trainer:
    """Trax trainer.

  A trainer allows you to run training steps, train for full epochs,
  save the training state, and access evaluation data.
  """
Example No. 19
        'opt_state',  # OptState.
        'history',  # trax.history.History.
        'model_state',  # Auxiliary state of the model.
    ])

OptState = collections.namedtuple(
    '_OptState',
    [
        'weights',  # Model weights.
        'slots',  # Per-parameter optimizer state, e.g. gradient moments.
        'opt_params',  # Optimizer (hyper)parameters, e.g. learning rate, momentum.
    ])

_DEFAULT_METRICS = {
    'loss': tl.CrossEntropyLoss(),
    'accuracy': tl.Accuracy(),
    'sequence_accuracy': tl.SequenceAccuracy(),
    'neg_log_perplexity': tl.Serial(tl.CrossEntropyLoss(), tl.Negate()),
    'weights_per_batch_per_core': tl.SumOfWeights(),
}
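
By construction, the 'neg_log_perplexity' entry above is just the negated cross-entropy applied to the usual (outputs, targets, weights) triple; a minimal sketch with assumed toy shapes, mirroring the accuracy tests earlier on this page:

import numpy as np
from trax import layers as tl

outputs = np.array([[.7, .2, .1, 0.], [.2, .7, .1, 0.]])  # toy model outputs
targets = np.array([0, 1])
weights = np.array([1., 1.])

loss = tl.CrossEntropyLoss()([outputs, targets, weights])
neg_log_perplexity = tl.Serial(tl.CrossEntropyLoss(), tl.Negate())(
    [outputs, targets, weights])
# neg_log_perplexity equals -loss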


class Trainer(object):
    """Trax trainer.

  A trainer allows you to run training steps, train for full epochs,
  save the training state, and access evaluation data.
  """
    def __init__(self,
                 model,
                 loss_fn,
Example No. 20
def BERTPretrainingClsAcc():
    return tl.Serial(tl.Select([0, 2, 3], n_in=6), tl.Accuracy())
Example No. 21
def BERTPretrainingMLMAcc():
    return tl.Serial(tl.Select([1, 4, 5], n_in=6), tl.Accuracy())
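
Both helpers use tl.Select to pull one sub-task's (outputs, targets, weights) triple out of a six-item input stack before tl.Accuracy sees it. A hedged toy check; the stack layout and shapes below are assumptions for illustration only:

import numpy as np
from trax import layers as tl

cls_acc = tl.Serial(tl.Select([0, 2, 3], n_in=6), tl.Accuracy())
stack = [np.array([[.2, .8]]),   # 0: classification outputs
         np.zeros((1, 4)),       # 1: unused by this metric
         np.array([1]),          # 2: classification targets
         np.array([1.]),         # 3: classification weights
         np.zeros((1,)),         # 4: unused
         np.zeros((1,))]         # 5: unused
print(cls_acc(stack))            # -> 1.0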
Example No. 22
 def test_accuracy_binary_scalar(self):
     layer = tl.Accuracy(classifier=tl.BinaryClassifier())
     xs = [np.ones((9, 1)), np.ones((9, 1)), np.ones((9, 1))]
     y = layer(xs)
     self.assertEqual(y.shape, ())
Example No. 23
 def test_accuracy_multiclass_scalar(self):
     layer = tl.Accuracy(classifier=tl.MulticlassClassifier())
     xs = [np.ones((9, 4, 4, 20)), np.ones((9, 4, 4)), np.ones((9, 4, 4))]
     y = layer(xs)
     self.assertEqual(y.shape, ())
Example No. 24
 def test_accuracy_scalar(self):
     layer = tl.Accuracy()
     xs = [np.ones((9, 4, 4, 20)), np.ones((9, 4, 4)), np.ones((9, 4, 4))]
     y = layer(xs)
     self.assertEqual(y.shape, ())