def training_loop(n_steps=50, cutoff=0.05, output_dir="./model/"):
    train_gen, eval_gen, vocab_size = generate_data(cutoff)
    lr_schedule = trax.lr.warmup_and_rsqrt_decay(n_warmup_steps=1000,
                                                 max_value=0.01)
    train_task = training.TrainTask(
        labeled_data=train_gen,                # labeled data
        loss_layer=tl.CrossEntropyLoss(),      # loss layer
        optimizer=trax.optimizers.Adam(0.01),  # optimizer
        lr_schedule=lr_schedule,               # learning-rate schedule
        n_steps_per_checkpoint=n_steps)        # steps per checkpoint
    eval_task = training.EvalTask(
        labeled_data=eval_gen,                           # labeled data
        metrics=[tl.CrossEntropyLoss(), tl.Accuracy()])  # metrics
    loop = training.Loop(ReformerLM(vocab_size, 6, mode='train'),
                         train_task,
                         eval_tasks=[eval_task],
                         output_dir=output_dir)
    return loop
def training_loop(TransformerLM, train_gen, eval_gen, output_dir="./model"):
    output_dir = os.path.expanduser(output_dir)
    lr_schedule = trax.lr.warmup_and_rsqrt_decay(n_warmup_steps=1000,
                                                 max_value=0.01)
    # Set up the loss function and the Adam optimizer used to fit the data.
    train_task = training.TrainTask(labeled_data=train_gen,
                                    loss_layer=tl.CrossEntropyLoss(),
                                    optimizer=trax.optimizers.Adam(0.01),
                                    lr_schedule=lr_schedule,
                                    n_steps_per_checkpoint=10)
    # Evaluate on a held-out dataset to watch for overfitting.
    eval_task = training.EvalTask(
        labeled_data=eval_gen,
        metrics=[tl.CrossEntropyLoss(), tl.Accuracy()])
    loop = training.Loop(TransformerLM(d_model=512, d_ff=2048,
                                       n_layers=6, n_heads=8, mode='train'),
                         train_task,
                         eval_tasks=[eval_task],
                         output_dir=output_dir)
    return loop
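# A minimal usage sketch for the helper above. The generator names
# (my_train_gen, my_eval_gen) are hypothetical placeholders; it assumes
# trax.models.TransformerLM as the model factory and generators that yield
# (inputs, targets, weights) batches.
loop = training_loop(trax.models.TransformerLM, my_train_gen, my_eval_gen)
loop.run(n_steps=100)  # Train for 100 steps, checkpointing every 10.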
def _mnist_tasks(head=None):
    """Creates MNIST training and evaluation tasks.

    Args:
      head: Adaptor layer to put before loss and accuracy layers in the tasks.

    Returns:
      A pair (train_task, eval_task) consisting of the MNIST training task and
      the MNIST evaluation task using cross-entropy as loss and accuracy as
      metric.
    """
    loss = tl.CrossEntropyLoss()
    accuracy = tl.Accuracy()
    if head is not None:
        loss = tl.Serial(head, loss)
        accuracy = tl.Serial(head, accuracy)
    task = training.TrainTask(
        itertools.cycle(_mnist_dataset().train_stream(1)),
        loss,
        adam.Adam(0.001),
    )
    eval_task = training.EvalTask(
        itertools.cycle(_mnist_dataset().eval_stream(1)),
        [loss, accuracy],
        n_eval_batches=10,
        metric_names=['CrossEntropy', 'Accuracy'],
    )
    return (task, eval_task)
def test_train_mnist(self):
    """Train MNIST model (almost) fully, to compare to other implementations.

    Evals for cross-entropy loss and accuracy are run every 50 steps; their
    values are visible in the test log.
    """
    mnist_model = tl.Serial(
        tl.Flatten(),
        tl.Dense(512),
        tl.Relu(),
        tl.Dense(512),
        tl.Relu(),
        tl.Dense(10),
        tl.LogSoftmax(),
    )
    task = training.TrainTask(
        itertools.cycle(_mnist_dataset().train_stream(1)),
        tl.CrossEntropyLoss(),
        adafactor.Adafactor(.02))
    eval_task = training.EvalTask(
        itertools.cycle(_mnist_dataset().eval_stream(1)),
        [tl.CrossEntropyLoss(), tl.Accuracy()],
        n_eval_batches=10)
    training_session = training.Loop(
        mnist_model,
        task,
        eval_task=eval_task,
        eval_at=lambda step_n: step_n % 50 == 0)
    training_session.run(n_steps=1000)
    self.assertEqual(training_session.current_step, 1000)
def test_names(self):
    layer = tl.L2Loss()
    self.assertEqual('L2Loss_in3', str(layer))
    layer = tl.Accuracy()
    self.assertEqual('Accuracy_in3', str(layer))
    layer = tl.SequenceAccuracy()
    self.assertEqual('SequenceAccuracy_in3', str(layer))
    layer = tl.CrossEntropyLoss()
    self.assertEqual('CrossEntropyLoss_in3', str(layer))
    layer = tl.CrossEntropySum()
    self.assertEqual('CrossEntropySum_in3', str(layer))
def _mnist_tasks():
    task = training.TrainTask(
        itertools.cycle(_mnist_dataset().train_stream(1)),
        tl.CrossEntropyLoss(),
        adam.Adam(0.001),
    )
    eval_task = training.EvalTask(
        itertools.cycle(_mnist_dataset().eval_stream(1)),
        (tl.CrossEntropyLoss(), tl.Accuracy()),
        n_eval_batches=10,
        metric_names=('CrossEntropy', 'Accuracy'),
    )
    return (task, eval_task)
def test_accuracy_even_weights(self):
    layer = tl.Accuracy()
    weights = np.array([1., 1., 1.])
    targets = np.array([0, 1, 2])

    model_outputs = np.array([[.7, .2, .1, 0.],
                              [.2, .7, .1, 0.],
                              [.2, .1, .7, 0.]])
    accuracy = layer([model_outputs, targets, weights])
    self.assertEqual(accuracy, 1.0)

    model_outputs = np.array([[.2, .1, .7, 0.],
                              [.2, .1, .7, 0.],
                              [.2, .1, .7, 0.]])
    accuracy = layer([model_outputs, targets, weights])
    self.assertEqual(accuracy, 1 / 3)
def test_accuracy_binary_classifier(self):
    layer = tl.Accuracy(classifier=tl.ThresholdToBinary())
    targets = np.array([[0, 0, 1, 1],
                        [1, 1, 1, 0]])
    weights = np.ones_like(targets)

    model_outputs = np.array([[.499, .500, .501, .502],
                              [.503, .502, .501, .500]])
    accuracy = layer([model_outputs, targets, weights])
    self.assertEqual(accuracy, 1.0)

    model_outputs = np.array([[.498, .499, .500, .501],
                              [.502, .501, .500, .499]])
    accuracy = layer([model_outputs, targets, weights])
    self.assertEqual(accuracy, .75)
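# A quick check of the thresholding behavior the test above exercises: the
# first assertion only passes if .500 predicts class 0 while .501 predicts
# class 1, i.e. ThresholdToBinary maps values strictly above its default 0.5
# threshold to 1. A standalone sketch (printed values are the expectation):
import numpy as np
from trax import layers as tl

to_binary = tl.ThresholdToBinary()
print(to_binary(np.array([.499, .500, .501])))  # expected: [0, 0, 1]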
def test_mnist(self) -> None:
    trainer = TraxTrainer()
    trainer.load_data('mnist', tfds_dir=TestMnist.tfds_dir)
    trainer.load_model(get_model, False, num_classes=10)
    training_session = trainer.train(
        epochs=self.epochs,
        model_dir=TestMnist.model_dir,
        metric_emit_freq=lambda step_n: step_n % 50 == 0,
        metrics=[tl.CrossEntropyLoss(), tl.Accuracy()],
        loss=tl.CrossEntropyLoss(),
        optimizer=adafactor.Adafactor(.02),
        callbacks=None,
        save_directory=None)
    self.assertEqual(training_session.current_step, self.epochs)
def train_model(model, data_generator, batch_size=32, max_length=64,
                lines=lines, eval_lines=eval_lines, n_steps=1,
                output_dir='model/'):
    """Trains the model.

    Args:
        model (trax.layers.combinators.Serial): GRU model.
        data_generator (function): Data generator function.
        batch_size (int, optional): Number of lines per batch. Defaults to 32.
        max_length (int, optional): Maximum length allowed for a line to be
            processed. Defaults to 64.
        lines (list, optional): List of lines to use for training. Defaults
            to lines.
        eval_lines (list, optional): List of lines to use for evaluation.
            Defaults to eval_lines.
        n_steps (int, optional): Number of steps to train. Defaults to 1.
        output_dir (str, optional): Relative path of directory to save model.
            Defaults to "model/".

    Returns:
        trax.supervised.training.Loop: Training loop for the model.
    """
    ### START CODE HERE (Replace instances of 'None' with your code) ###
    bare_train_generator = data_generator(batch_size=batch_size,
                                          max_length=max_length,
                                          data_lines=lines)
    infinite_train_generator = itertools.cycle(bare_train_generator)

    bare_eval_generator = data_generator(batch_size=batch_size,
                                         max_length=max_length,
                                         data_lines=eval_lines)
    infinite_eval_generator = itertools.cycle(bare_eval_generator)

    train_task = training.TrainTask(
        labeled_data=infinite_train_generator,  # Infinite train data generator.
        loss_layer=tl.CrossEntropyLoss(),       # Instantiated loss layer.
        optimizer=trax.optimizers.Adam(learning_rate=0.0005)  # Adam with an explicit learning rate.
    )

    eval_task = training.EvalTask(
        labeled_data=infinite_eval_generator,  # Infinite eval data generator.
        metrics=[tl.CrossEntropyLoss(), tl.Accuracy()],  # Instantiated metric layers.
        n_eval_batches=3  # For better evaluation accuracy in reasonable time.
    )

    training_loop = training.Loop(model,
                                  train_task,
                                  eval_task=eval_task,
                                  output_dir=output_dir)

    training_loop.run(n_steps=n_steps)
    ### END CODE HERE ###

    # Return the loop: it holds a handle to the model, its weights, etc.
    return training_loop
def set_model(model, train_stream, eval_stream, output_dir):
    train_task = training.TrainTask(
        labeled_data=train_stream,
        loss_layer=tl.CrossEntropyLoss(),
        optimizer=trax.optimizers.Adam(.01),
        lr_schedule=trax.lr.warmup_and_rsqrt_decay(1000, .01),
        n_steps_per_checkpoint=10)
    eval_task = training.EvalTask(
        labeled_data=eval_stream,
        metrics=[tl.CrossEntropyLoss(), tl.Accuracy()])
    training_loop = training.Loop(model,
                                  train_task,
                                  eval_tasks=[eval_task],
                                  output_dir=output_dir)
    return training_loop
def training_loop(ReformerLM, train_gen, eval_gen, output_dir="./model/"):
    """
    Args:
        ReformerLM: the Reformer language model you are building.
        train_gen (generator): train data generator.
        eval_gen (generator): validation data generator.
        output_dir (string): Path to save the model output. Defaults to
            './model/'.

    Returns:
        trax.supervised.training.Loop: Training loop for the model.
    """
    # Use the warmup_and_rsqrt_decay learning rate schedule.
    lr_schedule = trax.lr.warmup_and_rsqrt_decay(n_warmup_steps=1000,
                                                 max_value=0.01)

    ### START CODE HERE (REPLACE INSTANCES OF 'None' WITH YOUR CODE) ###

    # Define the train task.
    train_task = training.TrainTask(
        labeled_data=train_gen,                # labeled data
        loss_layer=tl.CrossEntropyLoss(),      # loss layer
        optimizer=trax.optimizers.Adam(0.01),  # optimizer
        lr_schedule=lr_schedule,               # learning-rate schedule
        n_steps_per_checkpoint=10              # steps per checkpoint
    )

    # Define the eval task.
    eval_task = training.EvalTask(
        labeled_data=eval_gen,                          # labeled data
        metrics=[tl.CrossEntropyLoss(), tl.Accuracy()]  # metrics
    )

    ### END CODE HERE ###

    loop = training.Loop(ReformerLM(mode='train'),
                         train_task,
                         eval_tasks=[eval_task],
                         output_dir=output_dir)
    return loop
def training_loop(TransformerLM, train_gen, eval_gen, output_dir="~/model",
                  d_model=512, d_ff=2048, n_layers=6, n_heads=8):
    """
    Args:
        TransformerLM (trax.layers.combinators.Serial): The model you are building.
        train_gen (generator): Training stream of data.
        eval_gen (generator): Evaluation stream of data.
        output_dir (str): Folder to save the model in.

    Returns:
        trax.supervised.training.Loop: Training loop.
    """
    output_dir = os.path.expanduser(output_dir)
    lr_schedule = trax.lr.warmup_and_rsqrt_decay(n_warmup_steps=1000,
                                                 max_value=0.01)

    train_task = training.TrainTask(
        labeled_data=train_gen,
        loss_layer=tl.CrossEntropyLoss(),      # Loss function.
        optimizer=trax.optimizers.Adam(0.01),  # Optimizer with learning rate 0.01.
        lr_schedule=lr_schedule,
        n_steps_per_checkpoint=10)

    eval_task = training.EvalTask(
        labeled_data=eval_gen,  # The evaluation generator.
        metrics=[tl.CrossEntropyLoss(), tl.Accuracy()]  # CrossEntropyLoss and Accuracy.
    )

    loop = training.Loop(TransformerLM(d_model=d_model, d_ff=d_ff,
                                       n_layers=n_layers, n_heads=n_heads,
                                       mode='train'),
                         train_task,
                         eval_tasks=[eval_task],
                         output_dir=output_dir)
    return loop
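# The warmup_and_rsqrt_decay schedule used in the loops above ramps the
# learning rate linearly up to max_value over n_warmup_steps, then decays it
# proportionally to 1/sqrt(step). A quick probe of the returned schedule
# function (a sketch; printed values are approximate):
import trax

schedule = trax.lr.warmup_and_rsqrt_decay(n_warmup_steps=1000, max_value=0.01)
for step in [1, 500, 1000, 4000]:
    # lr rises to 0.01 by step 1000, then e.g. halves by step 4000 (= 4x warmup).
    print(step, schedule(step))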
def test_names(self):
    layer = tl.L2Loss()
    self.assertEqual('L2Loss_in3', str(layer))
    layer = tl.BinaryClassifier()
    self.assertEqual('BinaryClassifier', str(layer))
    layer = tl.MulticlassClassifier()
    self.assertEqual('MulticlassClassifier', str(layer))
    layer = tl.Accuracy()
    self.assertEqual('Accuracy_in3', str(layer))
    layer = tl.SequenceAccuracy()
    self.assertEqual('SequenceAccuracy_in3', str(layer))
    layer = tl.BinaryCrossEntropyLoss()
    self.assertEqual('BinaryCrossEntropyLoss_in3', str(layer))
    layer = tl.CrossEntropyLoss()
    self.assertEqual('CrossEntropyLoss_in3', str(layer))
    layer = tl.BinaryCrossEntropySum()
    self.assertEqual('BinaryCrossEntropySum_in3', str(layer))
    layer = tl.CrossEntropySum()
    self.assertEqual('CrossEntropySum_in3', str(layer))
def train_model(NER, train_generator, eval_generator, train_steps=1,
                output_dir='model'):
    '''
    Input:
        NER - the model you are building
        train_generator - The data generator for training examples
        eval_generator - The data generator for validation examples
        train_steps - number of training steps
        output_dir - folder to save your model
    Output:
        training_loop - a trax supervised training Loop
    '''
    ### START CODE HERE (Replace instances of 'None' with your code) ###
    train_task = training.TrainTask(
        train_generator,                       # A train data generator.
        loss_layer=tl.CrossEntropyLoss(),      # A cross-entropy loss function.
        optimizer=trax.optimizers.Adam(0.01),  # The Adam optimizer.
    )

    eval_task = training.EvalTask(
        labeled_data=eval_generator,  # A labeled data generator.
        metrics=[tl.CrossEntropyLoss(), tl.Accuracy()],  # Evaluate with cross-entropy loss and accuracy.
        n_eval_batches=10  # Number of batches to use on each evaluation.
    )

    training_loop = training.Loop(
        NER,                    # A model to train.
        train_task,             # A train task.
        eval_task=eval_task,    # The evaluation task.
        output_dir=output_dir)  # The output directory.

    # Train with train_steps.
    training_loop.run(n_steps=train_steps)
    ### END CODE HERE ###
    return training_loop
model = NMTAttn()
# print(model)

train_task = training.TrainTask(
    labeled_data=train_batch_data,
    loss_layer=tl.CrossEntropyLoss(),
    optimizer=trax.optimizers.Adam(0.01),
    lr_schedule=trax.lr.warmup_and_rsqrt_decay(1000, 0.01),
    n_steps_per_checkpoint=20,
)

eval_task = training.EvalTask(
    labeled_data=eval_batch_data,
    metrics=[tl.CrossEntropyLoss(), tl.Accuracy()],
)

output_dir = 'Nueral_Machine_Translation_With_Attention/output_dir/'
model_file_path = os.path.join(output_dir, "model.pkl.gz")

# Remove the old model if it exists; this restarts training.
if os.path.exists(model_file_path):
    os.remove(model_file_path)

# Define the training loop.
training_loop = training.Loop(NMTAttn(mode='train'),
                              train_task,
                              eval_tasks=[eval_task],
                              output_dir=output_dir)
training_loop.run(3)
def DropLast():  # Enclosing def reconstructed from the tl.Fn name below.
    def f(x, u):
        return x
    return tl.Fn('DropLast', f)


Latent_METRICS = {
    'next_state_loss': tl.Serial(tl.Select([0, 1, 9]),
                                 tl.WeightedCategoryCrossEntropy()),  # DropLast()),
    'recon_state_loss': tl.Serial(tl.Select([2, 3, 10]),
                                  tl.WeightedCategoryCrossEntropy()),
    'recon_action_loss': tl.Serial(tl.Select([4, 5, 11]),
                                   tl.WeightedCategoryCrossEntropy()),
    'next_state_accuracy': tl.Serial(tl.Select([0, 1, 9]),
                                     tl.Accuracy()),  # DropLast()),
    'recon_state_accuracy': tl.Serial(tl.Select([2, 3, 10]), tl.Accuracy()),
    'recon_action_accuracy': tl.Serial(tl.Select([4, 5, 11]), tl.Accuracy()),
    'next_state_sequence_accuracy': tl.Serial(tl.Select([0, 1, 9]),
                                              tl.SequenceAccuracy()),  # DropLast()),
    'recon_state_sequence_accuracy': tl.Serial(tl.Select([2, 3, 10]),
                                               tl.SequenceAccuracy()),
    'recon_action_sequence_accuracy': tl.Serial(tl.Select([4, 5, 11]),
                                                tl.SequenceAccuracy()),
    # 'neg_log_perplexity': Serial(WeightedCategoryCrossEntropy(),
    #                              Negate()),
    # 'weights_per_batch_per_core': Serial(tl.Drop(), Drop(), Sum()),
}
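# A quick check of the DropLast helper above: it takes two stack items and
# keeps only the first, so it can strip a trailing item before a metric layer.
# A standalone sketch (array shapes chosen arbitrarily for illustration):
import numpy as np
from trax import layers as tl


def DropLast():
    def f(x, u):
        return x
    return tl.Fn('DropLast', f)


layer = DropLast()
out = layer([np.ones((2, 3)), np.zeros((2, 3))])
print(out.shape)  # expected: (2, 3) — the second input is dropped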
    'model_state',  # Auxiliary state of the model.
])

OptState = collections.namedtuple('_OptState', [
    'weights',     # Model weights.
    'slots',       # Per-parameter optimizer state, e.g. gradient moments.
    'opt_params',  # Optimizer (hyper)parameters, e.g. learning rate, momentum.
])

_DEFAULT_METRICS = {
    'loss': tl.Serial(tl.LogSoftmax(), tl.CrossEntropyLoss()),
    'accuracy': tl.Accuracy(),
    'sequence_accuracy': tl.SequenceAccuracy(),
    'neg_log_perplexity': tl.Serial(tl.LogSoftmax(), tl.CrossEntropyLoss(),
                                    tl.Negate()),
    'weights_per_batch_per_core': tl.Serial(tl.Drop(), tl.Drop(), tl.Sum()),
}


class Trainer:
    """Trax trainer.

    A trainer makes it possible to run training steps, train for full epochs,
    save the training state and access evaluation data.
    """
    'opt_state',    # OptState.
    'history',      # trax.history.History.
    'model_state',  # Auxiliary state of the model.
])

OptState = collections.namedtuple('_OptState', [
    'weights',     # Model weights.
    'slots',       # Per-parameter optimizer state, e.g. gradient moments.
    'opt_params',  # Optimizer (hyper)parameters, e.g. learning rate, momentum.
])

_DEFAULT_METRICS = {
    'loss': tl.CrossEntropyLoss(),
    'accuracy': tl.Accuracy(),
    'sequence_accuracy': tl.SequenceAccuracy(),
    'neg_log_perplexity': tl.Serial(tl.CrossEntropyLoss(), tl.Negate()),
    'weights_per_batch_per_core': tl.SumOfWeights(),
}


class Trainer(object):
    """Trax trainer.

    A trainer makes it possible to run training steps, train for full epochs,
    save the training state and access evaluation data.
    """

    def __init__(self, model, loss_fn,
def BERTPretrainingClsAcc():
    return tl.Serial(tl.Select([0, 2, 3], n_in=6), tl.Accuracy())
def BERTPretrainingMLMAcc():
    return tl.Serial(tl.Select([1, 4, 5], n_in=6), tl.Accuracy())
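# The two heads above adapt BERT's six-item pretraining output stack to
# Accuracy's three-input signature: tl.Select([0, 2, 3], n_in=6) consumes all
# six stack items and keeps only positions 0, 2 and 3, which Accuracy then
# reads as (model_outputs, targets, weights). A shape-only sketch; the stack
# layout (cls vs. mlm positions) is an assumption for illustration:
import numpy as np
from trax import layers as tl

metric = tl.Serial(tl.Select([0, 2, 3], n_in=6), tl.Accuracy())
stack = [np.ones((8, 2)),        # 0: cls model outputs (kept)
         np.ones((8, 16, 100)),  # 1: mlm model outputs (dropped)
         np.ones((8,)),          # 2: cls targets (kept)
         np.ones((8,)),          # 3: cls weights (kept)
         np.ones((8, 16)),       # 4: mlm targets (dropped)
         np.ones((8, 16))]       # 5: mlm weights (dropped)
print(metric(stack).shape)  # expected: () — a scalar accuracy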
def test_accuracy_binary_scalar(self):
    layer = tl.Accuracy(classifier=tl.BinaryClassifier())
    xs = [np.ones((9, 1)), np.ones((9, 1)), np.ones((9, 1))]
    y = layer(xs)
    self.assertEqual(y.shape, ())
def test_accuracy_multiclass_scalar(self):
    layer = tl.Accuracy(classifier=tl.MulticlassClassifier())
    xs = [np.ones((9, 4, 4, 20)), np.ones((9, 4, 4)), np.ones((9, 4, 4))]
    y = layer(xs)
    self.assertEqual(y.shape, ())
def test_accuracy_scalar(self):
    layer = tl.Accuracy()
    xs = [np.ones((9, 4, 4, 20)), np.ones((9, 4, 4)), np.ones((9, 4, 4))]
    y = layer(xs)
    self.assertEqual(y.shape, ())