def train_model(model, train_data, valid_data, fields, optim):
    # If repeat=True, the loop 'for b, batch in enumerate(train_iter):' will
    # run forever until it is broken manually, so keep repeat=False here.
    train_iter = table.IO.OrderedIterator(
        dataset=train_data, batch_size=opt.batch_size,
        device=opt.gpuid[0], repeat=False)
    # train=False already implies repeat=False, so repeat does not need to be
    # set explicitly here.
    # sort_within_batch=False: the elements of a batch are in ascending order
    # of length; otherwise they are in descending order.
    # sort=True sorts the examples of the whole dataset, and train=False means
    # the batches will not be randomly shuffled.
    # Since the order during training is (and should be) random, unlike here,
    # the training iterator does not need sort=True, sort_within_batch=False.
    valid_iter = table.IO.OrderedIterator(
        dataset=valid_data, batch_size=opt.batch_size,
        device=opt.gpuid[0], train=False, sort=True, sort_within_batch=False)

    # The default agg_sample_rate is 0.5; the default smooth_eps is 0.
    # The loss is the sum of negative log likelihoods.
    train_loss = table.Loss.TableLossCompute(
        opt.agg_sample_rate, smooth_eps=model.opt.smooth_eps).cuda()
    valid_loss = table.Loss.TableLossCompute(
        opt.agg_sample_rate, smooth_eps=model.opt.smooth_eps).cuda()

    trainer = table.Trainer(model, train_iter, valid_iter,
                            train_loss, valid_loss, optim)

    for epoch in range(opt.start_epoch, opt.epochs + 1):
        print('')
        if opt.fix_word_vecs:
            if epoch >= opt.update_word_vecs_after:
                model.q_encoder.embeddings.set_update(True)
            else:
                model.q_encoder.embeddings.set_update(False)

        # 1. Train for one epoch on the training set.
        train_stats = trainer.train(epoch, report_func)
        print('Train accuracy: %s' % train_stats.accuracy(True))

        # 2. Validate on the validation set.
        valid_stats = trainer.validate()
        print('Validation accuracy: %s' % valid_stats.accuracy(True))

        # 3. Log to remote server.
        # train_stats.log("train", logger, optim.lr, epoch)
        # valid_stats.log("valid", logger, optim.lr, epoch)

        # 4. Update the learning rate.
        trainer.epoch_step(None, epoch)

        # 5. Drop a checkpoint if needed.
        if epoch >= opt.start_checkpoint_at:
            trainer.drop_checkpoint(opt, epoch, fields, valid_stats)
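
# The repeat/train/sort semantics described in the comments above come from
# torchtext's Iterator. For reference, OpenNMT-style codebases typically
# define an OrderedIterator roughly as sketched below; this is an assumption
# about what table.IO.OrderedIterator does, not its actual source.
import torchtext.data


class OrderedIteratorSketch(torchtext.data.Iterator):
    def create_batches(self):
        if self.train:
            # Training: bucket examples of similar length into pools, then
            # shuffle the resulting batches for randomized epoch order.
            self.batches = torchtext.data.pool(
                self.data(), self.batch_size, self.sort_key,
                self.batch_size_fn, random_shuffler=self.random_shuffler)
        else:
            # Evaluation: deterministic batches, each sorted by sort_key.
            self.batches = []
            for b in torchtext.data.batch(self.data(), self.batch_size,
                                          self.batch_size_fn):
                self.batches.append(sorted(b, key=self.sort_key))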
def train_model(model, train_data, valid_data, fields, optim):
    # repeat=True makes the iterator cycle over the data indefinitely, so the
    # trainer itself has to break out of the batch loop at epoch boundaries.
    train_iter = table.IO.OrderedIterator(
        dataset=train_data, batch_size=opt.batch_size, device=device,
        repeat=True)
    valid_iter = table.IO.OrderedIterator(
        dataset=valid_data, batch_size=opt.batch_size, device=device,
        repeat=True, train=False, sort=True, sort_within_batch=False)

    # Move the losses to the GPU only when one is available.
    train_loss = table.Loss.TableLossCompute(
        opt.agg_sample_rate, opt, smooth_eps=model.opt.smooth_eps)
    if torch.cuda.is_available():
        train_loss = train_loss.cuda()
    valid_loss = table.Loss.TableLossCompute(
        opt.agg_sample_rate, opt, smooth_eps=model.opt.smooth_eps)
    if torch.cuda.is_available():
        valid_loss = valid_loss.cuda()

    trainer = table.Trainer(model, train_iter, valid_iter,
                            train_loss, valid_loss, optim)

    for epoch in range(opt.start_epoch, opt.epochs + 1):
        print('Started Epoch: ' + str(epoch))
        # len(train_iter) is the number of batches per epoch, not the number
        # of examples.
        print('num batches: ' + str(len(train_iter)))
        if opt.fix_word_vecs:
            if epoch >= opt.update_word_vecs_after:
                model.q_encoder.embeddings.set_update(True)
            else:
                model.q_encoder.embeddings.set_update(False)

        # 1. Train for one epoch on the training set.
        train_stats = trainer.train(epoch, report_func)
        print('Train accuracy: %s' % train_stats.accuracy(True))

        # 2. Validate on the validation set.
        valid_stats = trainer.validate()
        print('Validation accuracy: %s' % valid_stats.accuracy(True))

        # 3. Log to remote server.
        # train_stats.log("train", logger, optim.lr, epoch)
        # valid_stats.log("valid", logger, optim.lr, epoch)

        # 4. Update the learning rate.
        trainer.epoch_step(None, epoch)

        # 5. Drop a checkpoint if needed.
        if epoch >= opt.start_checkpoint_at:
            trainer.drop_checkpoint(opt, epoch, fields, valid_stats)
        print('Completed Epoch: ' + str(epoch))
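
# Unlike the other variants, this one reads a module-level `device` and guards
# the .cuda() calls. A minimal sketch of the setup it assumes; the name
# `device` comes from the code above, everything else is an assumption.
import torch

# Use the first GPU when available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')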
def train_model(model, train_data, valid_data, fields, optim):
    train_iter = table.IO.OrderedIterator(
        dataset=train_data, batch_size=opt.batch_size,
        device=opt.gpuid[0], repeat=False)
    valid_iter = table.IO.OrderedIterator(
        dataset=valid_data, batch_size=opt.batch_size,
        device=opt.gpuid[0], train=False, sort=True, sort_within_batch=False)

    train_loss = table.Loss.TableLossCompute(
        opt.agg_sample_rate, smooth_eps=model.opt.smooth_eps).cuda()
    valid_loss = table.Loss.TableLossCompute(
        opt.agg_sample_rate, smooth_eps=model.opt.smooth_eps).cuda()

    trainer = table.Trainer(model, train_iter, valid_iter,
                            train_loss, valid_loss, optim)

    for epoch in range(opt.start_epoch, opt.epochs + 1):
        print('')
        if opt.fix_word_vecs:
            if epoch >= opt.update_word_vecs_after:
                model.q_encoder.embeddings.set_update(True)
            else:
                model.q_encoder.embeddings.set_update(False)

        # 1. Train for one epoch on the training set.
        train_stats = trainer.train(epoch, report_func)
        print('Train accuracy: %s' % train_stats.accuracy(True))

        # 2. Validate on the validation set.
        valid_stats = trainer.validate()
        print('Validation accuracy: %s' % valid_stats.accuracy(True))

        # 3. Log to remote server.
        # train_stats.log("train", logger, optim.lr, epoch)
        # valid_stats.log("valid", logger, optim.lr, epoch)

        # 4. Update the learning rate.
        trainer.epoch_step(None, epoch)

        # 5. Drop a checkpoint if needed.
        if epoch >= opt.start_checkpoint_at:
            trainer.drop_checkpoint(opt, epoch, fields, valid_stats)
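
# All four variants hand a report_func callback to trainer.train. A minimal
# sketch of such a callback, modeled on OpenNMT-style trainers: the signature,
# opt.report_every, and the table.Statistics class are assumptions here, not
# code confirmed by this repo.
def report_func_sketch(epoch, batch, num_batches, start_time, lr,
                       report_stats):
    # Emit running statistics every opt.report_every batches, then reset the
    # accumulator so each report covers a fresh window of batches.
    if batch % opt.report_every == -1 % opt.report_every:
        report_stats.output(epoch, batch + 1, num_batches, start_time)
        report_stats = table.Statistics()
    return report_stats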
def train(model, train_data, valid_data, fields, optim):
    experiment.set_model_graph(str(model))
    train_iter = table.IO.OrderedIterator(
        dataset=train_data, batch_size=args.batch_size,
        device=args.gpu_id[0], repeat=False)
    valid_iter = table.IO.OrderedIterator(
        dataset=valid_data, batch_size=args.batch_size,
        device=args.gpu_id[0], train=False, sort=True,
        sort_within_batch=False)

    train_loss = table.Loss.LossCompute(smooth_eps=model.args.smooth_eps).cuda()
    valid_loss = table.Loss.LossCompute(smooth_eps=model.args.smooth_eps).cuda()

    trainer = table.Trainer(model, train_iter, valid_iter, train_loss,
                            valid_loss, optim, summary_writer, experiment)

    logger.debug("Training from epoch %d, total: %d" %
                 (args.start_epoch, args.epochs))
    for epoch in range(args.start_epoch, args.epochs + 1):
        if args.fix_word_vecs:
            model.q_encoder.embeddings.set_update(
                epoch >= args.update_word_vecs_after)

        train_stats = trainer.train(epoch, fields, report_func)
        logger.info('Train accuracy: %s' % train_stats.accuracy(return_str=True))
        for k, v in train_stats.accuracy(return_str=False).items():
            summary_writer.add_scalar("train/accuracy/%s" % k, v / 100.0,
                                      trainer.global_timestep)
            experiment.log_metric("train/accuracy/%s" % k, v / 100.0,
                                  step=trainer.global_timestep)

        valid_stats = trainer.validate(epoch, fields)
        logger.info('Validation accuracy: %s' %
                    valid_stats.accuracy(return_str=True))
        for k, v in valid_stats.accuracy(return_str=False).items():
            summary_writer.add_scalar("valid/accuracy/%s" % k, v / 100.0,
                                      trainer.global_timestep)
            experiment.log_metric("valid/accuracy/%s" % k, v / 100.0,
                                  step=trainer.global_timestep)

        # Update the learning rate.
        trainer.epoch_step(eval_metric=None, epoch=epoch)
        experiment.log_epoch_end(epoch_cnt=epoch)

        if epoch >= args.start_checkpoint_at:
            trainer.drop_checkpoint(args, epoch, fields, valid_stats)

    logger.info('Training done')
    summary_writer.close()
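
# Every variant freezes or unfreezes the question-encoder embeddings through
# set_update when fix_word_vecs is enabled. A minimal sketch of what such a
# method can look like, assuming the embeddings wrap a standard nn.Embedding;
# this is illustrative, not the repo's actual implementation.
import torch.nn as nn


class FreezableEmbeddings(nn.Module):
    """Embedding layer whose weights can be frozen until a chosen epoch."""

    def __init__(self, vocab_size, emb_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim)

    def set_update(self, update):
        # With update=False no gradients flow into the embedding table, so
        # pretrained word vectors stay intact until
        # epoch >= update_word_vecs_after flips this back to True.
        self.embedding.weight.requires_grad = update

    def forward(self, indices):
        return self.embedding(indices)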