def train(): from models import model_factory from dataloaders import dataloader_factory from trainers import trainer_factory from pruners import pruner_factory from utils import * from utils import scatterplot from torch.utils.tensorboard import SummaryWriter from torchvision import datasets, transforms export_root = setup_train(args) test_result_root = 'experiments/testresults' test_result_title = export_root[12:] test_result_title += '.txt' model = model_factory(args) train_loader, val_loader, test_loader = dataloader_factory(args) pruner = pruner_factory(args, model) trainer = trainer_factory(args, model, train_loader, val_loader, test_loader, export_root, pruner) #load_pretrained_weights(model, './experiments/ml-1m.pth') trainer.train() trainer.test() if args.prune: trainer.prune() #pruner.print_mask(model) #pruner.print_percentage(model) i = 0 test_result = trainer.test() save_test_result(export_root, test_result) save_test_result(test_result_root, test_result, test_result_title) print(test_result_root)
def train():
    """Run one training session driven by the module-level `args` config."""
    export_root = setup_train(args)
    train_loader, val_loader, test_loader = dataloader_factory(args)
    net = model_factory(args)
    trainer = trainer_factory(
        args, net, train_loader, val_loader, test_loader, export_root
    )
    trainer.train()
def train(args):
    """Train with host/worker awareness and crash recovery.

    Writes a status file under the local export root, runs training, and on
    worker machines (MACHINE_IS_HOST is False) uploads the export directory to
    the remote host. On failure, either removes an empty export root or marks
    it for recovery and uploads what exists; the exception is always re-raised.
    """
    local_export_root, remote_export_root, communicator = setup_train(args, MACHINE_IS_HOST)
    # Host machines get no communicator; workers must have one.
    assert (communicator is None and MACHINE_IS_HOST) or (communicator is not None and not MACHINE_IS_HOST)
    if communicator:
        communicator.close()  # close station because it might lose connection during long training
    train_loader, val_loader, test_loader = dataloader_factory(args)
    model = model_factory(args)
    trainer = trainer_factory(args, model, train_loader, val_loader, test_loader, local_export_root)
    status_file = os.path.join(local_export_root, 'status.txt')
    error_log_file = os.path.join(local_export_root, 'error_log.txt')
    open(status_file, 'w').write(STATUS_RUNNING)
    try:
        trainer.train()
        open(status_file, 'w').write(STATUS_FINISHED)
        # Workers (except throwaway 'test' experiments) push results to the host.
        if not MACHINE_IS_HOST and args.experiment_group != 'test':
            communicator = Communicator(HOST, PORT, USERNAME, PASSWORD)
            communicator.upload_dir(local_export_root, remote_export_root)
            communicator.close()
    except Exception as err:
        # recover
        if args.experiment_group == 'test':
            raise
        # No val_log.csv means training never produced anything worth keeping.
        if not os.path.exists(os.path.join(local_export_root, 'tables', 'val_log.csv')):
            print('Removing empty local export root')
            shutil.rmtree(local_export_root)
            raise
        # Partial results exist: mark for recovery, record the error,
        # and (on workers) upload the partial export before re-raising.
        open(status_file, 'w').write(STATUS_RECOVERY)
        open(error_log_file, 'w').write(str(err))
        if not MACHINE_IS_HOST and args.experiment_group != 'test':
            print('Uploading recovery file')
            communicator = Communicator(HOST, PORT, USERNAME, PASSWORD)
            communicator.upload_dir(local_export_root, remote_export_root)
            communicator.close()
        raise
def train(model_args):
    """Build the experiment pipeline, train if requested, then always test."""
    export_root = setup_train(model_args)
    train_loader, val_loader, test_loader = dataloader_factory(model_args)
    net = model_factory(model_args)
    trainer = trainer_factory(
        model_args, net, train_loader, val_loader, test_loader, export_root
    )
    # Training only happens in 'train' mode; the test pass always runs.
    should_train = model_args.mode == 'train'
    if should_train:
        trainer.train()
    trainer.test()
def train():
    """Train a model whose weight initialization is seeded for reproducibility."""
    export_root = setup_train(args)
    # Seed here — before loaders and the model are built — so the RNG state
    # at construction time matches the original call order exactly.
    fix_random_seed_as(args.model_init_seed)
    train_loader, val_loader, test_loader = dataloader_factory(args)
    net = model_factory(args)
    trainer = trainer_factory(
        args, net, train_loader, val_loader, test_loader, export_root
    )
    trainer.train()
def validate(args, mode='val'):
    """Run a single validation (or test) pass, optionally from pretrained weights.

    `mode` selects which split the trainer validates on (default 'val').
    """
    export_dir, _remote_root, comm = setup_train(args, MACHINE_IS_HOST)
    if comm:
        comm.close()
    train_loader, val_loader, test_loader = dataloader_factory(args)
    net = model_factory(args)
    weights = args.pretrained_weights
    if weights is not None:
        net.load(weights)
    trainer = trainer_factory(
        args, net, train_loader, val_loader, test_loader, export_dir
    )
    trainer.just_validate(mode)
def train():
    """Train the model, then interactively offer a test-set evaluation."""
    export_root = setup_train(args)
    train_loader, val_loader, test_loader = dataloader_factory(args)
    net = model_factory(args)
    trainer = trainer_factory(
        args, net, train_loader, val_loader, test_loader, export_root
    )
    trainer.train()
    # Ask before touching the test split; anything but 'y' skips it.
    answer = input('Test model with test dataset? y/[n]: ')
    if answer == 'y':
        trainer.test()
def train():
    """Build the training pipeline, print model/optimizer state, and save the
    freshly initialized model to './initmodel.pth'.

    Fix vs. original: `state_dict()` was called once per loop iteration in
    both print loops — each call rebuilds the whole state dict. It is now
    called once per object and iterated via `.items()`.

    NOTE(review): `trainer.train()` is commented out in the original; this
    function only inspects and saves the initial model.
    """
    export_root = setup_train(args)
    model = model_factory(args)
    train_loader, val_loader, test_loader = dataloader_factory(args)
    pruner = pruner_factory(args, model)
    trainer = trainer_factory(args, model, train_loader, val_loader,
                              test_loader, export_root, pruner)

    print("Model's state_dict:")
    for param_name, tensor in model.bert.state_dict().items():
        print(param_name, "\t", tensor.size())

    # Print optimizer's state_dict
    print("Optimizer's state_dict:")
    for var_name, value in trainer.optimizer.state_dict().items():
        print(var_name, "\t", value)

    # Saves the entire module (pickled), not just the state_dict.
    torch.save(model, './initmodel.pth')
def evaluate():
    """Load pretrained weights and report metrics averaged over the test set."""
    export_root = setup_train(args)
    meta, train_loader, val_loader, test_loader = dataloader_factory(args)
    net = model_factory(args, meta)
    trainer = trainer_factory(
        args, net, train_loader, val_loader, test_loader, export_root
    )
    load_pretrained_weights(net, args.eval_model_path)
    meters = AverageMeterSet()
    device = trainer.device
    for batch in test_loader:
        # Disable autograd for the metric computation on each batch.
        with torch.no_grad():
            moved = [t.to(device) for t in batch]
            for metric_name, value in trainer.calculate_metrics(moved).items():
                meters.update(metric_name, value)
    print(meters.averages())