# Sweep script. Project-level names (Dataset, Engine, embeddings,
# get_model_args_list, TRAINING_TQDM_BAD_FORMAT) are assumed to be
# importable from the surrounding repo.
import logging
import os

import numpy as np
import torch
import tqdm


def main(args):
    # Seed every RNG we rely on so sweep runs are reproducible.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    dataset = Dataset(args)
    vocab = dataset.get_vocab()

    # Create the output directory, refusing to clobber an existing file.
    if not os.path.isdir(args.output):
        assert not os.path.exists(args.output)
        os.makedirs(args.output)

    if args.glove is not None:
        glove_embeddings, glove_index = embeddings.load_glove_embeddings(
            path=args.glove,
            vocab=vocab,
            embedding_size=args.embedding_size,
        )

    model_cls, training_args_map = get_model_args_list(args, vocab)
    logging.info('Sweeper discovered %d different model configurations',
                 len(training_args_map))

    best_valid_ppl = float('inf')
    best_training_args_name = None
    for training_args_name, training_args_value in tqdm.tqdm(
            training_args_map.items(),
            desc='Model Configurations',
            bar_format=TRAINING_TQDM_BAD_FORMAT,
    ):
        # Optimizer-only settings are stripped before the remaining keys
        # are forwarded to the model constructor.
        model_args_value = training_args_value.copy()
        del model_args_value['learning_rate']
        del model_args_value['momentum']
        del model_args_value['profile_memory_estimation_weight']

        model = model_cls(**model_args_value)
        if args.glove is not None:
            model.set_embeddings(glove_embeddings)
            # glove_index only exists when --glove was given, so this
            # initialization must stay nested under the check above.
            if (args.profile_memory_attention is not None
                    and args.init_profile_memory_weights):
                model.init_embeddings_weights_using_glove_index(glove_index)

        engine = Engine(
            model=model,
            vocab=vocab,
            log_interval=None,
            optimizer_params={
                'optim': args.optimizer,
                'learning_rate': training_args_value['learning_rate'],
                'momentum': training_args_value['momentum'],
            },
            verbose=False,
            profile_memory_estimation_weight=training_args_value[
                'profile_memory_estimation_weight'],
        )
        engine.set_checkpoint_dir(
            checkpoint_dir=os.path.join(args.output, training_args_name),
            verbose=False,
        )
        valid_ppl = engine.full_training(
            num_epochs=args.num_epochs,
            dataset=dataset,
            verbose=False,
        )
        tqdm.tqdm.write('%s: %.5f' % (training_args_name, valid_ppl))

        if valid_ppl < best_valid_ppl:
            best_valid_ppl = valid_ppl
            best_training_args_name = training_args_name

    logging.info('Sweeping has finished with the best validation ppl %.5f',
                 best_valid_ppl)
    best_checkpoint_path = Engine.get_best_chechpoint(
        os.path.join(args.output, best_training_args_name))
    logging.info(
        'The best checkpoint is %s. Picking up the model from there',
        best_checkpoint_path,
    )
    model = model_cls.create_from_checkpoint(best_checkpoint_path, args.gpu)
    engine.model = model

    # Evaluate the best model on every held-out corpus.
    for corpus, dl in dataset.get_test_and_valid_data_loaders_map().items():
        engine.valid(dl, corpus, use_progress_bar=False)
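
# Hypothetical CLI entry point, sketched for completeness: the flag names
# below mirror the attributes main() reads, but the defaults and the set of
# flags are illustrative assumptions, not the project's actual interface
# (Dataset(args) may consume additional flags not listed here).
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Hyperparameter sweeper')
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--output', required=True)
    parser.add_argument('--glove', default=None)
    parser.add_argument('--embedding-size', type=int, default=300)
    parser.add_argument('--optimizer', default='sgd')
    parser.add_argument('--num-epochs', type=int, default=10)
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--profile-memory-attention', default=None)
    parser.add_argument('--init-profile-memory-weights', action='store_true')
    logging.basicConfig(level=logging.INFO)
    main(parser.parse_args())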
# Separate training script. get_config and Dataset are assumed to be
# project-level imports.
import os
import random

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

config = get_config(
    os.path.join(os.path.abspath(os.path.dirname(__file__)), "config.yml"))

# Seed the RNGs and force deterministic cuDNN kernels for reproducibility.
torch.manual_seed(config["seed"])
random.seed(config["seed"])
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

MODEL_PATH = os.path.join(os.getcwd(), "models", config["model_name"] + ".pt")

dataloader = Dataset(config["max_len"], config["max_size"],
                     config["batch_size"], config["pad_token"])
train_iterator, test_iterator, valid_iterator = dataloader.get_iterator()
print("Loaded iterator, generating vocab...")
vocab = dataloader.get_vocab()
tokenizer = dataloader.get_tokenizer()
pad_idx = vocab[config["pad_token"]]
input_dim = len(vocab)


def train(model, iterator, optimizer, criterion):
    """Run one training epoch over `iterator`."""
    ep_loss, ep_acc = 0, 0
    model.train()
    for labels, text, lengths in iterator:
        labels, text = labels.to(device), text.to(device)
        optimizer.zero_grad()
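        # Minimal completion sketch for the rest of the loop, which is cut
        # off above. Assumptions: the model takes (text, lengths) and emits
        # one logit per example, and criterion is a logit-based binary loss
        # such as nn.BCEWithLogitsLoss.
        predictions = model(text, lengths).squeeze(1)
        loss = criterion(predictions, labels.float())
        loss.backward()
        optimizer.step()
        ep_loss += loss.item()
        # Binary accuracy: threshold the sigmoid of the logits at 0.5.
        preds = (torch.sigmoid(predictions) > 0.5).long()
        ep_acc += (preds == labels.long()).float().mean().item()
    return ep_loss / len(iterator), ep_acc / len(iterator)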