def load_model(path, vocab_size, model_class='GRU'):
    """
    Load a model given its saved state_dict.
    From: https://pytorch.org/tutorials/beginner/saving_loading_models.html

    Default model parameters:
    --model=RNN --optimizer=SGD --initial_lr=1.0 --batch_size=128 --seq_len=35
    --hidden_size=512 --num_layers=2 --dp_keep_prob=0.8 --num_epochs=20 --save_best

    --model=GRU --optimizer=ADAM --initial_lr=0.001 --batch_size=128 --seq_len=35
    --hidden_size=512 --num_layers=2 --dp_keep_prob=0.5 --num_epochs=20 --save_best

    :return: the model with the saved weights loaded
    """
    # Default hyperparameters (matching the training configurations above)
    if model_class == 'GRU':
        model = GRU(emb_size=200, hidden_size=512, seq_len=35, batch_size=128,
                    vocab_size=vocab_size, num_layers=2, dp_keep_prob=0.8)
    else:
        model = RNN(emb_size=200, hidden_size=512, seq_len=35, batch_size=128,
                    vocab_size=vocab_size, num_layers=2, dp_keep_prob=0.8)
    model.load_state_dict(torch.load(path))
    return model
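
# A minimal usage sketch (not part of the original script): the checkpoint path
# 'best_params.pt' and the word_to_id mapping are placeholders assumed to exist.
example_vocab_size = len(word_to_id)
example_model = load_model('best_params.pt', example_vocab_size, model_class='GRU')
example_model.eval()  # disable dropout before evaluation or generation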
train_data, valid_data, test_data, word_to_id, id_2_word = raw_data
vocab_size = len(word_to_id)
print(' vocabulary size: {}'.format(vocab_size))

###############################################################################
#
# MODEL SETUP
#
###############################################################################

# NOTE ==============================================
# This is where your model code will be called.
if args.model == 'RNN':
    model = RNN(emb_size=args.emb_size, hidden_size=args.hidden_size,
                seq_len=args.seq_len, batch_size=args.batch_size,
                vocab_size=vocab_size, num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size, hidden_size=args.hidden_size,
                seq_len=args.seq_len, batch_size=args.batch_size,
                vocab_size=vocab_size, num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'TRANSFORMER':
    if args.debug:  # use a very small model
        model = TRANSFORMER(vocab_size=vocab_size, n_units=16, n_blocks=2)
    else:
        # Note that we're using num_layers and hidden_size to mean slightly
        # different things here than in the RNNs.
        # The Transformer also has other hyperparameters (such as the number
        # of attention heads) which can change its behavior.
        model = TRANSFORMER(vocab_size=vocab_size, n_units=args.hidden_size,
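
# Illustrative note (not in the original): word_to_id and id_2_word returned by
# the data loader are assumed to be inverse mappings, e.g. for a toy vocabulary:
#   word_to_id = {'the': 0, 'cat': 1}
#   id_2_word  = {0: 'the', 1: 'cat'}
#   id_2_word[word_to_id['cat']]  # -> 'cat'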
    initial hidden states for the next mini-batch. Using the final hidden states
    in this way makes sense when the elements of the mini-batches are actually
    successive subsequences in a set of longer sequences. This is the case with
    the way we've processed the Penn Treebank dataset.
    """
    if isinstance(h, Variable):
        return h.detach_()
    else:
        return tuple(repackage_hidden(v) for v in h)


model = RNN(emb_size=args.emb_size, hidden_size=args.hidden_size,
            seq_len=args.seq_len, batch_size=args.batch_size,
            vocab_size=10000, num_layers=args.num_layers,
            dp_keep_prob=args.dp_keep_prob)
model.load_state_dict(
    torch.load(
        "RNN_SGD_model=RNN_optimizer=SGD_initial_lr=1.0_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.8_num_epochs=1_save_best_0/best_params.pt"
    ))
model.eval()


def per_timestep_gradients(model, data):
    """
    Find the per-timestep gradients.
    """
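    # Sketch of a possible body (not the original implementation). This is one
    # reading of "per-timestep gradients": for each timestep t, the norm of the
    # gradient of the timestep-t loss with respect to the model parameters.
    # Assumptions: `data` yields (x, y) numpy arrays of shape (batch_size, seq_len)
    # as in the PTB iterator, model(inputs, hidden) returns (logits, hidden) with
    # logits of shape (seq_len, batch_size, vocab_size), and `numpy as np` /
    # `torch.nn as nn` are imported at the top of the file.
    loss_fn = nn.CrossEntropyLoss()
    x, y = next(iter(data))
    inputs = torch.from_numpy(x.astype(np.int64)).transpose(0, 1).contiguous()
    targets = torch.from_numpy(y.astype(np.int64)).transpose(0, 1).contiguous()
    hidden = repackage_hidden(model.init_hidden())
    outputs, hidden = model(inputs, hidden)

    grad_norms = []
    for t in range(outputs.size(0)):
        model.zero_grad()
        loss_t = loss_fn(outputs[t], targets[t])
        loss_t.backward(retain_graph=True)
        sq_sum = sum((p.grad ** 2).sum() for p in model.parameters()
                     if p.grad is not None)
        grad_norms.append(sq_sum.sqrt().item())
    return grad_norms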
    Using the final hidden states in this way makes sense when the elements of
    the mini-batches are actually successive subsequences in a set of longer
    sequences. This is the case with the way we've processed the Penn Treebank
    dataset.
    """
    if isinstance(h, Variable):
        return h.detach_()
    else:
        return tuple(repackage_hidden(v) for v in h)


if args.model == "RNN":
    model = RNN(emb_size=args.emb_size, hidden_size=args.hidden_size,
                seq_len=args.seq_len, batch_size=10,
                vocab_size=10000, num_layers=args.num_layers,
                dp_keep_prob=0.8)
    model.load_state_dict(
        torch.load(
            "RNN_SGD_model=RNN_optimizer=SGD_initial_lr=1.0_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.8_num_epochs=20_save_best_0/best_params.pt"
        ))
elif args.model == "GRU":
    model = GRU(emb_size=args.emb_size, hidden_size=args.hidden_size,
                seq_len=args.seq_len, batch_size=10,
                vocab_size=10000, num_layers=args.num_layers,
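
# A hedged usage sketch for repackage_hidden (not part of the original file):
# carry the hidden state across successive mini-batches while detaching it each
# time, so backpropagation does not reach into earlier batches. `batches` is a
# placeholder for the PTB mini-batch iterator.
#
#   hidden = model.init_hidden()
#   for x, y in batches:
#       hidden = repackage_hidden(hidden)
#       inputs = torch.from_numpy(x.astype(np.int64)).transpose(0, 1).contiguous()
#       outputs, hidden = model(inputs, hidden)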
HIDDEN_SIZE = 512
BATCH_SIZE = 128
VOCAB_SIZE = 10000
NUM_LAYERS = 2
DP_KEEP_PROB = 0.8
SEQ_LEN = 35
GENERATED_SEQ_LEN = 34

#--------------- LOAD MODEL
load_path = os.path.join(MODEL_PATH, 'best_params.pt')
model = RNN(emb_size=EMB_SIZE, hidden_size=HIDDEN_SIZE, seq_len=SEQ_LEN,
            batch_size=BATCH_SIZE, vocab_size=VOCAB_SIZE,
            num_layers=NUM_LAYERS, dp_keep_prob=DP_KEEP_PROB)
model.load_state_dict(torch.load(load_path, map_location='cpu'))
hidden = model.init_hidden()
model.eval()

#--------------- GENERATE SAMPLES
first_words = torch.LongTensor(BATCH_SIZE).random_(0, 10000)
# samples = model.generate(torch.zeros(BATCH_SIZE).to(torch.long), hidden, generated_seq_len=GENERATED_SEQ_LEN)
samples = model.generate(first_words, hidden, generated_seq_len=GENERATED_SEQ_LEN)

#-------------- CONVERTING TO WORDS
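# A hedged sketch of the conversion step announced above (not the original code).
# Assumes id_2_word maps token ids back to words and that samples[t][b] is the
# token generated at timestep t for batch element b.
sentences = []
for b in range(BATCH_SIZE):
    words = [id_2_word[first_words[b].item()]]  # the seed word
    words += [id_2_word[samples[t][b].item()] for t in range(GENERATED_SEQ_LEN)]
    sentences.append(' '.join(words))
print(sentences[0])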
vocab_size = len(word_to_id)
print(' vocabulary size: {}'.format(vocab_size))

###############################################################################
#
# MODEL SETUP
#
###############################################################################

# NOTE ==============================================
# This is where your model code will be called.
if args.model == 'RNN':
    model = RNN(emb_size=args.emb_size, hidden_size=args.hidden_size,
                seq_len=args.seq_len, batch_size=args.batch_size,
                vocab_size=vocab_size, num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size, hidden_size=args.hidden_size,
                seq_len=args.seq_len, batch_size=args.batch_size,
                vocab_size=vocab_size, num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'TRANSFORMER':
    if args.debug:  # use a very small model
        model = TRANSFORMER(vocab_size=vocab_size, n_units=16, n_blocks=2)
    else:
vocab_size = len(word_to_id)
print(' vocabulary size: {}'.format(vocab_size))

###############################################################################
#
# MODEL SETUP
#
###############################################################################

# NOTE ==============================================
# This is where your model code will be called.
if args.model == 'RNN':
    model = RNN(emb_size=args.emb_size, hidden_size=args.hidden_size,
                seq_len=args.seq_len, batch_size=args.batch_size,
                vocab_size=vocab_size, num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
    model.load_state_dict(
        torch.load(
            '/network/home/bhattdha/IFT6135H20_assignment/Assignment2/practical/Exp1/RNN_SGD_model=RNN_optimizer=SGD_initial_lr=1.0_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.8_num_epochs=20_save_best_save_dir=Exp1_0/best_params.pt'
        ))
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size, hidden_size=args.hidden_size,
                seq_len=args.seq_len, batch_size=args.batch_size,
                vocab_size=vocab_size, num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
print("WARNING: You are about to run on cpu, and this will likely run out \
    of memory. \n You can try setting batch_size=1 to reduce memory usage")
device = torch.device("cpu")

###############################################################################
#
# MODEL SETUP
#
###############################################################################

# NOTE ==============================================
# This is where your model code will be called.
if args.model == 'RNN':
    model = RNN(emb_size=args.emb_size, hidden_size=args.hidden_size,
                seq_len=args.seq_len, batch_size=args.batch_size,
                vocab_size=vocab_size, num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size, hidden_size=args.hidden_size,
                seq_len=args.seq_len, batch_size=args.batch_size,
                vocab_size=vocab_size, num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'TRANSFORMER':
    if args.debug:  # use a very small model
        model = TRANSFORMER(vocab_size=vocab_size, n_units=16, n_blocks=2)
    else:
        # Note that we're using num_layers and hidden_size to mean slightly
        # different things here than in the RNNs.
        # The Transformer also has other hyperparameters (such as the number
        # of attention heads) which can change its behavior.
        model = TRANSFORMER(vocab_size=vocab_size, n_units=args.hidden_size,
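
# A hedged sketch (not shown in the original): once a model has been selected by
# the branch above, it would typically be moved onto the chosen device before
# training, using the standard nn.Module.to call:
#
#   model = model.to(device)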