def repackage_hidden(h):
    # Detach hidden state(s) from their history (recursing into tuples).
    if isinstance(h, Variable):
        return h.detach_()
    else:
        return tuple(repackage_hidden(v) for v in h)


model = RNN(emb_size=args.emb_size,
            hidden_size=args.hidden_size,
            seq_len=args.seq_len,
            batch_size=args.batch_size,
            vocab_size=10000,
            num_layers=args.num_layers,
            dp_keep_prob=args.dp_keep_prob)
model.load_state_dict(
    torch.load(
        "RNN_SGD_model=RNN_optimizer=SGD_initial_lr=1.0_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.8_num_epochs=1_save_best_0/best_params.pt"
    ))
model.eval()


def per_timestep_gradients(model, data):
    """Find the per-timestep gradients."""
    start_time = time.time()
    hidden = model.init_hidden()
    hidden = hidden.to(device)
    costs = 0.0
    iters = 0
    losses = []
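# A minimal, self-contained illustration of per-timestep gradients using a
# plain nn.RNNCell (a sketch with made-up sizes, independent of the
# assignment's RNN class): keep every intermediate hidden state, backprop
# the loss at the final timestep only, and read off each state's grad norm.
import torch
import torch.nn as nn


def per_timestep_gradient_demo(seq_len=10, batch=4, emb=8, hid=16, vocab=50):
    torch.manual_seed(0)
    embed = nn.Embedding(vocab, emb)
    cell = nn.RNNCell(emb, hid)
    out_proj = nn.Linear(hid, vocab)
    loss_fn = nn.CrossEntropyLoss()

    tokens = torch.randint(0, vocab, (seq_len + 1, batch))
    inputs, targets = tokens[:-1], tokens[1:]

    h = torch.zeros(batch, hid)
    hiddens = []                     # keep every h_t to inspect its gradient
    for t in range(seq_len):
        h = cell(embed(inputs[t]), h)
        h.retain_grad()              # non-leaf tensors need retain_grad()
        hiddens.append(h)

    # Loss at the final timestep only, then backprop through the whole chain.
    loss = loss_fn(out_proj(hiddens[-1]), targets[-1])
    loss.backward()

    # Gradient norm of the loss w.r.t. each hidden state h_t; earlier
    # timesteps typically get smaller norms (vanishing gradients).
    return [ht.grad.norm().item() for ht in hiddens]


print(per_timestep_gradient_demo())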
SEQ_LEN = 35
GENERATED_SEQ_LEN = 34

#--------------- LOAD MODEL
load_path = os.path.join(MODEL_PATH, 'best_params.pt')
model = RNN(emb_size=EMB_SIZE,
            hidden_size=HIDDEN_SIZE,
            seq_len=SEQ_LEN,
            batch_size=BATCH_SIZE,
            vocab_size=VOCAB_SIZE,
            num_layers=NUM_LAYERS,
            dp_keep_prob=DP_KEEP_PROB)
model.load_state_dict(torch.load(load_path, map_location='cpu'))
hidden = model.init_hidden()
model.eval()

#--------------- GENERATE SAMPLES
first_words = torch.LongTensor(BATCH_SIZE).random_(0, 10000)
# samples = model.generate(torch.zeros(BATCH_SIZE).to(torch.long), hidden, generated_seq_len=GENERATED_SEQ_LEN)
samples = model.generate(first_words, hidden, generated_seq_len=GENERATED_SEQ_LEN)

#-------------- CONVERTING TO WORDS
data_path = "/Users/mlizaire/Codes/IFT6135/HW2/assignment2/data/"
filename = os.path.join(data_path, "ptb.train.txt")
word_2_id, id_2_word = _build_vocab(filename)
sequences = []
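# A possible continuation (a sketch, not necessarily the original code),
# assuming `samples` is a (GENERATED_SEQ_LEN, BATCH_SIZE) tensor of token ids:
# prepend each seed word and map every id back to a word with `id_2_word`.
for b in range(BATCH_SIZE):
    token_ids = [first_words[b].item()] + \
                [samples[t][b].item() for t in range(GENERATED_SEQ_LEN)]
    sequences.append(' '.join(id_2_word[i] for i in token_ids))

for seq in sequences[:5]:  # show a few generated sentences
    print(seq)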
# MODEL SETUP
#
###############################################################################

# NOTE ==============================================
# This is where your model code will be called.
if args.model == 'RNN':
    model = RNN(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
    model.load_state_dict(
        torch.load(
            '/network/home/bhattdha/IFT6135H20_assignment/Assignment2/practical/Exp1/RNN_SGD_model=RNN_optimizer=SGD_initial_lr=1.0_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.8_num_epochs=20_save_best_save_dir=Exp1_0/best_params.pt'
        ))
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
    model.load_state_dict(
        torch.load(
            '/network/home/bhattdha/IFT6135H20_assignment/Assignment2/practical/Exp6/GRU_ADAM_model=GRU_optimizer=ADAM_initial_lr=0.001_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.5_num_epochs=20_save_best_save_dir=Exp6_0/best_params.pt'
        ))
elif args.model == 'TRANSFORMER':
    if args.debug:  # use a very small model
hidden = model.init_hidden()
start = np.random.randint(1, 10000)
inputs = torch.from_numpy(np.array([start]))
print("Input ", id_2_word[start])
gen_seq = model.generate(inputs, hidden, 70)
gen_seq = [gen.item() for gen in gen_seq]
words = [id_2_word[word] for word in gen_seq]
print("\\begin{spverbatim}")
print(' '.join(words))
print("\\end{spverbatim}")
print()
"""

rnn_path = "/Users/andreeadeac/Documents/PhDI/IFT6135/IFT6135H20_practical_assignments/assignment2/Assignment2/RNN_SGD_model=RNN_optimizer=SGD_initial_lr=1.0_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.8_num_epochs=20_save_best_0"
gru_path = "/Users/andreeadeac/Documents/PhDI/IFT6135/IFT6135H20_practical_assignments/assignment2/Assignment2/GRU_ADAM_model=GRU_optimizer=ADAM_initial_lr=0.001_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.5_num_epochs=20_save_best_0"
# --model=GRU --optimizer=ADAM --initial_lr=0.001 --batch_size=128 --seq_len=35 --hidden_size=512 --num_layers=2 --dp_keep_prob=0.5 --num_epochs=20 --save_best

train_path = os.path.join(args.data, "ptb" + ".train.txt")
word_to_id, id_2_word = _build_vocab(train_path)

model.load_state_dict(
    torch.load(rnn_path + '/best_params.pt',
               map_location=lambda storage, loc: storage))

if args.optimizer == 'SGD_LR_SCHEDULE':
    lr_decay = lr_decay_base ** max(epoch - m_flat_lr, 0)
    lr = lr * lr_decay  # decay lr if it is time
train_ppl, train_loss = run_epoch(model, train_data, True, lr)
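# A small illustration of the SGD_LR_SCHEDULE rule above (made-up values;
# the real lr_decay_base and m_flat_lr come from the training script):
# lr stays flat while epoch <= m_flat_lr, then shrinks geometrically.
demo_lr, demo_decay_base, demo_flat_epochs = 1.0, 1.0 / 1.15, 14
for demo_epoch in range(20):
    demo_lr = demo_lr * demo_decay_base ** max(demo_epoch - demo_flat_epochs, 0)
    print(demo_epoch, round(demo_lr, 4))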
    # different things here than in the RNNs.
    # Also, the Transformer has other hyperparameters
    # (such as the number of attention heads) which can change its behavior.
    model = TRANSFORMER(vocab_size=vocab_size,
                        n_units=args.hidden_size,
                        n_blocks=args.num_layers,
                        dropout=1. - args.dp_keep_prob)
    # These 3 attributes don't affect the Transformer's computations;
    # they are only used in run_epoch.
    model.batch_size = args.batch_size
    model.seq_len = args.seq_len
    model.vocab_size = vocab_size
else:
    print("Model type not recognized.")

model = model.to(device)
model.load_state_dict(torch.load(lc_path))
# model = torch.load(map_location=torch.device('cpu'))
model.eval()
print(1)
# print(model)

###############################################################################
#
# GENERATE DATA
#
###############################################################################

samples_id_with_inputs = []
samples_id = []
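# A sketch (assumed, not necessarily the original loop) of how these lists
# could be filled for the RNN/GRU case, assuming model.generate returns a
# (generated_seq_len, batch_size) LongTensor as in the snippets above:
first_tokens = torch.randint(0, vocab_size, (args.batch_size,))
hidden = model.init_hidden().to(device)
with torch.no_grad():
    generated = model.generate(first_tokens.to(device), hidden,
                               generated_seq_len=args.seq_len - 1)
samples_id.append(generated.cpu())
samples_id_with_inputs.append(
    torch.cat([first_tokens.unsqueeze(0).cpu(), generated.cpu()], dim=0))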