Example no. 1
import time

import torch
from torch.autograd import Variable

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def repackage_hidden(h):
    """Detach hidden states from their computation history (truncated BPTT)."""
    if isinstance(h, Variable):
        return h.detach_()
    else:
        return tuple(repackage_hidden(v) for v in h)
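
# A minimal usage sketch of repackage_hidden (hypothetical loop; assumes
# model(x, hidden) returns (logits, hidden), which is not shown in this snippet):
#
#   hidden = model.init_hidden()
#   for x, y in train_batches:
#       hidden = repackage_hidden(hidden)  # cut the graph at the batch boundary
#       logits, hidden = model(x, hidden)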


model = RNN(emb_size=args.emb_size,
            hidden_size=args.hidden_size,
            seq_len=args.seq_len,
            batch_size=args.batch_size,
            vocab_size=10000,
            num_layers=args.num_layers,
            dp_keep_prob=args.dp_keep_prob)
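# vocab_size=10000 matches the Penn Treebank language-modeling vocabulary
# produced by _build_vocab on ptb.train.txt.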

model.load_state_dict(
    torch.load(
        "RNN_SGD_model=RNN_optimizer=SGD_initial_lr=1.0_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.8_num_epochs=1_save_best_0/best_params.pt"
    ))
model.eval()
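# eval() puts the model in inference mode, which disables the dropout
# controlled by dp_keep_prob.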


def per_timestep_gradients(model, data):
    """
    Find the per-timestep gradients
    """
    start_time = time.time()
    hidden = model.init_hidden()
    hidden = hidden.to(device)
    costs = 0.0
    iters = 0
    losses = []
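    # One possible continuation (hedged sketch; the original snippet is cut off
    # here, and the (logits, hidden) return convention and CrossEntropyLoss are
    # assumptions, not the original code):
    #
    #   inputs, targets = data
    #   logits, hidden = model(inputs.to(device), hidden)
    #   loss_fn = torch.nn.CrossEntropyLoss()
    #   for t in range(model.seq_len):
    #       model.zero_grad()
    #       loss_t = loss_fn(logits[t], targets[t].to(device))
    #       loss_t.backward(retain_graph=True)
    #       norm_sq = sum(p.grad.norm() ** 2 for p in model.parameters()
    #                     if p.grad is not None)
    #       losses.append(norm_sq.sqrt().item())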
Example no. 2
SEQ_LEN = 35
GENERATED_SEQ_LEN = 34

#--------------- LOAD MODEL


load_path = os.path.join(MODEL_PATH, 'best_params.pt')
model = RNN(emb_size=EMB_SIZE,
            hidden_size=HIDDEN_SIZE,
            seq_len=SEQ_LEN,
            batch_size=BATCH_SIZE,
            vocab_size=VOCAB_SIZE,
            num_layers=NUM_LAYERS,
            dp_keep_prob=DP_KEEP_PROB)

model.load_state_dict(torch.load(load_path, map_location='cpu'))
hidden = model.init_hidden()
model.eval()

#--------------- GENERATE SAMPLES

first_words = torch.LongTensor(BATCH_SIZE).random_(0, 10000)
# samples = model.generate(torch.zeros(BATCH_SIZE).to(torch.long), hidden, generated_seq_len=GENERATED_SEQ_LEN)
samples = model.generate(first_words, hidden, generated_seq_len=GENERATED_SEQ_LEN)

#-------------- CONVERTING TO WORDS
data_path  = "/Users/mlizaire/Codes/IFT6135/HW2/assignment2/data/"
filename = os.path.join(data_path, "ptb.train.txt")
word_2_id, id_2_word = _build_vocab(filename)

sequences = []
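# A minimal sketch of the conversion loop (assumes samples has shape
# (generated_seq_len, batch_size) and contains word ids, as produced above):
#
#   for b in range(BATCH_SIZE):
#       ids = [first_words[b].item()] + [samples[t][b].item()
#                                        for t in range(GENERATED_SEQ_LEN)]
#       sequences.append(' '.join(id_2_word[i] for i in ids))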
Example no. 3

###############################################################################
#
# MODEL SETUP
#
###############################################################################

# NOTE ==============================================
# This is where your model code will be called.
if args.model == 'RNN':
    model = RNN(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
    model.load_state_dict(
        torch.load(
            '/network/home/bhattdha/IFT6135H20_assignment/Assignment2/practical/Exp1/RNN_SGD_model=RNN_optimizer=SGD_initial_lr=1.0_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.8_num_epochs=20_save_best_save_dir=Exp1_0/best_params.pt'
        ))
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
    model.load_state_dict(
        torch.load(
            '/network/home/bhattdha/IFT6135H20_assignment/Assignment2/practical/Exp6/GRU_ADAM_model=GRU_optimizer=ADAM_initial_lr=0.001_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.5_num_epochs=20_save_best_save_dir=Exp6_0/best_params.pt'
        ))
elif args.model == 'TRANSFORMER':
    if args.debug:  # use a very small model
Example no. 4
    hidden = model.init_hidden()
    start = np.random.randint(1, 10000)
    inputs = torch.from_numpy(np.array([start]))
    print("Input ", id_2_word[start])

    gen_seq = model.generate(inputs, hidden, 70)
    gen_seq = [gen.item() for gen in gen_seq]
    words = [id_2_word[word] for word in gen_seq]
    print("\\begin{spverbatim}")
    print(' '.join(words))
    print("\\end{spverbatim}")
    print()
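    # (spverbatim is a LaTeX verbatim environment from the spverbatim package;
    # printing the begin/end markers lets the generated text be pasted
    # directly into a report.)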
"""
rnn_path = "/Users/andreeadeac/Documents/PhDI/IFT6135/IFT6135H20_practical_assignments/assignment2/Assignment2/RNN_SGD_model=RNN_optimizer=SGD_initial_lr=1.0_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.8_num_epochs=20_save_best_0"

gru_path = "/Users/andreeadeac/Documents/PhDI/IFT6135/IFT6135H20_practical_assignments/assignment2/Assignment2/GRU_ADAM_model=GRU_optimizer=ADAM_initial_lr=0.001_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.5_num_epochs=20_save_best_0"
# Flags encoded in the GRU checkpoint path above: --model=GRU --optimizer=ADAM --initial_lr=0.001 --batch_size=128 --seq_len=35 --hidden_size=512 --num_layers=2 --dp_keep_prob=0.5 --num_epochs=20 --save_best

train_path = os.path.join(args.data, "ptb" + ".train.txt")
word_to_id, id_2_word = _build_vocab(train_path)

model.load_state_dict(
    torch.load(rnn_path + '/best_params.pt',
               map_location=lambda storage, loc: storage))
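# (The map_location lambda keeps every storage on the CPU, so a checkpoint
# saved on a GPU can be loaded on a CPU-only machine.)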

if args.optimizer == 'SGD_LR_SCHEDULE':
    lr_decay = lr_decay_base**max(0 - m_flat_lr, 0)
    lr = lr * lr_decay  # decay lr if it is time
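# (In the usual form of this schedule the exponent is max(epoch - m_flat_lr, 0):
# the learning rate stays flat for the first m_flat_lr epochs, then decays
# geometrically by a factor of lr_decay_base per epoch. With the literal 0 used
# above, lr_decay is always 1 and the update is a no-op.)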

train_ppl, train_loss = run_epoch(model, train_data, True, lr)
Example no. 5
        # different things here than in the RNNs.
        # Also, the Transformer has other hyperparameters
        # (such as the number of attention heads) which can change its behavior.
        model = TRANSFORMER(vocab_size=vocab_size, n_units=args.hidden_size,
                            n_blocks=args.num_layers, dropout=1. - args.dp_keep_prob)
    # these 3 attributes don't affect the Transformer's computations;
    # they are only used in run_epoch
    model.batch_size = args.batch_size
    model.seq_len = args.seq_len
    model.vocab_size = vocab_size
else:
    print("Model type not recognized.")


model = model.to(device)
model.load_state_dict(torch.load(lc_path))


# model = torch.load(lc_path, map_location=torch.device('cpu'))
model.eval()

print(1)
# print(model)

###############################################################################
#
# GENERATE DATA
#
###############################################################################
samples_id_with_inputs = []
samples_id = []
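
# A possible continuation (hedged sketch, mirroring the generate call in
# Example no. 2 above; model.generate(first_tokens, hidden, generated_seq_len)
# is assumed to return a (generated_seq_len, batch_size) tensor of word ids):
#
#   hidden = model.init_hidden()
#   first_tokens = torch.LongTensor(model.batch_size).random_(0, model.vocab_size)
#   samples = model.generate(first_tokens, hidden,
#                            generated_seq_len=model.seq_len - 1)
#   for b in range(model.batch_size):
#       samples_id_with_inputs.append(
#           [first_tokens[b].item()] +
#           [samples[t][b].item() for t in range(samples.size(0))])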