Example #1
model = RNN(emb_size=EMB_SIZE,  # assumption: RNN constructor with an embedding-size argument
            hidden_size=HIDDEN_SIZE,
            seq_len=SEQ_LEN,
            batch_size=BATCH_SIZE,
            vocab_size=VOCAB_SIZE,
            num_layers=NUM_LAYERS,
            dp_keep_prob=DP_KEEP_PROB)

model.load_state_dict(torch.load(load_path, map_location='cpu'))  # load trained weights onto CPU
hidden = model.init_hidden()  # fresh hidden state for generation
model.eval()  # disable dropout while sampling

#--------------- GENERATE SAMPLES

first_words = torch.LongTensor(BATCH_SIZE).random_(0, 10000)  # one random seed token id per batch element
# samples = model.generate(torch.zeros(BATCH_SIZE).to(torch.long), hidden, generated_seq_len=GENERATED_SEQ_LEN)
samples = model.generate(first_words, hidden, generated_seq_len=GENERATED_SEQ_LEN)

#-------------- CONVERTING TO WORDS
data_path  = "/Users/mlizaire/Codes/IFT6135/HW2/assignment2/data/"
filename = os.path.join(data_path, "ptb.train.txt")
word_2_id, id_2_word = _build_vocab(filename)

sequences = []

print("THIS IS RNN")
for i in range(15):
    word_sequence = []
    id_sequence = np.array(torch.t(samples)[i])  # i-th generated sequence (samples is seq_len x batch)
    for index in id_sequence:
        word = id_2_word[int(index)]
        word_sequence.append(word)
    sequences.append(word_sequence)  # keep the decoded sample
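
# To inspect the generated text, the decoded samples can be joined into
# space-separated strings: a minimal sketch, assuming the `sequences`
# list built above (not part of the original snippet).
for n, word_sequence in enumerate(sequences):
    print("sample %d: %s" % (n, " ".join(word_sequence)))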
Example #2
#toy
#model.load_state_dict(torch.load("RNN_SGD_model=RNN_optimizer=SGD_initial_lr=1.0_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.8_num_epochs=1_save_best_0/best_params.pt"))

model.eval()

#print(model.out_layer.weight.data)

# the inputs are also token indices: one random seed id per generated sequence
inputs = torch.from_numpy(
    np.random.randint(1, high=1000, size=10).astype(np.int64))

hidden = model.init_hidden()
model.zero_grad()
hidden = repackage_hidden(hidden)  # detach the hidden state from any previous graph
samples = model.generate(inputs, hidden, 35)  # returns token indices, shape (generated_seq_len, batch)

samples = samples.transpose(0, 1)  # -> (batch, generated_seq_len), one row per generated sequence

filename = "data/ptb.train.txt"


def _read_words(filename):
    with open(filename, "r") as f:
        return f.read().replace("\n", "<eos>").split()


def _build_vocab(filename):
    data = _read_words(filename)

    counter = collections.Counter(data)
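
    # A plausible completion (assumption, following the standard PTB reader used
    # with this dataset): assign ids by descending word frequency and also build
    # the reverse map that Example #1 unpacks as id_2_word.
    count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))
    words, _ = list(zip(*count_pairs))
    word_to_id = dict(zip(words, range(len(words))))
    id_to_word = {v: k for k, v in word_to_id.items()}
    return word_to_id, id_to_word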
Example #3
###############################################################################
#
# GENERATE DATA
#
###############################################################################
samples_id_with_inputs = []
samples_id = []

samples_words_with_inputs = []
samples_words = []

# one random seed token id for each of the 128 sequences in the batch
inputs = torch.from_numpy(np.random.randint(0, 10000, 128).astype(np.int64)).contiguous().to(device)

hidden = model.init_hidden()[0]
hidden = hidden.to(device)

samples_all = model.generate(inputs, hidden, SEQ_LEN)

print(samples_all[:, :10])
for i in range(10):
    # print(samples_all[:,i])
    # print(samples_all[:,:10].T)
    # print(samples_all[:,:10].T.tolist())
    # prepend the seed token id to the generated ids of sequence i
    samples_id_with_inputs.append([inputs[i].tolist()] + samples_all[:, i].T.tolist())
    samples_id.append(samples_all[:, i].T.tolist())

print(len(samples_id_with_inputs))
print(len(samples_id_with_inputs[0]))
# print(samples_id)
# print(samples_id.shape)
for s, sentence in enumerate(samples_id_with_inputs):
    samples_words_with_inputs.append([])
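    # Assumed continuation: decode each id back into a PTB word; id_2_word is
    # taken to come from a _build_vocab(...) call as in the earlier examples.
    for word_id in sentence:
        samples_words_with_inputs[s].append(id_2_word[word_id])

# (also assumed) the same decoding, without the prepended seed token
for sentence in samples_id:
    samples_words.append([id_2_word[word_id] for word_id in sentence])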