Example #1
File: tests.py Project: G-Wang/char-rnn
def test_running():
    epochs = 100000
    seq_batch_size = 100
    print_yes = 100
    loss_func = torch.nn.functional.nll_loss
    # create network and optimizer
    net = RNN(100, 120, 150, 2)
    net.to(device)  # move the network to the selected device (CPU or GPU)
    optim = torch.optim.Adam(net.parameters(), lr=3e-5)
    # main training loop:
    for epoch in range(epochs):
        dat = get_batch(train_data, seq_batch_size)
        dat = torch.LongTensor([vocab.find(item) for item in dat])
        # pull x and y
        x_t = dat[:-1]
        y_t = dat[1:]
        hidden = net.init_hidden()
        # turn all into cuda
        x_t, y_t, hidden = x_t.to(device), y_t.to(device), hidden.to(device)
        # initialize hidden state and forward pass
        logprob, hidden = net.forward(x_t, hidden)
        loss = loss_func(logprob, y_t)
        # update
        optim.zero_grad()
        loss.backward()
        optim.step()
        # print the loss for every kth iteration
        if epoch % print_yes == 0:
            print('*' * 100)
            print('\n epoch {}, loss:{} \n'.format(epoch, loss))
            # make sure to pass True flag for running on cuda
            print('sample speech:\n', run_words(net, vocab, 500, True))
def main():
    epochs = 301
    seq_batch_size = 200
    print_yes = 100
    iscuda = False

    # create our network, optimizer and loss function
    net = RNN(len(chars), 100, 150, 2)  # instantiate an RNN object
    optim = torch.optim.Adam(net.parameters(), lr=6e-4)
    loss_func = torch.nn.functional.nll_loss

    if iscuda:
        net = net.cuda()

    # main training loop:
    for epoch in range(epochs):
        dat = getSequence(book, seq_batch_size)
        # look up each character's index in chars and store the indices in a LongTensor
        dat = torch.LongTensor([chars.find(item) for item in dat])

        # pull x, y and initialize hidden state
        if iscuda:
            x_t = dat[:-1].cuda()
            y_t = dat[1:].cuda()
            hidden = net.init_hidden().cuda()
        else:
            x_t = dat[:-1]
            y_t = dat[1:]
            hidden = net.init_hidden()

        # forward pass
        logprob, hidden = net.forward(x_t, hidden)
        loss = loss_func(logprob, y_t)
        # update
        optim.zero_grad()
        loss.backward()
        optim.step()
        # print the loss for every kth iteration
        if epoch % print_yes == 0:
            print('*' * 60)
            print('\n epoch {}, loss:{} \n'.format(epoch, loss))
            print('sample speech:\n', test_words(net, chars, seq_batch_size))

    torch.save(net.state_dict(), 'trainedBook_v2.pt')
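
Both training functions above depend on helpers defined elsewhere in the project: a device, a vocab/chars string of allowed characters, and a batching helper (get_batch in test_running, getSequence in main) that slices a chunk of training text. A minimal sketch of the simplest of these, assuming the training text is a plain Python string (illustrative only, not the actual G-Wang/char-rnn code):

import torch

# Assumed setup -- a sketch, not taken from the repository.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def get_batch(text, seq_batch_size):
    # return a random contiguous slice of seq_batch_size characters from the training text
    start = torch.randint(0, len(text) - seq_batch_size, (1,)).item()
    return text[start:start + seq_batch_size]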
Example #3
File: tests.py Project: G-Wang/char-rnn
def no_test_forward():
    loss_func = torch.nn.functional.nll_loss
    net = RNN(100, 100, 100)
    net.to(device)  # move the network to the selected device (CPU or GPU)
    optim = torch.optim.Adam(net.parameters(), lr=1e-4)
    # step 2: create a training batch of data (here 100 characters) and convert it to a PyTorch LongTensor
    dat = get_batch(train_data, 100)
    dat = torch.LongTensor([vocab.find(item) for item in dat])
    # step 3: convert our dat into input/output
    x_t = dat[:-1]
    y_t = dat[1:]
    ho = net.init_hidden()
    # remember to move every tensor the model uses onto the device: the inputs, the targets, and the hidden state
    x_t, y_t, ho = x_t.to(device), y_t.to(device), ho.to(device)
    # test forward pass
    log_prob, hidden = net.forward(x_t, ho)
    # check whether the hidden state returned by the forward pass is already on cuda
    #log_prob2, hidden2 = net.forward(x_t, hidden)
    loss = loss_func(log_prob, y_t)
    optim.zero_grad()
    loss.backward()
    optim.step()
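
The RNN class itself is not part of this listing; the call sites above construct it with three or four positional arguments and expect forward to return log-probabilities plus the next hidden state, and init_hidden to return a zero state. A plausible shape for such a class, with argument names guessed from the calls (a sketch under those assumptions, not the project's real implementation):

import torch
import torch.nn as nn
import torch.nn.functional as F

class RNN(nn.Module):
    # char-level language model sketch; argument names are guesses from the call sites
    def __init__(self, vocab_size, emb_size, hidden_size, n_layers=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.embed = nn.Embedding(vocab_size, emb_size)
        self.gru = nn.GRU(emb_size, hidden_size, n_layers)
        self.out = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden):
        # x: (seq_len,) tensor of character indices, treated as a single sequence
        emb = self.embed(x).unsqueeze(1)         # (seq_len, 1, emb_size)
        output, hidden = self.gru(emb, hidden)   # (seq_len, 1, hidden_size)
        logits = self.out(output.squeeze(1))     # (seq_len, vocab_size)
        return F.log_softmax(logits, dim=-1), hidden

    def init_hidden(self):
        # zero initial hidden state matching the GRU's expected shape
        return torch.zeros(self.n_layers, 1, self.hidden_size)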
Example #4
                        p.data.add_(p.grad.data, alpha=-lr)  # SGD update: p <- p - lr * grad
            if step % (epoch_size // 10) == 10:
                print('step: ' + str(step) + '\t' \
                    + 'loss (sum over all examples seen this epoch): ' + str(costs) + '\t' \
                    + 'speed (wps): ' + str(iters * model.batch_size / (time.time() - start_time)))
    return np.exp(costs / iters), losses


###############################################################################
#
# RUN MAIN LOOP (TRAIN AND VAL)
#
###############################################################################

print("\n########## Running Main Loop ##########################")
print("\n########## Generating Samples #########################")

hidden_zero = model.init_hidden()
#hidden_zero.to(torch.long)
hidden_zero = hidden_zero.to(device)
#print(type(hidden_zero))
#samples = model.generate(torch.cuda.LongTensor([0+ 3*i for i in range(20)]),hidden_zero,10)
train_slice = train_data[5355:5365]
train_slice = np.array(train_slice, dtype=np.int32)
inputs = torch.from_numpy(train_slice.astype(np.int64)).contiguous().to(device)
samples = model.generate(inputs, hidden_zero, 70)
# NOTE ==============================================
# To load these, run
# >>> x = np.load(lc_path)[()]
# You will need these values for plotting learning curves (Problem 4)
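
model.generate(inputs, hidden_zero, 70) above, and the same call in the later snippets, returns a tensor that is indexed as (generated_seq_len, batch_size) word ids. The method lives in the assignment's model code, which is not shown here; a rough sketch of such a sampler, where the attribute names (self.embedding, self.rnn, self.decoder) and the multinomial sampling step are assumptions:

def generate(self, input, hidden, generated_seq_len):
    # input: (batch_size,) seed token ids; returns (generated_seq_len, batch_size) sampled ids
    samples = []
    tokens = input
    for _ in range(generated_seq_len):
        emb = self.embedding(tokens).unsqueeze(0)        # (1, batch_size, emb_size)
        output, hidden = self.rnn(emb, hidden)           # output: (1, batch_size, hidden_size)
        logits = self.decoder(output.squeeze(0))         # (batch_size, vocab_size)
        probs = torch.softmax(logits, dim=-1)
        tokens = torch.multinomial(probs, 1).squeeze(1)  # sample the next token for each batch element
        samples.append(tokens)
    return torch.stack(samples)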
Example #5
# SAVE LEARNING CURVES
lc_path = os.path.join(args.save_dir, 'learning_curves.npy')
print('\nDONE\n\nSaving learning curves to ' + lc_path)
np.save(
    lc_path, {
        'train_ppls': train_ppls,
        'val_ppls': val_ppls,
        'train_losses': train_losses,
        'val_losses': val_losses
    })
# NOTE ==============================================
# To load these, run
# >>> x = np.load(lc_path)[()]
# You will need these values for plotting learning curves (Problem 4)
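
On recent NumPy versions, np.load refuses to unpickle object arrays by default, so loading the dict saved above requires allow_pickle=True; a loading sketch using the same lc_path:

import numpy as np

# load the learning-curve dict saved with np.save above
curves = np.load(lc_path, allow_pickle=True)[()]
train_ppls, val_ppls = curves['train_ppls'], curves['val_ppls']
train_losses, val_losses = curves['train_losses'], curves['val_losses']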
hidden = model.init_hidden()
hidden = hidden.to(device)

start = [word_to_id[w] for w in
         ["the", "a", "an", "he", "she", "it", "they", "why", "how", "to"]]

hidden = repackage_hidden(hidden)
short_samples = model.generate(start, hidden, args.seq_len)
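
repackage_hidden is not included in the snippet; a common implementation (as in the PyTorch word-language-model example) detaches the hidden state from the previous computation graph so gradients do not flow back across batches:

import torch

def repackage_hidden(h):
    # detach hidden states from their history; handles plain tensors (RNN/GRU)
    # as well as tuples of tensors (an LSTM's (h, c) pair)
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)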
Example #6
          hidden_size=argsdict["GRU_hidden_size"],
          seq_len=argsdict["seq_len"],
          batch_size=argsdict["batch_size"],
          vocab_size=vocab_size,
          num_layers=argsdict["GRU_num_layers"],
          dp_keep_prob=1)

# Load the model weights
rnn.load_state_dict(torch.load(args.RNN_path))
gru.load_state_dict(torch.load(args.GRU_path))

rnn.eval()
gru.eval()

# Initialize the hidden state
hidden = [rnn.init_hidden(), gru.init_hidden()]

# Set the random seed manually for reproducibility.
torch.manual_seed(args.seed)

# Generate the word seed using random words
# in the first 100 most common words.
input = torch.randint(0, 100, (args.batch_size, 1)).squeeze()

for name_model, model, init_hidden in zip(["RNN", "GRU"], [rnn, gru], hidden):
    print("------------------------------------")
    print(name_model)
    print("------------------------------------")
    print_sentence(model.generate(input, init_hidden, args.seq_len))
    print_sentence(model.generate(input, init_hidden, 2 * args.seq_len))
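
print_sentence is assumed by this snippet; a minimal, hypothetical version that maps a (generated_seq_len, batch_size) tensor of word ids back to text with id_2_word could look like this:

def print_sentence(samples):
    # samples: (generated_seq_len, batch_size) tensor of word ids; print one line per batch element
    for sentence in samples.transpose(0, 1):
        print(" ".join(id_2_word[idx.item()] for idx in sentence))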
Example #7
model = model.to(device)
print("device is = ", model)

print("Load model parameters, best_params.pt")

save_dir = args.save_dir
bp_path = os.path.join(save_dir, 'best_params.pt')
model.load_state_dict(torch.load(bp_path))
txt_path = os.path.join(save_dir, '_generate.txt')
model.eval()
nb_sentence = 20
inputs = torch.LongTensor(nb_sentence).random_(0, model.vocab_size).to(device)
generated_seq_len = model.seq_len
f = open(txt_path, "w+")
f.write("==============seq_len===============\n")
hidden = repackage_hidden(model.init_hidden())
samples = model.generate(inputs, hidden.to(device), generated_seq_len)
# import pdb; pdb.set_trace()
for i in range(nb_sentence):
    sentence = ""
    for t in range(generated_seq_len):
        sentence += id_2_word[samples[t][i].data.item()] + ' '

    f.write("---------------------This is line %d\r\n" % (i + 1))
    f.write(sentence + '\n')

f.write("==============seq_len * 2===============\n")
inputs = torch.LongTensor(nb_sentence).random_(0, model.vocab_size).to(device)
generated_seq_len = (generated_seq_len * 2)
print('generated_seq_len', generated_seq_len)
f.write("---------------------This is line %d\r\n" % (i + 1))
Example #8
# np.save(lc_path, {'val_ts_losses':timestep_loss.numpy(),
#                   'val_ppls':val_ppls,
#                   #'train_losses':train_losses,
#                   'val_losses':val_losses,
#                   'times': times})
# NOTE ==============================================
# To load these, run
# >>> x = np.load(lc_path)[()]
# You will need these values for plotting learning curves (Problem 4)
val_loss = []
val_ppl = []
timestep_loss = []
#val_ppl, val_loss, timestep_loss = run_epoch(model, valid_data)
#print(val_ppl)
input = torch.tensor([1]).to(device)
hidden = model.init_hidden().to(device)
# alternative: hidden = torch.zeros([args.num_layers, model.batch_size, args.hidden_size]).to(device)
seq_length = args.seq_len

for i in (35, 70):
    for j in range(10):
        input = torch.randint(low=0, high=999, size=(1, ),
                              dtype=torch.long).to(device)
        sequence = model.generate(input, hidden, i)
        phrase = "\n" + str(input.item()) + ": "
        for token in sequence:
            phrase += id_2_word[token.item()] + " "

        print(phrase)
Example #9
# GENERATING
#
###############################################################################

print("\n########## Running Main Loop ##########################")
model.load_state_dict(
    torch.load(os.path.join(args.save_dir, 'best_params.pt'),
               map_location=lambda storage, location: storage))
model = model.to(device)
print(model)

given = np.array(list(id_2_word.keys())[10:20], dtype=np.int64)
print([id_2_word[id] for id in given])
given = torch.from_numpy(given).to(device)

hidden = model.init_hidden().to(device)
model.eval()

res = model.generate(given, hidden, args.gen_length).transpose(0, 1)
print(res.shape)
res = res.to('cpu').numpy()

res = [' '.join([id_2_word[id] for id in sent]) for sent in res]

with open('{}_{}.txt'.format(args.model, args.gen_length), 'w') as writer:
    for i, sent in enumerate(res):
        sent = sent.replace('<unk>', '!unk!').replace('<eos>', '!eos!').replace('$', '\\$')
        writer.write('{}. {}\n\n'.format(i + 1, sent))
Example #10
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)

model.load_state_dict(torch.load(args.load_model))  # map_location='cpu' can be passed to torch.load if loading on CPU
# model.to(device) ???
model.eval()
print(model)
input = torch.LongTensor(args.batch_size).random_(0, vocab_size)
generated_seq_len = 70
results = model.generate(input, model.init_hidden(), generated_seq_len)
# print(results)
results = results.numpy().transpose().tolist()
for l in results:
    for i in range(len(l)):
        l[i] = id_2_word[l[i]]
    print(" ".join(l))
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=1)
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=1)
else:
    raise Exception("Unknown model")

model.load_state_dict(
    torch.load('{}/best_params.pt'.format(output_dir), map_location=device))

model.to(device)

# start with zeros
seed = torch.zeros(args.batch_size, dtype=torch.long)

samples = model.generate(seed, model.init_hidden(), seq_len)
samples = samples.transpose(0, 1)  # shape (batch_size, generated_seq_len)

with open(os.path.join(output_dir, "samples.{}.txt".format(seq_len)),
          "w") as of:
    for sample in samples:
        print(" ".join([id_2_word[idx] for idx in sample.numpy()]), file=of)
Example #12
                            n_blocks=args.num_layers,
                            dropout=1. - args.dp_keep_prob)
    # these 3 attributes don't affect the Transformer's computations;
    # they are only used in run_epoch
    model.batch_size = args.batch_size
    model.seq_len = args.seq_len
    model.vocab_size = vocab_size
else:
    print("Model type not recognized.")

if args.model_path is not None and args.generate:
    model.load_state_dict(torch.load(args.model_path))
    model = model.to(device)
    gen = model.generate(
        torch.LongTensor(args.batch_size).random_(0, model.vocab_size).cuda(),
        model.init_hidden().cuda(), 10)
    gen = [[id_2_word[w] for w in seq.data.cpu().numpy()] for seq in gen]
    for i in range(10):
        print(gen[i])

# LOSS FUNCTION
loss_fn = nn.CrossEntropyLoss(reduction='none')
if args.optimizer == 'ADAM':
    optimizer = torch.optim.Adam(model.parameters(), lr=args.initial_lr)

# LEARNING RATE SCHEDULE
lr = args.initial_lr
lr_decay_base = 1 / 1.15
m_flat_lr = 14.0  # we will not touch lr for the first m_flat_lr epochs
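
A common way to apply these constants inside the training loop (not shown in this excerpt) is to keep the learning rate flat for the first m_flat_lr epochs and then shrink it geometrically by lr_decay_base every epoch; a sketch, assuming the epoch count lives in args.num_epochs:

for epoch in range(args.num_epochs):
    # flat lr for the first m_flat_lr epochs, then multiply by lr_decay_base once per epoch
    lr_decay = lr_decay_base ** max(epoch - m_flat_lr, 0)
    lr = args.initial_lr * lr_decay
    # ... run one training epoch with this lr ...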

###############################################################################