Example #1
File: tests.py Project: G-Wang/char-rnn
def test_running():
    epochs = 100000
    seq_batch_size = 100
    print_yes = 100
    loss_func = torch.nn.functional.nll_loss
    # create network and optimizer
    net = RNN(100, 120, 150, 2)
    net.to(device)  # move the network to the chosen device (GPU if available)
    optim = torch.optim.Adam(net.parameters(), lr=3e-5)
    # main training loop:
    for epoch in range(epochs):
        dat = get_batch(train_data, seq_batch_size)
        dat = torch.LongTensor([vocab.find(item) for item in dat])
        # pull x and y
        x_t = dat[:-1]
        y_t = dat[1:]
        hidden = net.init_hidden()
        # turn all into cuda
        x_t, y_t, hidden = x_t.to(device), y_t.to(device), hidden.to(device)
        # initialize hidden state and forward pass
        logprob, hidden = net(x_t, hidden)
        loss = loss_func(logprob, y_t)
        # update
        optim.zero_grad()
        loss.backward()
        optim.step()
        # print the loss every print_yes epochs
        if epoch % print_yes == 0:
            print('*' * 100)
            print('\n epoch {}, loss: {}\n'.format(epoch, loss.item()))
            # make sure to pass True flag for running on cuda
            print('sample speech:\n', run_words(net, vocab, 500, True))
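The test above relies on several module-level names that are not shown in this excerpt (device, vocab, train_data, get_batch, run_words). Below is a minimal sketch of what they might look like for a character-level setup; these definitions are assumptions for illustration, not the project's actual code, and run_words is omitted because it depends on the RNN's sampling method.

import random
import string
import torch

# Assumed module-level setup (illustrative only).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# vocab is assumed to be a plain string, so vocab.find(ch) maps a character
# to an integer index.
vocab = string.printable

# train_data is assumed to be one long training string.
train_data = "the quick brown fox jumps over the lazy dog " * 1000

def get_batch(text, seq_len):
    # Return a random contiguous slice of seq_len + 1 characters, so that
    # dat[:-1] and dat[1:] each contain seq_len items.
    start = random.randint(0, len(text) - seq_len - 2)
    return text[start:start + seq_len + 1]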
Example #2
def generate_sequences(id_2_word, num_samples, model_type, emb_size, hidden_size, seq_len, batch_size, num_layers, dp_keep_prob, vocab_size, path):
    # build the requested architecture with the same hyperparameters used at training time
    if model_type == 'RNN':
        model = RNN(emb_size=emb_size, hidden_size=hidden_size,
                    seq_len=seq_len, batch_size=batch_size,
                    vocab_size=vocab_size, num_layers=num_layers,
                    dp_keep_prob=dp_keep_prob)
    else:
        model = GRU(emb_size=emb_size, hidden_size=hidden_size,
                    seq_len=seq_len, batch_size=batch_size,
                    vocab_size=vocab_size, num_layers=num_layers,
                    dp_keep_prob=dp_keep_prob)

    # restore the trained weights and move the model and hidden state to the device
    model.load_state_dict(torch.load(path))
    model = model.to(device)
    hidden = nn.Parameter(torch.zeros(num_layers, num_samples, hidden_size)).to(device)

    # sample the first token of each sequence uniformly at random
    # (note: the vocabulary size is hard-coded to 10000 here)
    input = torch.ones(10000) * 1 / 1000
    input = torch.multinomial(input, num_samples).to(device)
    output = model.generate(input, hidden, seq_len)

    # write the generated sequences to disk, one sequence per line
    with open(model_type + '_generated_sequences' + '.txt', 'w') as f:
        for i in range(num_samples):
            for j in range(seq_len):
                f.write(id_2_word.get(output[j, i].item()) + ' ')
            f.write('\n')
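A usage sketch for generate_sequences, assuming a checkpoint saved by the matching training run; the file names ('vocab.txt', 'rnn_params.pt') and the hyperparameter values below are placeholders chosen for illustration, and must match however the model was actually trained.

# Placeholder vocabulary: one word per line in 'vocab.txt' (assumed file).
with open('vocab.txt') as fp:
    words = fp.read().split()
id_2_word = dict(enumerate(words))

generate_sequences(id_2_word,
                   num_samples=10,          # number of independent samples
                   model_type='RNN',
                   emb_size=200,
                   hidden_size=1500,
                   seq_len=35,
                   batch_size=20,
                   num_layers=2,
                   dp_keep_prob=0.35,
                   vocab_size=len(id_2_word),
                   path='rnn_params.pt')    # assumed checkpoint path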
Example #3
File: tests.py Project: G-Wang/char-rnn
def no_test_forward():
    loss_func = torch.nn.functional.nll_loss
    net = RNN(100, 100, 100)
    net.to(device)  # move the network to the chosen device (GPU if available)
    optim = torch.optim.Adam(net.parameters(), lr=1e-4)
    # step 2: create a training batch of 101 characters, map each one to its vocabulary index, and convert to a LongTensor
    dat = get_batch(train_data, 100)
    dat = torch.LongTensor([vocab.find(item) for item in dat])
    # step 3: convert our dat into input/output
    x_t = dat[:-1]
    y_t = dat[1:]
    ho = net.init_hidden()
    # remember to move every tensor the model uses to the device: the inputs, the targets, and the hidden state
    x_t, y_t, ho = x_t.to(device), y_t.to(device), ho.to(device)
    # test forward pass
    log_prob, hidden = net(x_t, ho)
    # check whether the hidden state returned by the forward pass is already on the device
    #log_prob2, hidden2 = net.forward(x_t, hidden)
    loss = loss_func(log_prob, y_t)
    optim.zero_grad()
    loss.backward()
    optim.step()
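For this forward-pass test to work, net is expected to return log-probabilities in the (N, C) layout that torch.nn.functional.nll_loss pairs with (N,) integer targets. A small self-contained shape check of that contract, with sizes picked only for illustration:

import torch
import torch.nn.functional as F

seq_len, vocab_size = 100, 100
# Stand-in for the network output: log-probabilities over the vocabulary at each step.
log_prob = F.log_softmax(torch.randn(seq_len, vocab_size), dim=-1)
# Stand-in targets: one vocabulary index per step.
y_t = torch.randint(0, vocab_size, (seq_len,))

loss = F.nll_loss(log_prob, y_t)  # scalar; roughly log(vocab_size) for random inputs
print(loss.item())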
Example #4
def make_my_model(model_name, device, seq_len=35, batch_size=20, pt=None):
    #          --model=RNN --optimizer=ADAM --initial_lr=0.0001 --batch_size=20 --seq_len=35 --hidden_size=1500 --num_layers=2 --dp_keep_prob=0.35 --save_best
    #          --model=GRU --optimizer=SGD_LR_SCHEDULE --initial_lr=10 --batch_size=20 --seq_len=35 --hidden_size=1500 --num_layers=2 --dp_keep_prob=0.35 --save_best
    #          --model=TRANSFORMER --optimizer=SGD_LR_SCHEDULE --initial_lr=20 --batch_size=128 --seq_len=35 --hidden_size=512 --num_layers=6 --dp_keep_prob=0.9 --save_best
    if model_name == 'RNN':
        model = RNN(emb_size=200,
                    hidden_size=1500,
                    seq_len=seq_len,
                    batch_size=batch_size,
                    vocab_size=vocab_size,
                    num_layers=2,
                    dp_keep_prob=0.35)
    elif model_name == 'GRU':
        model = GRU(emb_size=200,
                    hidden_size=1500,
                    seq_len=seq_len,
                    batch_size=batch_size,
                    vocab_size=vocab_size,
                    num_layers=2,
                    dp_keep_prob=0.35)
    elif model_name == 'TRANSFORMER':
        model = TRANSFORMER(vocab_size=vocab_size,
                            n_units=512,
                            n_blocks=6,
                            dropout=1. - 0.9)
        # these 3 attributes don't affect the Transformer's computations;
        # they are only used in run_epoch
        model.batch_size = 128
        model.seq_len = 35
        model.vocab_size = vocab_size
    else:
        print("ERROR: Model type not recognized.")
        return
    # Model to device
    model = model.to(device)
    # Load pt
    if pt is not None:
        model.load_state_dict(torch.load(pt, map_location=device))
    return model
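A usage sketch for make_my_model. It assumes a module-level vocab_size is already defined (the function itself relies on it), device is a torch.device, and 'best_params.pt' is a placeholder checkpoint path rather than a file that ships with the code.

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Fresh GRU with the hyperparameters hard-coded above.
gru = make_my_model('GRU', device)

# Rebuild a trained Transformer from a saved state dict (placeholder path).
transformer = make_my_model('TRANSFORMER', device, pt='best_params.pt')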
Example #5
def load_model(model_info,
               device,
               vocab_size,
               emb_size=200,
               load_on_device=True):
    params_path = model_info.get_params_path()

    if model_info.model == 'RNN':
        model = RNN(emb_size=emb_size,
                    hidden_size=model_info.hidden_size,
                    seq_len=model_info.seq_len,
                    batch_size=model_info.batch_size,
                    vocab_size=vocab_size,
                    num_layers=model_info.num_layers,
                    dp_keep_prob=model_info.dp_keep_prob)
    elif model_info.model == 'GRU':
        model = GRU(emb_size=emb_size,
                    hidden_size=model_info.hidden_size,
                    seq_len=model_info.seq_len,
                    batch_size=model_info.batch_size,
                    vocab_size=vocab_size,
                    num_layers=model_info.num_layers,
                    dp_keep_prob=model_info.dp_keep_prob)
    else:
        model = TRANSFORMER(vocab_size=vocab_size,
                            n_units=model_info.hidden_size,
                            n_blocks=model_info.num_layers,
                            dropout=1. - model_info.dp_keep_prob)
        model.batch_size = model_info.batch_size
        model.seq_len = model_info.seq_len
        model.vocab_size = vocab_size

    if load_on_device:
        model = model.to(device)
    model.load_state_dict(torch.load(params_path, map_location=device))
    return model
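The exact class behind model_info is not shown here; load_model only needs the attributes it reads plus a get_params_path() callable. A stand-in built from types.SimpleNamespace, with placeholder values and a placeholder checkpoint path, illustrates the expected shape of that object:

from types import SimpleNamespace
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Minimal stand-in for the real model_info object (all values are placeholders).
model_info = SimpleNamespace(
    model='GRU',
    hidden_size=1500,
    seq_len=35,
    batch_size=20,
    num_layers=2,
    dp_keep_prob=0.35,
    get_params_path=lambda: 'gru_best_params.pt',  # placeholder checkpoint path
)

model = load_model(model_info, device, vocab_size=10000)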
Example #6
File: ptb-lm.py Project: lebrice/IFT6135
        # different things here than in the RNNs.
        # Also, the Transformer has other hyperparameters
        # (such as the number of attention heads) which can change its behavior.
        model = TRANSFORMER(vocab_size=vocab_size,
                            n_units=args.hidden_size,
                            n_blocks=args.num_layers,
                            dropout=1. - args.dp_keep_prob)
    # these 3 attributes don't affect the Transformer's computations;
    # they are only used in run_epoch
    model.batch_size = args.batch_size
    model.seq_len = args.seq_len
    model.vocab_size = vocab_size
else:
    print("Model type not recognized.")

model = model.to(device)

# LOSS FUNCTION
loss_fn = nn.CrossEntropyLoss()
if args.optimizer == 'ADAM':
    optimizer = torch.optim.Adam(model.parameters(), lr=args.initial_lr)

# LEARNING RATE SCHEDULE
lr = args.initial_lr
lr_decay_base = 1 / 1.15
m_flat_lr = 14.0  # we will not touch lr for the first m_flat_lr epochs

###############################################################################
#
# DEFINE COMPUTATIONS FOR PROCESSING ONE EPOCH
#
###############################################################################
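The lr_decay_base and m_flat_lr constants in this excerpt imply a schedule that leaves the learning rate flat for the first 14 epochs and then shrinks it geometrically each epoch. A minimal sketch of that rule under the SGD_LR_SCHEDULE setting; the epoch count and initial_lr value are illustrative, and the exact placement inside ptb-lm.py's training loop may differ.

# Assumed decay rule: flat for m_flat_lr epochs, then multiply by lr_decay_base per epoch.
initial_lr = 10.0          # e.g. the SGD_LR_SCHEDULE command line shown in Example #4
lr_decay_base = 1 / 1.15
m_flat_lr = 14.0

for epoch in range(40):
    lr_decay = lr_decay_base ** max(epoch - m_flat_lr, 0)
    lr = initial_lr * lr_decay
    # ... run one training epoch with this learning rate ...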
LOG(f"[DATA]   Data is loaded. Vocabulary size is {len(word2idx)}")

# Model Definition
model = RNN(vocab_size=len(word2idx),
            embedding_dim=128,
            hidden_dim=256,
            num_layers=2,
            target="lstm")
optimizer = optim.Adam(model.parameters(),
                       lr=LEARNING_RATE,
                       weight_decay=WEIGHT_DECAY)
criterion = nn.CrossEntropyLoss()
loss_meter = tnt.meter.AverageValueMeter()
if MODEL_PATH is not None:
    model.load_state_dict(torch.load(MODEL_PATH))
model.to(device)
LOG(f"[MODEL]  Build model complete.")

# Train
if MODE == "train":
    for epoch in range(EPOCH):
        loss_meter.reset()
        for index, data in tqdm.tqdm(enumerate(dataloader, 0)):
            data = data.long().contiguous().to(device)

            optimizer.zero_grad()

            input_, target = data[:, :-1], data[:, 1:]
            output, _ = model(input_)
            loss = criterion(output, target.reshape(-1))
            loss.backward()