class Model:
    """Character-level RNN language model: training, sampling, checkpointing.

    Relies on names defined elsewhere in this file/module: ``RNN``,
    ``char_tensor``, ``all_characters``, plus ``torch``/``nn``/``os`` imports.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1, gpu=-1):
        # gpu < 0 means CPU; any non-negative value moves the decoder to the
        # current CUDA device.
        self.decoder = RNN(input_size, hidden_size, output_size, n_layers, gpu)
        if gpu >= 0:
            print("Use GPU %d" % torch.cuda.current_device())
            self.decoder.cuda()
        self.optimizer = torch.optim.Adam(self.decoder.parameters(), lr=0.01)
        self.criterion = nn.CrossEntropyLoss()

    def train(self, inp, target, chunk_len=200):
        """Run one optimization step over a chunk.

        inp/target are aligned sequences of per-character tensors; returns the
        mean per-character loss as a Python float.
        """
        hidden = self.decoder.init_hidden()
        self.decoder.zero_grad()
        loss = 0
        for c in range(chunk_len):
            out, hidden = self.decoder(inp[c], hidden)
            loss += self.criterion(out, target[c])
        loss.backward()
        self.optimizer.step()
        # `loss.data[0]` was removed in PyTorch >= 0.5; `.item()` is the
        # supported way to extract a float from a 0-dim tensor.
        return loss.item() / chunk_len

    def generate(self, prime_str, predict_len=100, temperature=0.8):
        """Sample `predict_len` characters, seeded with `prime_str`.

        Lower `temperature` -> more conservative (peaked) sampling.
        """
        predicted = prime_str
        hidden = self.decoder.init_hidden()
        prime_input = char_tensor(prime_str, self.decoder.gpu)

        # Use prime string to build up hidden state
        for p in range(len(prime_str) - 1):
            _, hidden = self.decoder(prime_input[p], hidden)
        inp = prime_input[-1]

        for p in range(predict_len):
            out, hidden = self.decoder(inp, hidden)

            # Sample from the network output as a multinomial distribution.
            # (The original computed `out_dist` twice in a row — the duplicate
            # statement was removed; it had no effect.)
            out_dist = out.data.view(-1).div(temperature).exp()
            # .item() converts the sampled 0-dim index tensor to a Python int
            # so it can index the `all_characters` string.
            top_i = torch.multinomial(out_dist, 1)[0].item()

            # Add predicted character to string and use as next input
            predicted_char = all_characters[top_i]
            predicted += predicted_char
            inp = char_tensor(predicted_char, self.decoder.gpu)

        return predicted

    def save(self):
        """Serialize the whole decoder module to save/char-rnn-gru.pt."""
        model_name = "char-rnn-gru.pt"
        if not os.path.exists("save"):
            os.mkdir("save")
        # NOTE: saving the full module (not just a state_dict) ties the
        # checkpoint to this exact class/source layout.
        torch.save(self.decoder, "save/%s" % model_name)
        print("--------------> [Checkpoint] Save model into save/%s" % model_name)

    def load(self, model_path="save/char-rnn-gru.pt"):
        """Replace the decoder with a module deserialized from `model_path`."""
        self.decoder = torch.load(model_path)
# Inference/debug script: restore vocabularies and model weights, then run a
# single example sentence through the model and print the raw output.
# Depends on names defined elsewhere: pickle, normalize_string, RNN, device,
# tensor_from_sentence, torch.

# Restore the source/target vocabularies pickled by the training run.
with open('input.pickle', 'rb') as f:
    input_lang = pickle.load(f)
with open('target.pickle', 'rb') as f:
    target_lang = pickle.load(f)

# Each line of the corpus is "label<TAB>text"; normalize both fields.
with open('../assets/SMSSpamCollection.txt') as f:
    lines = f.readlines()
pairs = [[normalize_string(s) for s in line.split('\t')] for line in lines]

# Load the model
hidden_size = 256
model = RNN(input_lang.n_words, target_lang.n_words, hidden_size).to(device)
param = torch.load("model_data/model4.pth")
# Print parameters before and after loading to visually confirm the weights
# actually changed (debugging aid).
for p in model.parameters():
    print(p)
model.load_state_dict(param)
print("-" * 50)
for p in model.parameters():
    print(p)

# Run one sentence (pairs[1][1] = the message text) through the model,
# one token at a time.
input_tensor = tensor_from_sentence(input_lang, pairs[1][1]).to(device)
hidden = model.init_hidden().to(device)
output = torch.zeros(target_lang.n_words).to(device)
for i in range(input_tensor.size(0)):
    # NOTE(review): `hidden` is never updated inside this loop, and only the
    # last `output` survives. If `model` returns (output, hidden), this likely
    # should be `output, hidden = model(...)` — confirm against RNN.forward.
    output = model(input_tensor[i], hidden)
print(output)
# Expected target for comparison (pairs[1][0] = the label field).
print(tensor_from_sentence(target_lang, pairs[1][0]))
# Training-loop script for a word/char-level RNN language model.
# NOTE(review): this chunk is truncated mid-expression at the end
# (`val = (target_variable.data.view(-1).eq(`); the remainder of the loop
# body and the `except` clause for the `try` below are not visible here.
# Depends on names defined elsewhere: nn, torch, rnn, learning_rate,
# num_epochs, train_sequences, Variable, repackage_hidden.

criterion = nn.CrossEntropyLoss()
# Only optimize parameters that require gradients (e.g. frozen embeddings
# are skipped).
optimizer = torch.optim.Adagrad(filter(lambda p: p.requires_grad, rnn.parameters()), lr=learning_rate)
accuracy = 0
epoch_loss = []
epoch_accuracy = []
train_accuracy = []
epochs = []
try:
    #Call train on the model
    for epoch in range(1, num_epochs + 1):
        epochs.append(epoch)
        hidden = rnn.init_hidden()
        loss_total = 0
        acc = 0
        # Get training data for this cycle
        for i, sequence in enumerate(train_sequences):
            # Next-token prediction: input is the sequence minus its last
            # token, target is the sequence shifted left by one.
            input_variable = Variable(torch.LongTensor(sequence[:-1]))
            targets = sequence[1:]
            target_variable = Variable(torch.LongTensor(targets))
            # Detach hidden state from the previous sequence's graph
            # (truncated BPTT).
            hidden = repackage_hidden(hidden)
            rnn.zero_grad()
            output, hidden = rnn(input_variable, hidden)
            loss = criterion(output, target_variable.contiguous().view(-1))
            # Truncated here in the visible source:
            val = (target_variable.data.view(-1).eq(
class Train(object):
    """Trains a character RNN on a text corpus with random-chunk sampling.

    Relies on names defined elsewhere in this file/module: ``read_file``,
    ``RNN``, ``Generator``, ``char_tensor``, ``time_since``, ``n_characters``.
    """

    def __init__(self, training_file='../res/trump_tweets.txt',
                 model_file='../res/model.pt', n_epochs=1000000,
                 hidden_size=256, n_layers=2, learning_rate=0.001,
                 chunk_len=140):
        self.training_file = training_file
        self.model_file = model_file
        self.n_epochs = n_epochs
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.learning_rate = learning_rate
        self.chunk_len = chunk_len
        self.file, self.file_len = read_file(training_file)
        # Resume from an existing checkpoint when one is present.
        if os.path.isfile(model_file):
            self.decoder = torch.load(model_file)
            print('Loaded old model!')
        else:
            self.decoder = RNN(n_characters, hidden_size, n_characters, n_layers)
            print('Constructed new model!')
        self.decoder_optimizer = torch.optim.Adam(self.decoder.parameters(), learning_rate)
        self.criterion = nn.CrossEntropyLoss()
        self.generator = Generator(self.decoder)

    def train(self, inp, target):
        """One optimization step over a chunk; returns mean per-char loss."""
        hidden = self.decoder.init_hidden()
        self.decoder.zero_grad()
        loss = 0
        for c in range(self.chunk_len):
            # Call the module, not .forward(), so Module.__call__ hooks run.
            output, hidden = self.decoder(inp[c], hidden)
            loss += self.criterion(output, target[c])
        loss.backward()
        self.decoder_optimizer.step()
        # `loss.data[0]` was removed in PyTorch >= 0.5; `.item()` is the
        # supported accessor for a 0-dim tensor.
        return loss.item() / self.chunk_len

    def save(self):
        """Serialize the whole decoder module to self.model_file."""
        torch.save(self.decoder, self.model_file)
        print('Saved as %s' % self.model_file)

    def random_training_set(self, chunk_len):
        """Pick a random chunk; return (input, target) shifted by one char."""
        start_index = random.randint(0, self.file_len - chunk_len)
        end_index = start_index + chunk_len + 1
        chunk = self.file[start_index:end_index]
        inp = char_tensor(chunk[:-1])
        target = char_tensor(chunk[1:])
        return inp, target

    def start(self):
        """Main loop: train, checkpoint on improvement, print a sample."""
        start_time = time.time()
        print("Training for %d epochs..." % self.n_epochs)
        best_loss = None
        for epoch in range(1, self.n_epochs + 1):
            loss = self.train(*self.random_training_set(self.chunk_len))
            # `is None` (not truthiness): a legitimate best_loss of 0.0 must
            # not trigger a redundant re-save.
            if best_loss is None or loss < best_loss:
                self.save()
                best_loss = loss
            print('[%s (%d %d%%) %.4f]' % (time_since(start_time), epoch,
                                           epoch / self.n_epochs * 100, loss))
            print(self.generator.generate(), '\n')
        print("Finished training, saving...")
        self.save()
# Epoch-level training driver: trains a sequence-labelling RNN and logs
# loss/accuracy/F1 to console and TensorBoard.
# Depends on names defined elsewhere: RNN, EMBEDDING_DIM, HIDDEN_DIM,
# word_vocab, label_vocab, device, optim, LR_RATE, EPOCH, BATCH_SIZE,
# train, test, train_loader, val_loader, writer.
# NOTE(review): this chunk may continue past the visible source — the
# validation_* lists are filled nowhere in view.

model = RNN(EMBEDDING_DIM, HIDDEN_DIM, len(word_vocab), len(label_vocab))
model.to(device)

# cost function
optimizer = optim.Adam(model.parameters(), lr=LR_RATE)

# Define structures for loss, accuracy values
training_loss = []
training_acc = []
training_f1 = []
validation_loss = []
validation_acc = []
validation_f1 = []

for e in range(EPOCH):
    # NOTE(review): `hidden` is initialized each epoch but never passed to
    # train() — either dead code or train() re-initializes internally; verify.
    hidden = model.init_hidden(BATCH_SIZE)

    # Training and saving the parameters
    train_loss, train_acc, train_f1 = train(model, train_loader, optimizer)

    # Testing on test dataset
    val_loss, val_acc, val_f1 = test(model, val_loader)

    print(
        "Epoch {} - Training loss: {} - Training accuracy: {} Training F1: {}"
        .format(e, train_loss, train_acc, train_f1))

    # Accumulate per-epoch training metrics and mirror them to TensorBoard.
    training_loss.append(train_loss)
    training_acc.append(train_acc)
    training_f1.append(train_f1)
    writer.add_scalar('Loss/train', train_loss, e)
    writer.add_scalar('Accuracy/train', train_acc, e)