Example #1
class Model():
    def __init__(self, input_size, hidden_size, output_size, n_layers=1, gpu=-1):
        self.decoder = RNN(input_size, hidden_size, output_size, n_layers, gpu)
        if gpu >= 0:
            print("Use GPU %d" % torch.cuda.current_device())
            self.decoder.cuda()

        self.optimizer = torch.optim.Adam(self.decoder.parameters(), lr=0.01)
        self.criterion = nn.CrossEntropyLoss()

    def train(self, inp, target, chunk_len=200):
        hidden = self.decoder.init_hidden()
        self.decoder.zero_grad()
        loss = 0

        for c in range(chunk_len):
            out, hidden = self.decoder(inp[c], hidden)
            loss += self.criterion(out, target[c])

        loss.backward()
        self.optimizer.step()

        return loss.item() / chunk_len

    def generate(self, prime_str, predict_len=100, temperature=0.8):
        predicted = prime_str

        hidden = self.decoder.init_hidden()
        prime_input = char_tensor(prime_str, self.decoder.gpu)

        # Use prime string to build up hidden state
        for p in range(len(prime_str) - 1):
            _, hidden = self.decoder(prime_input[p], hidden)

        inp  = prime_input[-1]
        for p in range(predict_len):
            out, hidden = self.decoder(inp, hidden)

            # Sample from the network output as a multinomial distribution
            out_dist = out.data.view(-1).div(temperature).exp()
            top_i = torch.multinomial(out_dist, 1)[0]

            # Add predicted character to string and use as next input
            predicted_char = all_characters[top_i]
            predicted += predicted_char
            inp = char_tensor(predicted_char, self.decoder.gpu)

        return predicted

    def save(self):
        model_name = "char-rnn-gru.pt"

        if not os.path.exists("save"):
            os.mkdir("save")
        torch.save(self.decoder, "save/%s" % model_name)
        print("--------------> [Checkpoint] Save model into save/%s" % model_name)

    def load(self, model_path="save/char-rnn-gru.pt"):
        self.decoder = torch.load(model_path)
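A hypothetical driver for Example #1's Model class, sketching how it might be exercised end to end. n_characters and the random_training_set helper are assumptions borrowed from the common char-rnn pattern (compare Example #4 below), not part of this example:

model = Model(input_size=n_characters, hidden_size=256, output_size=n_characters)
for step in range(2000):
    # random_training_set is an assumed helper returning (input, target) chunks
    inp, target = random_training_set(chunk_len=200)
    loss = model.train(inp, target, chunk_len=200)
    if step % 100 == 0:
        print("step %d, loss %.4f" % (step, loss))
print(model.generate("The ", predict_len=200, temperature=0.8))
model.save()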
Example #2

    with open('input.pickle', 'rb') as f:
        input_lang = pickle.load(f)
    with open('target.pickle', 'rb') as f:
        target_lang = pickle.load(f)
    with open('../assets/SMSSpamCollection.txt') as f:
        lines = f.readlines()
        pairs = [[normalize_string(s) for s in line.split('\t')]
                 for line in lines]

    # Load the model
    hidden_size = 256
    model = RNN(input_lang.n_words, target_lang.n_words,
                hidden_size).to(device)
    param = torch.load("model_data/model4.pth")
    for p in model.parameters():
        print(p)
    model.load_state_dict(param)
    print("-" * 50)
    for p in model.parameters():
        print(p)

    input_tensor = tensor_from_sentence(input_lang, pairs[1][1]).to(device)
    hidden = model.init_hidden().to(device)
    output = torch.zeros(target_lang.n_words).to(device)
    for i in range(input_tensor.size(0)):
        output, hidden = model(input_tensor[i], hidden)

    print(output)
    print(tensor_from_sentence(target_lang, pairs[1][0]))
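Example #2 calls normalize_string without defining it. A minimal sketch in the style of the PyTorch seq2seq tutorial, assuming the helper lowercases, strips accents, and pads punctuation:

import re
import unicodedata

def normalize_string(s):
    # Lowercase, drop accents (Mn = nonspacing marks), pad punctuation.
    s = ''.join(c for c in unicodedata.normalize('NFD', s.lower().strip())
                if unicodedata.category(c) != 'Mn')
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s.strip()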
Example #3
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adagrad(filter(lambda p: p.requires_grad,
                                       rnn.parameters()),
                                lr=learning_rate)

accuracy = 0
epoch_loss = []
epoch_accuracy = []
train_accuracy = []
epochs = []
try:
    # Train the model
    for epoch in range(1, num_epochs + 1):

        epochs.append(epoch)
        hidden = rnn.init_hidden()
        loss_total = 0
        acc = 0
        # Get training data for this cycle
        for i, sequence in enumerate(train_sequences):

            input_variable = Variable(torch.LongTensor(sequence[:-1]))
            targets = sequence[1:]
            target_variable = Variable(torch.LongTensor(targets))

            hidden = repackage_hidden(hidden)
            rnn.zero_grad()
            output, hidden = rnn(input_variable, hidden)
            loss = criterion(output, target_variable.contiguous().view(-1))

            val = (target_variable.data.view(-1).eq(
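Example #3 depends on a repackage_hidden helper that is not shown. A minimal sketch of the usual implementation, which detaches the hidden state so backpropagation is truncated at each new sequence (assuming the state is a tensor or a tuple of tensors):

import torch

def repackage_hidden(h):
    # Detach the hidden state from its computation history.
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)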
Example #4
class Train(object):
    def __init__(self,
                 training_file='../res/trump_tweets.txt',
                 model_file='../res/model.pt',
                 n_epochs=1000000,
                 hidden_size=256,
                 n_layers=2,
                 learning_rate=0.001,
                 chunk_len=140):
        self.training_file = training_file
        self.model_file = model_file
        self.n_epochs = n_epochs
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.learning_rate = learning_rate
        self.chunk_len = chunk_len
        self.file, self.file_len = read_file(training_file)
        if os.path.isfile(model_file):
            self.decoder = torch.load(model_file)
            print('Loaded old model!')
        else:
            self.decoder = RNN(n_characters, hidden_size, n_characters,
                               n_layers)
            print('Constructed new model!')
        self.decoder_optimizer = torch.optim.Adam(self.decoder.parameters(),
                                                  learning_rate)
        self.criterion = nn.CrossEntropyLoss()
        self.generator = Generator(self.decoder)

    def train(self, inp, target):
        hidden = self.decoder.init_hidden()
        self.decoder.zero_grad()
        loss = 0
        for c in range(self.chunk_len):
            output, hidden = self.decoder(inp[c], hidden)
            loss += self.criterion(output, target[c])
        loss.backward()
        self.decoder_optimizer.step()
        return loss.item() / self.chunk_len

    def save(self):
        torch.save(self.decoder, self.model_file)
        print('Saved as %s' % self.model_file)

    def random_training_set(self, chunk_len):
        start_index = random.randint(0, self.file_len - chunk_len)
        end_index = start_index + chunk_len + 1
        chunk = self.file[start_index:end_index]
        inp = char_tensor(chunk[:-1])
        target = char_tensor(chunk[1:])
        return inp, target

    def start(self):
        start_time = time.time()
        print("Training for %d epochs..." % self.n_epochs)
        best_loss = None
        for epoch in range(1, self.n_epochs + 1):
            loss = self.train(*self.random_training_set(self.chunk_len))
            if best_loss is None or loss < best_loss:
                self.save()
                best_loss = loss
                print('[%s (%d %d%%) %.4f]' %
                      (time_since(start_time), epoch,
                       epoch / self.n_epochs * 100, loss))
                print(self.generator.generate(), '\n')
        print("Finished training, saving...")
        self.save()
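Example #4 assumes read_file, char_tensor, n_characters, and a Generator class from its surrounding project. A minimal sketch of the first three, following the common char-rnn convention (an assumption, not the example's actual code):

import string
import torch

all_characters = string.printable
n_characters = len(all_characters)

def read_file(path):
    # Return the file contents and their length.
    with open(path) as f:
        data = f.read()
    return data, len(data)

def char_tensor(s):
    # Map each character to its index in the vocabulary.
    return torch.tensor([all_characters.index(c) for c in s], dtype=torch.long)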
Example #5
    model = RNN(EMBEDDING_DIM, HIDDEN_DIM, len(word_vocab), len(label_vocab))
    model.to(device)

    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=LR_RATE)

    # Track per-epoch loss, accuracy, and F1 for training and validation
    training_loss = []
    training_acc = []
    training_f1 = []
    validation_loss = []
    validation_acc = []
    validation_f1 = []

    for e in range(EPOCH):
        hidden = model.init_hidden(BATCH_SIZE)

        # Training and saving the parameters
        train_loss, train_acc, train_f1 = train(model, train_loader, optimizer)

        # Testing on test dataset
        val_loss, val_acc, val_f1 = test(model, val_loader)

        print(
            "Epoch {} - Training loss: {} - Training accuracy: {} - Training F1: {}"
            .format(e, train_loss, train_acc, train_f1))
        training_loss.append(train_loss)
        training_acc.append(train_acc)
        training_f1.append(train_f1)
        writer.add_scalar('Loss/train', train_loss, e)
        writer.add_scalar('Accuracy/train', train_acc, e)
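Example #5 logs to a writer created elsewhere. The standard way to set one up with PyTorch's built-in TensorBoard support (the log_dir value here is an assumption):

from torch.utils.tensorboard import SummaryWriter

# Scalars logged via writer.add_scalar(...) land under this directory.
writer = SummaryWriter(log_dir="runs/rnn_experiment")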