Example 1
import time

import torch
import torch.nn as nn
import torch.optim as optim


def trainIters(encoder,
               decoder,
               data_df,
               n_iters,
               print_every=1000,
               plot_every=100,
               learning_rate=0.05):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    criterion = nn.NLLLoss()  # a weight tensor could be passed here for class weighting
    for iter in range(1, n_iters + 1):
        sentence = data_df.iloc[iter - 1]["description"]  # was train_df; use the frame passed in
        sentence = normalizeString(sentence)
        input_tensor = embeddedTensorFromSentence(sentence, device, word_emb,
                                                  N_word)
        target_class = data_df.iloc[iter - 1]["department_new"]
        # One-hot target vector for this example's class
        class_index = [0] * CLASS_size
        class_index[class_dict[target_class]] = 1
        target_tensor = torch.tensor(class_index,
                                     dtype=torch.long,
                                     device=device).view(1, CLASS_size)
        loss = train(input_tensor, target_tensor, encoder, decoder,
                     encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss
        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' %
                  (timeSince(start, iter / n_iters), iter,
                   iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0
    showPlot(plot_losses)
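The one-hot construction in the loop above can also be expressed with PyTorch's built-in helper; a minimal equivalent sketch, reusing the same names as the example:

import torch.nn.functional as F

# Equivalent one-hot target of shape (1, CLASS_size), dtype int64
target_index = torch.tensor([class_dict[target_class]], device=device)
target_tensor = F.one_hot(target_index, num_classes=CLASS_size)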
Example 2
from sklearn.metrics import accuracy_score, confusion_matrix


def evaluateTest(encoder, decoder):
    test_size = test_df.shape[0]
    y_true = []
    y_pred = []
    for iter in range(test_size):  # was range(0, test_size + 1), which over-ran and started at iloc[-1]
        sentence = test_df.iloc[iter]["description"]
        sentence = normalizeString(sentence)
        input_tensor = embeddedTensorFromSentence(sentence, device, word_emb,
                                                  N_word)
        target_class = test_df.iloc[iter]["department_new"]
        target_index = class_dict[target_class]
        y_true.append(target_index)
        output, attention = evaluate(encoder, decoder, input_tensor, max_length,
                                     device)
        topv, topi = output.topk(1)
        y_pred.append(topi.item())  # index of the top-scoring class
    cnf_matrix = confusion_matrix(y_true, y_pred)
    print("Accuarcy")
    print(accuracy_score(y_true, y_pred))
    print(cnf_matrix)
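If per-class numbers are also of interest, scikit-learn's classification_report can be printed from the same y_true/y_pred lists collected above; a small sketch:

from sklearn.metrics import classification_report

# Per-class precision, recall and F1 for the test predictions
print(classification_report(y_true, y_pred))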
	print("Accuarcy")
	print(accuracy_score(y_true, y_pred))
	print(cnf_matrix)
		

#if __name__ == "__main__":
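# Hypothetical setup for the label globals used below; the real definitions are
# not shown in this example, but class_dict / CLASS_size could be derived from
# the training frame like this:
classes = sorted(train_df["department_new"].unique())
class_dict = {c: i for i, c in enumerate(classes)}
CLASS_size = len(classes)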
encoder = EncoderRNN(N_word, hidden_size).to(device)
decoder = AttnDecoderRNN(hidden_size, CLASS_size, dropout_p=0.1,
                         max_length=max_length).to(device)
n_iterations = train_df.shape[0]
#trainIters(encoder, decoder, train_df, n_iterations, print_every=50, plot_every=10)
trainIters(encoder, decoder, train_df, 1, print_every=50, plot_every=10)  # one-iteration smoke test
# Sanity-check a single training example end to end
sentence = train_df.iloc[0]["description"]
sentence = normalizeString(sentence)
input_tensor = embeddedTensorFromSentence(sentence, device, word_emb, N_word)
target_class = train_df.iloc[0]["department_new"]
target_index = class_dict[target_class]
print(target_index)
output, attention = evaluate(encoder, decoder, input_tensor, max_length, device)
topv, topi = output.topk(1)
#torch.save(encoder.state_dict(), "encoder")
#torch.save(decoder.state_dict(), "decoder")
#encoder.load_state_dict(torch.load("encoder"))
#decoder.load_state_dict(torch.load("decoder"))
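# A fuller checkpointing sketch (filenames hypothetical): because state_dicts
# are saved above, the matching restore recreates the modules first and then
# loads the weights.
torch.save(encoder.state_dict(), "encoder.pt")
torch.save(decoder.state_dict(), "decoder.pt")

encoder = EncoderRNN(N_word, hidden_size).to(device)
decoder = AttnDecoderRNN(hidden_size, CLASS_size, dropout_p=0.1,
                         max_length=max_length).to(device)
encoder.load_state_dict(torch.load("encoder.pt", map_location=device))
decoder.load_state_dict(torch.load("decoder.pt", map_location=device))
encoder.eval()  # disable dropout for inference
decoder.eval()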