Example #1
import logging

import torch
import torch.nn as nn

# EncoderRNN, DecoderRNN, Seq2seq and the global `device` are assumed to be
# provided by the surrounding project.
def initialize_model(opt, src, tgt, train):
    # build vocabulary
    src.build_vocab(train.dataset, max_size=opt.src_vocab)
    tgt.build_vocab(train.dataset, max_size=opt.tgt_vocab)
    input_vocab = src.vocab
    output_vocab = tgt.vocab

    # Initialize model
    hidden_size = opt.hidden_size
    decoder_hidden_size = hidden_size * 2 if opt.bidirectional else hidden_size
    encoder = EncoderRNN(len(src.vocab),
                         opt.max_len,
                         hidden_size,
                         opt.embedding_size,
                         dropout_p=opt.dropout_p_encoder,
                         n_layers=opt.n_layers,
                         bidirectional=opt.bidirectional,
                         rnn_cell=opt.rnn_cell,
                         variable_lengths=True)
    decoder = DecoderRNN(
        len(tgt.vocab),
        opt.max_len,
        decoder_hidden_size,
        dropout_p=opt.dropout_p_decoder,
        n_layers=opt.n_layers,
        use_attention=opt.attention,
        attention_method=opt.attention_method,
        use_positional_attention=opt.positional_attention,
        bidirectional=opt.bidirectional,
        rnn_cell=opt.rnn_cell,
        eos_id=tgt.eos_id,
        sos_id=tgt.sos_id,
        positioning_generator_size=opt.positioning_generator_size,
        attention_mixer=opt.attention_mixer)

    # initialize weights using uniform distribution
    def uniform_weights_init(m):
        if isinstance(m, nn.LSTM):
            for name, param in m.named_parameters():
                if 'bias' in name:
                    nn.init.constant_(param, 0.0)
                elif 'weight' in name:
                    nn.init.uniform_(param, -opt.param_init, opt.param_init)
        elif isinstance(m, (nn.Linear, nn.Embedding)):
            nn.init.uniform_(m.weight, -opt.param_init, opt.param_init)

    if opt.param_init > 0.0:
        encoder.apply(uniform_weights_init)
        decoder.apply(uniform_weights_init)

    seq2seq = Seq2seq(encoder, decoder)

    if torch.cuda.device_count() > 1:
        logging.info("Using {} GPUs".format(torch.cuda.device_count()))
        seq2seq = nn.DataParallel(seq2seq)

    # xavier initialization if flag
    if opt.param_init_glorot:
        for p in seq2seq.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    seq2seq.to(device)

    return seq2seq, input_vocab, output_vocab
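A minimal usage sketch for the function above. The field names on opt mirror the options read inside initialize_model; the concrete values, and the torchtext-style src/tgt fields and train iterator, are illustrative assumptions rather than settings from the original project.

from types import SimpleNamespace

# Hypothetical option namespace; every attribute read by initialize_model
# must be present (values here are placeholders).
opt = SimpleNamespace(
    src_vocab=50000, tgt_vocab=50000,
    max_len=50, hidden_size=512, embedding_size=256,
    n_layers=2, bidirectional=True, rnn_cell='lstm',
    dropout_p_encoder=0.2, dropout_p_decoder=0.2,
    attention=True, attention_method='mlp',
    positional_attention=False, positioning_generator_size=64,
    attention_mixer='soft',
    param_init=0.1, param_init_glorot=False)

# src and tgt are assumed to be torchtext-style Fields and train an iterator
# whose .dataset exposes the training examples.
seq2seq, input_vocab, output_vocab = initialize_model(opt, src, tgt, train)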
Example #2
from sklearn.metrics import accuracy_score, confusion_matrix


# test_df, class_dict, word_emb, N_word, max_length, device and the helpers
# normalizeString, embeddedTensorFromSentence and evaluate are assumed to be
# defined elsewhere in the original script.
def evaluateTest(encoder, decoder):
    test_size = test_df.shape[0]
    y_true = []
    y_pred = []
    for i in range(test_size):
        sentence = test_df.iloc[i]["description"]
        sentence = normalizeString(sentence)
        input_tensor = embeddedTensorFromSentence(sentence, device, word_emb,
                                                  N_word)
        target_class = test_df.iloc[i]["department_new"]
        target_index = class_dict[target_class]
        y_true.append(target_index)
        output = evaluate(encoder, decoder, input_tensor, max_length, device)
        topv, topi = output.topk(1)
        y_pred.append(topi.cpu().numpy()[0][0])
    cnf_matrix = confusion_matrix(y_true, y_pred)
    print("Accuarcy")
    print(accuracy_score(y_true, y_pred))
    print(cnf_matrix)


# init_weights, trainIters, classes_ and the hyperparameters used below are
# assumed to be defined elsewhere in the original script.
encoder = EncoderRNN(N_word, hidden_size).to(device)
encoder.apply(init_weights)
decoder = DecoderRNN(hidden_size, CLASS_size).to(device)
decoder.apply(init_weights)
n_iterations = train_df.shape[0]
trainIters(encoder, decoder, n_iterations, print_every=50, plot_every=10)
print(classes_)
evaluateTest(encoder, decoder)
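The init_weights function applied to the encoder and decoder above is not shown in this snippet. A minimal sketch of what such an initializer could look like, modeled on the uniform initialization in Example #1 (the 0.1 range and the layer types covered are assumptions, not the original project's definition):

import torch.nn as nn

def init_weights(m):
    # Zero the recurrent biases and draw weights from U(-0.1, 0.1) for
    # recurrent, linear and embedding layers; other modules are untouched.
    if isinstance(m, nn.RNNBase):  # covers nn.RNN, nn.LSTM, nn.GRU
        for name, param in m.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0.0)
            elif 'weight' in name:
                nn.init.uniform_(param, -0.1, 0.1)
    elif isinstance(m, (nn.Linear, nn.Embedding)):
        nn.init.uniform_(m.weight, -0.1, 0.1)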