Example #1
    test_text = load_data(test_path)

    # sentence tokenizer (MAX_LEN is the maximum length of the input text)
    st = SentenceTokenizer(vocabulary, MAX_LEN)

    # tokenize test text
    test_X, _, _ = st.tokenize_sentences(test_text)

    # load model
    model = deepmoji_architecture(nb_classes=nb_classes,
                                  nb_tokens=nb_tokens,
                                  maxlen=MAX_LEN)

    load_specific_weights(model,
                          model_path,
                          nb_tokens,
                          MAX_LEN,
                          nb_classes=nb_classes)

    pred_y_prob = model.predict(test_X)

    # convert predicted probabilities to class labels
    if nb_classes == 2:
        pred_y = [0 if p < 0.5 else 1 for p in pred_y_prob]
    else:
        pred_y = np.argmax(pred_y_prob, axis=1)

    # save predictions as "<text>\t<label>" lines
    with open(save_path, "w") as f:
        for i in range(len(test_text)):
            f.write("{}\t{}\r\n".format(test_text[i], pred_y[i]))

    print("Results were saved to {}".format(save_path))
Example #2
        test_y = np.array([label2index[l] for l in test_label])

        nb_classes = len(label2index)
        nb_tokens = len(vocabulary)

        # use 20% of the training set for validation
        train_X, val_X, train_y, val_y = train_test_split(train_X, train_y,
                                                          test_size=0.2, random_state=0)
        # build the model architecture
        model = deepmoji_architecture(nb_classes=nb_classes,
                                      nb_tokens=nb_tokens,
                                      maxlen=MAX_LEN, embed_dropout_rate=0.25, final_dropout_rate=0.5, embed_l2=1E-6)
        model.summary()

        # load pretrained representation model
        load_specific_weights(model, model_path, nb_tokens, MAX_LEN,
                              exclude_names=["softmax"])
        
        # train model
        model, acc = finetune(model, [train_X, val_X, test_X], [train_y, val_y, test_y], nb_classes, 100,
                              method="chain-thaw", verbose=2)
        
        pred_y_prob = model.predict(test_X)

        # convert predicted probabilities to class labels
        if nb_classes == 2:
            pred_y = [0 if p < 0.5 else 1 for p in pred_y_prob]
        else:
            pred_y = np.argmax(pred_y_prob, axis=1)

        # evaluation
        print("*****************************************")
        print("Fold %d" % fold)
Example #3
        vocabulary = json.load(f_vocab)
    nb_tokens = len(vocabulary)

    test_text = load_data(test_path)

    # sentence tokenizer (MAX_LEN is the maximum length of the input text)
    st = SentenceTokenizer(vocabulary, MAX_LEN)

    # tokenize test text
    test_X, _, _ = st.tokenize_sentences(test_text)

    # load model
    model = deepmoji_architecture(nb_classes=nb_classes,
                                  nb_tokens=nb_tokens,
                                  maxlen=MAX_LEN)

    load_specific_weights(model, model_path, nb_tokens, MAX_LEN)

    pred_y_prob = model.predict(test_X)

    # convert predicted probabilities to class labels
    if nb_classes == 2:
        pred_y = [0 if p < 0.5 else 1 for p in pred_y_prob]
    else:
        pred_y = np.argmax(pred_y_prob, axis=1)

    # save predictions as "<text>\t<label>" lines
    with open(save_path, "w") as f:
        for i in range(len(test_text)):
            f.write("{}\t{}\r\n".format(test_text[i], pred_y[i]))

    print("Results were saved to {}".format(save_path))