Esempio n. 1
0
    # Build the relation-classifier dataset from the knowledge base:
    # X collects one encoded question per element, Y the matching relation id.
    X = []
    Y = []

    # Only a leading slice of the knowledge base is used. Its size is the
    # "kbLenPercentage" hyper-parameter times the total length, truncated
    # towards zero by int().
    cnt = 0
    kb_len = int(
        len(knowledge_base) * hparams_relation_classifier["kbLenPercentage"])
    print("Reading the knowledge base (" + str(kb_len) + " elements)")

    # Build X and Y:
    for elem in knowledge_base[:kb_len]:

        # cnt is only used for the progress display; end="\r" returns the
        # cursor to the start of the line so each update overwrites the last.
        cnt += 1
        print("Progress: {:2.1%}".format(cnt / kb_len), end="\r")

        # Each element is indexed by the "question" and "relation" keys.
        # sentence2indices presumably returns a sequence of vocabulary
        # indices (its len() is taken below) - TODO confirm against the
        # vocabulary class.
        X.append(
            relation_classifier_vocabulary.sentence2indices(elem["question"]))
        # relation_to_int presumably maps the relation to an integer class
        # id - verify against its definition.
        Y.append(relation_to_int(elem["relation"]))

    print("\nDone.")

    # Relation to one-hot encoding:
    # 16 is passed as the number of classes and is hard-coded here.
    # NOTE(review): it must cover every id relation_to_int can return -
    # confirm. Also, the keras.utils.np_utils path may not exist in recent
    # Keras releases - confirm against the installed version.
    Y = keras.utils.np_utils.to_categorical(Y, 16)

    # Add padding to X:
    # Every sequence is padded to the length of the longest one; padding
    # side and fill value are left at the library defaults.
    # NOTE(review): if kb_len is 0, X is empty and max() raises ValueError.
    longest_sentence_length = max([len(sentence) for sentence in X])
    X = keras.preprocessing.sequence.pad_sequences(
        sequences=X, maxlen=longest_sentence_length)

    # Split training set into train, dev and test:
    # split_dataset is defined outside this view; its result is unpacked
    # into six values here. "kbSplit" presumably holds the split
    # proportions - TODO confirm.
    X_train, Y_train, X_dev, Y_dev, X_test, Y_test = split_dataset(
        X, Y, hparams_relation_classifier["kbSplit"])