# Only the leading slice of the knowledge base is used; its size is the
# "kbLenPercentage" hyper-parameter applied to the total number of elements.
kb_len = int(
    len(knowledge_base) * hparams_relation_classifier["kbLenPercentage"])
print("Reading the knowledge base (" + str(kb_len) + " elements)")

# Encode each selected element: the question is mapped through the
# vocabulary's sentence2indices and the relation through relation_to_int.
X = []
Y = []
cnt = 0  # stays 0 when the selected slice is empty
for cnt, kb_entry in enumerate(knowledge_base[:kb_len], start=1):
    # "\r" rewinds the cursor so the progress indicator overwrites itself.
    print("Progress: {:2.1%}".format(cnt / kb_len), end="\r")
    X.append(
        relation_classifier_vocabulary.sentence2indices(kb_entry["question"]))
    Y.append(relation_to_int(kb_entry["relation"]))
print("\nDone.")

# Relation ids to one-hot encoding (16 classes):
Y = keras.utils.np_utils.to_categorical(Y, 16)

# Pad every encoded question up to the length of the longest one:
longest_sentence_length = max(map(len, X))
X = keras.preprocessing.sequence.pad_sequences(
    sequences=X,
    maxlen=longest_sentence_length,
)

# Split the encoded data into train, dev and test sets according to the
# "kbSplit" hyper-parameter:
(X_train, Y_train,
 X_dev, Y_dev,
 X_test, Y_test) = split_dataset(
    X, Y, hparams_relation_classifier["kbSplit"])