Example #1
import numpy
import numpy as np  # the snippet uses both the "numpy" and "np" names, so both imports are kept
from itertools import chain

# dp, DBHelperMethod, pad_data and convert_input_to_vocab are helpers from the
# surrounding project and are assumed to be importable in this module.


def load_testing_data():
    dp.establish_db_connection()
    sequence_list = []
    padded_output = []
    #sentence_numbers = []
    testing_dataset = DBHelperMethod.load_dataset_by_type("testing")
    #testing_dataset = DBHelperMethod.load_dataset_by_type_and_sentence_number_for_testing_purpose("testing", 3228)
    #sentence_numbers.append(3228)
    sentence_numbers = DBHelperMethod.get_list_of_sentence_numbers_by(
        "testing")
    labels_and_equiv_encoding = dp.get_label_table()

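    # pad each sentence's characters and diacritic label encodings to a common sequence length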
    for each_sentence_number in sentence_numbers:
        selected_sentence = testing_dataset[numpy.where(
            testing_dataset[:, 3] == str(each_sentence_number))]
        x, y = pad_data(selected_sentence, selected_sentence[:, [0, 1]],
                        labels_and_equiv_encoding)

        sequence_list.append(x)
        padded_output.append(y)

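    # build the character vocabulary, map the padded sequences to index arrays,
    # and flatten the per-sentence label lists into a single array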
    padded_input, vocabulary, vocabulary_inv = convert_input_to_vocab(
        sequence_list)
    padded_output = numpy.array(list(chain(*padded_output)))

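    # column lookups into the raw dataset: word, input letter, diacritized output letter,
    # sentence number, letter position and undiacritized word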
    testing_words = np.take(testing_dataset, 4, axis=1)
    input_testing_letters = np.take(testing_dataset, 0, axis=1)
    op_testing_letters = np.take(testing_dataset, 5, axis=1)
    sent_num = np.take(testing_dataset, 3, axis=1)
    letters_loc = np.take(testing_dataset, 6, axis=1)
    undiac_word = np.take(testing_dataset, 7, axis=1)

    return padded_input, padded_output, vocabulary, vocabulary_inv, testing_words, input_testing_letters, op_testing_letters,\
           sent_num, letters_loc, undiac_word
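
For reference, the ten arrays come back in the order listed in the return statement; a minimal unpacking sketch whose variable names mirror Example #3 below, which shows the full evaluation flow that consumes them:

X_test, y_test, vocab, vocab_inv, words, ip_letters, op_letters, \
    sent_nums, letter_locs, undiac_words = load_testing_data()
print(len(X_test), len(y_test), len(vocab))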
Example #2
# (Requires the same imports and project helpers as Example #1.)
def load_testing_data():
    dp.establish_db_connection()
    sequence_list = []
    padded_output = []

    testing_dataset = DBHelperMethod.load_dataset_by_type("testing")
    sentence_numbers = DBHelperMethod.get_list_of_sentence_numbers_by(
        "testing")
    labels_and_equiv_encoding = dp.get_label_table()

    for each_sentence_number in sentence_numbers:
        selected_sentence = testing_dataset[numpy.where(
            testing_dataset[:, 3] == str(each_sentence_number))]
        x, y = pad_data(selected_sentence, selected_sentence[:, [0, 1]],
                        labels_and_equiv_encoding)

        sequence_list.append(x)
        padded_output.append(y)

    padded_input, vocabulary, vocabulary_inv = convert_input_to_vocab(
        sequence_list)
    padded_output = numpy.array(list(chain(*padded_output)))

    return padded_input, padded_output, vocabulary, vocabulary_inv
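
A minimal sketch of feeding the four returned values to a trained model for evaluation; the use of Keras and the checkpoint name are assumptions borrowed from Example #3, not part of this snippet, and the comparison assumes the model's output layer matches the one-hot label encoding:

from keras.models import load_model

X_test, y_test, vocabulary, vocabulary_inv = load_testing_data()
model = load_model('checkpoint.hdf5')  # placeholder checkpoint path
predictions = model.predict(X_test, verbose=1)
accuracy = (predictions.argmax(axis=1) == y_test.argmax(axis=1)).mean()
print("per-character accuracy:", accuracy)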
Example #3

import numpy as np
from keras.models import load_model  # assumption: the .hdf5 checkpoint below is a Keras model

# dp, load_testing_data and get_all_dic_words come from the surrounding project.

if __name__ == "__main__":

    X_test, y_test, vocabulary_test, vocabulary_inv_test, words, ip_letters, op_letters, sentences_num, loc, \
    undiac_word = load_testing_data()
    dictionary = get_all_dic_words()

    model = load_model('weights.010-0.8941.hdf5')
    model.summary()  # summary() prints directly and returns None
    prediction = model.predict(X_test, verbose=1)
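    # keep only output columns 36-48 (the slice end is exclusive) before taking the per-character argmax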
    prediction = prediction[:, 36:49]
    nn_indices = prediction.argmax(axis=1)
    expected_indices = y_test.argmax(axis=1)

    labels = dp.get_label_table()

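    # map the predicted and expected class indices to their textual labels (column 1 of the label table)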
    nn_labels = labels[nn_indices]
    nn_labels = np.take(nn_labels, 1, axis=1)
    expected_labels = labels[expected_indices]
    expected_labels = np.take(expected_labels, 1, axis=1)

    # sanity check: predictions, references and input letters must align one-to-one
    if not (len(nn_labels) == len(expected_labels) == len(ip_letters)):
        raise Exception("mismatch in number of elements in the array")

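    # re-attach the predicted diacritics to the input letters for comparison with the reference output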
    nn_op_letters = dp.concatenate_char_and_diacritization(
        ip_letters, nn_labels)
    expected_op_letters = op_letters
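
The snippet is truncated here; a short sketch of the comparison that would naturally follow, assuming nn_op_letters and expected_op_letters are equal-length sequences of diacritized characters (nothing below comes from the original script):

correct = sum(1 for predicted, expected in zip(nn_op_letters, expected_op_letters)
              if predicted == expected)
print("diacritization accuracy: %.4f" % (correct / len(expected_op_letters)))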