Esempio n. 1
0
def load_testing_data():
    """Build per-sentence padded inputs and labels for the "testing" dataset.

    Rows are grouped by sentence number (column 3, compared as strings),
    each group is passed through ``pad_data`` together with its columns
    0 and 1, and the collected inputs are converted with
    ``convert_input_to_vocab``.  The per-sentence outputs are flattened one
    level into a single numpy array.

    Returns:
        A 10-tuple ``(padded_input, padded_output, vocabulary,
        vocabulary_inv, testing_words, input_testing_letters,
        op_testing_letters, sent_num, letters_loc, undiac_word)``.  The last
        six entries are columns 4, 0, 5, 3, 6 and 7 of the raw dataset, in
        that order.
    """
    dp.establish_db_connection()

    dataset = DBHelperMethod.load_dataset_by_type("testing")
    sentence_ids = DBHelperMethod.get_list_of_sentence_numbers_by("testing")
    label_table = dp.get_label_table()

    inputs_per_sentence = []
    labels_per_sentence = []
    for sentence_id in sentence_ids:
        # Column 3 is matched against the stringified sentence number.
        row_index = numpy.where(dataset[:, 3] == str(sentence_id))
        rows = dataset[row_index]

        sentence_input, sentence_labels = pad_data(
            rows, rows[:, [0, 1]], label_table)
        inputs_per_sentence.append(sentence_input)
        labels_per_sentence.append(sentence_labels)

    padded_input, vocabulary, vocabulary_inv = convert_input_to_vocab(
        inputs_per_sentence)
    padded_output = numpy.array(
        list(chain.from_iterable(labels_per_sentence)))

    # Raw dataset columns handed back to the caller alongside the tensors.
    (testing_words, input_testing_letters, op_testing_letters,
     sent_num, letters_loc, undiac_word) = (
        np.take(dataset, column, axis=1) for column in (4, 0, 5, 3, 6, 7))

    return (padded_input, padded_output, vocabulary, vocabulary_inv,
            testing_words, input_testing_letters, op_testing_letters,
            sent_num, letters_loc, undiac_word)
Esempio n. 2
0
def load_testing_data():
    """Load the "testing" dataset and return padded inputs plus labels.

    Inputs are built from dataset columns 0 and 6, labels from columns 0
    and 1, and sequence lengths from column 3.  Padding is delegated to
    ``Dp.pad_sentences`` with the fixed arguments ``4`` and ``10``.

    Returns:
        ``(sentences_padded, y, vocabulary, vocabulary_inv)``.
    """
    Dp.establish_db_connection()
    dataset = Dp.load_dataset_by_type("testing")

    nn_input = Dp.load_nn_input_dataset_string(dataset[:, [0, 6]])
    nn_labels = Dp.load_nn_labels_dataset_string(dataset[:, [0, 1]])

    # Only the lengths are needed here; the first result is discarded.
    _sentence_numbers, sentence_lengths = Dp.load_nn_seq_lengths(
        dataset[:, [3]])

    padded, vocabulary, vocabulary_inv = Dp.pad_sentences(
        nn_input, sentence_lengths, 4, 10)

    return padded, nn_labels, vocabulary, vocabulary_inv
Esempio n. 3
0
def load_testing_data():
    """Load the "testing" dataset and pad it with ``dp.pad_sentences1``.

    Inputs come from ``dp.load_nn_input_dataset_string_space_only`` applied
    to dataset columns 0 and 6; labels come from columns 0 and 1; sequence
    lengths come from column 3.  ``req_char_index`` and ``window_size`` are
    read from the enclosing scope.

    Returns:
        ``(sentences_padded, y, vocabulary, vocabulary_inv)``.
    """
    dp.establish_db_connection()
    dataset = DBHelperMethod.load_dataset_by_type("testing")

    nn_input = dp.load_nn_input_dataset_string_space_only(dataset[:, [0, 6]])
    nn_labels = dp.load_nn_labels_dataset_string(dataset[:, [0, 1]])

    # Only the lengths are needed here; the first result is discarded.
    _sentence_numbers, sentence_lengths = dp.load_nn_seq_lengths(
        dataset[:, [3]])

    padded, vocabulary, vocabulary_inv = dp.pad_sentences1(
        nn_input, sentence_lengths, req_char_index, window_size)

    return padded, nn_labels, vocabulary, vocabulary_inv
Esempio n. 4
0
def load_training_data():
    """Load the "training" dataset and return padded inputs plus labels.

    Inputs come from ``dp.load_nn_input_dataset_string_space_only`` applied
    to dataset columns 0 and 6; labels come from columns 0 and 1; sequence
    lengths come from column 3.  Padding is delegated to
    ``dp.pad_sentences`` with the fixed arguments ``4`` and ``10``.

    Returns:
        ``(sentences_padded, y, vocabulary, vocabulary_inv)``.
    """
    dp.establish_db_connection()
    dataset = dp.load_dataset_by_type("training")

    nn_input = dp.load_nn_input_dataset_string_space_only(dataset[:, [0, 6]])
    nn_labels = dp.load_nn_labels_dataset_string(dataset[:, [0, 1]])

    # Only the lengths are needed here; the first result is discarded.
    _sentence_numbers, sentence_lengths = dp.load_nn_seq_lengths(
        dataset[:, [3]])

    padded, vocabulary, vocabulary_inv = dp.pad_sentences(
        nn_input, sentence_lengths, 4, 10)

    return padded, nn_labels, vocabulary, vocabulary_inv
Esempio n. 5
0
def load_testing_data():
    """Load the "testing" dataset and pad both inputs and labels.

    Inputs (dataset columns 0 and 6) go through
    ``dp.extract_sent_and_pad``; labels (columns 0 and 1) go through
    ``dp.extract_sent_and_pad_output``.  Both receive the sequence lengths
    derived from column 3 and the ``window_size`` read from the enclosing
    scope.

    Returns:
        ``(sentences_padded, padded_output, vocabulary, vocabulary_inv)``.
    """
    dp.establish_db_connection()
    # This is the "testing" split, so the local is named accordingly.
    testing_rows = DBHelperMethod.load_dataset_by_type("testing")

    nn_input = dp.load_nn_input_dataset_string_space_only(
        testing_rows[:, [0, 6]])
    nn_labels = dp.load_nn_labels_dataset_string(testing_rows[:, [0, 1]])

    # Only the lengths are needed here; the first result is discarded.
    _sentence_numbers, sentence_lengths = dp.load_nn_seq_lengths(
        testing_rows[:, [3]])

    padded_input, vocabulary, vocabulary_inv = dp.extract_sent_and_pad(
        nn_input, sentence_lengths, window_size)
    padded_labels = dp.extract_sent_and_pad_output(
        nn_labels, sentence_lengths, window_size)

    return padded_input, padded_labels, vocabulary, vocabulary_inv
Esempio n. 6
0
def load_training_data():
    """Load the "training" dataset as one-to-one input/label numpy arrays.

    Inputs (dataset columns 0 and 6) are fed to
    ``dp.build_one_to_one_input_data`` together with the sequence lengths
    from column 3 and the ``req_char_index`` / ``window_size`` values read
    from the enclosing scope.  Labels come from columns 0 and 1.

    Returns:
        ``(inputs, labels, vocabulary, vocabulary_inv)`` where ``inputs``
        and ``labels`` have been wrapped with ``numpy.array``.
    """
    dp.establish_db_connection()
    dataset = DBHelperMethod.load_dataset_by_type("training")

    nn_input = dp.load_nn_input_dataset_string_space_only(dataset[:, [0, 6]])
    nn_labels = dp.load_nn_labels_dataset_string(dataset[:, [0, 1]])

    # Only the lengths are needed here; the first result is discarded.
    _sentence_numbers, sentence_lengths = dp.load_nn_seq_lengths(
        dataset[:, [3]])

    built_input, vocabulary, vocabulary_inv = dp.build_one_to_one_input_data(
        nn_input, sentence_lengths, req_char_index, window_size)

    inputs_array = numpy.array(built_input)
    labels_array = numpy.array(nn_labels)
    return inputs_array, labels_array, vocabulary, vocabulary_inv
Esempio n. 7
0
def load_testing_data():
    """Load the "testing" dataset as windows of characters and labels.

    The input sequence (dataset columns 0 and 6) and the label sequence
    (columns 0 and 1) are each cut into windows by
    ``sqp.create_window_of_chars`` using the ``window_size`` read from the
    enclosing scope.  Input windows are additionally encoded with
    ``dp.build_input_data_without_flattening`` against the vocabulary built
    from the inputs.  The final window of both sequences is dropped.

    Returns:
        ``(input_sentences, output_labels, vocab, vocab_inv)`` with the
        first two as numpy arrays.
    """
    dp.establish_db_connection()
    # This is the "testing" split, so the local is named accordingly.
    testing_rows = DBHelperMethod.load_dataset_by_type("testing")

    char_sequence = dp.load_nn_input_dataset_string_space_only(
        testing_rows[:, [0, 6]])
    label_sequence = dp.load_nn_labels_dataset_string(testing_rows[:, [0, 1]])
    vocab, vocab_inv = dp.build_vocab(char_sequence)

    # The result is not used below; the call is kept in place as in the
    # original flow.
    _sentence_numbers, _sentence_lengths = dp.load_nn_seq_lengths(
        testing_rows[:, [3]])

    char_windows = sqp.create_window_of_chars(list(char_sequence), window_size)
    encoded_windows = dp.build_input_data_without_flattening(
        char_windows, vocab)
    input_sentences = numpy.array(encoded_windows[:-1])

    label_windows = sqp.create_window_of_chars(
        list(label_sequence), window_size)
    output_labels = numpy.array(label_windows[:-1])

    return input_sentences, output_labels, vocab, vocab_inv
Esempio n. 8
0
def load_testing_data():
    """Load the testing dataset, pad it, and expose several raw columns.

    Inputs are built from dataset columns 0 and 6, labels from columns 0
    and 1, and sequence lengths from column 3.  Padding is delegated to
    ``dp.pad_sentences`` with the fixed arguments ``4`` and ``10``.

    Returns:
        A 10-tuple ``(sentences_padded, y, vocabulary, vocabulary_inv,
        testing_words, input_testing_letters, op_testing_letters, sent_num,
        letters_loc, undiac_word)``.  The last six entries are columns
        4, 0, 5, 3, 6 and 7 of the raw dataset, in that order.
    """
    dp.establish_db_connection()
    dataset = dp.load_testing_dataset()

    nn_input = dp.load_nn_input_dataset_string(dataset[:, [0, 6]])
    nn_labels = dp.load_nn_labels_dataset_string(dataset[:, [0, 1]])

    # The sentence numbers returned here are superseded by column 3 below.
    _sentence_numbers, sentence_lengths = dp.load_nn_seq_lengths(
        dataset[:, [3]])
    padded, vocabulary, vocabulary_inv = dp.pad_sentences(
        nn_input, sentence_lengths, 4, 10)

    # Raw dataset columns handed back to the caller alongside the tensors.
    (testing_words, input_testing_letters, op_testing_letters,
     sent_num, letters_loc, undiac_word) = (
        np.take(dataset, column, axis=1) for column in (4, 0, 5, 3, 6, 7))

    return (padded, nn_labels, vocabulary, vocabulary_inv,
            testing_words, input_testing_letters, op_testing_letters,
            sent_num, letters_loc, undiac_word)
Esempio n. 9
0
def load_testing_data():
    """Load the "testing" dataset, pad it, and expose several raw columns.

    Inputs come from ``dp.load_nn_input_dataset_string_space_only`` applied
    to dataset columns 0 and 6; labels come from columns 0 and 1; sequence
    lengths come from column 3.  Padding is delegated to
    ``dp.pad_sentences1`` using ``req_char_index`` and ``window_size`` read
    from the enclosing scope.

    Returns:
        A 10-tuple ``(sentences_padded, y, vocabulary, vocabulary_inv,
        testing_words, input_testing_letters, op_testing_letters, sent_num,
        letters_loc, undiac_word)``.  The last six entries are columns
        4, 0, 5, 3, 6 and 7 of the raw dataset, in that order.
    """
    dp.establish_db_connection()
    dataset = DBHelperMethod.load_dataset_by_type("testing")

    nn_input = dp.load_nn_input_dataset_string_space_only(dataset[:, [0, 6]])
    nn_labels = dp.load_nn_labels_dataset_string(dataset[:, [0, 1]])

    # The sentence numbers returned here are superseded by column 3 below.
    _sentence_numbers, sentence_lengths = dp.load_nn_seq_lengths(
        dataset[:, [3]])
    padded, vocabulary, vocabulary_inv = dp.pad_sentences1(
        nn_input, sentence_lengths, req_char_index, window_size)

    # Raw dataset columns handed back to the caller alongside the tensors.
    (testing_words, input_testing_letters, op_testing_letters,
     sent_num, letters_loc, undiac_word) = (
        np.take(dataset, column, axis=1) for column in (4, 0, 5, 3, 6, 7))

    return (padded, nn_labels, vocabulary, vocabulary_inv,
            testing_words, input_testing_letters, op_testing_letters,
            sent_num, letters_loc, undiac_word)
Esempio n. 10
0
def load_testing_data():
    """Build per-sentence padded inputs and labels for the "testing" dataset.

    Rows are grouped by sentence number (column 3, compared as strings),
    each group is passed through ``pad_data`` together with its columns
    0 and 1, and the collected inputs are converted with
    ``convert_input_to_vocab``.  The per-sentence outputs are flattened one
    level into a single numpy array.

    Returns:
        ``(padded_input, padded_output, vocabulary, vocabulary_inv)``.
    """
    dp.establish_db_connection()

    # This is the "testing" split, so the local is named accordingly.
    testing_rows = DBHelperMethod.load_dataset_by_type("testing")
    sentence_ids = DBHelperMethod.get_list_of_sentence_numbers_by("testing")
    label_table = dp.get_label_table()

    inputs_per_sentence = []
    labels_per_sentence = []
    for sentence_id in sentence_ids:
        # Column 3 is matched against the stringified sentence number.
        row_index = numpy.where(testing_rows[:, 3] == str(sentence_id))
        rows = testing_rows[row_index]

        sentence_input, sentence_labels = pad_data(
            rows, rows[:, [0, 1]], label_table)
        inputs_per_sentence.append(sentence_input)
        labels_per_sentence.append(sentence_labels)

    padded_input, vocabulary, vocabulary_inv = convert_input_to_vocab(
        inputs_per_sentence)
    flattened_labels = numpy.array(
        list(chain.from_iterable(labels_per_sentence)))

    return padded_input, flattened_labels, vocabulary, vocabulary_inv
Esempio n. 11
0
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), padding='valid', strides=1, activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nClasses, activation='softmax'))

    return model

# Script section: load the training/testing tables, derive network inputs and
# labels from them, then build and compile the model.
dp.establish_db_connection()

# Training split: inputs are derived from columns 0 and 8, labels from
# columns 0 and 1 of the loaded table.
data = dp.load_dataset_table("training")
train_data = dp.load_nn_input_dataset(data[:, [0, 8]])
train_labels_one_hot = dp.load_nn_labels_dataset(data[:, [0, 1]])

# Testing split: same column selection as above.  Note that ``data`` is
# rebound here, so after this point it refers to the testing table.
data = dp.load_dataset_table("testing")
test_data = dp.load_nn_input_dataset(data[:, [0, 8]])
test_labels_one_hot = dp.load_nn_labels_dataset(data[:, [0, 1]])

# NOTE(review): createModel's signature is not visible here; 49 is presumably
# the number of classes and (39,) the input shape -- confirm against its
# definition, especially since the visible layers are Conv2D/MaxPooling2D.
model1 = createModel(49, (39,))
batch_size = 256
epochs = 100
model1.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])