def load_testing_data():
    """Load the "testing" dataset from the DB and build padded NN inputs.

    Returns a 4-tuple: (sentences_padded, labels, vocabulary, vocabulary_inv).

    NOTE(review): depends on module-level ``req_char_index`` and
    ``window_size`` being defined before the call — confirm at call sites.
    NOTE(review): this file defines ``load_testing_data`` more than once;
    only the last definition survives at import time.
    """
    dp.establish_db_connection()
    rows = DBHelperMethod.load_dataset_by_type("testing")
    # presumably columns [0, 6] hold the input chars and [0, 1] the labels
    # — TODO confirm against the DB schema.
    inputs = dp.load_nn_input_dataset_string_space_only(rows[:, [0, 6]])
    labels = dp.load_nn_labels_dataset_string(rows[:, [0, 1]])
    _, seq_lengths = dp.load_nn_seq_lengths(rows[:, [3]])  # sentence count unused here
    padded, vocab, vocab_inv = dp.pad_sentences1(
        inputs, seq_lengths, req_char_index, window_size)
    return padded, labels, vocab, vocab_inv
def load_training_data():
    """Load the "training" dataset from the DB and build one-to-one NN inputs.

    Returns a 4-tuple:
    (sentences_padded as ndarray, labels as ndarray, vocabulary, vocabulary_inv).

    NOTE(review): depends on module-level ``req_char_index`` and
    ``window_size`` being defined before the call — confirm at call sites.
    """
    dp.establish_db_connection()
    rows = DBHelperMethod.load_dataset_by_type("training")
    # presumably columns [0, 6] hold the input chars and [0, 1] the labels
    # — TODO confirm against the DB schema.
    inputs = dp.load_nn_input_dataset_string_space_only(rows[:, [0, 6]])
    labels = dp.load_nn_labels_dataset_string(rows[:, [0, 1]])
    _, seq_lengths = dp.load_nn_seq_lengths(rows[:, [3]])  # sentence count unused here
    padded, vocab, vocab_inv = dp.build_one_to_one_input_data(
        inputs, seq_lengths, req_char_index, window_size)
    return numpy.array(padded), numpy.array(labels), vocab, vocab_inv
def load_testing_data():
    """Load the "testing" dataset and build windowed char sequences and labels.

    Returns a 4-tuple:
    (input_sentences as ndarray, output_labels as ndarray, vocab, vocab_inv).

    NOTE(review): depends on module-level ``window_size``; this file defines
    ``load_testing_data`` more than once — only the last definition survives.
    """
    dp.establish_db_connection()
    rows = DBHelperMethod.load_dataset_by_type("testing")
    chars = dp.load_nn_input_dataset_string_space_only(rows[:, [0, 6]])
    labels = dp.load_nn_labels_dataset_string(rows[:, [0, 1]])
    vocab, vocab_inv = dp.build_vocab(chars)
    _, _ = dp.load_nn_seq_lengths(rows[:, [3]])  # lengths computed but unused here
    # Build sliding windows over chars, map them through the vocab, and drop
    # the final (presumably incomplete) window — TODO confirm why [:-1].
    windows = sqp.create_window_of_chars(list(chars), window_size)
    windows = dp.build_input_data_without_flattening(windows, vocab)
    window_array = numpy.array(windows[:-1])
    label_windows = sqp.create_window_of_chars(list(labels), window_size)
    label_array = numpy.array(label_windows[:-1])
    return window_array, label_array, vocab, vocab_inv
def load_testing_data():
    """Load the "testing" dataset with padded inputs plus raw dataset columns.

    Returns a 10-tuple: (sentences_padded, labels, vocabulary, vocabulary_inv,
    testing_words, input_testing_letters, op_testing_letters, sent_num,
    letters_loc, undiac_word).

    NOTE(review): depends on module-level ``req_char_index`` and
    ``window_size``; this file defines ``load_testing_data`` more than once —
    only the last definition survives at import time.
    """
    dp.establish_db_connection()
    rows = DBHelperMethod.load_dataset_by_type("testing")
    inputs = dp.load_nn_input_dataset_string_space_only(rows[:, [0, 6]])
    labels = dp.load_nn_labels_dataset_string(rows[:, [0, 1]])
    _, seq_lengths = dp.load_nn_seq_lengths(rows[:, [3]])
    padded, vocab, vocab_inv = dp.pad_sentences1(
        inputs, seq_lengths, req_char_index, window_size)
    # Extract individual columns of the raw dataset for downstream reporting.
    # Column meanings inferred from names only — TODO confirm against schema.
    words = np.take(rows, 4, axis=1)
    in_letters = np.take(rows, 0, axis=1)
    out_letters = np.take(rows, 5, axis=1)
    sentence_ids = np.take(rows, 3, axis=1)
    letter_positions = np.take(rows, 6, axis=1)
    undiacritized = np.take(rows, 7, axis=1)
    return (padded, labels, vocab, vocab_inv, words, in_letters,
            out_letters, sentence_ids, letter_positions, undiacritized)