def load_testing_data():
    Dp.establish_db_connection()
    testing_dataset = Dp.load_dataset_by_type("testing")

    x = Dp.load_nn_input_dataset_string(testing_dataset[:, [0, 6]])
    y = Dp.load_nn_labels_dataset_string(testing_dataset[:, [0, 1]])
    sent_num, sen_len = Dp.load_nn_seq_lengths(testing_dataset[:, [3]])
    sentences_padded, vocabulary, vocabulary_inv = Dp.pad_sentences(x, sen_len, 4, 10)

    return sentences_padded, y, vocabulary, vocabulary_inv
def get_intent(sentence):
    x_ = load_test_data(sentence)
    # note
    x_ = data_helper.pad_sentences(x_, forced_sequence_length=param_length)
    x_ = map_word_to_index(x_, words_index)
    x_test = np.asarray(x_)

    predict_labels = []
    batch_predictions = predict_step(x_test)[0]
    for batch_prediction in batch_predictions:
        predict_labels.append(labels[batch_prediction])
    return predict_labels
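# map_word_to_index is assumed by get_intent and the prediction functions below
# but is not shown here. A minimal sketch of what it likely does, given the call
# sites (the fallback of unknown words to index 0 is an assumption, not
# confirmed by the source):
def map_word_to_index(examples, words_index):
    x_ = []
    for example in examples:
        temp = []
        for word in example:
            if word in words_index:
                temp.append(words_index[word])
            else:
                temp.append(0)  # assumed: out-of-vocabulary words map to index 0
        x_.append(temp)
    return x_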
def load_training_data():
    dp.establish_db_connection()
    training_dataset = DBHelperMethod.load_dataset_by_type("training")

    # x = dp.load_nn_input_dataset_string(training_dataset[:, [0, 6]])
    x = dp.load_nn_input_dataset_string_space_only(training_dataset[:, [0, 6]])
    y = dp.load_nn_labels_dataset_string(training_dataset[:, [0, 1]])
    sent_num, sen_len = dp.load_nn_seq_lengths(training_dataset[:, [3]])
    sentences_padded, vocabulary, vocabulary_inv = dp.pad_sentences(x, sen_len, 4, 10)

    return sentences_padded, y, vocabulary, vocabulary_inv
def load_testing_data():
    dp.establish_db_connection()
    testing_dataset = dp.load_testing_dataset()

    x = dp.load_nn_input_dataset_string(testing_dataset[:, [0, 6]])
    y = dp.load_nn_labels_dataset_string(testing_dataset[:, [0, 1]])
    sent_num, sen_len = dp.load_nn_seq_lengths(testing_dataset[:, [3]])
    sentences_padded, vocabulary, vocabulary_inv = dp.pad_sentences(x, sen_len, 4, 10)

    testing_words = np.take(testing_dataset, 4, axis=1)
    input_testing_letters = np.take(testing_dataset, 0, axis=1)
    op_testing_letters = np.take(testing_dataset, 5, axis=1)
    sent_num = np.take(testing_dataset, 3, axis=1)
    letters_loc = np.take(testing_dataset, 6, axis=1)
    undiac_word = np.take(testing_dataset, 7, axis=1)

    return sentences_padded, y, vocabulary, vocabulary_inv, testing_words, input_testing_letters, \
        op_testing_letters, sent_num, letters_loc, undiac_word
def predict_unseen_data():
    trained_dir = sys.argv[1]
    if not trained_dir.endswith('/'):
        trained_dir += '/'
    test_file = sys.argv[2]

    params, words_index, labels, embedding_mat = load_trained_params(trained_dir)
    x_, y_, df = load_test_data(test_file, labels)
    x_ = data_helper.pad_sentences(x_, forced_sequence_length=params['sequence_length'])
    x_ = map_word_to_index(x_, words_index)

    x_test, y_test = np.asarray(x_), None
    if y_ is not None:
        y_test = np.asarray(y_)

    timestamp = trained_dir.split('/')[-2].split('_')[-1]
    predicted_dir = './predicted_results_' + timestamp + '/'
    if os.path.exists(predicted_dir):
        shutil.rmtree(predicted_dir)
    os.makedirs(predicted_dir)

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn_rnn = TextCNNRNN(
                embedding_mat=embedding_mat,
                non_static=params['non_static'],
                hidden_unit=params['hidden_unit'],
                sequence_length=len(x_test[0]),
                max_pool_size=params['max_pool_size'],
                filter_sizes=map(int, params['filter_sizes'].split(",")),
                num_filters=params['num_filters'],
                num_classes=len(labels),
                embedding_size=params['embedding_dim'],
                l2_reg_lambda=params['l2_reg_lambda'])

            def real_len(batches):
                # position of the first pad token (index 0), scaled by the pooling size
                return [np.ceil(np.argmin(batch + [0]) * 1.0 / params['max_pool_size'])
                        for batch in batches]

            def predict_step(x_batch):
                feed_dict = {
                    cnn_rnn.input_x: x_batch,
                    cnn_rnn.dropout_keep_prob: 1.0,  # no dropout at inference time
                    cnn_rnn.batch_size: len(x_batch),
                    cnn_rnn.pad: np.zeros([len(x_batch), 1, params['embedding_dim'], 1]),
                    cnn_rnn.real_len: real_len(x_batch),
                }
                predictions = sess.run([cnn_rnn.predictions], feed_dict)
                return predictions

            checkpoint_file = trained_dir + 'best_model.ckpt'
            saver = tf.train.Saver(tf.all_variables())
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)
            logging.critical('{} has been loaded'.format(checkpoint_file))

            batches = data_helper.batch_iter(list(x_test), params['batch_size'], 1, shuffle=False)

            predictions, predict_labels = [], []
            for x_batch in batches:
                batch_predictions = predict_step(x_batch)[0]
                for batch_prediction in batch_predictions:
                    predictions.append(batch_prediction)
                    predict_labels.append(labels[batch_prediction])

            df['PREDICTED'] = predict_labels
            columns = sorted(df.columns, reverse=True)
            df.to_csv(predicted_dir + 'predictions_all.csv', index=False, columns=columns, sep='|')

            if y_test is not None:
                y_test = np.array(np.argmax(y_test, axis=1))
                accuracy = sum(np.array(predictions) == y_test) / float(len(y_test))
                logging.critical('The prediction accuracy is: {}'.format(accuracy))

            logging.critical('Prediction is complete, all files have been saved: {}'.format(predicted_dir))
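# load_trained_params is assumed to read back the artifacts saved at training
# time (hyperparameters, vocabulary index, label set, embedding matrix). A
# plausible sketch; the file names below are assumptions about the trained_dir
# layout, not confirmed by the source:
import json
import pickle
import numpy as np

def load_trained_params(trained_dir):
    params = json.loads(open(trained_dir + 'trained_parameters.json').read())
    words_index = json.loads(open(trained_dir + 'words_index.json').read())
    labels = json.loads(open(trained_dir + 'labels.json').read())
    with open(trained_dir + 'embeddings.pickle', 'rb') as input_file:
        fetched_embedding = pickle.load(input_file)
    embedding_mat = np.array(fetched_embedding, dtype=np.float32)
    return params, words_index, labels, embedding_mat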
if __name__ == '__main__':
    print('please enter "exit" to exit.')
    logging.critical('The maximum length is {}'.format(param_length))

    while True:
        x_ = load_test_data()
        if x_ == -1:
            break
        # note
        x_ = data_helper.pad_sentences(x_, forced_sequence_length=param_length)
        x_ = map_word_to_index(x_, words_index)
        x_test = np.asarray(x_)

        predict_labels = []
        batch_predictions = predict_step(x_test)[0]
        for batch_prediction in batch_predictions:
            predict_labels.append(labels[batch_prediction])
        print('The intent is [{}]'.format(predict_labels[0]))
def predict():
    # trained_dir = sys.argv[1]
    trained_dir = './trained_results_1575177514/'
    # if not trained_dir.endswith('/'):
    #     trained_dir += '/'
    # test_file = sys.argv[2]

    if request.method == 'POST':
        file = request.files['ReceivedFile']
        logging.critical('Received Filename from App: {}'.format(file))
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            test_file = './uploads/' + filename
            df = pd.read_csv(test_file)
            text = df[['Descript']]
            params, words_index, labels, embedding_mat = load_trained_params(trained_dir)
            flag = 0
            x_, y_, df = load_test_data(test_file, labels, flag)
            x_ = data_helper.pad_sentences(x_, forced_sequence_length=params['sequence_length'])
            x_ = map_word_to_index(x_, words_index)
        else:
            # text = request.json["text"]
            text = request.form["query"]
            params, words_index, labels, embedding_mat = load_trained_params(trained_dir)
            flag = 1
            x_, y_, df = load_test_data(text, labels, flag)
            x_ = data_helper.pad_sentences(x_, forced_sequence_length=params['sequence_length'])
            x_ = map_word_to_index(x_, words_index)

    x_test, y_test = np.asarray(x_), None
    if y_ is not None:
        y_test = np.asarray(y_)

    timestamp = trained_dir.split('/')[-2].split('_')[-1]
    predicted_dir = './predicted_results_' + timestamp + '/'
    if os.path.exists(predicted_dir):
        shutil.rmtree(predicted_dir)
    os.makedirs(predicted_dir)

    with tf.Graph().as_default():
        session_conf = tf.compat.v1.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        sess = tf.compat.v1.Session(config=session_conf)
        with sess.as_default():
            cnn_rnn = TextCNNRNN(
                embedding_mat=embedding_mat,
                non_static=params['non_static'],
                hidden_unit=params['hidden_unit'],
                sequence_length=len(x_test[0]),
                max_pool_size=params['max_pool_size'],
                filter_sizes=map(int, params['filter_sizes'].split(",")),
                num_filters=params['num_filters'],
                num_classes=len(labels),
                embedding_size=params['embedding_dim'],
                l2_reg_lambda=params['l2_reg_lambda'])

            def real_len(batches):
                return [np.ceil(np.argmin(batch + [0]) * 1.0 / params['max_pool_size'])
                        for batch in batches]

            def predict_step(x_batch):
                feed_dict = {
                    cnn_rnn.input_x: x_batch,
                    cnn_rnn.dropout_keep_prob: 1.0,
                    cnn_rnn.batch_size: len(x_batch),
                    cnn_rnn.pad: np.zeros([len(x_batch), 1, params['embedding_dim'], 1]),
                    cnn_rnn.real_len: real_len(x_batch),
                }
                predictions = sess.run([cnn_rnn.predictions], feed_dict)
                return predictions

            checkpoint_file = trained_dir + 'best_model.ckpt'
            saver = tf.compat.v1.train.Saver(tf.compat.v1.all_variables())
            saver = tf.compat.v1.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)
            logging.critical('{} has been loaded'.format(checkpoint_file))

            batches = data_helper.batch_iter(list(x_test), params['batch_size'], 1, shuffle=False)

            predictions, predict_labels = [], []
            for x_batch in batches:
                batch_predictions = predict_step(x_batch)[0]
                for batch_prediction in batch_predictions:
                    predictions.append(batch_prediction)
                    predict_labels.append(labels[batch_prediction])

            logging.critical('Prediction is complete Class belongs to: {}'.format(predict_labels[0]))
            # if os.path.exists(test_file):
            #     os.remove(test_file)

    return render_template('results.html', prediction=predict_labels[0], name=text)
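# allowed_file is not shown in the snippet above. A minimal sketch following the
# standard Flask file-upload pattern; restricting uploads to CSV is an
# assumption based on the pd.read_csv call:
ALLOWED_EXTENSIONS = {'csv'}

def allowed_file(filename):
    # accept only filenames with an extension in the whitelist
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS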
def test_data():
    trained_dir = sys.argv[1]
    if not trained_dir.endswith('/'):
        trained_dir += '/'
    test_file = sys.argv[2]

    params, words_index, labels, embedding_mat = load_trained_params(trained_dir)
    x_, y_, df = load_test_data(test_file, labels)
    x_ = data_helper.pad_sentences(x_, forced_sequence_length=params['sequence_length'])
    x_ = map_word_to_index(x_, words_index)

    x_test, y_test = np.asarray(x_), None
    if y_ is not None:
        y_test = np.asarray(y_)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # logging.info(params)
            cnn_rnn = TextCNNRNN(
                embedding_mat=embedding_mat,
                non_static=params['non_static'],
                hidden_unit=params['hidden_unit'],
                sequence_length=len(x_test[0]),
                max_pool_size=params['max_pool_size'],
                filter_sizes=map(int, params['filter_sizes'].split(",")),
                num_filters=params['num_filters'],
                num_classes=len(labels),
                embedding_size=params['embedding_dim'],
                l2_reg_lambda=params['l2_reg_lambda'])

            def real_len(batches):
                return [np.ceil(np.argmin(batch + [0]) * 1.0 / params['max_pool_size'])
                        for batch in batches]

            def dev_step(x_batch, y_batch):
                feed_dict = {
                    cnn_rnn.input_x: x_batch,
                    cnn_rnn.input_y: y_batch,
                    cnn_rnn.dropout_keep_prob: 1.0,
                    cnn_rnn.batch_size: len(x_batch),
                    cnn_rnn.pad: np.zeros([len(x_batch), 1, params['embedding_dim'], 1]),
                    cnn_rnn.real_len: real_len(x_batch),
                }
                loss, num_correct, predictions = sess.run(
                    [cnn_rnn.loss, cnn_rnn.num_correct, cnn_rnn.predictions], feed_dict)
                return loss, num_correct, predictions

            checkpoint_file = trained_dir + 'model-4'
            saver = tf.train.Saver(tf.global_variables())
            logging.info(checkpoint_file)
            saver.restore(sess, checkpoint_file)
            logging.critical('{} has been loaded'.format(checkpoint_file))

            total_test_correct = 0
            loss, num_test_correct, predictions = dev_step(x_test, y_test)
            logging.info(num_test_correct)
            logging.info(predictions)
            total_test_correct += int(num_test_correct)

            logging.critical('Accuracy on test set: {}'.format(float(total_test_correct) / len(y_test)))
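# Example invocation of test_data() (the script name and paths are
# illustrative only; the function reads the trained-results directory and the
# test file from sys.argv):
#   python test.py ./trained_results_<timestamp>/ ./data/test_file.csv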
def predict_raw_data(argv):
    ###################################################################
    # Make sure to enter the trained_dir you want to load.            #
    #                                                                 #
    # At predicted_dir there is a predict_labels.txt, which stores    #
    # the prediction results for the test file.                       #
    ###################################################################
    in_file = ''
    out_file = ''
    try:
        opts, args = getopt.getopt(argv, "h:t:i:o:",
                                   ["trained_dir=", "in_filepath=", "out_filepath="])
    except getopt.GetoptError:
        print("python main.py -i <in_filepath> -o <out_filepath> -t <trained_dir>")
        sys.exit(2)

    trained_dir = './trained_results/'
    for opt, arg in opts:
        if opt == '-h':
            print("python main.py -i <in_filepath> -o <out_filepath>")
            sys.exit()
        elif opt in ("-i", "--in_filepath"):
            in_file = arg
        elif opt in ("-o", "--out_filepath"):
            out_file = arg
        elif opt in ("-t", "--trained_dir"):
            trained_dir = arg

    params, words_index, labels, embedding_mat = load_trained_params(trained_dir)
    original_x, x_, y_ = load_test_data(in_file)
    x_ = data_helper.pad_sentences(x_, forced_sequence_length=params['sequence_length'])
    x_ = map_word_to_index(x_, words_index)

    x_test, y_test = np.asarray(x_), None
    if y_ is not None:
        y_test = np.asarray(y_)

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn_rnn = TextCNNRNN(
                embedding_mat=embedding_mat,
                non_static=params['non_static'],
                hidden_unit=params['hidden_unit'],
                sequence_length=len(x_test[0]),
                max_pool_size=params['max_pool_size'],
                filter_sizes=map(int, params['filter_sizes'].split(",")),
                num_filters=params['num_filters'],
                num_classes=len(labels),
                embedding_size=params['embedding_dim'],
                l2_reg_lambda=params['l2_reg_lambda'])

            def real_len(batches):
                return [np.ceil(np.argmin(batch + [0]) * 1.0 / params['max_pool_size'])
                        for batch in batches]

            def predict_step(x_batch):
                feed_dict = {
                    cnn_rnn.input_x: x_batch,
                    cnn_rnn.dropout_keep_prob: 1.0,
                    cnn_rnn.batch_size: len(x_batch),
                    cnn_rnn.pad: np.zeros([len(x_batch), 1, params['embedding_dim'], 1]),
                    cnn_rnn.real_len: real_len(x_batch),
                }
                predictions, scores = sess.run([cnn_rnn.predictions, cnn_rnn.scores], feed_dict)
                return predictions, scores

            checkpoint_file = trained_dir + 'best_model.ckpt'
            saver = tf.train.Saver(tf.global_variables())
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)
            logging.critical('{} has been loaded'.format(checkpoint_file))

            batches = data_helper.batch_iter(list(x_test), params['batch_size'], 1, shuffle=False)

            predictions, predict_labels, predict_probs = [], [], []
            for x_test_batch in batches:
                # run the graph once per batch; predict_step returns (predictions, scores)
                batch_predictions, batch_prop_preds = predict_step(x_test_batch)
                for batch_prediction, batch_prop_pred in zip(batch_predictions, batch_prop_preds):
                    predictions.append(batch_prediction)
                    predict_labels.append(labels[batch_prediction])
                    predict_probs.append(batch_prop_pred[batch_prediction])

            with open(out_file, "w", encoding='utf-8') as f:
                for original_x_, predict_label, predict_prob in zip(original_x, predict_labels, predict_probs):
                    print_prob = round(predict_prob * 100, 2)
                    f.write(str(original_x_) + '\t' + str(predict_label) + '\t' + str(print_prob) + '\n')

            if y_test is not None:
                y_test = np.array(np.argmax(y_test, axis=1))
                accuracy = sum(np.array(predictions) == y_test) / float(len(y_test))
                logging.critical('The prediction accuracy is: {}'.format(accuracy))

            logging.critical('Prediction is complete')
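# data_helper.batch_iter is used throughout to feed the graph in fixed-size
# chunks. A minimal sketch of a compatible generator; the signature is inferred
# from the call sites, and the shuffle handling is an assumption:
import numpy as np

def batch_iter(data, batch_size, num_epochs, shuffle=True):
    data = np.array(data)
    data_size = len(data)
    num_batches_per_epoch = int((data_size - 1) / batch_size) + 1
    for epoch in range(num_epochs):
        if shuffle:
            # reshuffle once per epoch
            shuffled_data = data[np.random.permutation(np.arange(data_size))]
        else:
            shuffled_data = data
        for batch_num in range(num_batches_per_epoch):
            start = batch_num * batch_size
            end = min((batch_num + 1) * batch_size, data_size)
            yield shuffled_data[start:end]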
def predict_unseen_data():
    test_x = []
    # test_input = os.environ.get('TEST_X', None)
    test_input = "What time is the class"
    if test_input is None:
        logging.critical(' TEST_X is not found ')
        sys.exit()
    test_x.append(test_input.split(' '))

    trained_dir = "trained_results_1512435063"  # os.environ.get('TRAINED_RESULTS', None)
    if trained_dir is None:
        logging.critical(' TRAINED_RESULTS is not found ')
        sys.exit()
    if not trained_dir.endswith('/'):
        trained_dir += '/'

    x_ = data_helper.pad_sentences(test_x, forced_sequence_length=params['sequence_length'])
    x_ = map_word_to_index(x_, words_index)
    x_test, y_test = np.asarray(x_), None

    timestamp = trained_dir.split('/')[-2].split('_')[-1]

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn_rnn = TextCNNRNN(
                embedding_mat=embedding_mat,
                non_static=params['non_static'],
                hidden_unit=params['hidden_unit'],
                sequence_length=len(x_test[0]),
                max_pool_size=params['max_pool_size'],
                filter_sizes=map(int, params['filter_sizes'].split(",")),
                num_filters=params['num_filters'],
                num_classes=len(labels),
                embedding_size=params['embedding_dim'],
                l2_reg_lambda=params['l2_reg_lambda'])

            def real_len(batches):
                return [np.ceil(np.argmin(batch + [0]) * 1.0 / params['max_pool_size'])
                        for batch in batches]

            def predict_step(x_batch):
                feed_dict = {
                    cnn_rnn.input_x: x_batch,
                    cnn_rnn.dropout_keep_prob: 1.0,
                    cnn_rnn.batch_size: len(x_batch),
                    cnn_rnn.pad: np.zeros([len(x_batch), 1, params['embedding_dim'], 1]),
                    cnn_rnn.real_len: real_len(x_batch),
                }
                scores, predictions = sess.run([cnn_rnn.scores, cnn_rnn.predictions], feed_dict)
                return scores, predictions

            checkpoint_file = trained_dir + 'best_model.ckpt'
            saver = tf.train.Saver(tf.all_variables())
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)
            logging.critical('{} has been loaded'.format(checkpoint_file))

            batches = data_helper.batch_iter(list(x_test), params['batch_size'], 1, shuffle=False)

            response = ""
            predictions, predict_labels = [], []
            for x_batch in batches:
                scores, batch_predictions = predict_step(x_batch)
                print(scores)
                score = normalize(scores[0])
                print(score)
                print(score.max())

                mscore = score.max()
                range_perc = 0.01
                max_range = mscore + (mscore * range_perc)
                min_range = mscore - (mscore * range_perc)
                for s in score:
                    if min_range < s < max_range:
                        max_score = score.max()

                if max_score > 0.1:
                    print(scores)
                    for batch_prediction in batch_predictions:
                        predictions.append(batch_prediction)
                        predict_labels.append(labels[batch_prediction])
                    response = predict_labels[0]
                else:
                    # confidence too low: do not commit to any intent
                    response = "Fall back!"

            sys.stdout.write(response)
            print(response)
            os.environ['PRED_LABEL'] = response
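# normalize is not defined in the snippet above; it evidently squashes the raw
# logits in scores[0] into comparable values in [0, 1]. A softmax is one
# plausible reading (an assumption, the original may use a different scheme):
import numpy as np

def normalize(raw_scores):
    # subtract the max before exponentiating for numerical stability
    exp_scores = np.exp(raw_scores - np.max(raw_scores))
    return exp_scores / exp_scores.sum()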