def predict(list_docs, experiment_name, step='', batch_size=64):
    """Run batched inference for *list_docs* against a saved checkpoint.

    Args:
        list_docs: raw input documents (strings) to classify.
        experiment_name: checkpoint sub-directory under ``../checkpoint``.
        step: specific checkpoint step to load; '' picks the latest one.
        batch_size: number of vectors fed to the graph per session run.

    Returns:
        List of unmapped (human-readable) label values, one per document.
    """
    logging.info('*' * 50)
    logging.info('RUNNING PREDICT FOR MODEL: %s', experiment_name)
    # Resolve the checkpoint path: latest checkpoint unless a step is pinned.
    if step == '':
        interesting_checkpoint = tf.train.latest_checkpoint(
            os.path.join(CURRENT_DIR, '..', 'checkpoint', experiment_name))
    else:
        interesting_checkpoint = os.path.join(
            CURRENT_DIR, '..', 'checkpoint', experiment_name,
            'step-{}'.format(step))
    dataset_manager = DatasetManager()
    dataset_manager.boot()
    # Text -> token vectors, padded/truncated to the model's fixed length.
    list_preprocessed_sentences = preprocessor.preprocess(list_docs)
    list_vecs = dataset_manager.text2vec.doc_to_vec(list_preprocessed_sentences)
    list_vecs = dataset_manager.equalize_vector_length_to_np(
        list_vectors=list_vecs, max_length=model_v6.SENTENCE_LENGTH_MAX)
    with tf.Graph().as_default() as gr:
        logging.info('-- Restoring graph for model: %s', interesting_checkpoint)
        saver = tf.train.import_meta_graph('{}.meta'.format(interesting_checkpoint))
        logging.info('-- Restored graph for model named: %s', interesting_checkpoint)
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)).as_default() as sess:
            saver.restore(sess=sess, save_path=interesting_checkpoint)
            logging.info('-- Restored variables for model named: %s',
                         interesting_checkpoint)
            # Tensor handles are loop-invariant: look them up once, not per batch.
            tf_input = gr.get_tensor_by_name('input/tf_input:0')
            tf_predictions = gr.get_tensor_by_name('prediction:0')
            # Ceiling division avoids feeding an empty trailing batch when
            # len(list_vecs) is an exact multiple of batch_size.
            num_steps = (len(list_vecs) + batch_size - 1) // batch_size
            logging.info('There will be %s steps', num_steps)
            list_predictions = []
            for i in range(num_steps):
                batch = list_vecs[i * batch_size:(i + 1) * batch_size]
                prediction = sess.run(tf_predictions,
                                      feed_dict={tf_input: batch})
                list_predictions.extend(
                    dataset_manager.LABEL_UNMAPPING[p] for p in prediction)
            return list_predictions
def predict(list_sentences, output_file, experiment_name, step='',
            list_labels=None):
    """Predict labels for *list_sentences* and write results to a CSV file.

    NOTE(review): this definition shadows the earlier ``predict`` at module
    level — confirm whether both are meant to coexist in one module.

    Args:
        list_sentences: raw input sentences (strings) to classify.
        output_file: path of the CSV file the result table is written to.
        experiment_name: checkpoint sub-directory under ``../checkpoint``.
        step: specific checkpoint step to load; '' picks the latest one.
        list_labels: optional ground-truth labels; when provided, a
            classification report is logged and labels are added to the CSV.
    """
    # Avoid the mutable-default-argument pitfall: default is None, not [].
    if list_labels is None:
        list_labels = []
    dataset_manager = DatasetManager()
    dataset_manager.boot()
    # Text -> token vectors, padded/truncated to the model's fixed length.
    list_preprocessed_sentences = preprocessor.preprocess(list_sentences)
    list_vecs = dataset_manager.text2vec.doc_to_vec(
        list_preprocessed_sentences)
    list_vecs = dataset_manager.equalize_vector_length_to_np(
        list_vectors=list_vecs, max_length=model_v1.SENTENCE_LENGTH_MAX)
    list_labels = dataset_manager.convert_labels_to_np(list_labels)
    # Resolve the checkpoint path: latest checkpoint unless a step is pinned.
    if step == '':
        interesting_checkpoint = tf.train.latest_checkpoint(
            os.path.join(CURRENT_DIR, '..', 'checkpoint', experiment_name))
    else:
        interesting_checkpoint = os.path.join(CURRENT_DIR, '..', 'checkpoint',
                                              experiment_name,
                                              'step-{}'.format(step))
    with tf.Graph().as_default() as gr:
        logging.info('-- Restoring graph for model: %s',
                     interesting_checkpoint)
        saver = tf.train.import_meta_graph(
            '{}.meta'.format(interesting_checkpoint))
        logging.info('-- Restored graph for model named: %s',
                     interesting_checkpoint)
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)).as_default() as sess:
            saver.restore(sess=sess, save_path=interesting_checkpoint)
            logging.info('-- Restored variables for model named: %s',
                         interesting_checkpoint)
            tf_input = gr.get_tensor_by_name('input/tf_input:0')
            tf_predictions = gr.get_tensor_by_name('prediction:0')
            # Single un-batched run: all vectors are fed at once.
            prediction = sess.run(tf_predictions,
                                  feed_dict={tf_input: list_vecs})
            # Only score when ground truth was supplied.
            if len(list_labels) != 0:
                logging.info('-- Report for model: %s', experiment_name)
                logging.info(
                    classification_report(y_true=list_labels,
                                          y_pred=prediction))
            result_dict = dict()
            result_dict['sentence'] = list_sentences
            result_dict['pre-processed'] = list_preprocessed_sentences
            # Round-trip the vectors back to text so truncation/padding
            # effects are visible in the output file.
            result_dict[
                'pre-processed_recover'] = dataset_manager.text2vec.vec_to_doc(
                    list_vecs)
            result_dict['predict'] = prediction
            if len(list_labels) != 0:
                result_dict['label'] = list_labels
            pd.DataFrame(result_dict).to_csv(output_file, index=False)
            logging.debug('Saved result at %s', output_file)