def predict_ckpt():
    """Load the model from the latest checkpoint and classify console input.

    Restores the graph and weights from the newest checkpoint found under
    the module-level ``ckpt_path``, then repeatedly reads a line of text
    from stdin, runs it through the ``softmaxLayer/probs:0`` tensor and
    prints the probabilities plus a positive / negative / neutral label.
    Entering ``exit`` terminates the process with status 0.

    Raises:
        FileNotFoundError: if no checkpoint is found under ``ckpt_path``.
    """
    with tf.Session() as sess:
        checkpoint_file = tf.train.latest_checkpoint(ckpt_path)
        if checkpoint_file is None:
            # latest_checkpoint() returns None when nothing is found; fail
            # with a clear message instead of trying to open "None.meta".
            raise FileNotFoundError(
                "no checkpoint found in {}".format(ckpt_path))

        saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
        saver.restore(sess, checkpoint_file)

        graph = tf.get_default_graph()
        input_x = graph.get_operation_by_name("input_x").outputs[0]
        keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
        probs = graph.get_tensor_by_name("softmaxLayer/probs:0")

        while True:
            text = input("请输入:")
            if text == "exit":
                # exit() is a helper injected by the `site` module for the
                # interactive shell; raise SystemExit directly instead.
                raise SystemExit(0)

            text_seq = texts_to_sequences(text, vocab_path, stopword_path)
            pred = sess.run(probs, feed_dict={
                input_x: list(text_seq),
                keep_prob: 1.0,  # keep probability of 1.0 is fed at inference
            })
            print("predict values: {}".format(pred[0]))

            # The first probability decides the label: > 0.7 positive,
            # < 0.3 negative, anything in between neutral.
            positive = pred[0][0]
            if positive > 0.7:
                label = "正向"
            elif positive < 0.3:
                label = "负向"
            else:
                label = "中性"
            print("{}".format(label))
def predict_meme_text(template_id, num_boxes, init_text=''):
    """Generate meme text one character at a time with a randomized beam search.

    Relies on the module-level ``model``, ``char_to_int``, ``labels``,
    ``SEQUENCE_LENGTH``, ``MAX_OUTPUT_LENGTH`` and ``BEAM_WIDTH``.

    Args:
        template_id: Template identifier; stringified and zero-padded to
            12 characters before being prepended to the model input.
        num_boxes: Number of text boxes. A candidate is finished as soon as
            its ``num_boxes``-th ``'|'`` character is emitted.
        init_text: Text to start generating from.

    Returns:
        The highest-scoring text among the unfinished and finished
        candidates at the moment generation stops. If ``init_text`` already
        has ``MAX_OUTPUT_LENGTH`` or more characters, it is returned
        unchanged (previously the function fell through and returned None).
    """
    template_id = str(template_id).zfill(12)
    min_score = 0.1
    final_texts = [{'text': init_text, 'score': 1}]
    finished_texts = []

    for char_count in range(len(init_text), MAX_OUTPUT_LENGTH):
        # Model input per candidate: "<template id> <box index> <text>",
        # where the box index is the number of '|' emitted so far.
        texts = [
            template_id + ' ' + str(beam['text'].count('|')) + ' ' + beam['text']
            for beam in final_texts
        ]
        sequences = util.texts_to_sequences(texts, char_to_int)
        data = pad_sequences(sequences, maxlen=SEQUENCE_LENGTH)
        predictions_list = model.predict(data)

        # One entry per (candidate, next character), scored by the
        # candidate's running score times the predicted value.
        sorted_predictions = []
        for i, char_probs in enumerate(predictions_list):
            beam = final_texts[i]
            for j, prob in enumerate(char_probs):
                sorted_predictions.append({
                    'text': beam['text'],
                    'next_char': labels[j],
                    'score': prob * beam['score'],
                })
        sorted_predictions.sort(key=lambda p: p['score'], reverse=True)

        top_score = sorted_predictions[0]['score']
        rand_int = random.randint(int(min_score * 1000), 1000)
        # give each prediction a chance of being chosen corresponding to its
        # score; use `top_score * min_score` as the threshold instead to
        # enable equal probabilities
        threshold = rand_int / 1000 * top_score
        top_predictions = [
            p for p in sorted_predictions if p['score'] >= threshold
        ]
        random.shuffle(top_predictions)

        # Finished texts occupy beam slots, so only the remainder is refilled.
        # The slot count is fixed before the loop even though finished_texts
        # may grow inside it.
        open_slots = min(BEAM_WIDTH, len(top_predictions)) - len(finished_texts)
        final_texts = []
        for prediction in top_predictions[:max(open_slots, 0)]:
            candidate = {
                'text': prediction['text'] + prediction['next_char'],
                # normalize all scores around top_score=1 so tiny floats
                # don't disappear due to rounding
                'score': prediction['score'] / top_score,
            }
            closes_last_box = (
                prediction['next_char'] == '|'
                and prediction['text'].count('|') == num_boxes - 1
            )
            if closes_last_box:
                finished_texts.append(candidate)
            else:
                final_texts.append(candidate)

        if char_count >= MAX_OUTPUT_LENGTH - 1 or not final_texts:
            ranked = sorted(final_texts + finished_texts,
                            key=lambda p: p['score'], reverse=True)
            return ranked[0]['text']

    # The loop never ran: init_text is already at or beyond the length limit.
    return init_text
def index():
    """Serve the form page and, on POST, classify the submitted comment.

    For a POST request the ``comments`` form field is converted with
    ``texts_to_sequences`` (vocabulary and stop-word paths come from
    ``config.yml``), the first resulting sequence is passed to ``predict``,
    and the page is rendered with ``RESULT`` set to the label followed by
    the raw score. Any other request method renders the bare page.
    """
    if request.method != "POST":
        return render_template('index.html')

    config = load_yaml_config("config.yml")
    comment = request.form['comments']
    data_cfg = config["data"]
    vocab_path = data_cfg["vocab_path"]
    stopword_path = data_cfg["stopword_path"]

    sequences = texts_to_sequences(comment, vocab_path, stopword_path)
    first_sequence = list(sequences)[0].tolist()
    pred = predict(first_sequence)

    # The first value of the first prediction drives the label:
    # > 0.7 positive, < 0.3 negative, otherwise neutral.
    positive = pred['predictions'][0][0]
    if positive > 0.7:
        res = "正向"
    elif positive < 0.3:
        res = "负向"
    else:
        res = "中性"

    return render_template('index.html', RESULT=res + " " + str(positive))
def predict_pb(model_version="1568521090"):
    """Load the exported SavedModel and classify console input.

    Loads the model tagged ``SERVING`` from ``pb_path + "/" + model_version``
    (``pb_path`` is a module-level value), then repeatedly reads a line of
    text from stdin, runs it through the ``softmaxLayer/probs:0`` tensor and
    prints the probabilities plus a positive / negative / neutral label.
    Entering ``exit`` terminates the process with status 0.

    Args:
        model_version: Name of the export sub-directory under ``pb_path``.
            Defaults to the directory that used to be hard-coded here.
    """
    with tf.Session() as sess:
        tf.saved_model.loader.load(
            sess, [tag_constants.SERVING], pb_path + "/" + model_version)

        graph = tf.get_default_graph()
        input_x = graph.get_operation_by_name("input_x").outputs[0]
        keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
        probs = graph.get_tensor_by_name("softmaxLayer/probs:0")

        while True:
            text = input("请输入:")
            if text == "exit":
                # exit() is a helper injected by the `site` module for the
                # interactive shell; raise SystemExit directly instead.
                raise SystemExit(0)

            text_seq = texts_to_sequences(text, vocab_path, stopword_path)
            pred = sess.run(probs, feed_dict={
                input_x: list(text_seq),
                keep_prob: 1.0,  # keep probability of 1.0 is fed at inference
            })
            print("predict values: {}".format(pred[0]))

            # The first probability decides the label: > 0.7 positive,
            # < 0.3 negative, anything in between neutral.
            positive = pred[0][0]
            if positive > 0.7:
                label = "正向"
            elif positive < 0.3:
                label = "负向"
            else:
                label = "中性"
            print("{}".format(label))
break print('training text 0: %s' % texts[0]) print('training text 10: %s' % texts[10]) print('training text 1000: %s' % texts[1000]) print('scanning json took %ds' % round(time.time() - t)) util.print_memory() print('tokenizing %d texts...' % len(texts)) del training_data # free memory t = time.time() char_to_int = util.map_char_to_int(texts) sequences = util.texts_to_sequences(texts, char_to_int) del texts # free memory print('example sequence 10: ', sequences[10]) print('tokenizing took %ds' % round(time.time() - t)) util.print_memory() print('saving tokenizer and labels to file...') # save tokenizer, label indexes, and parameters so they can be used for predicting later with open(MODEL_PATH + '/params.json', 'w') as handle: json.dump({ 'sequence_length': SEQUENCE_LENGTH, 'embedding_dim': EMBEDDING_DIM, 'num_rows_used': len(sequences),
def predict_meme_text(model_path, template_id, num_boxes, init_text='',
                      model_filename="model.h5",
                      params_filename="params.json",
                      beam_width=1, max_output_length=140):
    """Generate meme text one character at a time with a randomized beam search.

    Requires, inside ``model_path``:
      - a pretrained model (``model_filename``);
      - ``params_filename``: JSON holding ``sequence_length``, the
        ``char_to_int`` mapping and ``labels_index``. It should be
        generated automatically by running train.py.

    Args:
        model_path: Directory containing the model and params files.
        template_id: Template identifier; stringified and zero-padded to
            12 characters before being prepended to the model input.
        num_boxes: Number of text boxes. A candidate is finished as soon as
            its ``num_boxes``-th ``'|'`` character is emitted.
        init_text: Text to start generating from.
        model_filename: File name of the saved model.
        params_filename: File name of the saved parameters.
        beam_width: Maximum number of candidates kept per step.
        max_output_length: Upper bound on the generated text length.

    Returns:
        The highest-scoring text among the unfinished and finished
        candidates at the moment generation stops. If ``init_text`` already
        has ``max_output_length`` or more characters, it is returned
        unchanged (previously the function fell through and returned None).
    """
    model = load_model(os.path.join(model_path, model_filename))
    # Use a context manager so the params file handle is closed promptly.
    with open(os.path.join(model_path, params_filename)) as handle:
        params = json.load(handle)

    sequence_length = params['sequence_length']
    char_to_int = params['char_to_int']
    # Invert labels_index so a prediction column index maps to its character.
    labels = {v: k for k, v in params['labels_index'].items()}

    template_id = str(template_id).zfill(12)
    min_score = 0.1
    final_texts = [{'text': init_text, 'score': 1}]
    finished_texts = []

    for char_count in range(len(init_text), max_output_length):
        # Model input per candidate: "<template id> <box index> <text>",
        # where the box index is the number of '|' emitted so far.
        texts = [
            template_id + ' ' + str(beam['text'].count('|')) + ' ' + beam['text']
            for beam in final_texts
        ]
        sequences = util.texts_to_sequences(texts, char_to_int)
        data = pad_sequences(sequences, maxlen=sequence_length)
        predictions_list = model.predict(data)

        # One entry per (candidate, next character), scored by the
        # candidate's running score times the predicted value.
        sorted_predictions = []
        for i, char_probs in enumerate(predictions_list):
            beam = final_texts[i]
            for j, prob in enumerate(char_probs):
                sorted_predictions.append({
                    'text': beam['text'],
                    'next_char': labels[j],
                    'score': prob * beam['score'],
                })
        sorted_predictions.sort(key=lambda p: p['score'], reverse=True)

        top_score = sorted_predictions[0]['score']
        rand_int = random.randint(int(min_score * 1000), 1000)
        # give each prediction a chance of being chosen corresponding to its
        # score; use `top_score * min_score` as the threshold instead to
        # enable equal probabilities
        threshold = rand_int / 1000 * top_score
        top_predictions = [
            p for p in sorted_predictions if p['score'] >= threshold
        ]
        random.shuffle(top_predictions)

        # Finished texts occupy beam slots, so only the remainder is refilled.
        # The slot count is fixed before the loop even though finished_texts
        # may grow inside it.
        open_slots = min(beam_width, len(top_predictions)) - len(finished_texts)
        final_texts = []
        for prediction in top_predictions[:max(open_slots, 0)]:
            candidate = {
                'text': prediction['text'] + prediction['next_char'],
                # normalize all scores around top_score=1 so tiny floats
                # don't disappear due to rounding
                'score': prediction['score'] / top_score,
            }
            closes_last_box = (
                prediction['next_char'] == '|'
                and prediction['text'].count('|') == num_boxes - 1
            )
            if closes_last_box:
                finished_texts.append(candidate)
            else:
                final_texts.append(candidate)

        if char_count >= max_output_length - 1 or not final_texts:
            ranked = sorted(final_texts + finished_texts,
                            key=lambda p: p['score'], reverse=True)
            return ranked[0]['text']

    # The loop never ran: init_text is already at or beyond the length limit.
    return init_text