Ejemplo n.º 1
0
def predict_ckpt():
    """Load the model from the latest checkpoint and classify console input.

    Restores the graph and variables of the most recent checkpoint found
    under ``ckpt_path``, then repeatedly reads a line from standard input,
    converts it with ``texts_to_sequences`` and prints the predicted values
    followed by a label: "正向" when the first value is above 0.7, "负向"
    when it is below 0.3 and "中性" otherwise. Entering ``exit`` terminates
    the process with status 0.

    Raises:
        FileNotFoundError: If no checkpoint is found under ``ckpt_path``.
    """
    # latest_checkpoint returns None when nothing has been saved yet; fail
    # with a clear message instead of trying to import "None.meta".
    checkpoint_file = tf.train.latest_checkpoint(ckpt_path)
    if checkpoint_file is None:
        raise FileNotFoundError(
            "no checkpoint found in {!r}".format(ckpt_path))

    with tf.Session() as sess:
        saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
        saver.restore(sess, checkpoint_file)

        # Look up the placeholders and the output tensor by their graph names.
        graph = tf.get_default_graph()
        input_x = graph.get_operation_by_name("input_x").outputs[0]
        keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
        probs = graph.get_tensor_by_name("softmaxLayer/probs:0")

        while True:
            text = input("请输入:")
            if text == "exit":
                # The exit() helper is injected by the site module for
                # interactive use only; raise SystemExit directly so this also
                # works where site is not loaded.
                raise SystemExit(0)
            text_seq = texts_to_sequences(text, vocab_path, stopword_path)
            pred = sess.run(probs,
                            feed_dict={
                                input_x: list(text_seq),
                                # Dropout keep probability fixed to 1.0 for prediction.
                                keep_prob: 1.0
                            })
            print("predict values: {}".format(pred[0]))
            print("{}".format("正向" if pred[0][0] > 0.7 else
                              "负向" if pred[0][0] < 0.3 else "中性"))
Ejemplo n.º 2
0
def predict_meme_text(template_id, num_boxes, init_text=''):
    """Generate caption text for a meme template one character at a time.

    Starting from ``init_text``, every step asks ``model`` to score each
    possible next character for every candidate text, keeps a randomly
    thresholded selection of the best continuations (at most ``BEAM_WIDTH``)
    and repeats until the loop reaches ``MAX_OUTPUT_LENGTH`` or no unfinished
    candidate remains.

    Args:
        template_id: Template identifier; stringified and zero-padded to 12
            characters before being prepended to each model input.
        num_boxes: Number of text boxes. A candidate counts as finished once
            its ``num_boxes``-th ``'|'`` separator has been generated.
        init_text: Text to start generating from.

    Returns:
        The text of the highest-scoring candidate. If ``init_text`` already
        has ``MAX_OUTPUT_LENGTH`` or more characters, nothing is generated
        and ``init_text`` is returned unchanged.
    """
    template_id = str(template_id).zfill(12)
    min_score = 0.1

    final_texts = [{'text': init_text, 'score': 1}]
    finished_texts = []
    for char_count in range(len(init_text), MAX_OUTPUT_LENGTH):
        # Model input layout: template id, index of the current box (number
        # of '|' seen so far) and the text, separated by two spaces.
        texts = [
            template_id + '  ' + str(beam['text'].count('|')) + '  ' + beam['text']
            for beam in final_texts
        ]
        sequences = util.texts_to_sequences(texts, char_to_int)
        data = pad_sequences(sequences, maxlen=SEQUENCE_LENGTH)
        predictions_list = model.predict(data)

        # One entry per (candidate text, next character) pair, weighted by
        # the score the candidate text already carries.
        sorted_predictions = []
        for i, char_scores in enumerate(predictions_list):
            beam = final_texts[i]
            for j, char_score in enumerate(char_scores):
                sorted_predictions.append({
                    'text': beam['text'],
                    'next_char': labels[j],
                    'score': char_score * beam['score']
                })

        sorted_predictions = sorted(sorted_predictions, key=lambda p: p['score'], reverse=True)
        top_score = sorted_predictions[0]['score']
        rand_int = random.randint(int(min_score * 1000), 1000)
        # give each prediction a chance of being chosen corresponding to its score
        # (to enable equal probabilities instead, compare against
        # top_score * min_score)
        threshold = rand_int / 1000 * top_score
        top_predictions = [p for p in sorted_predictions if p['score'] >= threshold]
        random.shuffle(top_predictions)

        final_texts = []
        # Finished texts keep occupying beam slots; a non-positive slot count
        # selects nothing.
        open_slots = min(BEAM_WIDTH, len(top_predictions)) - len(finished_texts)
        for prediction in top_predictions[:max(open_slots, 0)]:
            candidate = {
                'text': prediction['text'] + prediction['next_char'],
                # normalize all scores around top_score=1 so tiny floats don't disappear due to rounding
                'score': prediction['score'] / top_score
            }
            if prediction['next_char'] == '|' and prediction['text'].count('|') == num_boxes - 1:
                # The last box was just closed: set the text aside.
                finished_texts.append(candidate)
            else:
                final_texts.append(candidate)

        if char_count >= MAX_OUTPUT_LENGTH - 1 or not final_texts:
            final_texts = final_texts + finished_texts
            final_texts = sorted(final_texts, key=lambda p: p['score'], reverse=True)
            return final_texts[0]['text']

    # The loop body never ran because init_text is already at or beyond the
    # length limit; return it as-is rather than falling through to None.
    return init_text
def index():
    """Render the index page; on POST, classify the submitted comment.

    For a POST request the ``comments`` form field is converted with
    ``texts_to_sequences`` (using the vocabulary and stop-word paths from
    ``config.yml``), passed to ``predict`` and the resulting label plus the
    first predicted value are handed to the template as ``RESULT``.
    Any other request simply renders the page.
    """
    if request.method != "POST":
        return render_template('index.html')

    config = load_yaml_config("config.yml")
    comment = request.form['comments']
    data_config = config["data"]
    vocab_path = data_config["vocab_path"]
    stopword_path = data_config["stopword_path"]

    encoded = texts_to_sequences(comment, vocab_path, stopword_path)
    # Only the first encoded sequence is sent to the predictor.
    first_sequence = list(encoded)[0]
    pred = predict(first_sequence.tolist())
    positive = pred['predictions'][0][0]

    if positive > 0.7:
        res = "正向"
    elif positive < 0.3:
        res = "负向"
    else:
        res = "中性"

    return render_template('index.html', RESULT=res + " " + str(positive))
Ejemplo n.º 4
0
def predict_pb():
    """Load the exported model (frozen graph) and classify console input.

    Loads the export stored in the ``1568521090`` sub-directory of
    ``pb_path`` through the SavedModel loader, then repeatedly reads a line
    from standard input, converts it with ``texts_to_sequences`` and prints
    the predicted values followed by a label: "正向" above 0.7, "负向" below
    0.3, "中性" otherwise. Entering ``exit`` terminates the process.
    """
    with tf.Session() as session:
        export_dir = pb_path + "/1568521090"
        tf.saved_model.loader.load(session, [tag_constants.SERVING], export_dir)

        # Resolve the placeholders and the output tensor by graph name.
        default_graph = tf.get_default_graph()
        input_x = default_graph.get_operation_by_name("input_x").outputs[0]
        keep_prob = default_graph.get_operation_by_name(
            "dropout_keep_prob").outputs[0]
        probs = default_graph.get_tensor_by_name("softmaxLayer/probs:0")

        while True:
            text = input("请输入:")
            if text == "exit":
                exit(0)

            encoded = texts_to_sequences(text, vocab_path, stopword_path)
            # Dropout keep probability is fixed to 1.0 for prediction.
            feed = {input_x: list(encoded), keep_prob: 1.0}
            pred = session.run(probs, feed_dict=feed)

            print("predict values: {}".format(pred[0]))
            if pred[0][0] > 0.7:
                label = "正向"
            elif pred[0][0] < 0.3:
                label = "负向"
            else:
                label = "中性"
            print("{}".format(label))
Ejemplo n.º 5
0
        break


# Show a few sample texts as a sanity check; indexing texts[1000] requires at
# least 1001 texts to have been collected.
print('training text 0: %s' % texts[0])
print('training text 10: %s' % texts[10])
print('training text 1000: %s' % texts[1000])
# `t` is set before this excerpt — presumably the start time of the JSON scan.
print('scanning json took %ds' % round(time.time() - t))
util.print_memory()


# Tokenization step: build the character-to-integer mapping from the texts and
# convert every text into a sequence using that mapping.
print('tokenizing %d texts...' % len(texts))
del training_data  # free memory
t = time.time()  # restart the timer for the tokenization step

char_to_int = util.map_char_to_int(texts)
sequences = util.texts_to_sequences(texts, char_to_int)
del texts  # free memory

print('example sequence 10: ', sequences[10])
print('tokenizing took %ds' % round(time.time() - t))
util.print_memory()


print('saving tokenizer and labels to file...')

# save tokenizer, label indexes, and parameters so they can be used for predicting later
with open(MODEL_PATH + '/params.json', 'w') as handle:
    json.dump({
        'sequence_length': SEQUENCE_LENGTH,
        'embedding_dim': EMBEDDING_DIM,
        'num_rows_used': len(sequences),
Ejemplo n.º 6
0
def predict_meme_text(model_path,
                      template_id,
                      num_boxes,
                      init_text='',
                      model_filename="model.h5",
                      params_filename="params.json",
                      beam_width=1,
                      max_output_length=140):
    """Generate caption text for a meme template one character at a time.

    Loads the model and its parameters from ``model_path``, then, starting
    from ``init_text``, repeatedly asks the model to score each possible next
    character for every candidate text, keeps a randomly thresholded
    selection of the best continuations (at most ``beam_width``) and stops
    when the loop reaches ``max_output_length`` or no unfinished candidate
    remains.

    Required:
        - pretrained model
        - params.json: contains information like seq_length, mappings
          char_to_int. It should be automatically generated after running
          train.py

    Args:
        model_path: Directory containing the model and parameter files.
        template_id: Template identifier; stringified and zero-padded to 12
            characters before being prepended to each model input.
        num_boxes: Number of text boxes. A candidate counts as finished once
            its ``num_boxes``-th ``'|'`` separator has been generated.
        init_text: Text to start generating from.
        model_filename: File name of the model inside ``model_path``.
        params_filename: File name of the JSON parameters inside
            ``model_path``; must provide ``sequence_length``,
            ``char_to_int`` and ``labels_index``.
        beam_width: Maximum number of candidates carried to the next step.
        max_output_length: Upper bound of the generation loop, counted from
            ``len(init_text)``.

    Returns:
        The text of the highest-scoring candidate. If ``init_text`` already
        has ``max_output_length`` or more characters, nothing is generated
        and ``init_text`` is returned unchanged.
    """
    model = load_model(os.path.join(model_path, model_filename))
    # Use a context manager so the parameter file is closed right after
    # parsing instead of leaking the handle.
    with open(os.path.join(model_path, params_filename)) as handle:
        params = json.load(handle)
    sequence_length = params['sequence_length']
    char_to_int = params['char_to_int']
    # Invert labels_index so a prediction column index maps to its character.
    labels = {v: k for k, v in params['labels_index'].items()}

    template_id = str(template_id).zfill(12)
    min_score = 0.1

    final_texts = [{'text': init_text, 'score': 1}]
    finished_texts = []
    for char_count in range(len(init_text), max_output_length):
        # Model input layout: template id, index of the current box (number
        # of '|' seen so far) and the text, separated by two spaces.
        texts = [
            template_id + '  ' + str(beam['text'].count('|')) + '  ' +
            beam['text'] for beam in final_texts
        ]
        sequences = util.texts_to_sequences(texts, char_to_int)
        data = pad_sequences(sequences, maxlen=sequence_length)
        predictions_list = model.predict(data)

        # One entry per (candidate text, next character) pair, weighted by
        # the score the candidate text already carries.
        sorted_predictions = []
        for i, char_scores in enumerate(predictions_list):
            beam = final_texts[i]
            for j, char_score in enumerate(char_scores):
                sorted_predictions.append({
                    'text': beam['text'],
                    'next_char': labels[j],
                    'score': char_score * beam['score']
                })

        sorted_predictions = sorted(sorted_predictions,
                                    key=lambda p: p['score'],
                                    reverse=True)
        top_score = sorted_predictions[0]['score']
        rand_int = random.randint(int(min_score * 1000), 1000)
        # give each prediction a chance of being chosen corresponding to its score
        # (to enable equal probabilities instead, compare against
        # top_score * min_score)
        threshold = rand_int / 1000 * top_score
        top_predictions = [
            p for p in sorted_predictions if p['score'] >= threshold
        ]
        random.shuffle(top_predictions)

        final_texts = []
        # Finished texts keep occupying beam slots; a non-positive slot count
        # selects nothing.
        open_slots = (min(beam_width, len(top_predictions)) -
                      len(finished_texts))
        for prediction in top_predictions[:max(open_slots, 0)]:
            candidate = {
                'text': prediction['text'] + prediction['next_char'],
                # normalize all scores around top_score=1 so tiny floats don't disappear due to rounding
                'score': prediction['score'] / top_score
            }
            if (prediction['next_char'] == '|'
                    and prediction['text'].count('|') == num_boxes - 1):
                # The last box was just closed: set the text aside.
                finished_texts.append(candidate)
            else:
                final_texts.append(candidate)

        if char_count >= max_output_length - 1 or not final_texts:
            final_texts = final_texts + finished_texts
            final_texts = sorted(final_texts,
                                 key=lambda p: p['score'],
                                 reverse=True)
            return final_texts[0]['text']

    # The loop body never ran because init_text is already at or beyond the
    # length limit; return it as-is rather than falling through to None.
    return init_text