def evaluate_score():
  evaluator.init()
  text_max_words = evaluator.all_distinct_texts.shape[1]
  print('text_max_words:', text_max_words)
  predictor = Predictor(FLAGS.model_dir, FLAGS.key, FLAGS.lkey, FLAGS.rkey, index=0)
  exact_predictor = None
  if FLAGS.use_exact_predictor:
    exact_predictor = Predictor(FLAGS.exact_model_dir, FLAGS.exact_key,
                                FLAGS.exact_lkey, FLAGS.exact_rkey, index=-1)
  print(tf.get_collection(FLAGS.key))
  seed = FLAGS.np_seed if FLAGS.np_seed else None
  index = evaluator.random_predict_index(seed=seed)
  evaluator.evaluate_scores(predictor, random=True, index=index)
  if exact_predictor is not None:
    ##well, for seq2seq we did the experiment, and for makeup title2name, score (average time per step) is much better than ori_score
    ##so just using score will be fine
    #exact_predictor._key = 'ori_score'
    #evaluator.evaluate_scores(predictor, random=True, exact_predictor=exact_predictor, index=index)
    #exact_predictor._key = 'score'
    evaluator.evaluate_scores(predictor, random=True,
                              exact_predictor=exact_predictor,
                              exact_ratio=FLAGS.exact_ratio, index=index)
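# Hedged illustration of the two-predictor rescoring idea in evaluate_score()
# above: a fast predictor produces candidate scores, an "exact" (slower)
# predictor rescores them, and the two are blended by a ratio. The linear
# interpolation below is an assumption for illustration only; the real
# blending lives inside evaluator.evaluate_scores() and may differ.
import numpy as np

def blend_scores(fast_scores, exact_scores, exact_ratio=0.5):
  """Linearly interpolate fast and exact scores (hypothetical helper)."""
  fast_scores = np.asarray(fast_scores, dtype=np.float32)
  exact_scores = np.asarray(exact_scores, dtype=np.float32)
  return (1.0 - exact_ratio) * fast_scores + exact_ratio * exact_scores

# example: exact_ratio=1.0 would trust only the exact predictor
print(blend_scores([0.2, 0.9], [0.4, 0.5], exact_ratio=0.25))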
def evaluate_score():
  evaluator.init()
  text_max_words = evaluator.all_distinct_texts.shape[1]
  print('text_max_words:', text_max_words)
  predictor = algos_factory.gen_predictor(FLAGS.algo)
  predictor.init_predict(text_max_words)
  predictor.load(FLAGS.model_dir)
  evaluator.evaluate_scores(predictor)
def main(_):
  if not FLAGS.pre_calc_image_feature:
    melt.apps.image_processing.init()
  evaluator.init()
  logging.init(logtostderr=True, logtofile=False)
  global_scope = ''
  if FLAGS.add_global_scope:
    global_scope = FLAGS.global_scope if FLAGS.global_scope else FLAGS.algo
  with tf.variable_scope(global_scope):
    evaluate_score()
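# A minimal, self-contained sketch of the entry-point pattern used by the
# main(_) function above: flag-driven config, an optional global variable
# scope, then tf.app.run() dispatching to main. Assumes TensorFlow 1.x; the
# flag names mirror the ones above, but the body is a placeholder rather
# than the real project code.
import tensorflow as tf

flags = tf.app.flags
flags.DEFINE_string('global_scope', '', 'optional explicit scope name')
flags.DEFINE_string('algo', 'bow', 'fallback scope name when none is given')
flags.DEFINE_boolean('add_global_scope', True, 'wrap the graph in a scope')
FLAGS = flags.FLAGS

def _evaluate_score():
  #stand-in for the project-specific evaluate_score() above
  print('would build and evaluate the model here')

def main(_):
  global_scope = ''
  if FLAGS.add_global_scope:
    global_scope = FLAGS.global_scope if FLAGS.global_scope else FLAGS.algo
  with tf.variable_scope(global_scope):
    _evaluate_score()

if __name__ == '__main__':
  tf.app.run()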
def gen_eval_generated_texts_ops(input_app, input_results, predictor, eval_scores,
                                 eval_neg_text=None, eval_neg_text_str=None):
  #need distinct_texts.npy distinct_text_strs.npy
  evaluator.init()
  evaluate_image_name, evaluate_image_feature, evaluate_text, evaluate_text_str = \
      input_results[input_app.fixed_input_valid_name]
  num_evaluate_examples = input_app.num_evaluate_examples
  pos_scores = eval_scores[:num_evaluate_examples, 0]
  if eval_neg_text is not None:
    neg_scores = eval_scores[:num_evaluate_examples, 1]
    #eval neg text strs
    evaluate_neg_text_str = eval_neg_text_str[:num_evaluate_examples, 0]
    #eval neg text ids
    evaluate_neg_text = eval_neg_text[:num_evaluate_examples, 0, :]
  generated_texts, generated_texts_score = predictor.build_predict_text_graph(
      evaluate_image_feature,
      decode_method=FLAGS.seq_decode_method,
      beam_size=FLAGS.beam_size,
      convert_unk=False)
  if FLAGS.show_beam_search:
    generated_texts_beam, generated_texts_score_beam = predictor.build_predict_text_graph(
        evaluate_image_feature,
        decode_method=SeqDecodeMethod.beam,  #beam search (in-graph)
        beam_size=FLAGS.beam_size,
        convert_unk=False)
  else:
    generated_texts_beam, generated_texts_score_beam = generated_texts, generated_texts_score
  eval_ops = [evaluate_image_name, evaluate_text_str, evaluate_text,
              generated_texts, generated_texts_beam,
              generated_texts_score, generated_texts_score_beam,
              pos_scores]
  if eval_neg_text is not None:
    eval_ops += [neg_scores, evaluate_neg_text_str, evaluate_neg_text]
  print('eval_ops:')
  for eval_op in eval_ops:
    print(eval_op)
  return eval_ops
def gen_eval_show_ops(input_app, input_results, predictor, eval_scores,
                      eval_neg_text, eval_neg_text_str):
  eval_ops = []
  #need distinct_texts.npy distinct_text_strs.npy
  evaluator.init()
  evaluate_image_name, evaluate_image_feature, evaluate_text, evaluate_text_str = \
      input_results[input_app.fixed_input_valid_name]
  num_evaluate_examples = input_app.num_evaluate_examples
  #if shrunk, then a pos with a high score might still not be recalled! pay attention!
  all_distinct_texts = evaluator.all_distinct_texts[:FLAGS.max_texts]  #max_texts in evaluator.py (deepiu.util)
  #print(all_distinct_texts[0], evaluator.all_distinct_text_strs[0], text2ids.ids2text(all_distinct_texts[0]))
  predictor.init_evaluate_constant_text(all_distinct_texts)
  pos_scores = eval_scores[:num_evaluate_examples, 0]
  neg_scores = eval_scores[:num_evaluate_examples, 1]
  #eval neg text strs
  evaluate_neg_text_str = eval_neg_text_str[:num_evaluate_examples, 0]
  #eval neg text ids
  evaluate_neg_text = eval_neg_text[:num_evaluate_examples, 0, :]
  #--all for the image-to-text show; only need num_evaluate_examples to calc and show!
  eval_score = predictor.build_evaluate_fixed_text_graph(
      evaluate_image_feature[:num_evaluate_examples, :], FLAGS.metric_eval_texts_size)
  print('---------eval_score:', eval_score)
  eval_max_score, eval_max_index = tf.nn.top_k(eval_score, FLAGS.num_text_topn)
  eval_word_score = predictor.build_evaluate_image_word_graph(evaluate_image_feature)
  eval_word_max_score, eval_word_max_index = tf.nn.top_k(eval_word_score, FLAGS.num_word_topn)
  eval_ops += [eval_max_score, eval_max_index, eval_word_max_score, eval_word_max_index]
  eval_ops += [evaluate_image_name, evaluate_text_str, evaluate_text]
  #notice evaluate_neg_text_str will run here, so everything related must be a placeholder
  #to avoid running twice (if FLAGS.feed_dict=1 and FLAGS.show_eval=1)
  #@TODO at some steps we might want to use eval_score
  eval_ops += [pos_scores, neg_scores, evaluate_neg_text_str, evaluate_neg_text]
  print('eval_ops:')
  for eval_op in eval_ops:
    print(eval_op)
  return eval_ops
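# Self-contained sketch of the tf.nn.top_k usage in gen_eval_show_ops() above:
# given a [num_images, num_texts] score matrix, top_k returns the per-image
# best scores and the indices of the corresponding candidate texts. Assumes
# TensorFlow 1.x; the scores here are dummy constants.
import tensorflow as tf

scores = tf.constant([[0.1, 0.7, 0.2],
                      [0.5, 0.3, 0.9]])  #2 images x 3 candidate texts
top_scores, top_indices = tf.nn.top_k(scores, k=2)

with tf.Session() as sess:
  s, idx = sess.run([top_scores, top_indices])
  print(s)    #[[0.7 0.2] [0.9 0.5]]
  print(idx)  #[[1 2] [2 0]] -> lookup rows of all_distinct_texts by index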
def gen_eval_generated_texts_ops(input_app, input_results, predictor, eval_scores):
  #need distinct_texts.npy distinct_text_strs.npy
  evaluator.init()
  evaluate_image_name, \
  evaluate_text, evaluate_text_str, \
  evaluate_input_text, evaluate_input_text_str = input_results[input_app.fixed_input_valid_name]
  num_evaluate_examples = input_app.num_evaluate_examples
  pos_scores = eval_scores[:num_evaluate_examples, 0]
  build_predict_text_graph = functools.partial(
      predictor.build_predict_text_graph,
      input_text=evaluate_input_text,
      beam_size=FLAGS.beam_size,
      convert_unk=False)
  generated_texts, generated_texts_score = build_predict_text_graph(
      decode_method=FLAGS.seq_decode_method)
  if FLAGS.show_beam_search:
    #beam search (in-graph)
    generated_texts_beam, generated_texts_score_beam = build_predict_text_graph(
        decode_method=SeqDecodeMethod.beam)
  else:
    generated_texts_beam, generated_texts_score_beam = generated_texts, generated_texts_score
  eval_ops = [evaluate_image_name, evaluate_input_text_str, evaluate_input_text,
              evaluate_text_str, evaluate_text,
              generated_texts, generated_texts_beam,
              generated_texts_score, generated_texts_score_beam,
              pos_scores]
  print('eval_ops:')
  for eval_op in eval_ops:
    print(eval_op)
  return eval_ops
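# Sketch of how an eval_ops list like the ones returned above is typically
# consumed: run all ops in a single sess.run() call, then pair each result
# with a name for display. The ops and names below are stand-ins for the
# real graph tensors, not project code. Assumes TensorFlow 1.x.
import tensorflow as tf

eval_ops = [tf.constant(['img_0']),       #evaluate_image_name (stand-in)
            tf.constant(['a dog runs']),  #generated text (stand-in)
            tf.constant([0.83])]          #pos_scores (stand-in)
op_names = ['image_name', 'generated_text', 'pos_score']

with tf.Session() as sess:
  results = sess.run(eval_ops)
  for name, result in zip(op_names, results):
    print(name, ':', result)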
def main(_):
  #-----------init global resources
  logging.set_logging_path(gezi.get_dir(FLAGS.model_dir))
  if not FLAGS.pre_calc_image_feature:
    melt.apps.image_processing.init()
  vocabulary.init()
  text2ids.init()
  evaluator.init()
  logging.info('algo:{}'.format(FLAGS.algo))
  logging.info('monitor_level:{}'.format(FLAGS.monitor_level))
  global sess
  sess = melt.get_session(log_device_placement=FLAGS.log_device_placement)
  global_scope = ''
  if FLAGS.add_global_scope:
    global_scope = FLAGS.global_scope if FLAGS.global_scope else FLAGS.algo
  with tf.variable_scope(global_scope):
    train()
def main(_):
  #-----------init global resources
  logging.set_logging_path(gezi.get_dir(FLAGS.model_dir))
  melt.apps.train.init()
  InputApp.init()
  vocabulary.init()
  text2ids.init()
  #must init before building the main graph
  evaluator.init()
  logging.info('algo:{}'.format(FLAGS.algo))
  logging.info('monitor_level:{}'.format(FLAGS.monitor_level))
  global global_scope
  if FLAGS.add_global_scope:
    global_scope = FLAGS.global_scope if FLAGS.global_scope else FLAGS.algo
  global sess
  sess = melt.get_session(log_device_placement=FLAGS.log_device_placement)
  with tf.variable_scope(global_scope):
    train()
def evaluate_score():
  FLAGS.valid_resource_dir = FLAGS.valid_resource_dir_
  evaluator.init()
  predictor = melt.SimPredictor(FLAGS.model_dir)
  evaluator.evaluate_scores(predictor, random=FLAGS.random)
def evaluate_score():
  evaluator.init()
  text_max_words = evaluator.all_distinct_texts.shape[1]
  print('text_max_words:', text_max_words)
  predictor = Predictor(FLAGS.model_dir)
  evaluator.evaluate_scores(predictor, random=True)
def main(_):
  print('eval_rank:', FLAGS.eval_rank, 'eval_translation:', FLAGS.eval_translation)
  epoch_dir = os.path.join(FLAGS.model_dir, 'epoch')
  logging.set_logging_path(gezi.get_dir(epoch_dir))
  log_dir = epoch_dir
  sess = tf.Session()
  summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
  Predictor = TextPredictor
  image_model = None
  if FLAGS.image_checkpoint_file:
    #feature_name=None, since in show-and-tell the predictor will use gen_features, not gen_feature
    image_model = melt.image.ImageModel(FLAGS.image_checkpoint_file,
                                        FLAGS.image_model_name,
                                        feature_name=None)
  evaluator.init(image_model)
  visited_path = os.path.join(epoch_dir, 'visited.pkl')
  if not os.path.exists(visited_path):
    visited_checkpoints = set()
  else:
    visited_checkpoints = pickle.load(open(visited_path, 'rb'))
  visited_checkpoints = set([x.split('/')[-1] for x in visited_checkpoints])
  while True:
    suffix = '.data-00000-of-00001'
    files = glob.glob(os.path.join(epoch_dir, 'model.ckpt*' + suffix))
    #from epoch 1, 2, ...
    files.sort(key=os.path.getmtime)
    files = [file.replace(suffix, '') for file in files]
    for i, file in enumerate(files):
      if 'best' in file:
        continue
      if FLAGS.start_epoch and i + 1 < FLAGS.start_epoch:
        continue
      file_ = file.split('/')[-1]
      if file_ not in visited_checkpoints:
        visited_checkpoints.add(file_)
        epoch = int(file_.split('-')[-2])
        logging.info('monitor_epoch:%d from %d model files' % (epoch, len(visited_checkpoints)))
        #will use predict_text in eval_translation, predict in eval_rank
        predictor = Predictor(file, image_model=image_model,
                              feature_name=melt.get_features_name(FLAGS.image_model_name))
        summary = tf.Summary()
        scores, metrics = evaluator.evaluate(
            predictor,
            eval_rank=FLAGS.eval_rank,
            eval_translation=FLAGS.eval_translation)
        melt.add_summarys(summary, scores, metrics)
        summary_writer.add_summary(summary, epoch)
        summary_writer.flush()
        pickle.dump(visited_checkpoints, open(visited_path, 'wb'))
    time.sleep(5)
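# Stdlib-only sketch of the checkpoint-polling loop in main() above: scan the
# epoch dir for checkpoints by mtime, skip already-visited ones, and persist
# the visited set so the monitor can resume after a restart. The directory
# layout matches the code above; evaluate_fn is a placeholder callback
# standing in for building a predictor and running evaluator.evaluate().
import glob
import os
import pickle
import time

def monitor(epoch_dir, evaluate_fn, poll_seconds=5):
  visited_path = os.path.join(epoch_dir, 'visited.pkl')
  if os.path.exists(visited_path):
    with open(visited_path, 'rb') as f:
      visited = set(pickle.load(f))
  else:
    visited = set()
  suffix = '.data-00000-of-00001'
  while True:
    files = glob.glob(os.path.join(epoch_dir, 'model.ckpt*' + suffix))
    files.sort(key=os.path.getmtime)  #oldest epoch first
    for file in (f.replace(suffix, '') for f in files):
      name = os.path.basename(file)
      if name in visited:
        continue
      visited.add(name)
      evaluate_fn(file)  #e.g. build a predictor and evaluate this checkpoint
      with open(visited_path, 'wb') as f:
        pickle.dump(visited, f)
    time.sleep(poll_seconds)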