def print_img(img, i):
  img_url = FLAGS.image_url_prefix + img if not img.startswith("http://") else img
  logging.info(img_html.format(
      img_url, i, img,
      melt.epoch(), melt.step(),
      melt.train_loss(), melt.eval_loss(),
      melt.duration(), gezi.now_time()))

def print_img(img, i):
  img_url = get_img_url(img)
  logging.info(img_html.format(
      img_url, i, img,
      melt.epoch(), melt.step(),
      melt.train_loss(), melt.eval_loss(),
      melt.duration(), gezi.now_time()))

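# get_img_url() is not defined in this extract. Below is a minimal sketch of
# what it presumably does, based on the inline logic of the first print_img
# variant above: prefix relative image names with FLAGS.image_url_prefix and
# pass full URLs through unchanged. This is an assumption for illustration,
# not necessarily the exact helper used elsewhere in the project.
def get_img_url(img):
  #already a full URL, use it as is (assumed behaviour, mirroring print_img above)
  if img.startswith("http://"):
    return img
  return FLAGS.image_url_prefix + img
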
def evaluate_scores(predictor, random=False):
  timer = gezi.Timer('evaluate_scores')
  init()
  imgs, img_features = get_image_names_and_features()
  num_metric_eval_examples = min(FLAGS.num_metric_eval_examples, len(imgs))
  step = FLAGS.metric_eval_batch_size

  if random:
    index = np.random.choice(len(imgs), num_metric_eval_examples, replace=False)
    imgs = imgs[index]
    img_features = img_features[index]

  text_max_words = all_distinct_texts.shape[1]
  rank_metrics = gezi.rank_metrics.RecallMetrics()

  print('text_max_words:', text_max_words)
  start = 0
  while start < num_metric_eval_examples:
    end = start + step
    if end > num_metric_eval_examples:
      end = num_metric_eval_examples
    print('predicts start:', start, 'end:', end, file=sys.stderr)
    predicts(imgs[start:end], img_features[start:end], predictor, rank_metrics)
    start = end

  melt.logging_results(
      rank_metrics.get_metrics(),
      rank_metrics.get_names(),
      tag='evaluate: epoch:{} step:{} train:{} eval:{}'.format(
          melt.epoch(), melt.step(), melt.train_loss(), melt.eval_loss()))

  timer.print()
  return rank_metrics.get_metrics(), rank_metrics.get_names()

def evaluate_scores(predictor, random=False):
  timer = gezi.Timer('evaluate_scores')
  init()
  step = FLAGS.metric_eval_batch_size

  if FLAGS.eval_img2text:
    imgs, img_features = get_image_names_and_features()
    num_metric_eval_examples = min(FLAGS.num_metric_eval_examples, len(imgs))

    if random:
      index = np.random.choice(len(imgs), num_metric_eval_examples, replace=False)
      imgs = imgs[index]
      img_features = img_features[index]

    rank_metrics = gezi.rank_metrics.RecallMetrics()

    start = 0
    while start < num_metric_eval_examples:
      end = start + step
      if end > num_metric_eval_examples:
        end = num_metric_eval_examples
      print('predicts image start:', start, 'end:', end, file=sys.stderr)
      predicts(imgs[start:end], img_features[start:end], predictor, rank_metrics)
      start = end

    melt.logging_results(
        rank_metrics.get_metrics(),
        rank_metrics.get_names(),
        tag='evaluate: epoch:{} step:{} train:{} eval:{}'.format(
            melt.epoch(), melt.step(), melt.train_loss(), melt.eval_loss()))

  if FLAGS.eval_text2img:
    num_metric_eval_examples = min(FLAGS.num_metric_eval_examples, len(all_distinct_texts))

    if random:
      index = np.random.choice(len(all_distinct_texts), num_metric_eval_examples, replace=False)
      text_strs = all_distinct_text_strs[index]
      texts = all_distinct_texts[index]
    else:
      text_strs = all_distinct_text_strs
      texts = all_distinct_texts

    rank_metrics2 = gezi.rank_metrics.RecallMetrics()

    start = 0
    while start < num_metric_eval_examples:
      end = start + step
      if end > num_metric_eval_examples:
        end = num_metric_eval_examples
      print('predicts start:', start, 'end:', end, file=sys.stderr)
      predicts_txt2im(text_strs[start:end], texts[start:end], predictor, rank_metrics2)
      start = end

    melt.logging_results(
        rank_metrics2.get_metrics(),
        ['t2i' + x for x in rank_metrics2.get_names()],
        tag='text2img')

  timer.print()

  if FLAGS.eval_img2text and FLAGS.eval_text2img:
    return (rank_metrics.get_metrics() + rank_metrics2.get_metrics(),
            rank_metrics.get_names() + ['t2i' + x for x in rank_metrics2.get_names()])
  elif FLAGS.eval_img2text:
    return rank_metrics.get_metrics(), rank_metrics.get_names()
  else:
    return rank_metrics2.get_metrics(), rank_metrics2.get_names()

def evaluate_translation(predictor, random=False, index=None):
  timer = gezi.Timer('evaluate_translation')
  refs = prepare_refs()
  imgs, img_features = get_image_names_and_features()

  num_metric_eval_examples = min(FLAGS.num_metric_eval_examples, len(imgs))
  if num_metric_eval_examples <= 0:
    num_metric_eval_examples = len(imgs)
  if num_metric_eval_examples == len(imgs):
    random = False

  step = FLAGS.metric_eval_batch_size

  if random:
    if index is None:
      index = np.random.choice(len(imgs), num_metric_eval_examples, replace=False)
    imgs = imgs[index]
    img_features = img_features[index]
  else:
    img_features = img_features[:num_metric_eval_examples]

  results = {}
  start = 0
  while start < num_metric_eval_examples:
    end = start + step
    if end > num_metric_eval_examples:
      end = num_metric_eval_examples
    print('predicts image start:', start, 'end:', end, file=sys.stderr, end='\r')
    translation_predicts(imgs[start:end], img_features[start:end], predictor, results)
    start = end

  scorers = [
      (Bleu(4), ["bleu_1", "bleu_2", "bleu_3", "bleu_4"]),
      (Meteor(), "meteor"),
      (Rouge(), "rouge_l"),
      (Cider(), "cider"),
  ]

  score_list = []
  metric_list = []

  selected_refs = {}
  selected_results = {}
  #build both dicts from the same keys so refs and results stay aligned
  for key in results:
    selected_refs[key] = refs[key]
    selected_results[key] = results[key]
    assert len(selected_results[key]) == 1, selected_results[key]
  assert selected_results.keys() == selected_refs.keys(), '%d %d' % (
      len(selected_results.keys()), len(selected_refs.keys()))

  if FLAGS.eval_translation_reseg:
    print('tokenization...', file=sys.stderr)
    global tokenizer
    if tokenizer is None:
      tokenizer = PTBTokenizer()
    selected_refs = tokenizer.tokenize(selected_refs)
    selected_results = tokenizer.tokenize(selected_results)

  logging.info('predict&label:{}{}{}'.format(
      '|'.join(list(selected_results.items())[0][1]),
      '---',
      '|'.join(list(selected_refs.items())[0][1])))

  for scorer, method in scorers:
    print('computing %s score...' % scorer.method(), file=sys.stderr)
    score, scores = scorer.compute_score(selected_refs, selected_results)
    if type(method) == list:
      for sc, scs, m in zip(score, scores, method):
        score_list.append(sc)
        metric_list.append(m)
        if FLAGS.eval_result_dir:
          out = open(os.path.join(FLAGS.eval_result_dir, m + '.txt'), 'w')
          for i, s in enumerate(scs):
            key = list(selected_results.keys())[i]
            result = selected_results[key]
            refs_str = '\x01'.join(selected_refs[key])
            print(key, result, refs_str, s, sep='\t', file=out)
    else:
      score_list.append(score)
      metric_list.append(method)
      if FLAGS.eval_result_dir:
        out = open(os.path.join(FLAGS.eval_result_dir, method + '.txt'), 'w')
        for i, s in enumerate(scores):
          key = list(selected_results.keys())[i]
          result = selected_results[key]
          refs_str = '\x01'.join(selected_refs[key])
          print(key, result, refs_str, s, sep='\t', file=out)

  #exclude "bleu_1", "bleu_2", "bleu_3"
  score_list, metric_list = score_list[3:], metric_list[3:]
  assert len(score_list) == 4
  avg_score = sum(score_list) / len(score_list)
  score_list.append(avg_score)
  metric_list.append('avg')
  metric_list = ['trans_' + x for x in metric_list]

  melt.logging_results(
      score_list,
      metric_list,
      tag='evaluate: epoch:{} step:{} train:{} eval:{}'.format(
          melt.epoch(), melt.step(), melt.train_loss(), melt.eval_loss()))

  timer.print()
  return score_list, metric_list

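# A minimal sketch of the data layout the caption scorers above consume,
# assuming the standard coco-caption interface used in evaluate_translation():
# both refs and results map an image key to a list of caption strings, results
# carry exactly one caption per key (as the assert above checks), and
# compute_score(refs, results) returns (corpus_score, per_example_scores).
# The toy captions below are illustration only.
def _scorer_input_example():
  refs = {
      'img1': ['a dog runs on the grass', 'a dog is running outside'],
      'img2': ['two people ride bikes'],
  }
  results = {
      'img1': ['a dog running on grass'],
      'img2': ['two people riding bicycles'],
  }
  #for Bleu the corpus score is a list of 4 values (bleu_1..bleu_4) and the
  #per-example scores are 4 parallel lists, one entry per image key
  score, scores = Bleu(4).compute_score(refs, results)
  return score, scores
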
def evaluate_scores(predictor, random=False, index=None, exact_predictor=None, exact_ratio=1.):
  """Rank-metrics evaluation: by default recall@1,2,5,10,50."""
  timer = gezi.Timer('evaluate_scores')
  init()
  step = FLAGS.metric_eval_batch_size

  if FLAGS.eval_img2text:
    imgs, img_features = get_image_names_and_features()
    num_metric_eval_examples = min(FLAGS.num_metric_eval_examples, len(imgs))
    if num_metric_eval_examples <= 0:
      num_metric_eval_examples = len(imgs)
    if num_metric_eval_examples == len(imgs):
      random = False

    if random:
      if index is None:
        index = np.random.choice(len(imgs), num_metric_eval_examples, replace=False)
      imgs = imgs[index]
      img_features = img_features[index]
    else:
      img_features = img_features[:num_metric_eval_examples]

    rank_metrics = gezi.rank_metrics.RecallMetrics()

    start = 0
    while start < num_metric_eval_examples:
      end = start + step
      if end > num_metric_eval_examples:
        end = num_metric_eval_examples
      print('predicts image start:', start, 'end:', end, file=sys.stderr, end='\r')
      predicts(imgs[start:end], img_features[start:end], predictor, rank_metrics,
               exact_predictor=exact_predictor, exact_ratio=exact_ratio)
      start = end

    melt.logging_results(
        rank_metrics.get_metrics(),
        rank_metrics.get_names(),
        tag='evaluate: epoch:{} step:{} train:{} eval:{}'.format(
            melt.epoch(), melt.step(), melt.train_loss(), melt.eval_loss()))

  if FLAGS.eval_text2img:
    num_metric_eval_examples = min(FLAGS.num_metric_eval_examples, len(all_distinct_texts))

    if random:
      index = np.random.choice(len(all_distinct_texts), num_metric_eval_examples, replace=False)
      text_strs = all_distinct_text_strs[index]
      texts = all_distinct_texts[index]
    else:
      text_strs = all_distinct_text_strs
      texts = all_distinct_texts

    rank_metrics2 = gezi.rank_metrics.RecallMetrics()

    start = 0
    while start < num_metric_eval_examples:
      end = start + step
      if end > num_metric_eval_examples:
        end = num_metric_eval_examples
      print('predicts start:', start, 'end:', end, file=sys.stderr, end='\r')
      predicts_txt2im(text_strs[start:end], texts[start:end], predictor, rank_metrics2,
                      exact_predictor=exact_predictor)
      start = end

    melt.logging_results(
        rank_metrics2.get_metrics(),
        ['t2i' + x for x in rank_metrics2.get_names()],
        tag='text2img')

  timer.print()

  if FLAGS.eval_img2text and FLAGS.eval_text2img:
    return (rank_metrics.get_metrics() + rank_metrics2.get_metrics(),
            rank_metrics.get_names() + ['t2i' + x for x in rank_metrics2.get_names()])
  elif FLAGS.eval_img2text:
    return rank_metrics.get_metrics(), rank_metrics.get_names()
  else:
    return rank_metrics2.get_metrics(), rank_metrics2.get_names()
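
# A minimal usage sketch (not part of this extract): one way the evaluation
# entry points above could be wired into a periodic metric-eval hook. The
# flags eval_rank and eval_translation are assumptions named here only for
# illustration; `predictor` is whatever object predicts()/predicts_txt2im()/
# translation_predicts() expect.
def metric_eval(predictor):
  scores, names = [], []
  if FLAGS.eval_rank:          #assumed flag, not defined in this extract
    s, n = evaluate_scores(predictor, random=True)
    scores += list(s)
    names += list(n)
  if FLAGS.eval_translation:   #assumed flag, not defined in this extract
    s, n = evaluate_translation(predictor, random=True)
    scores += list(s)
    names += list(n)
  return scores, names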