def offset_eval(runs):
    summaries = []
    for run in runs:
        run_info, model, loader = load_run(run, data=args.data, offset='all')
        params = run_info[-1]
        dataset = loader.dataset

        _, targets, confidences = predict(model, loader, cuda=params['cuda'])

        n_samples = len(dataset) // dataset.skip
        targets = targets[:n_samples]
        confidences = np.concatenate(confidences, axis=0)
        confidences = confidences.reshape(dataset.skip, n_samples, -1).mean(axis=0)
        predictions = np.argmax(confidences, axis=1)

        multi_offset_accuracy = accuracy_score(targets, predictions)
        summary = get_run_summary(run_info, multi_offset_acc=multi_offset_accuracy)
        summaries.append(summary)

    summary = pd.concat(summaries, ignore_index=True).sort_values(
        'multi_offset_acc', ascending=False)

    if args.output:
        summary.to_csv(args.output, index=False)
    else:
        with pd.option_context('display.width', None), \
             pd.option_context('max_columns', None):
            print(summary)
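# Illustration (not part of the original script): a minimal, self-contained
# sketch of the multi-offset averaging done in offset_eval. Assuming the
# loader enumerates all sequences for one offset before moving to the next,
# reshape(skip, n_samples, -1) regroups the stacked rows by offset and
# mean(axis=0) averages the class confidences across offsets. Shapes below
# are made up for the example.
import numpy as np

skip, n_samples, n_classes = 2, 3, 4
confidences = np.random.rand(skip * n_samples, n_classes)
averaged = confidences.reshape(skip, n_samples, -1).mean(axis=0)
assert averaged.shape == (n_samples, n_classes)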
def main(args):
    run_info, model, loader = load_run(args.run_dir, data=args.data)
    params = run_info[-1]

    features = []
    for x, _ in tqdm(loader):
        if params['cuda']:
            x = x.cuda()
        x = Variable(x, volatile=True)
        f = model.extract(x).data
        features.append(f.cpu().numpy().squeeze())

    if args.format == 'jan':
        with open(args.output, 'w') as f:
            for sample, feature in tqdm(zip(loader.dataset.data, features),
                                        total=len(features)):
                f.write('#objectKey messif.objects.keys.AbstractObjectKey {}\n'
                        .format(sample['id']))
                feature.tofile(f, sep=',')
                f.write('\n')
        return

    if args.format == 'numpy':
        features = np.stack(features)
        np.save(args.output, features)
        return
def main_old(args):
    dataset = MotionDataset('data/split1.pkl', fps=10)
    actions = dataset.actions.keys()

    run_info, model, loader = load_run(args.run_dir, args.data, keep_actions=actions)
    params = run_info[-1]

    out = os.path.join(args.run_dir, 'time-analysis.pdf')
    with PdfPages(out) as pdf:
        for i in tqdm(range(len(loader.dataset))):
            x, annotations = loader.dataset[i]
            if params['cuda']:
                x = x.cuda()
            x = Variable(x, volatile=True)

            outs = model.steps_forward(x)

            head = params.get('head', 'softmax')
            if head == 'softmax':
                outs = torch.nn.functional.softmax(outs, dim=1)
            elif head == 'sigmoid':
                outs = torch.nn.functional.sigmoid(outs)

            confidences = outs.data.cpu().numpy()  # [:, y]
            n_samples, n_classes = outs.shape
            time = np.arange(n_samples)
            # time = torch.linspace(0, 1, seq_len).numpy()

            # Build a per-frame groundtruth matrix from the annotations.
            groundtruth = np.zeros_like(confidences, dtype=int)
            for a in annotations:
                class_id = loader.dataset.action_id_to_ix[a['action_id']]
                start = int(round(a['start_frame'] / loader.dataset.skip))
                end = int(round((a['start_frame'] + a['duration']) / loader.dataset.skip))
                groundtruth[start:end, class_id] = 1

            cmap = plt.get_cmap('jet')
            colors = cmap(np.linspace(0, 1.0, n_classes))

            fig, axes = plt.subplots(3, 1)
            for ax in axes:
                ax.set_ylim([0, 1])
                # set_color_cycle was removed in matplotlib 2.0; set_prop_cycle
                # is the equivalent (as used in the segmentation script below).
                ax.set_prop_cycle('color', colors)

            (ax1, ax2, ax3) = axes
            ax1.plot(time, confidences)
            ax2.plot(time, groundtruth)
            ax3.plot(time, confidences * groundtruth)

            pdf.savefig()
            # plt.savefig('time-analysis.pdf')
            plt.close()
def confusion_plot(runs):
    for run in runs:
        run_info, model, loader = load_run(run, data=args.data)
        run_dir, _, label, _, params = run_info
        loader = loader[1]
        dataset = loader.dataset

        predictions, targets, _ = predict(model, loader, cuda=params['cuda'])
        overall_accuracy = accuracy_score(targets, predictions)

        confusion = confusion_matrix(targets, predictions)
        mask = confusion == 0

        # Normalize it
        # confusion = confusion.astype('float') / confusion.sum(axis=1)[:, None]
        # fig, ax = plt.subplots()
        # im = ax.imshow(confusion, interpolation='nearest', cmap=plt.cm.Blues)
        # fig.colorbar(im)

        plt.figure(figsize=(30, 30))
        plt.title('{} (Overall Accuracy: {:4.2%})'.format(label, overall_accuracy))
        ax = sns.heatmap(confusion, annot=True, fmt='d', mask=mask, cbar=False)

        classes = dataset.action_descriptions
        tick_marks = np.arange(len(classes))
        for axis in [ax.xaxis, ax.yaxis]:
            axis.set_ticks(tick_marks + 0.5, minor=True)
            axis.set(ticks=tick_marks, ticklabels=classes)

        labels = ax.get_xticklabels()
        for label in labels:
            label.set_rotation(90)

        plt.tight_layout()
        ax.set_ylabel('True label')
        ax.set_xlabel('Predicted label')
        ax.grid(True, which='minor')

        plot_fname = os.path.join(run_dir, 'confusion.pdf')
        plt.savefig(plot_fname, bbox_inches='tight')
        plt.close()

        del model, loader, predictions, targets
for world_size in ((5, 5), ):
    dataset_name = '{}x{}_greedy_random'.format(*world_size)
    results_folder = base_path / '{}x{}_greedy_test'.format(*world_size)
    if results_folder.exists():
        results_folder.rmdir()  # will throw an error if not empty
    os.makedirs(str(results_folder))

    for episodes in (1, 5, 20, 50, 200):
        adhoc = AdhocAgent(3,
                           mcts_c=mcts_c,
                           mcts_k=mcts_k,
                           mcts_n=mcts_n,
                           behavior_model_size=bsize,
                           environment_model_size=esize)
        load_run(dataset_folder / dataset_name, adhoc, episodes,
                 fit=False, compute_metrics=False)

        adhoc_filename = 'adhoc_' + str(episodes)
        adhoc.save(adhoc_filename)

        for j in range(n_threads):
            threads.append(
                pool.apply_async(run,
                                 args=(progress_q, results_q, j, adhoc_filename,
                                       episodes, results_folder, world_size)))

    for thread in threads:
        thread.get()

    progress_q.put(None)
def main(args):
    if args.delay:
        delay_plot(args)
        return

    if args.pr:
        targets, predictions, _ = get_predictions(args.run_dir, stream=False)
        pr_fname = os.path.join(args.run_dir, 'pr.npz')
        _ = find_thresholds(targets, predictions, stream=False, pr=pr_fname)
        return

    if args.compute_metrics or args.plot_predictions:
        run_info, model, loader = load_run(args.run_dir, data=args.data)
        params = run_info[-1]
        labels = np.array([
            a.replace('hdm05_', '')
            for a in loader[1].dataset.action_descriptions
        ])

    if args.compute_metrics:
        rows = []
        thr_tab = pd.DataFrame(
            index=labels,
            columns=pd.MultiIndex.from_product([['fair', 'unfair'],
                                                ['stream', 'sequences']]))
        for stream, fair in itertools.product((False, True), repeat=2):
            targets, predictions, annot_time = get_predictions(
                args.run_dir, stream=stream, force=args.force)

            thr_targets, thr_predictions = targets, predictions
            if fair:
                thr_targets, thr_predictions, _ = get_predictions(
                    args.run_dir, train=True, stream=stream, force=args.force)
                train_targets = thr_targets

            print('Stream: {} Fair: {}'.format(stream, fair))
            thrs = find_thresholds(thr_targets, thr_predictions, stream=stream)
            thr_tab[(('fair' if fair else 'unfair'),
                     ('stream' if stream else 'sequences'))] = thrs[1]

            metrics = compute_metrics(targets, predictions, thrs, stream=stream)
            row = (stream, fair) + metrics + (annot_time, )
            rows.append(row)

        thr_tab['train_support'] = train_targets.sum(axis=0)
        thr_tab['test_support'] = targets.sum(axis=0)
        thresholds_file = os.path.join(args.run_dir, 'thresholds.csv')
        thr_tab.to_csv(thresholds_file)

        columns = ('Stream', 'Fair', 'microAP', 'macroAP', 'microF1', 'macroF1',
                   'catMicroF1', 'catMacroF1', 'AnnotTime')
        metrics = pd.DataFrame.from_records(rows, columns=columns)
        metrics_file = os.path.join(args.run_dir, 'metrics.csv')
        metrics.to_csv(metrics_file)
        print(metrics)

    if args.plot_predictions:
        stream, fair = False, False
        targets, predictions, annot_time = get_predictions(args.run_dir,
                                                           stream=stream,
                                                           force=args.force)
        thr_targets, thr_predictions = targets, predictions
        if fair:
            thr_targets, thr_predictions, _ = get_predictions(args.run_dir,
                                                              train=True,
                                                              stream=stream,
                                                              force=args.force)
            train_targets = thr_targets

        thrs = find_thresholds(thr_targets, thr_predictions, stream=stream)
        global_thr, multiple_thrs = thrs

        out = os.path.join(args.run_dir, 'time-analysis.pdf')
        seq_ids = [
            int(loader[1].dataset.data[i]['seq_id']) for i in range(len(targets))
        ]
        plot_preditctions(targets, predictions, seq_ids, labels, global_thr, out)
        return
parser.add_argument('--queries', required=True, help='Queries file')
parser.add_argument('--run', required=True, help='Run file')
parser.add_argument('--metadata', required=True, help='Metadata file')
parser.add_argument('--k', type=int, default=10,
                    help='Number of documents per query to print.')
parser.add_argument('--abstract', action='store_true', default=False,
                    help='Print abstract.')
args = parser.parse_args()

queries = utils.load_queries(args.queries)
run = utils.load_run(args.run)
metadata = load_metadata(args.metadata)

for query_id, (query, question) in queries.items():
    if query_id not in run:
        print(f'>> Missing query_id: {query_id}')
        continue

    print(f'query id: {query_id} | query: {query} | question: {question}')

    header = 'rank | doc_id | title'
    if args.abstract:
        header += ' | abstract'
    print(header)

    for rank, doc_id in enumerate(run[query_id][:args.k]):
        title, abstract = metadata[doc_id]
        output = [str(rank + 1), doc_id, title]
        if args.abstract:
            output.append(abstract)
        print(' | '.join(output))
def main(args):
    run_info, model, loader = load_run(args.run_dir, data=args.data)
    params = run_info[-1]
    out = os.path.join(args.run_dir, 'time-analysis.pdf')

    labels = np.array([
        a.replace('hdm05_', '') for a in loader.dataset.action_descriptions
    ])

    best_f1s = []
    targets = []
    predictions = []
    with PdfPages('/tmp/app.pdf') as pdf:
        for i, (x, y) in enumerate(tqdm(loader)):
            y = y.numpy().squeeze()
            targets.append(y)

            if params['cuda']:
                x = x.cuda()
            x = Variable(x, volatile=True)

            logits = model.segment(x)
            y_hat = torch.nn.functional.sigmoid(logits)
            y_hat = y_hat.data.cpu().numpy().squeeze()  # [:, y]
            predictions.append(y_hat)

            n_samples, n_classes = y_hat.shape
            time = np.arange(n_samples)
            # time = torch.linspace(0, 1, seq_len).numpy()

            # np.savez('segmentation_outs_n_preds.npz', y=y, y_hat=y_hat)
            # break

            ap = average_precision_score(y, y_hat, average='micro')
            p, r, t = precision_recall_curve(y.ravel(), y_hat.ravel())
            t = np.insert(t, 0, 0)
            f1 = 2 * (p * r) / (p + r)
            best_f1, best_thr = max(zip(f1, t))
            best_f1s.append(best_f1)

            cmap = plt.get_cmap('jet')
            colors = cmap(np.linspace(0, 1.0, n_classes))

            fig, axes = plt.subplots(3, 1)
            for ax in axes:
                ax.set_ylim([0, 1.1])
                ax.set_prop_cycle('color', colors)

            (ax1, ax2, ax3) = axes
            # (ax1, ax2) = axes

            ax1.set_title('Prediction [AP={:.1%}, F1={:.1%} (thr={})]'.format(
                ap, best_f1, best_thr))
            ax1.plot(time, y_hat, label=labels)

            ax2.set_title('Groundtruth ({})'.format(loader.dataset.data[i]['seq_id']))
            lines = ax2.plot(time, y)

            ax3.set_title('Masked Prediction')
            lines = ax3.plot(time, y_hat * y)

            legends_ix = set(y.sum(axis=0).nonzero()[0].tolist() +
                             (y_hat > 0.2).sum(axis=0).nonzero()[0].tolist())
            legends_ix = np.array(list(legends_ix))
            lines = np.array(lines)
            lines = lines[legends_ix]
            legends = labels[legends_ix]

            sns.despine()
            lgd = ax2.legend(lines, legends, loc='center', ncol=6,
                             bbox_to_anchor=(0.5, -0.42))
            plt.tight_layout()
            pdf.savefig(bbox_extra_artists=(lgd,), bbox_inches='tight')
            # plt.savefig('time-analysis.pdf')
            plt.close()

    best_f1s = np.array(best_f1s)
    order = np.argsort(best_f1s)[::-1] + 1
    order = " ".join(map(str, order))
    os.system('pdftk /tmp/app.pdf cat {} output {}'.format(order, out))

    targets = np.concatenate(targets, axis=0)
    predictions = np.concatenate(predictions, axis=0)

    p, r, t = precision_recall_curve(targets.ravel(), predictions.ravel())
    t = np.insert(t, 0, 0)
    f1 = 2 * (p * r) / (p + r)
    best_f1, best_thr = max(zip(f1, t))
    print('Single Thr F1: {} {}'.format(best_f1, best_thr))

    cat_f1s = []
    cat_thr = []
    for i in range(n_classes):
        p, r, t = precision_recall_curve(targets[:, i], predictions[:, i])
        f1 = 2 * (p * r) / (p + r)
        t = np.insert(t, 0, 0)
        b_f1, b_thr = max(zip(f1, t))
        cat_f1s.append(b_f1)
        cat_thr.append(b_thr)

    data = pd.DataFrame(dict(BestF1=cat_f1s, Threshold=cat_thr), index=labels)
    print(data)

    support = targets.sum(axis=0)
    avgF1 = (data['BestF1'].values * support).sum() / support.sum()
    print(avgF1)
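# Illustration (not part of the original script): the best-F1 search above is
# repeated three times, so a small helper with the same semantics could look
# like this. It mirrors the inline code: precision_recall_curve returns one
# more precision/recall point than thresholds, so a 0 threshold is prepended
# to keep the arrays aligned before picking the (F1, threshold) maximum.
import numpy as np
from sklearn.metrics import precision_recall_curve

def best_f1_threshold(targets, scores):
    p, r, t = precision_recall_curve(targets.ravel(), scores.ravel())
    t = np.insert(t, 0, 0)
    f1 = 2 * (p * r) / (p + r)
    return max(zip(f1, t))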
def main(_):
    bert_config = modeling.BertConfig.from_json_file(config_dict[FLAGS.model_size])

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tpu_cluster_resolver = None
    if use_tpu:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(FLAGS.tpu)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        keep_checkpoint_max=1,
        model_dir=FLAGS.output_path,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=iterations_per_loop,
            num_shards=num_tpu_cores,
            per_host_input_for_training=is_per_host))

    model_fn = model_fn_builder(
        bert_config=bert_config,
        num_labels=2,
        init_checkpoint=init_checkpoint,
        use_tpu=use_tpu,
        use_one_hot_embeddings=use_tpu)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.batch_size,
        eval_batch_size=FLAGS.batch_size,
        predict_batch_size=FLAGS.batch_size,
        params={"qc_scores": "qc_scores"})

    tf.logging.info("***** Running evaluation *****")
    tf.logging.info("  Batch size = %d", FLAGS.batch_size)

    for split in ["valid", "test"]:
        maxp_run = load_run(os.path.join(
            FLAGS.first_model_path,
            "{}_{}_result.trec".format(FLAGS.dataset, split)))

        query_docids_map = []
        data_path = os.path.join(
            FLAGS.output_path,
            "rerank-{0}_kc-{1}".format(FLAGS.rerank_num, FLAGS.kc), "data")
        result_path = os.path.join(
            FLAGS.output_path,
            "rerank-{0}_kc-{1}".format(FLAGS.rerank_num, FLAGS.kc), "result")
        if not tf.gfile.Exists(result_path):
            tf.gfile.MakeDirs(result_path)

        with tf.gfile.Open(os.path.join(
                data_path, "chunk_passage_ids_{0}.txt".format(split))) as ref_file:
            for line in ref_file:
                query_docids_map.append(line.strip().split("\t"))

        predict_input_fn = input_fn_builder(
            dataset_path=os.path.join(data_path, "chunk_passage_{0}.tf".format(split)),
            is_training=False,
            seq_length=FLAGS.max_seq_length,
            drop_remainder=False)

        total_count = 0
        result_file = tf.gfile.Open(
            os.path.join(result_path,
                         "{0}_{1}_result.trec".format(FLAGS.dataset, split)), 'w')

        ckpt = tf.train.latest_checkpoint(checkpoint_dir=FLAGS.third_model_path)
        print("use latest ckpt: {0}".format(ckpt))

        result = estimator.predict(input_fn=predict_input_fn,
                                   yield_single_examples=True,
                                   checkpoint_path=ckpt)

        start_time = time.time()
        results = []
        result_dict = collections.OrderedDict()
        for item in result:
            results.append((item["qc_scores"], item["probs"]))
            total_count += 1
            if (total_count == len(query_docids_map) or
                    query_docids_map[total_count][0] != query_docids_map[total_count - 1][0]):
                chunk_num = len(results) // FLAGS.rerank_num
                assert chunk_num <= FLAGS.kc

                qc_scores, probs = list(zip(*results))
                qc_scores = np.stack(qc_scores)
                cp_scores = np.stack(probs)[:, 1]
                qc_scores = np.reshape(qc_scores, [FLAGS.rerank_num, chunk_num])
                cp_scores = np.reshape(cp_scores, [FLAGS.rerank_num, chunk_num])

                # softmax normalization
                qc_scores = softmax(qc_scores, axis=-1)
                scores = np.sum(np.multiply(qc_scores, cp_scores), axis=-1, keepdims=False)

                start_idx = total_count - FLAGS.rerank_num * chunk_num
                end_idx = total_count
                query_ids, chunk_ids, passage_ids, labels, qc_scores = zip(
                    *query_docids_map[start_idx:end_idx])
                assert len(set(query_ids)) == 1, "Query ids must be all the same."
                query_id = query_ids[0]
                candidate_docs = list()
                for pid in passage_ids:
                    doc_id = pid.split("_")[0]
                    if doc_id not in candidate_docs:
                        candidate_docs.append(doc_id)

                result_dict[query_id] = dict()
                for i, doc in enumerate(candidate_docs):
                    result_dict[query_id][doc] = scores[i]

                rerank_list = sorted(result_dict[query_id].items(),
                                     key=lambda x: x[1], reverse=True)
                last_score = rerank_list[-1][1]
                for doc in maxp_run[query_id][FLAGS.rerank_num:]:
                    current_score = last_score - 0.01
                    result_dict[query_id][doc] = current_score
                    last_score = current_score

                ranking_list = sorted(result_dict[query_id].items(),
                                      key=lambda x: x[1], reverse=True)
                for rank, (doc_id, score) in enumerate(ranking_list):
                    result_file.write(
                        "\t".join([query_id, "Q0", doc_id, str(rank + 1),
                                   str(score), "chunk_passage_PRF"]) + "\n")

                results = []

            if total_count % 1000 == 0:
                tf.logging.warn("Read {} examples in {} secs".format(
                    total_count, int(time.time() - start_time)))

        result_file.close()

    tf.logging.info("Done Evaluating!")
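# Illustration (not part of the original script): the chunk aggregation from
# the evaluation loop above in isolation, assuming scipy.special.softmax as
# the `softmax` used there. Each of the `rerank_num` candidate documents gets
# one score: its per-chunk passage probabilities (cp_scores) weighted by the
# softmax-normalized query-chunk scores (qc_scores). Shapes are made up.
import numpy as np
from scipy.special import softmax

rerank_num, chunk_num = 3, 4
qc_scores = np.random.rand(rerank_num, chunk_num)
cp_scores = np.random.rand(rerank_num, chunk_num)
doc_scores = np.sum(softmax(qc_scores, axis=-1) * cp_scores, axis=-1)  # shape: (rerank_num,)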
def main(_):
    print('Loading Tokenizer...')
    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab, do_lower_case=True)

    if not tf.gfile.Exists(FLAGS.output_path):
        tf.gfile.MakeDirs(FLAGS.output_path)

    qrels = None
    if FLAGS.qrels:
        qrels = load_qrels(path=FLAGS.qrels)

    if FLAGS.task == "passage":
        run = load_run(path=FLAGS.run_file)
        useful_docids = set()
        for ids in run.values():
            for docid in ids:
                useful_docids.add(docid)

        queries = load_queries(path=FLAGS.queries, type="title", dataset=FLAGS.dataset)
        data = merge(qrels=qrels, run=run, queries=queries)

        print('Loading Collection...')
        collection = load_collection(FLAGS.collection_file, FLAGS.dataset, useful_docids)
        print("queries_num:{}".format(len(queries)))

        print('Converting to TFRecord...')
        convert_dataset(main_path=FLAGS.output_path,
                        data=data,
                        collection=collection,
                        tokenizer=tokenizer)
    else:
        for split in ["valid", "test"]:
            run_file = os.path.join(
                FLAGS.first_model_path,
                "{0}_{1}_result.tsv".format(FLAGS.dataset, split))
            run = load_run(path=run_file, has_pid=True, return_pid=True)
            queries = load_queries(path=FLAGS.queries,
                                   type="title",
                                   dataset=FLAGS.dataset,
                                   fold=FLAGS.fold,
                                   split=split)
            data = merge(qrels=qrels, run=run, queries=queries)

            print('Loading Collection...')
            collection = load_two_columns_file(FLAGS.collection_file)
            print("queries_num:{}".format(len(queries)))

            print('Converting to TFRecord...')
            convert_dataset(main_path=os.path.join(FLAGS.output_path,
                                                   "fold-" + str(FLAGS.fold)),
                            data=data,
                            collection=collection,
                            tokenizer=tokenizer,
                            split=split)

    print('done!')