def _read(self, feat_path, meta_path, proposal_folders):
    """Load meta/labels, instance features and the proposal file list.

    Populates ``self.lb2idxs``, ``self.idx2lb``, ``self.features``,
    ``self.lst`` (pairs of [node_file, edge_file]) and ``self.size``.
    """
    node_pattern = '*_node.npz'
    edge_pattern = '*_edge.npz'

    with Timer('read meta and feature'):
        self.lb2idxs, self.idx2lb = read_meta(meta_path)
        inst_num = len(self.idx2lb)
        if self.featureless:
            # no real features available: use a constant 1-d feature per instance
            self.feature_dim = 1
            self.features = np.ones(inst_num).reshape(-1, 1)
        else:
            raw_feats = read_probs(feat_path, inst_num, self.feature_dim)
            self.features = l2norm(raw_feats)

    with Timer('read proposal list'):
        self.lst = []
        for proposal_folder in proposal_folders:
            print('read proposals from folder: ', proposal_folder)
            node_files = sorted(
                glob.glob(os.path.join(proposal_folder, node_pattern)))
            edge_files = sorted(
                glob.glob(os.path.join(proposal_folder, edge_pattern)))
            assert len(node_files) == len(edge_files), \
                "node files({}) vs edge files({})".format(
                    len(node_files), len(edge_files))
            assert len(node_files) > 0, 'files under {} is 0'.format(
                proposal_folder)
            for fn_node, fn_edge in zip(node_files, edge_files):
                # paired files must share the same prefix before the last '_'
                assert fn_node[:fn_node.rfind('_')] == \
                    fn_edge[:fn_edge.rfind('_')], "{} vs {}".format(
                        fn_node, fn_edge)
                self.lst.append([fn_node, fn_edge])

    self.size = len(self.lst)
def _read(self, feat_path, label_path, proposal_folders):
    """Load optional labels, instance features and the proposal list,
    keeping only proposals that pass the iop filter (`self._check_iop`).

    Sets `self.lst` (filtered proposals), `self.tot_lst` (all proposals),
    and the corresponding `self.size` / `self.tot_size`.
    """
    with Timer('read meta and feature'):
        if label_path is not None:
            self.lb2idxs, self.idx2lb = read_meta(label_path)
            self.labels = intdict2ndarray(self.idx2lb)
            self.inst_num = len(self.idx2lb)
            self.ignore_label = False
        else:
            # unlabeled mode: instance count is inferred from features below
            self.lb2idxs, self.idx2lb = None, None
            self.labels = None
            self.inst_num = -1
            self.ignore_label = True
        if not self.featureless:
            features = read_probs(feat_path, self.inst_num, self.feature_dim)
            self.features = l2norm(features)
            if self.inst_num == -1:
                self.inst_num = features.shape[0]
        else:
            # featureless mode needs a known instance count (i.e. labels)
            assert self.inst_num > 0
            self.feature_dim = 1
            self.features = np.ones(self.inst_num).reshape(-1, 1)
    with Timer('read proposal list'):
        self.lst = []
        self.tot_lst = []
        if callable(proposal_folders):
            # proposal_folders may be a lazy provider; evaluate it now
            proposal_folders = proposal_folders()
        for proposal_folder in proposal_folders:
            print('read proposals from folder: ', proposal_folder)
            fn_nodes = sorted(
                glob.glob(osp.join(proposal_folder, self.fn_node_pattern)))
            fn_edges = sorted(
                glob.glob(osp.join(proposal_folder, self.fn_edge_pattern)))
            assert len(fn_nodes) == len(
                fn_edges), "node files({}) vs edge files({})".format(
                    len(fn_nodes), len(fn_edges))
            assert len(fn_nodes) > 0, 'files under {} is 0'.format(
                proposal_folder)
            for fn_node, fn_edge in zip(fn_nodes, fn_edges):
                # sanity check: node/edge files must share the same prefix
                assert fn_node[:fn_node.rfind(
                    '_')] == fn_edge[:fn_edge.rfind('_')], "{} vs {}".format(
                        fn_node, fn_edge)
                if self._check_iop(fn_node):
                    self.lst.append([fn_node, fn_edge])
                self.tot_lst.append([fn_node, fn_edge])
    self.size = len(self.lst)
    self.tot_size = len(self.tot_lst)
    assert self.size <= self.tot_size
    if self.size < self.tot_size:
        print('select {} / {} = {:.2f} proposals '
              'with iop between ({:.2f}, {:.2f})'.format(
                  self.size, self.tot_size, 1. * self.size / self.tot_size,
                  self.th_iop_min, self.th_iop_max))
def generate_basic_proposals(oprefix,
                             knn_prefix,
                             feats,
                             feat_dim=256,
                             knn_method='faiss',
                             k=80,
                             th_knn=0.6,
                             th_step=0.05,
                             minsz=3,
                             maxsz=300,
                             is_rebuild=False,
                             is_save_proposals=True,
                             force=False,
                             **kwargs):
    """Build a knn graph over `feats` and derive first-iteration cluster
    proposals via super-vertex clustering.

    Returns:
        tuple: (proposal output folder, predicted-label file path).
    """
    print('k={}, th_knn={}, th_step={}, maxsz={}, is_rebuild={}'.format(
        k, th_knn, th_step, maxsz, is_rebuild))

    # knn graph over the raw features (cached under knn_prefix)
    knns = build_knns(knn_prefix, feats, knn_method, k, is_rebuild)

    out_dir = osp.join(
        oprefix, '{}_k_{}_th_{}_step_{}_minsz_{}_maxsz_{}_iter_0'.format(
            knn_method, k, th_knn, th_step, minsz, maxsz))
    pred_label_file = osp.join(out_dir, 'pred_labels.txt')
    if not osp.exists(out_dir):
        os.makedirs(out_dir)

    if is_rebuild or not osp.isfile(pred_label_file):
        # cluster from scratch and cache the resulting labels
        with Timer('build super vertices'):
            clusters = super_vertex(knns, k, th_knn, th_step, maxsz)
        with Timer('dump clustering to {}'.format(pred_label_file)):
            labels = clusters2labels(clusters)
            write_meta(pred_label_file, labels)
    else:
        # reuse the cached clustering
        print('read clusters from {}'.format(pred_label_file))
        lb2idxs, _ = read_meta(pred_label_file)
        clusters = labels2clusters(lb2idxs)
    clusters = filter_clusters(clusters, minsz)

    # optionally dump per-proposal node/edge files
    proposal_dir = osp.join(out_dir, 'proposals')
    if is_save_proposals:
        print('saving cluster proposals to {}'.format(proposal_dir))
        if not osp.exists(proposal_dir):
            os.makedirs(proposal_dir)
        save_proposals(clusters, knns, ofolder=proposal_dir, force=force)

    return proposal_dir, pred_label_file
def _read(self, feat_path, label_path, proposal_folders):
    """Load optional labels, instance features and the proposal file list.

    Sets `self.labels` / `self.inst_num` (or marks labels as ignored),
    `self.features`, `self.lst` and `self.size`.
    """
    node_pattern = '*_node.npz'
    edge_pattern = '*_edge.npz'

    with Timer('read meta and feature'):
        if label_path is None:
            # unlabeled mode: instance count is inferred from features below
            self.lb2idxs, self.idx2lb = None, None
            self.labels = None
            self.inst_num = -1
            self.ignore_label = True
        else:
            self.lb2idxs, self.idx2lb = read_meta(label_path)
            self.labels = intdict2ndarray(self.idx2lb)
            self.inst_num = len(self.idx2lb)
            self.ignore_label = False
        if self.featureless:
            # featureless mode needs a known instance count (i.e. labels)
            assert self.inst_num > 0
            self.feature_dim = 1
            self.features = np.ones(self.inst_num).reshape(-1, 1)
        else:
            features = read_probs(feat_path, self.inst_num, self.feature_dim)
            self.features = l2norm(features)
            if self.inst_num == -1:
                self.inst_num = features.shape[0]

    with Timer('read proposal list'):
        self.lst = []
        if callable(proposal_folders):
            # proposal_folders may be a lazy provider; evaluate it now
            proposal_folders = proposal_folders()
        for proposal_folder in proposal_folders:
            print('read proposals from folder: ', proposal_folder)
            node_files = sorted(
                glob.glob(os.path.join(proposal_folder, node_pattern)))
            edge_files = sorted(
                glob.glob(os.path.join(proposal_folder, edge_pattern)))
            assert len(node_files) == len(edge_files), \
                "node files({}) vs edge files({})".format(
                    len(node_files), len(edge_files))
            assert len(node_files) > 0, 'files under {} is 0'.format(
                proposal_folder)
            for fn_node, fn_edge in zip(node_files, edge_files):
                # paired files must share the same prefix before the last '_'
                assert fn_node[:fn_node.rfind('_')] == \
                    fn_edge[:fn_edge.rfind('_')], "{} vs {}".format(
                        fn_node, fn_edge)
                self.lst.append([fn_node, fn_edge])

    self.size = len(self.lst)
def generate_proposals(oprefix,
                       knn_prefix,
                       feats,
                       feat_dim=256,
                       knn_method='faiss',
                       k=80,
                       th_knn=0.6,
                       th_step=0.05,
                       min_size=3,
                       max_size=300,
                       is_rebuild=False,
                       is_save_proposals=False):
    """Build a knn graph over `feats` and generate cluster proposals.

    Mirrors `generate_basic_proposals`. FIX: this variant previously
    returned None and clobbered `ofolder` with the proposals subfolder;
    it now keeps the run folder intact and returns the output paths
    (backward-compatible — callers that ignored the return are unaffected).

    Returns:
        tuple: (proposals folder, predicted-label file path).
    """
    print('k={}, th_knn={}, th_step={}, max_size={}, is_rebuild={}'.format(
        k, th_knn, th_step, max_size, is_rebuild))

    # build knns: each node, its top-k nearest nodes and their distances
    knns = build_knns(knn_prefix, feats, knn_method, k, is_rebuild)

    # obtain cluster proposals
    ofolder = os.path.join(
        oprefix, '{}_k_{}_th_{}_step_{}_minsz_{}_maxsz_{}_iter_0'.format(
            knn_method, k, th_knn, th_step, min_size, max_size))
    ofn_pred_labels = os.path.join(ofolder, 'pred_labels.txt')
    if not os.path.exists(ofolder):
        os.makedirs(ofolder)
    if not os.path.isfile(ofn_pred_labels) or is_rebuild:
        with Timer('build super vertices'):
            clusters = super_vertex(knns, k, th_knn, th_step, max_size)
        with Timer('dump clustering to {}'.format(ofn_pred_labels)):
            labels = clusters2labels(clusters)
            write_meta(ofn_pred_labels, labels)
    else:
        print('read clusters from {}'.format(ofn_pred_labels))
        lb2idxs, _ = read_meta(ofn_pred_labels)
        clusters = labels2clusters(lb2idxs)
    clusters = filter_clusters(clusters, min_size)

    # output cluster proposals; use a separate variable so `ofolder`
    # keeps pointing at the run folder
    ofolder_proposals = os.path.join(ofolder, 'proposals')
    if is_save_proposals:
        print('saving cluster proposals to {}'.format(ofolder_proposals))
        if not os.path.exists(ofolder_proposals):
            os.makedirs(ofolder_proposals)
        save_proposals(clusters, knns, ofolder=ofolder_proposals, force=True)
    return ofolder_proposals, ofn_pred_labels
def perform_val(model, HEAD1, HEAD_test1, cfg, feature_dim, pair_a, pair_b):
    """Evaluate pairwise-link prediction accuracy on the test split.

    Runs the model once over the whole test graph, then scores
    `patch_num` contiguous slices of the (pair_a, pair_b) index arrays
    with HEAD_test1 and compares predicted links (score > 0.5) against
    ground-truth label equality.

    BUG FIX: the average was previously divided by `test_inst_num`, but
    only `patch_num * patch_size` pairs are scored (the remainder of the
    integer division is dropped), so accuracy was underestimated whenever
    `test_inst_num % patch_num != 0`. Divide by the evaluated-pair count.

    Returns:
        float: mean pairwise accuracy over the evaluated pairs.
    """
    test_lb2idxs, test_idx2lb = read_meta(cfg.test_data['label_path'])
    test_inst_num = len(test_idx2lb)
    model.eval()
    HEAD1.eval()
    HEAD_test1.eval()

    # propagate model kwargs into the test-data config before building
    for k, v in cfg.model['kwargs'].items():
        setattr(cfg.test_data, k, v)
    dataset = build_dataset(cfg.model['type'], cfg.test_data)

    features = torch.FloatTensor(dataset.features)
    adj = sparse_mx_to_torch_sparse_tensor(dataset.adj)
    labels = torch.LongTensor(dataset.gt_labels)
    if cfg.cuda:
        features = features.cuda()
        adj = adj.cuda()
        labels = labels.cuda()
        HEAD_test1 = HEAD_test1.cuda()
    test_data = [features, adj, labels]

    # copy trained head weights into the test head (non-strict load)
    HEAD_test1.load_state_dict(HEAD1.state_dict(), False)
    with torch.no_grad():
        output_feature = model(test_data)

    sum_acc = 0
    patch_num = 10
    patch_size = int(test_inst_num / patch_num)
    for i in range(patch_num):
        score = HEAD_test1(output_feature[pair_a[i * patch_size:(i + 1) *
                                                 patch_size]],
                           output_feature[pair_b[i * patch_size:(i + 1) *
                                                 patch_size]],
                           no_list=True)
        pre_labels = (score > 0.5).long()
        # a pair is a positive link iff both endpoints share a gt label
        gt_labels = (labels[pair_a[i * patch_size:(i + 1) * patch_size]] ==
                     labels[pair_b[i * patch_size:(i + 1) * patch_size]]
                     ).long()
        acc = (pre_labels == gt_labels).long().sum()
        sum_acc += acc

    # divide by the number of pairs actually scored, not test_inst_num
    num_pairs = patch_num * patch_size
    avg_acc = float(sum_acc) / num_pairs if num_pairs else 0.0
    return avg_acc
def __init__(self, cfg):
    """Dataset for hop-based subgraph construction: loads features,
    optional labels and a precomputed knn graph according to `cfg`."""
    feat_path = cfg['feat_path']
    label_path = cfg.get('label_path', None)
    knn_graph_path = cfg['knn_graph_path']

    self.k_at_hop = cfg['k_at_hop']
    self.depth = len(self.k_at_hop)
    self.active_connection = cfg['active_connection']
    self.feature_dim = cfg['feature_dim']
    self.is_norm_feat = cfg.get('is_norm_feat', True)
    self.is_sort_knns = cfg.get('is_sort_knns', True)
    self.is_test = cfg.get('is_test', False)

    with Timer('read meta and feature'):
        have_labels = label_path is not None
        if have_labels:
            _, idx2lb = read_meta(label_path)
            self.inst_num = len(idx2lb)
            self.labels = intdict2ndarray(idx2lb)
        else:
            self.labels = None
            self.inst_num = -1
        self.ignore_label = not have_labels

        self.features = read_probs(feat_path, self.inst_num,
                                   self.feature_dim)
        if self.is_norm_feat:
            self.features = l2norm(self.features)
        if self.inst_num == -1:
            # no labels: take the instance count from the feature matrix
            self.inst_num = self.features.shape[0]
        self.size = self.inst_num

    with Timer('read knn graph'):
        knns = np.load(knn_graph_path)['data']
        _, self.knn_graph = knns2ordered_nbrs(knns, sort=self.is_sort_knns)

    assert np.mean(self.k_at_hop) >= self.active_connection

    print('feature shape: {}, norm_feat: {}, sort_knns: {} '
          'k_at_hop: {}, active_connection: {}'.format(
              self.features.shape, self.is_norm_feat, self.is_sort_knns,
              self.k_at_hop, self.active_connection))
def __init__(self, cfg):
    """Dataset that builds per-instance subgraphs around high-confidence
    nodes for GCN training/inference.

    Reads features (and optional labels), builds or loads the knn graph,
    obtains per-instance confidences (estimated or density-based),
    selects an ignore set by confidence ranking, then constructs
    subgraphs via `self.get_subgraph` (optionally multi-process).
    """
    feat_path = cfg['feat_path']
    label_path = cfg.get('label_path', None)
    knn_graph_path = cfg.get('knn_graph_path', None)

    self.k = cfg['k']
    self.feature_dim = cfg['feature_dim']
    self.is_norm_feat = cfg.get('is_norm_feat', True)
    self.th_sim = cfg.get('th_sim', 0.)
    self.max_conn = cfg.get('max_conn', 1)
    self.ignore_ratio = cfg.get('ignore_ratio', 0.8)
    self.ignore_small_confs = cfg.get('ignore_small_confs', True)
    self.use_candidate_set = cfg.get('use_candidate_set', True)

    self.nproc = cfg.get('nproc', 1)
    self.max_qsize = cfg.get('max_qsize', int(1e5))

    with Timer('read meta and feature'):
        if label_path is not None:
            self.lb2idxs, self.idx2lb = read_meta(label_path)
            self.inst_num = len(self.idx2lb)
            self.gt_labels = intdict2ndarray(self.idx2lb)
            self.ignore_label = False
        else:
            # unlabeled mode: instance count inferred from features below
            self.inst_num = -1
            self.ignore_label = True

        self.features = read_probs(feat_path, self.inst_num,
                                   self.feature_dim)
        if self.is_norm_feat:
            self.features = l2norm(self.features)
        if self.inst_num == -1:
            self.inst_num = self.features.shape[0]
        self.size = self.inst_num
        assert self.size == self.features.shape[0]

    print('feature shape: {}, k: {}, norm_feat: {}'.format(
        self.features.shape, self.k, self.is_norm_feat))

    with Timer('read knn graph'):
        if knn_graph_path is not None:
            knns = np.load(knn_graph_path)['data']
        else:
            # no precomputed graph: build knns next to the feature file
            prefix = osp.dirname(feat_path)
            name = rm_suffix(osp.basename(feat_path))
            # find root folder of `features`
            prefix = osp.dirname(prefix)
            knn_prefix = osp.join(prefix, 'knns', name)
            knns = build_knns(knn_prefix, self.features, cfg.knn_method,
                              cfg.knn)
        assert self.inst_num == len(knns), "{} vs {}".format(
            self.inst_num, len(knns))

        adj = fast_knns2spmat(knns, self.k, self.th_sim, use_sim=True)

        # build symmetric adjacency matrix
        adj = build_symmetric_adj(adj, self_loop=True)
        self.adj = row_normalize(adj)

        # convert knns to (dists, nbrs)
        self.dists, self.nbrs = knns2ordered_nbrs(knns, sort=True)

        if cfg.pred_confs != '':
            print('read estimated confidence from {}'.format(
                cfg.pred_confs))
            self.confs = np.load(cfg.pred_confs)['pred_confs']
        else:
            print('use unsupervised density as confidence')
            # NOTE(review): self.radius is expected to be set elsewhere
            # (e.g. by a subclass or earlier config) — confirm.
            assert self.radius
            from vegcn.confidence import density
            self.confs = density(self.dists, radius=self.radius)

        # choose the `ignore_ratio` fraction of instances to skip,
        # ranked by confidence (smallest by default; largest if
        # ignore_small_confs is False, via sign flip)
        assert 0 <= self.ignore_ratio <= 1
        if self.ignore_ratio == 1:
            self.ignore_set = set(np.arange(len(self.confs)))
        else:
            num = int(len(self.confs) * self.ignore_ratio)
            confs = self.confs
            if not self.ignore_small_confs:
                confs = -confs
            self.ignore_set = set(np.argpartition(confs, num)[:num])

    print(
        'ignore_ratio: {}, ignore_small_confs: {}, use_candidate_set: {}'.
        format(self.ignore_ratio, self.ignore_small_confs,
               self.use_candidate_set))
    print('#ignore_set: {} / {} = {:.3f}'.format(
        len(self.ignore_set), self.inst_num,
        1. * len(self.ignore_set) / self.inst_num))

    with Timer('Prepare sub-graphs'):
        # construct subgraphs with larger confidence
        self.peaks = {i: [] for i in range(self.inst_num)}
        self.dist2peak = {i: [] for i in range(self.inst_num)}

        if self.nproc > 1:
            # multi-process: chunk the index range to bound queue size.
            # NOTE(review): pool.map blocks and returns a full list, so
            # the inner tqdm shows no incremental progress — confirm
            # whether imap was intended.
            import multiprocessing as mp
            pool = mp.Pool(self.nproc)
            results = []
            num = int(self.inst_num / self.max_qsize) + 1
            for i in tqdm(range(num)):
                beg = int(i * self.max_qsize)
                end = min(beg + self.max_qsize, self.inst_num)
                lst = [j for j in range(beg, end)]
                results.extend(
                    list(
                        tqdm(pool.map(self.get_subgraph, lst),
                             total=len(lst))))
            pool.close()
            pool.join()
        else:
            results = [
                self.get_subgraph(i) for i in tqdm(range(self.inst_num))
            ]

        # dispatch results by arity: None -> skipped; 3-tuple -> peak
        # assignment only; 6-tuple -> full subgraph sample
        self.adj_lst = []
        self.feat_lst = []
        self.lb_lst = []
        self.subset_gt_labels = []
        self.subset_idxs = []
        self.subset_nbrs = []
        self.subset_dists = []
        for result in results:
            if result is None:
                continue
            elif len(result) == 3:
                i, nbr, dist = result
                self.peaks[i].extend(nbr)
                self.dist2peak[i].extend(dist)
                continue
            i, nbr, dist, feat, adj, lb = result
            self.subset_idxs.append(i)
            self.subset_nbrs.append(nbr)
            self.subset_dists.append(dist)
            self.feat_lst.append(feat)
            self.adj_lst.append(adj)
            if not self.ignore_label:
                self.subset_gt_labels.append(self.idx2lb[i])
                self.lb_lst.append(lb)
        self.subset_gt_labels = np.array(self.subset_gt_labels)

    # dataset size is the number of full subgraph samples
    self.size = len(self.feat_lst)
    assert self.size == len(self.adj_lst)
    if not self.ignore_label:
        assert self.size == len(self.lb_lst)
def __init__(self, cfg):
    """Whole-graph dataset for confidence estimation (GCN-V style).

    Loads features (and optional labels), loads or builds the knn graph,
    prepares a row-normalized symmetric adjacency, and — when labels are
    available — computes per-instance confidence as the training target.

    BUG FIX: `knn_graph_path` defaults to None (`cfg.get`), but
    `os.path.isfile(None)` raises TypeError; the fallback branch already
    handles None (its print is guarded by `is not None`), so the check is
    now guarded accordingly.
    """
    feat_path = cfg['feat_path']
    label_path = cfg.get('label_path', None)
    knn_graph_path = cfg.get('knn_graph_path', None)

    self.k = cfg['k']
    self.feature_dim = cfg['feature_dim']
    self.is_norm_feat = cfg.get('is_norm_feat', True)
    self.save_decomposed_adj = cfg.get('save_decomposed_adj', False)
    self.th_sim = cfg.get('th_sim', 0.)
    self.max_conn = cfg.get('max_conn', 1)
    self.conf_metric = cfg.get('conf_metric')

    with Timer('read meta and feature'):
        if label_path is not None:
            self.lb2idxs, self.idx2lb = read_meta(label_path)
            self.inst_num = len(self.idx2lb)
            self.gt_labels = intdict2ndarray(self.idx2lb)
            self.ignore_label = False
        else:
            # unlabeled mode: instance count inferred from features below
            self.inst_num = -1
            self.ignore_label = True

        self.features = read_probs(feat_path, self.inst_num,
                                   self.feature_dim)
        if self.is_norm_feat:
            self.features = l2norm(self.features)
        if self.inst_num == -1:
            self.inst_num = self.features.shape[0]
        self.size = 1  # take the entire graph as input

    with Timer('read knn graph'):
        # guard the None case before probing the filesystem (see docstring)
        if knn_graph_path is not None and os.path.isfile(knn_graph_path):
            knns = np.load(knn_graph_path)['data']
        else:
            if knn_graph_path is not None:
                print('knn_graph_path does not exist: {}'.format(
                    knn_graph_path))
            prefix = osp.dirname(feat_path)
            name = rm_suffix(osp.basename(feat_path))
            # find root folder of `features`
            prefix = osp.dirname(prefix)
            knn_prefix = osp.join(prefix, 'knns', name)
            knns = build_knns(knn_prefix, self.features, cfg.knn_method,
                              cfg.knn)

        adj = fast_knns2spmat(knns, self.k, self.th_sim, use_sim=True)

        # build symmetric adjacency matrix
        adj = build_symmetric_adj(adj, self_loop=True)
        adj = row_normalize(adj)
        if self.save_decomposed_adj:
            # store (indices, values, shape) instead of the sparse matrix
            adj = sparse_mx_to_indices_values(adj)
            self.adj_indices, self.adj_values, self.adj_shape = adj
        else:
            self.adj = adj

        # convert knns to (dists, nbrs)
        self.dists, self.nbrs = knns2ordered_nbrs(knns)

    print('feature shape: {}, k: {}, norm_feat: {}'.format(
        self.features.shape, self.k, self.is_norm_feat))

    if not self.ignore_label:
        with Timer('Prepare ground-truth label'):
            self.labels = confidence(feats=self.features,
                                     dists=self.dists,
                                     nbrs=self.nbrs,
                                     metric=self.conf_metric,
                                     idx2lb=self.idx2lb,
                                     lb2idxs=self.lb2idxs)
            if cfg.eval_interim:
                _, self.peaks = confidence_to_peaks(
                    self.dists, self.nbrs, self.labels, self.max_conn)
# Prepare the output file for predicted labels; refuse to overwrite
# an existing result unless --force is given.
if not os.path.exists(args.output_folder):
    os.makedirs(args.output_folder)
cluster_name = args.output_name + '_' if args.output_name != '' else ''
pred_label_fn = os.path.join(
    args.output_folder, '{}th_iou_{}_pos_{}_pred_labels.txt'.format(
        cluster_name, args.th_iou, args.th_pos))
if os.path.exists(pred_label_fn) and not args.force:
    print('{} has already existed. Please set force=True to overwrite.'.
          format(pred_label_fn))
    exit()

# read label
lb2idxs, idx2lb = read_meta(args.gt_labels)
tot_inst_num = len(idx2lb)

# Load clusters from each input path: .npz via load_data,
# .txt as a label file converted to clusters.
clusters = []
for path in args.cluster_path:
    path = path.replace('\\', '')
    if path.endswith('.npz'):
        clusters.extend(load_data(path))
    elif path.endswith('.txt'):
        lb2idxs_, _ = read_meta(path)
        clusters.extend(labels2clusters(lb2idxs_))
    else:
        raise ValueError('Unkown suffix', path)

# get ground-truth iou
ious = []
def generate_iter_proposals(oprefix,
                            knn_prefix,
                            feats,
                            feat_dim=256,
                            knn_method='faiss',
                            k=80,
                            th_knn=0.6,
                            th_step=0.05,
                            minsz=3,
                            maxsz=300,
                            sv_minsz=2,
                            sv_maxsz=5,
                            sv_labels=None,
                            sv_knn_prefix=None,
                            is_rebuild=False,
                            is_save_proposals=True,
                            force=False,
                            **kwargs):
    """Generate next-iteration cluster proposals on top of existing
    super-vertex labels (`sv_labels`).

    Averages features over each super vertex, builds knns on the averaged
    features, clusters the super vertices, and maps the resulting clusters
    back to instance indices.

    Returns:
        tuple: (proposal output folder, predicted-label file path).
    """
    assert sv_minsz >= 2, "sv_minsz >= 2 to avoid duplicated proposals"
    print('k={}, th_knn={}, th_step={}, minsz={}, maxsz={}, '
          'sv_minsz={}, sv_maxsz={}, is_rebuild={}'.format(
              k, th_knn, th_step, minsz, maxsz, sv_minsz, sv_maxsz,
              is_rebuild))
    if not os.path.exists(sv_labels):
        raise FileNotFoundError('{} not found.'.format(sv_labels))
    if sv_knn_prefix is None:
        sv_knn_prefix = knn_prefix

    # get iter and knns from super vertex path
    _iter = get_iter_from_path(sv_labels) + 1
    knns_inst = get_knns_from_path(sv_labels, sv_knn_prefix, feats)

    print('read sv_clusters from {}'.format(sv_labels))
    sv_lb2idxs, sv_idx2lb = read_meta(sv_labels)
    inst_num = len(sv_idx2lb)
    sv_clusters = labels2clusters(sv_lb2idxs)
    # sv_clusters = filter_clusters(sv_clusters, minsz)

    # represent each super vertex by the mean of its member features
    feats = np.array([feats[c, :].mean(axis=0) for c in sv_clusters])
    print('average feature of super vertices:', feats.shape)

    # build knns
    knns = build_knns(knn_prefix, feats, knn_method, k, is_rebuild)

    # obtain cluster proposals
    ofolder = os.path.join(
        oprefix,
        '{}_k_{}_th_{}_step_{}_minsz_{}_maxsz_{}_sv_minsz_{}_maxsz_{}_iter_{}'.
        format(knn_method, k, th_knn, th_step, minsz, maxsz, sv_minsz,
               sv_maxsz, _iter))
    ofn_pred_labels = os.path.join(ofolder, 'pred_labels.txt')
    if not os.path.exists(ofolder):
        os.makedirs(ofolder)
    if not os.path.isfile(ofn_pred_labels) or is_rebuild:
        with Timer('build super vertices (iter={})'.format(_iter)):
            clusters = super_vertex(knns, k, th_knn, th_step, sv_maxsz)
            clusters = filter_clusters(clusters, sv_minsz)
            # expand each cluster of super vertices back to instance indices
            clusters = [[x for c in cluster for x in sv_clusters[c]]
                        for cluster in clusters]
        with Timer('dump clustering to {}'.format(ofn_pred_labels)):
            labels = clusters2labels(clusters)
            write_meta(ofn_pred_labels, labels, inst_num=inst_num)
    else:
        print('read clusters from {}'.format(ofn_pred_labels))
        lb2idxs, _ = read_meta(ofn_pred_labels)
        clusters = labels2clusters(lb2idxs)
    clusters = filter_clusters(clusters, minsz, maxsz)

    # output cluster proposals
    ofolder_proposals = os.path.join(ofolder, 'proposals')
    if is_save_proposals:
        print('saving cluster proposals to {}'.format(ofolder_proposals))
        if not os.path.exists(ofolder_proposals):
            os.makedirs(ofolder_proposals)
        # proposals are saved against the instance-level knns
        save_proposals(clusters,
                       knns_inst,
                       ofolder=ofolder_proposals,
                       force=force)
    return ofolder_proposals, ofn_pred_labels
def run_in_movie(data_dir, subset, algorithm, temporal_link, gpu_id):
    """Run in-movie person search with the selected algorithm and report
    average mAP and top-1/3/5 over all movies in `subset`."""
    affinity_dir = osp.join(data_dir, 'affinity', subset, 'in')
    list_file = osp.join(data_dir, 'meta', subset + '.json')
    mid_list, meta_info = read_meta(list_file)

    total_mAP = 0
    total_queries = 0
    top1_sum = 0
    top3_sum = 0
    top5_sum = 0
    # temporal linking uses max-pooled affinities, otherwise mean-pooled
    link_type = 'max' if temporal_link else 'mean'

    for i, mid in enumerate(mid_list):
        # per-movie metadata and ground truth
        tnum = meta_info[mid]['num_tracklet']
        pids = meta_info[mid]['pids']
        gt_list, gt_dict = parse_label(meta_info, mid)

        # cast-to-tracklet (face) and tracklet-to-tracklet (body) affinities
        ct_affmat = read_affmat_of_one_movie(affinity_dir,
                                             mid,
                                             region='face',
                                             data_type='ct',
                                             link_type=link_type)
        tt_affmat = read_affmat_of_one_movie(affinity_dir,
                                             mid,
                                             region='body',
                                             data_type='tt',
                                             link_type=link_type)

        if algorithm == 'ppcc':
            result = run_ccpp(ct_affmat, tt_affmat, gpu_id)
        elif algorithm == 'lp':
            result = run_lp(ct_affmat, tt_affmat, gpu_id)
        else:
            raise ValueError('No such algrothm: {}'.format(algorithm))

        # score this movie
        ret_dict = affmat2retdict(result, pids)
        ret_list = affmat2retlist(result, pids)
        mAP = get_mAP(gt_dict, ret_dict)
        topk = get_topk(gt_list, ret_list)

        total_mAP += mAP * len(pids)
        total_queries += len(pids)
        max_k = len(topk)
        top1_sum += topk[0]
        # when fewer than k candidates exist, top-k is trivially 1
        top3_sum += topk[2] if max_k >= 3 else 1
        top5_sum += topk[4] if max_k >= 5 else 1

    # mAP is query-weighted; top-k is averaged per movie
    average_mAP = total_mAP / total_queries
    average_top1 = top1_sum / len(mid_list)
    average_top3 = top3_sum / len(mid_list)
    average_top5 = top5_sum / len(mid_list)
    print(
        'Average mAP: {:.4f}\tAverage top1: {:.4f}\tAverage top3: {:.4f}\tAverage top5: {:.4f}'
        .format(average_mAP, average_top1, average_top3, average_top5))
def generate_proposals(oprefix,
                       feats,
                       feat_dim=256,
                       knn_method='hnsw',
                       k=80,
                       th_knn=0.6,
                       th_step=0.05,
                       min_size=3,
                       max_size=300,
                       is_rebuild=False,
                       is_save_proposals=False):
    """Build (or load cached) knns over `feats` with hnsw/faiss and
    generate cluster proposals via super-vertex clustering."""
    print('k={}, th_knn={}, th_step={}, max_size={}, is_rebuild={}'.\
        format(k, th_knn, th_step, max_size, is_rebuild))

    ## knn retrieval
    # cache both the index and the knn result next to `oprefix`
    oprefix = os.path.join(oprefix, '{}_k_{}'.format(knn_method, k))
    knn_fn = oprefix + '.npz'
    if not os.path.isfile(knn_fn) or is_rebuild:
        index_fn = oprefix + '.index'
        with Timer('build index'):
            if knn_method == 'hnsw':
                from proposals import knn_hnsw
                index = knn_hnsw(feats, k, index_fn)
            elif knn_method == 'faiss':
                from proposals import knn_faiss
                index = knn_faiss(feats, k, index_fn)
            else:
                raise KeyError('Unsupported method({}). \
                Only support hnsw and faiss currently'.format(knn_method))
            knns = index.get_knns()
        with Timer('dump knns to {}'.format(knn_fn)):
            dump_data(knn_fn, knns, force=True)
    else:
        print('read knn from {}'.format(knn_fn))
        knns = load_data(knn_fn)

    # obtain cluster proposals
    ofolder = oprefix + '_th_{}_step_{}_minsz_{}_maxsz_{}_iter0'.\
        format(th_knn, th_step, min_size, max_size)
    ofn_pred_labels = os.path.join(ofolder, 'pred_labels.txt')
    if not os.path.exists(ofolder):
        os.makedirs(ofolder)
    if not os.path.isfile(ofn_pred_labels) or is_rebuild:
        with Timer('build super vertices'):
            clusters = super_vertex(knns, k, th_knn, th_step, max_size)
        with Timer('dump clustering to {}'.format(ofn_pred_labels)):
            labels = clusters2labels(clusters)
            write_meta(ofn_pred_labels, labels)
    else:
        # reuse cached clustering
        print('read clusters from {}'.format(ofn_pred_labels))
        lb2idxs, _ = read_meta(ofn_pred_labels)
        clusters = labels2clusters(lb2idxs)
    clusters = filter_clusters(clusters, min_size)

    # output cluster proposals
    if is_save_proposals:
        ofolder = os.path.join(ofolder, 'proposals')
        print('saving cluster proposals to {}'.format(ofolder))
        if not os.path.exists(ofolder):
            os.makedirs(ofolder)
        save_proposals(clusters, knns, ofolder=ofolder, force=True)
def run_in_movie(data_dir, subset, data_type, face_ratio):
    """Run in-movie person search using raw affinities (face, body, or a
    fusion of both) and report average mAP and top-1/3/5 over `subset`.

    `face_ratio` weights the face affinity for 'ave_fusion'; other fusion
    values use the element-wise maximum of face and body affinities.
    """
    affinity_dir = osp.join(data_dir, 'affinity', subset, 'in')
    list_file = osp.join(data_dir, 'meta', subset + '.json')
    mid_list, meta_info = read_meta(list_file)
    average_mAP = 0
    search_count = 0
    average_top1 = 0
    average_top3 = 0
    average_top5 = 0
    for i, mid in enumerate(mid_list):
        # read data
        tnum = meta_info[mid]['num_tracklet']
        pids = meta_info[mid]['pids']
        gt_list, gt_dict = parse_label(meta_info, mid)

        # read affinity matrix for the chosen modality (or both, fused)
        if data_type == 'face':
            affmat = read_affmat_of_one_movie(affinity_dir,
                                              mid,
                                              region='face',
                                              data_type='ct')
        elif data_type == 'body':
            affmat = read_affmat_of_one_movie(affinity_dir,
                                              mid,
                                              region='body',
                                              data_type='ct')
        else:
            face_affmat = read_affmat_of_one_movie(affinity_dir,
                                                   mid,
                                                   region='face',
                                                   data_type='ct')
            body_affmat = read_affmat_of_one_movie(affinity_dir,
                                                   mid,
                                                   region='body',
                                                   data_type='ct')
            if data_type == 'ave_fusion':
                # weighted average of the two modalities
                affmat = face_ratio * face_affmat + (
                    1 - face_ratio) * body_affmat
            else:
                # element-wise max fusion
                affmat = np.maximum(face_affmat, body_affmat)

        # parse results and get performance
        ret_dict = affmat2retdict(affmat, pids)
        ret_list = affmat2retlist(affmat, pids)
        mAP = get_mAP(gt_dict, ret_dict)
        topk = get_topk(gt_list, ret_list)
        average_mAP += mAP * len(pids)
        search_count += len(pids)
        max_k = len(topk)
        # when fewer than k candidates exist, top-k is trivially 1
        if max_k < 3:
            top3 = 1
        else:
            top3 = topk[2]
        if max_k < 5:
            top5 = 1
        else:
            top5 = topk[4]
        average_top1 += topk[0]
        average_top3 += top3
        average_top5 += top5

    # get average performance: mAP is query-weighted, top-k is per movie
    average_mAP = average_mAP / search_count
    average_top1 = average_top1 / len(mid_list)
    average_top3 = average_top3 / len(mid_list)
    average_top5 = average_top5 / len(mid_list)
    print(
        'Average mAP: {:.4f}\tAverage top1: {:.4f}\tAverage top3: {:.4f}\tAverage top5: {:.4f}'
        .format(average_mAP, average_top1, average_top3, average_top5))
def __init__(self, cfg):
    """Whole-graph dataset for confidence estimation, with configurable
    process count for knn building.

    BUG FIX: `knn_graph_path` defaults to None (`cfg.get`), but
    `os.path.isfile(None)` raises TypeError; the fallback branch already
    handles None (its print is guarded by `is not None`), so the check is
    now guarded accordingly. Non-English comments translated to English.
    """
    feat_path = cfg['feat_path']
    label_path = cfg.get('label_path', None)
    knn_graph_path = cfg.get('knn_graph_path', None)

    self.k = cfg['k']
    self.feature_dim = cfg['feature_dim']
    self.is_norm_feat = cfg.get('is_norm_feat', True)
    self.save_decomposed_adj = cfg.get('save_decomposed_adj', False)
    self.th_sim = cfg.get('th_sim', 0.)
    self.max_conn = cfg.get('max_conn', 1)
    self.conf_metric = cfg.get('conf_metric')
    self.num_process = cfg.get('num_process', 16)

    with Timer('read meta and feature'):
        if label_path is not None:
            self.lb2idxs, self.idx2lb = read_meta(label_path)
            self.inst_num = len(self.idx2lb)
            self.gt_labels = intdict2ndarray(self.idx2lb)
            self.ignore_label = False
        else:
            # unlabeled mode: instance count inferred from features below
            self.inst_num = -1
            self.ignore_label = True

        self.features = read_probs(feat_path, self.inst_num,
                                   self.feature_dim)
        if self.is_norm_feat:
            self.features = l2norm(self.features)
        if self.inst_num == -1:
            self.inst_num = self.features.shape[0]
        self.size = 1  # take the entire graph as input

    with Timer('read knn graph'):
        # guard the None case before probing the filesystem (see docstring)
        if knn_graph_path is not None and os.path.isfile(knn_graph_path):
            knns = np.load(knn_graph_path)['data']  # num_imgs * 2 * k
        else:
            if knn_graph_path is not None:
                print('knn_graph_path does not exist: {}'.format(
                    knn_graph_path))
            knn_prefix = os.path.join(cfg.prefix, 'knns', cfg.name)
            # knn search via faiss; the author's faiss_gpu build may be
            # problematic, and large-scale faiss on CPU is still slow.
            # faiss offers memory/speed optimizations (PQ, IVF, etc.);
            # see the faiss documentation.
            knns = build_knns(knn_prefix, self.features, cfg.knn_method,
                              cfg.knn, self.num_process)

        # build the adjacency matrix from the knn search results
        adj = fast_knns2spmat(knns, self.k, self.th_sim, use_sim=True)

        # build symmetric adjacency matrix
        adj = build_symmetric_adj(adj, self_loop=True)
        adj = row_normalize(adj)
        if self.save_decomposed_adj:
            # store (indices, values, shape) instead of the sparse matrix
            adj = sparse_mx_to_indices_values(adj)
            self.adj_indices, self.adj_values, self.adj_shape = adj
        else:
            self.adj = adj

        # convert knns to (dists, nbrs)
        self.dists, self.nbrs = knns2ordered_nbrs(knns)  # num_imgs * k

    print('feature shape: {}, k: {}, norm_feat: {}'.format(
        self.features.shape, self.k, self.is_norm_feat))

    if not self.ignore_label:
        with Timer('Prepare ground-truth label'):
            self.labels = confidence(feats=self.features,
                                     dists=self.dists,
                                     nbrs=self.nbrs,
                                     metric=self.conf_metric,
                                     idx2lb=self.idx2lb,
                                     lb2idxs=self.lb2idxs)
            if cfg.eval_interim:
                _, self.peaks = confidence_to_peaks(
                    self.dists, self.nbrs, self.labels, self.max_conn)
def __init__(self, cfg):
    """Whole-graph dataset that additionally computes an l2-normalized
    center feature per class.

    BUG FIX: `knn_graph_path` defaults to None (`cfg.get`), but
    `os.path.isfile(None)` raises TypeError; the fallback branch already
    handles None (its print is guarded by `is not None`), so the check is
    now guarded accordingly.
    """
    feat_path = cfg['feat_path']
    label_path = cfg.get('label_path', None)
    knn_graph_path = cfg.get('knn_graph_path', None)

    self.k = cfg['k']
    self.feature_dim = cfg['feature_dim']
    self.is_norm_feat = cfg.get('is_norm_feat', True)
    self.save_decomposed_adj = cfg.get('save_decomposed_adj', False)
    self.th_sim = cfg.get('th_sim', 0.)
    self.conf_metric = cfg.get('conf_metric')

    with Timer('read meta and feature'):
        if label_path is not None:
            self.lb2idxs, self.idx2lb = read_meta(label_path)
            self.inst_num = len(self.idx2lb)
            self.cls_num = len(self.lb2idxs)
            self.gt_labels = intdict2ndarray(self.idx2lb)
            self.ignore_label = False
        else:
            # NOTE(review): without labels, `self.cls_num`/`self.lb2idxs`
            # remain unset and the center-feature block below will raise
            # AttributeError — confirm unlabeled data is never routed here.
            self.inst_num = -1
            self.ignore_label = True

        self.features = read_probs(feat_path, self.inst_num,
                                   self.feature_dim)
        if self.is_norm_feat:
            self.features = l2norm(self.features)
        if self.inst_num == -1:
            self.inst_num = self.features.shape[0]
        self.size = 1  # take the entire graph as input

    with Timer('Compute center feature'):
        # per-class mean feature, then l2-normalized
        self.center_fea = np.zeros((self.cls_num, self.features.shape[1]))
        for i in range(self.cls_num):
            self.center_fea[i] = np.mean(self.features[self.lb2idxs[i]], 0)
        self.center_fea = l2norm(self.center_fea)

    with Timer('read knn graph'):
        # guard the None case before probing the filesystem (see docstring)
        if knn_graph_path is not None and os.path.isfile(knn_graph_path):
            print("load knns from the knn_path")
            self.knns = np.load(knn_graph_path)['data']
        else:
            if knn_graph_path is not None:
                print('knn_graph_path does not exist: {}'.format(
                    knn_graph_path))
            knn_prefix = os.path.join(cfg.prefix, 'knns', cfg.name)
            self.knns = build_knns(knn_prefix, self.features,
                                   cfg.knn_method, cfg.knn)

        adj = fast_knns2spmat(self.knns, self.k, self.th_sim, use_sim=True)

        # build symmetric adjacency matrix
        adj = build_symmetric_adj(adj, self_loop=True)
        adj = row_normalize(adj)
        if self.save_decomposed_adj:
            # store (indices, values, shape) instead of the sparse matrix
            adj = sparse_mx_to_indices_values(adj)
            self.adj_indices, self.adj_values, self.adj_shape = adj
        else:
            self.adj = adj

        # convert knns to (dists, nbrs)
        self.dists, self.nbrs = knns2ordered_nbrs(self.knns)

    print('feature shape: {}, k: {}, norm_feat: {}'.format(
        self.features.shape, self.k, self.is_norm_feat))