def generate_proposals(oprefix,
                       knn_prefix,
                       feats,
                       feat_dim=256,
                       knn_method='faiss',
                       k=80,
                       th_knn=0.6,
                       th_step=0.05,
                       min_size=3,
                       max_size=300,
                       is_rebuild=False,
                       is_save_proposals=False):
    """Generate iteration-0 cluster proposals from the kNNs of ``feats``.

    kNNs are obtained through ``build_knns``. Clusters are produced by
    ``super_vertex`` and dumped to ``pred_labels.txt`` in the output folder;
    if that file already exists and ``is_rebuild`` is false, the clusters
    are read back from it instead. The clusters are then filtered with
    ``filter_clusters(clusters, min_size)`` and optionally written out via
    ``save_proposals``.

    Args:
        oprefix: Parent directory under which the output folder is created.
        knn_prefix: Forwarded to ``build_knns``.
        feats: Features forwarded to ``build_knns``.
        feat_dim: Not referenced by this function.
        knn_method: kNN backend name; forwarded to ``build_knns`` and used
            in the output folder name.
        k: Number of neighbours; forwarded to ``build_knns`` and
            ``super_vertex``.
        th_knn: Forwarded to ``super_vertex``.
        th_step: Forwarded to ``super_vertex``.
        min_size: Forwarded to ``filter_clusters``.
        max_size: Forwarded to ``super_vertex``.
        is_rebuild: When true, rebuild the clustering even if a cached
            ``pred_labels.txt`` exists; also forwarded to ``build_knns``.
        is_save_proposals: When true, save the filtered clusters into the
            ``proposals`` sub-folder (always with ``force=True``).

    Returns:
        tuple: ``(ofolder_proposals, ofn_pred_labels)`` -- the proposals
        folder path (only created when ``is_save_proposals`` is true) and
        the path of the predicted-label file. This mirrors the return value
        of ``generate_iter_proposals``.
    """
    print('k={}, th_knn={}, th_step={}, max_size={}, is_rebuild={}'.format(
        k, th_knn, th_step, max_size, is_rebuild))

    # build knns
    knns = build_knns(knn_prefix, feats, knn_method, k, is_rebuild)

    # obtain cluster proposals
    ofolder = os.path.join(
        oprefix, '{}_k_{}_th_{}_step_{}_minsz_{}_maxsz_{}_iter0'.format(
            knn_method, k, th_knn, th_step, min_size, max_size))
    ofn_pred_labels = os.path.join(ofolder, 'pred_labels.txt')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(ofolder, exist_ok=True)

    if not os.path.isfile(ofn_pred_labels) or is_rebuild:
        with Timer('build super vertices'):
            clusters = super_vertex(knns, k, th_knn, th_step, max_size)
        with Timer('dump clustering to {}'.format(ofn_pred_labels)):
            labels = clusters2labels(clusters)
            write_meta(ofn_pred_labels, labels)
    else:
        print('read clusters from {}'.format(ofn_pred_labels))
        lb2idxs, _ = read_meta(ofn_pred_labels)
        clusters = labels2clusters(lb2idxs)
    clusters = filter_clusters(clusters, min_size)

    # output cluster proposals
    ofolder_proposals = os.path.join(ofolder, 'proposals')
    if is_save_proposals:
        print('saving cluster proposals to {}'.format(ofolder_proposals))
        os.makedirs(ofolder_proposals, exist_ok=True)
        save_proposals(clusters, knns, ofolder=ofolder_proposals, force=True)

    return ofolder_proposals, ofn_pred_labels
def generate_iter_proposals(oprefix,
                            knn_prefix,
                            feats,
                            feat_dim=256,
                            knn_method='faiss',
                            k=80,
                            th_knn=0.6,
                            th_step=0.05,
                            minsz=3,
                            maxsz=300,
                            sv_minsz=2,
                            sv_maxsz=5,
                            sv_labels=None,
                            sv_knn_prefix=None,
                            is_rebuild=False,
                            is_save_proposals=True,
                            force=False,
                            **kwargs):
    """Generate the next iteration of cluster proposals from super vertices.

    The clusters stored in ``sv_labels`` are treated as super vertices: each
    one is represented by the mean of its members' features, kNNs are built
    over those means, and ``super_vertex`` groups them. Every resulting
    group is expanded back to the instance indices of its member super
    vertices before being dumped, filtered and (optionally) saved.

    Args:
        oprefix: Parent directory under which the output folder is created.
        knn_prefix: Forwarded to ``build_knns`` for the super-vertex kNNs.
        feats: Instance-level features. Indexed as ``feats[c, :]`` with the
            index list of each super vertex (presumably a 2-D numpy
            array -- confirm against callers).
        feat_dim: Not referenced by this function.
        knn_method: kNN backend name; forwarded to ``build_knns`` and used
            in the output folder name.
        k: Number of neighbours; forwarded to ``build_knns`` and
            ``super_vertex``.
        th_knn: Forwarded to ``super_vertex``.
        th_step: Forwarded to ``super_vertex``.
        minsz: Minimum size passed to the final ``filter_clusters`` call.
        maxsz: Maximum size passed to the final ``filter_clusters`` call.
        sv_minsz: Minimum number of super vertices per group; must be >= 2
            (the assertion message says this avoids duplicated proposals).
        sv_maxsz: Forwarded to ``super_vertex`` as its size limit.
        sv_labels: Path of the label file of the previous iteration.
            Required despite the ``None`` default.
        sv_knn_prefix: Forwarded to ``get_knns_from_path``; defaults to
            ``knn_prefix`` when ``None``.
        is_rebuild: When true, rebuild the clustering even if a cached
            ``pred_labels.txt`` exists; also forwarded to ``build_knns``.
        is_save_proposals: When true, save the filtered clusters into the
            ``proposals`` sub-folder.
        force: Forwarded to ``save_proposals``.
        **kwargs: Accepted and ignored.

    Returns:
        tuple: ``(ofolder_proposals, ofn_pred_labels)`` -- the proposals
        folder path (only created when ``is_save_proposals`` is true) and
        the path of the predicted-label file.

    Raises:
        AssertionError: If ``sv_minsz < 2``.
        ValueError: If ``sv_labels`` is ``None``.
        FileNotFoundError: If ``sv_labels`` does not exist.
    """
    assert sv_minsz >= 2, "sv_minsz >= 2 to avoid duplicated proposals"
    print('k={}, th_knn={}, th_step={}, minsz={}, maxsz={}, '
          'sv_minsz={}, sv_maxsz={}, is_rebuild={}'.format(
              k, th_knn, th_step, minsz, maxsz, sv_minsz, sv_maxsz,
              is_rebuild))

    # os.path.exists(None) raises an opaque TypeError; fail with a clear
    # message instead when the caller relied on the default.
    if sv_labels is None:
        raise ValueError(
            'sv_labels must be the path of an existing label file.')
    if not os.path.exists(sv_labels):
        raise FileNotFoundError('{} not found.'.format(sv_labels))
    if sv_knn_prefix is None:
        sv_knn_prefix = knn_prefix

    # get iter and knns from super vertex path
    _iter = get_iter_from_path(sv_labels) + 1
    knns_inst = get_knns_from_path(sv_labels, sv_knn_prefix, feats)
    print('read sv_clusters from {}'.format(sv_labels))
    sv_lb2idxs, sv_idx2lb = read_meta(sv_labels)
    inst_num = len(sv_idx2lb)
    sv_clusters = labels2clusters(sv_lb2idxs)

    # One averaged feature per super vertex; kept under a separate name so
    # the instance-level ``feats`` argument is not shadowed.
    sv_feats = np.array([feats[c, :].mean(axis=0) for c in sv_clusters])
    print('average feature of super vertices:', sv_feats.shape)

    # build knns
    knns = build_knns(knn_prefix, sv_feats, knn_method, k, is_rebuild)

    # obtain cluster proposals
    ofolder = os.path.join(
        oprefix,
        '{}_k_{}_th_{}_step_{}_minsz_{}_maxsz_{}_sv_minsz_{}_maxsz_{}_iter_{}'.
        format(knn_method, k, th_knn, th_step, minsz, maxsz, sv_minsz,
               sv_maxsz, _iter))
    ofn_pred_labels = os.path.join(ofolder, 'pred_labels.txt')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(ofolder, exist_ok=True)

    if not os.path.isfile(ofn_pred_labels) or is_rebuild:
        with Timer('build super vertices (iter={})'.format(_iter)):
            clusters = super_vertex(knns, k, th_knn, th_step, sv_maxsz)
            clusters = filter_clusters(clusters, sv_minsz)
            # Expand each group of super-vertex ids into the instance
            # indices contained in those super vertices.
            clusters = [[x for c in cluster for x in sv_clusters[c]]
                        for cluster in clusters]
        with Timer('dump clustering to {}'.format(ofn_pred_labels)):
            labels = clusters2labels(clusters)
            # inst_num is the number of instances listed in sv_labels.
            write_meta(ofn_pred_labels, labels, inst_num=inst_num)
    else:
        print('read clusters from {}'.format(ofn_pred_labels))
        lb2idxs, _ = read_meta(ofn_pred_labels)
        clusters = labels2clusters(lb2idxs)
    clusters = filter_clusters(clusters, minsz, maxsz)

    # output cluster proposals
    ofolder_proposals = os.path.join(ofolder, 'proposals')
    if is_save_proposals:
        print('saving cluster proposals to {}'.format(ofolder_proposals))
        os.makedirs(ofolder_proposals, exist_ok=True)
        # Proposals are saved against the instance-level kNNs, not the
        # super-vertex kNNs built above.
        save_proposals(clusters,
                       knns_inst,
                       ofolder=ofolder_proposals,
                       force=force)

    return ofolder_proposals, ofn_pred_labels
def generate_proposals(oprefix,
                       feats,
                       feat_dim=256,
                       knn_method='hnsw',
                       k=80,
                       th_knn=0.6,
                       th_step=0.05,
                       min_size=3,
                       max_size=300,
                       is_rebuild=False,
                       is_save_proposals=False):
    """Generate iteration-0 cluster proposals, building kNNs inline.

    kNNs are loaded from ``<oprefix>/<knn_method>_k_<k>.npz`` when that file
    exists and ``is_rebuild`` is false; otherwise an index is built with
    ``knn_hnsw`` or ``knn_faiss`` and the resulting kNNs are dumped to that
    file. Clusters are then produced by ``super_vertex`` (or read back from
    a cached ``pred_labels.txt``), filtered with
    ``filter_clusters(clusters, min_size)`` and optionally written out via
    ``save_proposals``.

    Args:
        oprefix: Directory used as the base of all output paths.
        feats: Features handed to the kNN index builder.
        feat_dim: Not referenced by this function.
        knn_method: ``'hnsw'`` or ``'faiss'``.
        k: Number of neighbours; passed to the index builder and
            ``super_vertex``.
        th_knn: Forwarded to ``super_vertex``.
        th_step: Forwarded to ``super_vertex``.
        min_size: Forwarded to ``filter_clusters``.
        max_size: Forwarded to ``super_vertex``.
        is_rebuild: When true, rebuild both the kNNs and the clustering
            even if cached files exist.
        is_save_proposals: When true, save the filtered clusters into the
            ``proposals`` sub-folder (always with ``force=True``).

    Returns:
        tuple: ``(ofolder_proposals, ofn_pred_labels)`` -- the proposals
        folder path (only created when ``is_save_proposals`` is true) and
        the path of the predicted-label file.

    Raises:
        KeyError: If ``knn_method`` is neither ``'hnsw'`` nor ``'faiss'``
            and the kNNs have to be (re)built.
    """
    print('k={}, th_knn={}, th_step={}, max_size={}, is_rebuild={}'.format(
        k, th_knn, th_step, max_size, is_rebuild))

    # knn retrieval
    knn_base = os.path.join(oprefix, '{}_k_{}'.format(knn_method, k))
    knn_fn = knn_base + '.npz'
    if not os.path.isfile(knn_fn) or is_rebuild:
        index_fn = knn_base + '.index'
        with Timer('build index'):
            # Backends are imported lazily so only the selected one is
            # required at runtime.
            if knn_method == 'hnsw':
                from proposals import knn_hnsw
                index = knn_hnsw(feats, k, index_fn)
            elif knn_method == 'faiss':
                from proposals import knn_faiss
                index = knn_faiss(feats, k, index_fn)
            else:
                # Adjacent literals instead of a backslash continuation
                # inside the string, which leaked stray characters into
                # the message.
                raise KeyError('Unsupported method({}). '
                               'Only support hnsw and faiss currently'.format(
                                   knn_method))
            knns = index.get_knns()
        with Timer('dump knns to {}'.format(knn_fn)):
            dump_data(knn_fn, knns, force=True)
    else:
        print('read knn from {}'.format(knn_fn))
        knns = load_data(knn_fn)

    # obtain cluster proposals
    ofolder = knn_base + '_th_{}_step_{}_minsz_{}_maxsz_{}_iter0'.format(
        th_knn, th_step, min_size, max_size)
    ofn_pred_labels = os.path.join(ofolder, 'pred_labels.txt')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(ofolder, exist_ok=True)

    if not os.path.isfile(ofn_pred_labels) or is_rebuild:
        with Timer('build super vertices'):
            clusters = super_vertex(knns, k, th_knn, th_step, max_size)
        with Timer('dump clustering to {}'.format(ofn_pred_labels)):
            labels = clusters2labels(clusters)
            write_meta(ofn_pred_labels, labels)
    else:
        print('read clusters from {}'.format(ofn_pred_labels))
        lb2idxs, _ = read_meta(ofn_pred_labels)
        clusters = labels2clusters(lb2idxs)
    clusters = filter_clusters(clusters, min_size)

    # output cluster proposals
    ofolder_proposals = os.path.join(ofolder, 'proposals')
    if is_save_proposals:
        print('saving cluster proposals to {}'.format(ofolder_proposals))
        os.makedirs(ofolder_proposals, exist_ok=True)
        save_proposals(clusters, knns, ofolder=ofolder_proposals, force=True)

    return ofolder_proposals, ofn_pred_labels