def generate_proposals(oprefix,
                       knn_prefix,
                       feats,
                       feat_dim=256,
                       knn_method='faiss',
                       k=80,
                       th_knn=0.6,
                       th_step=0.05,
                       min_size=3,
                       max_size=300,
                       is_rebuild=False,
                       is_save_proposals=False):
    """Produce first-iteration ("iter0") cluster proposals from a kNN graph.

    The kNNs come from ``build_knns``; clusters are either computed with
    ``super_vertex`` and written to ``pred_labels.txt`` or, when that file
    already exists and ``is_rebuild`` is false, read back from it.  The
    clusters are then passed through ``filter_clusters`` with ``min_size``
    and optionally handed to ``save_proposals``.

    Args:
        oprefix: Root directory under which the run folder is created.
        knn_prefix: Forwarded to ``build_knns``.
        feats: Feature data forwarded to ``build_knns``.
        feat_dim: Accepted but not used inside this function.
        knn_method: Forwarded to ``build_knns``; also part of the run
            folder name.
        k: Forwarded to ``build_knns`` and ``super_vertex``.
        th_knn: Forwarded to ``super_vertex``.
        th_step: Forwarded to ``super_vertex``.
        min_size: Forwarded to ``filter_clusters``.
        max_size: Forwarded to ``super_vertex``.
        is_rebuild: When true, recompute clusters even if a label file is
            already present; also forwarded to ``build_knns``.
        is_save_proposals: When true, write proposals into a
            ``proposals`` sub-folder of the run folder.

    Returns:
        None.
    """
    summary = 'k={}, th_knn={}, th_step={}, max_size={}, is_rebuild={}'
    print(summary.format(k, th_knn, th_step, max_size, is_rebuild))

    knns = build_knns(knn_prefix, feats, knn_method, k, is_rebuild)

    # Every parameter that shapes the result is encoded in the folder name,
    # so different settings never share a cached label file.
    run_name = '{}_k_{}_th_{}_step_{}_minsz_{}_maxsz_{}_iter0'.format(
        knn_method, k, th_knn, th_step, min_size, max_size)
    ofolder = os.path.join(oprefix, run_name)
    ofn_pred_labels = os.path.join(ofolder, 'pred_labels.txt')
    if not os.path.exists(ofolder):
        os.makedirs(ofolder)

    reuse_cached = os.path.isfile(ofn_pred_labels) and not is_rebuild
    if reuse_cached:
        print('read clusters from {}'.format(ofn_pred_labels))
        lb2idxs, _ = read_meta(ofn_pred_labels)
        clusters = labels2clusters(lb2idxs)
    else:
        with Timer('build super vertices'):
            clusters = super_vertex(knns, k, th_knn, th_step, max_size)
        with Timer('dump clustering to {}'.format(ofn_pred_labels)):
            write_meta(ofn_pred_labels, clusters2labels(clusters))
    clusters = filter_clusters(clusters, min_size)

    if not is_save_proposals:
        return

    proposal_dir = os.path.join(ofolder, 'proposals')
    print('saving cluster proposals to {}'.format(proposal_dir))
    if not os.path.exists(proposal_dir):
        os.makedirs(proposal_dir)
    save_proposals(clusters, knns, ofolder=proposal_dir, force=True)
Beispiel #2
0
def generate_iter_proposals(oprefix,
                            knn_prefix,
                            feats,
                            feat_dim=256,
                            knn_method='faiss',
                            k=80,
                            th_knn=0.6,
                            th_step=0.05,
                            minsz=3,
                            maxsz=300,
                            sv_minsz=2,
                            sv_maxsz=5,
                            sv_labels=None,
                            sv_knn_prefix=None,
                            is_rebuild=False,
                            is_save_proposals=True,
                            force=False,
                            **kwargs):
    """Generate the next iteration of cluster proposals from super vertices.

    The clusters stored in ``sv_labels`` are treated as super vertices: each
    one is represented by the mean of its members' rows in ``feats``, a kNN
    graph is built over those means, and ``super_vertex`` groups them.  Each
    resulting group is expanded back to the instance indices of its member
    super vertices before being written out and filtered.

    Args:
        oprefix: Root directory under which the run folder is created.
        knn_prefix: Forwarded to ``build_knns`` for the super-vertex kNNs;
            also the fallback for ``sv_knn_prefix``.
        feats: Instance features; indexed as ``feats[c, :]`` with each
            super-vertex member list ``c`` and averaged over axis 0.
        feat_dim: Accepted but not used inside this function.
        knn_method: Forwarded to ``build_knns``; part of the folder name.
        k: Forwarded to ``build_knns`` and ``super_vertex``.
        th_knn: Forwarded to ``super_vertex``.
        th_step: Forwarded to ``super_vertex``.
        minsz: Lower bound passed to the final ``filter_clusters`` call.
        maxsz: Upper bound passed to the final ``filter_clusters`` call.
        sv_minsz: Lower bound used to filter the groups of super vertices;
            must be at least 2.
        sv_maxsz: Size limit forwarded to ``super_vertex``.
        sv_labels: Path to the label file holding the super vertices.
            Required despite the ``None`` default.
        sv_knn_prefix: Forwarded to ``get_knns_from_path``; defaults to
            ``knn_prefix`` when ``None``.
        is_rebuild: When true, recompute clusters even if a label file is
            already present; also forwarded to ``build_knns``.
        is_save_proposals: When true, write proposals to disk.
        force: Forwarded to ``save_proposals``.
        **kwargs: Accepted and ignored.

    Returns:
        A tuple ``(ofolder_proposals, ofn_pred_labels)``: the proposals
        folder path (returned even when nothing was saved there) and the
        path of the predicted-label file.

    Raises:
        TypeError: If ``sv_labels`` is ``None``.
        FileNotFoundError: If ``sv_labels`` does not exist.
    """
    assert sv_minsz >= 2, "sv_minsz >= 2 to avoid duplicated proposals"
    print('k={}, th_knn={}, th_step={}, minsz={}, maxsz={}, '
          'sv_minsz={}, sv_maxsz={}, is_rebuild={}'.format(
              k, th_knn, th_step, minsz, maxsz, sv_minsz, sv_maxsz,
              is_rebuild))

    # os.path.exists(None) raises an opaque TypeError about ``stat``; report
    # the real problem instead while keeping the same exception type.
    if sv_labels is None:
        raise TypeError('sv_labels is required: pass the path to the '
                        'super-vertex label file')
    if not os.path.exists(sv_labels):
        raise FileNotFoundError('{} not found.'.format(sv_labels))

    if sv_knn_prefix is None:
        sv_knn_prefix = knn_prefix

    # get iter and knns from super vertex path
    _iter = get_iter_from_path(sv_labels) + 1
    knns_inst = get_knns_from_path(sv_labels, sv_knn_prefix, feats)
    print('read sv_clusters from {}'.format(sv_labels))
    sv_lb2idxs, sv_idx2lb = read_meta(sv_labels)
    inst_num = len(sv_idx2lb)
    sv_clusters = labels2clusters(sv_lb2idxs)
    # Super vertices are not size-filtered here; every one gets a mean
    # feature.  A separate name keeps the instance-level ``feats`` intact.
    sv_feats = np.array([feats[c, :].mean(axis=0) for c in sv_clusters])
    print('average feature of super vertices:', sv_feats.shape)

    # build knns
    knns = build_knns(knn_prefix, sv_feats, knn_method, k, is_rebuild)

    # obtain cluster proposals
    ofolder = os.path.join(
        oprefix,
        '{}_k_{}_th_{}_step_{}_minsz_{}_maxsz_{}_sv_minsz_{}_maxsz_{}_iter_{}'.
        format(knn_method, k, th_knn, th_step, minsz, maxsz, sv_minsz,
               sv_maxsz, _iter))
    ofn_pred_labels = os.path.join(ofolder, 'pred_labels.txt')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(ofolder, exist_ok=True)
    if not os.path.isfile(ofn_pred_labels) or is_rebuild:
        with Timer('build super vertices (iter={})'.format(_iter)):
            clusters = super_vertex(knns, k, th_knn, th_step, sv_maxsz)
            clusters = filter_clusters(clusters, sv_minsz)
            # Expand each group of super-vertex ids into the flat list of
            # instance indices those super vertices contain.
            clusters = [[x for c in cluster for x in sv_clusters[c]]
                        for cluster in clusters]
        with Timer('dump clustering to {}'.format(ofn_pred_labels)):
            labels = clusters2labels(clusters)
            write_meta(ofn_pred_labels, labels, inst_num=inst_num)
    else:
        print('read clusters from {}'.format(ofn_pred_labels))
        lb2idxs, _ = read_meta(ofn_pred_labels)
        clusters = labels2clusters(lb2idxs)
    clusters = filter_clusters(clusters, minsz, maxsz)

    # output cluster proposals
    ofolder_proposals = os.path.join(ofolder, 'proposals')
    if is_save_proposals:
        print('saving cluster proposals to {}'.format(ofolder_proposals))
        os.makedirs(ofolder_proposals, exist_ok=True)
        # Proposals are saved with the instance-level kNNs, not the
        # super-vertex kNNs built above.
        save_proposals(clusters,
                       knns_inst,
                       ofolder=ofolder_proposals,
                       force=force)

    return ofolder_proposals, ofn_pred_labels
Beispiel #3
0
def generate_proposals(oprefix,
                       feats,
                       feat_dim=256,
                       knn_method='hnsw',
                       k=80,
                       th_knn=0.6,
                       th_step=0.05,
                       min_size=3,
                       max_size=300,
                       is_rebuild=False,
                       is_save_proposals=False):
    """Build (or load) kNNs and derive first-iteration cluster proposals.

    kNNs are cached in ``<oprefix>/<knn_method>_k_<k>.npz``; clusters are
    cached in ``pred_labels.txt`` inside a run folder whose name encodes the
    clustering parameters.  Either cache is recomputed when its file is
    missing or ``is_rebuild`` is true.

    Args:
        oprefix: Root directory for the kNN file and the run folder.
        feats: Feature data passed to the kNN index constructor.
        feat_dim: Accepted but not used inside this function.
        knn_method: ``'hnsw'`` or ``'faiss'``; only checked when the kNNs
            actually have to be built.
        k: Passed to the kNN index constructor and to ``super_vertex``.
        th_knn: Forwarded to ``super_vertex``.
        th_step: Forwarded to ``super_vertex``.
        min_size: Forwarded to ``filter_clusters``.
        max_size: Forwarded to ``super_vertex``.
        is_rebuild: When true, ignore both caches and recompute.
        is_save_proposals: When true, write proposals into a
            ``proposals`` sub-folder of the run folder.

    Returns:
        None.

    Raises:
        KeyError: If the kNNs must be built and ``knn_method`` is neither
            ``'hnsw'`` nor ``'faiss'``.
    """
    print('k={}, th_knn={}, th_step={}, max_size={}, is_rebuild={}'.format(
        k, th_knn, th_step, max_size, is_rebuild))

    ## knn retrieval
    oprefix = os.path.join(oprefix, '{}_k_{}'.format(knn_method, k))
    knn_fn = oprefix + '.npz'
    if not os.path.isfile(knn_fn) or is_rebuild:
        # Reject an unknown method before entering the timed section.  The
        # message uses adjacent literals: a backslash continuation inside
        # the string would embed the next line's indentation in the text.
        if knn_method not in ('hnsw', 'faiss'):
            raise KeyError('Unsupported method({}). '
                           'Only support hnsw and faiss currently'.format(
                               knn_method))
        index_fn = oprefix + '.index'
        with Timer('build index'):
            # Imports stay local so only the selected backend is loaded.
            if knn_method == 'hnsw':
                from proposals import knn_hnsw
                index = knn_hnsw(feats, k, index_fn)
            else:
                from proposals import knn_faiss
                index = knn_faiss(feats, k, index_fn)
            knns = index.get_knns()
        with Timer('dump knns to {}'.format(knn_fn)):
            dump_data(knn_fn, knns, force=True)
    else:
        print('read knn from {}'.format(knn_fn))
        knns = load_data(knn_fn)

    # obtain cluster proposals
    ofolder = oprefix + '_th_{}_step_{}_minsz_{}_maxsz_{}_iter0'.format(
        th_knn, th_step, min_size, max_size)
    ofn_pred_labels = os.path.join(ofolder, 'pred_labels.txt')
    if not os.path.exists(ofolder):
        os.makedirs(ofolder)
    if not os.path.isfile(ofn_pred_labels) or is_rebuild:
        with Timer('build super vertices'):
            clusters = super_vertex(knns, k, th_knn, th_step, max_size)
        with Timer('dump clustering to {}'.format(ofn_pred_labels)):
            labels = clusters2labels(clusters)
            write_meta(ofn_pred_labels, labels)
    else:
        print('read clusters from {}'.format(ofn_pred_labels))
        lb2idxs, _ = read_meta(ofn_pred_labels)
        clusters = labels2clusters(lb2idxs)
    clusters = filter_clusters(clusters, min_size)

    # output cluster proposals
    if is_save_proposals:
        ofolder = os.path.join(ofolder, 'proposals')
        print('saving cluster proposals to {}'.format(ofolder))
        if not os.path.exists(ofolder):
            os.makedirs(ofolder)
        save_proposals(clusters, knns, ofolder=ofolder, force=True)