Example 1
    def batch_query(em, force_recomp=False, test_cxs=None):
        '''Runs each test_cxs as a query. If test_cxs is None, then all queries
        are run'''
        # TODO: Fix up the VM dependencies
        vm, iom, am, cm = em.hs.get_managers('vm', 'iom', 'am', 'cm')
        # Compute the matches
        qm = vm.hs.qm
        vm.sample_train_set()
        vm.build_model(force_recomp=force_recomp)
        if test_cxs is None:
            test_cxs = vm.get_train_cx()
        logmsg('Building matching graph. This may take a while')

        depends = ['chiprep', 'preproc', 'model', 'query']
        algo_suffix = am.get_algo_suffix(depends)
        samp_suffix = vm.get_samp_suffix()
        result_dpath = iom.ensure_directory(iom.get_temp_fpath('raw_results'))
        rr_fmtstr_cid = os.path.join(
            result_dpath, 'rr_cid%07d' + samp_suffix + algo_suffix + '.pkl')

        # Find the Queries which need to be run
        unsaved_cxs = []
        for cx in test_cxs:
            cid = cm.cx2_cid[cx]
            rr_fpath = rr_fmtstr_cid % cid
            if not os.path.exists(rr_fpath):
                unsaved_cxs.append(cx)

        # Run Unsaved Query
        total = len(unsaved_cxs)
        for count, cx in enumerate(unsaved_cxs):
            logmsg('Query %d/%d' % (count + 1, total))
            em.run_and_save_query(cx, rr_fmtstr_cid)

        # Read Each Query
        cx2_rr = alloc_lists(test_cxs.max() + 1)
        total = len(test_cxs)
        for count, cx in enumerate(test_cxs):
            logmsg('Loading Result %d/%d' % (count + 1, total))
            cid = cm.cx2_cid[cx]
            rr_fpath = rr_fmtstr_cid % cid
            if not os.path.exists(rr_fpath):
                logwarn('Result does not exist for CID=%d' % cid)
                continue
            rr_file = open(rr_fpath, 'rb')
            try:
                rr = cPickle.load(rr_file)
            except EOFError:
                rr_file.close()
                os.remove(rr_fpath)
                logwarn('Result was corrupted for CID=%d' % cid)
                continue
            rr_file.close()
            # Clear large per-query fields before keeping rr in memory
            rr.cx2_cscore_ = []
            rr.cx2_fs_ = []
            rr.qfdsc = []
            rr.qfpts = []
            cx2_rr[cx] = rr

        return cx2_rr
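The cache filename is built once as a `%`-format template and then instantiated per chip id. A minimal sketch of the naming scheme, with hypothetical suffix values standing in for what `am.get_algo_suffix(depends)` and `vm.get_samp_suffix()` would return:

import os

# Hypothetical suffixes; the real ones come from the algorithm and
# sample managers and encode the current configuration.
samp_suffix = '_samp(train)'
algo_suffix = '_algo(default)'
result_dpath = '/tmp/raw_results'

rr_fmtstr_cid = os.path.join(
    result_dpath, 'rr_cid%07d' + samp_suffix + algo_suffix + '.pkl')
print(rr_fmtstr_cid % 42)
# /tmp/raw_results/rr_cid0000042_samp(train)_algo(default).pkl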
Example 2
 def query_db_vs_db(hsA, hsB):
     'Runs cross database queries / reloads cross database queries'
     vs_str = get_results_name(hsA, hsB)
     print('Running/Loading ' + vs_str)
     query_cxs = hsA.cm.get_valid_cxs()
     total = len(query_cxs)
     cx2_rr = alloc_lists(total)
     for count, qcx in enumerate(query_cxs):
         print('Query %d/%d %s' % (count + 1, total, vs_str))
         rr = hsB.qm.cx2_rr(qcx, hsA)
         cx2_rr[count] = rr
     return cx2_rr
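Since the two handles play asymmetric roles (hsA supplies the queries, hsB answers them), cross-database evaluation typically runs both directions. A hedged usage sketch, assuming hsA and hsB are already-constructed database handles exposing .cm and .qm as in the source:

# hsA's chips queried against hsB's model, and the reverse direction.
cx2_rr_AvsB = query_db_vs_db(hsA, hsB)
cx2_rr_BvsA = query_db_vs_db(hsB, hsA)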
Example 3
def approximate_kmeans(data, K=1e6, max_iters=1000, flann_pref=None):
    K = int(K)  # allow float shorthand like 1e6 for the cluster count
    if flann_pref is None:
        flann_pref = Pref()
        flann_pref.algorithm = Pref("kdtree")
        flann_pref.trees = Pref(8)
        flann_pref.checks = Pref(128)
    flann_args = flann_pref.to_dict()
    float_data = np.array(data, dtype=np.float32)
    N = float_data.shape[0]
    print ("Approximately clustering %d data vectors into %d clusters" % (N, K))
    np.random.seed(seed=0)  # For Reproducibility
    # Initialize to Random Cluster Centers
    centx = np.random.choice(N, size=K, replace=False)
    cent = np.copy(float_data[centx])
    assign = alloc_lists(K)  # List for each cluster center with assigned indexes
    for iterx in xrange(0, max_iters):
        print "Iteration " + str(iterx)
        # Step 1: Find Nearest Neighbors (approximate, via FLANN)
        datax2_centx, _ = flann_one_time(cent, float_data, 1, flann_args)
        # Step 2: Assign data to cluster centers
        datax_sort = datax2_centx.argsort()
        centx_sort = datax2_centx[datax_sort]
        # Efficiently Trace over sorted centers with two pointers. Take care
        # To include the last batch of datavecs with the same center_index
        converged = True
        prev_centx = -1
        _L = 0
        dbg_total_assigned = 0
        dbg_assigned_list = []
        for _R in xrange(N + 1):  # Loop over datapoints, going 1 past the end, and group them
            # data  =  0[  . . . . . . . . . . . . .]N
            # ptrs  =          L         R
            #                  |-   k  -|L       R
            #                            |- k+1 |L   R
            #                                    |_K|
            if _R == N or centx_sort[_L] != centx_sort[_R]:  # We found a group
                centx = centx_sort[_L]  # Assign this group cluster index: centx
                # SPECIAL CASE: ( akmeans might not assign everything )
                if centx - prev_centx > 1:  # Check if a cluster got skipped
                    for skipx in xrange(prev_centx + 1, centx):
                        print ("    Skipping Index:" + str(skipx))
                        if len(assign[skipx]) != 0:
                            converged = False
                        assign[skipx] = []
                prev_centx = centx
                # Set Assignments
                num_members = np.float32(_R - _L)
                dbg_total_assigned += num_members
                centx_membx = datax_sort[_L:_R]
                # DBG CODE, keep track of data vectors you've assigned
                # print('    Assigning %d data vectors to center index: %d' % (num_members, centx) )
                # for x in centx_membx:
                # dbg_assigned_list.append(x)
                # /DBGCODE
                # Assignments changed if the member set differs at all
                if not np.array_equal(assign[centx], centx_membx):
                    converged = False
                assign[centx] = centx_membx
                # Recompute Centers
                cent[centx] = float_data[centx_membx, :].sum(axis=0) / num_members
                _L = _R
        # print('    Did Assignment of %d centers' % prev_centx)
        # print('    Assigned %d datavectors in total' % dbg_total_assigned)
        # SPECIAL CASE: has to run at the end again
        if prev_centx < K:  # Check if clusters got skipped at the end
            for skipx in xrange(prev_centx + 1, K):
                print('    Cluster Index %d was empty' % skipx)
                if len(assign[skipx]) != 0:
                    converged = False
                assign[skipx] = []

        if converged:  # Assignments have not changed
            print "akmeans converged in " + str(iterx) + " iterations"
            break
    return cent, assign
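The heart of the update step is grouping datapoint indexes by their assigned center in one pass over the argsort, using the two-pointer trick in the inner loop. A minimal self-contained sketch of the same idea (plain NumPy, no FLANN; names mirror the function above, the toy data is made up):

import numpy as np

def group_by_center(datax2_centx, K):
    # Group data indexes by assigned center via one argsort pass.
    # Returns a list of index arrays, one per center (empty if skipped).
    datax_sort = datax2_centx.argsort()
    centx_sort = datax2_centx[datax_sort]
    groups = [np.array([], dtype=int) for _ in range(K)]
    N = len(centx_sort)
    _L = 0
    for _R in range(N + 1):  # go one past the end to flush the last group
        if _R == N or centx_sort[_L] != centx_sort[_R]:
            groups[centx_sort[_L]] = datax_sort[_L:_R]
            _L = _R
    return groups

assignments = np.array([2, 0, 2, 1, 0, 2])
for cx, members in enumerate(group_by_center(assignments, K=4)):
    print(cx, members.tolist())
# 0 [1, 4]
# 1 [3]
# 2 [0, 2, 5]
# 3 []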
Example 4
def assign_feature_matches_1vM(rr, hs, K, method, cids_to_remove):
    '''Assigns each query feature to its K nearest database features
    with a similarity-score. Each feature votes for its assigned
    chip with this weight.'''
    logdbg('Assigning feature matches and initial scores')
    # Get managers
    cm = hs.cm
    nm = hs.nm
    vm = hs.vm
    # Get intermediate results
    qcx = rr.qcx
    qcid = rr.qcid
    qfdsc = rr.qfdsc
    qfpts = rr.qfpts

    num_qf = qfpts.shape[0]
    # define: Prefix K = list of K+1 nearest; k = K nearest
    # Everything is done in a flat manner, and reshaped at the end.
    if len(cids_to_remove) > 0:
        K += len(cids_to_remove)
        logdbg('K = %d. Increased by %d to account for removing results' %
               (K, len(cids_to_remove)))
    # qfx = Query Feature Index
    # Kwxs = the Kth result word index ;  Kdists = the Kth result distance
    (qfx2_Kwxs, qfx2_Kdists) = vm.nearest_neighbors(qfdsc, K + 1)
    # ---
    # Candidate score the nearest neighbor matches
    # p - pth nearest ; o - k+1th nearest
    score_fn_dict = {
        'DIFF': lambda p, o: o - p,
        'RAT': lambda p, o: o / p,
        'LNRAT': lambda p, o: np.log2(o / p),
        'COUNT': lambda p, o: 1,
        'NDIST': lambda p, o: 10e16 - p,
        'TFIDF': lambda wx2_tf, wx_idf, wx: wx2_tf[wx] * wx_idf[wx]
    }
    score_fn = score_fn_dict[method]
    if method == 'TFIDF':
        # The wx2_qtf could really be per k or aggregated across all K
        # NOTE: assumes the voting words are the first K word indexes
        qfx2_wxs = qfx2_Kwxs[:, 0:K]
        w_histo = np.bincount(qfx2_wxs.flatten(), minlength=vm.numWords())
        wx2_qtf = np.array(w_histo, dtype=np.float32) / num_qf
        qfx2_kweight = score_fn(wx2_qtf, vm.wx2_idf, qfx2_wxs)
    else:
        # Distances to the 0-K results
        p_vote = qfx2_Kdists[:, 0:K] + 1
        # Distance to the K+1th result
        o_norm = np.tile(qfx2_Kdists[:, -1].reshape(num_qf, 1) + 1, (1, K))
        # Use score method to get weight
        qfx2_kweight = np.array(
            [score_fn(p, o) for (p, o) in zip(p_vote.flat, o_norm.flat)],
            dtype=np.float32)
        qfx2_kweight.shape = (num_qf, K)
    # ---
    # Use the scores to cast weighted votes for database chips
    #
    if len(cids_to_remove) > 0:
        # Remove the query from results
        # query feature index 2 agg descriptor indexes -> cids -> self_query_bit -> clean_axs
        #
        # Feature Matches -> Chip Ids
        logdbg('Query cids=%r are being removed from results' %
               cids_to_remove)
        qfx2_Kaxs_ = vm.wx2_axs[qfx2_Kwxs]
        qfx2_Kcids_ = [vm.ax2_cid[axs] for axs in qfx2_Kaxs_.flat]
        # Keep-bit: True where the FeatureMatch-ChipId is NOT being removed
        qfx2_Ksqbit_ = [~np.in1d(cids, cids_to_remove) for cids in qfx2_Kcids_]
        # Remove FeatureMatches to the Query-ChipId
        qfx2_Kaxs = [np.array(axs)[sqbit].tolist()
                     for (axs, sqbit) in zip(qfx2_Kaxs_.flat, qfx2_Ksqbit_)]
    else:
        qfx2_Kaxs_ = vm.wx2_axs[qfx2_Kwxs]
        qfx2_Kaxs = [np.array(axs).tolist() for axs in qfx2_Kaxs_.flat]
    # Clean Vote for Info
    qfx2_Kcxs = np.array([vm.ax2_cx(axs) for axs in qfx2_Kaxs])
    qfx2_Kfxs = np.array([vm.ax2_fx[axs] for axs in qfx2_Kaxs])
    qfx2_Knxs = np.array([cm.cx2_nx[cxs] for cxs in qfx2_Kcxs])
    if qfx2_Kfxs.size == 0:
        logerr('Cannot query when there is only one chip in the database')
    # Reshape Vote for Info
    qfx2_Kcxs = np.array(qfx2_Kcxs).reshape(num_qf, K + 1)
    qfx2_Kfxs = np.array(qfx2_Kfxs).reshape(num_qf, K + 1)
    qfx2_Knxs = np.array(qfx2_Knxs).reshape(num_qf, K + 1)

    # Using the K=K+1 results, make k=K scores
    qfx2_kcxs_vote = qfx2_Kcxs[:, 0:K]  # vote for cx
    qfx2_kfxs_vote = qfx2_Kfxs[:, 0:K]  # vote for fx
    qfx2_knxs_vote = qfx2_Knxs[:, 0:K]  # check with nx

    # Attempt to recover from problems where K is too small
    qfx2_knxs_norm = np.tile(qfx2_Knxs[:, K].reshape(num_qf, 1), (1, K))
    # Remove Unidentifieds from this test
    qfx2_knxs_norm[qfx2_knxs_norm == nm.UNIDEN_NX()] = 0
    qfx2_kcxs_norm = np.tile(qfx2_Kcxs[:, K].reshape(num_qf, 1), (1, K))
    # If the normalizer has the same name but is a different chip, there is a
    # good chance it is a correct match that was penalized by the scoring function
    qfx2_normgood_bit = np.logical_and(qfx2_kcxs_vote != qfx2_kcxs_norm,
                                       qfx2_knxs_vote == qfx2_knxs_norm)
    #qfx2_kweight[qfx2_normgood_bit] = 2

    # -----
    # Build FeatureMatches and FeaturesScores
    #
    cx2_fm = alloc_lists(cm.max_cx + 1)
    cx2_fs_ = alloc_lists(cm.max_cx + 1)

    qfx2_qfx = np.tile(np.arange(0, num_qf).reshape(num_qf, 1), (1, K))
    # Add matches and scores
    for (qfx, qfs, cxs, fxs) in zip(qfx2_qfx.flat,
                                    qfx2_kweight.flat,
                                    qfx2_kcxs_vote.flat,
                                    qfx2_kfxs_vote.flat):
        if cxs.size == 0:
            continue
        for (vote_cx, vote_fx) in iter(zip(np.nditer(cxs), np.nditer(fxs))):
            cx2_fm[vote_cx].append((qfx, vote_fx))
            cx2_fs_[vote_cx].append(qfs)

    # Convert correspondences to numpy arrays
    for cx in xrange(len(cx2_fs_)):
        num_m = len(cx2_fm[cx])
        cx2_fs_[cx] = np.array(cx2_fs_[cx], dtype=np.float32)
        cx2_fm[cx] = np.array(cx2_fm[cx], dtype=np.uint32).reshape(num_m, 2)
    logdbg('Setting feature assignments')
    rr.cx2_fm = cx2_fm
    rr.cx2_fs_ = cx2_fs_
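For the non-TFIDF methods, each vote distance is scored against the (K+1)-th neighbor's distance, a Lowe-style normalizer: votes that are much closer than the normalizer get large weights. A toy sketch of that weighting, vectorizing the per-element loop above (the distances are made up):

import numpy as np

# Toy neighbor distances: 3 query features, K=2 votes plus 1 normalizer column.
qfx2_Kdists = np.array([[10., 12., 40.],
                        [ 5., 30., 31.],
                        [20., 21., 22.]])
num_qf = qfx2_Kdists.shape[0]
K = qfx2_Kdists.shape[1] - 1

score_fn = lambda p, o: np.log2(o / p)  # the 'LNRAT' method from the dict above

p_vote = qfx2_Kdists[:, 0:K] + 1  # distances of the voting neighbors
o_norm = np.tile(qfx2_Kdists[:, -1].reshape(num_qf, 1) + 1, (1, K))
qfx2_kweight = score_fn(p_vote, o_norm)  # large when vote << normalizer
print(qfx2_kweight.round(2))
# The second feature's first vote dominates (5 vs 31), so it gets the
# largest weight; its second vote (30 vs 31) is nearly worthless.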