コード例 #1
0
ファイル: clustering.py プロジェクト: Erotemic/hotspotter
def approximate_kmeans(data, K=1e6, max_iters=1000, flann_pref=None):
    """Cluster ``data`` into ``K`` groups using approximate k-means (akmeans).

    Every iteration assigns each data vector to its (approximate) nearest
    cluster center via ``flann_one_time`` and then moves each center to the
    mean of the vectors assigned to it. Iteration stops as soon as no
    assignment changed, or after ``max_iters`` iterations.

    Args:
        data: array-like of data vectors, one per row; converted to a
            float32 numpy array.
        K: number of clusters. Coerced with ``int`` because the default is
            the float literal ``1e6``. Centers are seeded by sampling K
            distinct rows, so K must not exceed the number of rows.
        max_iters: upper bound on the number of iterations.
        flann_pref: ``Pref`` whose ``to_dict()`` supplies the FLANN
            arguments. When None, a kdtree with 8 trees and 128 checks is
            configured.

    Returns:
        (cent, assign): ``cent`` is the float32 array of K cluster centers;
        ``assign[k]`` holds the row indexes assigned to center ``k`` (an
        empty list when the center received no vectors).
    """
    if flann_pref is None:
        flann_pref = Pref()
        flann_pref.algorithm = Pref("kdtree")
        flann_pref.trees = Pref(8)
        flann_pref.checks = Pref(128)
    flann_args = flann_pref.to_dict()
    float_data = np.array(data, dtype=np.float32)
    N = float_data.shape[0]
    # Sample sizes and list lengths must be integers, but the default K is
    # written as a float.
    K = int(K)
    print("Approximately clustering %d data vectors into %d clusters" % (N, K))
    np.random.seed(seed=0)  # For Reproducibility
    # Initialize to Random Cluster Centers
    init_datax = np.random.choice(N, size=K, replace=False)
    cent = np.copy(float_data[init_datax])
    assign = alloc_lists(K)  # List for each cluster center with assigned indexes
    for iterx in range(max_iters):
        print("Iteration " + str(iterx))
        # Step 1: Find Nearest Neighbors (index of the nearest center for
        # every data vector).
        datax2_centx, _ = flann_one_time(cent, float_data, 1, flann_args)
        # Step 2: Assign data to cluster centers.
        # Sorting the data indexes by center index puts the members of each
        # cluster into one contiguous run of the sorted order.
        datax_sort = datax2_centx.argsort()
        centx_sort = datax2_centx[datax_sort]
        # A run ends wherever two neighbouring sorted center indexes differ;
        # the final run always ends at N.
        run_breaks = np.flatnonzero(centx_sort[1:] != centx_sort[:-1]) + 1
        run_starts = np.concatenate(([0], run_breaks))
        run_stops = np.concatenate((run_breaks, [N]))
        converged = True
        prev_centx = -1
        for _L, _R in zip(run_starts, run_stops):
            centx = int(centx_sort[_L])  # Cluster index shared by this run
            # SPECIAL CASE: ( akmeans might not assign everything )
            # Center indexes between the previous run and this one received
            # no data vectors; clear them and keep their old center position.
            for skipx in range(prev_centx + 1, centx):
                print("    Skipping Index:" + str(skipx))
                if len(assign[skipx]) != 0:
                    converged = False
                assign[skipx] = []
            prev_centx = centx
            # Set Assignments
            num_members = np.float32(_R - _L)
            centx_membx = datax_sort[_L:_R]
            # Any difference in membership (including a different number of
            # members) means the clustering is still moving.
            if not np.array_equal(assign[centx], centx_membx):
                converged = False
            assign[centx] = centx_membx
            # Recompute Centers
            cent[centx] = float_data[centx_membx, :].sum(axis=0) / num_members
        # SPECIAL CASE: clusters after the last assigned one were skipped too
        for skipx in range(prev_centx + 1, K):
            print("    Cluster Index %d was empty:" % skipx)
            if len(assign[skipx]) != 0:
                converged = False
            assign[skipx] = []

        if converged:  # Assignments have not changed
            print("akmeans converged in " + str(iterx) + " iterations")
            break
    return cent, assign
コード例 #2
0
    def init_preferences(am, default_bit=False):
        """Build the tree of algorithm preferences with their default values.

        When ``am.algo_prefs`` is None, the root ``Pref`` is created from the
        path returned by ``iom.get_prefs_fpath('algo_prefs')`` and one child
        ``Pref`` is attached per pipeline stage (preproc, chiprep, model,
        query, results). Every call then assigns the default value of each
        individual preference.

        Args:
            am: the object owning ``algo_prefs`` and ``hs.iom`` (this method's
                instance argument).
            default_bit: when False (the default) ``am.algo_prefs.load()`` is
                called after the defaults are assigned (presumably restoring
                previously saved values -- confirm against ``Pref.load``);
                when True the defaults assigned here are left untouched.

        Raises:
            Exception: if an OpenCV detector reports a parameter type code
                other than 0, 1, 2, 7, 8, 9 or 11.
        """
        iom = am.hs.iom
        if am.algo_prefs is None:
            am.algo_prefs = Pref(fpath=iom.get_prefs_fpath('algo_prefs'))
            # Define the pipeline stages
            am.algo_prefs.preproc = Pref(parent=am.algo_prefs)  # Low Level Chip Operations
            am.algo_prefs.chiprep = Pref(parent=am.algo_prefs)  # Extracting Chip Features
            am.algo_prefs.model = Pref(parent=am.algo_prefs, hidden=True)  # Building the model
            am.algo_prefs.query = Pref(parent=am.algo_prefs)  # Searching the model
            am.algo_prefs.results = Pref(parent=am.algo_prefs)  # Reporting the results
        # --- Chip Preprocessing ---
        # (selection, options, prefs? )
        am.algo_prefs.preproc.sqrt_num_pxls = Pref(700)
        am.algo_prefs.preproc.autocontrast_bit = Pref(False, hidden=True)
        am.algo_prefs.preproc.bilateral_filt_bit = Pref(False)
        am.algo_prefs.preproc.histeq_bit = Pref(True)
        am.algo_prefs.preproc.contrast_stretch_bit = Pref(False)
        am.algo_prefs.preproc.adapt_histeq_bit = Pref(False)
        # --- Chip Representation ---
        # Currently one feature detector and one feature descriptor is chosen
        am.algo_prefs.chiprep.use_gravity_vector = True

        opencv_detectors = [
            'FAST', 'STAR', 'SIFT', 'SURF', 'ORB', 'BRISK', 'MSER', 'GFTT',
            'HARRIS', 'Dense', 'SimpleBlob',
        ]
        # Keypoint Detector: 'heshesaff' (index 0, the default) followed by
        # the opencv detectors
        am.algo_prefs.chiprep.kpts_detector = Pref(
            0, choices=['heshesaff'] + opencv_detectors)
        # Keypoint Detector Parameters: one group of preferences per opencv
        # detector, filled with the values the detector itself reports.
        # types = { 0:'int', 1:'bool', 2:'double', 7:'float', 9:'int64', 11:'unsigned char'}
        for detector_type in opencv_detectors:
            # Presumably ties the group to the detector choice being equal to
            # detector_type -- confirm against Pref's depeq handling.
            det_dep = (am.algo_prefs.chiprep.kpts_detector_internal,
                       detector_type)
            det_pref = Pref(depeq=det_dep)
            det = cv2.FeatureDetector_create(detector_type)
            for param_name in det.getParams():
                param_type = det.paramType(param_name)
                if param_type in [0, 8, 9, 11]:
                    param_val = det.getInt(param_name)
                elif param_type == 1:
                    param_val = det.getBool(param_name)
                elif param_type in [2, 7]:
                    param_val = det.getDouble(param_name)
                else:
                    raise Exception('name: ' + str(param_name) + ' type: ' +
                                    str(param_type))
                det_pref[param_name] = param_val
            am.algo_prefs.chiprep[detector_type + '_params'] = det_pref

        am.algo_prefs.chiprep.kpts_extractor = Pref(
            0, choices=('SIFT', ), hidden=True)  # , '#SURF', '#BRISK'))
        # --- Vocabulary ---
        am.algo_prefs.model.quantizer = Pref(
            0, choices=('naive_bayes', 'akmeans'), hidden=True)

        akm_dep = (am.algo_prefs.model.quantizer_internal, 'akmeans')
        am.algo_prefs.model.akmeans = Pref(depeq=akm_dep, hidden=True)
        am.algo_prefs.model.akmeans.num_words = Pref(1000)
        am.algo_prefs.model.akmeans.max_iters = Pref(1000)

        # NOTE(review): 'hkmeans' is not one of the quantizer choices above,
        # so this dependency can never match as written -- confirm intent.
        hkm_dep = (am.algo_prefs.model.quantizer_internal, 'hkmeans')
        am.algo_prefs.model.hkmeans = Pref(depeq=hkm_dep, hidden=True)
        am.algo_prefs.model.hkmeans.branching = Pref(10)
        am.algo_prefs.model.hkmeans.depth = Pref(6)

        flann_kdtree = Pref(hidden=True)
        # Build Prefs. The third choice must be spelled 'kmeans': that is the
        # value hkmeans_spef below compares against (it was misspelled
        # 'kmeanes', which left the kmeans-specific prefs unreachable).
        flann_kdtree.algorithm = Pref(
            default=1,
            choices=['linear', 'kdtree', 'kmeans', 'composite', 'autotuned'])
        flann_kdtree.trees = Pref(8, min=0, max=30)
        flann_kdtree.checks = Pref(128, min=0, max=4096)  # Search Prefs
        # Autotuned Specific Parameters
        autotune_spef = (flann_kdtree.algorithm_internal, 'autotuned')
        flann_kdtree.target_precision = Pref(0.95, depeq=autotune_spef)
        flann_kdtree.build_weight = Pref(0.01, depeq=autotune_spef)
        flann_kdtree.memory_weight = Pref(0.86, depeq=autotune_spef,
                                          doc='the time-search tradeoff')
        flann_kdtree.sample_fraction = Pref(0.86, depeq=autotune_spef,
                                            doc='the train_fraction')
        # HKMeans Specific Parameters
        hkmeans_spef = (flann_kdtree.algorithm_internal, 'kmeans')
        flann_kdtree.branching = Pref(10, depeq=hkmeans_spef)
        flann_kdtree.iterations = Pref(6, depeq=hkmeans_spef, doc='num levels')
        flann_kdtree.centers_init = Pref(
            choices=['random', 'gonzales', 'kmeansapp'], depeq=hkmeans_spef)
        flann_kdtree.cb_index = Pref(0,
                                     min=0,
                                     max=5,
                                     depeq=hkmeans_spef,
                                     doc='''
            this parameter (cluster boundary index) influences the way exploration
            is performed in the hierarchical kmeans tree. When cb index is
            zero the next kmeans domain to be explored is choosen to be the one with
            the closest center. A value greater then zero also takes into account the
            size of the domain.''')
        am.algo_prefs.model.indexer = flann_kdtree  # Pref(0, choices=[flann_kdtree])

        # --- Query Prefs ---
        am.algo_prefs.query.k = Pref(1, min=1, max=50)
        am.algo_prefs.query.num_rerank = Pref(1000, min=0)
        am.algo_prefs.query.spatial_thresh = Pref(0.05, min=0, max=1)
        am.algo_prefs.query.sigma_thresh = Pref(
            0.05, min=0, max=1, hidden=True)  #: Unimplemented
        am.algo_prefs.query.method = Pref(
            2, choices=['COUNT', 'DIFF', 'LNRAT', 'RAT'])  # , '#TFIDF'
        am.algo_prefs.query.score = Pref(
            0, choices=['cscore', 'nscore'])  # move to results?
        #: Return self (in terms of name) in results
        am.algo_prefs.query.self_as_result_bit = Pref(False)
        #: Remove all results with the same identified name as the query
        am.algo_prefs.query.remove_other_names = Pref(False)

        # --- Result Prefs ---
        am.algo_prefs.results.score = Pref(0, choices=('cscore', 'nscore'))
        am.algo_prefs.results.one_result_per_name = Pref(False)
        am.algo_prefs.results.match_threshold = Pref(0)
        am.algo_prefs.results.min_num_results = Pref(5)
        am.algo_prefs.results.max_num_results = Pref(5)
        am.algo_prefs.results.extra_num_results = Pref(0)
        if not default_bit:
            am.algo_prefs.load()
コード例 #3
0
ファイル: clustering.py プロジェクト: warunanc/hotspotter
def approximate_kmeans(data, K=1e6, max_iters=1000, flann_pref=None):
    """Cluster ``data`` into ``K`` groups using approximate k-means (akmeans).

    Every iteration assigns each data vector to its (approximate) nearest
    cluster center via ``flann_one_time`` and then moves each center to the
    mean of the vectors assigned to it. Iteration stops as soon as no
    assignment changed, or after ``max_iters`` iterations.

    Args:
        data: array-like of data vectors, one per row; converted to a
            float32 numpy array.
        K: number of clusters. Coerced with ``int`` because the default is
            the float literal ``1e6``. Centers are seeded by sampling K
            distinct rows, so K must not exceed the number of rows.
        max_iters: upper bound on the number of iterations.
        flann_pref: ``Pref`` whose ``to_dict()`` supplies the FLANN
            arguments. When None, a kdtree with 8 trees and 128 checks is
            configured.

    Returns:
        (cent, assign): ``cent`` is the float32 array of K cluster centers;
        ``assign[k]`` holds the row indexes assigned to center ``k`` (an
        empty list when the center received no vectors).
    """
    if flann_pref is None:
        flann_pref = Pref()
        flann_pref.algorithm = Pref('kdtree')
        flann_pref.trees = Pref(8)
        flann_pref.checks = Pref(128)
    flann_args = flann_pref.to_dict()
    float_data = np.array(data, dtype=np.float32)
    N = float_data.shape[0]
    # Sample sizes and list lengths must be integers, but the default K is
    # written as a float.
    K = int(K)
    print('Approximately clustering %d data vectors into %d clusters' % (N, K))
    np.random.seed(seed=0)  # For Reproducibility
    # Initialize to Random Cluster Centers
    init_datax = np.random.choice(N, size=K, replace=False)
    cent = np.copy(float_data[init_datax])
    assign = alloc_lists(K)  # List for each cluster center with assigned indexes
    for iterx in range(max_iters):
        print("Iteration " + str(iterx))
        # Step 1: Find Nearest Neighbors (index of the nearest center for
        # every data vector).
        datax2_centx, _ = flann_one_time(cent, float_data, 1, flann_args)
        # Step 2: Assign data to cluster centers.
        # Sorting the data indexes by center index puts the members of each
        # cluster into one contiguous run of the sorted order.
        datax_sort = datax2_centx.argsort()
        centx_sort = datax2_centx[datax_sort]
        # A run ends wherever two neighbouring sorted center indexes differ;
        # the final run always ends at N.
        run_breaks = np.flatnonzero(centx_sort[1:] != centx_sort[:-1]) + 1
        run_starts = np.concatenate(([0], run_breaks))
        run_stops = np.concatenate((run_breaks, [N]))
        converged = True
        prev_centx = -1
        for _L, _R in zip(run_starts, run_stops):
            centx = int(centx_sort[_L])  # Cluster index shared by this run
            # SPECIAL CASE: ( akmeans might not assign everything )
            # Center indexes between the previous run and this one received
            # no data vectors; clear them and keep their old center position.
            for skipx in range(prev_centx + 1, centx):
                print("    Skipping Index:" + str(skipx))
                if len(assign[skipx]) != 0:
                    converged = False
                assign[skipx] = []
            prev_centx = centx
            # Set Assignments
            num_members = np.float32(_R - _L)
            centx_membx = datax_sort[_L:_R]
            # Any difference in membership (including a different number of
            # members) means the clustering is still moving.
            if not np.array_equal(assign[centx], centx_membx):
                converged = False
            assign[centx] = centx_membx
            # Recompute Centers
            cent[centx] = float_data[centx_membx, :].sum(axis=0) / num_members
        # SPECIAL CASE: clusters after the last assigned one were skipped too
        for skipx in range(prev_centx + 1, K):
            print('    Cluster Index %d was empty:' % skipx)
            if len(assign[skipx]) != 0:
                converged = False
            assign[skipx] = []

        if converged:  # Assignments have not changed
            print('akmeans converged in ' + str(iterx) + ' iterations')
            break
    return cent, assign
コード例 #4
0
    def init_preferences(am, default_bit=False):
        """Build the tree of algorithm preferences with their default values.

        When ``am.algo_prefs`` is None, the root ``Pref`` is created from the
        path returned by ``iom.get_prefs_fpath('algo_prefs')`` and one child
        ``Pref`` is attached per pipeline stage (preproc, chiprep, model,
        query, results). Every call then assigns the default value of each
        individual preference.

        Args:
            am: the object owning ``algo_prefs`` and ``hs.iom`` (this method's
                instance argument).
            default_bit: when False (the default) ``am.algo_prefs.load()`` is
                called after the defaults are assigned (presumably restoring
                previously saved values -- confirm against ``Pref.load``);
                when True the defaults assigned here are left untouched.

        Raises:
            Exception: if an OpenCV detector reports a parameter type code
                other than 0, 1, 2, 7, 8, 9 or 11.
        """
        iom = am.hs.iom
        if am.algo_prefs is None:
            am.algo_prefs = Pref(fpath=iom.get_prefs_fpath('algo_prefs'))
            # Define the pipeline stages
            am.algo_prefs.preproc  = Pref(parent=am.algo_prefs)  # Low Level Chip Operations
            am.algo_prefs.chiprep  = Pref(parent=am.algo_prefs)  # Extracting Chip Features
            am.algo_prefs.model    = Pref(parent=am.algo_prefs, hidden=True)  # Building the model
            am.algo_prefs.query    = Pref(parent=am.algo_prefs)  # Searching the model
            am.algo_prefs.results  = Pref(parent=am.algo_prefs)  # Reporting the results
        # --- Chip Preprocessing ---
        # (selection, options, prefs? )
        am.algo_prefs.preproc.sqrt_num_pxls           = Pref(700)
        am.algo_prefs.preproc.autocontrast_bit        = Pref(False, hidden=True)
        am.algo_prefs.preproc.bilateral_filt_bit      = Pref(False)
        am.algo_prefs.preproc.histeq_bit              = Pref(True)
        am.algo_prefs.preproc.contrast_stretch_bit    = Pref(False)
        am.algo_prefs.preproc.adapt_histeq_bit        = Pref(False)
        # --- Chip Representation ---
        # Currently one feature detector and one feature descriptor is chosen
        am.algo_prefs.chiprep.use_gravity_vector      = True

        opencv_detectors = ['FAST', 'STAR', 'SIFT', 'SURF', 'ORB', 'BRISK',
                            'MSER', 'GFTT', 'HARRIS', 'Dense', 'SimpleBlob']
        # Keypoint Detector: 'heshesaff' (index 0, the default) followed by
        # the opencv detectors
        am.algo_prefs.chiprep.kpts_detector           = Pref(0, choices=['heshesaff'] + opencv_detectors)
        # Keypoint Detector Parameters: one group of preferences per opencv
        # detector, filled with the values the detector itself reports.
        # types = { 0:'int', 1:'bool', 2:'double', 7:'float', 9:'int64', 11:'unsigned char'}
        for detector_type in opencv_detectors:
            # Presumably ties the group to the detector choice being equal to
            # detector_type -- confirm against Pref's depeq handling.
            det_dep = (am.algo_prefs.chiprep.kpts_detector_internal, detector_type)
            det_pref = Pref(depeq=det_dep)
            det = cv2.FeatureDetector_create(detector_type)
            for param_name in det.getParams():
                param_type = det.paramType(param_name)
                if param_type in [0, 8, 9, 11]:
                    param_val = det.getInt(param_name)
                elif param_type == 1:
                    param_val = det.getBool(param_name)
                elif param_type in [2, 7]:
                    param_val = det.getDouble(param_name)
                else:
                    raise Exception('name: ' + str(param_name) + ' type: ' + str(param_type))
                det_pref[param_name] = param_val
            am.algo_prefs.chiprep[detector_type + '_params'] = det_pref

        am.algo_prefs.chiprep.kpts_extractor          = Pref(0, choices=('SIFT',), hidden=True)  # , '#SURF', '#BRISK'))
        # --- Vocabulary ---
        am.algo_prefs.model.quantizer                 = Pref(0, choices=('naive_bayes', 'akmeans'), hidden=True)

        akm_dep = (am.algo_prefs.model.quantizer_internal, 'akmeans')
        am.algo_prefs.model.akmeans             = Pref(depeq=akm_dep, hidden=True)
        am.algo_prefs.model.akmeans.num_words   = Pref(1000)
        am.algo_prefs.model.akmeans.max_iters   = Pref(1000)

        # NOTE(review): 'hkmeans' is not one of the quantizer choices above,
        # so this dependency can never match as written -- confirm intent.
        hkm_dep = (am.algo_prefs.model.quantizer_internal, 'hkmeans')
        am.algo_prefs.model.hkmeans             = Pref(depeq=hkm_dep, hidden=True)
        am.algo_prefs.model.hkmeans.branching   = Pref(10)
        am.algo_prefs.model.hkmeans.depth       = Pref(6)

        flann_kdtree = Pref(hidden=True)
        # Build Prefs. The third choice must be spelled 'kmeans': that is the
        # value hkmeans_spef below compares against (it was misspelled
        # 'kmeanes', which left the kmeans-specific prefs unreachable).
        flann_kdtree.algorithm  = Pref(default=1, choices=['linear',
                                                           'kdtree',
                                                           'kmeans',
                                                           'composite',
                                                           'autotuned'])
        flann_kdtree.trees      = Pref(8, min=0, max=30)
        flann_kdtree.checks     = Pref(128, min=0, max=4096)  # Search Prefs
        # Autotuned Specific Parameters
        autotune_spef = (flann_kdtree.algorithm_internal, 'autotuned')
        flann_kdtree.target_precision = Pref(0.95, depeq=autotune_spef)
        flann_kdtree.build_weight     = Pref(0.01, depeq=autotune_spef)
        flann_kdtree.memory_weight    = Pref(0.86, depeq=autotune_spef,
                                             doc='the time-search tradeoff')
        flann_kdtree.sample_fraction  = Pref(0.86, depeq=autotune_spef,
                                             doc='the train_fraction')
        # HKMeans Specific Parameters
        hkmeans_spef = (flann_kdtree.algorithm_internal, 'kmeans')
        flann_kdtree.branching    = Pref(10, depeq=hkmeans_spef)
        flann_kdtree.iterations   = Pref(6, depeq=hkmeans_spef, doc='num levels')
        flann_kdtree.centers_init = Pref(choices=['random', 'gonzales', 'kmeansapp'],
                                         depeq=hkmeans_spef)
        flann_kdtree.cb_index = Pref(0, min=0, max=5, depeq=hkmeans_spef, doc='''
            this parameter (cluster boundary index) influences the way exploration
            is performed in the hierarchical kmeans tree. When cb index is
            zero the next kmeans domain to be explored is choosen to be the one with
            the closest center. A value greater then zero also takes into account the
            size of the domain.''')
        am.algo_prefs.model.indexer = flann_kdtree  # Pref(0, choices=[flann_kdtree])

        # --- Query Prefs ---
        am.algo_prefs.query.k                         = Pref(1,    min=1, max=50)
        am.algo_prefs.query.num_rerank                = Pref(1000, min=0)
        am.algo_prefs.query.spatial_thresh            = Pref(0.05, min=0, max=1)
        am.algo_prefs.query.sigma_thresh              = Pref(0.05, min=0, max=1, hidden=True)  #: Unimplemented
        am.algo_prefs.query.method                    = Pref(2, choices=['COUNT', 'DIFF', 'LNRAT', 'RAT'])  # , '#TFIDF'
        am.algo_prefs.query.score                     = Pref(0, choices=['cscore', 'nscore'])  # move to results?
        am.algo_prefs.query.self_as_result_bit        = Pref(False)  #: Return self (in terms of name) in results
        am.algo_prefs.query.remove_other_names        = Pref(False)  #: Remove all results with the same identified name as the query

        # --- Result Prefs ---
        am.algo_prefs.results.score                   = Pref(0, choices=('cscore', 'nscore'))
        am.algo_prefs.results.one_result_per_name     = Pref(False)
        am.algo_prefs.results.match_threshold         = Pref(0)
        am.algo_prefs.results.min_num_results         = Pref(5)
        am.algo_prefs.results.max_num_results         = Pref(5)
        am.algo_prefs.results.extra_num_results       = Pref(0)
        if not default_bit:
            am.algo_prefs.load()