def get_closest(possible_starts, free_tasks_starts, grid, n):
    """Choose the (task start, possible start) pair with the shortest planned path.

    FLANN is queried for the ``n`` nearest ``possible_starts`` of every entry
    in ``free_tasks_starts``.  The ``n`` globally closest pairs are then
    planned with ``path`` and the pair with the fewest path steps wins.

    :param possible_starts: candidate start points (the FLANN dataset)
    :param free_tasks_starts: start points of the free tasks (the queries)
    :param grid: map forwarded unchanged to ``path``
    :param n: neighbours per query and number of pairs that are planned
    :return: ``(i_free_tasks_start, i_possible_starts, path)``
    :raises ValueError: if none of the candidate pairs has a path
    """
    flann = FLANN()
    result, dists = flann.nn(
        possible_starts, free_tasks_starts, n,
        algorithm="kmeans", branching=32, iterations=7, checks=16)
    # For n == 1 FLANN hands back 1-D arrays; normalise to (queries, n) so the
    # 2-D index produced by unravel_index below is always valid.
    result = np.asarray(result).reshape(-1, n)
    dists = np.array(dists, dtype=float).reshape(-1, n)

    # The three lists are appended together so that an index into one of them
    # is valid for the others, even when some candidates have no path.
    candidates = []
    lengths = []
    paths = []
    for _ in range(n):
        # Rows of `dists` are queries (free task starts), columns neighbours.
        nearest = np.unravel_index(np.argmin(dists), dists.shape)
        # Mask the pair with +inf; a multiple of the maximum would not mask
        # anything when all distances are zero.
        dists[nearest] = np.inf
        p, _ = path(tuple(possible_starts[result[nearest]]),
                    tuple(free_tasks_starts[nearest[0]]),
                    grid, [])
        if p:
            candidates.append(nearest)
            lengths.append(len(p))
            paths.append(p)

    if not paths:
        raise ValueError("no path found for any of the nearest candidates")

    best = int(np.argmin(lengths))
    nearest = candidates[best]
    i_free_tasks_start = nearest[0]
    i_possible_starts = result[nearest]
    return i_free_tasks_start, i_possible_starts, paths[best]
def assign_nearest_jobs(agent_idle, agent_job, agent_pos, blocked, jobs, left_jobs, n):
    """Create child states by giving every agent each of its nearest open jobs.

    The search origin of an agent is the first element of the last job it
    already has assigned, or its current position when it has none.  For each
    agent up to ``n`` nearest entries of ``left_jobs`` (by their first
    element) are looked up with FLANN, and one child state is built per
    (agent, job) combination via ``comp2state``.

    :return: list of the states returned by ``comp2state``
    """
    from pyflann import FLANN

    # this makes many children ...
    ends = [open_job[0] for open_job in left_jobs]
    ends_job = [jobs.index(open_job) for open_job in left_jobs]

    starts = []
    for i_a, position in enumerate(agent_pos):
        assigned = agent_job[i_a]
        if assigned:  # has assignment
            starts.append(jobs[assigned[-1]][0])
        else:
            starts.append(position)

    flann = FLANN()
    result, dists = flann.nn(
        np.array(ends, dtype=float),
        np.array(starts, dtype=float),
        min(n, len(ends)),
        algorithm="kmeans",
        branching=32,
        iterations=7,
        checks=16)
    assert len(agent_pos) == len(result), "Not the right amount of results"

    # A single neighbour per agent comes back as a flat array; give it a
    # column axis so every row can be iterated the same way.
    if len(result.shape) == 1:
        result = result[:, np.newaxis]

    children = []
    for i_a in range(len(agent_pos)):
        for res in result[i_a]:
            agent_job_new = agent_job.copy()
            agent_job_new[i_a] += (ends_job[res],)
            children.append(
                comp2state(tuple(agent_job_new), agent_idle, blocked))
    return children
def __init__(self, datadict, maxR, denoise_absmin=None, denoise_delta=None, denoise_min=None, detect_planar=None):
    """Index the input points and allocate the per-point result arrays.

    :param datadict: dict of numpy arrays; ``datadict['coords']`` must be a
        2-D array.  Result arrays are added to this dict in place.
    :param maxR: stored as ``self.SuperR``
    :param denoise_absmin: angle in degrees, stored in radians (or None)
    :param denoise_delta: angle in degrees, stored in radians (or None)
    :param denoise_min: angle in degrees, stored in radians (or None)
    :param detect_planar: angle in degrees, stored in radians (or None)
    """
    def to_radians(degrees):
        # None means "option disabled" and is passed through untouched.
        return None if degrees is None else (math.pi / 180) * degrees

    self.D = datadict  # dict of numpy arrays

    # linear algorithm means brute force, which means its exact nn, which we need
    # approximate nn may cause algorithm not to converge
    self.flann = FLANN()
    self.flann.build_index(self.D['coords'], algorithm='linear', target_precision=1,
                           sample_fraction=0.001, log_level="info")

    self.m, self.n = datadict['coords'].shape

    for side in ('in', 'out'):
        self.D['ma_coords_' + side] = np.full((self.m, self.n), np.nan)
    for side in ('in', 'out'):
        self.D['ma_radii_' + side] = np.full(self.m, np.nan)

    # The f1/f2 arrays hold integer values, so they cannot store NaN (and the
    # `np.int` alias no longer exists in NumPy).  The most negative integer
    # marks "not computed yet"; used as an index by mistake it fails loudly.
    # NOTE(review): older NumPy on common platforms produced this same value
    # for the former `int_array[:] = np.nan` -- confirm nothing relied on it.
    unset = np.iinfo(int).min
    for name in ('ma_f1', 'ma_f2'):
        for side in ('in', 'out'):
            self.D[name + '_' + side] = np.full(self.m, unset, dtype=int)

    # a list of lists with indices of closest points during the ball
    # shrinking process for every point:
    self.D['ma_shrinkhist_in'] = []
    self.D['ma_shrinkhist_out'] = []

    self.SuperR = maxR

    self.denoise_absmin = to_radians(denoise_absmin)
    self.denoise_delta = to_radians(denoise_delta)
    self.denoise_min = to_radians(denoise_min)
    self.detect_planar = to_radians(detect_planar)
def __init__(self, model, subject_layer, distance_threshold):
    """Store the model and thresholds and start with no recorded vectors.

    :param model: stored as ``self.model``
    :param subject_layer: stored as ``self.subject_layer``
    :param distance_threshold: stored as ``self.distance_threshold``
    """
    # configuration
    self.model = model
    self.subject_layer = subject_layer
    self.distance_threshold = distance_threshold

    # mutable state: recorded vectors, a buffer, and the FLANN handle
    self.distant_vectors = []
    self.distant_vectors_buffer = []
    self.flann = FLANN()
def __init__(self, kernel, num_neighbors, max_memory, lr):
    """Create an empty DND: settings, key/value storage and bookkeeping."""
    # Settings supplied by the caller.
    self.kernel = kernel
    self.num_neighbors = num_neighbors
    self.max_memory = max_memory
    self.lr = lr

    # Storage starts out empty; the FLANN object is kept as `kdtree`.
    self.keys = None
    self.values = None
    self.kdtree = FLANN()

    # Cache of all keys that exist in the DND, which keeps DND updates
    # efficient.
    self.key_cache = {}

    # Flag telling whether the index in self.kdtree is stale, so the index
    # only has to be rebuilt when necessary.
    self.stale_index = True

    # Indexes to be updated on a call to update_params, so only the entries
    # of key_cache that need it are rebuilt.
    self.indexes_to_be_updated = set()

    # Keys and values waiting to be inserted into self.keys and self.values
    # when commit_insert is called.
    self.keys_to_be_inserted = None
    self.values_to_be_inserted = None

    # Recently used lookup indexes; these should be moved to the back of
    # self.keys and self.values to get the LRU property.
    self.move_to_back = set()
def load_flann(clusters, flannfn, paramsfn):
    """Load a saved FLANN index and its pickled parameters.

    :param clusters: data passed to ``FLANN.load_index`` together with the index file
    :param flannfn: path of the saved FLANN index
    :param paramsfn: path of the pickle file holding the parameters
    :return: tuple ``(flann, params)``
    """
    from pyflann import FLANN
    import pickle

    flann = FLANN()
    flann.load_index(flannfn, clusters)
    # The context manager closes the file even when unpickling fails.
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(paramsfn, 'rb') as params_file:
        params = pickle.load(params_file)
    return flann, params
def __init__(self, embedding_file, tokenize=tokenize):
    """Load binary word2vec vectors and start with an empty sentence store.

    :param embedding_file: path handed to ``KeyedVectors.load_word2vec_format``
    :param tokenize: tokenizer, stored as ``self.tokenize``
    """
    self.tokenize = tokenize
    self.word2vec_file = embedding_file

    vectors = KeyedVectors.load_word2vec_format(embedding_file, binary=True)
    self.word2vec = vectors
    self.embedding_dim = vectors.vector_size

    # nothing has been added yet
    self.sentence_list = []
    self.sentence_list_tokenized = []
    self.sentence_embedding = np.array([])
    self.flann = FLANN()
def __init__(self, cfg):
    """Read the matching options from ``cfg`` and pick the search backend.

    ``cfg`` must provide the keys ``'mutual_best'``, ``'ratio_test'``,
    ``'ratio'`` and ``'cuda'``.  ``match_fn_1`` looks up one neighbour and
    ``match_fn_2`` two neighbours; both take ``(desc0, desc1)`` and search
    ``desc1`` with ``desc0`` as queries.
    """
    self.mutual_best = cfg['mutual_best']
    self.ratio_test = cfg['ratio_test']
    self.ratio = cfg['ratio']
    self.use_cuda = cfg['cuda']
    self.flann = FLANN()

    if not self.use_cuda:
        # exhaustive ('linear') FLANN search
        self.match_fn_1 = lambda desc0, desc1: self.flann.nn(
            desc1, desc0, 1, algorithm='linear')
        self.match_fn_2 = lambda desc0, desc1: self.flann.nn(
            desc1, desc0, 2, algorithm='linear')
    else:
        self.match_fn_1 = lambda desc0, desc1: find_nearest_point_idx(
            desc1, desc0)
        self.match_fn_2 = lambda desc0, desc1: find_first_and_second_nearest_point(
            desc1, desc0)
def compute_lfs(self):
    """Store in ``self.D['lfs']`` the distance from each coord to its nearest ma point.

    The inner and outer ``ma_coords`` are pooled, rows containing NaN are
    dropped, and an exhaustive FLANN index over the remainder is queried with
    every row of ``self.D['coords']``.
    """
    self.ma_kd_tree = FLANN()

    # collect all ma_coords that are not NaN
    pooled = np.concatenate(
        [self.D['ma_coords_in'], self.D['ma_coords_out']])
    rows_with_nan = np.isnan(pooled).any(axis=1)
    ma_coords = pooled[~rows_with_nan]

    self.ma_kd_tree.build_index(ma_coords, algorithm='linear')

    # we can get *squared* distances for free, so take the square root
    _, squared = self.ma_kd_tree.nn_index(self.D['coords'], 1)
    self.D['lfs'] = np.sqrt(squared)
def adaptive_epsilon(loader, target_epsilon, batch_size):
    """Build a loader whose samples carry an epsilon derived from class separation.

    For every sample the distance to the closest sample of any *other* class
    is looked up with one FLANN index per class.  The distances are
    standardised, mapped to ``z * target_epsilon / 4 + target_epsilon / 2``
    and clipped to ``[target_epsilon / 100, target_epsilon]``.

    :param loader: iterable of ``(X, y)`` tensors; ``y.item()`` is called, so
        each batch must hold exactly one label
    :param target_epsilon: upper bound of the produced epsilons
    :param batch_size: forwarded to ``AdaptiveEpsilonDataset``
    :return: ``DataLoader`` over an ``AdaptiveEpsilonDataset``
    """
    # split dataset into classes
    class_dict = {}
    for X, y in loader:
        class_dict.setdefault(y.item(), []).append(X.numpy())

    # build flann index for each class
    flann_dict = {}
    for label, examples in class_dict.items():
        class_examples = np.array(examples)
        index = FLANN()
        index.build_index(class_examples.reshape(len(class_examples), -1))
        flann_dict[label] = index

    # for each example input, find distance to the closest example input of
    # other classes
    samples = []
    for X, y in loader:
        label = y.item()
        X = X.numpy()
        query = X.reshape(-1)
        smallest_dist = np.inf
        for other_label, index in flann_dict.items():
            if other_label == label:
                continue
            # Take the root of the distances only (not of the index array).
            _, sq_dist = index.nn_index(query, 1)
            dist = float(np.sqrt(np.asarray(sq_dist, dtype=np.float64)).ravel()[0])
            if dist < smallest_dist:
                smallest_dist = dist
        # Plain tuples: packing (array, label, float) into one ndarray is a
        # ragged array, which current NumPy refuses to build.
        samples.append((X, label, smallest_dist))

    # scale the distance to the [target_epsilon/100, target_epsilon] interval
    dists = np.array([sample[2] for sample in samples], dtype=float)
    centered = dists - np.mean(dists)
    spread = np.std(dists)
    # With identical distances the z-score is 0 for everyone, not 0/0.
    z_scores = centered / spread if spread != 0 else np.zeros_like(centered)
    eps = z_scores * (target_epsilon / 4) + (target_epsilon / 2)
    eps = np.clip(eps, target_epsilon / 100, target_epsilon)

    # order by eps (descending)
    new_order = np.argsort(eps)[::-1]

    # create and return dataset loader; Y and eps stay object arrays, as the
    # columns sliced from the former object table were
    X = np.concatenate([samples[i][0] for i in new_order], axis=0)
    Y = np.array([samples[i][1] for i in new_order], dtype=object)
    eps = eps[new_order].astype(object)
    return DataLoader(AdaptiveEpsilonDataset(X, Y, eps, batch_size),
                      batch_size=1, shuffle=True, pin_memory=True)
def create_affinity(X, knn, scale=None, alg="annoy", savepath=None, W_path=None):
    """Load or compute a symmetric k-nearest-neighbour affinity matrix.

    :param X: 2-D data array, one sample per row
    :param knn: neighbours requested per sample; the first returned neighbour
        is dropped, so ``knn - 1`` edges are created per sample
    :param scale: ``None`` gives unit weights, ``True`` uses the median
        neighbour distance as Gaussian scale, any other value is used as the
        Gaussian scale directly
    :param alg: ``"flann"`` searches with FLANN; every other value uses
        ``NearestNeighbors``
    :param savepath: optional ``.npz`` or ``.mat`` path for a freshly
        computed matrix
    :param W_path: optional ``.mat`` or ``.npz`` file to load instead of computing
    :return: the affinity matrix ``W``
    :raises ValueError: if ``W_path`` has an unsupported extension
    """
    N, _ = X.shape

    if W_path is not None:
        if W_path.endswith('.mat'):
            return sio.loadmat(W_path)['W']
        if W_path.endswith('.npz'):
            return sparse.load_npz(W_path)
        # Previously this fell through and failed on an unbound `W`.
        raise ValueError(
            "W_path must end with '.mat' or '.npz', got %r" % (W_path,))

    print('Compute Affinity ')
    start_time = timeit.default_timer()
    if alg == "flann":
        print('with Flann')
        flann = FLANN()
        # NOTE(review): FLANN may return squared distances while
        # NearestNeighbors returns plain ones -- confirm before mixing.
        knnind, dist = flann.nn(X, X, knn, algorithm="kdtree",
                                target_precision=0.9, cores=5)
    else:
        nbrs = NearestNeighbors(n_neighbors=knn).fit(X)
        dist, knnind = nbrs.kneighbors(X)

    # Column 0 is skipped in both the indices and the distances.
    row = np.repeat(np.arange(N), knn - 1)
    col = knnind[:, 1:].flatten()
    if scale is None:
        data = np.ones(N * (knn - 1))
    else:
        if scale is True:
            scale = np.median(dist[:, 1:])
        data = np.exp((-dist[:, 1:]**2) / (2 * scale**2)).flatten()

    # The builtin float replaces the removed `np.float` alias.
    W = sparse.csc_matrix((data, (row, col)), shape=(N, N), dtype=float)
    W = (W + W.transpose(copy=True)) / 2
    elapsed = timeit.default_timer() - start_time
    print(elapsed)

    if isinstance(savepath, str):
        if savepath.endswith('.npz'):
            sparse.save_npz(savepath, W)
        elif savepath.endswith('.mat'):
            sio.savemat(savepath, {'W': W})
    return W
def match(desc1, desc2, dist_ratio=0.6, num_trees=4):
    """Match every row of ``desc1`` to a row of ``desc2`` with a ratio test.

    The two nearest neighbours in ``desc2`` are looked up for each row of
    ``desc1``.  The nearest one is accepted when its distance is below
    ``dist_ratio`` times the distance of the second nearest.

    :return: integer array with one entry per row of ``desc1``; rows that
        fail the test keep the initial value 0
    """
    flann = FLANN()
    result, dists = flann.nn(desc2, desc1, 2,
                             algorithm='kdtree', trees=num_trees)

    matchscores = zeros((desc1.shape[0]), 'int')
    for row, (neighbours, distances) in enumerate(zip(result, dists)):
        idx2, _idx_second_nearest = neighbours
        nearest, second_nearest = distances
        if nearest < dist_ratio * second_nearest:
            matchscores[row] = idx2
    return matchscores
def __init__(self, n_neighbors=5,weights='uniform'):
    """Store the basic KNN parameters and the fixed FLANN settings.

    :param n_neighbors: stored as ``self.n_neighbors``
    :param weights: stored as ``self.weights``
    """
    # Basic KNN parameters
    self.n_neighbors = n_neighbors
    self.weights = weights

    # Hyper parameters of the FLANN algorithm
    self.algrithm_choice = "kmeans"
    self.branching = 32
    self.iterations = 7
    self.checks = 16

    self.flann = FLANN()
def __init__(self, maxlen, seed=0, cores=4, trees=1):
    """Create an empty store backed by a kd-tree FLANN object.

    :param maxlen: stored as ``self.maxlen``
    :param seed: FLANN ``random_seed``
    :param cores: FLANN ``cores``
    :param trees: FLANN ``trees``
    """
    flann_options = dict(
        algorithm='kdtree',
        random_seed=seed,
        cores=cores,
        trees=trees,
    )
    self.flann = FLANN(**flann_options)

    self.maxlen = maxlen
    self.counter = 0
    # {oid: (e,q)}
    self.contents_lookup = {}
    # priority queue; holds (priority_value, oid) entries
    self.p_queue = collections.deque()
class Matcher:
    """Nearest-neighbour descriptor matcher with optional ratio and mutual tests."""

    def __init__(self, cfg):
        """Read the options from ``cfg`` and choose the search backend.

        ``cfg`` must provide ``'mutual_best'``, ``'ratio_test'``, ``'ratio'``
        and ``'cuda'``.  ``match_fn_1`` returns one neighbour per query and
        ``match_fn_2`` two; both take ``(desc0, desc1)`` and search ``desc1``
        with ``desc0`` as queries.
        """
        self.mutual_best = cfg['mutual_best']
        self.ratio_test = cfg['ratio_test']
        self.ratio = cfg['ratio']
        self.use_cuda = cfg['cuda']
        self.flann = FLANN()
        if self.use_cuda:
            self.match_fn_1 = lambda desc0, desc1: find_nearest_point_idx(desc1, desc0)
            self.match_fn_2 = lambda desc0, desc1: find_first_and_second_nearest_point(desc1, desc0)
        else:
            self.match_fn_1 = lambda desc0, desc1: self.flann.nn(desc1, desc0, 1, algorithm='linear')
            self.match_fn_2 = lambda desc0, desc1: self.flann.nn(desc1, desc0, 2, algorithm='linear')

    def match(self, desc0, desc1, *args, **kwargs):
        """Match the rows of ``desc0`` against the rows of ``desc1``.

        :param desc0: query descriptors, one per row
        :param desc1: descriptors that are searched, one per row
        :return: array of shape ``(K, 2)`` holding ``(index in desc0,
            index in desc1)`` for the matches that pass the enabled tests.
            Extra positional and keyword arguments are ignored.
        """
        query_ids = np.arange(desc0.shape[0])
        # The builtin bool replaces the removed `np.bool` alias.
        mask = np.ones(desc0.shape[0], dtype=bool)

        if self.ratio_test:
            idxs, dists = self.match_fn_2(desc0, desc1)
            dists = np.sqrt(dists)  # note the distance is squared
            # keep a match only if it is clearly closer than the runner-up
            mask &= dists[:, 0] / dists[:, 1] < self.ratio
            idxs = idxs[:, 0]
        else:
            idxs, _ = self.match_fn_1(desc0, desc1)

        if self.mutual_best:
            # the best match of the matched desc1 row must point back
            idxs_mutual, _ = self.match_fn_1(desc1, desc0)
            mask &= query_ids == idxs_mutual[idxs]

        matches = np.concatenate([query_ids[:, None], idxs[:, None]], axis=1)
        return matches[mask]
def __init__(self, dcel):
    """Set up the DCELvis window, its bindings and caches, then bind ``dcel``."""
    Tk.__init__(self)
    self.sizex = 700
    self.sizey = 700
    self.window_diagonal = math.sqrt(self.sizex**2 + self.sizey**2)

    self.title("DCELvis")
    self.resizable(0, 0)

    # keyboard shortcuts, bound in this order
    key_handlers = (
        ('q', self.exit),
        ('h', self.print_help),
        ('p', self.print_dcel),
        ('e', self.iteratehedge),
        ('v', self.iteratevertex),
        ('f', self.iterateface),
    )
    for key, handler in key_handlers:
        self.bind(key, handler)

    self.canvas = Canvas(self, bg="white", width=self.sizex, height=self.sizey)
    self.canvas.pack()

    # the "closest" mouse handlers are only bound when FLANN is available
    if WITH_FLANN:
        self.bind("<ButtonRelease>", self.remove_closest)
        self.bind("<Motion>", self.report_closest)

    # text items in the bottom-right and bottom-left corners
    self.coordstext = self.canvas.create_text(
        self.sizex, self.sizey, anchor='se', text='')
    self.info_text = self.canvas.create_text(
        10, self.sizey, anchor='sw', text='')

    self.tx = 0
    self.ty = 0
    self.highlight_cache = []
    self.bgdcel_cache = []
    self.draw = draw(self)

    if WITH_FLANN:
        self.kdtree = FLANN()
    self.D = None

    self.bind_dcel(dcel)
    self.print_help(None)
def __init__(self, sbapp_list, filename, densify, sigma_noise, denoise, **args):
    """Create the shrink-ball window and register its key and mouse handlers.

    The new instance appends itself to ``sbapp_list``.  ``filename``,
    ``densify``, ``sigma_noise`` and ``denoise`` are only used for the window
    title; ``**args`` is forwarded to ``CanvasApp.__init__``.
    """
    CanvasApp.__init__(self, **args)
    self.sbapp_list = sbapp_list
    self.sbapp_list.append(self)
    self.window_diagonal = math.sqrt(self.sizex ** 2 + self.sizey ** 2)

    title = "Shrink the balls [{}] - densify={}x, noise={}, denoise={} ".format(
        filename, densify, sigma_noise, denoise)
    self.toplevel.title(title)

    self.inner_mode = True
    self.draw_stage_geom_mode = "normal"

    # NOTE(review): "h" appears twice; the second binding
    # (toggle_ma_stage_geom) replaces print_help. Confirm the intended keys.
    key_handlers = (
        ("h", self.print_help),
        ("a", self.ma_auto_stepper),
        ("b", self.draw_all_balls),
        ("t", self.toggle_inout),
        ("h", self.toggle_ma_stage_geom),
        ("i", self.draw_topo),
        ("o", self.draw_topo),
        ("u", self.draw_topo),
        ("p", self.draw_topo),
        ("z", self.spawn_mapperapp),
        ("f", self.spawn_filterapp),
        ("s", self.spawn_shrinkhistapp),
        ("1", self.draw_normal_map_lfs),
        ("2", self.draw_normal_map_theta),
        ("3", self.draw_normal_map_lam),
        ("4", self.draw_normal_map_radii),
        ("`", self.draw_normal_map_clear),
        ("c", self.clear_overlays),
    )
    for key, handler in key_handlers:
        self.toplevel.bind(key, handler)

    self.canvas.pack()

    event_handlers = (
        ("<Motion>", self.draw_closest_ball),
        ("<Key>", self.ma_step),
        ("<ButtonRelease>", self.ma_step),
    )
    for sequence, handler in event_handlers:
        self.toplevel.bind(sequence, handler)

    # text items in the bottom-right and bottom-left corners
    self.coordstext = self.canvas.create_text(
        self.sizex, self.sizey, anchor="se", text="")
    self.ball_info_text = self.canvas.create_text(
        10, self.sizey, anchor="sw", text="")

    # caches, all empty to begin with
    self.stage_cache = {1: [], 2: [], 3: []}
    self.topo_cache = []
    self.highlight_point_cache = []
    self.highlight_cache = []
    self.poly_cache = []
    self.normalmap_cache = []

    # no secondary windows yet
    self.mapper_window = None
    self.plotter_window = None
    self.shrinkhist_window = None

    self.kdtree = FLANN()
def compute_lfs(self):
    """Fill ``self.D['lfs']`` with nearest-ma-point distances of all coords.

    An exhaustive FLANN index is built over the inner and outer ``ma_coords``
    rows that contain no NaN and stored as ``self.ma_kd_tree``.
    """
    tree = FLANN()
    self.ma_kd_tree = tree

    # collect all ma_coords that are not NaN
    candidates = np.concatenate(
        [self.D['ma_coords_in'], self.D['ma_coords_out']])
    keep = ~np.isnan(candidates).any(axis=1)
    tree.build_index(candidates[keep], algorithm='linear')

    # we can get *squared* distances for free, so take the square root
    neighbours = tree.nn_index(self.D['coords'], 1)
    self.D['lfs'] = np.sqrt(neighbours[1])
class ground_truth_classifier(object):
    """Decide whether two attribute vectors map to the same stored name."""

    def __init__(self, data_file):
        """Load attributes and names from ``data_file`` and index the attributes."""
        attributes, names = get_data(data_file)
        self.names = names
        self.flann = FLANN()
        self.flann.build_index(attributes, algorithm="autotuned")

    def _nearest_names(self, attrs):
        """Return the names stored for the nearest indexed attribute rows."""
        idx, _ = self.flann.nn_index(attrs)
        return self.names[idx]

    def predict(self, attrs):
        """Split ``attrs`` in two halves and compare the names of their neighbours."""
        attrs1, attrs2 = split_attrs(attrs)
        names1 = self._nearest_names(attrs1)
        names2 = self._nearest_names(attrs2)
        return (names1 == names2)

    def score(self, x, y):
        """Return the fraction of entries of ``y`` matched by ``predict(x)``."""
        predicted = self.predict(x)
        hits = np.sum(y == predicted)
        return (hits / y.size)
def nn_match(descs1, descs2):
    """
    Perform nearest neighbor match, using descriptors.

    This function uses pyflann (kd-tree index with 4 trees).

    :param descs1: descriptors from image 1, (N1, D)
    :param descs2: descriptors from image 2, (N2, D)
    :return indices: for every descriptor of image 1, the index of its match
        among the descriptors of image 2
    """
    matcher = FLANN()
    neighbours = matcher.nn(descs2, descs1, algorithm="kdtree", trees=4)
    indices, _ = neighbours
    return indices
class NearestFilter:
    """Keep only the points (and labels) nearest to a query vector."""

    def __init__(self, k: int):
        """Remember the neighbour count ``k`` and create the FLANN handle."""
        self.flann = FLANN()
        self.k = k

    def filter(self, v, points, labels):
        """Return the entries of ``points`` and ``labels`` nearest to ``v``.

        At most ``self.k`` neighbours are requested, fewer when ``points``
        has fewer rows.  The search is exhaustive (``algorithm='linear'``) and
        ``v`` is converted to float32 first.  The FLANN result must contain
        exactly one row of neighbour indices.
        """
        n_neighbours = min(self.k, len(points))
        query = v.astype('float32')
        found, _ = self.flann.nn(points, query,
                                 num_neighbors=n_neighbours,
                                 algorithm='linear')
        [neighbours_i] = found
        return points[neighbours_i], labels[neighbours_i]
def fit_flann(data, algorithm):
    """Build a FLANN index over ``data`` and return the FLANN object.

    :param data: points handed to ``build_index``
    :param algorithm: FLANN ``algorithm`` setting; all other settings are fixed
    """
    logger.info('Fitting FLANN...')
    from pyflann import FLANN

    settings = {
        'algorithm': algorithm,
        'checks': 32,
        'eps': 0.0,
        'cb_index': 0.5,
        'trees': 1,
        'leaf_max_size': 4,
        'branching': 32,
        'iterations': 5,
        'centers_init': 'random',
        'target_precision': 0.9,
        'build_weight': 0.01,
        'memory_weight': 0.0,
        'sample_fraction': 0.1,
        'log_level': "warning",
        'random_seed': -1,
    }
    matcher = FLANN(**settings)
    matcher.build_index(data)
    return matcher
def __init__(self, word2vec, tokenize, target_word_list=None, ngram=None, window_size=1, min_count=1):
    """Store the embedding model and settings, and start with empty statistics.

    :param word2vec: embedding model; ``vector_size`` and ``vocab`` are read
    :param tokenize: tokenizer, stored as ``self.tokenize``
    :param target_word_list: words added to the vocabulary; ``None`` means none
    :param ngram: stored as ``self.ngram``; ``None`` means ``[1]``
    :param window_size: stored as ``self.window_size``
    :param min_count: stored as ``self.min_count``
    """
    # None replaces the former mutable defaults: the old `[1]` default was
    # stored on the instance and therefore shared between all instances.
    if target_word_list is None:
        target_word_list = []
    if ngram is None:
        ngram = [1]

    self.w2v = word2vec
    self.embedding_dim = self.w2v.vector_size

    self.target_word_list = set(target_word_list)
    # vocabulary = model vocabulary plus every target word
    self.vocab = set(self.w2v.vocab.keys())
    self.vocab.update(self.target_word_list)

    self.tokenize = tokenize
    self.ngram = ngram
    self.window_size = window_size
    self.min_count = min_count

    self.c2v = {}
    self.target_counts = Counter()
    self.alacarte = {}
    self.flann = FLANN()
class TFCoverage(AbstractCoverage):
    """Coverage measured as the number of mutually distant layer outputs.

    A layer output is recorded when its squared distance to everything
    recorded so far exceeds ``distance_threshold``.  Already indexed vectors
    are searched through FLANN, vectors still in the buffer are compared
    exactly.
    """

    def __init__(self, model, subject_layer, distance_threshold):
        """Store the settings and start with no recorded vectors."""
        self.model = model
        self.subject_layer = subject_layer
        self.distance_threshold = distance_threshold
        self.distant_vectors = []
        self.distant_vectors_buffer = []
        self.flann = FLANN()

    def get_measure_state(self):
        """Return ``[distant_vectors, distant_vectors_buffer]``."""
        return [self.distant_vectors, self.distant_vectors_buffer]

    def set_measure_state(self, s):
        """Restore a state from ``get_measure_state``; rebuild the index if the buffer is large."""
        self.distant_vectors, self.distant_vectors_buffer = s[0], s[1]
        if len(self.distant_vectors_buffer) > _BUFFER_SIZE:
            self.build_index_and_flush_buffer()

    def reset_measure_state(self):
        """Delete the FLANN index and forget all recorded vectors."""
        self.flann.delete_index()
        self.distant_vectors = []
        self.distant_vectors_buffer = []

    def get_current_coverage(self, with_implicit_reward=False):
        """Return the number of recorded vectors (the flag is not used)."""
        return len(self.distant_vectors)

    def build_index_and_flush_buffer(self):
        """Empty the buffer and rebuild the index over all recorded vectors."""
        self.distant_vectors_buffer = []
        self.flann.build_index(np.array(self.distant_vectors))

    def test(self, test_inputs, with_implicit_reward=False):
        """Record the outputs of the subject layer that are far from all known ones.

        :return: ``(number of recorded vectors, list of recorded vectors)``
        """
        layer_outs = get_layer_outs_new(self.model, test_inputs)
        pen_layer_outs = layer_outs[self.subject_layer]

        for plo in pen_layer_outs:
            if len(self.distant_vectors) == 0:
                # first vector ever: it becomes the initial index
                self.flann.build_index(plo)
                self.distant_vectors.append(plo)
                continue

            _, approx_distances = self.flann.nn_index(plo, 1)
            exact_distances = []
            for distant_vec in self.distant_vectors_buffer:
                exact_distances.append(np.sum(np.square(plo - distant_vec)))

            nearest_distance = min(exact_distances + approx_distances.tolist())
            if nearest_distance > self.distance_threshold:
                self.distant_vectors_buffer.append(plo)
                self.distant_vectors.append(plo)

        return len(self.distant_vectors), self.distant_vectors
def __init__(self, kernel, num_neighbors, max_memory, lr):
    """
    Define the structure of the DND.

    :param kernel: stored as ``self.kernel``
    :param num_neighbors: stored as ``self.num_neighbors``
    :param max_memory: stored as ``self.max_memory``
    :param lr: stored as ``self.lr``
    """
    self.kernel, self.num_neighbors = kernel, num_neighbors
    self.max_memory, self.lr = max_memory, lr

    self.keys = None
    self.values = None
    self.kdtree = FLANN()

    # key_cache stores a cache of all keys that exist in the DND
    # This makes DND updates efficient
    # (the collection of all keys currently present in the DND)
    self.key_cache = {}

    # stale_index is a flag that indicates whether or not the index in
    # self.kdtree is stale
    # This allows us to only rebuild the kdtree index when necessary
    self.stale_index = True

    # indexes_to_be_updated is the set of indexes to be updated on a call to
    # update_params
    # This allows us to rebuild only the keys of key_cache that need to be
    # rebuilt when necessary
    self.indexes_to_be_updated = set()

    # Keys and value to be inserted into self.keys and self.values when
    # commit_insert is called (used to update keys and their values in bulk)
    self.keys_to_be_inserted = None
    self.values_to_be_inserted = None

    # Move recently used lookup indexes
    # These should be moved to the back of self.keys and self.values to get
    # LRU property
    self.move_to_back = set()
def __init__(self, datadict, maxR, denoise_absmin=None, denoise_delta=None, denoise_min=None, detect_planar=None):
    """Index the input points and allocate the per-point result arrays.

    :param datadict: dict of numpy arrays; ``datadict['coords']`` must be a
        2-D array.  Result arrays are added to this dict in place.
    :param maxR: stored as ``self.SuperR``
    :param denoise_absmin: angle in degrees, stored in radians (or None)
    :param denoise_delta: angle in degrees, stored in radians (or None)
    :param denoise_min: angle in degrees, stored in radians (or None)
    :param detect_planar: angle in degrees, stored in radians (or None)
    """
    self.D = datadict  # dict of numpy arrays

    # linear algorithm means brute force, which means its exact nn, which we need
    # approximate nn may cause algorithm not to converge
    self.flann = FLANN()
    self.flann.build_index(self.D['coords'], algorithm='linear', target_precision=1,
                           sample_fraction=0.001, log_level="info")

    self.m, self.n = datadict['coords'].shape

    # float results start out as NaN ("not computed")
    self.D['ma_coords_in'] = np.full((self.m, self.n), np.nan)
    self.D['ma_coords_out'] = np.full((self.m, self.n), np.nan)
    self.D['ma_radii_in'] = np.full(self.m, np.nan)
    self.D['ma_radii_out'] = np.full(self.m, np.nan)

    # Integer arrays cannot store NaN, and the `np.int` alias no longer
    # exists in NumPy.  The most negative integer marks "not computed"; used
    # as an index by mistake it fails loudly.
    # NOTE(review): older NumPy on common platforms produced this same value
    # for the former `int_array[:] = np.nan` -- confirm nothing relied on it.
    unset = np.iinfo(int).min
    self.D['ma_f1_in'] = np.full(self.m, unset, dtype=int)
    self.D['ma_f1_out'] = np.full(self.m, unset, dtype=int)
    self.D['ma_f2_in'] = np.full(self.m, unset, dtype=int)
    self.D['ma_f2_out'] = np.full(self.m, unset, dtype=int)

    # a list of lists with indices of closest points during the ball
    # shrinking process for every point:
    self.D['ma_shrinkhist_in'] = []
    self.D['ma_shrinkhist_out'] = []

    self.SuperR = maxR

    # Angle options arrive in degrees and are kept in radians; None means
    # the option is disabled and is stored unchanged.
    deg2rad = math.pi / 180
    self.denoise_absmin = None if denoise_absmin is None else deg2rad * denoise_absmin
    self.denoise_delta = None if denoise_delta is None else deg2rad * denoise_delta
    self.denoise_min = None if denoise_min is None else deg2rad * denoise_min
    self.detect_planar = None if detect_planar is None else deg2rad * detect_planar
def kernel_model(scaled_data, xs, ys, parms):
    """
    Estimate the value at the target grid given the exemplars, using
    the specified kernel.

    :param scaled_data: exemplar values, indexable by the neighbour indices
    :param xs: x positions of the exemplars
    :param ys: y positions of the exemplars
    :param parms: settings object; ``psf_grid``, ``flann_precision``, ``knn``,
        ``kernel_parms`` and ``psf_model_shape`` are read
    :return: model values reshaped to ``parms.psf_model_shape``
    :raises ValueError: if ``parms.kernel_parms['type']`` is not ``'gaussian'``
    """
    kernel_type = parms.kernel_parms['type']
    if kernel_type != 'gaussian':
        # Only the gaussian kernel is implemented; without this check the
        # weights `k` below would be undefined.
        raise ValueError("unsupported kernel type: %r" % (kernel_type,))
    gamma = parms.kernel_parms['gamma']

    X = np.vstack((xs, ys,)).T
    x, y = np.meshgrid(parms.psf_grid[0], parms.psf_grid[1])
    T = np.vstack((x.ravel(), y.ravel())).T

    # use flann for distances and indicies
    flann = FLANN()
    p = flann.build_index(X, target_precision=parms.flann_precision,
                          log_level='info')
    # FLANN's search parameter is spelled `checks`.
    inds, dists = flann.nn_index(T, parms.knn, checks=p['checks'])

    # go through the grid and compute the model
    # NOTE(review): FLANN may already return squared distances, in which case
    # `dists ** 2` is a fourth power -- confirm the intended kernel.
    model = np.zeros(T.shape[0])
    for i in range(model.size):
        local_values = scaled_data[inds[i]]
        k = np.exp(-1. * dists[i] ** 2. / gamma ** 2.)
        model[i] = np.sum(k * local_values) / np.sum(k)

    return model.reshape(parms.psf_model_shape)
def __init__(self, dcel):
    """Create the DCELvis window with its bindings and caches, then bind ``dcel``."""
    Tk.__init__(self)
    self.sizex = self.sizey = 700
    self.window_diagonal = math.sqrt(self.sizex**2 + self.sizey**2)
    self.title("DCELvis")
    self.resizable(0,0)

    # keyboard shortcuts: (key, handler), bound in this order
    shortcuts = [
        ('q', self.exit),
        ('h', self.print_help),
        ('p', self.print_dcel),
        ('e', self.iteratehedge),
        ('v', self.iteratevertex),
        ('f', self.iterateface),
    ]
    for key, callback in shortcuts:
        self.bind(key, callback)

    canvas = Canvas(self, bg="white", width=self.sizex, height=self.sizey)
    self.canvas = canvas
    canvas.pack()

    # the "closest" mouse handlers are only bound when FLANN is available
    if WITH_FLANN:
        self.bind("<ButtonRelease>", self.remove_closest)
        self.bind("<Motion>", self.report_closest)

    # text items in the bottom-right and bottom-left corners
    self.coordstext = canvas.create_text(self.sizex, self.sizey, anchor='se', text='')
    self.info_text = canvas.create_text(10, self.sizey, anchor='sw', text='')

    self.tx, self.ty = 0, 0
    self.highlight_cache = []
    self.bgdcel_cache = []
    self.draw = draw(self)

    if WITH_FLANN:
        self.kdtree = FLANN()
    self.D = None

    self.bind_dcel(dcel)
    self.print_help(None)
def __init__(self, servers, remote_dataset, **kwargs):
    """Set up the servers, load the remote dataset, then initialise FLANN.

    :param servers: passed to the private ``__setup_servers`` helper
    :param remote_dataset: passed to the private ``__load_data`` helper
    :param kwargs: forwarded unchanged to ``FLANN.__init__``
    """
    # The two double-underscore helpers are name-mangled, so they resolve to
    # the methods of the class this __init__ is defined in.
    self.__setup_servers(servers)
    self.__load_data(remote_dataset)
    # The base class is initialised explicitly, after the two setup steps.
    FLANN.__init__(self,**kwargs)
class MA(object): def __init__(self, datadict, maxR, denoise_absmin=None, denoise_delta=None, denoise_min=None, detect_planar=None): self.D = datadict # dict of numpy arrays # self.kd_tree = KDTree(self.D['coords']) # linear algorithm means brute force, which means its exact nn, which we need # approximate nn may cause algorithm not to converge self.flann = FLANN() self.flann.build_index(self.D['coords'], algorithm='linear',target_precision=1, sample_fraction=0.001, log_level = "info") # print "constructed kd-tree" self.m, self.n = datadict['coords'].shape self.D['ma_coords_in'] = np.empty( (self.m,self.n) ) self.D['ma_coords_in'][:] = np.nan self.D['ma_coords_out'] = np.empty( (self.m,self.n) ) self.D['ma_coords_out'][:] = np.nan self.D['ma_radii_in'] = np.empty( (self.m) ) self.D['ma_radii_in'][:] = np.nan self.D['ma_radii_out'] = np.empty( (self.m) ) self.D['ma_radii_out'][:] = np.nan self.D['ma_f1_in'] = np.zeros( (self.m), dtype=np.int ) self.D['ma_f1_in'][:] = np.nan self.D['ma_f1_out'] = np.zeros( (self.m), dtype=np.int ) self.D['ma_f1_out'][:] = np.nan self.D['ma_f2_in'] = np.zeros( (self.m), dtype=np.int ) self.D['ma_f2_in'][:] = np.nan self.D['ma_f2_out'] = np.zeros( (self.m), dtype=np.int ) self.D['ma_f2_out'][:] = np.nan # a list of lists with indices of closest points during the ball shrinking process for every point: self.D['ma_shrinkhist_in'] = [] self.D['ma_shrinkhist_out'] = [] self.SuperR = maxR if denoise_absmin is None: self.denoise_absmin = None else: self.denoise_absmin = (math.pi/180)*denoise_absmin if denoise_delta is None: self.denoise_delta = None else: self.denoise_delta = (math.pi/180)*denoise_delta if denoise_min is None: self.denoise_min = None else: self.denoise_min = (math.pi/180)*denoise_min if detect_planar is None: self.detect_planar = None else: self.detect_planar = (math.pi/180)*detect_planar # self.normal_thres = 0.99 def compute_balls_inout(self): for stage in self.compute_balls(inner=True): pass for stage in 
self.compute_balls(inner=False): pass def compute_lfs(self): self.ma_kd_tree = FLANN() # collect all ma_coords that are not NaN ma_coords = np.concatenate([self.D['ma_coords_in'], self.D['ma_coords_out']]) ma_coords = ma_coords[~np.isnan(ma_coords).any(axis=1)] self.ma_kd_tree.build_index(ma_coords, algorithm='linear') # we can get *squared* distances for free, so take the square root self.D['lfs'] = np.sqrt(self.ma_kd_tree.nn_index(self.D['coords'], 1)[1]) def decimate_lfs(self, m, scramble = False, sort = False): i=0 self.D['decimate_lfs'] = np.zeros(self.m) == True plfs = zip(self.D['coords'], self.D['lfs']) if scramble: from random import shuffle shuffle( plfs ) if sort: plfs.sort(key = lambda item: item[1]) plfs.reverse() for p, lfs in plfs: if type(m) is float: qts = self.flann.nn_radius(p, (lfs*m)**2)[0][1:] else: qts = self.flann.nn_radius(p, m.f(lfs)**2)[0][1:] iqts = np.invert(self.D['decimate_lfs'][qts]) if iqts.any(): self.D['decimate_lfs'][i] = True i+=1 def refine_lfs(self, m, scramble = False, sort = False): def brute_force_nn(q, coords): """return index of the closest point in coords""" distances = np.sqrt( np.square( coords[:,0]-q[0] ) + np.square( coords[:,1]-q[1] ) ); return np.argsort(distances)[0] i=0 self.D['decimate_lfs'] = np.zeros(self.m) == False plfs = zip(self.D['coords'], self.D['lfs']) if scramble: from random import shuffle shuffle( plfs ) if sort: plfs.sort(key = lambda item: item[1]) plfs.reverse() tmp_coords = np.array() for p, lfs in plfs: if type(m) is float: qts = self.flann.nn_radius(p, (lfs*m)**2)[0][1:] else: qts = self.flann.nn_radius(p, m.f(lfs)**2)[0][1:] iqts = np.invert(self.D['decimate_lfs'][qts]) if iqts.any(): self.D['decimate_lfs'][i] = True i+=1 def compute_boundary_lenghts_2d(self): '''Compute for every point the boundary distance to the first point''' self.D['bound_len'] = np.zeros(self.m) i=1 for p in self.D['coords'][1:]: self.D['bound_len'][i] = self.D['bound_len'][i-1] + np.linalg.norm(p-self.D['coords'][i-1]) 
i+=1 def compute_lam(self, inner='in'): '''Compute for every boundary point p, corresponding ma point m, and other feature point p_ the distance p-p_ ''' self.D['lam_'+inner] = np.zeros(self.m) self.D['lam_'+inner][:] = np.nan for i, p in enumerate(self.D['coords']): c_p = self.D['ma_coords_'+inner][i] if not np.isnan(c_p[0]): p_ = self.D['coords'][self.D['ma_f2_'+inner][i]] self.D['lam_'+inner][i] = np.linalg.norm(p-p_) def compute_theta(self, inner='in'): '''Compute for every boundary point p, corresponding ma point m, and other feature point p_ the angle p-m-p_ ''' self.D['theta_'+inner] = np.zeros(self.m) self.D['theta_'+inner][:] = np.nan for i, p in enumerate(self.D['coords']): c_p = self.D['ma_coords_'+inner][i] if not np.isnan(c_p[0]): p_ = self.D['coords'][self.D['ma_f2_'+inner][i]] self.D['theta_'+inner][i] = cos_angle(p-c_p, p_-c_p) def decimate_ballco(self, xi=0.1, k=4, inner='in'): self.D['decimate_ballco'] = np.zeros(self.m) == True for i, p in enumerate(self.D['coords']): c_p = self.D['ma_coords_'+inner][i] r_p = self.D['ma_radii_'+inner][i] if not np.isnan(c_p[0]): indices,dists = self.flann.nn_index(p, k+1) # convert indices to coordinates and radii M = [ (self.D['ma_coords_'+inner][index], self.D['ma_radii_'+inner][index]) for index in indices[0][1:] ] for m, r_m in M: # can this medial ball (c_p) be contained by medial ball at m? 
if np.linalg.norm(m-c_p) + r_p < r_m * (1+xi): self.D['decimate_ballco'][i] = True break # ballcos = [ r_m/np.linalg.norm(m-c_p) for m, r_m in M ] # self.D['ballco'][i] = max(ballcos) def decimate_heur(self, xi=0.1, k=3, omega=math.pi/20, inner='in'): '''Decimation based on heuristics as defined in ma (2012)''' cos_omega = math.cos(omega) self.D['filtered'] = np.zeros(self.m) == True for i, p in enumerate(self.D['coords']): c_p = self.D['ma_coords_'+inner][i] r_p = self.D['ma_radii_'+inner][i] if not np.isnan(c_p[0]): # test 1 - angle feature points p_ = self.D['coords'][self.D['ma_f2_'+inner][i]] if cos_angle(p, c_p, p_) < cos_omega: self.D['filtered'][i] = True break # test 2 - ball containmment indices,dists = self.flann.nn_index(p, k+1) M = [ ( self.D['ma_coords_'+inner][index], self.D['ma_radii_'+inner][index] ) for index in indices[0][1:] ] for m, r_m in M: # can this medial ball (c_p) be contained by medial ball at m? if np.linalg.norm(m-c_p) + r_p < r_m * (1+xi): self.D['filtered'][i] = True break def filter_radiuscon(self, alpha, k, inner='in'): '''Filter noisy points based on contuity in radius when compared to near points''' self.D['filter_radiuscon'] = np.zeros(self.m) == True for i, p in enumerate(self.D['coords']): c_p = self.D['ma_coords_'+inner][i] r_p = self.D['ma_radii_'+inner][i] if c_p != None: indices,dists = self.flann.nn_index(p, k+1) # print indices,dists M = [] for index in indices[0][1:]: M.append(self.D['ma_coords_'+inner][index]) # print M L = [] for m in M: # projection_len = np.linalg.norm(proj(m-p,c_p-p)) val = np.linalg.norm(p-m) * cos_angle(m-p, c_p-p) L.append(val) # print L, alpha * max(L), r_p if r_p < alpha * max(L): self.D['filter_radiuscon'][i] = True else: self.D['filter_radiuscon'][i] = False def filter_thetacon(self, theta_min=37, theta_delta=45, theta_absmin=26, inner='in'): """Filter noisy points based on continuity in separation angle as function of the ith iteration in the shrinking ball process""" # TODO: points with 
k=1 now receive no filtering... just discard them? self.D['filter_thetacon'] = np.zeros(self.m) == True theta_min *= (math.pi/180) theta_delta *= (math.pi/180) theta_absmin *= (math.pi/180) def find_optimal_theta(thetas): theta_prev = thetas[0] for j, theta in enumerate(thetas[1:]): if ( (theta_prev - theta) >= theta_delta and theta <= theta_min ) or (theta < theta_absmin): return j theta_prev = theta # print return None for i, p in enumerate(self.D['coords']): p_n = self.D['normals'][i] q_indices = self.D['ma_shrinkhist_'+inner][i] if len(q_indices) <= 1: continue q_coords = self.D['coords'][q_indices] # if not is_inner: p_n = -p_n radii = [ compute_radius(p,p_n,q) for q in q_coords ] centers = [ p - p_n * r for r in radii ] thetas = [ math.acos(cos_angle(p-c,q-c)) for c, q in zip(centers, q_coords) ] optimal_theta = find_optimal_theta(thetas) # print optimal_theta if optimal_theta is not None: self.D['filter_thetacon'][i] = True def compute_balls(self, inner=True, verbose=False): """Balls shrinking algorithm. 
Set `inner` to False when outer balls are wanted.""" for i, pn in enumerate(zip(self.D['coords'], self.D['normals'])): p, n = pn if not inner: n = -n # when approximating 1st point initialize q with random point not equal to p q=p # if i==0: # while (q == p).all(): # random_index = int(rand(1)*self.D['coords'].shape[0]) # q = self.D['coords'][random_index] # r = compute_radius(p,n,q) # forget optimization of r: r=self.SuperR msg='New iteration, initial r = {:.5}'.format(float(r)) if verbose: print msg yield {'stage': 1, 'geom': (p,n), 'msg':msg} r_ = None c = None j = -1 q_i = None q_history = [] while True: j+=1 # initialize r on last found radius if j>0: r = r_ elif j==0 and i>0: r = r # compute ball center c = p - n*r # q_i_previous = q_i msg = 'Current iteration: #' + str(i) +', r = {:.5}'.format(float(r)) if verbose: print msg yield {'stage': 2, 'geom': (q,c,r), 'msg':msg} ### FINDING NEAREST NEIGHBOR OF c # find closest point to c and assign to q indices,dists = self.flann.nn_index(c, 2) # dists, indices = self.kd_tree.query(array([c]), k=2) candidate_c = self.D['coords'][indices] # candidate_n= self.D['normals'][indices] # print 'candidates:', candidates q = candidate_c[0][0] # q_n = candidate_n[0][0] q_i = indices[0][0] # yield {'stage': 3, 'geom': (q)} # What to do if closest point is p itself? 
if (q==p).all(): # 1) if r==SuperR, apparantly no other points on the halfspace spanned by -n => that's an infinite ball if r == self.SuperR: r_ = r break # 2) otherwise just pick the second closest point else: q = candidate_c[0][1] # q_n = candidate_n[0][1] q_i = indices[0][1] q_history.append(q_i) # compute new candidate radius r_ r_ = compute_radius(p,n,q) # print r, r_, p-c, q-c, cos_angle(p-c, q-c) ### BOUNDARY CASES # if r_ < 0 closest point was on the wrong side of plane with normal n => start over with SuperRadius on the right side of that plance if r_ < 0: r_ = self.SuperR # if r_ > SuperR, stop now because otherwise in case of planar surface point configuration, we end up in an infinite loop elif r_ > self.SuperR: # elif cos_angle(p-c, q-c) >= self.normal_thres: r_ = self.SuperR break c_ = p - n*r_ # this seems to work well against noisy ma points. if self.denoise_absmin is not None: if math.acos(cos_angle(p-c_, q-c_)) < self.denoise_absmin and j>0 and r_>np.linalg.norm(q-p): # msg = 'Current iteration: -#' + str(i) +', r = {:.5}'.format(float(r)) # yield {'stage': 2, 'geom': (q,c_,r), 'msg':msg} # keep previous radius: r_=r q_i = q_i_previous break if self.denoise_delta is not None and j>0: theta_now = math.acos(cos_angle(p-c_, q-c_)) q_previous = self.D['coords'][q_i_previous] theta_prev = math.acos(cos_angle(p-c_, q_previous-c_)) if theta_prev-theta_now > self.denoise_delta and theta_now < self.denoise_min and r_>np.linalg.norm(q-p): # print "theta_prev:",theta_prev/math.pi * 180 # print "theta_now:",theta_now/math.pi * 180 # print "self.denoise_delta:",self.denoise_delta/math.pi * 180 # print "self.denoise_min:",self.denoise_min/math.pi * 180 # keep previous radius: r_=r q_i = q_i_previous break if self.detect_planar != None: if math.acos( cos_angle(q-p, -n) ) > self.detect_planar and j<2: # yield {'stage': 2, 'geom': (q,p - n*r_,r_), 'msg':msg} r_= self.SuperR # r_= r # q_i = q_i_previous break ### NORMAL STOP CONDITION # stop iteration if r has 
converged if r == r_: break if inner: inout = 'in' else: inout = 'out' if r_ >= self.SuperR: pass else: self.D['ma_radii_'+inout][i] = r_ self.D['ma_coords_'+inout][i] = c self.D['ma_f1_'+inout][i] = i self.D['ma_f2_'+inout][i] = q_i self.D['ma_shrinkhist_'+inout].append(q_history[:-1]) def construct_topo_2d(self, inner='in', project=True): def arrayindex(A, value): tmp = np.where(A==value) # print tmp, tmp[0].shape if tmp[0].shape != (0,): return tmp[0][0] else: return np.nan self.D['ma_linepieces_'+inner] = list() if project: for index in xrange(1,self.m): index_1 = index-1 # find ma points corresponding to these three feature points f2_p = arrayindex(self.D['ma_f2_'+inner], index_1) f2 = arrayindex(self.D['ma_f2_'+inner], index) f1_p = arrayindex(self.D['ma_f1_'+inner], index_1) f1 = arrayindex(self.D['ma_f1_'+inner], index) # collect unique id's of corresponding ma_coords S = set() for f in [f1,f1_p, f2, f2_p]: if not np.isnan(f): S.add( f ) # this is the linevector we are projecting the ma_coords on: l = self.D['coords'][index] - self.D['coords'][index_1] # compute projections of ma_coords on line l S_ = list() for s in S: # if not np.isnan(self.D['ma_coords_'+inner][s]): S_.append( (projfac(l, self.D['ma_coords_'+inner][s]-self.D['coords'][index_1] ), s) ) # now we can sort them on their x coordinate S_.sort(key=lambda item: item[0]) # now we have the line segments for i in xrange(len(S_)): self.D['ma_linepieces_'+inner].append( (S_[i-1][1], S_[i][1]) ) else: indices = list() for i in xrange(self.m): if not np.isnan(self.D['ma_coords_'+inner][i][0]): indices.append(i) for i in xrange(1,len(indices)): s = indices[i-1] e = indices[i] self.D['ma_linepieces_'+inner].append((s,e))
def __init__(self, data_file):
    """Load the data set in `data_file` and index its attributes with FLANN.

    `get_data` supplies an (attributes, names) pair; the attributes go
    into an autotuned FLANN index and the names are kept on the instance.
    """
    self.flann = FLANN()
    loaded = get_data(data_file)
    attributes, names = loaded
    # "autotuned" leaves the choice of index structure to FLANN
    self.flann.build_index(attributes, algorithm="autotuned")
    self.names = names
def __pipe_match(desc1, desc2):
    """Index `desc1` with FLANN and match `desc2` against it.

    Returns the (fm, fs) pair produced by `mc2.match_vsone`.
    """
    index = FLANN()
    build_params = params.__VSMANY_FLANN_PARAMS__
    index.build_index(desc1, **build_params)
    matches, scores = mc2.match_vsone(desc2, index, 64)
    return matches, scores
def add(self, points):
    """Store `points` as a new cloud and build a kd-tree index for it.

    The cloud and its index are appended to parallel lists, so both
    share the same position.
    """
    self.__clouds.append(points)
    cloud_index = FLANN()
    cloud_index.build_index(points, algorithm='kdtree', trees=self.__n_trees)
    self.__FLANNs.append(cloud_index)
class ShinkkingBallApp(CanvasApp):
    """Canvas window that steps through and draws the shrinking-ball computation.

    Each instance appends itself to the shared ``sbapp_list`` so that
    hovering in one window highlights the same ball in every listed window.
    """

    def __init__(self, sbapp_list, filename, densify, sigma_noise, denoise, **args):
        CanvasApp.__init__(self, **args)

        self.sbapp_list = sbapp_list
        self.sbapp_list.append(self)

        self.window_diagonal = math.sqrt(self.sizex**2 + self.sizey**2)

        self.toplevel.title(
            "Shrink the balls [{}] - densify={}x, noise={}, denoise={} ".
            format(filename, densify, sigma_noise, denoise))
        self.toplevel.bind('h', self.print_help)
        self.toplevel.bind('a', self.ma_auto_stepper)
        self.toplevel.bind('b', self.draw_all_balls)
        self.toplevel.bind('t', self.toggle_inout)
        # NOTE(review): 'h' is bound a second time here. If toplevel is a
        # Tkinter widget this replaces the print_help binding above, so
        # help is then only printed from bind_ma. Confirm which key each
        # action should own.
        self.toplevel.bind('h', self.toggle_ma_stage_geom)
        self.inner_mode = True
        self.draw_stage_geom_mode = 'normal'

        self.toplevel.bind('i', self.draw_topo)
        self.toplevel.bind('o', self.draw_topo)
        self.toplevel.bind('u', self.draw_topo)
        self.toplevel.bind('p', self.draw_topo)

        self.toplevel.bind('z', self.spawn_mapperapp)
        self.toplevel.bind('f', self.spawn_filterapp)
        self.toplevel.bind('s', self.spawn_shrinkhistapp)
        self.toplevel.bind('1', self.draw_normal_map_lfs)
        self.toplevel.bind('2', self.draw_normal_map_theta)
        self.toplevel.bind('3', self.draw_normal_map_lam)
        self.toplevel.bind('4', self.draw_normal_map_radii)
        self.toplevel.bind('`', self.draw_normal_map_clear)

        self.toplevel.bind('c', self.clear_overlays)
        self.canvas.pack()

        self.toplevel.bind("<Motion>", self.draw_closest_ball)
        self.toplevel.bind("<Key>", self.ma_step)
        self.toplevel.bind("<ButtonRelease>", self.ma_step)
        self.coordstext = self.canvas.create_text(self.sizex, self.sizey, anchor='se', text='')
        self.ball_info_text = self.canvas.create_text(10, self.sizey, anchor='sw', text='')

        # canvas item ids, grouped by what drew them
        self.stage_cache = {1: [], 2: [], 3: []}
        self.topo_cache = []
        self.highlight_point_cache = []
        self.highlight_cache = []
        self.poly_cache = []
        self.normalmap_cache = []

        self.mapper_window = None
        self.plotter_window = None
        # spawn_filterapp stores its window under this name; initialise it
        # so the attribute exists before the filter app is opened
        self.plot_window = None
        self.shrinkhist_window = None

        self.kdtree = FLANN()

    def toggle_ma_stage_geom(self, event):
        """Switch between clearing and keeping the stage-2 geometry between steps."""
        if self.draw_stage_geom_mode == 'normal':
            self.draw_stage_geom_mode = 'dontclear'
        else:
            self.draw_stage_geom_mode = 'normal'

    def spawn_shrinkhistapp(self, event):
        self.ma_ensure_complete()
        self.shrinkhist_window = ShrinkHistApp(self)

    def spawn_mapperapp(self, event):
        self.ma_ensure_complete()
        self.mapper_window = MapperApp(self)

    def spawn_filterapp(self, event):
        self.ma_ensure_complete()
        self.plot_window = FilterApp(self)

    def update_mouse_coords(self, event):
        self.mouse_x = event.x
        self.mouse_y = event.y

    def toggle_inout(self, event):
        self.inner_mode = not self.inner_mode

    def print_help(self, event):
        print(HELP)

    def bind_ma(self, ma, draw_poly=True):
        """Attach the medial axis object `ma`, draw its input and index its points."""
        self.ma = ma
        self.ma_inner = True
        self.ma_complete = False
        self.ma_gen = ma.compute_balls(inner=self.ma_inner)

        coords = ma.D['coords']
        minx = coords[:, 0].min()
        miny = coords[:, 1].min()
        maxx = coords[:, 0].max()
        maxy = coords[:, 1].max()

        self.set_transform(minx, maxx, miny, maxy)
        self.normal_scale = 0.02 * (self.window_diagonal / self.scale)

        if draw_poly:
            self.draw.polygon(ma.D['coords'], fill="#eeeeee")
        for p, n in zip(ma.D['coords'], ma.D['normals']):
            self.draw.normal(p, n, s=self.normal_scale, fill='#888888', width=1)
        self.kdtree.build_index(self.ma.D['coords'], algorithm='linear')
        self.print_help(None)
        self.canvas.update_idletasks()

    def ma_ensure_complete(self):
        """Run the stepper until both the inner and the outer pass are done."""
        while not self.ma_complete:
            self.ma_auto_stepper(None)

    def ma_auto_stepper(self, event):
        self.ma_stepper(mode='auto_step')

    def ma_step(self, event):
        self.ma_stepper(mode='onestep')

    def ma_stepper(self, mode):
        """Advance the ball generator by one step ('onestep') or to its end ('auto_step')."""

        def step_and_draw():
            d = next(self.ma_gen)
            self.ma_draw_stage(d)

        try:
            if mode == 'onestep':
                step_and_draw()
            elif mode == 'auto_step':
                while True:
                    step_and_draw()
        except StopIteration:
            # The inner pass runs first. Once the outer pass has finished
            # as well, the derived quantities are computed.
            if not self.ma_inner:
                self.ma.compute_lfs()
                self.ma.compute_lam()
                self.ma.compute_theta()
                self.ma.compute_lam(inner="out")
                self.ma.compute_theta(inner="out")
                self.ma_complete = True
            self.ma_inner = not self.ma_inner
            self.ma_gen = self.ma.compute_balls(self.ma_inner)

    def ma_draw_stage(self, d):
        """Draw one step dict as yielded by the ball generator."""
        if d['stage'] == 1:
            try:
                # drop the third stage-2 item from the cache so the
                # deleteCache call below leaves it on the canvas
                self.stage_cache[2].remove(self.stage_cache[2][2])
            except IndexError:
                pass
            self.deleteCache([1, 2, 3])
            p, n = d['geom']
            l = self.window_diagonal  # line length - depends on windows size
            i = self.draw.point(p[0], p[1], size=8, fill='red', outline='')
            j = self.draw.edge(
                (p[0]+n[0]*l, p[1]+n[1]*l),
                (p[0]-n[0]*l, p[1]-n[1]*l),
                width=1, fill='blue', dash=(4,2))
            self.stage_cache[1] = [i, j]
            self.canvas.itemconfig(self.coordstext, text=d['msg'])

        elif d['stage'] == 2:
            if self.draw_stage_geom_mode == 'normal':
                self.draw.deleteItems(self.stage_cache[2])
            q, c, r = d['geom']
            i = self.draw.point(q[0], q[1], size=4, fill='blue', outline='')
            j = self.draw.point(c[0], c[1], size=r * self.scale, fill='', outline='blue')
            k = self.draw.point(c[0], c[1], size=2, fill='blue', outline='')
            self.stage_cache[2] = [i, j, k]
            self.canvas.itemconfig(self.coordstext, text=d['msg'])

    def draw_highlight_points(self, key, val, how, inner='in'):
        """Mark medial points whose `key` value is 'greater', 'smaller' or 'equal' to `val`."""
        self.draw.deleteItems(self.highlight_cache)
        for m, v in zip(self.ma.D['ma_coords_' + inner], self.ma.D[key]):
            if np.isnan(v):
                continue
            selected = ((how == 'greater' and v > val)
                        or (how == 'smaller' and v < val)
                        or (how == 'equal' and v == val))
            if selected:
                i = self.draw.point(m[0], m[1], size=4, fill='', outline='red', width=2)
                self.highlight_cache.append(i)

    def draw_topo(self, event):
        """Draw the line pieces for the key pressed: i/u inner, o/p outer; u/p projected."""
        inner = {'i': 'in', 'u': 'in', 'o': 'out', 'p': 'out'}.get(event.char)
        if inner is None:
            # not one of the four topology keys; `inner` used to be left
            # unbound in this case
            return
        project = event.char in ('p', 'u')

        self.draw.deleteItems(self.topo_cache)
        self.ma.construct_topo_2d(inner, project)

        for start, end in self.ma.D['ma_linepieces_' + inner]:
            s_e = self.ma.D['ma_coords_' + inner][start]
            e_e = self.ma.D['ma_coords_' + inner][end]
            i = self.draw.edge(s_e, e_e, fill='blue', width=1)
            self.topo_cache.append(i)

    def draw_all_balls(self, event):
        self.draw.deleteItems(self.highlight_cache)
        for p_i in range(self.ma.m):
            self.draw_medial_ball(p_i, with_points=False)

    def _closest_point_index(self, event):
        """Return the index of the input point nearest to the event position."""
        x, y = self.t_(event.x, event.y)
        q = np.array([x, y])
        return self.kdtree.nn_index(q, 1)[0][0]

    def draw_closest_ball(self, event):
        p_i = self._closest_point_index(event)
        for sbapp in self.sbapp_list:
            sbapp.highlight_single_ball(p_i)

    def highlight_single_ball(self, p_i):
        """Highlight the ball of point `p_i` and show its numbers in the info text."""
        inner = 'in' if self.inner_mode else 'out'

        # plot the shrink history of this ball:
        if self.shrinkhist_window is not None:
            self.shrinkhist_window.update_plot(p_i, inner)

        def get_ball_info_text(p_i):
            if 'lfs' not in self.ma.D:
                return ""
            return "lfs\t{0:.2f}\nr\t{2:.2f}\nlambda\t{1:.2f}\ntheta\t{3:.2f} ({4:.2f} deg)\nk\t{5}\nplanar\t{6:.2f} deg".format(
                self.ma.D['lfs'][p_i],
                self.ma.D['lam_'+inner][p_i],
                self.ma.D['ma_radii_'+inner][p_i],
                self.ma.D['theta_'+inner][p_i],
                (180/math.pi) * math.acos(self.ma.D['theta_'+inner][p_i]),
                len(self.ma.D['ma_shrinkhist_'+inner][p_i]),
                (90/math.pi)*( math.pi - math.acos(self.ma.D['theta_'+inner][p_i]) ))

        self.draw.deleteItems(self.highlight_point_cache)
        self.draw_medial_ball(p_i)
        self.draw_lfs_ball(p_i)
        self.canvas.itemconfig(self.ball_info_text, text=get_ball_info_text(p_i))

    def draw_medial_ball(self, p_i, with_points=True):
        """Draw the medial ball of point `p_i`, optionally with its defining points."""
        inner = 'in' if self.inner_mode else 'out'

        p1x, p1y = self.ma.D['coords'][p_i][0], self.ma.D['coords'][p_i][1]
        ma_px, ma_py = self.ma.D['ma_coords_' + inner][p_i][0], self.ma.D['ma_coords_' + inner][p_i][1]

        # NaN marks a point without a medial ball
        if np.isnan(ma_px):
            return

        f2 = self.ma.D['ma_f2_' + inner][p_i]
        p2x, p2y = self.ma.D['coords'][f2][0], self.ma.D['coords'][f2][1]
        r = self.ma.D['ma_radii_' + inner][p_i]

        ball = self.draw.point(ma_px, ma_py, size=r * self.scale, width=1, fill='', outline='red', dash=(4, 2, 1))

        if with_points:
            self.highlight_point_cache.append(
                self.draw.point(p1x, p1y, size=4, fill='', outline='red', width=2))
            self.highlight_point_cache.append(
                self.draw.point(p2x, p2y, size=4, fill='', outline='purple', width=2))
            self.highlight_point_cache.append(
                self.draw.point(ma_px, ma_py, size=4, fill='', outline='blue', dash=(1), width=2))
            self.highlight_point_cache.append(ball)
        else:
            self.highlight_cache.append(ball)

    def draw_closest_lfs_ball(self, event):
        self.draw_lfs_ball(self._closest_point_index(event))

    def draw_lfs_ball(self, p_i):
        """Draw a circle of radius lfs around point `p_i`, once lfs has been computed."""
        if 'lfs' in self.ma.D:
            p1x, p1y = self.ma.D['coords'][p_i][0], self.ma.D['coords'][p_i][1]
            lfs = self.ma.D['lfs'][p_i]

            if not np.isnan(lfs):
                self.highlight_point_cache.append(
                    self.draw.point(p1x, p1y, size=lfs * self.scale, fill='', outline='#888888', dash=(2, 1)))

    def draw_decimate_lfs(self, epsilon):
        self.ma.decimate_lfs(epsilon)

        dropped, total = np.count_nonzero(self.ma.D['decimate_lfs']), self.ma.m
        print('LFS decimation e={}: {} from {} points are dropped ({:.2f}%)'.format(
            epsilon, dropped, total, float(dropped) / total * 100))

        self.draw.deleteItems(self.poly_cache)
        i = self.draw.polygon_alternating_edge(
            self.ma.D['coords'][np.invert(self.ma.D['decimate_lfs'])], width=3)
        self.poly_cache.extend(i)

    def draw_decimate_ballco(self, xi, k):
        self.ma.decimate_ballco(xi, k)

        dropped, total = np.count_nonzero(self.ma.D['decimate_ballco']), self.ma.m
        print('BALLCO decimation xi={}, k={}: {} from {} points are dropped ({:.2f}%)'.format(
            xi, k, dropped, total, float(dropped) / total * 100))

        self.draw.deleteItems(self.poly_cache)
        i = self.draw.polygon_alternating_edge(
            self.ma.D['coords'][np.invert(self.ma.D['decimate_ballco'])], width=3)
        self.poly_cache.extend(i)

    def draw_normal_map_lfs(self, event):
        self.draw_normal_map('lfs', 40)

    def draw_normal_map_theta(self, event):
        self.draw_normal_map('theta_in', 30)

    def draw_normal_map_lam(self, event):
        self.draw_normal_map('lam_in', 30)

    def draw_normal_map_radii(self, event):
        self.draw_normal_map('ma_radii_in', 30)

    def draw_normal_map_clear(self, event):
        self.draw.deleteItems(self.normalmap_cache)

    def draw_normal_map(self, key, scale=30):
        """Draw, along each normal, a line whose length is the `key` value relative to its maximum."""
        self.draw.deleteItems(self.normalmap_cache)
        max_val = np.nanmax(self.ma.D[key])
        for p, p_n, val in zip(self.ma.D['coords'], self.ma.D['normals'], self.ma.D[key]):
            s = scale * (val / max_val)
            i = self.draw.normal(p, p_n, s=s, width=2, fill='red')
            self.normalmap_cache.append(i)

    def clear_overlays(self, event):
        self.draw.deleteItems(self.topo_cache)
        self.draw.deleteItems(self.highlight_cache)
        self.draw.deleteItems(self.poly_cache)

    def deleteCache(self, stages):
        for s in stages:
            self.draw.deleteItems(self.stage_cache[s])
class ShinkkingBallApp(CanvasApp):
    """Canvas window that steps through and draws the shrinking-ball computation.

    Each instance appends itself to the shared ``sbapp_list`` so that
    hovering in one window highlights the same ball in every listed window.
    """

    def __init__(self, sbapp_list, filename, densify, sigma_noise, denoise, **args):
        CanvasApp.__init__(self, **args)

        self.sbapp_list = sbapp_list
        self.sbapp_list.append(self)

        self.window_diagonal = math.sqrt(self.sizex ** 2 + self.sizey ** 2)

        self.toplevel.title(
            "Shrink the balls [{}] - densify={}x, noise={}, denoise={} ".format(filename, densify, sigma_noise, denoise)
        )
        self.toplevel.bind("h", self.print_help)
        self.toplevel.bind("a", self.ma_auto_stepper)
        self.toplevel.bind("b", self.draw_all_balls)
        self.toplevel.bind("t", self.toggle_inout)
        # NOTE(review): "h" is bound a second time here. If toplevel is a
        # Tkinter widget this replaces the print_help binding above, so
        # help is then only printed from bind_ma. Confirm which key each
        # action should own.
        self.toplevel.bind("h", self.toggle_ma_stage_geom)
        self.inner_mode = True
        self.draw_stage_geom_mode = "normal"

        for topo_key in ("i", "o", "u", "p"):
            self.toplevel.bind(topo_key, self.draw_topo)

        self.toplevel.bind("z", self.spawn_mapperapp)
        self.toplevel.bind("f", self.spawn_filterapp)
        self.toplevel.bind("s", self.spawn_shrinkhistapp)
        self.toplevel.bind("1", self.draw_normal_map_lfs)
        self.toplevel.bind("2", self.draw_normal_map_theta)
        self.toplevel.bind("3", self.draw_normal_map_lam)
        self.toplevel.bind("4", self.draw_normal_map_radii)
        self.toplevel.bind("`", self.draw_normal_map_clear)

        self.toplevel.bind("c", self.clear_overlays)
        self.canvas.pack()

        self.toplevel.bind("<Motion>", self.draw_closest_ball)
        self.toplevel.bind("<Key>", self.ma_step)
        self.toplevel.bind("<ButtonRelease>", self.ma_step)
        self.coordstext = self.canvas.create_text(self.sizex, self.sizey, anchor="se", text="")
        self.ball_info_text = self.canvas.create_text(10, self.sizey, anchor="sw", text="")

        # canvas item ids, grouped by what drew them
        self.stage_cache = {1: [], 2: [], 3: []}
        self.topo_cache = []
        self.highlight_point_cache = []
        self.highlight_cache = []
        self.poly_cache = []
        self.normalmap_cache = []

        self.mapper_window = None
        self.plotter_window = None
        # spawn_filterapp stores its window under this name; initialise it
        # so the attribute exists before the filter app is opened
        self.plot_window = None
        self.shrinkhist_window = None

        self.kdtree = FLANN()

    def toggle_ma_stage_geom(self, event):
        """Switch between clearing and keeping the stage-2 geometry between steps."""
        if self.draw_stage_geom_mode == "normal":
            self.draw_stage_geom_mode = "dontclear"
        else:
            self.draw_stage_geom_mode = "normal"

    def spawn_shrinkhistapp(self, event):
        self.ma_ensure_complete()
        self.shrinkhist_window = ShrinkHistApp(self)

    def spawn_mapperapp(self, event):
        self.ma_ensure_complete()
        self.mapper_window = MapperApp(self)

    def spawn_filterapp(self, event):
        self.ma_ensure_complete()
        self.plot_window = FilterApp(self)

    def update_mouse_coords(self, event):
        self.mouse_x = event.x
        self.mouse_y = event.y

    def toggle_inout(self, event):
        self.inner_mode = not self.inner_mode

    def print_help(self, event):
        print(HELP)

    def bind_ma(self, ma, draw_poly=True):
        """Attach the medial axis object `ma`, draw its input and index its points."""
        self.ma = ma
        self.ma_inner = True
        self.ma_complete = False
        self.ma_gen = ma.compute_balls(inner=self.ma_inner)

        coords = ma.D["coords"]
        minx = coords[:, 0].min()
        miny = coords[:, 1].min()
        maxx = coords[:, 0].max()
        maxy = coords[:, 1].max()

        self.set_transform(minx, maxx, miny, maxy)
        self.normal_scale = 0.02 * (self.window_diagonal / self.scale)

        if draw_poly:
            self.draw.polygon(ma.D["coords"], fill="#eeeeee")
        for p, n in zip(ma.D["coords"], ma.D["normals"]):
            self.draw.normal(p, n, s=self.normal_scale, fill="#888888", width=1)
        self.kdtree.build_index(self.ma.D["coords"], algorithm="linear")
        self.print_help(None)
        self.canvas.update_idletasks()

    def ma_ensure_complete(self):
        """Run the stepper until both the inner and the outer pass are done."""
        while not self.ma_complete:
            self.ma_auto_stepper(None)

    def ma_auto_stepper(self, event):
        self.ma_stepper(mode="auto_step")

    def ma_step(self, event):
        self.ma_stepper(mode="onestep")

    def ma_stepper(self, mode):
        """Advance the ball generator by one step ("onestep") or to its end ("auto_step")."""

        def step_and_draw():
            d = next(self.ma_gen)
            self.ma_draw_stage(d)

        try:
            if mode == "onestep":
                step_and_draw()
            elif mode == "auto_step":
                while True:
                    step_and_draw()
        except StopIteration:
            # The inner pass runs first. Once the outer pass has finished
            # as well, the derived quantities are computed.
            if not self.ma_inner:
                self.ma.compute_lfs()
                self.ma.compute_lam()
                self.ma.compute_theta()
                self.ma.compute_lam(inner="out")
                self.ma.compute_theta(inner="out")
                self.ma_complete = True
            self.ma_inner = not self.ma_inner
            self.ma_gen = self.ma.compute_balls(self.ma_inner)

    def ma_draw_stage(self, d):
        """Draw one step dict as yielded by the ball generator."""
        if d["stage"] == 1:
            try:
                # drop the third stage-2 item from the cache so the
                # deleteCache call below leaves it on the canvas
                self.stage_cache[2].remove(self.stage_cache[2][2])
            except IndexError:
                pass
            self.deleteCache([1, 2, 3])
            p, n = d["geom"]
            l = self.window_diagonal  # line length - depends on windows size
            i = self.draw.point(p[0], p[1], size=8, fill="red", outline="")
            j = self.draw.edge(
                (p[0] + n[0] * l, p[1] + n[1] * l),
                (p[0] - n[0] * l, p[1] - n[1] * l),
                width=1,
                fill="blue",
                dash=(4, 2),
            )
            self.stage_cache[1] = [i, j]
            self.canvas.itemconfig(self.coordstext, text=d["msg"])

        elif d["stage"] == 2:
            if self.draw_stage_geom_mode == "normal":
                self.draw.deleteItems(self.stage_cache[2])
            q, c, r = d["geom"]
            i = self.draw.point(q[0], q[1], size=4, fill="blue", outline="")
            j = self.draw.point(c[0], c[1], size=r * self.scale, fill="", outline="blue")
            k = self.draw.point(c[0], c[1], size=2, fill="blue", outline="")
            self.stage_cache[2] = [i, j, k]
            self.canvas.itemconfig(self.coordstext, text=d["msg"])

    def draw_highlight_points(self, key, val, how, inner="in"):
        """Mark medial points whose `key` value is "greater", "smaller" or "equal" to `val`."""
        self.draw.deleteItems(self.highlight_cache)
        for m, v in zip(self.ma.D["ma_coords_" + inner], self.ma.D[key]):
            if np.isnan(v):
                continue
            selected = (
                (how == "greater" and v > val)
                or (how == "smaller" and v < val)
                or (how == "equal" and v == val)
            )
            if selected:
                i = self.draw.point(m[0], m[1], size=4, fill="", outline="red", width=2)
                self.highlight_cache.append(i)

    def draw_topo(self, event):
        """Draw the line pieces for the key pressed: i/u inner, o/p outer; u/p projected."""
        inner = {"i": "in", "u": "in", "o": "out", "p": "out"}.get(event.char)
        if inner is None:
            # not one of the four topology keys; `inner` used to be left
            # unbound in this case
            return
        project = event.char in ("p", "u")

        self.draw.deleteItems(self.topo_cache)
        self.ma.construct_topo_2d(inner, project)

        for start, end in self.ma.D["ma_linepieces_" + inner]:
            s_e = self.ma.D["ma_coords_" + inner][start]
            e_e = self.ma.D["ma_coords_" + inner][end]
            i = self.draw.edge(s_e, e_e, fill="blue", width=1)
            self.topo_cache.append(i)

    def draw_all_balls(self, event):
        self.draw.deleteItems(self.highlight_cache)
        for p_i in range(self.ma.m):
            self.draw_medial_ball(p_i, with_points=False)

    def _closest_point_index(self, event):
        """Return the index of the input point nearest to the event position."""
        x, y = self.t_(event.x, event.y)
        q = np.array([x, y])
        return self.kdtree.nn_index(q, 1)[0][0]

    def draw_closest_ball(self, event):
        p_i = self._closest_point_index(event)
        for sbapp in self.sbapp_list:
            sbapp.highlight_single_ball(p_i)

    def highlight_single_ball(self, p_i):
        """Highlight the ball of point `p_i` and show its numbers in the info text."""
        inner = "in" if self.inner_mode else "out"

        # plot the shrink history of this ball:
        if self.shrinkhist_window is not None:
            self.shrinkhist_window.update_plot(p_i, inner)

        def get_ball_info_text(p_i):
            if "lfs" not in self.ma.D:
                return ""
            return "lfs\t{0:.2f}\nr\t{2:.2f}\nlambda\t{1:.2f}\ntheta\t{3:.2f} ({4:.2f} deg)\nk\t{5}\nplanar\t{6:.2f} deg".format(
                self.ma.D["lfs"][p_i],
                self.ma.D["lam_" + inner][p_i],
                self.ma.D["ma_radii_" + inner][p_i],
                self.ma.D["theta_" + inner][p_i],
                (180 / math.pi) * math.acos(self.ma.D["theta_" + inner][p_i]),
                len(self.ma.D["ma_shrinkhist_" + inner][p_i]),
                (90 / math.pi) * (math.pi - math.acos(self.ma.D["theta_" + inner][p_i])),
            )

        self.draw.deleteItems(self.highlight_point_cache)
        self.draw_medial_ball(p_i)
        self.draw_lfs_ball(p_i)
        self.canvas.itemconfig(self.ball_info_text, text=get_ball_info_text(p_i))

    def draw_medial_ball(self, p_i, with_points=True):
        """Draw the medial ball of point `p_i`, optionally with its defining points."""
        inner = "in" if self.inner_mode else "out"

        p1x, p1y = self.ma.D["coords"][p_i][0], self.ma.D["coords"][p_i][1]
        ma_px, ma_py = self.ma.D["ma_coords_" + inner][p_i][0], self.ma.D["ma_coords_" + inner][p_i][1]

        # NaN marks a point without a medial ball
        if np.isnan(ma_px):
            return

        f2 = self.ma.D["ma_f2_" + inner][p_i]
        p2x, p2y = self.ma.D["coords"][f2][0], self.ma.D["coords"][f2][1]
        r = self.ma.D["ma_radii_" + inner][p_i]

        ball = self.draw.point(ma_px, ma_py, size=r * self.scale, width=1, fill="", outline="red", dash=(4, 2, 1))

        if with_points:
            self.highlight_point_cache.append(self.draw.point(p1x, p1y, size=4, fill="", outline="red", width=2))
            self.highlight_point_cache.append(self.draw.point(p2x, p2y, size=4, fill="", outline="purple", width=2))
            self.highlight_point_cache.append(
                self.draw.point(ma_px, ma_py, size=4, fill="", outline="blue", dash=(1), width=2)
            )
            self.highlight_point_cache.append(ball)
        else:
            self.highlight_cache.append(ball)

    def draw_closest_lfs_ball(self, event):
        self.draw_lfs_ball(self._closest_point_index(event))

    def draw_lfs_ball(self, p_i):
        """Draw a circle of radius lfs around point `p_i`, once lfs has been computed."""
        if "lfs" in self.ma.D:
            p1x, p1y = self.ma.D["coords"][p_i][0], self.ma.D["coords"][p_i][1]
            lfs = self.ma.D["lfs"][p_i]

            if not np.isnan(lfs):
                self.highlight_point_cache.append(
                    self.draw.point(p1x, p1y, size=lfs * self.scale, fill="", outline="#888888", dash=(2, 1))
                )

    def draw_decimate_lfs(self, epsilon):
        self.ma.decimate_lfs(epsilon)

        dropped, total = np.count_nonzero(self.ma.D["decimate_lfs"]), self.ma.m
        print(
            "LFS decimation e={}: {} from {} points are dropped ({:.2f}%)".format(
                epsilon, dropped, total, float(dropped) / total * 100
            )
        )

        self.draw.deleteItems(self.poly_cache)
        i = self.draw.polygon_alternating_edge(self.ma.D["coords"][np.invert(self.ma.D["decimate_lfs"])], width=3)
        self.poly_cache.extend(i)

    def draw_decimate_ballco(self, xi, k):
        self.ma.decimate_ballco(xi, k)

        dropped, total = np.count_nonzero(self.ma.D["decimate_ballco"]), self.ma.m
        print(
            "BALLCO decimation xi={}, k={}: {} from {} points are dropped ({:.2f}%)".format(
                xi, k, dropped, total, float(dropped) / total * 100
            )
        )

        self.draw.deleteItems(self.poly_cache)
        i = self.draw.polygon_alternating_edge(self.ma.D["coords"][np.invert(self.ma.D["decimate_ballco"])], width=3)
        self.poly_cache.extend(i)

    def draw_normal_map_lfs(self, event):
        self.draw_normal_map("lfs", 40)

    def draw_normal_map_theta(self, event):
        self.draw_normal_map("theta_in", 30)

    def draw_normal_map_lam(self, event):
        self.draw_normal_map("lam_in", 30)

    def draw_normal_map_radii(self, event):
        self.draw_normal_map("ma_radii_in", 30)

    def draw_normal_map_clear(self, event):
        self.draw.deleteItems(self.normalmap_cache)

    def draw_normal_map(self, key, scale=30):
        """Draw, along each normal, a line whose length is the `key` value relative to its maximum."""
        self.draw.deleteItems(self.normalmap_cache)
        max_val = np.nanmax(self.ma.D[key])
        for p, p_n, val in zip(self.ma.D["coords"], self.ma.D["normals"], self.ma.D[key]):
            s = scale * (val / max_val)
            i = self.draw.normal(p, p_n, s=s, width=2, fill="red")
            self.normalmap_cache.append(i)

    def clear_overlays(self, event):
        self.draw.deleteItems(self.topo_cache)
        self.draw.deleteItems(self.highlight_cache)
        self.draw.deleteItems(self.poly_cache)

    def deleteCache(self, stages):
        for s in stages:
            self.draw.deleteItems(self.stage_cache[s])
#!/usr/bin/env python
"""Build a FLANN index for a saved NumPy array and store it with its parameters.

Usage: <script> TARGET_PRECISION DATA.npy INDEX_OUT PARAMS_OUT
"""
from pyflann import FLANN
import numpy as np
import sys
import pickle

if __name__ == "__main__":
    if len(sys.argv) < 5:
        sys.exit("usage: {} TARGET_PRECISION DATA.npy INDEX_OUT PARAMS_OUT".format(sys.argv[0]))

    flann = FLANN()
    target_precision = float(sys.argv[1])
    params = flann.build_index(np.load(sys.argv[2]),
                               target_precision=target_precision,
                               log_level="info")
    # two spaces after the colon match what the former print statement wrote
    print("FLANN params:  {}".format(params))

    # Save the FLANN
    flann.save_index(sys.argv[3])

    # Save the params; the with-block closes the file even if pickling fails
    with open(sys.argv[4], 'wb') as p_output:
        pickle.dump(params, p_output)
class W2VAverageEmbedding():
    """Sentence search based on the average of word2vec word vectors.

    A sentence is embedded as the mean of the vectors of its known words;
    fitted sentences are indexed with FLANN for nearest-neighbour lookup.
    """

    def __init__(self, embedding_file, tokenize=tokenize):
        self.word2vec_file = embedding_file
        self.word2vec = KeyedVectors.load_word2vec_format(self.word2vec_file, binary=True)
        self.embedding_dim = self.word2vec.vector_size
        self.tokenize = tokenize
        self.sentence_list = []
        self.sentence_list_tokenized = []
        self.sentence_embedding = np.array([])
        self.flann = FLANN()

    def _average_bow(self, sentence):
        """Return the mean vector of the words in `sentence` that have an embedding.

        Words without an embedding are skipped; if none is known the zero
        vector is returned.
        """
        vs = np.zeros(self.embedding_dim)
        sentence_length = 0
        for word in sentence:
            try:
                vector = self.word2vec[word]
            except KeyError:
                # out-of-vocabulary word: leave it out of the average
                continue
            vs = np.add(vs, vector)
            sentence_length += 1
        if sentence_length != 0:
            vs = np.divide(vs, sentence_length)
        return vs

    def fit(self, sentence_list):
        """Add `sentence_list` to the stored sentences and rebuild the index over all of them."""
        for sentence in sentence_list:
            self.sentence_list.append(sentence)
            self.sentence_list_tokenized.append(self.tokenize(sentence))

        # Alg.1 step 1
        sentence_vec = [self._average_bow(tokens) for tokens in self.sentence_list_tokenized]
        self.sentence_embedding = np.array(sentence_vec)

        # make index for similarity search
        self.flann.build_index(self.sentence_embedding)

    def infer_vector(self, sentence):
        """Tokenize `sentence` and return its averaged embedding."""
        return self._average_bow(self.tokenize(sentence))

    def predict(self, sentence, topn=1):
        """Return `[text, value]` pairs for the `topn` nearest stored sentences.

        The value is the second array returned by the FLANN query (named
        `dists` here) — presumably a distance, so smaller means closer.
        """
        vs = self.infer_vector(sentence)
        result, dists = self.flann.nn_index(vs, num_neighbors=topn)

        # The code this replaces took row 0 only when topn != 1, i.e. it
        # handled a flat result for topn == 1 and a single row otherwise;
        # flattening covers both cases.
        result = np.ravel(result)
        dists = np.ravel(dists)

        output = []
        for i, index in enumerate(result.tolist()):
            text = self.sentence_list[index]
            sim = dists[i]
            output.append([text, sim])
        return output
class dcelVis(Tk):
    """Tk window that draws a DCEL and lets the user step through its elements.

    Keys: q quit, h help, p print the DCEL, e/v/f step through half-edges,
    vertices and faces. When WITH_FLANN is set, moving the mouse reports
    the closest vertex and releasing a button removes it.
    """

    def __init__(self, dcel):
        Tk.__init__(self)
        self.sizex = 700
        self.sizey = 700
        self.window_diagonal = math.sqrt(self.sizex**2 + self.sizey**2)

        self.title("DCELvis")
        self.resizable(0,0)

        self.bind('q', self.exit)
        self.bind('h', self.print_help)
        self.bind('p', self.print_dcel)

        self.bind('e', self.iteratehedge)
        self.bind('v', self.iteratevertex)
        self.bind('f', self.iterateface)
        self.canvas = Canvas(self, bg="white", width=self.sizex, height=self.sizey)
        self.canvas.pack()

        if WITH_FLANN:
            self.bind("<ButtonRelease>", self.remove_closest)
            self.bind("<Motion>", self.report_closest)

        self.coordstext = self.canvas.create_text(self.sizex, self.sizey, anchor='se', text='')
        self.info_text = self.canvas.create_text(10, self.sizey, anchor='sw', text='')

        self.tx = 0
        self.ty = 0

        # canvas item ids of the current highlight and of the background drawing
        self.highlight_cache = []
        self.bgdcel_cache = []

        self.draw = draw(self)

        if WITH_FLANN:
            self.kdtree = FLANN()
        self.D = None
        self.bind_dcel(dcel)
        self.print_help(None)

    def t(self, x, y):
        """transform data coordinates to screen coordinates"""
        x = (x * self.scale) + self.tx
        y = self.sizey - ((y * self.scale) + self.ty)
        return (x,y)

    def t_(self, x, y):
        """transform screen coordinates to data coordinates"""
        x = (x - self.tx)/self.scale
        y = (self.sizey - y - self.ty)/self.scale
        return (x,y)

    def print_help(self, event):
        print(HELP)

    def print_dcel(self, event):
        print(self.D)

    def bind_dcel(self, dcel):
        """Fit the view to the vertices of `dcel`, store it and draw it.

        The larger extent is scaled to 80% of the window and the bounding
        box is centred.
        """
        minx = maxx = dcel.vertexList[0].x
        miny = maxy = dcel.vertexList[0].y
        for v in dcel.vertexList[1:]:
            minx = min(minx, v.x)
            miny = min(miny, v.y)
            maxx = max(maxx, v.x)
            maxy = max(maxy, v.y)

        d_x = maxx-minx
        d_y = maxy-miny
        # float division keeps the centre exact for integer coordinates on
        # Python 2, where `/` floors
        c_x = minx + d_x/2.0
        c_y = miny + d_y/2.0

        if d_x > d_y:
            self.scale = (self.sizex*0.8) / d_x
        elif d_y > 0:
            self.scale = (self.sizey*0.8) / d_y
        else:
            # all vertices coincide: there is no extent to fit, so use unit
            # scale instead of dividing by zero
            self.scale = 1.0

        self.tx = self.sizex/2.0 - c_x*self.scale
        self.ty = self.sizey/2.0 - c_y*self.scale

        self.D = dcel

        self.draw_dcel()

    def draw_dcel(self):
        """Redraw faces, half-edges and vertices and restart the three element iterators."""
        self.draw.deleteItems(self.bgdcel_cache)
        self.draw_dcel_faces()
        self.draw_dcel_hedges()
        self.draw_dcel_vertices()

        self.hedge_it = self.type_iterator('hedge')
        self.face_it = self.type_iterator('face')
        self.vertex_it = self.type_iterator('vertex')

    def getClosestVertex(self, screenx, screeny):
        """Return the vertex nearest to the given screen position."""
        # the index is rebuilt on every call because vertices can be removed
        vertices = [np.array([v.x,v.y]) for v in self.D.vertexList]
        self.kdtree.build_index(np.array(vertices), algorithm='linear')

        x,y = self.t_(screenx, screeny)
        q = np.array([x,y])
        v_i = self.kdtree.nn_index(q,1)[0][0]

        return self.D.vertexList[v_i]

    def remove_closest(self, event):
        v = self.getClosestVertex(event.x, event.y)
        self.D.remove_vertex( v )
        self.draw_dcel()

    def report_closest(self, event):
        s = str(self.getClosestVertex(event.x, event.y))
        self.canvas.itemconfig(self.info_text, text=s )

    def _advance(self, kind):
        """Step the '<kind>_it' generator, restarting it once it is exhausted."""
        attr = kind + '_it'
        try:
            next(getattr(self, attr))
        except StopIteration:
            restarted = self.type_iterator(kind)
            setattr(self, attr, restarted)
            # an empty element list yields nothing even after the restart;
            # the default keeps StopIteration out of the key callback
            next(restarted, None)

    def iteratehedge(self, event):
        self._advance('hedge')

    def iterateface(self, event):
        self._advance('face')

    def iteratevertex(self, event):
        self._advance('vertex')

    def type_iterator(self, q='hedge'):
        """Generator that explains (prints and highlights) one element of kind `q` per step."""
        if q == 'hedge':
            for e in self.D.hedgeList:
                yield self.explain_hedge(e)
        elif q == 'face':
            for e in self.D.faceList:
                yield self.explain_face(e)
        elif q == 'vertex':
            for e in self.D.vertexList:
                yield self.explain_vertex(e)

    def explain_hedge(self, e):
        print(e)
        self.draw.deleteItems(self.highlight_cache)

        i1 = self.draw_dcel_face(e.incidentFace, fill='#ffc0bf', outline='')
        i4 = self.draw_dcel_vertex(e.origin, size=7, fill='red', outline='')
        i2 = self.draw_dcel_hedge(e.next, arrow=LAST, arrowshape=(7,6,2), width=2, fill='#1a740c')
        i3 = self.draw_dcel_hedge(e.previous, arrow=LAST, arrowshape=(7,6,2), width=2, fill='#0d4174')
        i5 = self.draw_dcel_hedge(e, arrow=LAST, arrowshape=(7,6,2), width=3, fill='red')
        i6 = self.draw_dcel_hedge(e.twin, arrow=LAST, arrowshape=(7,6,2), width=3, fill='orange')

        self.highlight_cache = [i1,i2,i3,i4,i5,i6]

    def explain_vertex(self, v):
        print(v)
        self.draw.deleteItems(self.highlight_cache)

        i1 = self.draw_dcel_vertex(v, size=7, fill='red', outline='')
        i2 = self.draw_dcel_hedge(v.incidentEdge, arrow=LAST, arrowshape=(7,6,2), width=2, fill='red')

        self.highlight_cache = [i1,i2]

    def explain_face(self, f):
        print(f)
        self.draw.deleteItems(self.highlight_cache)

        i1 = self.draw_dcel_face(f, fill='#ffc0bf', outline='')
        i2 = self.draw_dcel_hedge(f.outerComponent, arrow=LAST, arrowshape=(7,6,2), width=3, fill='red')

        self.highlight_cache = [i1,i2]

    def draw_dcel_vertices(self):
        for v in self.D.vertexList:
            self.bgdcel_cache.append(self.draw_dcel_vertex(v))

    def draw_dcel_vertex(self, v, **options):
        if options == {}:
            options = {'size':5, 'fill':'blue', 'outline':''}

        return self.draw.point(v.x, v.y, **options)

    def draw_dcel_hedges(self):
        for e in self.D.hedgeList:
            self.bgdcel_cache.append(self.draw_dcel_hedge(e))

    def draw_dcel_hedge(self, e, **options):
        """Draw half-edge `e` as an arrow, offset sideways and shortened at its head."""
        if options == {}:
            options = {'arrow':LAST, 'arrowshape':(7,6,2), 'fill': '#444444'}

        offset = .02
        sx,sy = e.origin.x, e.origin.y
        ex,ey = e.twin.origin.x, e.twin.origin.y
        vx,vy = ex - sx, ey - sy
        v = vec2(vx, vy)
        # sideways shift, presumably so the two twins do not overlap
        v_ = v.orthogonal_l()*offset

        # shorten the arrow by a fixed length of .25
        v = v - v.normalized()*.25
        ex, ey = sx+v.x, sy+v.y

        return self.draw.edge( (sx+v_.x, sy+v_.y), (ex+v_.x, ey+v_.y) , **options)

    def draw_dcel_faces(self):
        for f in self.D.faceList:
            self.bgdcel_cache.append(self.draw_dcel_face(f))

    def draw_dcel_face(self, f, **options):
        if f == self.D.infiniteFace:
            print('Im not drawing infiniteFace')
            return

        if options == {}:
            options = {'fill':'#eeeeee', 'outline':''}

        vlist = [ (v.x, v.y) for v in f.loopOuterVertices() ]
        return self.draw.polygon(vlist, **options)

    def find_closest(self, event):
        x = self.canvas.canvasx(event.x)
        y = self.canvas.canvasy(event.y)
        print(self.canvas.find_closest(x, y))

    def exit(self, event):
        print("bye bye.")
        self.quit()
        self.destroy()
class FastDictionary(object):
    """Capacity-bounded store of ``(embedding, content)`` pairs backed by FLANN.

    Every stored row gets an integer object id (oid) taken from a running
    counter.  ``contents_lookup`` maps live oids to ``(embedding, content)``
    and ``p_queue`` remembers insertion order so the oldest live entry can be
    evicted once more than ``maxlen`` entries are held.

    NOTE(review): ``nn_index`` is unpacked into three values and
    ``add_points`` / ``remove_point`` / ``rebuild_index`` are called on the
    index, so this presumably targets a patched pyflann build -- confirm.
    """

    def __init__(self, maxlen, seed=0, cores=4, trees=1):
        """Create an empty dictionary holding at most ``maxlen`` entries."""
        self.flann = FLANN(
            algorithm='kdtree',
            random_seed=seed,
            cores=cores,
            trees=trees,
        )
        self.counter = 0  # next oid to hand out
        self.contents_lookup = {}  # {oid: (e, q)}
        self.p_queue = collections.deque()  # oids in insertion order, oldest first
        self.maxlen = maxlen

    def save(self, dir, fname, it=None):
        """Pickle ``(contents_lookup, p_queue, maxlen)`` to ``dir/fname[-it]``."""
        fname = f'{fname}' if it is None else f'{fname}-{it}'
        with open(os.path.join(dir, fname), 'wb') as f:
            pickle.dump((self.contents_lookup, self.p_queue, self.maxlen), f)

    def restore(self, fname):
        """Replace the current contents with a snapshot written by ``save``.

        Entries are renumbered densely from 0 in snapshot order, because the
        index is rebuilt from scratch and therefore numbers its points from 0.
        Any state held before the call is discarded.

        Raises:
            AssertionError: if the snapshot was written with another ``maxlen``.
        """
        # SECURITY: unpickling runs arbitrary code; only restore trusted files.
        with open(fname, 'rb') as f:
            _contents_lookup, _p_queue, maxlen = pickle.load(f)
        assert self.maxlen == maxlen, (self.maxlen, maxlen)

        # Start from a clean slate so that oids, lookup table and queue agree
        # with the freshly built index even when this instance was in use.
        self.counter = 0
        self.contents_lookup = {}
        self.p_queue = collections.deque()

        new_oid_lookup = {}
        E, Q = [], []
        for oid, (e, q) in _contents_lookup.items():
            E.append(e)
            Q.append(q)
            new_oid_lookup[oid] = self.counter
            self.counter += 1

        if not E:
            # Empty snapshot: nothing to index; the next add() builds the index.
            return

        E = np.array(E)

        # Rebuild KD-Tree
        self.flann.build_index(E)

        # Reallocate contents_lookup; rows are views into E, which keeps the
        # memory handed to the index alive.
        for new_oid, (e, q) in enumerate(zip(E, Q)):
            assert e.base is E
            self.contents_lookup[new_oid] = (e, q)

        # Rebuild the eviction queue, dropping oids that were already invalid.
        for oid in _p_queue:
            if oid in new_oid_lookup:
                self.p_queue.append(new_oid_lookup[oid])

    def add(self, E, Contents):
        """Insert the rows of ``E`` with their ``Contents``, evicting old entries.

        ``E`` must be a 2-D array with 64 columns and no NaN values.

        Raises:
            RuntimeError: if the index refuses to remove an evicted point.
        """
        assert not np.isnan(E).any(), ('NaN Detected in Add',
                                       np.argwhere(np.isnan(E)))
        assert len(E) == len(Contents)
        assert E.ndim == 2 and E.shape[1] == 64, E.shape

        if self.counter == 0:
            self.flann.build_index(E)
        else:
            self.flann.add_points(E)
        Oid = np.arange(self.counter, self.counter + len(E), dtype=np.uint32)
        self.counter += len(E)

        for oid, e, content in zip(Oid, E, Contents):
            assert e.base is E or e.base is E.base

            self.contents_lookup[oid] = (e, content)
            self.p_queue.append(oid)

            if len(self.contents_lookup) > self.maxlen:
                # Oids invalidated by update() are still queued; skip them.
                while self.p_queue[0] not in self.contents_lookup:
                    self.p_queue.popleft()

                old_oid = self.p_queue.popleft()

                ret = self.flann.remove_point(old_oid)
                if ret <= 0:
                    raise RuntimeError(f'remove point error {ret}')
                del self.contents_lookup[old_oid]

    def update(self, Oid, E, Contents):
        """Replace the entries ``Oid`` with new embeddings and contents.

        Equivalent to remove followed by add: each new row gets a fresh oid and
        the old oid is removed from the index and the lookup table.  Old oids
        stay in ``p_queue`` and are skipped lazily by ``add``.

        Raises:
            RuntimeError: if the index refuses to remove a replaced point.
        """
        assert not np.isnan(E).any(), ('NaN Detected in Updating',
                                       np.argwhere(np.isnan(E)))
        assert len(np.unique(Oid)) == len(Oid)
        assert E.ndim == 2 and E.shape[1] == 64, E.shape

        # add new Embeddings
        self.flann.add_points(E)
        NewOid = np.arange(self.counter, self.counter + len(E), dtype=np.uint32)
        self.counter += len(E)

        for oid, new_oid, e, content in zip(Oid, NewOid, E, Contents):
            assert e.base is E or e.base is E.base

            self.contents_lookup[new_oid] = (e, content)
            self.p_queue.append(new_oid)

            # delete from kd-tree
            ret = self.flann.remove_point(oid)
            if ret <= 0:
                raise RuntimeError(f'remove point error {ret}')
            # delete from contents_lookup
            del self.contents_lookup[oid]
            # The queue entry cannot be removed cheaply; add() handles it.

    def query_knn(self, E, K=100):
        """Look up the ``K`` nearest stored entries for each row of ``E``.

        Returns:
            For a 1-D query: ``(oids, embeddings, contents)`` truncated to the
            number of neighbours actually found.  For a 2-D query:
            ``(Oids, NN_E, NN_Q, Len)`` where the arrays are zero-padded to
            ``K`` and ``Len[b]`` is the count of valid neighbours in row ``b``.
        """
        assert not np.isnan(E).any(), ('NaN Detected in Querying',
                                       np.argwhere(np.isnan(E)))

        flatten = False
        if E.ndim == 1:
            E = E[None]
            flatten = True

        Oids, Dists, C = self.flann.nn_index(E, num_neighbors=K)
        if C != len(E) * K:
            print(
                f'Not enough neighbors ({np.count_nonzero(Dists>=0.)} == {C}) != {len(E)}*{K}, rebuild and try again...'
            )
            self.flann.rebuild_index()
            Oids, Dists, C = self.flann.nn_index(E, num_neighbors=K)

        # TODO: Dists sometimes becomes NaN; it is zeroed rather than rejected.
        if np.count_nonzero(np.isnan(Dists)) > 0:
            print('warning: NaN Returned as a distance')
            Dists = np.nan_to_num(Dists, copy=False)

        NN_E = np.zeros((len(E), K, E.shape[1]), np.float32)
        NN_Q = np.zeros((len(E), K), np.float32)
        # Slots with a negative distance are counted as "no neighbour found".
        Len = np.count_nonzero(Dists >= 0., axis=1)

        assert np.sum(Len) == C, f'{np.sum(Len)} != {C}'
        assert C > 0, 'Nothing returned...'

        for b, oids in enumerate(Oids):
            for k, oid in enumerate(oids[:Len[b]]):  # drop unfilled slots
                e, q = self.contents_lookup[oid]
                NN_E[b, k] = e
                NN_Q[b, k] = q

        if flatten:
            return Oids[0][:Len[0]], NN_E[0][:Len[0]], NN_Q[0][:Len[0]]
        else:
            return Oids, NN_E, NN_Q, Len
def __init__(self, k: int):
    """Remember the neighbour count ``k`` and create an unbuilt FLANN handle."""
    self.flann = FLANN()
    self.k = k
def full_displacement(shap,supp,t,pol_en=False,cent=None,theta_param=1,pol_mod=False,coord_map=None,knn=None,eps = 1.e-16):
    """Computes all quantities required to compute displacement interpolation at steps ``t``.

    For every row of ``supp`` (a pair of flattened pixel indices) the two
    matching coordinates are blended linearly for each value in ``t``; the four
    nearest grid points of each blended position and their normalised
    inverse-distance weights are then looked up with FLANN.

    Parameters
    ----------
    shap : grid shape; only ``shap[0]`` and ``shap[1]`` are read.
    supp : array with one row per matched pair, columns 0 and 1 holding the
        flattened indices of the two end points.
    t : scalar or array of interpolation steps.
    pol_en, cent, theta_param, pol_mod : options of the polar re-mapping that is
        applied when the coordinate map is built here.
    coord_map, knn : coordinate map and index from a previous call; both are
        rebuilt when ``coord_map`` is None, and ``knn`` is rebuilt when missing.
    eps : added to distances before inversion to avoid division by zero.

    Returns
    -------
    ``(neighbors_graph, weights_neighbors, cent, coord_map, knn)``.

    Calls:

    * :func:`utils.polar_coord_cloud`
    """
    import gc
    from numpy import arange, array, atleast_1d, float64, ones, pi, zeros
    from pyflann import FLANN

    n_pix = shap[0]*shap[1]
    # NOTE(review): the collapsed original does not show whether the polar
    # mapping and index build were nested under this test; they are treated as
    # nested so that a map/index pair returned earlier can be reused -- confirm.
    fresh_map = coord_map is None
    if fresh_map:
        coord_map = zeros((shap[0],shap[1],2))
        coord_map[:,:,0] = arange(0,shap[0]).reshape((shap[0],1)).dot(ones((1,shap[1])))
        coord_map[:,:,1] = ones((shap[0],1)).dot(arange(0,shap[1]).reshape((1,shap[1])))
        if pol_en:
            # Imported lazily: only the polar path needs the project helper.
            from utils import polar_coord_cloud
            if cent is None:
                cent = array([shap[0]/2,shap[1]/2])
            cloud_in = zeros((2,n_pix))
            cloud_in[0,:] = coord_map[:,:,0].reshape((n_pix,))
            cloud_in[1,:] = coord_map[:,:,1].reshape((n_pix,))
            cloud_out = polar_coord_cloud(cloud_in,cent)
            coord_map[:,:,0] = cloud_out[0,:].reshape((shap[0],shap[1]))
            coord_map[:,:,1] = theta_param*cloud_out[1,:].reshape((shap[0],shap[1]))/(2*pi)
            if pol_mod:
                coord_map[:,:,1] *= coord_map[:,:,0]

    if fresh_map or knn is None:
        knn = FLANN()
        cloud_in = zeros((n_pix,2))
        cloud_in[:,0] = coord_map[:,:,0].reshape((n_pix,))
        cloud_in[:,1] = coord_map[:,:,1].reshape((n_pix,))
        knn.build_index(array(cloud_in, dtype=float64))

    # Treat a scalar step as a one-element sequence so one code path suffices.
    steps = atleast_1d(t).ravel()
    n_steps = steps.size
    n_pts = supp.shape[0]

    # Matching coordinates: flattened index -> (row, col), both using shap[0]
    # exactly as the element-wise computation did.
    src_i = (supp[:,0] // shap[0]).astype(int)
    src_j = (supp[:,0] % shap[0]).astype(int)
    dst_i = (supp[:,1] // shap[0]).astype(int)
    dst_j = (supp[:,1] % shap[0]).astype(int)
    start = coord_map[src_i,src_j,:]
    stop = coord_map[dst_i,dst_j,:]

    advection_points = zeros((n_pts,2,n_steps))
    advection_points[:,:,:] = ((1-steps)[None,None,:]*start[:,:,None]
                               + steps[None,None,:]*stop[:,:,None])

    neighbors_graph = zeros((n_pts,2,4,n_steps))
    weights_neighbors = zeros((n_pts,4,n_steps))
    for j in range(n_steps):
        if n_steps > 1:
            # Same text the Python 2 print statement produced.
            print("Wavelength  %d / %d" % (j+1, n_steps))
        neighbors_graph_temp,dist_neighbors = knn.nn_index(advection_points[:,:,j],4)
        neighbors_graph[:,0,:,j] = neighbors_graph_temp//shap[0]
        neighbors_graph[:,1,:,j] = neighbors_graph_temp%shap[0]
        inv_dist = (dist_neighbors+eps)**(-1)
        weights_neighbors[:,:,j] = inv_dist/inv_dist.sum(axis=1).reshape((n_pts,1))
    gc.collect()

    return neighbors_graph.astype(int),weights_neighbors,cent,coord_map,knn
class DND:
    """Key/value memory with nearest-neighbour lookup and least-recently-used eviction.

    Keys and values live in two ``Parameter`` tensors; a FLANN index over the
    keys answers neighbour queries.  Insertions are buffered and applied by
    ``commit_insert``, which also reorders recently used rows to the back and
    drops rows from the front once ``max_memory`` is exceeded.
    """

    def __init__(self, kernel, num_neighbors, max_memory, lr):
        self.kernel = kernel
        self.num_neighbors = num_neighbors
        self.max_memory = max_memory
        self.lr = lr
        self.keys = None
        self.values = None
        self.kdtree = FLANN()

        # key_cache stores a cache of all keys that exist in the DND
        # This makes DND updates efficient
        self.key_cache = {}
        # stale_index is a flag that indicates whether or not the index in self.kdtree is stale
        # This allows us to only rebuild the kdtree index when necessary
        self.stale_index = True
        # indexes_to_be_updated is the set of indexes to be updated on a call to update_params
        # This allows us to rebuild only the keys of key_cache that need to be rebuilt when necessary
        self.indexes_to_be_updated = set()

        # Keys and value to be inserted into self.keys and self.values when commit_insert is called
        self.keys_to_be_inserted = None
        self.values_to_be_inserted = None

        # Move recently used lookup indexes
        # These should be moved to the back of self.keys and self.values to get LRU property
        self.move_to_back = set()

    def get_index(self, key):
        """
        If key exists in the DND, return its index
        Otherwise, return None
        """
        if self.key_cache.get(tuple(key.data.cpu().numpy()[0])) is None:
            return None
        if self.stale_index:
            self.commit_insert()
        return int(self.kdtree.nn_index(key.data.cpu().numpy(), 1)[0][0])

    def update(self, value, index):
        """
        Set self.values[index] = value
        """
        values = self.values.data
        values[index] = value[0].data
        self.values = Parameter(values)
        # The optimizer must be recreated because self.values is a new Parameter.
        self.optimizer = optim.RMSprop([self.keys, self.values], lr=self.lr)

    def insert(self, key, value):
        """
        Insert key, value pair into DND

        The pair is only buffered here; it becomes searchable after the next
        commit_insert.
        """
        if self.keys_to_be_inserted is None:
            # Initial insert
            self.keys_to_be_inserted = key.data
            self.values_to_be_inserted = value.data
        else:
            self.keys_to_be_inserted = torch.cat(
                [self.keys_to_be_inserted, key.data], 0)
            self.values_to_be_inserted = torch.cat(
                [self.values_to_be_inserted, value.data], 0)
        self.key_cache[tuple(key.data.cpu().numpy()[0])] = 0
        self.stale_index = True

    def commit_insert(self):
        """Apply buffered inserts, enforce LRU order and size, rebuild the index."""
        if self.keys is None or len(self.keys) == 0:
            self.keys = Parameter(self.keys_to_be_inserted)
            self.values = Parameter(self.values_to_be_inserted)
        elif self.keys_to_be_inserted is not None:
            self.keys = Parameter(
                torch.cat([self.keys.data, self.keys_to_be_inserted], 0))
            self.values = Parameter(
                torch.cat([self.values.data, self.values_to_be_inserted], 0))

        # Move most recently used key-value pairs to the back
        if len(self.move_to_back) != 0:
            # Explicit ordering: iterating a set difference does not guarantee
            # that the untouched rows keep their relative (age) order.
            recent = sorted(self.move_to_back)
            older = [i for i in range(len(self.keys))
                     if i not in self.move_to_back]
            self.keys = Parameter(
                torch.cat([self.keys.data[older], self.keys.data[recent]], 0))
            self.values = Parameter(
                torch.cat([self.values.data[older], self.values.data[recent]], 0))
            self.move_to_back = set()

        if len(self.keys) > self.max_memory:
            # Expel oldest key to maintain total memory
            for key in self.keys[:-self.max_memory]:
                # pop() instead of del: a duplicated key has one cache entry only.
                self.key_cache.pop(tuple(key.data.cpu().numpy()), None)
            self.keys = Parameter(self.keys[-self.max_memory:].data)
            self.values = Parameter(self.values[-self.max_memory:].data)
        self.keys_to_be_inserted = None
        self.values_to_be_inserted = None
        self.optimizer = optim.RMSprop([self.keys, self.values], lr=self.lr)
        # An index cannot be built over zero rows.
        if len(self.keys) > 0:
            self.kdtree.build_index(self.keys.data.cpu().numpy())
        self.stale_index = False

    def lookup(self, lookup_key, update_flag=False):
        """
        Perform DND lookup
        If update_flag == True, add the nearest neighbor indexes to self.indexes_to_be_updated
        Otherwise the indexes are recorded in self.move_to_back

        Returns the kernel-weighted average of the neighbours' values.
        Raises ZeroDivisionError when every neighbour is skipped.
        """
        query = lookup_key.data.cpu().numpy()
        n_neighbors = min(self.num_neighbors, len(self.keys))
        found = self.kdtree.nn_index(query, n_neighbors)[0]
        # The result is 1-D when a single neighbour is requested; normalise to
        # (n_queries, n_neighbors) and keep the first query's row.
        lookup_indexes = found.reshape(len(query), -1)[0]
        output = 0
        kernel_sum = 0
        for i, index in enumerate(lookup_indexes):
            if i == 0 and self.key_cache.get(tuple(lookup_key[0].data.cpu().numpy())) is not None:
                # If a key exactly equal to lookup_key is used in the DND lookup calculation
                # then the loss becomes non-differentiable. Just skip this case to avoid the issue.
                continue
            if update_flag:
                self.indexes_to_be_updated.add(int(index))
            else:
                self.move_to_back.add(int(index))
            kernel_val = self.kernel(self.keys[int(index)], lookup_key[0])
            output += kernel_val * self.values[int(index)]
            kernel_sum += kernel_val
        output = output / kernel_sum
        return output

    def update_params(self):
        """
        Update self.keys and self.values via backprop
        Use self.indexes_to_be_updated to update self.key_cache accordingly and rebuild the index of self.kdtree
        """
        # Drop the cache entries of keys the optimizer step is about to change.
        for index in self.indexes_to_be_updated:
            self.key_cache.pop(tuple(self.keys[index].data.cpu().numpy()), None)
        self.optimizer.step()
        self.optimizer.zero_grad()
        for index in self.indexes_to_be_updated:
            self.key_cache[tuple(self.keys[index].data.cpu().numpy())] = 0
        self.indexes_to_be_updated = set()
        if len(self.keys) > 0:
            self.kdtree.build_index(self.keys.data.cpu().numpy())
        self.stale_index = False
def __init__(self, sbapp_list, filename, densify, sigma_noise, denoise, **args):
    """Set up the window: title, key and event bindings, overlay text and caches.

    The new instance appends itself to ``sbapp_list``.  ``filename``,
    ``densify``, ``sigma_noise`` and ``denoise`` are only used for the window
    title; remaining keyword arguments go to ``CanvasApp.__init__``.
    """
    CanvasApp.__init__(self, **args)

    self.sbapp_list = sbapp_list
    sbapp_list.append(self)

    self.window_diagonal = math.sqrt(self.sizex**2 + self.sizey**2)

    title = "Shrink the balls [{}] - densify={}x, noise={}, denoise={} ".format(
        filename, densify, sigma_noise, denoise)
    self.toplevel.title(title)

    # Display state toggled by some of the key handlers below.
    self.inner_mode = True
    self.draw_stage_geom_mode = 'normal'

    # NOTE(review): 'h' appears twice; the later binding replaces the earlier
    # one, so print_help is not reachable through 'h' -- confirm intended key.
    key_handlers = (
        ('h', self.print_help),
        ('a', self.ma_auto_stepper),
        ('b', self.draw_all_balls),
        ('t', self.toggle_inout),
        ('h', self.toggle_ma_stage_geom),
        ('i', self.draw_topo),
        ('o', self.draw_topo),
        ('u', self.draw_topo),
        ('p', self.draw_topo),
        ('z', self.spawn_mapperapp),
        ('f', self.spawn_filterapp),
        ('s', self.spawn_shrinkhistapp),
        ('1', self.draw_normal_map_lfs),
        ('2', self.draw_normal_map_theta),
        ('3', self.draw_normal_map_lam),
        ('4', self.draw_normal_map_radii),
        ('`', self.draw_normal_map_clear),
        ('c', self.clear_overlays),
    )
    for key, handler in key_handlers:
        self.toplevel.bind(key, handler)

    self.canvas.pack()

    event_handlers = (
        ("<Motion>", self.draw_closest_ball),
        ("<Key>", self.ma_step),
        ("<ButtonRelease>", self.ma_step),
    )
    for sequence, handler in event_handlers:
        self.toplevel.bind(sequence, handler)

    # Text items anchored at the bottom-right and bottom-left corners.
    self.coordstext = self.canvas.create_text(
        self.sizex, self.sizey, anchor='se', text='')
    self.ball_info_text = self.canvas.create_text(
        10, self.sizey, anchor='sw', text='')

    # Caches, all starting empty.
    self.stage_cache = {stage: [] for stage in (1, 2, 3)}
    self.topo_cache = []
    self.highlight_point_cache = []
    self.highlight_cache = []
    self.poly_cache = []
    self.normalmap_cache = []

    # Child windows are created on demand by the spawn_* handlers.
    self.mapper_window = None
    self.plotter_window = None
    self.shrinkhist_window = None

    self.kdtree = FLANN()
class KNeighborsClassifier():
    """k-nearest-neighbour classifier whose neighbour search is done by FLANN.

    ``fit`` stores the training data and encodes the labels; ``predict`` votes
    among the ``n_neighbors`` nearest training samples, optionally weighted as
    selected by ``weights``.
    """

    def __init__(self, n_neighbors=5,weights='uniform'):
        # Hyper-parameters of the FLANN algorithm
        self.algrithm_choice = "kmeans"
        self.branching = 32
        self.iterations = 7
        self.checks = 16

        # Basic KNN parameters
        self.n_neighbors = n_neighbors
        self.weights = weights
        self.flann = FLANN()

    def fit(self,X,Y):
        """Store ``X`` as float32 training data and encode the labels ``Y``.

        ``Y`` may be 1-D, a single column (a ``DataConversionWarning`` is
        issued) or 2-D with one column per output.  Sets ``classes_``,
        ``train_label`` and ``outputs_2d_``.  Returns ``self``.
        """
        self.train_data = np.asarray(X).astype(np.float32)
        Y = np.asarray(Y)  # accept plain sequences as well as arrays

        if Y.ndim == 1 or (Y.ndim == 2 and Y.shape[1] == 1):
            if Y.ndim != 1:
                warnings.warn("A column-vector y was passed when a 1d array "
                              "was expected. Please change the shape of y to "
                              "(n_samples, ), for example using ravel().",
                              DataConversionWarning, stacklevel=2)
            self.outputs_2d_ = False
            Y = Y.reshape((-1, 1))
        else:
            self.outputs_2d_ = True

        self.classes_ = []
        # Builtin int: the np.int alias no longer exists in current NumPy.
        self.train_label = np.empty(Y.shape, dtype=int)
        for k in range(self.train_label.shape[1]):
            # Labels are stored as indices into the sorted unique classes.
            classes, self.train_label[:, k] = np.unique(Y[:, k], return_inverse=True)
            self.classes_.append(classes)

        if not self.outputs_2d_:
            self.classes_ = self.classes_[0]
            self.train_label = self.train_label.ravel()
        return self

    def predict(self, X, n_neighbors=None):
        """Predict the class labels for the provided data.

        Parameters
        ----------
        X : array-like, shape (n_queries, n_features)
            Test samples.
        n_neighbors : int, optional
            When given, replaces ``self.n_neighbors`` for this and later calls.

        Returns
        -------
        y : array of shape [n_queries] or [n_queries, n_outputs]
            Class labels for each data sample.
        """
        if n_neighbors is not None:
            self.n_neighbors = n_neighbors

        X = check_array(X, accept_sparse='csr')
        X = X.astype(np.float32)
        neigh_dist, neigh_ind = self.kneighbors(X)
        classes_ = self.classes_
        _y = self.train_label
        if not self.outputs_2d_:
            _y = self.train_label.reshape((-1, 1))
            classes_ = [self.classes_]

        n_outputs = len(classes_)
        n_queries = X.shape[0]
        weights = _get_weights(neigh_dist, self.weights)

        y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype)
        for k, classes_k in enumerate(classes_):
            if weights is None:
                mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
            else:
                mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1)

            # mode holds encoded labels; map them back to the class values.
            mode = np.asarray(mode.ravel(), dtype=np.intp)
            y_pred[:, k] = classes_k.take(mode)

        if not self.outputs_2d_:
            y_pred = y_pred.ravel()

        return y_pred

    def kneighbors(self,test_data):
        """Return ``(distances, indices)`` of the nearest training samples.

        Both arrays are reshaped to a single column when FLANN returns 1-D
        results.
        """
        nearest_neighbours,dists = self.flann.nn(self.train_data,test_data,self.n_neighbors,algorithm=self.algrithm_choice, branching=self.branching, iterations=self.iterations, checks=self.checks)
        if len(nearest_neighbours.shape) == 1:
            nearest_neighbours = nearest_neighbours.reshape((-1, 1))
            dists = dists.reshape((-1, 1))
        return dists, nearest_neighbours
class ALaCarteEmbedding():
    """Induce embeddings for new words or n-grams from their context vectors.

    ``build`` sums the word2vec vectors around every target token, fits a
    linear map from averaged context vectors to the known word vectors, and
    applies that map to all targets.  The normalised results are indexed with
    FLANN for ``most_similar`` queries.
    """

    def __init__(self,
                 word2vec,
                 tokenize,
                 target_word_list=None,
                 ngram=None,
                 window_size=1,
                 min_count=1):
        """Store the configuration; ``target_word_list`` defaults to empty, ``ngram`` to ``[1]``."""
        # None sentinels replace the former mutable list defaults.
        if target_word_list is None:
            target_word_list = []
        if ngram is None:
            ngram = [1]
        self.w2v = word2vec
        self.embedding_dim = self.w2v.vector_size
        self.vocab = set(self.w2v.vocab.keys())
        self.target_word_list = set(target_word_list)
        for word in self.target_word_list:
            self.vocab.add(word)
        self.tokenize = tokenize
        self.ngram = ngram
        self.window_size = window_size
        self.min_count = min_count

        self.c2v = {}  # target token -> summed context vector
        self.target_counts = Counter()  # target token -> number of contexts seen
        self.alacarte = {}
        self.flann = FLANN()

    def _get_embedding_vec(self, token):
        """Return the word2vec vector of a token, or the sum over an n-gram's parts.

        Words missing from the word2vec vocabulary contribute a zero vector.
        """
        if isinstance(token, str):
            # for unigram
            if token in self.w2v.vocab:
                return self.w2v[token]
            else:
                return np.zeros(self.embedding_dim)
        else:
            # for ngram
            vec = np.zeros(self.embedding_dim)
            for t in token:
                if t in self.w2v.vocab:
                    vec += self.w2v[t]
            return vec

    def _make_context_vectors(self, tokens, n):
        """Accumulate context sums and counts for every ``n``-gram target in ``tokens``."""
        if n > 1:
            token_list = ngram(tokens, n)
        else:
            token_list = tokens

        for target_token, context in window_without_center(
                token_list, self.window_size):
            context_vector = np.zeros(self.embedding_dim)
            if self.target_word_list and target_token not in self.vocab:
                # target_word_list is specified and each target token is not in the vocabulary
                continue

            for token in context:
                context_vector += self._get_embedding_vec(token)

            if target_token in self.c2v:
                self.c2v[target_token] += context_vector
            else:
                self.c2v[target_token] = context_vector
                self.vocab.add(target_token)
            self.target_counts[target_token] += 1

    def build(self, sentences):
        """Compute the embeddings from ``sentences`` and build the search index."""
        # compute each word's context embedding
        for sentence in tqdm(sentences):
            tokens = self.tokenize(sentence)
            # Sentences no longer than one full window are skipped.
            if len(tokens) > self.window_size * 2 + 1:
                for n in self.ngram:
                    self._make_context_vectors(tokens, n)

        # remove low frequency token
        for word, freq in self.target_counts.items():
            if freq < self.min_count and word in self.vocab:
                self.vocab.remove(word)

        # compute context-to-feature transform
        X_all = np.array([
            v / self.target_counts[k] for k, v in self.c2v.items()
            if k in self.vocab
        ])

        # Only targets with a known word vector can supervise the regression.
        known = [k for k in self.c2v if k in self.w2v.vocab]
        X = np.array([self.c2v[k] / self.target_counts[k] for k in known])
        y = np.array([self.w2v[k] for k in known])
        self.A = LinearRegression(fit_intercept=False).fit(X, y).coef_.astype(
            np.float32)  # emb x emb

        # set a la carte embedding
        self.alacarte = normalize(X_all.dot(self.A.T))
        # Same filter and iteration order as X_all, so rows and names line up.
        self.alacarte_vocab = [v for v in self.c2v.keys() if v in self.vocab]

        # make index for similarity search
        self.flann.build_index(self.alacarte)

    def most_similar(self, word, topn=1):
        """Return ``topn`` ``(text, value)`` pairs nearest to ``word``.

        The second element is the value FLANN reports for the neighbour.
        Raises ``ValueError`` when ``word`` has no embedding.
        """
        word_vec = self.alacarte[self.alacarte_vocab.index(word)]
        result, dists = self.flann.nn_index(word_vec, num_neighbors=topn)

        if topn != 1:
            # With several neighbours the result carries a leading query axis.
            result = result[0]
            dists = dists[0]

        output = []
        for i, index in enumerate(result.tolist()):
            text = "".join(self.alacarte_vocab[index])
            sim = dists[i]
            output.append((text, sim))
        return output

    def save(self, path):
        """Write the embeddings to ``path`` as text.

        The first line is ``"<count> <dim>"``; each following line holds one
        entry's text and its components rounded to six decimals.
        """
        with open(path, "w") as f:
            f.write(f"{len(self.alacarte_vocab)} {self.embedding_dim}\n")
            # Iterate this instance's data, not a module-level object.
            for arr, word in zip(self.alacarte, self.alacarte_vocab):
                f.write(" ".join(["".join(word)] +
                                 [str(np.round(s, 6))
                                  for s in arr.tolist()]) + "\n")
def make_test(test_start=1000, test_end=1050): f1 = open('states.pkl', 'r') f2 = open('states_for_test.pkl', 'r') data_states = cPickle.load(f1) test_states = cPickle.load(f2) f1.close() f2.close() time_brute = [] time_sk_kd = [] time_sk_ball = [] time_kdtree = [] time_annoy = [] time_flann = [] time_brute_tot = time_sk_kd_tot = time_sk_ball_tot = time_kdtree_tot = time_annoy_tot = time_flann_tot = 0 kdtree_tree = None for items in xrange(test_start, test_end): print "item:", items ground_truth = np.zeros((test_num_for_each, K), dtype=np.int32) time_brute_start = time.time() for no_test in xrange(test_num_for_each): distance_list = [] current_state = test_states[items, no_test] for target in xrange(items): target_state = data_states[target] distance_list.append(DistanceNode(np.sum(np.absolute(current_state - target_state)**2), target)) smallest = heapq.nsmallest(K, distance_list, key=lambda x: x.distance) ground_truth[no_test] = [x.index for x in smallest] time_brute_end = time.time() time_brute.append(time_brute_end - time_brute_start) time_brute_tot += time_brute[-1] # print ground_truth time_sk_kd_start = time.time() tree = KDTree(data_states[:items, :]) dist, indices = tree.query(test_states[items], K) time_sk_kd_end = time.time() time_sk_kd.append(time_sk_kd_end - time_sk_kd_start) time_sk_kd_tot += time_sk_kd[-1] # print indices time_sk_ball_start = time.time() tree = BallTree(data_states[:items, :], 10000) dist, indices = tree.query(test_states[items], K) time_sk_ball_end = time.time() time_sk_ball.append(time_sk_ball_end - time_sk_ball_start) time_sk_ball_tot += time_sk_ball[-1] # print indices """ annoy is absolutely disappointing for its low speed and poor accuracy. 
""" time_annoy_start = time.time() annoy_result = np.zeros((test_num_for_each, K), dtype=np.int32) tree = AnnoyIndex(dimension_result) for i in xrange(items): tree.add_item(i, data_states[i, :]) tree.build(10) for no_test in xrange(test_num_for_each): current_state = test_states[items, no_test] annoy_result[no_test] = tree.get_nns_by_vector(current_state, K) time_annoy_end = time.time() time_annoy.append(time_annoy_end - time_annoy_start) time_annoy_tot += time_annoy[-1] # print annoy_result # print annoy_result - indices """ flann is still not very ideal """ time_flann_start = time.time() flann = FLANN() result, dist = flann.nn(data_states[:items, :], test_states[items], K, algorithm='kdtree', trees=10, checks=16) time_flann_end = time.time() time_flann.append(time_flann_end - time_flann_start) time_flann_tot += time_flann[-1] # print result-indices """ This kdtree module is so disappointing!!!! It is 100 times slower than Sklearn and even slower than brute force, more over it even makes mistakes. This kdtree module supports online insertion and deletion. I thought it would be much faster than Sklearn KdTree which rebuilds the tree every time. But the truth is the opposite. 
""" # time_kdtree_start = time.time() # if kdtree_tree is None: # point_list = [MyTuple(data_states[i, :], i) for i in xrange(items)] # kdtree_tree = kdtree.create(point_list) # else: # point = MyTuple(data_states[items, :], items) # kdtree_tree.add(point) # kdtree_result = np.zeros((test_num_for_each, K), dtype=np.int32) # for no_test in xrange(test_num_for_each): # current_state = test_states[items, no_test] # smallest = kdtree_tree.search_knn(MyTuple(current_state, -1), K) # kdtree_result[no_test] = [x[0].data.pos for x in smallest] # time_kdtree_end = time.time() # time_kdtree.append(time_kdtree_end - time_kdtree_start) # time_kdtree_tot += time_kdtree[-1] # print kdtree_result # print kdtree_result-indices print 'brute force:', time_brute_tot print 'sklearn KDTree', time_sk_kd_tot print 'sklearn BallTree', time_sk_ball_tot print 'approximate annoy', time_annoy_tot print 'approximate flann', time_flann_tot print 'kdtree (deprecated)', time_kdtree_tot
def build_index(self, X):
    """Build a new FLANN index over ``X`` and return the index object.

    The parameters reported by FLANN's ``build_index`` are not kept.
    """
    index = FLANN()
    index.build_index(X)
    return index