Beispiel #1
0
class ground_truth_classifier(object):
    """Pairwise "same name?" classifier backed by a nearest-neighbour index.

    The attribute rows returned by ``get_data`` are indexed with FLANN.  A
    query is split in two halves by ``split_attrs``; each half is mapped to
    the name stored for its nearest indexed row and the names are compared.
    """

    def __init__(self, data_file):
        """Load ``data_file`` through ``get_data`` and index its attributes."""
        self.flann = FLANN()
        attributes, names = get_data(data_file)
        self.flann.build_index(attributes, algorithm="autotuned")
        self.names = names

    def predict(self, attrs):
        """Return the element-wise comparison of the names found for both halves."""

        def nearest_names(queries):
            # Only the neighbour indices are needed; distances are discarded.
            neighbour_idx, _ = self.flann.nn_index(queries)
            return self.names[neighbour_idx]

        first_half, second_half = split_attrs(attrs)
        first_names = nearest_names(first_half)
        second_names = nearest_names(second_half)
        return (first_names == second_names)

    def score(self, x, y):
        """Return the fraction of entries of ``y`` matched by ``predict(x)``."""
        predicted = self.predict(x)
        n_correct = np.sum(y == predicted)
        return (n_correct / y.size)
Beispiel #2
0
class TFCoverage(AbstractCoverage):
    """Coverage measure that counts layer outputs lying far from each other.

    An output of ``subject_layer`` is recorded as a new "distant vector" when
    its nearest already-recorded vector is farther away than
    ``distance_threshold``.  Nearest-neighbour lookups combine a FLANN index
    (vectors indexed so far) with an exact scan over a buffer of vectors that
    were recorded after the index was last built.
    """

    def __init__(self, model, subject_layer, distance_threshold):
        """Store the model and parameters and create an empty FLANN index.

        Args:
            model: model passed to ``get_layer_outs_new``.
            subject_layer: key/index selecting the layer output to measure.
            distance_threshold: minimum distance to every recorded vector for
                a new output to be recorded.
        """
        self.model = model
        # All vectors recorded so far.
        self.distant_vectors = []
        # Recorded vectors that are not in the FLANN index yet.
        self.distant_vectors_buffer = []
        self.subject_layer = subject_layer
        self.distance_threshold = distance_threshold
        self.flann = FLANN()

    def get_measure_state(self):
        """Return ``[distant_vectors, distant_vectors_buffer]``.

        The returned lists are the live objects, not copies.
        """
        return [self.distant_vectors, self.distant_vectors_buffer]

    def set_measure_state(self, s):
        """Restore a state produced by :meth:`get_measure_state`.

        The index is rebuilt only when the restored buffer exceeds
        ``_BUFFER_SIZE``.
        """
        # NOTE(review): when ``s`` comes from another instance the FLANN index
        # of this instance is not rebuilt here unless the buffer is large, so
        # index and ``distant_vectors`` may disagree -- confirm with callers.
        self.distant_vectors = s[0]
        self.distant_vectors_buffer = s[1]
        if len(self.distant_vectors_buffer) > _BUFFER_SIZE:
            self.build_index_and_flush_buffer()

    def reset_measure_state(self):
        """Delete the index and forget every recorded vector."""
        self.flann.delete_index()
        self.distant_vectors = []
        self.distant_vectors_buffer = []

    def get_current_coverage(self, with_implicit_reward=False):
        """Return the number of recorded distant vectors.

        ``with_implicit_reward`` is accepted but not used.
        """
        return len(self.distant_vectors)

    def build_index_and_flush_buffer(self):
        """Rebuild the index from all recorded vectors and empty the buffer."""
        self.distant_vectors_buffer = []
        self.flann.build_index(np.array(self.distant_vectors))

    def test(self, test_inputs, with_implicit_reward=False):
        """Record every sufficiently distant layer output of ``test_inputs``.

        Returns:
            A tuple ``(number of recorded vectors, list of recorded vectors)``.
            ``with_implicit_reward`` is accepted but not used.
        """
        layer_outs = get_layer_outs_new(self.model,
                                        test_inputs)[self.subject_layer]

        for plo in layer_outs:
            if len(self.distant_vectors) > 0:
                # Distance to the nearest indexed vector as reported by FLANN.
                _, approx_distances = self.flann.nn_index(plo, 1)
                # Squared Euclidean distance to every not-yet-indexed vector.
                exact_distances = [
                    np.sum(np.square(plo - buffered))
                    for buffered in self.distant_vectors_buffer
                ]
                nearest_distance = min(exact_distances +
                                       approx_distances.tolist())
                if nearest_distance > self.distance_threshold:
                    self.distant_vectors_buffer.append(plo)
                    self.distant_vectors.append(plo)
                    # Keep the linear scan bounded: once the buffer is full,
                    # move its content into the index (same rule as in
                    # set_measure_state).
                    if len(self.distant_vectors_buffer) > _BUFFER_SIZE:
                        self.build_index_and_flush_buffer()
            else:
                # First vector ever: it seeds the index directly.
                self.flann.build_index(plo)
                self.distant_vectors.append(plo)

        return len(self.distant_vectors), self.distant_vectors
Beispiel #3
0
def kernel_model(scaled_data, xs, ys, parms):
    """
    Estimate the value at the target grid given the exemplars, using the
    specified kernel.

    The exemplar positions ``(xs, ys)`` are indexed with FLANN; for every
    point of the grid spanned by ``parms.psf_grid`` the ``parms.knn`` nearest
    exemplars are looked up and their ``scaled_data`` values are combined
    with kernel weights.

    Fields read from ``parms``: ``psf_grid``, ``flann_precision``, ``knn``,
    ``kernel_parms`` (keys ``'type'`` and ``'gamma'``) and
    ``psf_model_shape``.

    Only the ``'gaussian'`` kernel type is implemented; for any other type
    the returned model is all zeros.

    Returns the model reshaped to ``parms.psf_model_shape``.
    """
    X = np.vstack((xs, ys,)).T
    grid_x, grid_y = np.meshgrid(parms.psf_grid[0], parms.psf_grid[1])
    T = np.vstack((grid_x.ravel(), grid_y.ravel())).T

    # use flann for distances and indices
    flann = FLANN()
    p = flann.build_index(X, target_precision=parms.flann_precision,
                          log_level='info')
    # The FLANN search parameter is spelled ``checks``.
    inds, dists = flann.nn_index(T, parms.knn, checks=p['checks'])

    # go through the grid and compute the model
    model = np.zeros(T.shape[0])
    if parms.kernel_parms['type'] == 'gaussian':
        gamma_sq = parms.kernel_parms['gamma'] ** 2.
        for i in range(model.size):
            local_values = scaled_data[inds[i]]
            # NOTE(review): FLANN's default metric presumably already
            # returns squared distances, in which case this squares them
            # again -- confirm this is intended.
            k = np.exp(-1. * dists[i] ** 2. / gamma_sq)
            model[i] = np.sum(k * local_values) / np.sum(k)

    return model.reshape(parms.psf_model_shape)
Beispiel #4
0
class ShinkkingBallApp(CanvasApp):
    """Interactive canvas window for a shrinking-ball medial axis computation.

    The window steps through the stages yielded by ``ma.compute_balls`` and
    draws them, highlights the medial ball of the boundary point closest to
    the mouse, and offers keyboard shortcuts for overlays and for spawning
    helper windows.  The syntax used is valid on both Python 2 and Python 3.
    """

    def __init__(self, sbapp_list, filename, densify, sigma_noise, denoise, **args):
        """Create the window, register the bindings and initialise the caches.

        Args:
            sbapp_list: list shared between windows; this instance appends
                itself and later notifies every member when the closest ball
                changes.
            filename, densify, sigma_noise, denoise: only used to build the
                window title.
            **args: forwarded to ``CanvasApp.__init__``.
        """
        CanvasApp.__init__(self, **args)
        self.sbapp_list = sbapp_list
        self.sbapp_list.append(self)

        self.window_diagonal = math.sqrt(self.sizex ** 2 + self.sizey ** 2)
        self.toplevel.title(
            "Shrink the balls [{}] - densify={}x, noise={}, denoise={} ".format(filename, densify, sigma_noise, denoise)
        )

        # NOTE(review): "h" is bound twice (here and to toggle_ma_stage_geom
        # below); the later binding replaces this one, so print_help is not
        # reachable from the keyboard.  Left unchanged pending a decision on
        # which key each action should use.
        self.toplevel.bind("h", self.print_help)

        self.toplevel.bind("a", self.ma_auto_stepper)
        self.toplevel.bind("b", self.draw_all_balls)
        self.toplevel.bind("t", self.toggle_inout)
        self.toplevel.bind("h", self.toggle_ma_stage_geom)

        self.inner_mode = True
        self.draw_stage_geom_mode = "normal"

        self.toplevel.bind("i", self.draw_topo)
        self.toplevel.bind("o", self.draw_topo)
        self.toplevel.bind("u", self.draw_topo)
        self.toplevel.bind("p", self.draw_topo)

        self.toplevel.bind("z", self.spawn_mapperapp)
        self.toplevel.bind("f", self.spawn_filterapp)
        self.toplevel.bind("s", self.spawn_shrinkhistapp)

        self.toplevel.bind("1", self.draw_normal_map_lfs)
        self.toplevel.bind("2", self.draw_normal_map_theta)
        self.toplevel.bind("3", self.draw_normal_map_lam)
        self.toplevel.bind("4", self.draw_normal_map_radii)
        self.toplevel.bind("`", self.draw_normal_map_clear)

        self.toplevel.bind("c", self.clear_overlays)
        self.canvas.pack()

        self.toplevel.bind("<Motion>", self.draw_closest_ball)
        self.toplevel.bind("<Key>", self.ma_step)
        self.toplevel.bind("<ButtonRelease>", self.ma_step)
        self.coordstext = self.canvas.create_text(self.sizex, self.sizey, anchor="se", text="")
        self.ball_info_text = self.canvas.create_text(10, self.sizey, anchor="sw", text="")

        # Canvas item ids, grouped by the overlay they belong to.
        self.stage_cache = {1: [], 2: [], 3: []}
        self.topo_cache = []
        self.highlight_point_cache = []
        self.highlight_cache = []
        self.poly_cache = []
        self.normalmap_cache = []

        self.mapper_window = None
        self.plotter_window = None
        self.shrinkhist_window = None

        self.kdtree = FLANN()

    def toggle_ma_stage_geom(self, event):
        """Switch ``draw_stage_geom_mode`` between "normal" and "dontclear"."""
        if self.draw_stage_geom_mode == "normal":
            self.draw_stage_geom_mode = "dontclear"
        else:
            self.draw_stage_geom_mode = "normal"

    def spawn_shrinkhistapp(self, event):
        """Finish the computation and open a ``ShrinkHistApp`` window."""
        self.ma_ensure_complete()
        self.shrinkhist_window = ShrinkHistApp(self)

    def spawn_mapperapp(self, event):
        """Finish the computation and open a ``MapperApp`` window."""
        self.ma_ensure_complete()
        self.mapper_window = MapperApp(self)

    def spawn_filterapp(self, event):
        """Finish the computation and open a ``FilterApp`` window."""
        self.ma_ensure_complete()
        # __init__ declares ``plotter_window``; ``plot_window`` is kept as an
        # alias because that is the name this method used to assign.
        self.plotter_window = self.plot_window = FilterApp(self)

    def update_mouse_coords(self, event):
        """Remember the screen position carried by ``event``."""
        self.mouse_x = event.x
        self.mouse_y = event.y

    def toggle_inout(self, event):
        """Flip between inspecting the inner and the outer medial balls."""
        self.inner_mode = not self.inner_mode

    def print_help(self, event):
        """Print the module-level ``HELP`` text."""
        print(HELP)

    def bind_ma(self, ma, draw_poly=True):
        """Attach the medial axis object ``ma`` and draw its input geometry.

        Sets up the stage generator (inner balls first), fits the view to the
        bounding box of ``ma.D["coords"]``, draws the polygon (optional) and
        the normals, and builds the nearest-neighbour index over the
        coordinates.
        """
        self.ma = ma
        self.ma_inner = True
        self.ma_complete = False
        self.ma_gen = ma.compute_balls(inner=self.ma_inner)
        coords = ma.D["coords"]
        minx = coords[:, 0].min()
        miny = coords[:, 1].min()
        maxx = coords[:, 0].max()
        maxy = coords[:, 1].max()

        self.set_transform(minx, maxx, miny, maxy)
        self.normal_scale = 0.02 * (self.window_diagonal / self.scale)

        if draw_poly:
            self.draw.polygon(ma.D["coords"], fill="#eeeeee")
        for p, n in zip(ma.D["coords"], ma.D["normals"]):
            self.draw.normal(p, n, s=self.normal_scale, fill="#888888", width=1)

        self.kdtree.build_index(self.ma.D["coords"], algorithm="linear")

        self.print_help(None)

        self.canvas.update_idletasks()

    def ma_ensure_complete(self):
        """Run the stepper until ``ma_complete`` is set."""
        while not self.ma_complete:
            self.ma_auto_stepper(None)

    def ma_auto_stepper(self, event):
        """Run the current generator to exhaustion, drawing every stage."""
        self.ma_stepper(mode="auto_step")

    def ma_step(self, event):
        """Advance the computation by a single stage and draw it."""
        self.ma_stepper(mode="onestep")

    def ma_stepper(self, mode):
        """Advance the stage generator according to ``mode``.

        ``"onestep"`` draws one stage, ``"auto_step"`` draws until the
        generator is exhausted.  On exhaustion the generator is restarted for
        the other side; when the exhausted pass was not the inner one, the
        derived quantities are computed and ``ma_complete`` is set.
        """
        try:
            if mode == "onestep":
                self.ma_draw_stage(next(self.ma_gen))
            elif mode == "auto_step":
                while True:
                    self.ma_draw_stage(next(self.ma_gen))
        except StopIteration:
            if not self.ma_inner:
                self.ma.compute_lfs()
                self.ma.compute_lam()
                self.ma.compute_theta()
                self.ma.compute_lam(inner="out")
                self.ma.compute_theta(inner="out")
                self.ma_complete = True
            self.ma_inner = not self.ma_inner
            self.ma_gen = self.ma.compute_balls(self.ma_inner)

    def ma_draw_stage(self, d):
        """Draw one stage dictionary ``d`` (keys "stage", "geom", "msg")."""
        if d["stage"] == 1:
            # Drop the third stage-2 item from the cache (if present) so that
            # the deleteCache call below leaves it on the canvas.
            try:
                self.stage_cache[2].remove(self.stage_cache[2][2])
            except IndexError:
                pass

            self.deleteCache([1, 2, 3])
            p, n = d["geom"]
            l = self.window_diagonal  # line length - depends on windows size
            i = self.draw.point(p[0], p[1], size=8, fill="red", outline="")
            j = self.draw.edge(
                (p[0] + n[0] * l, p[1] + n[1] * l),
                (p[0] - n[0] * l, p[1] - n[1] * l),
                width=1,
                fill="blue",
                dash=(4, 2),
            )
            self.stage_cache[1] = [i, j]
            self.canvas.itemconfig(self.coordstext, text=d["msg"])

        elif d["stage"] == 2:
            if self.draw_stage_geom_mode == "normal":
                self.draw.deleteItems(self.stage_cache[2])
            q, c, r = d["geom"]
            i = self.draw.point(q[0], q[1], size=4, fill="blue", outline="")
            j = self.draw.point(c[0], c[1], size=r * self.scale, fill="", outline="blue")
            k = self.draw.point(c[0], c[1], size=2, fill="blue", outline="")
            self.stage_cache[2] = [i, j, k]
            self.canvas.itemconfig(self.coordstext, text=d["msg"])

    def draw_highlight_points(self, key, val, how, inner="in"):
        """Mark the medial points whose ``self.ma.D[key]`` value passes a test.

        ``how`` is one of "greater", "smaller" or "equal" and is evaluated
        against ``val``; NaN values are skipped.
        """
        self.draw.deleteItems(self.highlight_cache)
        for m, v in zip(self.ma.D["ma_coords_" + inner], self.ma.D[key]):
            if np.isnan(v):
                continue
            if (
                (how == "greater" and v > val)
                or (how == "smaller" and v < val)
                or (how == "equal" and v == val)
            ):
                i = self.draw.point(m[0], m[1], size=4, fill="", outline="red", width=2)
                self.highlight_cache.append(i)

    def draw_topo(self, event):
        """Draw the medial axis line pieces for the pressed key.

        "i"/"u" select the inner side, "o"/"p" the outer side; "u"/"p"
        additionally request the projected variant.  Other keys are ignored.
        """
        if event.char in ("i", "u"):
            inner = "in"
        elif event.char in ("o", "p"):
            inner = "out"
        else:
            # Not one of the keys this handler is bound to.
            return

        project = event.char in ("p", "u")

        self.draw.deleteItems(self.topo_cache)
        self.ma.construct_topo_2d(inner, project)

        for start, end in self.ma.D["ma_linepieces_" + inner]:
            s_e = self.ma.D["ma_coords_" + inner][start]
            e_e = self.ma.D["ma_coords_" + inner][end]
            i = self.draw.edge(s_e, e_e, fill="blue", width=1)
            self.topo_cache.append(i)

    def draw_all_balls(self, event):
        """Draw the medial ball of every point (without marker points)."""
        self.draw.deleteItems(self.highlight_cache)
        for p_i in range(self.ma.m):
            self.draw_medial_ball(p_i, with_points=False)

    def draw_closest_ball(self, event):
        """Highlight, in every linked window, the ball nearest to the mouse."""
        x, y = self.t_(event.x, event.y)
        q = np.array([x, y])
        p_i = self.kdtree.nn_index(q, 1)[0][0]

        for sbapp in self.sbapp_list:
            sbapp.highlight_single_ball(p_i)

    def highlight_single_ball(self, p_i):
        """Draw the medial and lfs ball of point ``p_i`` and show its values."""
        if self.inner_mode:
            inner = "in"
        else:
            inner = "out"

        # plot the shrink history of this ball:
        if self.shrinkhist_window is not None:
            self.shrinkhist_window.update_plot(p_i, inner)

        def get_ball_info_text(p_i):
            # Nothing to report until the "lfs" entry exists.
            if "lfs" not in self.ma.D:
                return ""
            return "lfs\t{0:.2f}\nr\t{2:.2f}\nlambda\t{1:.2f}\ntheta\t{3:.2f} ({4:.2f} deg)\nk\t{5}\nplanar\t{6:.2f} deg".format(
                self.ma.D["lfs"][p_i],
                self.ma.D["lam_" + inner][p_i],
                self.ma.D["ma_radii_" + inner][p_i],
                self.ma.D["theta_" + inner][p_i],
                (180 / math.pi) * math.acos(self.ma.D["theta_" + inner][p_i]),
                len(self.ma.D["ma_shrinkhist_" + inner][p_i]),
                (90 / math.pi) * (math.pi - math.acos(self.ma.D["theta_" + inner][p_i])),
            )

        self.draw.deleteItems(self.highlight_point_cache)
        self.draw_medial_ball(p_i)
        self.draw_lfs_ball(p_i)

        self.canvas.itemconfig(self.ball_info_text, text=get_ball_info_text(p_i))

    def draw_medial_ball(self, p_i, with_points=True):
        """Draw the medial ball of point ``p_i`` for the active side.

        With ``with_points`` the ball, the point itself, its second feature
        point and the ball centre go to ``highlight_point_cache``; otherwise
        only the ball is drawn and stored in ``highlight_cache``.  Points
        whose medial coordinate is NaN are skipped.
        """
        inner = "out"
        if self.inner_mode:
            inner = "in"

        p1x, p1y = self.ma.D["coords"][p_i][0], self.ma.D["coords"][p_i][1]
        ma_px, ma_py = self.ma.D["ma_coords_" + inner][p_i][0], self.ma.D["ma_coords_" + inner][p_i][1]

        if not np.isnan(ma_px):
            p2x, p2y = (
                self.ma.D["coords"][self.ma.D["ma_f2_" + inner][p_i]][0],
                self.ma.D["coords"][self.ma.D["ma_f2_" + inner][p_i]][1],
            )
            r = self.ma.D["ma_radii_" + inner][p_i]

            ball = self.draw.point(ma_px, ma_py, size=r * self.scale, width=1, fill="", outline="red", dash=(4, 2, 1))
            if with_points:
                self.highlight_point_cache.append(self.draw.point(p1x, p1y, size=4, fill="", outline="red", width=2))
                self.highlight_point_cache.append(self.draw.point(p2x, p2y, size=4, fill="", outline="purple", width=2))
                self.highlight_point_cache.append(
                    self.draw.point(ma_px, ma_py, size=4, fill="", outline="blue", dash=(1), width=2)
                )
                self.highlight_point_cache.append(ball)
            else:
                self.highlight_cache.append(ball)

    def draw_closest_lfs_ball(self, event):
        """Draw the lfs ball of the boundary point nearest to the mouse."""
        x, y = self.t_(event.x, event.y)
        q = np.array([x, y])
        p_i = self.kdtree.nn_index(q, 1)[0][0]

        self.draw_lfs_ball(p_i)

    def draw_lfs_ball(self, p_i):
        """Draw a circle of radius ``lfs`` around point ``p_i`` if available."""
        if "lfs" in self.ma.D:
            p1x, p1y = self.ma.D["coords"][p_i][0], self.ma.D["coords"][p_i][1]
            lfs = self.ma.D["lfs"][p_i]
            if not np.isnan(lfs):
                self.highlight_point_cache.append(
                    self.draw.point(p1x, p1y, size=lfs * self.scale, fill="", outline="#888888", dash=(2, 1))
                )

    def draw_decimate_lfs(self, epsilon):
        """Run ``ma.decimate_lfs(epsilon)``, report it and draw the result."""
        self.ma.decimate_lfs(epsilon)

        dropped, total = np.count_nonzero(self.ma.D["decimate_lfs"]), self.ma.m
        print("LFS decimation e={}: {} from {} points are dropped ({:.2f}%)".format(
            epsilon, dropped, total, float(dropped) / total * 100
        ))

        self.draw.deleteItems(self.poly_cache)
        i = self.draw.polygon_alternating_edge(self.ma.D["coords"][np.invert(self.ma.D["decimate_lfs"])], width=3)
        self.poly_cache.extend(i)

    def draw_decimate_ballco(self, xi, k):
        """Run ``ma.decimate_ballco(xi, k)``, report it and draw the result."""
        self.ma.decimate_ballco(xi, k)

        dropped, total = np.count_nonzero(self.ma.D["decimate_ballco"]), self.ma.m
        print("BALLCO decimation xi={}, k={}: {} from {} points are dropped ({:.2f}%)".format(
            xi, k, dropped, total, float(dropped) / total * 100
        ))

        self.draw.deleteItems(self.poly_cache)
        i = self.draw.polygon_alternating_edge(self.ma.D["coords"][np.invert(self.ma.D["decimate_ballco"])], width=3)
        self.poly_cache.extend(i)

    def draw_normal_map_lfs(self, event):
        """Draw normals scaled by the "lfs" values."""
        self.draw_normal_map("lfs", 40)

    def draw_normal_map_theta(self, event):
        """Draw normals scaled by the "theta_in" values."""
        self.draw_normal_map("theta_in", 30)

    def draw_normal_map_lam(self, event):
        """Draw normals scaled by the "lam_in" values."""
        self.draw_normal_map("lam_in", 30)

    def draw_normal_map_radii(self, event):
        """Draw normals scaled by the "ma_radii_in" values."""
        self.draw_normal_map("ma_radii_in", 30)

    def draw_normal_map_clear(self, event):
        """Remove the normal map overlay."""
        self.draw.deleteItems(self.normalmap_cache)

    def draw_normal_map(self, key, scale=30):
        """Draw every normal with a length proportional to ``self.ma.D[key]``.

        Lengths are ``scale`` times the value divided by the NaN-ignoring
        maximum of the values.
        """
        self.draw.deleteItems(self.normalmap_cache)
        max_val = np.nanmax(self.ma.D[key])
        for p, p_n, val in zip(self.ma.D["coords"], self.ma.D["normals"], self.ma.D[key]):
            s = scale * (val / max_val)
            i = self.draw.normal(p, p_n, s=s, width=2, fill="red")
            self.normalmap_cache.append(i)

    def clear_overlays(self, event):
        """Remove the topology, highlight and polygon overlays."""
        self.draw.deleteItems(self.topo_cache)
        self.draw.deleteItems(self.highlight_cache)
        self.draw.deleteItems(self.poly_cache)

    def deleteCache(self, stages):
        """Delete the cached canvas items of every stage number in ``stages``."""
        for s in stages:
            self.draw.deleteItems(self.stage_cache[s])
class W2VAverageEmbedding():
    """Sentence lookup based on averaged word2vec vectors and a FLANN index."""

    def __init__(self, embedding_file, tokenize=tokenize):
        """Load binary word2vec vectors from ``embedding_file``.

        ``tokenize`` is the callable used to split a sentence into words.
        """
        self.word2vec_file = embedding_file
        self.word2vec = KeyedVectors.load_word2vec_format(
            self.word2vec_file, binary=True)
        self.embedding_dim = self.word2vec.vector_size
        self.tokenize = tokenize
        # Filled by fit().
        self.sentence_list = []
        self.sentence_list_tokenized = []
        self.sentence_embedding = np.array([])
        self.flann = FLANN()

    def _average_bow(self, sentence):
        """Return the mean vector of the words of ``sentence`` that resolve.

        Words for which the lookup (or the addition) raises are skipped; when
        no word resolves the zero vector is returned.
        """
        total = np.zeros(self.embedding_dim)
        n_found = 0

        for token in sentence:
            try:
                total = np.add(total, self.word2vec[token])
            except Exception:
                continue
            n_found += 1

        if n_found == 0:
            return total
        return np.divide(total, n_found)

    def fit(self, sentence_list):
        """Add ``sentence_list`` to the stored sentences and rebuild the index."""
        for raw in sentence_list:
            self.sentence_list.append(raw)
            self.sentence_list_tokenized.append(self.tokenize(raw))

        # Alg.1 step 1: one averaged vector per stored sentence
        self.sentence_embedding = np.array([
            self._average_bow(tokens)
            for tokens in self.sentence_list_tokenized
        ])

        # make index for similarity search
        self.flann.build_index(self.sentence_embedding)

    def infer_vector(self, sentence):
        """Tokenize ``sentence`` and return its averaged word vector."""
        tokens = self.tokenize(sentence)
        return self._average_bow(tokens)

    def predict(self, sentence, topn=1):
        """Return ``[text, distance]`` pairs for the ``topn`` nearest sentences."""
        query = self.infer_vector(sentence)
        neighbours, distances = self.flann.nn_index(query, num_neighbors=topn)

        if topn != 1:
            # Only the first row (the single query) is used.
            neighbours, distances = neighbours[0], distances[0]

        return [
            [self.sentence_list[idx], distances[pos]]
            for pos, idx in enumerate(neighbours.tolist())
        ]
Beispiel #6
0
class FastDictionary(object):
    """Bounded store of ``(embedding, content)`` pairs searchable by k-NN.

    Every stored item gets an integer object id (``oid``) taken from a
    running counter.  Embeddings are added to a FLANN index; contents are
    kept in ``contents_lookup`` and insertion order in ``p_queue`` so that
    the oldest item can be evicted once more than ``maxlen`` items are held.
    """

    def __init__(self, maxlen, seed=0, cores=4, trees=1):
        """Create an empty dictionary holding at most ``maxlen`` items.

        ``seed``, ``cores`` and ``trees`` are passed to the FLANN constructor.
        """
        self.flann = FLANN(
            algorithm='kdtree',
            random_seed=seed,
            cores=cores,
            trees=trees,
        )

        # Next oid to hand out.
        self.counter = 0

        self.contents_lookup = {}  # {oid: (e, q)}
        # Queue of oids in insertion order (oldest on the left).
        self.p_queue = collections.deque()
        self.maxlen = maxlen

    def save(self, dir, fname, it=None):
        """Pickle contents, queue and ``maxlen`` to ``dir/fname[-it]``."""
        fname = f'{fname}' if it is None else f'{fname}-{it}'

        with open(os.path.join(dir, fname), 'wb') as f:
            pickle.dump((self.contents_lookup, self.p_queue, self.maxlen), f)

    def restore(self, fname):
        """Replace the current content with a snapshot written by :meth:`save`.

        Items are re-numbered from 0 because the index is rebuilt from
        scratch.  Any previous content of this instance is discarded.

        Raises:
            AssertionError: if the snapshot was saved with another ``maxlen``.
        """
        # NOTE(security): pickle.load can execute arbitrary code; only
        # restore snapshots from a trusted source.
        with open(fname, 'rb') as f:
            _contents_lookup, _p_queue, maxlen = pickle.load(f)

            assert self.maxlen == maxlen, (self.maxlen, maxlen)

        # build_index() below replaces the whole index, so ids, lookup table
        # and queue must restart from a clean state as well; otherwise a
        # restore into a used instance would mix stale entries with ids that
        # no longer match the index.
        self.counter = 0
        self.contents_lookup.clear()
        self.p_queue.clear()

        new_oid_lookup = {}
        E, Q = [], []
        for oid, (e, q) in _contents_lookup.items():
            E.append(e)
            Q.append(q)

            new_oid_lookup[oid] = self.counter
            self.counter += 1

        E = np.array(E)

        # Rebuild KD-Tree
        self.flann.build_index(E)

        # Reallocate contents_lookup with views into the indexed array
        for new_oid, (e, q) in enumerate(zip(E, Q)):
            assert e.base is E
            self.contents_lookup[new_oid] = (e, q)

        # Rebuild the queue, dropping oids that had no content any more
        for oid in _p_queue:
            if oid in new_oid_lookup:
                self.p_queue.append(new_oid_lookup[oid])

    def add(self, E, Contents):
        """Insert the embeddings ``E`` with their ``Contents``.

        ``E`` must be a 2-D array without NaN whose second dimension is 64.
        When the number of stored items exceeds ``maxlen`` the oldest live
        item is removed from the index and the lookup table.

        Raises:
            Exception: if the index reports an error while removing a point.
        """
        assert not np.isnan(E).any(), ('NaN Detected in Add',
                                       np.argwhere(np.isnan(E)))
        assert len(E) == len(Contents)
        assert E.ndim == 2 and E.shape[1] == 64, E.shape

        if self.counter == 0:
            self.flann.build_index(E)
        else:
            self.flann.add_points(E)
        Oid = np.arange(self.counter, self.counter + len(E), dtype=np.uint32)
        self.counter += len(E)

        for oid, e, content in zip(Oid, E, Contents):
            assert e.base is E or e.base is E.base

            self.contents_lookup[oid] = (e, content)
            self.p_queue.append(oid)

            if len(self.contents_lookup) > self.maxlen:
                # Skip oids invalidated by update(); they are no longer in
                # contents_lookup.
                while self.p_queue[0] not in self.contents_lookup:
                    self.p_queue.popleft()

                old_oid = self.p_queue.popleft()

                ret = self.flann.remove_point(old_oid)
                if ret <= 0:
                    raise Exception(f'remove point error {ret}')
                del self.contents_lookup[old_oid]

    def update(self, Oid, E, Contents):
        """Replace the items ``Oid`` by new embeddings ``E`` and ``Contents``.

        This is a remove followed by an add: the new items receive fresh
        oids.  The old oids stay in ``p_queue``; they are skipped later by
        :meth:`add`, because removing from the middle of the queue is not
        cheap.

        Raises:
            Exception: if the index reports an error while removing a point.
        """
        assert not np.isnan(E).any(), ('NaN Detected in Updating',
                                       np.argwhere(np.isnan(E)))
        assert len(np.unique(Oid)) == len(Oid)
        assert E.ndim == 2 and E.shape[1] == 64, E.shape

        # add new Embeddings
        self.flann.add_points(E)
        NewOid = np.arange(self.counter, self.counter + len(E),
                           dtype=np.uint32)
        self.counter += len(E)

        for oid, new_oid, e, content in zip(Oid, NewOid, E, Contents):
            assert e.base is E or e.base is E.base

            self.contents_lookup[new_oid] = (e, content)
            self.p_queue.append(new_oid)

            # delete from kd-tree
            ret = self.flann.remove_point(oid)
            if ret <= 0:
                raise Exception(f'remove point error {ret}')
            # delete from contents_lookup
            del self.contents_lookup[oid]
            # The stale entry in p_queue is dropped lazily by add().

    def query_knn(self, E, K=100):
        """Look up the ``K`` nearest stored items for every row of ``E``.

        Returns:
            For a 1-D ``E``: ``(oids, embeddings, contents)`` truncated to
            the number of neighbours actually found.  For a 2-D ``E``:
            ``(Oids, NN_E, NN_Q, Len)`` where ``Len[b]`` is the number of
            valid neighbours of row ``b``; unused slots of ``NN_E`` and
            ``NN_Q`` are zero.
        """
        assert not np.isnan(E).any(), ('NaN Detected in Querying',
                                       np.argwhere(np.isnan(E)))

        flatten = False
        if E.ndim == 1:
            E = E[None]
            flatten = True

        # The third return value is compared against len(E) * K below, i.e.
        # it is treated as the total number of neighbours found.
        Oids, Dists, C = self.flann.nn_index(E, num_neighbors=K)

        if C != len(E) * K:
            print(
                f'Not enough neighbors ({np.count_nonzero(Dists>=0.)} == {C}) != {len(E)}*{K}, rebuild and try again...'
            )
            self.flann.rebuild_index()
            Oids, Dists, C = self.flann.nn_index(E, num_neighbors=K)

        # TODO: Dists sometimes becomes NaN
        if np.count_nonzero(np.isnan(Dists)) > 0:
            print('warning: NaN Returned as a distance')
            Dists = np.nan_to_num(Dists, copy=False)

        NN_E = np.zeros((len(E), K, E.shape[1]), np.float32)
        NN_Q = np.zeros((len(E), K), np.float32)
        # Non-negative distances are counted as valid neighbours.
        Len = np.count_nonzero(Dists >= 0., axis=1)

        assert np.sum(Len) == C, f'{np.sum(Len)} != {C}'
        assert C > 0, 'Nothing returned...'

        for b, oids in enumerate(Oids):
            # drop the tail if not enough neighbours were retrieved
            for k, oid in enumerate(oids[:Len[b]]):
                e, q = self.contents_lookup[oid]

                NN_E[b, k] = e
                NN_Q[b, k] = q

        if flatten:
            return Oids[0][:Len[0]], NN_E[0][:Len[0]], NN_Q[0][:Len[0]]
        else:
            return Oids, NN_E, NN_Q, Len
Beispiel #7
0
class dcelVis(Tk):
    """Tk window that draws a DCEL and lets the user step through its parts.

    Keys: ``q`` quits, ``h`` prints the help text, ``p`` prints the DCEL,
    ``e``/``v``/``f`` step through half-edges, vertices and faces.  When
    ``WITH_FLANN`` is set, moving the mouse reports the closest vertex and
    releasing a mouse button removes it.
    """

    def __init__(self, dcel):
        """Build the window, register the bindings and draw ``dcel``."""
        Tk.__init__(self)
        self.sizex = 700
        self.sizey = 700
        self.window_diagonal = math.sqrt(self.sizex**2 + self.sizey**2)
        self.title("DCELvis")
        self.resizable(0,0)

        self.bind('q', self.exit)
        self.bind('h', self.print_help)
        self.bind('p', self.print_dcel)

        self.bind('e', self.iteratehedge)
        self.bind('v', self.iteratevertex)
        self.bind('f', self.iterateface)
        self.canvas = Canvas(self, bg="white", width=self.sizex, height=self.sizey)
        self.canvas.pack()

        if WITH_FLANN:
            self.bind("<ButtonRelease>", self.remove_closest)
            self.bind("<Motion>", self.report_closest)

        self.coordstext = self.canvas.create_text(self.sizex, self.sizey, anchor='se', text='')
        self.info_text = self.canvas.create_text(10, self.sizey, anchor='sw', text='')

        # Translation part of the data -> screen transform (see t / t_).
        self.tx = 0
        self.ty = 0

        # Canvas item ids of the current highlight and of the background DCEL.
        self.highlight_cache = []
        self.bgdcel_cache = []

        self.draw = draw(self)

        if WITH_FLANN:
            self.kdtree = FLANN()
        self.D = None
        self.bind_dcel(dcel)
        self.print_help(None)

    def t(self, x, y):
        """transform data coordinates to screen coordinates"""
        x = (x * self.scale) + self.tx
        y = self.sizey - ((y * self.scale) + self.ty)
        return (x,y)

    def t_(self, x, y):
        """transform screen coordinates to data coordinates"""
        x = (x - self.tx)/self.scale
        y = (self.sizey - y - self.ty)/self.scale
        return (x,y)

    def print_help(self, event):
        """Print the module-level ``HELP`` text."""
        print(HELP)

    def print_dcel(self, event):
        """Print the bound DCEL."""
        print(self.D)

    def bind_dcel(self, dcel):
        """Fit the view to the bounding box of ``dcel`` and draw it.

        The larger bounding-box side is scaled to 80% of the window and the
        box centre is mapped to the window centre.
        """
        first = dcel.vertexList[0]
        minx = maxx = first.x
        miny = maxy = first.y
        for v in dcel.vertexList[1:]:
            minx = min(minx, v.x)
            miny = min(miny, v.y)
            maxx = max(maxx, v.x)
            maxy = max(maxy, v.y)

        d_x = maxx-minx
        d_y = maxy-miny
        c_x = minx + (d_x)/2
        c_y = miny + (d_y)/2

        if d_x > d_y:
            self.scale = (self.sizex*0.8) / d_x
        elif d_y > 0:
            self.scale = (self.sizey*0.8) / d_y
        else:
            # All vertices coincide: any scale works, avoid dividing by zero.
            self.scale = 1.0

        self.tx = self.sizex/2 - c_x*self.scale
        self.ty = self.sizey/2 - c_y*self.scale

        self.D = dcel

        self.draw_dcel()

    def draw_dcel(self):
        """Redraw faces, half-edges and vertices and restart the iterators."""
        self.draw.deleteItems(self.bgdcel_cache)
        self.draw_dcel_faces()
        self.draw_dcel_hedges()
        self.draw_dcel_vertices()

        self.hedge_it = self.type_iterator('hedge')
        self.face_it = self.type_iterator('face')
        self.vertex_it = self.type_iterator('vertex')

    def getClosestVertex(self, screenx, screeny):
        """Return the vertex nearest to a screen position, or None if empty.

        The index is rebuilt on every call from the current vertex list.
        """
        vertices = self.D.vertexList
        if len(vertices) == 0:
            return None
        points = np.array([[v.x, v.y] for v in vertices])
        self.kdtree.build_index(points, algorithm='linear')

        x,y = self.t_(screenx, screeny)
        q = np.array([x,y])
        v_i = self.kdtree.nn_index(q,1)[0][0]

        return vertices[v_i]

    def remove_closest(self, event):
        """Remove the vertex nearest to the event position and redraw."""
        v = self.getClosestVertex(event.x, event.y)
        if v is None:
            return
        self.D.remove_vertex( v )
        self.draw_dcel()

    def report_closest(self, event):
        """Show the vertex nearest to the event position in the info text."""
        v = self.getClosestVertex(event.x, event.y)
        s = '' if v is None else str(v)
        self.canvas.itemconfig(self.info_text, text=s )

    def iteratehedge(self, event):
        """Explain the next half-edge, wrapping around at the end."""
        try:
            next(self.hedge_it)
        except StopIteration:
            self.hedge_it = self.type_iterator('hedge')
            # Default value: do not raise when there are no half-edges.
            next(self.hedge_it, None)

    def iterateface(self, event):
        """Explain the next face, wrapping around at the end."""
        try:
            next(self.face_it)
        except StopIteration:
            self.face_it = self.type_iterator('face')
            next(self.face_it, None)

    def iteratevertex(self, event):
        """Explain the next vertex, wrapping around at the end."""
        try:
            next(self.vertex_it)
        except StopIteration:
            self.vertex_it = self.type_iterator('vertex')
            next(self.vertex_it, None)

    def type_iterator(self, q='hedge'):
        """Generator that explains each element of the list selected by ``q``.

        ``q`` is 'hedge', 'face' or 'vertex'; every step prints and
        highlights one element.
        """
        if q == 'hedge':
            for e in self.D.hedgeList:
                yield self.explain_hedge(e)
        elif q == 'face':
            for e in self.D.faceList:
                yield self.explain_face(e)
        elif q == 'vertex':
            for e in self.D.vertexList:
                yield self.explain_vertex(e)

    def explain_hedge(self, e):
        """Print half-edge ``e`` and highlight it with its neighbourhood."""
        print(e)
        self.draw.deleteItems(self.highlight_cache)

        i1 = self.draw_dcel_face(e.incidentFace, fill='#ffc0bf', outline='')
        i4 = self.draw_dcel_vertex(e.origin, size=7, fill='red', outline='')
        i2 = self.draw_dcel_hedge(e.next, arrow=LAST, arrowshape=(7,6,2), width=2, fill='#1a740c')
        i3 = self.draw_dcel_hedge(e.previous, arrow=LAST, arrowshape=(7,6,2), width=2, fill='#0d4174')
        i5 = self.draw_dcel_hedge(e, arrow=LAST, arrowshape=(7,6,2), width=3, fill='red')
        i6 = self.draw_dcel_hedge(e.twin, arrow=LAST, arrowshape=(7,6,2), width=3, fill='orange')

        self.highlight_cache = [i1,i2,i3,i4,i5,i6]

    def explain_vertex(self, v):
        """Print vertex ``v`` and highlight it with its incident edge."""
        print(v)
        self.draw.deleteItems(self.highlight_cache)

        i1 = self.draw_dcel_vertex(v, size=7, fill='red', outline='')
        i2 = self.draw_dcel_hedge(v.incidentEdge, arrow=LAST, arrowshape=(7,6,2), width=2, fill='red')

        self.highlight_cache = [i1,i2]

    def explain_face(self, f):
        """Print face ``f`` and highlight it with its outer component."""
        print(f)
        self.draw.deleteItems(self.highlight_cache)

        i1 = self.draw_dcel_face(f, fill='#ffc0bf', outline='')
        i2 = self.draw_dcel_hedge(f.outerComponent, arrow=LAST, arrowshape=(7,6,2), width=3, fill='red')

        self.highlight_cache = [i1,i2]

    def draw_dcel_vertices(self):
        """Draw every vertex with the default style."""
        for v in self.D.vertexList:
            self.bgdcel_cache.append(self.draw_dcel_vertex(v))

    def draw_dcel_vertex(self, v, **options):
        """Draw vertex ``v``; ``options`` override the default point style."""
        if options == {}:
            options = {'size':5, 'fill':'blue', 'outline':''}

        return self.draw.point(v.x, v.y, **options)

    def draw_dcel_hedges(self):
        """Draw every half-edge with the default style."""
        for e in self.D.hedgeList:
            self.bgdcel_cache.append(self.draw_dcel_hedge(e))

    def draw_dcel_hedge(self, e, **options):
        """Draw half-edge ``e`` as an arrow from its origin towards its twin's origin.

        The arrow is shifted sideways by a small offset and shortened by
        0.25 data units; ``options`` override the default edge style.
        """
        if options == {}:
            options = {'arrow':LAST, 'arrowshape':(7,6,2), 'fill': '#444444'}

        offset = .02
        sx,sy = e.origin.x, e.origin.y
        ex,ey = e.twin.origin.x, e.twin.origin.y
        vx,vy = ex - sx, ey - sy
        v = vec2(vx, vy)
        # Sideways shift, presumably so that twin half-edges do not overlap.
        v_ = v.orthogonal_l()*offset

        v = v - v.normalized()*.25
        ex, ey = sx+v.x, sy+v.y

        return self.draw.edge( (sx+v_.x, sy+v_.y), (ex+v_.x, ey+v_.y) , **options)

    def draw_dcel_faces(self):
        """Draw every face with the default style."""
        for f in self.D.faceList:
            self.bgdcel_cache.append(self.draw_dcel_face(f))

    def draw_dcel_face(self, f, **options):
        """Draw face ``f`` as a polygon; the infinite face is skipped.

        Returns the value of ``self.draw.polygon`` or None for the infinite
        face.
        """
        if f == self.D.infiniteFace:
            print('Im not drawing infiniteFace')
            return

        if options == {}:
            options = {'fill':'#eeeeee', 'outline':''}

        vlist = [ (v.x, v.y) for v in f.loopOuterVertices() ]
        return self.draw.polygon(vlist, **options)

    def find_closest(self, event):
        """Print the canvas item closest to the event position."""
        x = self.canvas.canvasx(event.x)
        y = self.canvas.canvasy(event.y)
        print(self.canvas.find_closest(x, y))

    def exit(self, event):
        """Say goodbye, stop the main loop and destroy the window."""
        print("bye bye.")
        self.quit()
        self.destroy()
Beispiel #8
0
def full_displacement(shap,
                      supp,
                      t,
                      pol_en=False,
                      cent=None,
                      theta_param=1,
                      pol_mod=False,
                      coord_map=None,
                      knn=None,
                      eps=1.e-16):
    """Computes all quantities required to compute displacement interpolation at steps ``t``.

    Parameters
    ----------
    shap : sequence
        ``(shap[0], shap[1])``, the size of the coordinate grid.
    supp : array, shape (n, 2)
        Pairs of flat grid indices; column 0 is the start and column 1 the
        end of each displacement.
        NOTE(review): the indices are decoded with ``shap[0]`` for both the
        quotient and the remainder, which matches the ``coord_map`` layout
        only for square grids -- confirm for non-square inputs.
    t : scalar or sequence
        Interpolation step(s); every step ``t_j`` yields the points
        ``(1 - t_j) * start + t_j * end``.
    pol_en : bool
        When building ``coord_map``, convert it with
        :func:`utils.polar_coord_cloud` around ``cent``.
    cent : array or None
        Centre passed to ``polar_coord_cloud``; defaults to the grid centre.
        Only used (and only filled in) when ``coord_map`` is built here with
        ``pol_en`` set.
    theta_param : scalar
        Factor applied to the second polar coordinate (after division by
        ``2 * pi``).
    pol_mod : bool
        Additionally multiply the second polar coordinate by the first one.
    coord_map : array, shape (shap[0], shap[1], 2), or None
        Coordinates of every grid position; built when ``None``.
    knn : object or None
        Nearest-neighbour index exposing ``nn_index(points, k)``. A new
        ``pyflann.FLANN`` index over ``coord_map`` is built when
        ``coord_map`` is ``None`` (any index passed in is then replaced) or
        when ``knn`` is ``None``.
    eps : float
        Added to the neighbour distances before inverting them.

    Returns
    -------
    neighbors_graph : int array, shape (n, 2, 4, size(t))
        Decoded grid positions of the 4 nearest neighbours of every
        interpolated point.
    weights_neighbors : array, shape (n, 4, size(t))
        Inverse-distance weights of these neighbours; they sum to one over
        the neighbour axis.
    cent, coord_map, knn
        The (possibly newly built) centre, coordinate map and index, so they
        can be passed back in on a later call.

    Calls:

    * :func:`utils.polar_coord_cloud`
    """
    import gc
    from numpy import arange, array, float64, ones, pi, size, zeros

    n_rows, n_cols = shap[0], shap[1]
    n_pix = n_rows * n_cols
    n_neighbors = 4

    # Decide before coord_map is replaced: a freshly built map always needs
    # a fresh index, a supplied map only when no index came with it.
    build_knn = coord_map is None or knn is None

    if coord_map is None:
        coord_map = zeros((n_rows, n_cols, 2))
        coord_map[:, :, 0] = arange(0, n_rows).reshape(
            (n_rows, 1)).dot(ones((1, n_cols)))
        coord_map[:, :, 1] = ones(
            (n_rows, 1)).dot(arange(0, n_cols).reshape((1, n_cols)))
        if pol_en:
            # Imported lazily: only this branch needs the project helper.
            from utils import polar_coord_cloud
            if cent is None:
                cent = array([shap[0] / 2, shap[1] / 2])
            cloud_in = zeros((2, n_pix))
            cloud_in[0, :] = coord_map[:, :, 0].reshape((n_pix, ))
            cloud_in[1, :] = coord_map[:, :, 1].reshape((n_pix, ))
            cloud_out = polar_coord_cloud(cloud_in, cent)
            coord_map[:, :, 0] = cloud_out[0, :].reshape((n_rows, n_cols))
            coord_map[:, :, 1] = theta_param * cloud_out[1, :].reshape(
                (n_rows, n_cols)) / (2 * pi)
            if pol_mod:
                coord_map[:, :, 1] *= coord_map[:, :, 0]

    if build_knn:
        # Imported lazily so a caller-supplied index works without pyflann.
        from pyflann import FLANN
        knn = FLANN()
        knn.build_index(array(coord_map.reshape((n_pix, 2)), dtype=float64))

    n_supp = supp.shape[0]
    n_t = size(t)
    t_steps = array(t, dtype=float64).reshape((n_t, ))

    # Matching coordinates: decode the flat indices of both end points.
    start_i = (supp[:, 0] / shap[0]).astype(int)
    start_j = (supp[:, 0] % shap[0]).astype(int)
    end_i = (supp[:, 1] / shap[0]).astype(int)
    end_j = (supp[:, 1] % shap[0]).astype(int)
    start = coord_map[start_i, start_j, :]
    end = coord_map[end_i, end_j, :]

    # advection_points[i, :, j] = (1 - t_j) * start_i + t_j * end_i
    advection_points = ((1 - t_steps) * start[:, :, None] +
                        t_steps * end[:, :, None])

    neighbors_graph = zeros((n_supp, 2, n_neighbors, n_t))
    weights_neighbors = zeros((n_supp, n_neighbors, n_t))

    for j in range(n_t):
        if n_t > 1:
            print("Wavelength  %d / %d" % (j + 1, n_t))
        neighbors_graph_temp, dist_neighbors = knn.nn_index(
            advection_points[:, :, j], n_neighbors)
        neighbors_graph[:, 0, :, j] = neighbors_graph_temp / shap[0]
        neighbors_graph[:, 1, :, j] = neighbors_graph_temp % shap[0]
        inv_dist = (dist_neighbors + eps)**(-1)
        weights_neighbors[:, :, j] = inv_dist / inv_dist.sum(axis=1).reshape(
            (n_supp, 1))
    gc.collect()

    return neighbors_graph.astype(int), weights_neighbors, cent, coord_map, knn
Beispiel #9
0
class DND:
    """Bounded key/value memory queried through a nearest-neighbour index.

    (Presumably the "differentiable neural dictionary" of Neural Episodic
    Control -- the name is not spelled out in this file.)

    Keys and values are stored as ``Parameter`` tensors so they can be
    trained with the RMSprop optimizer that is (re)created whenever they are
    replaced. Inserts are buffered and only applied by ``commit_insert``,
    which also applies the LRU reordering, evicts the oldest entries beyond
    ``max_memory`` and rebuilds the FLANN index.
    """

    def __init__(self, kernel, num_neighbors, max_memory, lr):
        self.kernel = kernel
        self.num_neighbors = num_neighbors
        self.max_memory = max_memory
        self.lr = lr
        self.keys = None
        self.values = None
        self.kdtree = FLANN()

        # key_cache stores a cache of all keys that exist in the DND
        # This makes DND updates efficient
        self.key_cache = {}
        # stale_index is a flag that indicates whether or not the index in self.kdtree is stale
        # This allows us to only rebuild the kdtree index when necessary
        self.stale_index = True
        # indexes_to_be_updated is the set of indexes to be updated on a call to update_params
        # This allows us to rebuild only the keys of key_cache that need to be rebuilt when necessary
        self.indexes_to_be_updated = set()

        # Keys and value to be inserted into self.keys and self.values when commit_insert is called
        self.keys_to_be_inserted = None
        self.values_to_be_inserted = None

        # Move recently used lookup indexes
        # These should be moved to the back of self.keys and self.values to get LRU property
        self.move_to_back = set()

    def _rebuild_index(self):
        """Rebuild the FLANN index from the current keys and mark it fresh.

        Building is skipped while no key is stored. The emptiness test uses
        the number of stored keys; comparing the key array against ``[]``
        is shape dependent and not a reliable emptiness check.
        """
        if len(self.keys) > 0:
            self.kdtree.build_index(self.keys.data.cpu().numpy())
        self.stale_index = False

    def get_index(self, key):
        """
        If key exists in the DND, return its index
        Otherwise, return None

        Pending inserts are committed first when the index is stale. The
        index returned is that of the nearest neighbour of ``key``.
        """
        if self.key_cache.get(tuple(key.data.cpu().numpy()[0])) is not None:
            if self.stale_index:
                self.commit_insert()
            return int(self.kdtree.nn_index(key.data.cpu().numpy(), 1)[0][0])
        else:
            return None

    def update(self, value, index):
        """
        Set self.values[index] = value

        The values are re-wrapped in a new ``Parameter``, so the optimizer
        is recreated as well.
        """
        values = self.values.data
        values[index] = value[0].data
        self.values = Parameter(values)
        self.optimizer = optim.RMSprop([self.keys, self.values], lr=self.lr)

    def insert(self, key, value):
        """
        Insert key, value pair into DND

        The pair is only buffered here; ``commit_insert`` moves it into
        ``self.keys`` / ``self.values``.
        """
        if self.keys_to_be_inserted is None:
            # Initial insert
            self.keys_to_be_inserted = key.data
            self.values_to_be_inserted = value.data
        else:
            self.keys_to_be_inserted = torch.cat(
                [self.keys_to_be_inserted, key.data], 0)
            self.values_to_be_inserted = torch.cat(
                [self.values_to_be_inserted, value.data], 0)
        self.key_cache[tuple(key.data.cpu().numpy()[0])] = 0
        self.stale_index = True

    def commit_insert(self):
        """
        Apply buffered inserts, LRU reordering and eviction, then rebuild
        the optimizer and the nearest-neighbour index.
        """
        if self.keys is None or len(self.keys) == 0:
            self.keys = Parameter(self.keys_to_be_inserted)
            self.values = Parameter(self.values_to_be_inserted)
        elif self.keys_to_be_inserted is not None:
            self.keys = Parameter(
                torch.cat([self.keys.data, self.keys_to_be_inserted], 0))
            self.values = Parameter(
                torch.cat([self.values.data, self.values_to_be_inserted], 0))

        # Move most recently used key-value pairs to the back
        if len(self.move_to_back) != 0:
            # Sort both groups: set iteration order is unspecified, and the
            # untouched entries must keep their relative age for eviction
            # below to drop the oldest ones first.
            kept = sorted(set(range(len(self.keys))) - self.move_to_back)
            order = kept + sorted(self.move_to_back)
            self.keys = Parameter(self.keys.data[order])
            self.values = Parameter(self.values.data[order])
            self.move_to_back = set()

        if len(self.keys) > self.max_memory:
            # Expel oldest key to maintain total memory
            for key in self.keys[:-self.max_memory]:
                del self.key_cache[tuple(key.data.cpu().numpy())]
            self.keys = Parameter(self.keys[-self.max_memory:].data)
            self.values = Parameter(self.values[-self.max_memory:].data)
        self.keys_to_be_inserted = None
        self.values_to_be_inserted = None
        self.optimizer = optim.RMSprop([self.keys, self.values], lr=self.lr)
        self._rebuild_index()

    def lookup(self, lookup_key, update_flag=False):
        """
        Perform DND lookup
        If update_flag == True, add the nearest neighbor indexes to self.indexes_to_be_updated
        Otherwise the indexes are added to self.move_to_back

        Returns the kernel-weighted average of the values of the nearest
        neighbours. Raises ZeroDivisionError when the only neighbour is the
        exact match that gets skipped.
        """
        lookup_indexes = self.kdtree.nn_index(
            lookup_key.data.cpu().numpy(), min(self.num_neighbors, len(self.keys)))[0][0]
        output = 0
        kernel_sum = 0
        for i, index in enumerate(lookup_indexes):
            if i == 0 and self.key_cache.get(tuple(lookup_key[0].data.cpu().numpy())) is not None:
                # If a key exactly equal to lookup_key is used in the DND lookup calculation
                # then the loss becomes non-differentiable. Just skip this case to avoid the issue.
                continue
            if update_flag:
                self.indexes_to_be_updated.add(int(index))
            else:
                self.move_to_back.add(int(index))
            kernel_val = self.kernel(self.keys[int(index)], lookup_key[0])
            output += kernel_val * self.values[int(index)]
            kernel_sum += kernel_val
        output = output / kernel_sum
        return output

    def update_params(self):
        """
        Update self.keys and self.values via backprop
        Use self.indexes_to_be_updated to update self.key_cache accordingly and rebuild the index of self.kdtree
        """
        # The optimizer step changes the key vectors, so their old cache
        # entries are dropped before the step and re-added afterwards.
        for index in self.indexes_to_be_updated:
            del self.key_cache[tuple(self.keys[index].data.cpu().numpy())]
        self.optimizer.step()
        self.optimizer.zero_grad()
        for index in self.indexes_to_be_updated:
            self.key_cache[tuple(self.keys[index].data.cpu().numpy())] = 0
        self.indexes_to_be_updated = set()
        self._rebuild_index()
Beispiel #10
0
class dcelVis(Tk):
    """Tk window that draws a DCEL and lets the user step through its parts.

    Key bindings: ``q`` quits, ``h`` prints the help text, ``p`` prints the
    DCEL, ``e`` / ``v`` / ``f`` highlight the next half-edge / vertex / face.
    When ``WITH_FLANN`` is set, moving the mouse reports the closest vertex
    and releasing a mouse button removes it.
    """

    def __init__(self, dcel):
        Tk.__init__(self)
        self.sizex = 700
        self.sizey = 700
        self.window_diagonal = math.sqrt(self.sizex**2 + self.sizey**2)
        self.title("DCELvis")
        self.resizable(0,0)

        self.bind('q', self.exit)
        self.bind('h', self.print_help)
        self.bind('p', self.print_dcel)

        self.bind('e', self.iteratehedge)
        self.bind('v', self.iteratevertex)
        self.bind('f', self.iterateface)
        self.canvas = Canvas(self, bg="white", width=self.sizex, height=self.sizey)
        self.canvas.pack()

        if WITH_FLANN:
            self.bind("<ButtonRelease>", self.remove_closest)
            self.bind("<Motion>", self.report_closest)

        self.coordstext = self.canvas.create_text(self.sizex, self.sizey, anchor='se', text='')
        self.info_text = self.canvas.create_text(10, self.sizey, anchor='sw', text='')

        self.tx = 0
        self.ty = 0

        # Canvas items of the current highlight and of the background DCEL.
        self.highlight_cache = []
        self.bgdcel_cache = []

        self.draw = draw(self)

        if WITH_FLANN:
            self.kdtree = FLANN()
        self.D = None
        self.bind_dcel(dcel)
        self.print_help(None)

    def t(self, x, y):
        """transform data coordinates to screen coordinates"""
        x = (x * self.scale) + self.tx
        y = self.sizey - ((y * self.scale) + self.ty)
        return (x,y)

    def t_(self, x, y):
        """transform screen coordinates to data coordinates"""
        x = (x - self.tx)/self.scale
        y = (self.sizey - y - self.ty)/self.scale
        return (x,y)

    def print_help(self, event):
        """Print the module-level help text."""
        print(HELP)

    def print_dcel(self, event):
        """Print the bound DCEL."""
        print(self.D)

    def bind_dcel(self, dcel):
        """Bind ``dcel``, fit it into 80% of the window and draw it.

        The scale is chosen from the larger extent of the vertex bounding
        box and the box is centred in the window. Raises IndexError when the
        DCEL has no vertices.
        """
        vertices = dcel.vertexList
        minx = maxx = vertices[0].x
        miny = maxy = vertices[0].y
        for v in vertices[1:]:
            minx, maxx = min(minx, v.x), max(maxx, v.x)
            miny, maxy = min(miny, v.y), max(maxy, v.y)

        d_x = maxx-minx
        d_y = maxy-miny
        c_x = minx + (d_x)/2
        c_y = miny + (d_y)/2

        if d_x > d_y:
            self.scale = (self.sizex*0.8) / d_x
        elif d_y:
            self.scale = (self.sizey*0.8) / d_y
        else:
            # All vertices coincide: there is no extent to fit, so fall back
            # to a unit scale instead of dividing by zero.
            self.scale = 1.0

        self.tx = self.sizex/2 - c_x*self.scale
        self.ty = self.sizey/2 - c_y*self.scale

        self.D = dcel

        self.draw_dcel()

    def draw_dcel(self):
        """Redraw the whole DCEL and restart the three element iterators."""
        self.draw.deleteItems(self.bgdcel_cache)
        self.draw_dcel_faces()
        self.draw_dcel_hedges()
        self.draw_dcel_vertices()

        self.hedge_it = self.type_iterator('hedge')
        self.face_it = self.type_iterator('face')
        self.vertex_it = self.type_iterator('vertex')

    def getClosestVertex(self, screenx, screeny):
        """Return the DCEL vertex closest to the given screen position.

        The index is rebuilt from the current vertex list on every call.
        """
        vertices = [np.array([v.x,v.y]) for v in self.D.vertexList]
        self.kdtree.build_index(np.array(vertices), algorithm='linear')

        x,y = self.t_(screenx, screeny)
        q = np.array([x,y])
        v_i = self.kdtree.nn_index(q,1)[0][0]

        return self.D.vertexList[v_i]

    def remove_closest(self, event):
        """Remove the vertex closest to the event position and redraw."""
        v = self.getClosestVertex(event.x, event.y)
        self.D.remove_vertex( v )
        self.draw_dcel()

    def report_closest(self, event):
        """Show the vertex closest to the event position in the info text."""
        s = str(self.getClosestVertex(event.x, event.y))
        self.canvas.itemconfig(self.info_text, text=s )

    def _advance(self, kind):
        """Step the ``<kind>_it`` iterator, restarting it once it is exhausted.

        ``next()`` is used instead of the Python 2-only ``.next()`` method.
        When the restarted iterator is empty as well, nothing happens.
        """
        attr = kind + '_it'
        try:
            next(getattr(self, attr))
        except StopIteration:
            restarted = self.type_iterator(kind)
            setattr(self, attr, restarted)
            next(restarted, None)

    def iteratehedge(self, event):
        """Highlight the next half-edge, wrapping around at the end."""
        self._advance('hedge')

    def iterateface(self, event):
        """Highlight the next face, wrapping around at the end."""
        self._advance('face')

    def iteratevertex(self, event):
        """Highlight the next vertex, wrapping around at the end."""
        self._advance('vertex')

    def type_iterator(self, q='hedge'):
        """Generator that explains one element of kind ``q`` per step.

        ``q`` is ``'hedge'``, ``'face'`` or ``'vertex'``; any other value
        yields nothing.
        """
        if q == 'hedge':
            for e in self.D.hedgeList:
                yield self.explain_hedge(e)
        elif q == 'face':
            for e in self.D.faceList:
                yield self.explain_face(e)
        elif q == 'vertex':
            for e in self.D.vertexList:
                yield self.explain_vertex(e)

    def explain_hedge(self, e):
        """Print half-edge ``e`` and highlight it with its face, origin, next, previous and twin."""
        print(e)
        self.draw.deleteItems(self.highlight_cache)

        i1 = self.draw_dcel_face(e.incidentFace, fill='#ffc0bf', outline='')
        i4 = self.draw_dcel_vertex(e.origin, size=7, fill='red', outline='')
        i2 = self.draw_dcel_hedge(e.next, arrow=LAST, arrowshape=(7,6,2), width=2, fill='#1a740c')
        i3 = self.draw_dcel_hedge(e.previous, arrow=LAST, arrowshape=(7,6,2), width=2, fill='#0d4174')
        i5 = self.draw_dcel_hedge(e, arrow=LAST, arrowshape=(7,6,2), width=3, fill='red')
        i6 = self.draw_dcel_hedge(e.twin, arrow=LAST, arrowshape=(7,6,2), width=3, fill='orange')

        self.highlight_cache = [i1,i2,i3,i4,i5,i6]

    def explain_vertex(self, v):
        """Print vertex ``v`` and highlight it together with its incident edge."""
        print(v)
        self.draw.deleteItems(self.highlight_cache)

        i1 = self.draw_dcel_vertex(v, size=7, fill='red', outline='')
        i2 = self.draw_dcel_hedge(v.incidentEdge, arrow=LAST, arrowshape=(7,6,2), width=2, fill='red')

        self.highlight_cache = [i1,i2]

    def explain_face(self, f):
        """Print face ``f`` and highlight it together with its outer component."""
        print(f)
        self.draw.deleteItems(self.highlight_cache)

        i1 = self.draw_dcel_face(f, fill='#ffc0bf', outline='')
        i2 = self.draw_dcel_hedge(f.outerComponent, arrow=LAST, arrowshape=(7,6,2), width=3, fill='red')

        self.highlight_cache = [i1,i2]

    def draw_dcel_vertices(self):
        """Draw all vertices and remember the drawn items in ``bgdcel_cache``."""
        for v in self.D.vertexList:
            self.bgdcel_cache.append(self.draw_dcel_vertex(v))

    def draw_dcel_vertex(self, v, **options):
        """Draw vertex ``v`` as a point; default look when no options are given."""
        if options == {}:
            options = {'size':5, 'fill':'blue', 'outline':''}

        return self.draw.point(v.x, v.y, **options)

    def draw_dcel_hedges(self):
        """Draw all half-edges and remember the drawn items in ``bgdcel_cache``."""
        for e in self.D.hedgeList:
            self.bgdcel_cache.append(self.draw_dcel_hedge(e))

    def draw_dcel_hedge(self, e, **options):
        """Draw half-edge ``e`` as a shortened, sideways-shifted arrow.

        The arrow is shortened by 0.25 and both end points are shifted by
        0.02 times ``orthogonal_l()`` of the edge vector.
        """
        if options == {}:
            options = {'arrow':LAST, 'arrowshape':(7,6,2), 'fill': '#444444'}

        offset = .02
        sx,sy = e.origin.x, e.origin.y
        ex,ey = e.twin.origin.x, e.twin.origin.y
        vx,vy = ex - sx, ey - sy
        v = vec2(vx, vy)
        v_ = v.orthogonal_l()*offset

        v = v - v.normalized()*.25
        ex, ey = sx+v.x, sy+v.y

        return self.draw.edge( (sx+v_.x, sy+v_.y), (ex+v_.x, ey+v_.y) , **options)

    def draw_dcel_faces(self):
        """Draw all faces and remember the drawn items in ``bgdcel_cache``."""
        for f in self.D.faceList:
            self.bgdcel_cache.append(self.draw_dcel_face(f))

    def draw_dcel_face(self, f, **options):
        """Draw face ``f`` as a polygon; the infinite face is skipped and yields ``None``."""
        if f == self.D.infiniteFace:
            print('Im not drawing infiniteFace')
            return

        if options == {}:
            options = {'fill':'#eeeeee', 'outline':''}

        vlist = [ (v.x, v.y) for v in f.loopOuterVertices() ]
        return self.draw.polygon(vlist, **options)

    def find_closest(self, event):
        """Print the canvas item closest to the event position."""
        x = self.canvas.canvasx(event.x)
        y = self.canvas.canvasy(event.y)
        print(self.canvas.find_closest(x, y))

    def exit(self, event):
        """Say goodbye, quit the main loop and destroy the window."""
        print("bye bye.")
        self.quit()
        self.destroy()
Beispiel #11
0
    def _fit(self, X, skip_num_points=0):
        """Validate the parameters, compute the affinities and run t-SNE on X.

        Sparse input is only accepted with method='exact'. Convert a sparse
        array to dense (e.g. `X.toarray()`) if it fits in memory, or reduce
        its dimensionality first (e.g. with TruncatedSVD).

        Parameters
        ----------
        X : array, shape (n_samples, n_features) or (n_samples, n_samples)
            If the metric is 'precomputed' X must be a square distance
            matrix. Otherwise it contains a sample per row. Note that when
            method='barnes_hut', X cannot be a sparse array and if need be
            will be converted to a 32 bit float array. Method='exact' allows
            sparse arrays and 64bit floating point inputs.

        skip_num_points : int (optional, default:0)
            This does not compute the gradient for points with indices below
            `skip_num_points`. This is useful when computing transforms of new
            data where you'd like to keep the old data fixed.

        Returns
        -------
        Whatever ``self._tsne`` returns for the computed affinities.
        """
        barnes_hut = self.method == 'barnes_hut'
        if not barnes_hut and self.method != 'exact':
            raise ValueError("'method' must be 'barnes_hut' or 'exact'")
        if self.angle < 0.0 or self.angle > 1.0:
            raise ValueError("'angle' must be between 0.0 - 1.0")
        if barnes_hut and sp.issparse(X):
            raise TypeError('A sparse matrix was passed, but dense '
                            'data is required for method="barnes_hut". Use '
                            'X.toarray() to convert to a dense numpy array if '
                            'the array is small enough for it to fit in '
                            'memory. Otherwise consider dimensionality '
                            'reduction techniques (e.g. TruncatedSVD)')
        X = check_array(X,
                        accept_sparse=['csr', 'csc', 'coo'],
                        dtype=np.float64)
        random_state = check_random_state(self.random_state)

        if self.early_exaggeration < 1.0:
            raise ValueError("early_exaggeration must be at least 1, but is "
                             "%f" % self.early_exaggeration)

        if self.n_iter < 200:
            raise ValueError("n_iter should be at least 200")

        precomputed = self.metric == "precomputed"
        if precomputed:
            if isinstance(self.init, string_types) and self.init == 'pca':
                raise ValueError("The parameter init=\"pca\" cannot be used "
                                 "with metric=\"precomputed\".")
            if X.shape[0] != X.shape[1]:
                raise ValueError("X should be a square distance matrix")
            distances = X
        else:
            if self.verbose:
                print("[t-SNE] Computing pairwise distances...")

            # Only the euclidean metric is requested in squared form.
            extra = {'squared': True} if self.metric == "euclidean" else {}
            distances = pairwise_distances(X, metric=self.metric, **extra)

        if not np.all(distances >= 0):
            raise ValueError("All distances should be positive, either "
                             "the metric or precomputed distances given "
                             "as X are not correct")

        n_samples = X.shape[0]
        # Degrees of freedom of the Student's t-distribution. The suggestion
        # degrees_of_freedom = n_components - 1 comes from
        # "Learning a Parametric Embedding by Preserving Local Structure"
        # Laurens van der Maaten, 2009.
        degrees_of_freedom = max(self.n_components - 1.0, 1)
        # Number of nearest neighbors to find: LvdM uses 3 * perplexity,
        # capped at n - 1 for very small data sets.
        k = min(n_samples - 1, int(3. * self.perplexity + 1))

        neighbors_nn = None
        if not barnes_hut:
            P = _joint_probabilities(distances, self.perplexity, self.verbose)
        else:
            if self.verbose:
                print("[t-SNE] Computing %i nearest neighbors..." % k)
            if precomputed:
                # The precomputed distances give the k nearest neighbors
                # directly.
                neighbors_nn = np.argsort(distances, axis=1)[:, :k]
            elif self.rho >= 1:
                # Exact neighbors from a ball tree; one extra neighbor is
                # queried and the first column (the point itself) dropped.
                tree = BallTree(X)
                _, neighbors_nn = tree.query(X, k=k + 1)
                neighbors_nn = neighbors_nn[:, 1:]
            elif self.rho < 1:
                # Approximate neighbors from pyFLANN, autotuned to the
                # target precision rho.
                # NOTE(review): this replaces `distances` by the
                # (n_samples, k + 1) array returned by FLANN while only
                # `neighbors_nn` loses its first column -- confirm this is
                # what `_joint_probabilities_nn` expects.
                flann_index = FLANN()
                tuned = flann_index.build_index(X,
                                                algorithm="autotuned",
                                                target_precision=self.rho,
                                                log_level='info')
                neighbors_nn, distances = flann_index.nn_index(
                    X, k + 1, checks=tuned["checks"])
                neighbors_nn = neighbors_nn[:, 1:]

            P = _joint_probabilities_nn(distances, neighbors_nn,
                                        self.perplexity, self.verbose)
        assert np.all(np.isfinite(P)), "All probabilities should be finite"
        assert np.all(P >= 0), "All probabilities should be zero or positive"
        assert np.all(P <= 1), ("All probabilities should be less "
                                "or then equal to one")

        init = self.init
        if isinstance(init, np.ndarray):
            X_embedded = init
        elif init == 'pca':
            pca = PCA(n_components=self.n_components,
                      svd_solver='randomized',
                      random_state=random_state)
            X_embedded = pca.fit_transform(X)
        elif init == 'random':
            X_embedded = None
        else:
            raise ValueError("Unsupported initialization scheme: %s" %
                             self.init)

        return self._tsne(P,
                          degrees_of_freedom,
                          n_samples,
                          random_state,
                          X_embedded=X_embedded,
                          neighbors=neighbors_nn,
                          skip_num_points=skip_num_points)
Beispiel #12
0
        test_data_high[cur_strt:cur_end, :] = cur_pts
        test_class[cur_strt:cur_end] = ci

    perplexity = 50

    if False:
        myflann = FLANN()
        precision = 0.5
        testset = test_data_high
        params = myflann.build_index(testset,
                                     algorithm="autotuned",
                                     target_precision=precision,
                                     log_level='info')
        result, dists = myflann.nn_index(testset,
                                         3 * 50,
                                         checks=params["checks"])

    tsne = TSNE_mod(perplexity=perplexity,
                    n_components=2,
                    init='pca',
                    n_iter=500,
                    random_state=1941)
    low_dim_embeds = tsne.fit_transform(test_data_high)

    color_map_name = 'gist_rainbow'
    cmap = plt.get_cmap(color_map_name)

    plt.figure()
    plt.hold(True)
    for cc in range(num_clusters):
Beispiel #13
0
class MA(object):
    def __init__(self,
                 datadict,
                 maxR,
                 denoise_absmin=None,
                 denoise_delta=None,
                 denoise_min=None,
                 detect_planar=None):
        """Index the input points and allocate the per-point result arrays.

        Parameters
        ----------
        datadict : dict of numpy arrays
            Must contain ``'coords'``, an ``(m, n)`` array of points. The
            dict is kept by reference and extended with the result arrays.
        maxR :
            Stored as ``self.SuperR``.
        denoise_absmin, denoise_delta, denoise_min, detect_planar :
            Angles in degrees, stored in radians; ``None`` is kept as
            ``None``.
        """
        def to_radians(angle_deg):
            # None is passed through; any other value is taken as degrees.
            if angle_deg is None:
                return None
            return (math.pi / 180) * angle_deg

        self.D = datadict  # dict of numpy arrays

        # linear algorithm means brute force, which means its exact nn, which we need
        # approximate nn may cause algorithm not to converge
        self.flann = FLANN()
        self.flann.build_index(self.D['coords'],
                               algorithm='linear',
                               target_precision=1,
                               sample_fraction=0.001,
                               log_level="info")
        self.m, self.n = datadict['coords'].shape

        # Float results start out as NaN ("not computed yet").
        for name, shape in (('ma_coords_', (self.m, self.n)),
                            ('ma_radii_', self.m)):
            for side in ('in', 'out'):
                unset = np.empty(shape)
                unset[:] = np.nan
                self.D[name + side] = unset

        # Integer arrays cannot hold NaN: depending on the NumPy version the
        # NaN fill is rejected or yields an undefined cast result. The
        # "unset" marker is therefore the smallest representable integer,
        # which can never be a valid point index. ``np.int`` was merely an
        # alias of the builtin ``int`` and no longer exists in NumPy.
        no_index = np.iinfo(int).min
        for name in ('ma_f1_', 'ma_f2_'):
            for side in ('in', 'out'):
                indices = np.zeros(self.m, dtype=int)
                indices[:] = no_index
                self.D[name + side] = indices

        # a list of lists with indices of closest points during the ball shrinking process for every point:
        self.D['ma_shrinkhist_in'] = []
        self.D['ma_shrinkhist_out'] = []

        self.SuperR = maxR

        self.denoise_absmin = to_radians(denoise_absmin)
        self.denoise_delta = to_radians(denoise_delta)
        self.denoise_min = to_radians(denoise_min)
        self.detect_planar = to_radians(detect_planar)

    def compute_balls_inout(self):
        """Run ``compute_balls`` to completion for ``inner=True``, then for ``inner=False``.

        ``compute_balls`` is consumed as an iterable; the items it yields
        are discarded.
        """
        for inner in (True, False):
            for _ in self.compute_balls(inner=inner):
                pass

    def compute_lfs(self):
        """Store in ``self.D['lfs']`` the distance of every point to its nearest medial point.

        The medial points are the rows of ``ma_coords_in`` and
        ``ma_coords_out`` that contain no NaN. They are indexed with a new
        brute-force FLANN index kept in ``self.ma_kd_tree``.
        """
        self.ma_kd_tree = FLANN()

        # Keep only the rows without any NaN coordinate.
        candidates = np.concatenate(
            [self.D['ma_coords_in'], self.D['ma_coords_out']])
        has_nan = np.isnan(candidates).any(axis=1)
        ma_coords = candidates[~has_nan]

        self.ma_kd_tree.build_index(ma_coords, algorithm='linear')
        nearest = self.ma_kd_tree.nn_index(self.D['coords'], 1)
        # The index reports *squared* distances, so take the square root.
        self.D['lfs'] = np.sqrt(nearest[1])

    def decimate_lfs(self, m, scramble=False, sort=False):
        """Flag points for decimation based on their local feature size.

        Writes a boolean mask of length ``self.m`` to
        ``self.D['decimate_lfs']``. A point is flagged when at least one
        other point inside its search radius is not flagged yet.

        Parameters
        ----------
        m : number or object with a method ``f``
            The search radius is ``m.f(lfs)`` when ``m`` has an ``f``
            attribute, otherwise ``lfs * m``.
        scramble : bool
            Visit the points in random order.
        sort : bool
            Visit the points by decreasing ``lfs``.
        """
        flagged = np.zeros(self.m, dtype=bool)
        self.D['decimate_lfs'] = flagged

        # Carry the original index with every point: once the list is
        # shuffled or sorted, the position in the list no longer identifies
        # the point. A real list is needed because zip() is lazy on
        # Python 3.
        plfs = [(index, p, lfs) for index, (p, lfs)
                in enumerate(zip(self.D['coords'], self.D['lfs']))]
        if scramble:
            from random import shuffle
            shuffle(plfs)
        if sort:
            plfs.sort(key=lambda item: item[2])
            plfs.reverse()

        for index, p, lfs in plfs:
            radius = m.f(lfs) if hasattr(m, 'f') else lfs * m
            # nn_radius expects the squared radius; the first hit is
            # skipped (presumably the query point itself).
            qts = self.flann.nn_radius(p, radius**2)[0][1:]

            if np.invert(flagged[qts]).any():
                flagged[index] = True

    def refine_lfs(self, m, scramble=False, sort=False):
        """Counterpart of ``decimate_lfs`` that starts with every point flagged.

        Writes a boolean mask of length ``self.m`` to
        ``self.D['decimate_lfs']``. The mask starts out all ``True`` and an
        entry is set to ``True`` when a neighbour inside the search radius
        is not flagged.

        NOTE(review): with an all-``True`` start no entry can ever change,
        so the mask stays all ``True``; the refinement logic looks
        unfinished. The argument-less ``np.array()`` call that made every
        call raise TypeError has been removed.

        Parameters
        ----------
        m : number or object with a method ``f``
            The search radius is ``m.f(lfs)`` when ``m`` has an ``f``
            attribute, otherwise ``lfs * m``.
        scramble : bool
            Visit the points in random order.
        sort : bool
            Visit the points by decreasing ``lfs``.
        """
        def brute_force_nn(q, coords):
            """return index of the closest point in coords (currently unused)"""
            distances = np.sqrt(
                np.square(coords[:, 0] - q[0]) +
                np.square(coords[:, 1] - q[1]))

            return np.argsort(distances)[0]

        flagged = np.ones(self.m, dtype=bool)
        self.D['decimate_lfs'] = flagged

        # Carry the original index with every point: once the list is
        # shuffled or sorted, the position in the list no longer identifies
        # the point. A real list is needed because zip() is lazy on
        # Python 3.
        plfs = [(index, p, lfs) for index, (p, lfs)
                in enumerate(zip(self.D['coords'], self.D['lfs']))]
        if scramble:
            from random import shuffle
            shuffle(plfs)
        if sort:
            plfs.sort(key=lambda item: item[2])
            plfs.reverse()

        for index, p, lfs in plfs:
            radius = m.f(lfs) if hasattr(m, 'f') else lfs * m
            # nn_radius expects the squared radius; the first hit is
            # skipped (presumably the query point itself).
            qts = self.flann.nn_radius(p, radius**2)[0][1:]

            if np.invert(flagged[qts]).any():
                flagged[index] = True

    def compute_boundary_lenghts_2d(self):
        '''Store in ``self.D['bound_len']`` the length along the boundary from the first point.

        Entry ``i`` is the sum of the distances between consecutive points
        ``0..i`` of ``self.D['coords']``; entry 0 is zero.
        '''
        running = self.D['bound_len'] = np.zeros(self.m)
        coords = self.D['coords']
        for i, (previous, current) in enumerate(zip(coords[:-1], coords[1:]), 1):
            running[i] = running[i - 1] + np.linalg.norm(current - previous)

    def compute_lam(self, inner='in'):
        '''Store in ``self.D['lam_' + inner]`` the distance p-p_ for every boundary point p.

        p_ is the point whose index is stored in ``ma_f2_<inner>`` for p.
        Points whose medial centre (``ma_coords_<inner>``) is NaN keep NaN.
        '''
        lam = np.zeros(self.m)
        lam[:] = np.nan
        self.D['lam_' + inner] = lam

        for i, p in enumerate(self.D['coords']):
            centre = self.D['ma_coords_' + inner][i]
            if np.isnan(centre[0]):
                continue
            partner = self.D['coords'][self.D['ma_f2_' + inner][i]]
            lam[i] = np.linalg.norm(p - partner)

    def compute_theta(self, inner='in'):
        '''Store in ``self.D['theta_' + inner]`` the value of ``cos_angle`` at the medial point.

        For every boundary point p with medial centre m (``ma_coords_<inner>``)
        and second feature point p_ (``ma_f2_<inner>``), the stored value is
        ``cos_angle(p - m, p_ - m)`` (presumably the cosine of the angle
        p-m-p_ -- confirm against ``cos_angle``). Points whose medial centre
        is NaN keep NaN.
        '''
        theta = np.zeros(self.m)
        theta[:] = np.nan
        self.D['theta_' + inner] = theta

        for i, p in enumerate(self.D['coords']):
            centre = self.D['ma_coords_' + inner][i]
            if np.isnan(centre[0]):
                continue
            partner = self.D['coords'][self.D['ma_f2_' + inner][i]]
            theta[i] = cos_angle(p - centre, partner - centre)

    def decimate_ballco(self, xi=0.1, k=4, inner='in'):
        """Flag medial balls that fit inside the ball of a neighbouring point.

        Writes a boolean mask of length ``self.m`` to
        ``self.D['decimate_ballco']``. For every point with a medial centre
        (not NaN), the medial balls of the other points returned by a
        ``k + 1`` nearest-neighbour query are inspected. The point is
        flagged when its own ball, placed at the distance between the two
        centres, fits inside a neighbour's ball enlarged by the factor
        ``1 + xi``.
        """
        flagged = np.zeros(self.m) == True
        self.D['decimate_ballco'] = flagged
        centres_key = 'ma_coords_' + inner
        radii_key = 'ma_radii_' + inner

        for i, p in enumerate(self.D['coords']):
            c_p = self.D[centres_key][i]
            r_p = self.D[radii_key][i]
            if np.isnan(c_p[0]):
                continue

            indices, _ = self.flann.nn_index(p, k + 1)

            # Medial balls (centre, radius) of the neighbours; the first
            # hit is skipped.
            balls = [(self.D[centres_key][index], self.D[radii_key][index])
                     for index in indices[0][1:]]

            # can this medial ball (c_p) be contained by a neighbouring medial ball?
            if any(np.linalg.norm(centre - c_p) + r_p < radius * (1 + xi)
                   for centre, radius in balls):
                flagged[i] = True

    def decimate_heur(self, xi=0.1, k=3, omega=math.pi / 20, inner='in'):
        """Decimation based on heuristics as defined in ma (2012).

        Writes a boolean array of length ``self.m`` to ``self.D['filtered']``.
        A point that has a medial ball (centre not NaN) is flagged when either

        1. ``cos_angle(p - c_p, p_ - c_p) < cos(omega)``, where ``p_`` is the
           point addressed by ``self.D['ma_f2_' + inner]``, or
        2. its ball fits inside the ball of one of its ``k`` nearest points:
           ``|m - c_p| + r_p < r_m * (1 + xi)``.

        :param xi: relative slack applied to the neighbour radius in test 2.
        :param k: number of neighbours examined in test 2.
        :param omega: angle threshold in radians for test 1.
        :param inner: key suffix selecting the ``'in'`` or ``'out'`` arrays.
        """
        cos_omega = math.cos(omega)
        coords = self.D['coords']
        centres = self.D['ma_coords_' + inner]
        radii = self.D['ma_radii_' + inner]
        partners = self.D['ma_f2_' + inner]
        filtered = np.zeros(self.m, dtype=bool)
        self.D['filtered'] = filtered

        for i, p in enumerate(coords):
            c_p = centres[i]
            if np.isnan(c_p[0]):
                continue
            r_p = radii[i]

            # test 1 - angle feature points
            # cos_angle takes the two vectors spanning the angle at c_p, as in
            # every other call in this class.
            # NOTE(review): `<` flags angles *larger* than omega; confirm this
            # is the intended direction of the heuristic.
            p_ = coords[partners[i]]
            if cos_angle(p - c_p, p_ - c_p) < cos_omega:
                filtered[i] = True
                # move on to the next point; a `break` here would abandon all
                # remaining points after the first hit
                continue

            # test 2 - ball containment
            # first hit is dropped (presumably the query point itself)
            indices, _ = self.flann.nn_index(p, k + 1)
            for index in indices[0][1:]:
                # can this medial ball (c_p) be contained by medial ball at m?
                if (np.linalg.norm(centres[index] - c_p) + r_p
                        < radii[index] * (1 + xi)):
                    filtered[i] = True
                    break

    def filter_radiuscon(self, alpha, k, inner='in'):
        """Filter noisy points based on continuity in radius compared to near points.

        Writes a boolean array of length ``self.m`` to
        ``self.D['filter_radiuscon']``.  For a point ``p`` with centre ``c_p``
        and radius ``r_p``, the centres ``m`` stored for its ``k`` nearest
        points are each reduced to ``|p - m| * cos_angle(m - p, c_p - p)``;
        the point is flagged when ``r_p < alpha * max(...)`` over those values.
        Points without a medial ball (centre is NaN) are never flagged.

        :param alpha: factor applied to the largest neighbour value.
        :param k: number of neighbours to compare against.
        :param inner: key suffix selecting the ``'in'`` or ``'out'`` arrays.
        """
        centres = self.D['ma_coords_' + inner]
        radii = self.D['ma_radii_' + inner]
        flags = np.zeros(self.m, dtype=bool)
        self.D['filter_radiuscon'] = flags

        for i, p in enumerate(self.D['coords']):
            c_p = centres[i]
            # A missing ball is stored as NaN, never as None; comparing the
            # coordinate row with None is element-wise and cannot be used as
            # an `if` condition.
            if np.isnan(c_p[0]):
                continue
            r_p = radii[i]

            # first hit is dropped (presumably the query point itself)
            indices, _ = self.flann.nn_index(p, k + 1)
            L = []
            for index in indices[0][1:]:
                m = centres[index]
                L.append(np.linalg.norm(p - m) * cos_angle(m - p, c_p - p))

            flags[i] = bool(r_p < alpha * max(L))

    def filter_thetacon(self,
                        theta_min=37,
                        theta_delta=45,
                        theta_absmin=26,
                        inner='in'):
        """Filter noisy points based on continuity of the separation angle.

        The three thresholds are given in degrees and converted to radians.
        For each point the indices recorded in
        ``self.D['ma_shrinkhist_' + inner]`` are replayed: for every recorded
        point ``q`` a radius ``compute_radius(p, n, q)``, a centre
        ``p - n * r`` and the angle ``acos(cos_angle(p - c, q - c))`` are
        computed.  The point is flagged in ``self.D['filter_thetacon']`` when
        some angle after the first is below ``theta_absmin``, or drops by at
        least ``theta_delta`` from its predecessor while being at most
        ``theta_min``.  Points with one or zero recorded indices are skipped.
        """
        # TODO: points with k=1 now receive no filtering... just discard them?
        deg2rad = math.pi / 180
        theta_min = theta_min * deg2rad
        theta_delta = theta_delta * deg2rad
        theta_absmin = theta_absmin * deg2rad

        def first_drop(angles):
            """Return the position of the first qualifying angle, else None."""
            for pos, (before, now) in enumerate(zip(angles, angles[1:])):
                sudden = (before - now) >= theta_delta and now <= theta_min
                if sudden or now < theta_absmin:
                    return pos
            return None

        flags = np.zeros(self.m, dtype=bool)
        self.D['filter_thetacon'] = flags

        for idx, point in enumerate(self.D['coords']):
            normal = self.D['normals'][idx]

            visited = self.D['ma_shrinkhist_' + inner][idx]
            if len(visited) <= 1:
                continue

            angles = []
            for q in self.D['coords'][visited]:
                centre = point - normal * compute_radius(point, normal, q)
                angles.append(math.acos(cos_angle(point - centre, q - centre)))

            if first_drop(angles) is not None:
                flags[idx] = True

    def compute_balls(self, inner=True, verbose=False):
        """Balls shrinking algorithm. Set `inner` to False when outer balls are wanted.

        This is a generator: nothing is computed or stored until it is
        iterated, and it must be exhausted for all points to be processed.

        For every (point, normal) pair of ``self.D['coords']`` and
        ``self.D['normals']`` (normal negated when `inner` is False) the ball
        starts with radius ``self.SuperR`` and centre ``c = p - n * r``.  Each
        iteration asks ``self.flann`` for the two points nearest to ``c``,
        takes the nearest one as ``q`` (the second one if the nearest equals
        ``p``) and recomputes the radius with ``compute_radius(p, n, q)``,
        until the radius no longer changes or one of the stop conditions
        below triggers.

        Yields dicts with keys ``'stage'``, ``'geom'`` and ``'msg'``:
        stage 1 once per point with ``geom = (p, n)``, stage 2 once per
        iteration with ``geom = (q, c, r)``.

        Per point, when the final radius is below ``self.SuperR``, the radius,
        centre, own index and index of ``q`` are written to
        ``ma_radii_*``, ``ma_coords_*``, ``ma_f1_*`` and ``ma_f2_*`` (suffix
        ``in``/``out``).  The list of visited ``q`` indices minus its last
        entry is always appended to ``ma_shrinkhist_*``.
        """

        for i, pn in enumerate(zip(self.D['coords'], self.D['normals'])):
            p, n = pn
            if not inner:
                n = -n

            # when approximating 1st point initialize q with random point not equal to p
            q = p
            # if i==0:
            #     while (q == p).all():
            #         random_index = int(rand(1)*self.D['coords'].shape[0])
            #         q = self.D['coords'][random_index]
            #     r = compute_radius(p,n,q)

            # forget optimization of r:
            r = self.SuperR

            msg = 'New iteration, initial r = {:.5}'.format(float(r))
            if verbose: print msg
            yield {'stage': 1, 'geom': (p, n), 'msg': msg}

            # r_ is the candidate radius of the current iteration, r the
            # radius the iteration started from
            r_ = None
            c = None
            j = -1
            q_i = None
            q_history = []
            while True:
                j += 1
                # initialize r on last found radius
                if j > 0:
                    r = r_
                elif j == 0 and i > 0:
                    # no-op: r keeps the SuperR value assigned above
                    r = r

                # compute ball center
                c = p - n * r
                #
                q_i_previous = q_i

                msg = 'Current iteration: #' + str(i) + ', r = {:.5}'.format(
                    float(r))
                if verbose: print msg
                yield {'stage': 2, 'geom': (q, c, r), 'msg': msg}

                ### FINDING NEAREST NEIGHBOR OF c

                # find closest point to c and assign to q
                indices, dists = self.flann.nn_index(c, 2)
                # dists, indices = self.kd_tree.query(array([c]), k=2)
                candidate_c = self.D['coords'][indices]
                # candidate_n= self.D['normals'][indices]
                # print 'candidates:', candidates
                q = candidate_c[0][0]
                # q_n = candidate_n[0][0]
                q_i = indices[0][0]

                # yield {'stage': 3, 'geom': (q)}

                # What to do if closest point is p itself?
                if (q == p).all():
                    # 1) if r==SuperR, apparently no other points on the halfspace spanned by -n => that's an infinite ball
                    if r == self.SuperR:
                        r_ = r
                        break
                    # 2) otherwise just pick the second closest point
                    else:
                        q = candidate_c[0][1]
                        # q_n = candidate_n[0][1]
                        q_i = indices[0][1]

                q_history.append(q_i)
                # compute new candidate radius r_
                r_ = compute_radius(p, n, q)

                # print r, r_, p-c, q-c, cos_angle(p-c, q-c)

                ### BOUNDARY CASES

                # if r_ < 0 closest point was on the wrong side of plane with normal n => start over with SuperRadius on the right side of that plane
                if r_ < 0:
                    r_ = self.SuperR
                # if r_ > SuperR, stop now because otherwise in case of planar surface point configuration, we end up in an infinite loop
                elif r_ > self.SuperR:
                    # elif cos_angle(p-c, q-c) >= self.normal_thres:
                    r_ = self.SuperR
                    break

                # centre belonging to the candidate radius
                c_ = p - n * r_
                # this seems to work well against noisy ma points.
                if self.denoise_absmin is not None:
                    if math.acos(
                            cos_angle(p - c_, q - c_)
                    ) < self.denoise_absmin and j > 0 and r_ > np.linalg.norm(
                            q - p):
                        # msg = 'Current iteration: -#' + str(i) +', r = {:.5}'.format(float(r))
                        # yield {'stage': 2, 'geom': (q,c_,r), 'msg':msg}
                        # keep previous radius:
                        r_ = r
                        q_i = q_i_previous
                        break

                if self.denoise_delta is not None and j > 0:
                    theta_now = math.acos(cos_angle(p - c_, q - c_))
                    q_previous = self.D['coords'][q_i_previous]
                    theta_prev = math.acos(cos_angle(p - c_, q_previous - c_))

                    if theta_prev - theta_now > self.denoise_delta and theta_now < self.denoise_min and r_ > np.linalg.norm(
                            q - p):
                        # print "theta_prev:",theta_prev/math.pi * 180
                        # print "theta_now:",theta_now/math.pi * 180
                        # print "self.denoise_delta:",self.denoise_delta/math.pi * 180
                        # print "self.denoise_min:",self.denoise_min/math.pi * 180

                        # keep previous radius:
                        r_ = r
                        q_i = q_i_previous
                        break

                if self.detect_planar != None:
                    if math.acos(cos_angle(q - p,
                                           -n)) > self.detect_planar and j < 2:
                        # yield {'stage': 2, 'geom': (q,p - n*r_,r_), 'msg':msg}
                        r_ = self.SuperR
                        # r_= r
                        # q_i = q_i_previous
                        break

                ### NORMAL STOP CONDITION

                # stop iteration if r has converged
                if r == r_:
                    break

            if inner: inout = 'in'
            else: inout = 'out'

            # a final radius of SuperR (or more) means no ball is stored for
            # this point
            if r_ >= self.SuperR:
                pass
            else:
                self.D['ma_radii_' + inout][i] = r_
                self.D['ma_coords_' + inout][i] = c
                self.D['ma_f1_' + inout][i] = i
                self.D['ma_f2_' + inout][i] = q_i
            self.D['ma_shrinkhist_' + inout].append(q_history[:-1])

    def construct_topo_2d(self, inner='in', project=True):
        """Build a list of medial-axis line pieces as pairs of point indices.

        The result is stored in ``self.D['ma_linepieces_' + inner]``.

        With ``project=True``, for every pair of consecutive rows
        ``index - 1`` and ``index`` of ``self.D['coords']`` the entries of
        ``ma_f1_*`` / ``ma_f2_*`` equal to either index are located (first
        occurrence only), their centres are projected with ``projfac`` onto
        the vector between the two rows, sorted by that value, and each pair
        of neighbours in the sorted order becomes one line piece.

        With ``project=False`` the indices of all points whose centre is not
        NaN are chained in index order.

        :param inner: key suffix selecting the ``'in'`` or ``'out'`` arrays.
        :param project: choose between the two constructions described above.
        """
        def arrayindex(A, value):
            """Return the first position where A equals value, NaN if none."""
            hits = np.where(A == value)[0]
            if hits.shape != (0, ):
                return hits[0]
            return np.nan

        pieces = list()
        self.D['ma_linepieces_' + inner] = pieces
        if project:
            for index in range(1, self.m):
                index_1 = index - 1

                # find ma points corresponding to these feature points
                f2_p = arrayindex(self.D['ma_f2_' + inner], index_1)
                f2 = arrayindex(self.D['ma_f2_' + inner], index)
                f1_p = arrayindex(self.D['ma_f1_' + inner], index_1)
                f1 = arrayindex(self.D['ma_f1_' + inner], index)

                # collect unique id's of corresponding ma_coords
                S = set()
                for f in [f1, f1_p, f2, f2_p]:
                    if not np.isnan(f):
                        S.add(f)

                # this is the linevector we are projecting the ma_coords on:
                l = self.D['coords'][index] - self.D['coords'][index_1]

                # compute projections of ma_coords on line l
                S_ = [(projfac(l, self.D['ma_coords_' + inner][s] -
                               self.D['coords'][index_1]), s) for s in S]

                # now we can sort them on their projection value
                S_.sort(key=lambda item: item[0])

                # Connect neighbours in the sorted order.  Start at 1:
                # starting at 0 would pair the first entry with S_[-1], i.e.
                # add a piece from the last entry back to the first (and a
                # degenerate (s, s) piece when there is only one entry).
                for i in range(1, len(S_)):
                    pieces.append((S_[i - 1][1], S_[i][1]))
        else:
            indices = [i for i in range(self.m)
                       if not np.isnan(self.D['ma_coords_' + inner][i][0])]

            for i in range(1, len(indices)):
                pieces.append((indices[i - 1], indices[i]))
Beispiel #14
0
def calcTSDF_point2plane(smplVerts, smplFaces, DCMVerts, nu=0.03,
                         convVal=32767.0):
    """Compute a truncated point-to-plane signed distance per query point.

    For each row of ``DCMVerts`` the nearest row of ``smplVerts`` is found
    with FLANN.  The signed distance of the query point to the plane through
    that vertex, oriented by the vertex normal, is divided by ``nu``, clipped
    to ``[-1, 1]`` and multiplied by ``convVal``.

    Vertex normals are the normalised running mean of the (unnormalised)
    cross-product normals of all faces that reference the vertex.  A vertex
    referenced by no face keeps a zero normal and therefore produces NaN.

    :param smplVerts: mesh vertices, indexed by ``smplFaces``; columns 0..2
        are used as coordinates.
    :param smplFaces: integer rows of three vertex indices.
    :param DCMVerts: query points; columns 0..2 are used as coordinates.
    :param nu: truncation distance the raw distance is divided by.
    :param convVal: scale applied to the clipped value before returning.
    :return: array with one scaled, truncated distance per query point.
    """
    # TODO
    # Improve TSDF calculation. (Current code generates noisy TSDF)

    vertFaces = smplVerts[smplFaces]
    vecAB = vertFaces[:, 1] - vertFaces[:, 0]
    vecAC = vertFaces[:, 2] - vertFaces[:, 0]

    # Calculate vertex normals
    faceNormals = np.cross(vecAB, vecAC)
    vertNormals = np.zeros((len(smplVerts), 3))
    normalCount = np.zeros(len(smplVerts))
    for vset, facenormal in zip(smplFaces, faceNormals):
        for j in vset:
            vertNormals[j] = (vertNormals[j] * normalCount[j] +
                              facenormal) / (normalCount[j] + 1)
            normalCount[j] += 1
    vertNormals = vertNormals / LA.norm(vertNormals, axis=1)[:, np.newaxis]

    # Find nearest neighbor
    flann = FLANN()
    flann.build_index(smplVerts)
    vertIds, _ = flann.nn_index(DCMVerts, num_neighbors=1)

    # diagnostic output kept from the original implementation
    print(vertIds[0:10])

    # Plane offset d of n.x + d = 0 for the plane through every mesh vertex
    D = (vertNormals[:, 0] * smplVerts[:, 0] +
         vertNormals[:, 1] * smplVerts[:, 1] +
         vertNormals[:, 2] * smplVerts[:, 2])
    D = -D / LA.norm(vertNormals, axis=1)

    # Signed distance of each query point to the plane of its nearest vertex
    corrNormals = vertNormals[vertIds]
    Dist = (corrNormals[:, 0] * DCMVerts[:, 0] +
            corrNormals[:, 1] * DCMVerts[:, 1] +
            corrNormals[:, 2] * DCMVerts[:, 2] + D[vertIds])
    Dist = Dist / nu
    TruncatedDist = np.minimum(1.0, np.maximum(-1.0, Dist))

    return TruncatedDist * convVal
def full_displacement(shap, supp, t, pol_en=False, cent=None, theta_param=1,
                      pol_mod=False, coord_map=None, knn=None, eps=1.e-16):
    """Computes all quantities required to compute displacement interpolation at steps ``t``.

    Calls:

    * :func:`utils.polar_coord_cloud`

    Each row of ``supp`` holds two flat pixel indices.  Both are mapped to
    coordinates through ``coord_map`` and linearly interpolated for every
    value of ``t``.  For each interpolated point the 4 nearest grid points
    are looked up with ``knn`` and weighted by the normalised inverse of the
    distances it returns (plus ``eps``).

    :param shap: 2-tuple grid shape.
    :param supp: array whose columns 0 and 1 hold flat pixel indices.
    :param t: scalar or sequence of interpolation steps.
    :param pol_en: build ``coord_map`` in polar coordinates (only used when
        ``coord_map`` is None).
    :param cent: polar centre; defaults to ``shap / 2`` when polar is enabled.
    :param theta_param: scale applied to the angular coordinate.
    :param pol_mod: multiply the angular coordinate by the radial one.
    :param coord_map: precomputed ``(shap[0], shap[1], 2)`` coordinate map.
    :param knn: prebuilt index exposing ``nn_index(points, k)``.  It is
        (re)built from ``coord_map`` when either ``coord_map`` or ``knn`` is
        not supplied.
    :param eps: regulariser added to the distances before inversion.
    :return: ``(neighbors_graph, weights_neighbors, cent, coord_map, knn)``
        where ``neighbors_graph`` has shape ``(len(supp), 2, 4, size(t))``
        and ``weights_neighbors`` shape ``(len(supp), 4, size(t))``.
    """
    import gc
    from numpy import arange, array, float64, ones, pi, zeros

    n_pix = shap[0] * shap[1]
    rebuild_index = coord_map is None or knn is None

    if coord_map is None:
        coord_map = zeros((shap[0], shap[1], 2))
        coord_map[:, :, 0] = arange(0, shap[0]).reshape((shap[0], 1)).dot(ones((1, shap[1])))
        coord_map[:, :, 1] = ones((shap[0], 1)).dot(arange(0, shap[1]).reshape((1, shap[1])))
        if pol_en:
            from utils import polar_coord_cloud
            if cent is None:
                cent = array([shap[0]/2, shap[1]/2])
            cloud_in = zeros((2, n_pix))
            cloud_in[0, :] = coord_map[:, :, 0].reshape((n_pix,))
            cloud_in[1, :] = coord_map[:, :, 1].reshape((n_pix,))
            cloud_out = polar_coord_cloud(cloud_in, cent)
            coord_map[:, :, 0] = cloud_out[0, :].reshape((shap[0], shap[1]))
            coord_map[:, :, 1] = theta_param*cloud_out[1, :].reshape((shap[0], shap[1]))/(2*pi)
            if pol_mod:
                coord_map[:, :, 1] *= coord_map[:, :, 0]

    if rebuild_index:
        # Also covers a caller-supplied coord_map without a matching index,
        # which would otherwise fail on ``None.nn_index`` below.
        from pyflann import FLANN
        knn = FLANN()
        cloud_in = zeros((n_pix, 2))
        cloud_in[:, 0] = coord_map[:, :, 0].reshape((n_pix,))
        cloud_in[:, 1] = coord_map[:, :, 1].reshape((n_pix,))
        knn.build_index(array(cloud_in, dtype=float64))

    # Treat a scalar t and a sequence of steps uniformly.
    t_arr = array(t, dtype=float64).ravel()
    n_t = t_arr.size
    n_supp = supp.shape[0]

    # Matching coordinates
    # NOTE(review): flat indices are split with shap[0] for both quotient and
    # remainder; for non-square grids confirm this matches how supp was built.
    pos1_i = (supp[:, 0] // shap[0]).astype(int)
    pos1_j = (supp[:, 0] % shap[0]).astype(int)
    pos2_i = (supp[:, 1] // shap[0]).astype(int)
    pos2_j = (supp[:, 1] % shap[0]).astype(int)
    start = coord_map[pos1_i, pos1_j, :]
    stop = coord_map[pos2_i, pos2_j, :]

    advection_points = zeros((n_supp, 2, n_t))
    advection_points[:] = (1 - t_arr)*start[:, :, None] + t_arr*stop[:, :, None]

    neighbors_graph = zeros((n_supp, 2, 4, n_t))
    weights_neighbors = zeros((n_supp, 4, n_t))

    for j in range(n_t):
        if n_t > 1:
            # same text as the former print statement, valid on Python 2 and 3
            print("Wavelength  {0} / {1}".format(j + 1, n_t))
        neighbors_graph_temp, dist_neighbors = knn.nn_index(advection_points[:, :, j], 4)
        neighbors_graph[:, 0, :, j] = neighbors_graph_temp // shap[0]
        neighbors_graph[:, 1, :, j] = neighbors_graph_temp % shap[0]
        inv_dist = (dist_neighbors + eps)**(-1)
        weights_neighbors[:, :, j] = inv_dist / inv_dist.sum(axis=1).reshape((n_supp, 1))
    gc.collect()

    return neighbors_graph.astype(int), weights_neighbors, cent, coord_map, knn
Beispiel #16
0
class MA(object):

    def __init__(self, datadict, maxR, denoise_absmin=None, denoise_delta=None, denoise_min=None, detect_planar=None):
        """Index the input points and allocate the medial-axis result arrays.

        :param datadict: dict of numpy arrays; ``datadict['coords']`` must be
            a 2-D array (one row per point).  The dict is kept as ``self.D``
            and the result arrays are added to it.
        :param maxR: initial (and maximum) ball radius, stored as
            ``self.SuperR``.
        :param denoise_absmin: angle in degrees or None; stored in radians.
        :param denoise_delta: angle in degrees or None; stored in radians.
        :param denoise_min: angle in degrees or None; stored in radians.
        :param detect_planar: angle in degrees or None; stored in radians.

        Result arrays (suffix ``_in`` / ``_out``): ``ma_coords_*`` and
        ``ma_radii_*`` are float arrays filled with NaN; ``ma_f1_*`` and
        ``ma_f2_*`` are integer index arrays filled with -1, since an integer
        array cannot hold NaN; ``ma_shrinkhist_*`` are empty lists.
        """
        def to_radians(degrees):
            """Convert degrees to radians, passing None through."""
            if degrees is None:
                return None
            return (math.pi/180)*degrees

        self.D = datadict # dict of numpy arrays
        # self.kd_tree = KDTree(self.D['coords'])

        # linear algorithm means brute force, which means its exact nn, which we need
        # approximate nn may cause algorithm not to converge
        self.flann = FLANN()
        self.flann.build_index(self.D['coords'], algorithm='linear',target_precision=1, sample_fraction=0.001,  log_level = "info")
        # print "constructed kd-tree"
        self.m, self.n = datadict['coords'].shape

        sides = ('in', 'out')
        for side in sides:
            self.D['ma_coords_' + side] = np.full((self.m, self.n), np.nan)
        for side in sides:
            self.D['ma_radii_' + side] = np.full(self.m, np.nan)
        # -1 marks "no feature point": it never equals a valid row index.
        # Consumers must check ma_coords_* for NaN before indexing with it.
        for side in sides:
            self.D['ma_f1_' + side] = np.full(self.m, -1, dtype=int)
        for side in sides:
            self.D['ma_f2_' + side] = np.full(self.m, -1, dtype=int)

        # a list of lists with indices of closest points during the ball shrinking process for every point:
        for side in sides:
            self.D['ma_shrinkhist_' + side] = []

        self.SuperR = maxR

        self.denoise_absmin = to_radians(denoise_absmin)
        self.denoise_delta = to_radians(denoise_delta)
        self.denoise_min = to_radians(denoise_min)
        self.detect_planar = to_radians(detect_planar)
        # self.normal_thres = 0.99

    def compute_balls_inout(self):
        """Run ``compute_balls`` to completion for inner, then outer balls.

        ``compute_balls`` is a generator; its yielded values are discarded
        here, it is only iterated so that it does its work.
        """
        for want_inner in (True, False):
            for _ in self.compute_balls(inner=want_inner):
                pass

    def compute_lfs(self):
        """Store, per point, the distance to the nearest medial-axis centre.

        The inner and outer centres are stacked, rows containing NaN are
        dropped, and a brute-force FLANN index over the rest is kept as
        ``self.ma_kd_tree``.  The square root of the distances it reports for
        ``self.D['coords']`` is written to ``self.D['lfs']``.
        """
        tree = FLANN()
        self.ma_kd_tree = tree

        # collect all ma_coords that are not NaN
        centres = np.concatenate([self.D['ma_coords_in'],
                                  self.D['ma_coords_out']])
        has_nan = np.isnan(centres).any(axis=1)
        centres = centres[np.logical_not(has_nan)]

        tree.build_index(centres, algorithm='linear')
        # we can get *squared* distances for free, so take the square root
        _, sq_dists = tree.nn_index(self.D['coords'], 1)
        self.D['lfs'] = np.sqrt(sq_dists)

    def decimate_lfs(self, m, scramble = False, sort = False):
        """Greedily flag points for removal based on their local feature size.

        Writes a boolean array of length ``self.m`` to
        ``self.D['decimate_lfs']``.  Points are visited one by one; a point is
        flagged when at least one other point within its search radius is not
        flagged yet.  The radius is ``lfs * m`` when `m` is a float and
        ``m.f(lfs)`` otherwise.

        :param m: float factor, or an object exposing ``f(lfs)``.
        :param scramble: visit the points in random order.
        :param sort: visit the points by decreasing local feature size
            (applied after `scramble`).
        """
        flags = np.zeros(self.m, dtype=bool)
        self.D['decimate_lfs'] = flags

        # Keep every point's own row index with it: the flags are indexed by
        # row, so the visiting order must not decide which entry is set.
        plfs = [(idx, p, lfs) for idx, (p, lfs)
                in enumerate(zip(self.D['coords'], self.D['lfs']))]
        if scramble:
            from random import shuffle
            shuffle(plfs)
        if sort:
            plfs.sort(key=lambda item: item[2])
            plfs.reverse()

        for idx, p, lfs in plfs:
            if isinstance(m, float):
                radius = lfs*m
            else:
                radius = m.f(lfs)
            # nn_radius is given the squared radius; the first hit is dropped
            # (presumably the query point itself)
            qts = self.flann.nn_radius(p, radius**2)[0][1:]

            if np.invert(flags[qts]).any():
                flags[idx] = True

    def refine_lfs(self, m, scramble = False, sort = False):
        """Unfinished counterpart of ``decimate_lfs``.

        NOTE(review): as written this method cannot complete -- the call
        ``np.array()`` below has no argument and raises TypeError before the
        main loop is reached.  The intended behaviour is not evident from the
        code; the notes here only describe what the statements do.

        ``self.D['decimate_lfs']`` is reset to all True before the failure.
        The loop after it is the same as in ``decimate_lfs``: it indexes the
        flags with a running counter ``i`` while visiting the points in a
        possibly shuffled/sorted order.
        """

        # helper is defined but not called anywhere in this method
        def brute_force_nn(q, coords):
            """return index of the closest point in coords (first two columns only)"""
            distances = np.sqrt( np.square( coords[:,0]-q[0] ) + np.square( coords[:,1]-q[1] ) );

            return np.argsort(distances)[0]

        i=0
        # comparing zeros with False yields an all-True boolean array
        self.D['decimate_lfs'] = np.zeros(self.m) == False

        # list operations below assume zip() returns a list (Python 2)
        plfs = zip(self.D['coords'], self.D['lfs'])
        if scramble: 
            from random import shuffle
            shuffle( plfs )
        if sort: 
            # ascending sort followed by reverse: largest lfs first
            plfs.sort(key = lambda item: item[1])
            plfs.reverse()

        # NOTE(review): np.array() requires an argument; this line always raises TypeError
        tmp_coords = np.array()

        for p, lfs in plfs:
            # nn_radius is given a squared radius; the first hit is dropped
            if type(m) is float:
                qts = self.flann.nn_radius(p, (lfs*m)**2)[0][1:]
            else:
                qts = self.flann.nn_radius(p, m.f(lfs)**2)[0][1:]
            
            iqts = np.invert(self.D['decimate_lfs'][qts])
            if iqts.any():
                self.D['decimate_lfs'][i] = True
            i+=1

    def compute_boundary_lenghts_2d(self):
        """Accumulate the polyline length along the rows of ``self.D['coords']``.

        Writes an array of ``self.m`` values to ``self.D['bound_len']``.  Entry
        0 is zero and entry ``i`` is entry ``i - 1`` plus the Euclidean distance
        between rows ``i - 1`` and ``i`` of ``self.D['coords']``.
        """
        coords = self.D['coords']
        lengths = np.zeros(self.m)
        self.D['bound_len'] = lengths
        for idx in range(1, len(coords)):
            segment = np.linalg.norm(coords[idx] - coords[idx - 1])
            lengths[idx] = lengths[idx - 1] + segment

    def compute_lam(self, inner='in'):
        """Store the distance from each point to its second feature point.

        For every row ``p`` of ``self.D['coords']`` whose centre in
        ``self.D['ma_coords_' + inner]`` is set (first component not NaN), the
        point ``p_`` addressed by ``self.D['ma_f2_' + inner]`` is looked up and
        ``|p - p_|`` is written to ``self.D['lam_' + inner]``.  Entries of
        points without a centre stay NaN.
        """
        coords = self.D['coords']
        centres = self.D['ma_coords_' + inner]
        partners = self.D['ma_f2_' + inner]

        lam = np.full(self.m, np.nan)
        self.D['lam_' + inner] = lam
        for idx, point in enumerate(coords):
            if np.isnan(centres[idx][0]):
                continue
            lam[idx] = np.linalg.norm(point - coords[partners[idx]])

    def compute_theta(self, inner='in'):
        """Store ``cos_angle(p - c, p_ - c)`` for every point that has a centre.

        ``p`` is a row of ``self.D['coords']``, ``c`` the matching row of
        ``self.D['ma_coords_' + inner]`` and ``p_`` the point addressed by
        ``self.D['ma_f2_' + inner]``.  The value returned by ``cos_angle`` is
        written unchanged to ``self.D['theta_' + inner]`` (presumably the cosine
        of the angle p-c-p_, not the angle itself -- confirm against
        ``cos_angle``).  Points whose centre is NaN keep NaN.
        """
        coords = self.D['coords']
        centres = self.D['ma_coords_' + inner]
        partners = self.D['ma_f2_' + inner]

        theta = np.full(self.m, np.nan)
        self.D['theta_' + inner] = theta
        for idx, point in enumerate(coords):
            centre = centres[idx]
            if np.isnan(centre[0]):
                continue
            other = coords[partners[idx]]
            theta[idx] = cos_angle(point - centre, other - centre)

    def decimate_ballco(self, xi=0.1, k=4, inner='in'):
        """Flag medial balls that fit inside the ball of a nearby point.

        For every point with a centre (first component not NaN) the ``k + 1``
        nearest points are requested from ``self.flann``; the first hit is
        dropped (presumably the query point itself).  The point is flagged in
        ``self.D['decimate_ballco']`` when, for any remaining neighbour with
        centre ``m`` and radius ``r_m``, ``|m - c_p| + r_p < r_m * (1 + xi)``.
        """
        centres = self.D['ma_coords_' + inner]
        radii = self.D['ma_radii_' + inner]
        flags = np.zeros(self.m, dtype=bool)
        self.D['decimate_ballco'] = flags

        for idx, point in enumerate(self.D['coords']):
            centre = centres[idx]
            radius = radii[idx]
            if np.isnan(centre[0]):
                continue

            nn_rows, _ = self.flann.nn_index(point, k + 1)
            # can this medial ball be contained by a neighbour's medial ball?
            flags[idx] = any(
                np.linalg.norm(centres[nb] - centre) + radius
                < radii[nb] * (1 + xi)
                for nb in nn_rows[0][1:])

                # ballcos = [ r_m/np.linalg.norm(m-c_p) for m, r_m in M ]
                # self.D['ballco'][i] = max(ballcos)


    def decimate_heur(self, xi=0.1, k=3, omega=math.pi/20, inner='in'):
        """Decimation based on heuristics as defined in ma (2012).

        Writes a boolean array of length ``self.m`` to ``self.D['filtered']``.
        A point that has a medial ball (centre not NaN) is flagged when either

        1. ``cos_angle(p - c_p, p_ - c_p) < cos(omega)``, where ``p_`` is the
           point addressed by ``self.D['ma_f2_' + inner]``, or
        2. its ball fits inside the ball of one of its ``k`` nearest points:
           ``|m - c_p| + r_p < r_m * (1 + xi)``.

        :param xi: relative slack applied to the neighbour radius in test 2.
        :param k: number of neighbours examined in test 2.
        :param omega: angle threshold in radians for test 1.
        :param inner: key suffix selecting the ``'in'`` or ``'out'`` arrays.
        """
        cos_omega = math.cos(omega)
        coords = self.D['coords']
        centres = self.D['ma_coords_' + inner]
        radii = self.D['ma_radii_' + inner]
        partners = self.D['ma_f2_' + inner]
        filtered = np.zeros(self.m, dtype=bool)
        self.D['filtered'] = filtered

        for i, p in enumerate(coords):
            c_p = centres[i]
            if np.isnan(c_p[0]):
                continue
            r_p = radii[i]

            # test 1 - angle feature points
            # cos_angle takes the two vectors spanning the angle at c_p, as in
            # every other call in this class.
            # NOTE(review): `<` flags angles *larger* than omega; confirm this
            # is the intended direction of the heuristic.
            p_ = coords[partners[i]]
            if cos_angle(p - c_p, p_ - c_p) < cos_omega:
                filtered[i] = True
                # move on to the next point; a `break` here would abandon all
                # remaining points after the first hit
                continue

            # test 2 - ball containment
            # first hit is dropped (presumably the query point itself)
            indices, _ = self.flann.nn_index(p, k + 1)
            for index in indices[0][1:]:
                # can this medial ball (c_p) be contained by medial ball at m?
                if (np.linalg.norm(centres[index] - c_p) + r_p
                        < radii[index] * (1 + xi)):
                    filtered[i] = True
                    break
                

    def filter_radiuscon(self, alpha, k, inner='in'):
        """Filter noisy points based on continuity in radius compared to near points.

        Writes a boolean array of length ``self.m`` to
        ``self.D['filter_radiuscon']``.  For a point ``p`` with centre ``c_p``
        and radius ``r_p``, the centres ``m`` stored for its ``k`` nearest
        points are each reduced to ``|p - m| * cos_angle(m - p, c_p - p)``;
        the point is flagged when ``r_p < alpha * max(...)`` over those values.
        Points without a medial ball (centre is NaN) are never flagged.

        :param alpha: factor applied to the largest neighbour value.
        :param k: number of neighbours to compare against.
        :param inner: key suffix selecting the ``'in'`` or ``'out'`` arrays.
        """
        centres = self.D['ma_coords_' + inner]
        radii = self.D['ma_radii_' + inner]
        flags = np.zeros(self.m, dtype=bool)
        self.D['filter_radiuscon'] = flags

        for i, p in enumerate(self.D['coords']):
            c_p = centres[i]
            # A missing ball is stored as NaN, never as None; comparing the
            # coordinate row with None is element-wise and cannot be used as
            # an `if` condition.
            if np.isnan(c_p[0]):
                continue
            r_p = radii[i]

            # first hit is dropped (presumably the query point itself)
            indices, _ = self.flann.nn_index(p, k + 1)
            L = []
            for index in indices[0][1:]:
                m = centres[index]
                L.append(np.linalg.norm(p - m) * cos_angle(m - p, c_p - p))

            flags[i] = bool(r_p < alpha * max(L))

    def filter_thetacon(self, theta_min=37, theta_delta=45, theta_absmin=26, inner='in'):
        """Filter noisy points based on continuity of the separation angle.

        The three thresholds are given in degrees and converted to radians.
        For each point the indices recorded in
        ``self.D['ma_shrinkhist_' + inner]`` are replayed: for every recorded
        point ``q`` a radius ``compute_radius(p, n, q)``, a centre
        ``p - n * r`` and the angle ``acos(cos_angle(p - c, q - c))`` are
        computed.  The point is flagged in ``self.D['filter_thetacon']`` when
        some angle after the first is below ``theta_absmin``, or drops by at
        least ``theta_delta`` from its predecessor while being at most
        ``theta_min``.  Points with one or zero recorded indices are skipped.
        """
        # TODO: points with k=1 now receive no filtering... just discard them?
        deg2rad = math.pi/180
        theta_min = theta_min * deg2rad
        theta_delta = theta_delta * deg2rad
        theta_absmin = theta_absmin * deg2rad

        def first_drop(angles):
            """Return the position of the first qualifying angle, else None."""
            for pos, (before, now) in enumerate(zip(angles, angles[1:])):
                sudden = (before - now) >= theta_delta and now <= theta_min
                if sudden or now < theta_absmin:
                    return pos
            return None

        flags = np.zeros(self.m, dtype=bool)
        self.D['filter_thetacon'] = flags

        for idx, point in enumerate(self.D['coords']):
            normal = self.D['normals'][idx]

            visited = self.D['ma_shrinkhist_'+inner][idx]
            if len(visited) <= 1:
                continue

            angles = []
            for q in self.D['coords'][visited]:
                centre = point - normal * compute_radius(point, normal, q)
                angles.append(math.acos(cos_angle(point - centre, q - centre)))

            if first_drop(angles) is not None:
                flags[idx] = True

    def compute_balls(self, inner=True, verbose=False):
        """Balls shrinking algorithm, written as a generator.

        For every point/normal pair in ``self.D['coords']`` and
        ``self.D['normals']`` a ball touching the point is started at radius
        ``self.SuperR`` and repeatedly shrunk towards the nearest neighbour of
        its centre (looked up with ``self.flann``) until the radius no longer
        changes or one of the stop heuristics below fires.

        Set `inner` to False when outer balls are wanted (the normals are
        negated).  If `verbose` is true every progress message is printed.

        Side effects: for each point whose final radius is below
        ``self.SuperR`` the entries ``ma_radii_*``, ``ma_coords_*``,
        ``ma_f1_*`` and ``ma_f2_*`` of ``self.D`` are written at the point's
        index; the list of visited neighbour indices (without the last one)
        is appended to ``self.D['ma_shrinkhist_*']`` for every point.  ``*``
        is ``in`` or ``out`` depending on `inner`.

        Yields:
            ``{'stage': 1, 'geom': (p, n), 'msg': str}`` once per point and
            ``{'stage': 2, 'geom': (q, c, r), 'msg': str}`` once per shrink
            iteration.
        """
        inout = 'in' if inner else 'out'

        for i, (p, n) in enumerate(zip(self.D['coords'], self.D['normals'])):
            if not inner:
                n = -n

            # q is only a placeholder for the first stage-2 yield; it is
            # replaced by a real neighbour inside the loop
            q = p

            # no per-point optimisation of the start radius: always SuperR
            r = self.SuperR

            msg = 'New iteration, initial r = {:.5}'.format(float(r))
            if verbose:
                print(msg)
            yield {'stage': 1, 'geom': (p, n), 'msg': msg}

            r_ = None
            c = None
            j = -1
            q_i = None
            q_history = []
            while True:
                j += 1
                # continue from the radius found in the previous iteration
                if j > 0:
                    r = r_

                # compute ball center
                c = p - n*r
                q_i_previous = q_i

                msg = 'Current iteration: #' + str(i) +', r = {:.5}'.format(float(r))
                if verbose:
                    print(msg)
                yield {'stage': 2, 'geom': (q, c, r), 'msg': msg}

                ### FINDING NEAREST NEIGHBOR OF c

                # two neighbours are requested so there is a fallback when
                # the nearest one turns out to be p itself
                indices, _ = self.flann.nn_index(c, 2)
                candidate_c = self.D['coords'][indices]
                q = candidate_c[0][0]
                q_i = indices[0][0]

                # What to do if closest point is p itself?
                if (q == p).all():
                    # 1) if r == SuperR, apparently there are no other points
                    #    on the halfspace spanned by -n => infinite ball
                    if r == self.SuperR:
                        r_ = r
                        break
                    # 2) otherwise just pick the second closest point
                    else:
                        q = candidate_c[0][1]
                        q_i = indices[0][1]

                q_history.append(q_i)
                # compute new candidate radius r_
                r_ = compute_radius(p, n, q)

                ### BOUNDARY CASES

                # if r_ < 0 the closest point was on the wrong side of the
                # plane with normal n => start over with SuperR on the right
                # side of that plane
                if r_ < 0:
                    r_ = self.SuperR
                # if r_ > SuperR stop now, because otherwise a planar point
                # configuration would end up in an infinite loop
                elif r_ > self.SuperR:
                    r_ = self.SuperR
                    break

                c_ = p - n*r_
                # this seems to work well against noisy ma points.
                if self.denoise_absmin is not None:
                    if math.acos(cos_angle(p-c_, q-c_)) < self.denoise_absmin and j>0 and r_>np.linalg.norm(q-p):
                        # keep previous radius:
                        r_ = r
                        q_i = q_i_previous
                        break

                if self.denoise_delta is not None and j>0:
                    theta_now = math.acos(cos_angle(p-c_, q-c_))
                    q_previous = self.D['coords'][q_i_previous]
                    theta_prev = math.acos(cos_angle(p-c_, q_previous-c_))

                    if theta_prev-theta_now > self.denoise_delta and theta_now < self.denoise_min and r_>np.linalg.norm(q-p):
                        # keep previous radius:
                        r_ = r
                        q_i = q_i_previous
                        break

                if self.detect_planar is not None:
                    if math.acos( cos_angle(q-p, -n) ) > self.detect_planar and j<2:
                        # treated like an infinite ball: nothing is stored
                        r_ = self.SuperR
                        break

                ### NORMAL STOP CONDITION

                # stop iteration if r has converged
                if r == r_:
                    break

            # balls that ended at (or above) SuperR are not recorded
            if r_ >= self.SuperR:
                pass
            else:
                self.D['ma_radii_'+inout][i] = r_
                self.D['ma_coords_'+inout][i] = c
                self.D['ma_f1_'+inout][i] = i
                self.D['ma_f2_'+inout][i] = q_i
            self.D['ma_shrinkhist_'+inout].append(q_history[:-1])

    def construct_topo_2d(self, inner='in', project=True):
        """Build the list of line pieces connecting medial-axis points.

        The result is stored in ``self.D['ma_linepieces_' + inner]`` as a list
        of ``(start, end)`` index pairs into ``self.D['ma_coords_' + inner]``.

        inner   -- suffix selecting the data set, ``'in'`` or ``'out'``.
        project -- if true, for every pair of consecutive surface points the
                   medial-axis points that have one of them as feature point
                   (``ma_f1_*`` / ``ma_f2_*``) are projected onto the line
                   between the two surface points and chained in that order.
                   If false, all medial-axis points whose first coordinate is
                   not NaN are chained in index order.
        """

        def arrayindex(A, value):
            # index of the first element of A equal to value, NaN if absent
            hits = np.where(A == value)[0]
            if hits.shape != (0,):
                return hits[0]
            return np.nan

        ma_coords = self.D['ma_coords_'+inner]
        pieces = []
        self.D['ma_linepieces_'+inner] = pieces

        if project:
            for index in range(1, self.m):
                index_1 = index - 1

                # ma points that have one of the two surface points as
                # feature point (same lookup order as the set is filled in)
                candidates = [
                    arrayindex(self.D['ma_f1_'+inner], index),
                    arrayindex(self.D['ma_f1_'+inner], index_1),
                    arrayindex(self.D['ma_f2_'+inner], index),
                    arrayindex(self.D['ma_f2_'+inner], index_1),
                ]

                # collect unique ids of the corresponding ma_coords
                S = set()
                for f in candidates:
                    if not np.isnan(f):
                        S.add(f)

                # this is the line vector the ma_coords are projected on
                l = self.D['coords'][index] - self.D['coords'][index_1]

                # projection factor of every ma point on l, then sort on it
                S_ = [(projfac(l, ma_coords[s] - self.D['coords'][index_1]), s)
                      for s in S]
                S_.sort(key=lambda item: item[0])

                # connect neighbours in sorted order only; pairing element 0
                # with element -1 would add a bogus last-to-first piece
                for (_, start), (_, end) in zip(S_, S_[1:]):
                    pieces.append((start, end))
        else:
            indices = [i for i in range(self.m)
                       if not np.isnan(ma_coords[i][0])]
            for s, e in zip(indices, indices[1:]):
                pieces.append((s, e))
Beispiel #17
0
class ShinkkingBallApp(CanvasApp):
    """Canvas window for stepping through and inspecting shrinking balls.

    The window is driven by key and mouse bindings set up in ``__init__``.
    A medial-axis object is attached with ``bind_ma``; its ``compute_balls``
    generator is advanced by ``ma_stepper`` and every yielded stage is drawn
    by ``ma_draw_stage``.  Canvas item ids are remembered in the ``*_cache``
    lists so the corresponding overlays can be deleted again.
    """

    def __init__(self, sbapp_list, filename, densify, sigma_noise, denoise,
                 **args):
        """Create the window, register the bindings and empty caches.

        sbapp_list -- shared list of app windows; this instance appends
                      itself, and ``draw_closest_ball`` highlights the same
                      ball in every window of the list.
        filename, densify, sigma_noise, denoise -- only used for the title.
        **args     -- forwarded to ``CanvasApp.__init__``.
        """
        CanvasApp.__init__(self, **args)
        self.sbapp_list = sbapp_list
        self.sbapp_list.append(self)

        self.window_diagonal = math.sqrt(self.sizex**2 + self.sizey**2)
        self.toplevel.title(
            "Shrink the balls [{}] - densify={}x, noise={}, denoise={} ".
            format(filename, densify, sigma_noise, denoise))

        # NOTE(review): 'h' is bound again a few lines below; assuming Tk
        # semantics (a later bind on the same sequence replaces the earlier
        # one) this help binding never fires.  Left unchanged because the
        # intended key for help is not visible here - confirm against HELP.
        self.toplevel.bind('h', self.print_help)

        self.toplevel.bind('a', self.ma_auto_stepper)
        self.toplevel.bind('b', self.draw_all_balls)
        self.toplevel.bind('t', self.toggle_inout)
        self.toplevel.bind('h', self.toggle_ma_stage_geom)

        self.inner_mode = True
        self.draw_stage_geom_mode = 'normal'

        # draw_topo decides on inner/outer and projection from the key
        for key in ('i', 'o', 'u', 'p'):
            self.toplevel.bind(key, self.draw_topo)

        self.toplevel.bind('z', self.spawn_mapperapp)
        self.toplevel.bind('f', self.spawn_filterapp)
        self.toplevel.bind('s', self.spawn_shrinkhistapp)

        self.toplevel.bind('1', self.draw_normal_map_lfs)
        self.toplevel.bind('2', self.draw_normal_map_theta)
        self.toplevel.bind('3', self.draw_normal_map_lam)
        self.toplevel.bind('4', self.draw_normal_map_radii)
        self.toplevel.bind('`', self.draw_normal_map_clear)

        self.toplevel.bind('c', self.clear_overlays)
        self.canvas.pack()

        self.toplevel.bind("<Motion>", self.draw_closest_ball)
        self.toplevel.bind("<Key>", self.ma_step)
        self.toplevel.bind("<ButtonRelease>", self.ma_step)
        self.coordstext = self.canvas.create_text(self.sizex,
                                                  self.sizey,
                                                  anchor='se',
                                                  text='')
        self.ball_info_text = self.canvas.create_text(10,
                                                      self.sizey,
                                                      anchor='sw',
                                                      text='')

        # canvas item ids per kind of overlay
        self.stage_cache = {1: [], 2: [], 3: []}
        self.topo_cache = []
        self.highlight_point_cache = []
        self.highlight_cache = []
        self.poly_cache = []
        self.normalmap_cache = []

        self.mapper_window = None
        self.plotter_window = None
        # spawn_filterapp historically stored its window under this name
        self.plot_window = None
        self.shrinkhist_window = None

        self.kdtree = FLANN()

    def toggle_ma_stage_geom(self, event):
        """Toggle whether stage-2 geometry is cleared between iterations."""
        if self.draw_stage_geom_mode == 'normal':
            self.draw_stage_geom_mode = 'dontclear'
        else:
            self.draw_stage_geom_mode = 'normal'

    def spawn_shrinkhistapp(self, event):
        """Finish the computation and open a ShrinkHistApp window."""
        self.ma_ensure_complete()
        self.shrinkhist_window = ShrinkHistApp(self)

    def spawn_mapperapp(self, event):
        """Finish the computation and open a MapperApp window."""
        self.ma_ensure_complete()
        self.mapper_window = MapperApp(self)

    def spawn_filterapp(self, event):
        """Finish the computation and open a FilterApp window."""
        self.ma_ensure_complete()
        # keep both attribute names in sync: __init__ declares
        # plotter_window, older code stored the window as plot_window
        self.plot_window = self.plotter_window = FilterApp(self)

    def update_mouse_coords(self, event):
        """Remember the pointer position of `event`."""
        self.mouse_x = event.x
        self.mouse_y = event.y

    def toggle_inout(self, event):
        """Switch the displayed balls between inner and outer."""
        self.inner_mode = not self.inner_mode

    def print_help(self, event):
        """Print the module-level HELP text."""
        print(HELP)

    def bind_ma(self, ma, draw_poly=True):
        """Attach the medial-axis object `ma` and draw its input points.

        Sets up the view transform from the bounding box of
        ``ma.D['coords']``, optionally draws the polygon, draws all normals,
        builds the nearest-neighbour index used for mouse picking and starts
        a fresh inner ``compute_balls`` generator.
        """
        self.ma = ma
        self.ma_inner = True
        self.ma_complete = False
        self.ma_gen = ma.compute_balls(inner=self.ma_inner)

        coords = ma.D['coords']
        minx = coords[:, 0].min()
        miny = coords[:, 1].min()
        maxx = coords[:, 0].max()
        maxy = coords[:, 1].max()

        self.set_transform(minx, maxx, miny, maxy)
        self.normal_scale = 0.02 * (self.window_diagonal / self.scale)

        if draw_poly:
            self.draw.polygon(ma.D['coords'], fill="#eeeeee")
        for p, n in zip(ma.D['coords'], ma.D['normals']):
            self.draw.normal(p,
                             n,
                             s=self.normal_scale,
                             fill='#888888',
                             width=1)

        self.kdtree.build_index(self.ma.D['coords'], algorithm='linear')

        self.print_help(None)

        self.canvas.update_idletasks()

    def ma_ensure_complete(self):
        """Run the stepper until both inner and outer balls are computed."""
        while not self.ma_complete:
            self.ma_auto_stepper(None)

    def ma_auto_stepper(self, event):
        """Event handler: run the current generator to its end."""
        self.ma_stepper(mode='auto_step')

    def ma_step(self, event):
        """Event handler: advance the current generator by one stage."""
        self.ma_stepper(mode='onestep')

    def ma_stepper(self, mode):
        """Advance the ball generator and draw what it yields.

        mode -- ``'onestep'`` draws a single stage, ``'auto_step'`` keeps
                going until the generator is exhausted.

        When the generator is exhausted the other side (inner/outer) is
        started.  After the outer pass the derived quantities are computed
        on ``self.ma`` and ``self.ma_complete`` is set.
        """
        def step_and_draw():
            self.ma_draw_stage(next(self.ma_gen))

        try:
            if mode == 'onestep':
                step_and_draw()
            elif mode == 'auto_step':
                while True:
                    step_and_draw()
        except StopIteration:
            if not self.ma_inner:
                self.ma.compute_lfs()
                self.ma.compute_lam()
                self.ma.compute_theta()
                self.ma.compute_lam(inner="out")
                self.ma.compute_theta(inner="out")
                self.ma_complete = True
            self.ma_inner = not self.ma_inner
            self.ma_gen = self.ma.compute_balls(self.ma_inner)

    def ma_draw_stage(self, d):
        """Draw one stage dict yielded by ``compute_balls``.

        Stage 1 draws the current point and a dashed line along its normal;
        stage 2 draws the neighbour point, the ball and the ball centre.
        """
        if d['stage'] == 1:
            # forget the third stage-2 item (the centre dot) before the
            # caches are deleted, so that item is not removed from the canvas
            try:
                self.stage_cache[2].remove(self.stage_cache[2][2])
            except IndexError:
                pass

            self.deleteCache([1, 2, 3])
            p, n = d['geom']
            l = self.window_diagonal  # line length - depends on window size
            i = self.draw.point(p[0], p[1], size=8, fill='red', outline='')
            j = self.draw.edge((p[0]+n[0]*l, p[1]+n[1]*l),
                               (p[0]-n[0]*l, p[1]-n[1]*l),
                               width=1, fill='blue', dash=(4, 2))
            self.stage_cache[1] = [i, j]
            self.canvas.itemconfig(self.coordstext, text=d['msg'])

        elif d['stage'] == 2:
            if self.draw_stage_geom_mode == 'normal':
                self.draw.deleteItems(self.stage_cache[2])
            q, c, r = d['geom']
            i = self.draw.point(q[0], q[1], size=4, fill='blue', outline='')
            j = self.draw.point(c[0],
                                c[1],
                                size=r * self.scale,
                                fill='',
                                outline='blue')
            k = self.draw.point(c[0], c[1], size=2, fill='blue', outline='')
            self.stage_cache[2] = [i, j, k]
            self.canvas.itemconfig(self.coordstext, text=d['msg'])

    def draw_highlight_points(self, key, val, how, inner='in'):
        """Mark the medial-axis points whose ``self.ma.D[key]`` matches.

        how -- ``'greater'``, ``'smaller'`` or ``'equal'``: the comparison of
               each value against `val`.  NaN values are never marked.
        """
        self.draw.deleteItems(self.highlight_cache)
        for m, v in zip(self.ma.D['ma_coords_' + inner], self.ma.D[key]):
            if np.isnan(v):
                continue
            selected = ((how == 'greater' and v > val)
                        or (how == 'smaller' and v < val)
                        or (how == 'equal' and v == val))
            if selected:
                i = self.draw.point(m[0],
                                    m[1],
                                    size=4,
                                    fill='',
                                    outline='red',
                                    width=2)
                self.highlight_cache.append(i)

    def draw_topo(self, event):
        """Event handler: (re)draw the medial-axis line pieces.

        'i'/'u' select the inner, 'o'/'p' the outer data; 'p' and 'u'
        additionally request the projected construction.
        """
        if event.char in ['i', 'u']:
            inner = 'in'
        elif event.char in ['o', 'p']:
            inner = 'out'
        else:
            # not one of the bound keys: nothing to draw
            return

        project = event.char in ['p', 'u']

        self.draw.deleteItems(self.topo_cache)
        self.ma.construct_topo_2d(inner, project)

        for start, end in self.ma.D['ma_linepieces_' + inner]:
            s_e = self.ma.D['ma_coords_' + inner][start]
            e_e = self.ma.D['ma_coords_' + inner][end]
            i = self.draw.edge(s_e, e_e, fill='blue', width=1)
            self.topo_cache.append(i)

    def draw_all_balls(self, event):
        """Event handler: draw the medial ball of every point."""
        self.draw.deleteItems(self.highlight_cache)
        for p_i in range(self.ma.m):
            self.draw_medial_ball(p_i, with_points=False)

    def draw_closest_ball(self, event):
        """Event handler: highlight the ball of the point nearest the mouse
        in every window of ``self.sbapp_list``."""
        x, y = self.t_(event.x, event.y)
        q = np.array([x, y])
        p_i = self.kdtree.nn_index(q, 1)[0][0]

        for sbapp in self.sbapp_list:
            sbapp.highlight_single_ball(p_i)

    def highlight_single_ball(self, p_i):
        """Draw the medial and lfs ball of point `p_i` and show its values."""
        inner = 'in' if self.inner_mode else 'out'

        # plot the shrink history of this ball:
        if self.shrinkhist_window is not None:
            self.shrinkhist_window.update_plot(p_i, inner)

        def get_ball_info_text(p_i):
            # the derived quantities only exist once 'lfs' has been computed
            if 'lfs' not in self.ma.D:
                return ""
            return "lfs\t{0:.2f}\nr\t{2:.2f}\nlambda\t{1:.2f}\ntheta\t{3:.2f} ({4:.2f} deg)\nk\t{5}\nplanar\t{6:.2f} deg".format(
                self.ma.D['lfs'][p_i],
                self.ma.D['lam_'+inner][p_i],
                self.ma.D['ma_radii_'+inner][p_i],
                self.ma.D['theta_'+inner][p_i],
                (180/math.pi) * math.acos(self.ma.D['theta_'+inner][p_i]),
                len(self.ma.D['ma_shrinkhist_'+inner][p_i]),
                (90/math.pi)*( math.pi - math.acos(self.ma.D['theta_'+inner][p_i]) ) )

        self.draw.deleteItems(self.highlight_point_cache)
        self.draw_medial_ball(p_i)
        self.draw_lfs_ball(p_i)

        self.canvas.itemconfig(self.ball_info_text,
                               text=get_ball_info_text(p_i))

    def draw_medial_ball(self, p_i, with_points=True):
        """Draw the medial ball of point `p_i`, if it has one.

        with_points -- also mark the point itself, its second feature point
                       and the ball centre; the items then go into
                       ``highlight_point_cache`` instead of
                       ``highlight_cache``.
        """
        inner = 'in' if self.inner_mode else 'out'

        p1x, p1y = self.ma.D['coords'][p_i][0], self.ma.D['coords'][p_i][1]
        ma_p = self.ma.D['ma_coords_' + inner][p_i]
        ma_px, ma_py = ma_p[0], ma_p[1]

        # NaN centre: no ball is stored for this point
        if np.isnan(ma_px):
            return

        p2 = self.ma.D['coords'][self.ma.D['ma_f2_' + inner][p_i]]
        p2x, p2y = p2[0], p2[1]
        r = self.ma.D['ma_radii_' + inner][p_i]

        ball = self.draw.point(ma_px,
                               ma_py,
                               size=r * self.scale,
                               width=1,
                               fill='',
                               outline='red',
                               dash=(4, 2, 1))
        if with_points:
            self.highlight_point_cache.append(
                self.draw.point(p1x,
                                p1y,
                                size=4,
                                fill='',
                                outline='red',
                                width=2))
            self.highlight_point_cache.append(
                self.draw.point(p2x,
                                p2y,
                                size=4,
                                fill='',
                                outline='purple',
                                width=2))
            self.highlight_point_cache.append(
                self.draw.point(ma_px,
                                ma_py,
                                size=4,
                                fill='',
                                outline='blue',
                                dash=(1),
                                width=2))
            self.highlight_point_cache.append(ball)
        else:
            self.highlight_cache.append(ball)

    def draw_closest_lfs_ball(self, event):
        """Event handler: draw the lfs ball of the point nearest the mouse."""
        x, y = self.t_(event.x, event.y)
        q = np.array([x, y])
        p_i = self.kdtree.nn_index(q, 1)[0][0]

        self.draw_lfs_ball(p_i)

    def draw_lfs_ball(self, p_i):
        """Draw a circle of radius ``lfs`` around point `p_i`.

        Does nothing while ``self.ma.D`` has no 'lfs' entry or the value is
        NaN.
        """
        if 'lfs' in self.ma.D:
            p1x, p1y = self.ma.D['coords'][p_i][0], self.ma.D['coords'][p_i][1]
            lfs = self.ma.D['lfs'][p_i]
            if not np.isnan(lfs):
                self.highlight_point_cache.append(
                    self.draw.point(p1x,
                                    p1y,
                                    size=lfs * self.scale,
                                    fill='',
                                    outline='#888888',
                                    dash=(2, 1)))

    def draw_decimate_lfs(self, epsilon):
        """Run ``ma.decimate_lfs(epsilon)``, report and draw the kept points."""
        self.ma.decimate_lfs(epsilon)

        dropped, total = np.count_nonzero(self.ma.D['decimate_lfs']), self.ma.m
        print('LFS decimation e={}: {} from {} points are dropped ({:.2f}%)'.format(
            epsilon, dropped, total,
            float(dropped) / total * 100))

        self.draw.deleteItems(self.poly_cache)
        kept = self.ma.D['coords'][np.invert(self.ma.D['decimate_lfs'])]
        i = self.draw.polygon_alternating_edge(kept, width=3)
        self.poly_cache.extend(i)

    def draw_decimate_ballco(self, xi, k):
        """Run ``ma.decimate_ballco(xi, k)``, report and draw the kept points."""
        self.ma.decimate_ballco(xi, k)

        dropped, total = np.count_nonzero(
            self.ma.D['decimate_ballco']), self.ma.m
        print('BALLCO decimation xi={}, k={}: {} from {} points are dropped ({:.2f}%)'.format(
            xi, k, dropped, total,
            float(dropped) / total * 100))

        self.draw.deleteItems(self.poly_cache)
        kept = self.ma.D['coords'][np.invert(self.ma.D['decimate_ballco'])]
        i = self.draw.polygon_alternating_edge(kept, width=3)
        self.poly_cache.extend(i)

    def draw_normal_map_lfs(self, event):
        """Event handler: normal map of 'lfs'."""
        self.draw_normal_map('lfs', 40)

    def draw_normal_map_theta(self, event):
        """Event handler: normal map of 'theta_in'."""
        self.draw_normal_map('theta_in', 30)

    def draw_normal_map_lam(self, event):
        """Event handler: normal map of 'lam_in'."""
        self.draw_normal_map('lam_in', 30)

    def draw_normal_map_radii(self, event):
        """Event handler: normal map of 'ma_radii_in'."""
        self.draw_normal_map('ma_radii_in', 30)

    def draw_normal_map_clear(self, event):
        """Event handler: remove the normal map."""
        self.draw.deleteItems(self.normalmap_cache)

    def draw_normal_map(self, key, scale=30):
        """Draw, at every point, its normal scaled by ``self.ma.D[key]``.

        Lengths are normalised by the largest non-NaN value so the longest
        normal has length `scale`.
        """
        self.draw.deleteItems(self.normalmap_cache)
        max_val = np.nanmax(self.ma.D[key])
        for p, p_n, val in zip(self.ma.D['coords'], self.ma.D['normals'],
                               self.ma.D[key]):
            s = scale * (val / max_val)
            i = self.draw.normal(p, p_n, s=s, width=2, fill='red')
            self.normalmap_cache.append(i)

    def clear_overlays(self, event):
        """Event handler: remove topology, highlight and polygon overlays."""
        self.draw.deleteItems(self.topo_cache)
        self.draw.deleteItems(self.highlight_cache)
        self.draw.deleteItems(self.poly_cache)

    def deleteCache(self, stages):
        """Delete the canvas items cached for each stage number in `stages`."""
        for s in stages:
            self.draw.deleteItems(self.stage_cache[s])
Beispiel #18
0
class DND:
    """Key/value memory with kernel-weighted nearest-neighbour lookup.

    Keys and values are held as ``Parameter`` tensors so an optimizer can
    update them.  Nearest neighbours are found through a FLANN index that is
    rebuilt whenever the stored keys change.  Inserts are buffered and only
    applied by ``commit_insert``; the oldest rows are evicted once more than
    ``max_memory`` keys are stored.
    """

    def __init__(self, kernel, num_neighbors, max_memory, optimizer, lr):
        """
        kernel        -- callable ``kernel(stored_key, lookup_key)`` giving
                         the weight of a neighbour in ``lookup``
        num_neighbors -- number of neighbours used per lookup
        max_memory    -- maximum number of stored keys
        optimizer, lr -- passed to ``get_optimizer`` when parameters change
        """
        self.kernel = kernel
        self.num_neighbors = num_neighbors
        self.max_memory = max_memory
        self.opt_name = optimizer
        self.lr = lr
        self.keys = None
        self.values = None
        self.kdtree = FLANN()

        # key_cache stores a cache of all keys that exist in the DND
        # This makes DND updates efficient
        self.key_cache = {}
        # stale_index indicates whether or not the index in self.kdtree is stale
        # This allows us to only rebuild the kdtree index when necessary
        self.stale_index = True
        # indexes_to_be_updated will be updated on a call to update_params
        # This allows us to only rebuild the necessary keys of key_cache
        self.indexes_to_be_updated = set()

        # Keys and values to be inserted into self.keys and self.values
        # when commit_insert is called
        self.keys_to_be_inserted = None
        self.values_to_be_inserted = None

        # Recently used lookup indexes
        # Moved to the back of self.keys and self.values to get LRU property
        self.move_to_back = set()

    @staticmethod
    def _cache_key(row):
        """Return the hashable ``key_cache`` entry for one key tensor."""
        return tuple(row.detach().cpu().numpy())

    def get_index(self, key):
        """
        If key exists in the DND, return its index
        Otherwise, return None
        """
        key = key.detach().cpu().numpy()
        if self.key_cache.get(tuple(key[0])) is not None:
            if self.stale_index:
                self.commit_insert()
            return int(self.kdtree.nn_index(key, 1)[0][0])
        else:
            return None

    def update(self, value, index):
        """
        Set self.values[index] = value
        """
        values = self.values.detach()
        values[index] = value[0].detach()
        self.values = Parameter(values)
        # the Parameter object changed, so the optimizer must be recreated
        params = [self.keys, self.values]
        self.optimizer = get_optimizer(self.opt_name, params, self.lr)

    def insert(self, key, value):
        """
        Insert key, value pair into DND

        The pair is only buffered here; it becomes visible to lookups after
        the next ``commit_insert``.
        """
        if self.keys_to_be_inserted is None:
            # Initial insert
            self.keys_to_be_inserted = key.detach()
            self.values_to_be_inserted = value.detach()
        else:
            self.keys_to_be_inserted = torch.cat(
                [self.keys_to_be_inserted, key.detach()], 0)
            self.values_to_be_inserted = torch.cat(
                [self.values_to_be_inserted, value.detach()], 0)
        self.key_cache[tuple(key.detach().cpu().numpy()[0])] = 0
        self.stale_index = True

    def commit_insert(self):
        """
        Apply buffered inserts, reorder recently used rows to the back,
        evict the oldest rows beyond max_memory, then recreate the optimizer
        and rebuild the nearest-neighbour index.
        """
        if self.keys is None:
            self.keys = Parameter(self.keys_to_be_inserted)
            self.values = Parameter(self.values_to_be_inserted)
        elif self.keys_to_be_inserted is not None:
            keys = torch.cat([self.keys.detach(), self.keys_to_be_inserted], 0)
            self.keys = Parameter(keys)
            values = torch.cat(
                [self.values.detach(), self.values_to_be_inserted], 0)
            self.values = Parameter(values)

        # Move most recently used key-value pairs to the back
        if len(self.move_to_back) != 0:
            unmoved_ids = list(set(range(len(self.keys))) - self.move_to_back)
            moved_ids = list(self.move_to_back)
            unmoved_keys = self.keys.detach()[unmoved_ids]
            moved_keys = self.keys.detach()[moved_ids]
            self.keys = Parameter(torch.cat([unmoved_keys, moved_keys], 0))
            unmoved_values = self.values.detach()[unmoved_ids]
            moved_values = self.values.detach()[moved_ids]
            self.values = Parameter(torch.cat([unmoved_values, moved_values], 0))
            self.move_to_back = set()

        if len(self.keys) > self.max_memory:
            # Expel oldest key to maintain total memory
            for key in self.keys[:-self.max_memory]:
                # pop: two evicted rows may hold the same key
                self.key_cache.pop(self._cache_key(key), None)
            self.keys = Parameter(self.keys[-self.max_memory:].detach())
            self.values = Parameter(self.values[-self.max_memory:].detach())
        self.keys_to_be_inserted = None
        self.values_to_be_inserted = None
        params = [self.keys, self.values]
        self.optimizer = get_optimizer(self.opt_name, params, self.lr)
        self.kdtree.build_index(self.keys.detach().cpu().numpy())
        self.stale_index = False

    def lookup(self, lookup_key, update_flag=False):
        """
        Perform DND lookup

        Returns the kernel-weighted average of the values of the nearest
        stored keys.
        if update_flag:
            add the nearest neighbor indexes to self.indexes_to_be_updated
        else:
            add them to self.move_to_back
        """
        lookup_key_np = lookup_key.detach().cpu().numpy()
        num_neighbors = min(self.num_neighbors, len(self.keys))
        found = self.kdtree.nn_index(lookup_key_np, num_neighbors)[0]
        # pyflann flattens the result to (n_queries,) when a single
        # neighbour is requested; restore (n_queries, k) so the first
        # query's neighbours are always an iterable row
        lookup_indexes = found.reshape(len(lookup_key_np), -1)[0]
        output = 0
        kernel_sum = 0
        for i, index in enumerate(lookup_indexes):
            # Skip keys exactly equal to lookup_key (loss non-differentiable)
            if i == 0 and tuple(lookup_key_np[0]) in self.key_cache:
                continue
            if update_flag:
                self.indexes_to_be_updated.add(int(index))
            else:
                self.move_to_back.add(int(index))
            kernel_val = self.kernel(self.keys[int(index)], lookup_key[0])
            output += kernel_val * self.values[int(index)]
            kernel_sum += kernel_val
        output = output / kernel_sum
        return output

    def update_params(self):
        """
        Update self.keys and self.values via backprop
        Use self.indexes_to_be_updated to update self.key_cache accordingly
        Rebuild the index of self.kdtree
        """
        # drop the cache entries of keys that are about to change ...
        for index in self.indexes_to_be_updated:
            # pop: two rows to be updated may hold the same key
            self.key_cache.pop(self._cache_key(self.keys[index]), None)
        self.optimizer.step()
        self.optimizer.zero_grad()
        # ... and register their new values
        for index in self.indexes_to_be_updated:
            self.key_cache[self._cache_key(self.keys[index])] = 0
        self.indexes_to_be_updated = set()
        self.kdtree.build_index(self.keys.detach().cpu().numpy())
        self.stale_index = False
Beispiel #19
0
class ALaCarteEmbedding():
    """Induce embeddings for words/n-grams from their averaged contexts.

    ``build`` sums the word2vec vectors of the context window of every
    target token, fits a linear map from averaged context vectors to the
    known word2vec vectors, and applies that map to all kept targets.  The
    normalised results are stored in ``self.alacarte`` (rows aligned with
    ``self.alacarte_vocab``) and indexed with FLANN for ``most_similar``.
    """

    def __init__(self,
                 word2vec,
                 tokenize,
                 target_word_list=None,
                 ngram=None,
                 window_size=1,
                 min_count=1):
        """
        word2vec         -- pretrained vectors; must offer ``vector_size``,
                            ``vocab`` and ``word2vec[token]``
        tokenize         -- callable turning a sentence into tokens
        target_word_list -- optional targets; when non-empty, only targets in
                            the vocabulary (word2vec vocab plus this list)
                            are collected
        ngram            -- n-gram sizes to collect, default ``[1]``
        window_size      -- context window size passed to
                            ``window_without_center``
        min_count        -- targets seen fewer times are dropped in ``build``
        """
        # None instead of mutable defaults so instances never share a list
        if target_word_list is None:
            target_word_list = []
        if ngram is None:
            ngram = [1]

        self.w2v = word2vec
        self.embedding_dim = self.w2v.vector_size
        self.vocab = set(self.w2v.vocab.keys())
        self.target_word_list = set(target_word_list)
        for word in self.target_word_list:
            self.vocab.add(word)
        self.tokenize = tokenize
        self.ngram = ngram
        self.window_size = window_size
        self.min_count = min_count

        self.c2v = {}  # target token -> summed context vector
        self.target_counts = Counter()  # target token -> number of contexts
        self.alacarte = {}
        self.flann = FLANN()

    def _get_embedding_vec(self, token):
        """Return the word2vec vector of `token`.

        A string is looked up directly (zeros if unknown); anything else is
        treated as an n-gram and the vectors of its known parts are summed.
        """
        if isinstance(token, str):
            # for unigram
            if token in self.w2v.vocab:
                return self.w2v[token]
            return np.zeros(self.embedding_dim)

        # for ngram
        vec = np.zeros(self.embedding_dim)
        for t in token:
            if t in self.w2v.vocab:
                vec += self.w2v[t]
        return vec

    def _make_context_vectors(self, tokens, n):
        """Accumulate context vectors for all targets of n-gram size `n`."""
        if n > 1:
            token_list = ngram(tokens, n)
        else:
            token_list = tokens

        for target_token, context in window_without_center(
                token_list, self.window_size):
            if self.target_word_list and target_token not in self.vocab:
                # target_word_list is specified and each target token is not
                # in the vocabulary
                continue

            context_vector = np.zeros(self.embedding_dim)
            for token in context:
                context_vector += self._get_embedding_vec(token)

            if target_token in self.c2v:
                self.c2v[target_token] += context_vector
            else:
                self.c2v[target_token] = context_vector
            self.vocab.add(target_token)
            self.target_counts[target_token] += 1

    def build(self, sentences):
        """Collect contexts from `sentences` and compute all embeddings.

        Sentences with at most ``2 * window_size + 1`` tokens are ignored.
        Sets ``self.A``, ``self.alacarte`` and ``self.alacarte_vocab`` and
        builds the FLANN index.
        """
        # compute each word's context embedding
        for sentence in tqdm(sentences):
            tokens = self.tokenize(sentence)
            if len(tokens) > self.window_size * 2 + 1:
                for n in self.ngram:
                    self._make_context_vectors(tokens, n)

        # remove low frequency token
        for word, freq in self.target_counts.items():
            if freq < self.min_count and word in self.vocab:
                self.vocab.remove(word)

        # compute context-to-feature transform
        X_all = np.array([
            v / self.target_counts[k] for k, v in self.c2v.items()
            if k in self.vocab
        ])

        # only targets with a known word2vec vector can be used for fitting
        X = np.array([
            v / self.target_counts[k] for k, v in self.c2v.items()
            if k in self.w2v.vocab
        ])
        y = np.array(
            [self.w2v[k] for k, v in self.c2v.items() if k in self.w2v.vocab])
        self.A = LinearRegression(fit_intercept=False).fit(X, y).coef_.astype(
            np.float32)  # emb x emb

        # set a la carte embedding
        self.alacarte = normalize(X_all.dot(self.A.T))
        # same iteration order and filter as X_all, so rows stay aligned
        self.alacarte_vocab = [v for v in self.c2v.keys() if v in self.vocab]

        # make index for similarity search
        self.flann.build_index(self.alacarte)

    def most_similar(self, word, topn=1):
        """Return ``[(text, distance), ...]`` for the `topn` nearest entries.

        `word` must be an element of ``self.alacarte_vocab`` (``ValueError``
        from ``list.index`` otherwise).  The second tuple item is the value
        FLANN reports for the neighbour.
        """
        word_vec = self.alacarte[self.alacarte_vocab.index(word)]
        result, dists = self.flann.nn_index(word_vec, num_neighbors=topn)

        # with several neighbours FLANN returns one row per query point
        if topn != 1:
            result = result[0]
            dists = dists[0]

        output = []
        for i, index in enumerate(result.tolist()):
            text = "".join(self.alacarte_vocab[index])
            sim = dists[i]
            output.append((text, sim))
        return output

    def save(self, path):
        """Write the embeddings to `path` as text.

        First line: ``<number of entries> <dimension>``; then one line per
        entry with the token (n-gram parts concatenated) followed by its
        values rounded to 6 decimals.
        """
        with open(path, "w", encoding="utf-8") as f:
            f.write(f"{len(self.alacarte_vocab)} {self.embedding_dim}\n")
            # use this instance's data (not a global named `alc`)
            for arr, word in zip(self.alacarte, self.alacarte_vocab):
                values = [str(np.round(s, 6)) for s in arr.tolist()]
                f.write(" ".join(["".join(word)] + values) + "\n")
Beispiel #20
0
class SifEmbedding():
    """Sentence embeddings from SIF-weighted word vectors with FLANN lookup.

    A sentence vector is the mean of its word vectors, each scaled by
    ``a / (a + word_counts[word])``.  After fitting, the projection of every
    sentence vector onto the first component found by a truncated SVD is
    subtracted, and the resulting matrix is indexed with FLANN for
    nearest-neighbour queries.
    """

    def __init__(self, embedding_file, a=1e-3, tokenize=tokenize):
        """Load the word vectors and initialise empty fitting state.

        Args:
            embedding_file: Path passed to
                ``KeyedVectors.load_word2vec_format`` with ``binary=True``.
            a: Smoothing constant used in the per-word weight.
            tokenize: Callable applied to each raw sentence; its result is
                iterated word by word.
        """
        self.word2vec_file = embedding_file
        self.word2vec = KeyedVectors.load_word2vec_format(self.word2vec_file,
                                                          binary=True)
        self.embedding_dim = self.word2vec.vector_size
        self.tokenize = tokenize
        self.a = a
        self.sentence_list = []
        self.sentence_list_tokenized = []
        self.word_counts = Counter()
        self.sentence_embedding = np.array([])
        self.flann = FLANN()

    def _weighted_bow(self, sentence):
        """Return the weighted mean word vector of a tokenized sentence.

        Words without an embedding are skipped (and reported at debug level);
        the sum is divided by the number of words that did contribute.  A
        sentence with no known words yields the zero vector.

        Args:
            sentence: Iterable of tokens.

        Returns:
            A 1-D array of length ``embedding_dim``.
        """
        vs = np.zeros(self.embedding_dim)
        sentence_length = 0

        for word in sentence:
            # Only the lookup is guarded, and only for a missing key, so that
            # unrelated errors are not silently swallowed.
            try:
                word_vec = self.word2vec[word]
            except KeyError:
                logger.debug("Embedding Vector: %s not found", word)
                continue

            # smooth inverse frequency, SIF; a word absent from word_counts
            # counts as 0 (assumes word_counts is a mapping, as the Counter
            # set in __init__ is).
            a_value = self.a / (self.a + self.word_counts.get(word, 0))
            vs += np.multiply(a_value, word_vec)  # vs += sif * word_vector
            sentence_length += 1

        if sentence_length != 0:
            vs = np.divide(vs, sentence_length)

        return vs

    def _fit_svd(self, X):
        """Fit a one-component truncated SVD on ``X`` and store it in ``u``.

        Returns:
            ``self``, so the call can be chained.
        """
        svd = TruncatedSVD(n_components=1, n_iter=100, random_state=0)
        svd.fit(X)
        self.u = svd.components_
        return self

    def _transform_svd(self, X):
        """Subtract from ``X`` its projection onto the stored component."""
        vs = X - X.dot(self.u.transpose()) * self.u
        return vs

    def _fit_transform_svd(self, X):
        """Fit the component on ``X`` and return ``X`` with it removed."""
        return self._fit_svd(X)._transform_svd(X)

    def fit(self, sentence_list):
        """Embed ``sentence_list`` and (re)build the search index.

        The sentences are appended to those from earlier ``fit`` calls; word
        counts, embeddings and the FLANN index are then recomputed over the
        full accumulated list.

        Args:
            sentence_list: Iterable of raw sentences accepted by
                ``self.tokenize``.
        """
        for sentence in sentence_list:
            self.sentence_list.append(sentence)
            self.sentence_list_tokenized.append(self.tokenize(sentence))

        self.word_counts = map_word_frequency(self.sentence_list_tokenized)

        # Alg.1 step 1
        sentence_vec = [
            self._weighted_bow(tokens)
            for tokens in self.sentence_list_tokenized
        ]

        # Alg.1 step 2
        self.sentence_embedding = self._fit_transform_svd(
            np.array(sentence_vec))

        # make index for similarity search
        self.flann.build_index(self.sentence_embedding)

    def infer_vector(self, sentence):
        """Embed one raw sentence using the component stored by ``fit``."""
        return self._transform_svd(self._weighted_bow(self.tokenize(sentence)))

    def predict(self, sentence, topn=1):
        """Return the fitted sentences nearest to ``sentence``.

        Args:
            sentence: Raw sentence to embed and look up.
            topn: Number of neighbours requested from the index.

        Returns:
            A list of ``[text, dist]`` pairs, where ``text`` is a sentence
            given to ``fit`` and ``dist`` is the matching value from the
            distance array returned by ``nn_index``.
        """
        vs = self.infer_vector(sentence)
        result, dists = self.flann.nn_index(vs, num_neighbors=topn)

        # For topn == 1 the returned arrays are used as they are; otherwise
        # only their first row (the single query) is kept.
        if topn != 1:
            result = result[0]
            dists = dists[0]

        output = []
        for i, index in enumerate(result.tolist()):
            text = self.sentence_list[index]
            sim = dists[i]
            output.append([text, sim])
        return output