Example #1: computing a weighted MinHash distance (unit test)
def test_from_weight_array(self):
    # Minhash(d, seed, sample_size): with sample_size = 64 the weighted
    # signature has 2 * 64 = 128 entries (see the length assert below)
    mh = tm.Minhash(8, 42, 64)
    a = mh.from_weight_array(
        tm.VectorFloat([0.2, 0.6, 0.22, 0.26, 0.62, 0.66]))
    b = mh.from_weight_array(
        tm.VectorFloat([0.26, 0.6, 0.22, 0.26, 0.62, 1.0]))
    assert len(a) == 128
    assert round(mh.get_weighted_distance(a, b), 3) == 0.094
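These snippets are excerpts from larger scripts and omit their module-level setup. A minimal sketch of the imports they rely on (the exact originals may differ):

import base64
import random
from io import BytesIO
from timeit import default_timer as timer

import numpy as np
import tmap as tm
from faerun import Faerun
from PIL import Image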
Example #2: building a kNN graph from an LSHForest (unit test)
    def test_knn_graph(self):
        random.seed(42)
        data = []
        for _ in range(100):
            row = []
            for _ in range(10):
                row.append(random.randint(0, 20))
            data.append(tm.VectorUint(row))

        mh = tm.Minhash()
        lf = tm.LSHForest()

        lf.batch_add(mh.batch_from_sparse_binary_array(data))
        lf.index()

        # Output buffers for the kNN graph edge list: sources (f), targets (t)
        # and edge weights (w)
        f = tm.VectorUint()
        t = tm.VectorUint()
        w = tm.VectorFloat()

        lf.get_knn_graph(f, t, w, 10)
        assert len(f) == 1000  # 100 points with k = 10 neighbours each
        assert t[0] == 0
        assert t[1] == 26
        assert t[2] == 36
        assert t[3] == 67
        assert t[4] == 33
        assert t[5] == 83
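The three vectors filled by get_knn_graph are parallel arrays describing the edge list of the kNN graph: edge i runs from f[i] to t[i] and carries weight w[i]. As a supplementary sketch (not part of the original test), the edges can be read back as plain tuples:

edges = [(f[i], t[i], w[i]) for i in range(len(f))]  # (source, target, weight)
assert len(edges) == 100 * 10                        # 100 points, k = 10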
Example #3: file-backed LSHForest filled with sparse binary data
def main():
    """ Main function """

    # Use 1024 permutations to create the MinHash
    enc = tm.Minhash(1024)
    # file_backed=True keeps the forest's data on disk rather than in memory
    lf = tm.LSHForest(128, file_backed=True)

    # d = 1000
    # n = 1000000
    d = 10000
    n = 1000

    # Generate random sparse binary data and add it to the forest directly
    start = timer()
    for _ in range(n):
        # data.append(tm.VectorUint(np.random.randint(0, high=2, size=d)))
        lf.add(
            enc.from_sparse_binary_array(
                tm.VectorUint(np.random.randint(0, high=2, size=d))))

    print(f"Generating and adding the data took {(timer() - start) * 1000}ms.")

    # Index the added data
    start = timer()
    lf.index()
    print(f"Indexing took {(timer() - start) * 1000}ms.")

    # Construct the k-nearest neighbour graph (k = 10)
    start = timer()
    knng_from = tm.VectorUint()
    knng_to = tm.VectorUint()
    knng_weight = tm.VectorFloat()

    _ = lf.get_knn_graph(knng_from, knng_to, knng_weight, 10)
    print(f"The kNN search took {(timer() - start) * 1000}ms.")
Example #4: tmap layout and Faerun plot of RNA-seq data
def main():
    """ Main function """

    # Initialize and configure tmap
    dims = 256
    enc = tm.Minhash(len(DATA.columns), 42, dims)
    lf = tm.LSHForest(dims * 2, 32, weighted=True)

    fps = []
    for _, row in DATA.iterrows():
        fps.append(tm.VectorFloat(list(row)))

    lf.batch_add(enc.batch_from_weight_array(fps))
    lf.index()

    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, CFG_TMAP)
    lf.clear()

    legend_labels = {(1, "PRAD"), (2, "LUAD"), (3, "BRCA"), (4, "KIRC"),
                     (5, "COAD")}

    # Create the plot
    faerun = Faerun(view="front", coords=False, legend_title="")
    faerun.add_scatter(
        "RNASEQ",
        {
            "x": x,
            "y": y,
            "c": LABELS,
            "labels": LABELS
        },
        colormap="tab10",
        point_scale=5.0,
        max_point_size=10,
        shader="smoothCircle",
        has_legend=True,
        categorical=True,
        legend_labels=legend_labels,
        legend_title="Tumor Types",
    )
    faerun.add_tree("RNASEQ_tree", {
        "from": s,
        "to": t
    },
                    point_helper="RNASEQ",
                    color="#666666")
    faerun.plot("rnaseq")
Example #5: benchmarking batch insertion and kNN-graph construction
def main():
    """ Main function """

    # Use 128 permutations to create the MinHash
    enc = tm.Minhash(128)
    lf = tm.LSHForest(128)

    d = 1000
    n = 10000

    data = []

    # Generating some random data
    start = timer()
    for _ in range(n):
        data.append(tm.VectorUchar(np.random.randint(0, high=2, size=d)))
    print(f"Generating the data took {(timer() - start) * 1000}ms.")

    # Use batch_add to parallelize the insertion of the arrays
    start = timer()
    lf.batch_add(enc.batch_from_binary_array(data))
    print(f"Adding the data took {(timer() - start) * 1000}ms.")

    # Index the added data
    start = timer()
    lf.index()
    print(f"Indexing took {(timer() - start) * 1000}ms.")

    # Construct the k-nearest neighbour graph
    start = timer()
    knng_from = tm.VectorUint()
    knng_to = tm.VectorUint()
    knng_weight = tm.VectorFloat()

    _ = lf.get_knn_graph(knng_from, knng_to, knng_weight, 10)
    print(f"The kNN search took {(timer() - start) * 1000}ms.")
Example #6: tmap layout and Faerun plot of Fashion-MNIST images
def main():
    """ Main function """

    # Initialize and configure tmap
    dims = 1024
    enc = tm.Minhash(28 * 28, 42, dims)
    lf = tm.LSHForest(dims * 2, 128)

    print("Converting images ...")
    for image in IMAGES:
        img = Image.fromarray(np.uint8(np.split(np.array(image), 28)))
        buffered = BytesIO()
        img.save(buffered, format="JPEG")
        img_str = base64.b64encode(buffered.getvalue())
        IMAGE_LABELS.append("data:image/bmp;base64," +
                            str(img_str).replace("b'", "").replace("'", ""))
    tmp = []
    for _, image in enumerate(IMAGES):
        tmp.append(tm.VectorFloat(image / 255))

    print("Running tmap ...")
    start = timer()
    lf.batch_add(enc.batch_from_weight_array(tmp))
    lf.index()
    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, CFG)
    print("tmap: " + str(timer() - start))

    legend_labels = [
        (0, "T-shirt/top"),
        (1, "Trouser"),
        (2, "Pullover"),
        (3, "Dress"),
        (4, "Coat"),
        (5, "Sandal"),
        (6, "Shirt"),
        (7, "Sneaker"),
        (8, "Bag"),
        (9, "Ankle boot"),
    ]

    faerun = Faerun(clear_color="#111111", view="front", coords=False)
    faerun.add_scatter(
        "FMNIST",
        {
            "x": x,
            "y": y,
            "c": LABELS,
            "labels": IMAGE_LABELS
        },
        colormap="tab10",
        shader="smoothCircle",
        point_scale=2.5,
        max_point_size=10,
        has_legend=True,
        categorical=True,
        legend_labels=legend_labels,
    )
    faerun.add_tree("FMNIST_tree", {
        "from": s,
        "to": t
    },
                    point_helper="FMNIST",
                    color="#666666")
    faerun.plot("fmnist", template="url_image")