def test_from_weight_array(self):
    """Hash two weight vectors and check the hash length and their distance."""
    encoder = tm.Minhash(8, 42, 64)

    first_weights = tm.VectorFloat([0.2, 0.6, 0.22, 0.26, 0.62, 0.66])
    first_hash = encoder.from_weight_array(first_weights)

    second_weights = tm.VectorFloat([0.26, 0.6, 0.22, 0.26, 0.62, 1.0])
    second_hash = encoder.from_weight_array(second_weights)

    # The weighted hash of the first vector is expected to hold 128 entries.
    assert len(first_hash) == 128

    # Compare at three decimals so tiny float noise does not fail the test.
    distance = encoder.get_weighted_distance(first_hash, second_hash)
    assert round(distance, 3) == 0.094
def test_knn_graph(self):
    """Build a kNN graph over seeded random rows and check its first targets."""
    # Fixed seed so the generated rows (and the expected neighbours) are stable.
    random.seed(42)
    rows = [
        tm.VectorUint([random.randint(0, 20) for _ in range(10)])
        for _ in range(100)
    ]

    encoder = tm.Minhash()
    forest = tm.LSHForest()
    forest.batch_add(encoder.batch_from_sparse_binary_array(rows))
    forest.index()

    # get_knn_graph fills these three vectors in place.
    sources = tm.VectorUint()
    targets = tm.VectorUint()
    weights = tm.VectorFloat()
    forest.get_knn_graph(sources, targets, weights, 10)

    # 100 rows with k = 10 are expected to yield 1000 edges.
    assert len(sources) == 1000

    for position, expected in enumerate((0, 26, 36, 67, 33, 83)):
        assert targets[position] == expected
def main():
    """Time data insertion, indexing and kNN-graph construction on a file-backed forest."""
    # NOTE(review): the encoder is created with 1024 while the forest is
    # created with 128 - confirm that this mismatch is intended.
    encoder = tm.Minhash(1024)
    forest = tm.LSHForest(128, file_backed=True)

    dims = 10000
    samples = 1000

    # Generate random data and add each encoded sample to the forest right away
    start = timer()
    for _ in range(samples):
        # NOTE(review): a dense 0/1 draw is handed to the *sparse* binary
        # encoder - confirm this is the intended input for the benchmark.
        bits = np.random.randint(0, high=2, size=dims)
        forest.add(encoder.from_sparse_binary_array(tm.VectorUint(bits)))
    print(f"Generating the data took {(timer() - start) * 1000}ms.")

    # Index the added data
    start = timer()
    forest.index()
    print(f"Indexing took {(timer() - start) * 1000}ms.")

    # Construct the kNN graph (k = 10); the three vectors are filled in place
    start = timer()
    edge_sources = tm.VectorUint()
    edge_targets = tm.VectorUint()
    edge_weights = tm.VectorFloat()
    forest.get_knn_graph(edge_sources, edge_targets, edge_weights, 10)
    print(f"The kNN search took {(timer() - start) * 1000}ms.")
def main():
    """Lay out the rows of ``DATA`` with tmap and render them with Faerun.

    Uses ``DATA``, ``LABELS`` and ``CFG_TMAP``, which are defined outside this
    function. Each row of ``DATA`` is encoded as a weighted MinHash, the
    hashes are indexed in an LSH forest, a layout is computed from the forest,
    and the result is drawn as a scatter plot plus a tree before
    ``faerun.plot("rnaseq")`` is called.
    """
    # Initialize and configure tmap
    dims = 256
    enc = tm.Minhash(len(DATA.columns), 42, dims)
    lf = tm.LSHForest(dims * 2, 32, weighted=True)

    # One weight vector per row of DATA
    fps = [tm.VectorFloat(list(row)) for _, row in DATA.iterrows()]

    lf.batch_add(enc.batch_from_weight_array(fps))
    lf.index()
    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, CFG_TMAP)

    # The layout is all that is needed from here on, so clear the forest.
    lf.clear()

    # A list (rather than a set) keeps the legend entries in this fixed order.
    legend_labels = [
        (1, "PRAD"),
        (2, "LUAD"),
        (3, "BRCA"),
        (4, "KIRC"),
        (5, "COAD"),
    ]

    # Create the plot
    faerun = Faerun(view="front", coords=False, legend_title="")
    faerun.add_scatter(
        "RNASEQ",
        {"x": x, "y": y, "c": LABELS, "labels": LABELS},
        colormap="tab10",
        point_scale=5.0,
        max_point_size=10,
        shader="smoothCircle",
        has_legend=True,
        categorical=True,
        legend_labels=legend_labels,
        legend_title="Tumor Types",
    )
    faerun.add_tree(
        "RNASEQ_tree",
        {"from": s, "to": t},
        point_helper="RNASEQ",
        color="#666666",
    )
    faerun.plot("rnaseq")
def main():
    """Time batch insertion, indexing and kNN-graph construction on random binary data."""
    # Encoder and forest are both created with 128
    encoder = tm.Minhash(128)
    forest = tm.LSHForest(128)

    dims = 1000
    samples = 10000

    # Generating some random data
    start = timer()
    vectors = [
        tm.VectorUchar(np.random.randint(0, high=2, size=dims))
        for _ in range(samples)
    ]
    print(f"Generating the data took {(timer() - start) * 1000}ms.")

    # Encode all vectors in one batch call and add them in one batch call
    start = timer()
    hashes = encoder.batch_from_binary_array(vectors)
    forest.batch_add(hashes)
    print(f"Adding the data took {(timer() - start) * 1000}ms.")

    # Index the added data
    start = timer()
    forest.index()
    print(f"Indexing took {(timer() - start) * 1000}ms.")

    # Construct the k-nearest neighbour graph; the vectors are filled in place
    start = timer()
    edge_sources = tm.VectorUint()
    edge_targets = tm.VectorUint()
    edge_weights = tm.VectorFloat()
    forest.get_knn_graph(edge_sources, edge_targets, edge_weights, 10)
    print(f"The kNN search took {(timer() - start) * 1000}ms.")
def main():
    """Lay out ``IMAGES`` with tmap and plot them with Faerun.

    Uses ``IMAGES``, ``IMAGE_LABELS``, ``LABELS`` and ``CFG``, which are
    defined outside this function. Every image is JPEG-encoded and appended to
    ``IMAGE_LABELS`` as a base64 data URI, the scaled pixel values are hashed
    as weight vectors, and the resulting layout is drawn as a scatter plot
    plus a tree before ``faerun.plot("fmnist", template="url_image")`` is
    called.
    """
    # Initialize and configure tmap
    dims = 1024
    enc = tm.Minhash(28 * 28, 42, dims)
    lf = tm.LSHForest(dims * 2, 128)

    print("Converting images ...")
    for image in IMAGES:
        # Split the pixel sequence into 28 equal parts to form the image rows.
        img = Image.fromarray(np.uint8(np.split(np.array(image), 28)))
        buffered = BytesIO()
        img.save(buffered, format="JPEG")
        # b64encode returns ASCII-only bytes, so decode instead of trimming
        # the "b'...'" repr of the bytes object.
        encoded = base64.b64encode(buffered.getvalue()).decode("ascii")
        # The payload is JPEG, so the data URI declares image/jpeg.
        IMAGE_LABELS.append("data:image/jpeg;base64," + encoded)

    # Scale every image by 1/255 before wrapping it as a weight vector
    tmp = [tm.VectorFloat(image / 255) for image in IMAGES]

    print("Running tmap ...")
    start = timer()
    lf.batch_add(enc.batch_from_weight_array(tmp))
    lf.index()
    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, CFG)
    print("tmap: " + str(timer() - start))

    legend_labels = [
        (0, "T-shirt/top"),
        (1, "Trouser"),
        (2, "Pullover"),
        (3, "Dress"),
        (4, "Coat"),
        (5, "Sandal"),
        (6, "Shirt"),
        (7, "Sneaker"),
        (8, "Bag"),
        (9, "Ankle boot"),
    ]

    faerun = Faerun(clear_color="#111111", view="front", coords=False)
    faerun.add_scatter(
        "FMNIST",
        {"x": x, "y": y, "c": LABELS, "labels": IMAGE_LABELS},
        colormap="tab10",
        shader="smoothCircle",
        point_scale=2.5,
        max_point_size=10,
        has_legend=True,
        categorical=True,
        legend_labels=legend_labels,
    )
    faerun.add_tree(
        "FMNIST_tree",
        {"from": s, "to": t},
        point_helper="FMNIST",
        color="#666666",
    )
    faerun.plot("fmnist", template="url_image")