def main():
    """Build and plot a TMAP of the images found in ``coil_20``.

    Every file in the ``coil_20`` directory is read, its class label is
    parsed from the ``obj<N>__...`` file name, the pixel intensities are
    indexed in a weighted LSH forest and the resulting layout is written
    as a faerun plot named ``coil``. Each point is labelled with a base64
    data URI of its image.
    """
    # Initialize and configure tmap
    dims = 2048
    enc = tm.Minhash(16384, 42, dims)
    lf = tm.LSHForest(dims * 2, 128, weighted=True)

    images = []
    labels = []
    for file_name in os.listdir("coil_20"):
        # "obj<N>__..." -> zero-based class label N - 1
        labels.append(int(file_name.split("__")[0].replace("obj", "")) - 1)
        # Use a context manager so the file handle is released right away
        with Image.open(os.path.join("coil_20", file_name)) as img_file:
            images.append(list(img_file.getdata()))

    image_labels = []
    for image in images:
        # Split the flat pixel list into 128 rows to rebuild the 2D image
        img = Image.fromarray(np.uint8(np.split(np.array(image), 128)))
        buffered = BytesIO()
        img.save(buffered, format="JPEG")
        # Decode the base64 bytes directly instead of stripping the
        # "b'...'" repr, and declare the MIME type that matches the JPEG
        # payload written above.
        encoded = base64.b64encode(buffered.getvalue()).decode("ascii")
        image_labels.append("data:image/jpeg;base64," + encoded)

    # Scale the pixel values by 1 / 255 to use them as minhash weights
    weights = [[pixel / 255 for pixel in image] for image in images]

    lf.batch_add(enc.batch_from_weight_array(weights))
    lf.index()

    x, y, s, t, _ = tm.layout_from_lsh_forest(lf)

    faerun = Faerun(clear_color="#111111", view="front", coords=False)
    faerun.add_scatter(
        "COIL20",
        {
            "x": x,
            "y": y,
            "c": labels,
            "labels": image_labels
        },
        colormap="tab20",
        shader="smoothCircle",
        point_scale=2.5,
        max_point_size=10,
        has_legend=True,
        categorical=True,
    )
    faerun.add_tree("COIL20_tree", {
        "from": s,
        "to": t
    },
                    point_helper="COIL20",
                    color="#666666")
    faerun.plot("coil", template="url_image")
def main():
    """Serve a 2D scatter of the data returned by ``load()``.

    The first two coordinates of every point are plotted, colored by the
    first entry of the matching ``mqns`` row and given a random size
    between 1 and 5. The plot data is pickled to ``example.faerun`` and
    then served with ``host``.
    """
    coords, mqns, smiles = load()

    data = {"x": [], "y": [], "c": [], "s": [], "labels": smiles}
    for coord, mqn in zip(coords, mqns):
        data["x"].append(coord[0])
        data["y"].append(coord[1])
        data["c"].append(mqn[0])
        data["s"].append(random.randint(1, 5))

    df = pd.DataFrame.from_dict(data)

    faerun = Faerun()
    faerun.add_scatter(
        "drugbank",
        df,
        shader="smoothCircle",
        point_scale=2.0,
        colormap="jet",
        has_legend=True,
        categorical=False,
    )

    with open("example.faerun", "wb+") as f:
        pickle.dump(faerun.create_python_data(),
                    f,
                    protocol=pickle.HIGHEST_PROTOCOL)

    # Every entry becomes one markdown line. The empty string has to be
    # its own tuple element (note the comma before it); otherwise implicit
    # string concatenation swallows the blank line.
    info = (
        "#Welcome to Faerun",
        "This is a small demonstration visualising drugs stored in [Drugbank](http://drugbank.ca) using coordinates generated by a PCA projection.",
        "",
        "By the way, this info was generated using markdown, this means that you can easily:",
        "- Add lists",
        "- Build tables",
        "- Insert images and links",
        "- Add code examples",
        "- ...",
    )

    host(
        "example.faerun",
        label_type="smiles",
        legend=True,
        theme="light",
        info="\n".join(info),
    )
def main():
    """Build and plot a TMAP of the images in the module-level ``IMAGES``.

    A base64 data URI is appended to ``IMAGE_LABELS`` for every image, the
    images are binarized and indexed in an LSH forest, and the layout is
    written to ``outputs`` as the faerun plot ``i3d-tmap-mnist``.
    """
    # Initialize and configure tmap
    dims = 1024
    enc = tm.Minhash(dims)
    lf = tm.LSHForest(dims, 128)

    print("Converting images ...")
    for image in tqdm(IMAGES):
        # Split the flat pixel sequence into 28 rows to rebuild the image
        img = Image.fromarray(np.uint8(np.split(np.array(image), 28)))
        buffered = BytesIO()
        img.save(buffered, format="JPEG")
        # Decode the base64 bytes directly instead of stripping the
        # "b'...'" repr, and declare the MIME type that matches the JPEG
        # payload written above.
        encoded = base64.b64encode(buffered.getvalue()).decode("ascii")
        IMAGE_LABELS.append("data:image/jpeg;base64," + encoded)

    fingerprints = []
    for image in IMAGES:
        # Binarize each image around the mean of its non-zero pixels.
        lit_pixels = sum(1 for x in image if x > 0)
        # An image without any non-zero pixel has no meaningful mean; an
        # infinite threshold keeps every bit at 0 instead of dividing by
        # zero.
        avg = sum(image) / lit_pixels if lit_pixels else float("inf")
        fingerprints.append(
            tm.VectorUchar([1 if x >= avg else 0 for x in image]))

    print("Running tmap ...")
    lf.batch_add(enc.batch_from_binary_array(fingerprints))
    lf.index()
    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, CFG)

    faerun = Faerun(clear_color="#111111", view="front", coords=False)
    faerun.add_scatter(
        "MNIST",
        {
            "x": x,
            "y": y,
            "c": LABELS,
            "labels": IMAGE_LABELS
        },
        colormap="tab10",
        shader="smoothCircle",
        point_scale=2.5,
        max_point_size=10,
        has_legend=True,
        categorical=True,
    )
    faerun.add_tree("MNIST_tree", {
        "from": s,
        "to": t
    },
                    point_helper="MNIST",
                    color="#666666")
    faerun.plot("i3d-tmap-mnist", path="outputs", template="url_image")
def main():
    """Serve a 3D scatter of the data returned by ``load()``.

    The first three coordinates of every point are plotted as spheres,
    colored by the first entry of the matching ``mqns`` row. The plot data
    is pickled to ``example.faerun`` and then served with ``host``.
    """
    coords, mqns, smiles = load()

    data = {'x': [], 'y': [], 'z': [], 'c': [], 'labels': smiles}
    for coord, mqn in zip(coords, mqns):
        data['x'].append(coord[0])
        data['y'].append(coord[1])
        data['z'].append(coord[2])
        data['c'].append(mqn[0])

    df = pd.DataFrame.from_dict(data)

    faerun = Faerun()
    faerun.add_scatter('drugbank', df, shader='sphere', point_scale=5.0,
                       colormap='jet', has_legend=True, categorical=False)

    with open('example.faerun', 'wb+') as f:
        pickle.dump(faerun.create_python_data(), f,
                    protocol=pickle.HIGHEST_PROTOCOL)

    # Every entry becomes one markdown line. The empty string has to be
    # its own tuple element (note the comma before it); otherwise implicit
    # string concatenation swallows the blank line.
    info = (
        '#Welcome to Faerun',
        'This is a small demonstration visualising drugs stored in [Drugbank](http://drugbank.ca) using coordinates generated by a PCA projection.',
        '',
        'By the way, this info was generated using markdown, this means that you can easily:',
        '- Add lists',
        '- Build tables',
        '- Insert images and links',
        '- Add code examples',
        '- ...')

    host('example.faerun', label_type='default', legend=True, theme='dark',
         info='\n'.join(info), view='free')
def df_to_faerun(df, x, y, s, t):
    """Create a Faerun scatter plus tree plot and return it.

    Args:
        df: Table providing the 'Score', 'MolWeight', 'LogP', 'QED', 'SAS'
            and 'SMILES' columns.
        x: x coordinates of the points.
        y: y coordinates of the points.
        s: Start indices of the tree edges.
        t: End indices of the tree edges.

    Returns:
        The Faerun object holding the scatter layer and the tree layer.
    """
    print('Making Faerun plot')

    # One entry per color series: (column in df, series title, colormap)
    series = [
        ('Score', "Docking Score", "rainbow_r"),
        ('MolWeight', "Molecular Weight", "rainbow"),
        ('LogP', "Lipophilicity", "rainbow"),
        ('QED', "Quantitative Estimate of Druglikeness", "rainbow"),
        ('SAS', "Synthetic Accessibility Score", "Blues"),
    ]

    scatter_data = {
        "x": x,
        "y": y,
        "c": [df[column] for column, _, _ in series],
        "labels": df['SMILES'],
    }

    plot = Faerun(view="front", coords=False)
    plot.add_scatter(
        # No space in the string allowed for the name, use underscore!!
        # Cannot start with a number, it has to be a letter!!
        # The string is converted to a variable name, therefore it has to
        # be compatible with the python variable naming scheme.
        "SampleDock",
        scatter_data,
        shader="smoothCircle",
        point_scale=2.0,
        max_point_size=20,
        categorical=[False] * len(series),
        colormap=[cmap for _, _, cmap in series],
        series_title=[title for _, title, _ in series],
        has_legend=True,
    )

    # The first character of the name has to be a letter!
    plot.add_tree("SnD_Tree", {"from": s, "to": t}, point_helper="SampleDock")
    print('Plotting finished')
    return plot
def main():
    """Build and plot a TMAP of the rows of the module-level ``DATA``.

    Each row of ``DATA`` is indexed as a weighted minhash in an LSH
    forest; the layout is colored and labelled by ``LABELS`` and written
    as the faerun plot ``rnaseq``.
    """
    # Initialize and configure tmap
    dims = 256
    enc = tm.Minhash(len(DATA.columns), 42, dims)
    lf = tm.LSHForest(dims * 2, 32, weighted=True)

    fps = [tm.VectorFloat(list(row)) for _, row in DATA.iterrows()]

    lf.batch_add(enc.batch_from_weight_array(fps))
    lf.index()
    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, CFG_TMAP)
    lf.clear()

    # A list (not a set) keeps the legend entries in a fixed order; set
    # iteration order depends on string hashing and can change per run.
    legend_labels = [
        (1, "PRAD"),
        (2, "LUAD"),
        (3, "BRCA"),
        (4, "KIRC"),
        (5, "COAD"),
    ]

    # Create the plot
    faerun = Faerun(view="front", coords=False, legend_title="")
    faerun.add_scatter(
        "RNASEQ",
        {
            "x": x,
            "y": y,
            "c": LABELS,
            "labels": LABELS
        },
        colormap="tab10",
        point_scale=5.0,
        max_point_size=10,
        shader="smoothCircle",
        has_legend=True,
        categorical=True,
        legend_labels=legend_labels,
        legend_title="Tumor Types",
    )
    faerun.add_tree("RNASEQ_tree", {
        "from": s,
        "to": t
    },
                    point_helper="RNASEQ",
                    color="#666666")
    faerun.plot("rnaseq")
def main():
    """Lay out the rows of ``DATA`` from an annoy k-nearest-neighbor graph.

    The rows of the module-level ``DATA`` are indexed with annoy (angular
    metric, 10 trees), the 10 nearest neighbors of every row become
    weighted edges, and the tmap layout of that edge list is written as
    the faerun plot ``miniboone``, colored and labelled by ``LABELS``.
    """
    # Read the underlying array once; accessing DATA.values inside the
    # nested loop below would repeat that work for every edge.
    values = DATA.values

    # Building a k-nearest neighbor graph using annoy and cosine distance
    annoy = AnnoyIndex(len(DATA.columns), metric="angular")
    for i, v in enumerate(values):
        annoy.add_item(i, v)
    annoy.build(10)

    annoy_graph = []
    for i in range(len(values)):
        for j in annoy.get_nns_by_item(i, 10):
            annoy_graph.append(
                (i, j, cosine_distance(values[i], values[j])))

    # Creating the tmap layout
    x, y, s, t, _ = tm.layout_from_edge_list(len(DATA), annoy_graph)

    faerun = Faerun(view="front", coords=False)
    faerun.add_scatter(
        "MINIBOONE",
        {
            "x": x,
            "y": y,
            "c": LABELS,
            "labels": LABELS
        },
        shader="smoothCircle",
        colormap="Set1",
        point_scale=2.0,
        max_point_size=20,
        has_legend=True,
        categorical=True,
        # A list (not a set) keeps the legend entries in a fixed order.
        legend_labels=[(0, "Noise"), (1, "Signal")],
    )
    faerun.add_tree(
        "MINIBOONE_tree",
        {
            "from": s,
            "to": t
        },
        point_helper="MINIBOONE",
        color="#666666",
    )

    faerun.plot("miniboone", template="default")
def main():
    """Plot a randomly colored helix with a small tree and pickle its data.

    Writes the faerun plot ``helix`` and dumps the plot data to
    ``helix.faerun``.
    """
    plot = Faerun(clear_color="#222222", coords=True, view="front")

    # 326 samples along x; y and z trace a sine / cosine pair
    x = np.linspace(0, 12.0, 326)
    y = np.sin(np.pi * x)
    z = np.cos(np.pi * x)

    # Random binary category per point, also used to build the labels
    c = np.random.randint(0, 2, len(x))
    labels = [f"{category!s}__Test" for category in c]

    plot.add_scatter(
        "helix",
        {"x": x, "y": y, "z": z, "c": c, "labels": labels},
        shader="smoothCircle",
        colormap="Dark2",
        point_scale=5.0,
        categorical=True,
        has_legend=True,
        legend_labels=[(0, "Zero"), (1, "One")],
        selected_labels=["None", "Just a Small Test"],
    )

    # Four hand-picked edges between points of the "helix" scatter
    edges = {"from": [1, 5, 6, 7], "to": [2, 7, 8, 9]}
    plot.add_tree("helixtree", edges, point_helper="helix")

    plot.plot("helix")

    with open("helix.faerun", "wb+") as handle:
        pickle.dump(plot.create_python_data(),
                    handle,
                    protocol=pickle.HIGHEST_PROTOCOL)
def main():
    """Lay out the samples listed in ``PATHS`` from an annoy kNN graph.

    Every path is loaded as an ``fk.Sample``; the events of all samples
    are concatenated, connected to their 10 nearest neighbors and laid
    out with tmap. Points are colored by the index of the sample they
    came from and written as the faerun plot ``cyto``.
    """
    per_sample_data = []
    per_sample_time = []
    for path in PATHS:
        sample = fk.Sample(path)
        per_sample_data.append(load_data(sample))
        per_sample_time.append(load_time(sample))

    # One source index per event, repeated for every event of a sample
    sources = [
        index for index, events in enumerate(per_sample_data)
        for _ in range(len(events))
    ]

    data = np.concatenate(per_sample_data, axis=0)
    time = np.concatenate(per_sample_time, axis=0)

    # Initialize a new Annoy object and index it using 10 trees
    annoy = AnnoyIndex(len(data[0]), metric="angular")
    for index, vector in enumerate(data):
        annoy.add_item(index, vector)
    annoy.build(10)

    # Create the k-nearest neighbor graph (k = 10)
    edge_list = [
        (i, j, cosine_distance(data[i], data[j]))
        for i in range(len(data))
        for j in annoy.get_nns_by_item(i, 10)
    ]

    # Compute the layout from the edge list
    x, y, s, t, _ = tm.layout_from_edge_list(len(data), edge_list)

    legend_labels = [(0, "No Target Probe Negative Control"),
                     (1, "Stained Sample")]

    # Create the plot
    faerun = Faerun(
        view="front",
        coords=False,
        legend_title=
        "RNA Flow Cytometry: evaluation of detection sensitivity in low abundant intracellular RNA ",
    )

    scatter_data = {"x": x, "y": y, "c": sources, "labels": sources}
    faerun.add_scatter(
        "CYTO",
        scatter_data,
        point_scale=1.0,
        max_point_size=10,
        shader="smoothCircle",
        colormap="Set1",
        has_legend=True,
        categorical=True,
        legend_labels=legend_labels,
        legend_title="Cell Types",
    )

    tree_data = {"from": s, "to": t}
    faerun.add_tree("CYTO_tree",
                    tree_data,
                    point_helper="CYTO",
                    color="#222222")

    faerun.plot("cyto")
mqns = np.array(mqns) pca = PCA(n_components=3) result = pca.fit_transform(mqns) return result, mqns, smiles coords, mqns, smiles = load() smiles = [s + "__This is a Test" for s in smiles] data = {"x": [], "y": [], "z": [], "c": [], "labels": smiles} for i, e in enumerate(coords): data["x"].append(coords[i][0]) data["y"].append(coords[i][1]) data["z"].append(coords[i][2]) data["c"].append(mqns[i][0]) df = pd.DataFrame.from_dict(data) faerun = Faerun(view="free", clear_color="#222222", impress='made with <a href="#">faerun</a>') faerun.add_scatter("drugbank", df, shader="sphere", point_scale=5.0, colormap="jet", has_legend=True) faerun.plot(template="smiles")
import numpy as np
import pandas as pd
from faerun import Faerun
import pickle

faerun = Faerun(view="free", clear_color="#222222")

# Helix parameter and its sine / cosine, plus linearly growing point sizes
t = np.linspace(0, 12.0, 326)
s = np.sin(np.pi * t)
c = np.cos(np.pi * t)
sizes = np.linspace(0.1, 2.0, 326)

# First helix: colored by t scaled to 0..100
data = dict(x=t, y=s, z=c, c=t / max(t) * 100.0, s=sizes)

# Second helix: y and z swapped, colored by t scaled to 0..1, labelled
# with the point sizes
data2 = dict(x=t, y=c, z=s, c=t / max(t), s=sizes, labels=sizes)

# Random cloud along x with two color series and two size series;
# from here on `c` holds random categories in 0..5.
x = np.linspace(0, 12.0, 326)
c = np.random.randint(0, 6, len(x))
data3 = dict(
    x=x,
    y=np.random.rand(len(x)) - 0.5,
    z=np.random.rand(len(x)) - 0.5,
    c=[c, x],
    cs=np.random.rand(len(x)),
    s=[np.random.rand(len(x)), np.random.rand(len(x))],
    labels=c,
)

# Legend entries (0, "A") ... (5, "F") for the six random categories
legend_labels = list(enumerate("ABCDEF"))
def main(da, dataset_path):
    """Export a 3D faerun scatter of tile embeddings to ``tmap.faerun``.

    Tiles are selected from ``x >= -1000e3`` and stacked into a single
    ``tile_id`` dimension. Each tile is labelled with a ``get_image?path=``
    URL built from ``_get_tile_image_path``. Tiles for which a
    ``FileNotFoundError`` is raised are left out of the plot altogether,
    so labels and coordinates always stay aligned.

    Args:
        da: Embedding array.
            NOTE(review): presumably an xarray.DataArray with ``x``, ``y``,
            ``scene_id`` and ``pca_dim`` dimensions and ``i0`` / ``j0``
            coordinates - confirm against the caller.
        dataset_path: Forwarded to ``_get_tile_image_path``.
    """
    da = da.sel(x=slice(-1000e3, None)).stack(
        dict(tile_id=("x", "y", "scene_id")))

    image_labels = []
    kept_positions = []

    tile_ids = da.tile_id.values
    for position, tile_id in enumerate(tqdm(tile_ids)):
        try:
            da_ = da.sel(tile_id=tile_id)
            i0, j0 = da_.i0.item(), da_.j0.item()
            try:
                scene_id = da_.scene_id.item()
            except Exception:
                # Fall back to the last component of the stacked tile id.
                # Narrowed from a bare ``except`` so KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                scene_id = tile_id[-1]

            fn = _get_tile_image_path(dataset_path=dataset_path,
                                      scene_id=scene_id,
                                      i0=i0,
                                      j0=j0)
        except FileNotFoundError:
            continue

        image_labels.append("get_image?path=" + str(fn))
        kept_positions.append(position)

    # Drop the tiles skipped above; otherwise image_labels would be
    # shorter than the coordinate arrays and labels would be attached to
    # the wrong points.
    if len(kept_positions) != len(tile_ids):
        da = da.isel(tile_id=kept_positions)

    # The first three PCA components are the coordinates, the fourth one
    # is the color.
    x = da.isel(pca_dim=0).values
    y = da.isel(pca_dim=1).values
    z = da.isel(pca_dim=2).values
    c = da.isel(pca_dim=3).values

    f = Faerun(
        clear_color="#111111",
        view="front",
        coords=True,
        x_title="pca dim 0",
        y_title="pca dim 1",
    )
    f.add_scatter(
        "EMB",
        {
            "x": x,
            "y": y,
            "z": z,
            "c": c,
            "labels": image_labels
        },
        colormap="jet",
        shader="sphere",
        point_scale=5.0,
        max_point_size=10,
        has_legend=True,
        categorical=False,
    )

    with open("tmap.faerun", "wb+") as handle:
        pickle.dump(f.create_python_data(),
                    handle,
                    protocol=pickle.HIGHEST_PROTOCOL)
def main():
    """Build and plot a TMAP of the papers in ``papers.tar.xz``.

    Every paper text is tokenized into lower-case words longer than two
    characters. The 6000 most frequent words are discarded and the
    remaining words form a sparse binary fingerprint per paper, which is
    indexed in an LSH forest. The layout is written as the faerun plot
    ``nips_papers`` with two scatter layers: one colored by whether the
    paper contains the word "deep", one colored by publication year.
    """
    df = pd.read_csv("papers.tar.xz")
    # NOTE(review): the archive is read directly as CSV; the last row is
    # dropped, presumably because it is archive residue - confirm.
    df.drop(df.tail(1).index, inplace=True)
    df["title"] = df["title"].apply(lambda t: t.replace("'", '"'))

    enc = tm.Minhash()
    lf = tm.LSHForest()

    ctr = Counter()
    texts = []
    non_word = re.compile(r"[^a-zA-Z-]+")

    for paper_text in df["paper_text"]:
        words = [
            w.lower() for w in non_word.sub(" ", paper_text).split(" ")
            if len(w) > 2
        ]
        ctr.update(words)
        texts.append(words)

    # Remove the top n words. The remaining words are kept from least to
    # most common (the order fixes the fingerprint indices). Slicing from
    # n also yields an empty vocabulary when there are at most n distinct
    # words, where the former negative-stride slice returned wrong words.
    n = 6000
    remaining = ctr.most_common()[n:][::-1]

    # Make it fast using a lookup map
    all_words = {word: i for i, (word, _) in enumerate(remaining)}

    # Create the fingerprints and also check whether the word
    # "deep" is found in the document
    fingerprints = []
    has_word = []

    for words in texts:
        has_word.append(1 if "deep" in words else 0)
        fingerprint = [all_words[w] for w in words if w in all_words]
        fingerprints.append(tm.VectorUint(fingerprint))

    # Index the article fingerprints
    lf.batch_add(enc.batch_from_sparse_binary_array(fingerprints))
    lf.index()

    # Create the tmap
    config = tm.LayoutConfiguration()
    config.k = 100
    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, config=config)

    faerun = Faerun(
        view="front", coords=False, legend_title="", legend_number_format="{:.0f}"
    )

    # Add a scatter that is bigger than the one added below, to draw
    # colored circles around the points.
    faerun.add_scatter(
        "NIPS_word",
        {"x": x, "y": y, "c": has_word, "labels": df["title"]},
        colormap="Set1",
        point_scale=7.5,
        max_point_size=25,
        shader="smoothCircle",
        has_legend=True,
        categorical=True,
        legend_title="Contains word<br/>'deep'",
        legend_labels=[(0, "No"), (1, "Yes")],
        interactive=False,
    )

    # Add a scatter that is colored by year on top
    faerun.add_scatter(
        "NIPS",
        {"x": x, "y": y, "c": df["year"], "labels": df["title"]},
        colormap="gray",
        point_scale=5.0,
        max_point_size=20,
        shader="smoothCircle",
        has_legend=True,
        legend_title="Year of<br/>Publication",
    )

    faerun.add_tree(
        "NIPS_tree", {"from": s, "to": t}, point_helper="NIPS", color="#666666"
    )

    faerun.plot("nips_papers")
mqns, smiles = get_fingerprint(drugbank) mqns = np.array(mqns) pca = PCA(n_components=3) result = pca.fit_transform(mqns) return result, mqns, smiles coords, mqns, smiles = load() smiles = [s + "__This is a Test" for s in smiles] data = {"x": [], "y": [], "z": [], "c": [], "labels": smiles} for i, e in enumerate(coords): data["x"].append(coords[i][0]) data["y"].append(coords[i][1]) data["z"].append(coords[i][2]) data["c"].append(mqns[i][0]) df = pd.DataFrame.from_dict(data) faerun = Faerun(view="free", clear_color="#222222") faerun.add_scatter("drugbank", df, shader="sphere", point_scale=5.0, colormap="jet", has_legend=True) faerun.plot(template="smiles")
import numpy as np
import pandas as pd
from faerun import Faerun

faerun = Faerun(view='free', shader='circle')

# Helix: t runs along x, sine and cosine of pi * t give y and z
t = np.linspace(0, 12.0, 326)
s = np.sin(np.pi * t)
c = np.cos(np.pi * t)

# The color is t divided by its maximum
data = dict(x=t, y=s, z=c, c=t / max(t))

df = pd.DataFrame.from_dict(data)
faerun.plot(df)
def main():
    """Build and plot a TMAP of the images in the module-level ``IMAGES``.

    A base64 data URI is appended to ``IMAGE_LABELS`` for every image, the
    pixel values scaled by 1 / 255 are indexed as weighted minhashes, the
    layout time is printed and the result is written as the faerun plot
    ``fmnist``.
    """
    # Initialize and configure tmap
    dims = 1024
    enc = tm.Minhash(28 * 28, 42, dims)
    lf = tm.LSHForest(dims * 2, 128)

    print("Converting images ...")
    for image in IMAGES:
        # Split the flat pixel sequence into 28 rows to rebuild the image
        img = Image.fromarray(np.uint8(np.split(np.array(image), 28)))
        buffered = BytesIO()
        img.save(buffered, format="JPEG")
        # Decode the base64 bytes directly instead of stripping the
        # "b'...'" repr, and declare the MIME type that matches the JPEG
        # payload written above.
        encoded = base64.b64encode(buffered.getvalue()).decode("ascii")
        IMAGE_LABELS.append("data:image/jpeg;base64," + encoded)

    weights = [tm.VectorFloat(image / 255) for image in IMAGES]

    print("Running tmap ...")
    start = timer()
    lf.batch_add(enc.batch_from_weight_array(weights))
    lf.index()
    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, CFG)
    print("tmap: " + str(timer() - start))

    legend_labels = [
        (0, "T-shirt/top"),
        (1, "Trouser"),
        (2, "Pullover"),
        (3, "Dress"),
        (4, "Coat"),
        (5, "Sandal"),
        (6, "Shirt"),
        (7, "Sneaker"),
        (8, "Bag"),
        (9, "Ankle boot"),
    ]

    faerun = Faerun(clear_color="#111111", view="front", coords=False)
    faerun.add_scatter(
        "FMNIST",
        {
            "x": x,
            "y": y,
            "c": LABELS,
            "labels": IMAGE_LABELS
        },
        colormap="tab10",
        shader="smoothCircle",
        point_scale=2.5,
        max_point_size=10,
        has_legend=True,
        categorical=True,
        legend_labels=legend_labels,
    )
    faerun.add_tree("FMNIST_tree", {
        "from": s,
        "to": t
    },
                    point_helper="FMNIST",
                    color="#666666")
    faerun.plot("fmnist", template="url_image")
def main():
    """Plot a randomly colored helix, pickle it and serve it with ``host``.

    Writes the faerun plot ``helix``, dumps the plot data to
    ``helix.faerun`` and hosts that file with custom label and link
    formatters and a markdown info text.
    """
    f = Faerun(title="faerun-example",
               clear_color="#222222",
               coords=False,
               view="free")

    x = np.linspace(0, 12.0, 326)
    y = np.sin(np.pi * x)
    z = np.cos(np.pi * x)
    c = np.random.randint(0, 2, len(x))

    # "<category>__<index modulo 20>" for every point
    labels = [str(e) + "__" + str(i % 20) for i, e in enumerate(c)]

    data = {"x": x, "y": y, "z": z, "c": c, "labels": labels}

    f.add_scatter(
        "helix",
        data,
        shader="sphere",
        colormap="Dark2",
        point_scale=5.0,
        categorical=True,
        has_legend=True,
        legend_labels=[(0, "Zero"), (1, "One")],
    )

    f.plot("helix")

    with open("helix.faerun", "wb+") as handle:
        pickle.dump(f.create_python_data(),
                    handle,
                    protocol=pickle.HIGHEST_PROTOCOL)

    def custom_label_formatter(label, index, name):
        return f"Example: {label} ({index}, {name})"

    def custom_link_formatter(label, index, name):
        return f"https://www.google.com/search?q={label}"

    # Every entry becomes one markdown line. The empty string has to be
    # its own tuple element (note the comma before it); otherwise implicit
    # string concatenation swallows the blank line.
    info = (
        "#Welcome to Faerun",
        "This is a small Faerun example.",
        "",
        "Yay markdown! This means that you can easily:",
        "- Add lists",
        "- Build tables",
        "- Insert images and links",
        "- Add code examples",
        "- ...",
    )

    host(
        "helix.faerun",
        label_type="default",
        title="Helix",
        theme="dark",
        label_formatter=custom_label_formatter,
        link_formatter=custom_link_formatter,
        info="\n".join(info),
    )
def load():
    """Read ``examples/drugbank.smi`` and project its fingerprints to 2D.

    The first whitespace-separated token of every non-blank line is
    passed to ``get_fingerprint``; the returned fingerprints are projected
    onto two principal components.

    Returns:
        A tuple ``(result, mqns, smiles)``: the PCA coordinates, the
        fingerprint array and the values returned by ``get_fingerprint``
        as its second item.
    """
    drugbank = []
    with open('examples/drugbank.smi') as f:
        for line in f:
            fields = line.split()
            # Blank lines have no token and used to raise an IndexError
            if fields:
                drugbank.append(fields[0])

    mqns, smiles = get_fingerprint(drugbank)
    mqns = np.array(mqns)
    pca = PCA(n_components=2)
    result = pca.fit_transform(mqns)
    return result, mqns, smiles


faerun = Faerun(view='front', shader='circle')
coords, mqns, smiles = load()

data = {
    'x': coords[:, 0],
    'y': coords[:, 1],
    # Scale column 22 by its own maximum. The former max(mqns[22]) took
    # the maximum of row 22, i.e. of one molecule across all columns.
    'c': mqns[:, 22] / mqns[:, 22].max(),
    'smiles': smiles
}

df = pd.DataFrame.from_dict(data)
faerun.plot(df, colormap='viridis')
def main():
    """Build and plot a TMAP of the rows of the module-level ``DATA``.

    Each row is indexed as a sparse binary minhash; the layout is colored
    by ``LABELS`` with a custom six-color map, labelled with
    ``FAERUN_LABELS`` and written as the faerun plot ``gutenberg``.
    """
    # Initialize and configure tmap
    dims = 2048
    enc = tm.Minhash(dims)
    lf = tm.LSHForest(dims, 128, store=True)

    fingerprints = [tm.VectorUint(list(row)) for row in DATA]
    lf.batch_add(enc.batch_from_sparse_binary_array(fingerprints))
    lf.index()

    x_tmap, y_tmap, s, t, _ = tm.layout_from_lsh_forest(lf, CFG_TMAP)
    lf.clear()

    # Prepare custom color map: dark gray followed by the first five
    # colors of matplotlib's tab10
    tab10 = plt.get_cmap("tab10").colors
    colors_gray = [(0.2, 0.2, 0.2), *tab10[:5]]
    custom_cm_gray = LinearSegmentedColormap.from_list("custom_cm_gray",
                                                       colors_gray,
                                                       N=len(colors_gray))

    legend_labels = [
        (1, "Rudyard Kipling"),
        (2, "Herbert George Wells"),
        (3, "Charles Darwin"),
        (4, "George Bernard Shaw"),
        (5, "William Wymark Jacobs"),
        (0, "Other"),
    ]

    faerun = Faerun(
        clear_color="#111111",
        view="front",
        coords=False,
        alpha_blending=True,
        legend_title="",
    )

    scatter_data = {
        "x": x_tmap,
        "y": y_tmap,
        "c": LABELS,
        "labels": FAERUN_LABELS,
    }
    faerun.add_scatter(
        "gutenberg",
        scatter_data,
        colormap=custom_cm_gray,
        point_scale=4.2,
        max_point_size=10,
        has_legend=True,
        categorical=True,
        legend_title="Authors",
        legend_labels=legend_labels,
        shader="smoothCircle",
        selected_labels=["Author", "Title"],
    )

    tree_data = {"from": s, "to": t}
    faerun.add_tree(
        "gutenberg_tree",
        tree_data,
        point_helper="gutenberg",
        color="#222222",
    )

    faerun.plot("gutenberg", template="default")
def _rank_normalize(values):
    """Return the ranks of *values* divided by the number of values.

    Ranks do not change under positive scaling, so the values are ranked
    as they are; no division by the maximum is needed (and none can fail
    when the maximum is zero).
    """
    return ss.rankdata(np.array(values)) / len(values)


def main():
    """Build and plot a TMAP of the molecules in ``drugbank.csv``.

    Every row with a SMILES that parses to a molecule with more than five
    atoms and fewer than two "." separators is encoded with MHFP and
    indexed in an LSH forest. The layout is written as the faerun plot
    ``drugbank`` with eleven color series: five categorical ones (group,
    Lipinski pass, three substructure matches) and six rank-normalized
    descriptors.
    """
    df = pd.read_csv("drugbank.csv").dropna(subset=["SMILES"]).reset_index(
        drop=True)

    enc = MHFPEncoder()
    lf = tm.LSHForest(2048, 128)

    fps = []
    labels = []
    groups = []
    tpsa = []
    logp = []
    mw = []
    h_acceptors = []
    h_donors = []
    ring_count = []
    is_lipinski = []
    has_coc = []
    has_sa = []
    has_tz = []

    substruct_coc = AllChem.MolFromSmiles("COC")
    substruct_sa = AllChem.MolFromSmiles("NS(=O)=O")
    substruct_tz = AllChem.MolFromSmiles("N1N=NN=C1")

    total = len(df)
    for i, row in df.iterrows():
        if i % 1000 == 0 and i > 0:
            print(f"{round(100 * (i / total))}% done ...")

        # Columns are addressed by position; .iloc makes that explicit
        # (integer keys on a label-indexed Series are deprecated).
        # NOTE(review): presumably 0 = DrugBank ID, 1 = name, 3 = groups,
        # 6 = SMILES - confirm against drugbank.csv.
        smiles = row.iloc[6]
        mol = AllChem.MolFromSmiles(smiles)

        if not (mol and mol.GetNumAtoms() > 5 and smiles.count(".") < 2):
            continue

        drug_id = row.iloc[0]
        name = row.iloc[1]

        fps.append(tm.VectorUint(enc.encode_mol(mol, min_radius=0)))
        labels.append(
            f'{smiles}__<a href="https://www.drugbank.ca/drugs/{drug_id}" target="_blank">{drug_id}</a>__{name}'
            .replace("'", ""))
        groups.append(row.iloc[3].split(";")[0])
        tpsa.append(Descriptors.TPSA(mol))
        logp.append(Descriptors.MolLogP(mol))
        mw.append(Descriptors.MolWt(mol))
        h_acceptors.append(Descriptors.NumHAcceptors(mol))
        h_donors.append(Descriptors.NumHDonors(mol))
        ring_count.append(Descriptors.RingCount(mol))
        is_lipinski.append(lipinski_pass(mol))
        has_coc.append(mol.HasSubstructMatch(substruct_coc))
        has_sa.append(mol.HasSubstructMatch(substruct_sa))
        has_tz.append(mol.HasSubstructMatch(substruct_tz))

    # Create the labels and the integer encoded array for the groups,
    # as they're categorical
    labels_groups, groups = Faerun.create_categories(groups)

    tpsa_ranked = _rank_normalize(tpsa)
    logp_ranked = _rank_normalize(logp)
    mw_ranked = _rank_normalize(mw)
    h_acceptors_ranked = _rank_normalize(h_acceptors)
    h_donors_ranked = _rank_normalize(h_donors)
    ring_count_ranked = _rank_normalize(ring_count)

    lf.batch_add(fps)
    lf.index()

    cfg = tm.LayoutConfiguration()
    cfg.k = 100
    cfg.sl_repeats = 2
    cfg.mmm_repeats = 2
    cfg.node_size = 2
    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, config=cfg)

    # Define a colormap highlighting approved vs non-approved
    custom_cmap = ListedColormap(
        [
            "#2ecc71", "#9b59b6", "#ecf0f1", "#e74c3c", "#e67e22", "#f1c40f",
            "#95a5a6"
        ],
        name="custom",
    )

    bin_cmap = ListedColormap(["#e74c3c", "#2ecc71"], name="bin_cmap")

    # The first five series are categorical, the last six are continuous
    continuous = [tpsa, logp, mw, h_acceptors, h_donors, ring_count]
    yes_no = [(0, "No"), (1, "Yes")]

    f = Faerun(
        clear_color="#222222",
        coords=False,
        view="front",
        impress=
        'made with <a href="http://tmap.gdb.tools" target="_blank">tmap</a><br />and <a href="https://github.com/reymond-group/faerun-python" target="_blank">faerun</a><br /><a href="https://gist.github.com/daenuprobst/5cddd0159c0cf4758fb16b4b4acbef89">source</a>',
    )

    f.add_scatter(
        "Drugbank",
        {
            "x":
            x,
            "y":
            y,
            "c": [
                groups,
                is_lipinski,
                has_coc,
                has_sa,
                has_tz,
                tpsa_ranked,
                logp_ranked,
                mw_ranked,
                h_acceptors_ranked,
                h_donors_ranked,
                ring_count_ranked,
            ],
            "labels":
            labels,
        },
        shader="smoothCircle",
        colormap=[custom_cmap] + [bin_cmap] * 4 + ["viridis"] * 6,
        point_scale=2.5,
        # One flag per series: 5 categorical + 6 continuous = 11. The
        # list used to hold only 10 entries for the 11 series.
        categorical=[True] * 5 + [False] * 6,
        has_legend=True,
        legend_labels=[
            labels_groups,
            list(yes_no),
            list(yes_no),
            list(yes_no),
            list(yes_no),
        ],
        selected_labels=["SMILES", "Drugbank ID", "Name"],
        series_title=[
            "Group",
            "Lipinski",
            "Ethers",
            "Sulfonamides",
            "Tetrazoles",
            "TPSA",
            "logP",
            "Mol Weight",
            "H Acceptors",
            "H Donors",
            "Ring Count",
        ],
        max_legend_label=[None] * 5 +
        [str(round(max(values))) for values in continuous],
        min_legend_label=[None] * 5 +
        [str(round(min(values))) for values in continuous],
        title_index=2,
        legend_title="",
    )

    f.add_tree("drugbanktree", {"from": s, "to": t}, point_helper="Drugbank")

    f.plot("drugbank", template="smiles")
def main():
    """Lay out a stored LSH forest and plot it colored by target class.

    The SMILES, target classes and ChEMBL ids are unpickled from
    ``chembl.pickle``, the forest is restored from ``chembl.dat``, the
    layout time is printed and the result is written as the faerun plot
    ``chembl``.
    """
    dims = 512

    lf = tm.LSHForest(dims, 128, store=True)

    # Due to the large data size (> 1GB) the following files are not provided directly
    # NOTE: pickle must only be used on trusted files - unpickling can
    # execute arbitrary code.
    with open("chembl.pickle", "rb") as handle:
        # `activity` is part of the pickled tuple but is not plotted
        smiles, target_class, activity, chembl_id = pickle.load(handle)

    labels = [
        smi + "__" + cid + "__" +
        f'<a target="_blank" href="https://www.ebi.ac.uk/chembl/compound_report_card/{cid}">{cid}</a>'
        for smi, cid in zip(smiles, chembl_id)
    ]

    lf.restore("chembl.dat")

    classes = [
        "enzyme",
        "kinase",
        "protease",
        "cytochrome p450",
        "ion channel",
        "transporter",
        "transcription factor",
        "membrane receptor",
        "epigenetic regulator",
    ]

    # Known classes are numbered in alphabetical order, skipping 7, which
    # is reserved for every class not listed above ("Other").
    target_class_map = {}
    next_index = 0
    for key in sorted(set(target_class)):
        if key not in classes:
            target_class_map[key] = 7
        else:
            target_class_map[key] = next_index
            next_index += 1
            if next_index == 7:
                next_index = 8

    cfg = tm.LayoutConfiguration()
    cfg.node_size = 1 / 70
    cfg.mmm_repeats = 2
    cfg.sl_repeats = 2

    start = timer()
    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, cfg)
    end = timer()
    print(end - start)

    legend_labels = [
        (0, "Cytochrome p450"),
        (1, "Other Enzyme"),
        (2, "Epigenetic Regulator"),
        (3, "Ion Channel"),
        (4, "Kinase"),
        (5, "Membrane Receptor"),
        (6, "Protease"),
        (8, "Transcription Factor"),
        (9, "Transporter"),
        (7, "Other"),
    ]

    vals = [int(target_class_map[name]) for name in target_class]

    faerun = Faerun(view="front", coords=False)
    faerun.add_scatter(
        "chembl",
        {
            "x": x,
            "y": y,
            "c": vals,
            "labels": labels
        },
        colormap="tab10",
        point_scale=1.0,
        max_point_size=10,
        has_legend=True,
        categorical=True,
        shader="smoothCircle",
        legend_labels=legend_labels,
        title_index=1,
    )
    faerun.add_tree("chembl_tree", {
        "from": s,
        "to": t
    },
                    point_helper="chembl",
                    color="#222222")

    faerun.plot("chembl", template="smiles")
pca = PCA(n_components=3) result = pca.fit_transform(mqns) return result, mqns, smiles coords, mqns, smiles = load() data = { 'x': np.random.normal(0.0, 6.0, len(smiles) * 150), 'y': np.random.normal(0.0, 6.0, len(smiles) * 150), 'z': np.random.normal(0.0, 6.0, len(smiles) * 150), 'c': np.random.normal(0.0, 6.0, len(smiles) * 150), 'smiles': smiles * 150 } print(len(data['x'])) print(len(data['y'])) print(len(data['z'])) print(len(data['c'])) print(len(data['smiles'])) df = pd.DataFrame.from_dict(data) faerun = Faerun(view='free', shader='sphere') faerun.plot(df, colormap='viridis')