예제 #1
0
def main():
    """Lay out the COIL-20 images with tmap and plot them with Faerun.

    Every file in the ``coil_20`` directory is read as a flat pixel list and
    its class label is parsed from the ``obj<N>__`` file-name prefix. The
    pixel values, scaled by 1/255, are hashed with a weighted MinHash,
    indexed in an LSH forest and laid out. The plot is written under the
    name ``coil`` with one inline JPEG label per point.
    """

    # Initialize and configure tmap
    dims = 2048
    enc = tm.Minhash(16384, 42, dims)
    lf = tm.LSHForest(dims * 2, 128, weighted=True)

    images = []
    labels = []
    for file in os.listdir("coil_20"):
        # "obj7__12.png" -> class index 6
        labels.append(int(file.split("__")[0].replace("obj", "")) - 1)
        # Close the file handle as soon as the pixels have been copied out.
        with Image.open("coil_20/" + file) as picture:
            images.append(list(picture.getdata()))

    image_labels = []
    for image in images:
        # Fold the flat pixel list back into 128 rows before encoding.
        img = Image.fromarray(np.uint8(np.split(np.array(image), 128)))
        buffered = BytesIO()
        img.save(buffered, format="JPEG")
        # Decode the bytes instead of trimming their repr: replacing "b'" in
        # str(bytes) also removed a trailing "b" of the base64 payload.
        img_str = base64.b64encode(buffered.getvalue()).decode("ascii")
        # The payload is JPEG, so advertise it as such.
        image_labels.append("data:image/jpeg;base64," + img_str)

    # Scale the pixel values by 1/255 to obtain the MinHash weights.
    tmp = [[i / 255 for i in image] for image in images]

    lf.batch_add(enc.batch_from_weight_array(tmp))
    lf.index()

    x, y, s, t, _ = tm.layout_from_lsh_forest(lf)

    faerun = Faerun(clear_color="#111111", view="front", coords=False)
    faerun.add_scatter(
        "COIL20",
        {
            "x": x,
            "y": y,
            "c": labels,
            "labels": image_labels
        },
        colormap="tab20",
        shader="smoothCircle",
        point_scale=2.5,
        max_point_size=10,
        has_legend=True,
        categorical=True,
    )
    faerun.add_tree("COIL20_tree", {
        "from": s,
        "to": t
    },
                    point_helper="COIL20",
                    color="#666666")
    faerun.plot("coil", template="url_image")
예제 #2
0
def main():
    """Pickle a 2D DrugBank scatter plot and serve it with ``host``.

    The first two coordinates returned by ``load()`` become x/y, the first
    fingerprint component colours each point, and every point gets a random
    size between 1 and 5. The plot data is pickled to ``example.faerun``
    and then served together with a markdown info text.
    """
    coords, mqns, smiles = load()

    data = {"x": [], "y": [], "c": [], "s": [], "labels": smiles}

    for i, point in enumerate(coords):
        data["x"].append(point[0])
        data["y"].append(point[1])
        data["c"].append(mqns[i][0])
        data["s"].append(random.randint(1, 5))

    df = pd.DataFrame.from_dict(data)

    faerun = Faerun()
    faerun.add_scatter(
        "drugbank",
        df,
        shader="smoothCircle",
        point_scale=2.0,
        colormap="jet",
        has_legend=True,
        categorical=False,
    )

    with open("example.faerun", "wb+") as f:
        pickle.dump(faerun.create_python_data(), f, protocol=pickle.HIGHEST_PROTOCOL)

    # Each tuple element becomes one markdown line. The comma after the long
    # sentence matters: without it the empty string below is concatenated
    # into that sentence and the paragraph break disappears.
    info = (
        "#Welcome to Fearun",
        "This is a small demonstration visualising drugs stored in [Drugbank](http://drugbank.ca) using coordinates generated by a PCA projection.",
        "",
        "By the way, this info was generated using markdown, this means that you can easily:",
        "- Add lists",
        "- Build tables",
        "- Insert images and links",
        "- Add code examples",
        "- ...",
    )

    host(
        "example.faerun",
        label_type="smiles",
        legend=True,
        theme="light",
        info="\n".join(info),
    )
예제 #3
0
def main():
    """Lay out the images in ``IMAGES`` with tmap and plot them with Faerun.

    Each image is turned into an inline JPEG label and into a binary
    fingerprint (1 where a pixel is at least the mean of the image's
    non-zero pixels). The fingerprints are MinHash-encoded, indexed in an
    LSH forest and laid out with ``CFG``. The plot is written as
    ``i3d-tmap-mnist`` into ``outputs``.

    Raises:
        ZeroDivisionError: if an image contains no pixel greater than zero.
    """

    # Initialize and configure tmap
    dims = 1024
    enc = tm.Minhash(dims)
    lf = tm.LSHForest(dims, 128)

    print("Converting images ...")
    for image in tqdm(IMAGES):
        # Fold the flat pixel vector into 28 rows before encoding.
        img = Image.fromarray(np.uint8(np.split(np.array(image), 28)))
        buffered = BytesIO()
        img.save(buffered, format="JPEG")
        # Decode the bytes instead of trimming their repr: replacing "b'" in
        # str(bytes) also removed a trailing "b" of the base64 payload.
        img_str = base64.b64encode(buffered.getvalue()).decode("ascii")
        # The payload is JPEG, so advertise it as such.
        IMAGE_LABELS.append("data:image/jpeg;base64," + img_str)

    tmp = []
    for image in IMAGES:
        # Threshold at the mean over the non-zero pixels only.
        avg = sum(image) / sum(1 for x in image if x > 0)
        tmp.append(tm.VectorUchar([1 if x >= avg else 0 for x in image]))

    print("Running tmap ...")
    lf.batch_add(enc.batch_from_binary_array(tmp))
    lf.index()

    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, CFG)

    faerun = Faerun(clear_color="#111111", view="front", coords=False)
    faerun.add_scatter(
        "MNIST",
        {
            "x": x,
            "y": y,
            "c": LABELS,
            "labels": IMAGE_LABELS
        },
        colormap="tab10",
        shader="smoothCircle",
        point_scale=2.5,
        max_point_size=10,
        has_legend=True,
        categorical=True,
    )
    faerun.add_tree("MNIST_tree", {
        "from": s,
        "to": t
    },
                    point_helper="MNIST",
                    color="#666666")
    faerun.plot("i3d-tmap-mnist", path="outputs", template="url_image")
예제 #4
0
def main():
    """Pickle a 3D DrugBank scatter plot and serve it with ``host``.

    The three coordinates returned by ``load()`` become x/y/z and the first
    fingerprint component colours each point. The plot data is pickled to
    ``example.faerun`` and then served together with a markdown info text.
    """
    coords, mqns, smiles = load()

    data = {'x': [], 'y': [], 'z': [], 'c': [], 'labels': smiles}

    for i, point in enumerate(coords):
        data['x'].append(point[0])
        data['y'].append(point[1])
        data['z'].append(point[2])
        data['c'].append(mqns[i][0])

    df = pd.DataFrame.from_dict(data)

    faerun = Faerun()
    faerun.add_scatter('drugbank',
                       df,
                       shader='sphere',
                       point_scale=5.0,
                       colormap='jet',
                       has_legend=True,
                       categorical=False)

    with open('example.faerun', 'wb+') as f:
        pickle.dump(faerun.create_python_data(),
                    f,
                    protocol=pickle.HIGHEST_PROTOCOL)

    # Each tuple element becomes one markdown line. The comma after the long
    # sentence matters: without it the empty string below is concatenated
    # into that sentence and the paragraph break disappears.
    info = (
        '#Welcome to Fearun',
        'This is a small demonstration visualising drugs stored in [Drugbank](http://drugbank.ca) using coordinates generated by a PCA projection.',
        '',
        'By the way, this info was generated using markdown, this means that you can easily:',
        '- Add lists', '- Build tables', '- Insert images and links',
        '- Add code examples', '- ...')

    host('example.faerun',
         label_type='default',
         legend=True,
         theme='dark',
         info='\n'.join(info),
         view='free')
예제 #5
0
def df_to_faerun(df, x, y, s, t):
    """Build a Faerun plot with five colour series and a tree.

    Args:
        df: table providing the ``Score``, ``MolWeight``, ``LogP``, ``QED``,
            ``SAS`` and ``SMILES`` columns.
        x: x coordinates of the points.
        y: y coordinates of the points.
        s: start indices of the tree edges.
        t: end indices of the tree edges.

    Returns:
        The populated ``Faerun`` object; nothing is written to disk here.
    """
    print('Making Faerun plot')

    # One row per colour series: (column in df, legend title, colormap).
    series = (
        ('Score', "Docking Score", "rainbow_r"),
        ('MolWeight', "Molecular Weight", "rainbow"),
        ('LogP', "Lipophilicity", "rainbow"),
        ('QED', "Quantitative Estimate of Druglikeness", "rainbow"),
        ('SAS', "Synthetic Accessibility Score", "Blues"),
    )
    point_data = {
        "x": x,
        "y": y,
        "c": [df[column] for column, _, _ in series],
        "labels": df['SMILES'],
    }

    plot = Faerun(view="front", coords=False)
    # The scatter name is turned into a variable name: no spaces (use
    # underscores) and the first character has to be a letter.
    plot.add_scatter(
        "SampleDock",
        point_data,
        shader="smoothCircle",
        point_scale=2.0,
        max_point_size=20,
        categorical=[False for _ in series],
        colormap=[cmap for _, _, cmap in series],
        series_title=[title for _, title, _ in series],
        has_legend=True,
    )
    # Same naming rule for the tree: it has to start with a letter.
    plot.add_tree("SnD_Tree", {"from": s, "to": t}, point_helper="SampleDock")
    print('Plotting finished')
    return plot
예제 #6
0
def main():
    """Lay out the rows of ``DATA`` with tmap and plot them with Faerun.

    Every row of ``DATA`` is used as a weight vector for a weighted MinHash,
    indexed in an LSH forest and laid out with ``CFG_TMAP``. Points are
    coloured and labelled by ``LABELS``; the plot is written as ``rnaseq``.
    """

    # Initialize and configure tmap
    dims = 256
    enc = tm.Minhash(len(DATA.columns), 42, dims)
    lf = tm.LSHForest(dims * 2, 32, weighted=True)

    fps = [tm.VectorFloat(list(row)) for _, row in DATA.iterrows()]

    lf.batch_add(enc.batch_from_weight_array(fps))
    lf.index()

    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, CFG_TMAP)
    # The forest is no longer needed once the layout exists.
    lf.clear()

    # A list (not a set) keeps the legend entries in this order on every run.
    legend_labels = [(1, "PRAD"), (2, "LUAD"), (3, "BRCA"), (4, "KIRC"),
                     (5, "COAD")]

    # Create the plot
    faerun = Faerun(view="front", coords=False, legend_title="")
    faerun.add_scatter(
        "RNASEQ",
        {
            "x": x,
            "y": y,
            "c": LABELS,
            "labels": LABELS
        },
        colormap="tab10",
        point_scale=5.0,
        max_point_size=10,
        shader="smoothCircle",
        has_legend=True,
        categorical=True,
        legend_labels=legend_labels,
        legend_title="Tumor Types",
    )
    faerun.add_tree("RNASEQ_tree", {
        "from": s,
        "to": t
    },
                    point_helper="RNASEQ",
                    color="#666666")
    faerun.plot("rnaseq")
예제 #7
0
def main():
    """Lay out ``DATA`` from an Annoy k-nearest-neighbour graph and plot it.

    An angular Annoy index with 10 trees is built over the rows of ``DATA``.
    For every row its 10 nearest neighbours become weighted edges (weight =
    ``cosine_distance``), and tmap computes the layout from that edge list.
    Points are coloured and labelled by ``LABELS``; the plot is written as
    ``miniboone``.
    """

    # Materialise the value matrix once instead of on every distance lookup.
    values = DATA.values

    # Building a k-nearest neighbor graph using annoy and cosine distance
    annoy = AnnoyIndex(len(DATA.columns), metric="angular")
    for i, v in enumerate(values):
        annoy.add_item(i, v)
    annoy.build(10)

    annoy_graph = [
        (i, j, cosine_distance(row, values[j]))
        for i, row in enumerate(values)
        for j in annoy.get_nns_by_item(i, 10)
    ]

    # Creating the tmap layout
    x, y, s, t, _ = tm.layout_from_edge_list(len(DATA), annoy_graph)

    faerun = Faerun(view="front", coords=False)
    faerun.add_scatter(
        "MINIBOONE",
        {
            "x": x,
            "y": y,
            "c": LABELS,
            "labels": LABELS
        },
        shader="smoothCircle",
        colormap="Set1",
        point_scale=2.0,
        max_point_size=20,
        has_legend=True,
        categorical=True,
        # A list (not a set) keeps the legend order identical on every run.
        legend_labels=[(0, "Noise"), (1, "Signal")],
    )
    faerun.add_tree(
        "MINIBOONE_tree",
        {
            "from": s,
            "to": t
        },
        point_helper="MINIBOONE",
        color="#666666",
    )
    faerun.plot("miniboone", template="default")
예제 #8
0
def main():
    """Plot a randomly coloured helix with a small tree and pickle the data.

    Writes the plot under the name ``helix`` and stores the Python plot data
    in ``helix.faerun``.
    """
    plot = Faerun(clear_color="#222222", coords=True, view="front")

    # 326 points along the x axis, wound around it via sine and cosine.
    xs = np.linspace(0, 12.0, 326)
    ys = np.sin(np.pi * xs)
    zs = np.cos(np.pi * xs)
    # Random binary class per point.
    classes = np.random.randint(0, 2, len(xs))

    point_labels = []
    for value in classes:
        point_labels.append(str(value) + "__Test")

    plot.add_scatter(
        "helix",
        {"x": xs, "y": ys, "z": zs, "c": classes, "labels": point_labels},
        shader="smoothCircle",
        colormap="Dark2",
        point_scale=5.0,
        categorical=True,
        has_legend=True,
        legend_labels=[(0, "Zero"), (1, "One")],
        selected_labels=["None", "Just a Small Test"],
    )

    # Four hand-picked edges between points of the "helix" scatter.
    edges = {"from": [1, 5, 6, 7], "to": [2, 7, 8, 9]}
    plot.add_tree("helixtree", edges, point_helper="helix")

    plot.plot("helix")

    with open("helix.faerun", "wb+") as handle:
        pickle.dump(
            plot.create_python_data(),
            handle,
            protocol=pickle.HIGHEST_PROTOCOL,
        )
예제 #9
0
def main():
    """Lay out the samples in ``PATHS`` from an Annoy k-NN graph and plot them.

    Each path is loaded as an ``fk.Sample``. The per-sample data arrays are
    concatenated, indexed with Annoy (angular metric, 10 trees) and turned
    into a 10-nearest-neighbour edge list weighted by ``cosine_distance``.
    Points are coloured by the index of the sample they came from; the plot
    is written as ``cyto``.
    """
    per_sample_data = []
    per_sample_time = []
    for path in PATHS:
        sample = fk.Sample(path)
        per_sample_data.append(load_data(sample))
        per_sample_time.append(load_time(sample))

    # Remember, for every row, which sample it came from.
    sources = [
        index
        for index, chunk in enumerate(per_sample_data)
        for _ in range(len(chunk))
    ]

    data = np.concatenate(per_sample_data, axis=0)
    # Concatenated like the data, but not used further below.
    timestamps = np.concatenate(per_sample_time, axis=0)

    n_features = len(data[0])

    # Initialize a new Annoy object and index it using 10 trees
    annoy = AnnoyIndex(n_features, metric="angular")
    for row_index, row in enumerate(data):
        annoy.add_item(row_index, row)
    annoy.build(10)

    # Create the k-nearest neighbor graph (k = 10)
    edge_list = [
        (src, dst, cosine_distance(data[src], data[dst]))
        for src in range(len(data))
        for dst in annoy.get_nns_by_item(src, 10)
    ]

    # Compute the layout from the edge list
    x, y, s, t, _ = tm.layout_from_edge_list(len(data), edge_list)

    # Create the plot
    plot_title = "RNA Flow Cytometry: evaluation of detection sensitivity in low abundant intracellular RNA "
    faerun = Faerun(view="front", coords=False, legend_title=plot_title)
    faerun.add_scatter(
        "CYTO",
        {"x": x, "y": y, "c": sources, "labels": sources},
        point_scale=1.0,
        max_point_size=10,
        shader="smoothCircle",
        colormap="Set1",
        has_legend=True,
        categorical=True,
        legend_labels=[
            (0, "No Target Probe Negative Control"),
            (1, "Stained Sample"),
        ],
        legend_title="Cell Types",
    )
    faerun.add_tree(
        "CYTO_tree",
        {"from": s, "to": t},
        point_helper="CYTO",
        color="#222222",
    )

    faerun.plot("cyto")
예제 #10
0
    mqns = np.array(mqns)

    pca = PCA(n_components=3)
    result = pca.fit_transform(mqns)
    return result, mqns, smiles


# Load the PCA coordinates, the fingerprints and the SMILES strings.
coords, mqns, smiles = load()
# Append a second label part to every SMILES string.
smiles = [smi + "__This is a Test" for smi in smiles]

# Columns of the plot: the three PCA components, the first fingerprint
# component as colour value, and the labels built above.
data = {
    "x": [point[0] for point in coords],
    "y": [point[1] for point in coords],
    "z": [point[2] for point in coords],
    "c": [fingerprint[0] for fingerprint in mqns],
    "labels": smiles,
}

df = pd.DataFrame.from_dict(data)

faerun = Faerun(
    view="free",
    clear_color="#222222",
    impress='made with <a href="#">faerun</a>',
)
faerun.add_scatter(
    "drugbank",
    df,
    shader="sphere",
    point_scale=5.0,
    colormap="jet",
    has_legend=True,
)
faerun.plot(template="smiles")
예제 #11
0
import numpy as np
import pandas as pd
from faerun import Faerun
import pickle

# Faerun instance configured with view="free" and clear colour #222222.
faerun = Faerun(view="free", clear_color="#222222")

# 326 evenly spaced parameter values in [0, 12] and the sine/cosine of pi * t.
t = np.linspace(0, 12.0, 326)
s = np.sin(np.pi * t)
c = np.cos(np.pi * t)
# Per-point sizes growing linearly from 0.1 to 2.0.
sizes = np.linspace(0.1, 2.0, 326)

# First data set: colour values are t rescaled to the range 0..100.
data = {"x": t, "y": s, "z": c, "c": t / max(t) * 100.0, "s": sizes}

# Second data set: y and z swapped, colour values in 0..1, sizes reused as labels.
data2 = {"x": t, "y": c, "z": s, "c": t / max(t), "s": sizes, "labels": sizes}

# Third data set: random y/z offsets in [-0.5, 0.5) with two colour series
# (random integer classes 0..5 and x itself) and two random size series.
# NOTE: `c` is rebound here; `data` and `data2` keep the cosine array.
# The order of the np.random calls determines the generated values.
x = np.linspace(0, 12.0, 326)
c = np.random.randint(0, 6, len(x))
data3 = {
    "x": x,
    "y": np.random.rand(len(x)) - 0.5,
    "z": np.random.rand(len(x)) - 0.5,
    "c": [c, x],
    "cs": np.random.rand(len(x)),
    "s": [np.random.rand(len(x)),
          np.random.rand(len(x))],
    "labels": c,
}

# Display names for the six integer classes generated for `data3`.
legend_labels = [(0, "A"), (1, "B"), (2, "C"), (3, "D"), (4, "E"), (5, "F")]
예제 #12
0
def main(da, dataset_path):
    """Pickle a 3D Faerun scatter of tile embeddings to ``tmap.faerun``.

    The array is restricted to ``x >= -1000e3`` and its ``x``, ``y`` and
    ``scene_id`` dimensions are stacked into a single ``tile_id`` dimension.
    For every tile an image URL of the form ``get_image?path=<file>`` is
    built. PCA components 0-2 give the point position and component 3 the
    colour.

    Args:
        da: array with ``x``, ``y``, ``scene_id`` and ``pca_dim`` dimensions
            and ``i0``/``j0`` values per tile (presumably an
            ``xarray.DataArray`` -- confirm against the caller).
        dataset_path: dataset root passed on to ``_get_tile_image_path``.
    """
    da = da.sel(x=slice(-1000e3, None)).stack(
        dict(tile_id=("x", "y", "scene_id")))

    image_labels = []
    # Positions along ``tile_id`` for which an image path could be resolved.
    kept = []

    for position, tile_id in enumerate(tqdm(da.tile_id.values)):
        try:
            da_ = da.sel(tile_id=tile_id)
            i0, j0 = da_.i0.item(), da_.j0.item()
            try:
                scene_id = da_.scene_id.item()
            except Exception:
                # Best-effort fallback: take the scene id from the last
                # element of the stacked index tuple. Unlike the previous
                # bare ``except`` this lets KeyboardInterrupt/SystemExit
                # through.
                scene_id = tile_id[-1]

            fn = _get_tile_image_path(dataset_path=dataset_path,
                                      scene_id=scene_id,
                                      i0=i0,
                                      j0=j0)
        except FileNotFoundError:
            continue

        image_labels.append("get_image?path=" + str(fn))
        kept.append(position)

    # Tiles skipped above must also be removed from the plotted values;
    # otherwise every label after a skipped tile is attached to the wrong
    # point and the label list is shorter than the coordinate arrays.
    if len(kept) != da.sizes["tile_id"]:
        da = da.isel(tile_id=kept)

    x = da.isel(pca_dim=0).values
    y = da.isel(pca_dim=1).values
    z = da.isel(pca_dim=2).values
    c = da.isel(pca_dim=3).values

    f = Faerun(
        clear_color="#111111",
        view="front",
        coords=True,
        x_title="pca dim 0",
        y_title="pca dim 1",
    )
    f.add_scatter(
        "EMB",
        {
            "x": x,
            "y": y,
            "z": z,
            "c": c,
            "labels": image_labels
        },
        colormap="jet",
        shader="sphere",
        point_scale=5.0,
        max_point_size=10,
        has_legend=True,
        categorical=False,
    )

    with open("tmap.faerun", "wb+") as handle:
        pickle.dump(f.create_python_data(),
                    handle,
                    protocol=pickle.HIGHEST_PROTOCOL)
예제 #13
0
def main():
    """Lay out the papers in ``papers.tar.xz`` with tmap and plot them.

    Each paper text is tokenised into lower-cased words longer than two
    characters. The ``n`` most frequent words are dropped and the remaining
    vocabulary is mapped to integer ids, giving one sparse binary
    fingerprint per paper. The fingerprints are MinHash-encoded, indexed and
    laid out. Two scatter layers are plotted (presence of the word "deep",
    and publication year) plus the tree; output name is ``nips_papers``.
    """
    df = pd.read_csv("papers.tar.xz")
    # Drop the last row of the table.
    df.drop(df.tail(1).index, inplace=True)
    df["title"] = df["title"].apply(lambda t: t.replace("'", '"'))
    enc = tm.Minhash()
    lf = tm.LSHForest()

    ctr = Counter()
    texts = []
    for _, row in df.iterrows():
        text = re.sub(r"[^a-zA-Z-]+", " ", row["paper_text"])
        text = [t.lower() for t in text.split(" ") if len(t) > 2]
        ctr.update(text)
        texts.append(text)

    # Remove the top n words. The remaining words are kept from least to
    # most frequent, which fixes the ids assigned below. Slicing with [n:]
    # is also correct (empty) when there are fewer than n distinct words,
    # where the former negative-step slice returned a wrong selection.
    n = 6000
    kept_words = ctr.most_common()[n:][::-1]

    # Make it fast using a lookup map
    all_words = {key: i for i, (key, _) in enumerate(kept_words)}

    # Create the fingerprints and also check whether the word
    # "deep" is found in the document
    fingerprints = []
    has_word = []
    for text in texts:
        has_word.append(1 if "deep" in text else 0)

        fingerprint = [all_words[t] for t in text if t in all_words]
        fingerprints.append(tm.VectorUint(fingerprint))

    # Index the article fingerprints
    lf.batch_add(enc.batch_from_sparse_binary_array(fingerprints))
    lf.index()

    # Create the tmap
    config = tm.LayoutConfiguration()
    config.k = 100
    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, config=config)

    faerun = Faerun(
        view="front", coords=False, legend_title="", legend_number_format="{:.0f}"
    )

    # Add a scatter that is bigger than the one below, to add colored
    # circles.
    faerun.add_scatter(
        "NIPS_word",
        {"x": x, "y": y, "c": has_word, "labels": df["title"]},
        colormap="Set1",
        point_scale=7.5,
        max_point_size=25,
        shader="smoothCircle",
        has_legend=True,
        categorical=True,
        legend_title="Contains word<br/>'deep'",
        legend_labels=[(0, "No"), (1, "Yes")],
        interactive=False,
    )

    # Add a scatter that is colored by year on top
    faerun.add_scatter(
        "NIPS",
        {"x": x, "y": y, "c": df["year"], "labels": df["title"]},
        colormap="gray",
        point_scale=5.0,
        max_point_size=20,
        shader="smoothCircle",
        has_legend=True,
        legend_title="Year of<br/>Publication",
    )

    faerun.add_tree(
        "NIPS_tree", {"from": s, "to": t}, point_helper="NIPS", color="#666666"
    )

    faerun.plot("nips_papers")
예제 #14
0
    mqns, smiles = get_fingerprint(drugbank)
    mqns = np.array(mqns)

    pca = PCA(n_components=3)
    result = pca.fit_transform(mqns)
    return result, mqns, smiles


# Load the PCA coordinates, the fingerprints and the SMILES strings.
coords, mqns, smiles = load()
# Append a second label part to every SMILES string.
smiles = [smi + "__This is a Test" for smi in smiles]

# Columns of the plot: the three PCA components, the first fingerprint
# component as colour value, and the labels built above.
data = {
    "x": [point[0] for point in coords],
    "y": [point[1] for point in coords],
    "z": [point[2] for point in coords],
    "c": [fingerprint[0] for fingerprint in mqns],
    "labels": smiles,
}

df = pd.DataFrame.from_dict(data)

faerun = Faerun(view="free", clear_color="#222222")
faerun.add_scatter(
    "drugbank",
    df,
    shader="sphere",
    point_scale=5.0,
    colormap="jet",
    has_legend=True,
)
faerun.plot(template="smiles")
예제 #15
0
import numpy as np
import pandas as pd
from faerun import Faerun

# Plot a helix: x runs from 0 to 12 while y and z follow sine and cosine.
faerun = Faerun(shader="circle", view="free")

t = np.linspace(0, 12.0, 326)
s = np.sin(t * np.pi)
c = np.cos(t * np.pi)

# Colour values are t rescaled by its maximum.
data = {
    "x": t,
    "y": s,
    "z": c,
    "c": t / max(t),
}

df = pd.DataFrame.from_dict(data)
faerun.plot(df)
예제 #16
0
def main():
    """Lay out the images in ``IMAGES`` with tmap and plot them with Faerun.

    Each image is turned into an inline JPEG label and, scaled by 1/255,
    into a weight vector for a weighted MinHash. The hashes are indexed in
    an LSH forest and laid out with ``CFG``; the elapsed time of that step
    is printed. The plot is written as ``fmnist``.
    """

    # Initialize and configure tmap
    dims = 1024
    enc = tm.Minhash(28 * 28, 42, dims)
    lf = tm.LSHForest(dims * 2, 128)

    print("Converting images ...")
    for image in IMAGES:
        # Fold the flat pixel vector into 28 rows before encoding.
        img = Image.fromarray(np.uint8(np.split(np.array(image), 28)))
        buffered = BytesIO()
        img.save(buffered, format="JPEG")
        # Decode the bytes instead of trimming their repr: replacing "b'" in
        # str(bytes) also removed a trailing "b" of the base64 payload.
        img_str = base64.b64encode(buffered.getvalue()).decode("ascii")
        # The payload is JPEG, so advertise it as such.
        IMAGE_LABELS.append("data:image/jpeg;base64," + img_str)

    # Scale the pixel values by 1/255 to obtain the MinHash weights.
    tmp = [tm.VectorFloat(image / 255) for image in IMAGES]

    print("Running tmap ...")
    start = timer()
    lf.batch_add(enc.batch_from_weight_array(tmp))
    lf.index()
    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, CFG)
    print("tmap: " + str(timer() - start))

    legend_labels = [
        (0, "T-shirt/top"),
        (1, "Trouser"),
        (2, "Pullover"),
        (3, "Dress"),
        (4, "Coat"),
        (5, "Sandal"),
        (6, "Shirt"),
        (7, "Sneaker"),
        (8, "Bag"),
        (9, "Ankle boot"),
    ]

    faerun = Faerun(clear_color="#111111", view="front", coords=False)
    faerun.add_scatter(
        "FMNIST",
        {
            "x": x,
            "y": y,
            "c": LABELS,
            "labels": IMAGE_LABELS
        },
        colormap="tab10",
        shader="smoothCircle",
        point_scale=2.5,
        max_point_size=10,
        has_legend=True,
        categorical=True,
        legend_labels=legend_labels,
    )
    faerun.add_tree("FMNIST_tree", {
        "from": s,
        "to": t
    },
                    point_helper="FMNIST",
                    color="#666666")
    faerun.plot("fmnist", template="url_image")
예제 #17
0
def main():
    """Plot a randomly coloured helix, pickle it and serve it with ``host``.

    Writes the plot under the name ``helix``, stores the Python plot data in
    ``helix.faerun`` and then serves that file with custom label and link
    formatters and a markdown info text.
    """
    f = Faerun(title="faerun-example",
               clear_color="#222222",
               coords=False,
               view="free")

    x = np.linspace(0, 12.0, 326)
    y = np.sin(np.pi * x)
    z = np.cos(np.pi * x)
    c = np.random.randint(0, 2, len(x))

    # Two-part label: the class value and the point index modulo 20.
    labels = [str(e) + "__" + str(i % 20) for i, e in enumerate(c)]

    data = {"x": x, "y": y, "z": z, "c": c, "labels": labels}

    f.add_scatter(
        "helix",
        data,
        shader="sphere",
        colormap="Dark2",
        point_scale=5.0,
        categorical=True,
        has_legend=True,
        legend_labels=[(0, "Zero"), (1, "One")],
    )

    f.plot("helix")

    with open("helix.faerun", "wb+") as handle:
        pickle.dump(f.create_python_data(),
                    handle,
                    protocol=pickle.HIGHEST_PROTOCOL)

    def custom_label_formatter(label, index, name):
        """Return the text shown for a point label."""
        return f"Example: {label} ({index}, {name})"

    def custom_link_formatter(label, index, name):
        """Return a Google search URL for the label."""
        return f"https://www.google.com/search?q={label}"

    # Each tuple element becomes one markdown line. The comma after the
    # first sentence matters: without it the empty string below is
    # concatenated into that sentence and the paragraph break disappears.
    info = (
        "#Welcome to Fearun",
        "This is a small Faerun example.",
        "",
        "Yay markdown! This means that you can easily:",
        "- Add lists",
        "- Build tables",
        "- Insert images and links",
        "- Add code examples",
        "- ...",
    )

    host(
        "helix.faerun",
        label_type="default",
        title="Helix",
        theme="dark",
        label_formatter=custom_label_formatter,
        link_formatter=custom_link_formatter,
        info="\n".join(info),
    )
예제 #18
0

def load():
    """Load the DrugBank SMILES and project their fingerprints to 2D.

    Reads the first whitespace-separated field of every non-blank line of
    ``examples/drugbank.smi``, computes fingerprints with
    ``get_fingerprint`` and fits a two-component PCA on them.

    Returns:
        A tuple ``(coords, mqns, smiles)``: the PCA result, the fingerprint
        array, and the SMILES list returned by ``get_fingerprint``.
    """
    drugbank = []
    with open('examples/drugbank.smi') as f:
        # Iterate the file lazily instead of reading all lines up front.
        for line in f:
            fields = line.split()
            # Skip blank lines; indexing their empty split raised IndexError.
            if fields:
                drugbank.append(fields[0])

    mqns, smiles = get_fingerprint(drugbank)
    mqns = np.array(mqns)

    pca = PCA(n_components=2)
    result = pca.fit_transform(mqns)
    return result, mqns, smiles


# Plot the 2D PCA projection, coloured by fingerprint component 22.
faerun = Faerun(view='front', shader='circle')

coords, mqns, smiles = load()

# Normalise component 22 by its own maximum over all molecules. The former
# max(mqns[22]) took the maximum of *row* 22 (one molecule's fingerprint).
component = mqns[:, 22]

data = {
    'x': coords[:, 0],
    'y': coords[:, 1],
    'c': component / component.max(),
    'smiles': smiles
}

df = pd.DataFrame.from_dict(data)

faerun.plot(df, colormap='viridis')
예제 #19
0
def main():
    """Lay out the rows of ``DATA`` with tmap and plot them with Faerun.

    Each row is treated as a sparse binary fingerprint, MinHash-encoded,
    indexed in an LSH forest and laid out with ``CFG_TMAP``. Points are
    coloured by ``LABELS`` using a six-colour map (dark grey followed by the
    first five ``tab10`` colours) and labelled with ``FAERUN_LABELS``; the
    plot is written as ``gutenberg``.
    """

    # Initialize and configure tmap
    dims = 2048
    encoder = tm.Minhash(dims)
    forest = tm.LSHForest(dims, 128, store=True)

    fingerprints = [tm.VectorUint(list(row)) for row in DATA]
    forest.batch_add(encoder.batch_from_sparse_binary_array(fingerprints))
    forest.index()

    x, y, edge_from, edge_to, _ = tm.layout_from_lsh_forest(forest, CFG_TMAP)
    # The forest is no longer needed once the layout exists.
    forest.clear()

    # Prepare custom color map: dark grey first, then five tab10 colours.
    tab10 = plt.get_cmap("tab10").colors
    palette = [(0.2, 0.2, 0.2)]
    for position in range(5):
        palette.append(tab10[position])
    author_cmap = LinearSegmentedColormap.from_list(
        "custom_cm_gray", palette, N=len(palette))

    authors = [
        (1, "Rudyard Kipling"),
        (2, "Herbert George Wells"),
        (3, "Charles Darwin"),
        (4, "George Bernard Shaw"),
        (5, "William Wymark Jacobs"),
        (0, "Other"),
    ]

    faerun = Faerun(
        clear_color="#111111",
        view="front",
        coords=False,
        alpha_blending=True,
        legend_title="",
    )
    faerun.add_scatter(
        "gutenberg",
        {"x": x, "y": y, "c": LABELS, "labels": FAERUN_LABELS},
        colormap=author_cmap,
        point_scale=4.2,
        max_point_size=10,
        has_legend=True,
        categorical=True,
        legend_title="Authors",
        legend_labels=authors,
        shader="smoothCircle",
        selected_labels=["Author", "Title"],
    )
    faerun.add_tree(
        "gutenberg_tree",
        {"from": edge_from, "to": edge_to},
        point_helper="gutenberg",
        color="#222222",
    )
    faerun.plot("gutenberg", template="default")
예제 #20
0
def main():
    """Lay out the molecules in ``drugbank.csv`` with tmap and plot them.

    Rows without a SMILES string are dropped. Every remaining molecule that
    parses, has more than five atoms and fewer than two ``.`` separators is
    MHFP-encoded and annotated with descriptors (TPSA, logP, molecular
    weight, H-bond acceptors/donors, ring count), a Lipinski flag and three
    substructure flags. The plot has eleven colour series and is written as
    ``drugbank``.

    NOTE(review): columns are read by position -- 0 and 1 feed the
    "Drugbank ID" and "Name" label parts, 3 is split on ";" for the group
    and 6 is parsed as SMILES. Confirm against the CSV header.
    """
    df = pd.read_csv("drugbank.csv").dropna(subset=["SMILES"]).reset_index(
        drop=True)
    enc = MHFPEncoder()
    lf = tm.LSHForest(2048, 128)

    fps = []
    labels = []
    groups = []
    tpsa = []
    logp = []
    mw = []
    h_acceptors = []
    h_donors = []
    ring_count = []
    is_lipinski = []
    has_coc = []
    has_sa = []
    has_tz = []

    substruct_coc = AllChem.MolFromSmiles("COC")
    substruct_sa = AllChem.MolFromSmiles("NS(=O)=O")
    substruct_tz = AllChem.MolFromSmiles("N1N=NN=C1")

    total = len(df)
    for i, row in df.iterrows():
        if i % 1000 == 0 and i > 0:
            print(f"{round(100 * (i / total))}% done ...")

        # Use .iloc for positional access: integer keys on a label-indexed
        # Series are deprecated as positions in recent pandas.
        smiles = row.iloc[6]
        mol = AllChem.MolFromSmiles(smiles)

        if mol and mol.GetNumAtoms() > 5 and smiles.count(".") < 2:
            drug_id = row.iloc[0]
            fps.append(tm.VectorUint(enc.encode_mol(mol, min_radius=0)))
            labels.append(
                f'{smiles}__<a href="https://www.drugbank.ca/drugs/{drug_id}" target="_blank">{drug_id}</a>__{row.iloc[1]}'
                .replace("'", ""))
            groups.append(row.iloc[3].split(";")[0])
            tpsa.append(Descriptors.TPSA(mol))
            logp.append(Descriptors.MolLogP(mol))
            mw.append(Descriptors.MolWt(mol))
            h_acceptors.append(Descriptors.NumHAcceptors(mol))
            h_donors.append(Descriptors.NumHDonors(mol))
            ring_count.append(Descriptors.RingCount(mol))
            is_lipinski.append(lipinski_pass(mol))
            has_coc.append(mol.HasSubstructMatch(substruct_coc))
            has_sa.append(mol.HasSubstructMatch(substruct_sa))
            has_tz.append(mol.HasSubstructMatch(substruct_tz))

    def ranked(values):
        """Return the ranks of *values* divided by the number of values."""
        return ss.rankdata(np.array(values) / max(values)) / len(values)

    # Create the labels and the integer encoded array for the groups,
    # as they're categorical
    labels_groups, groups = Faerun.create_categories(groups)
    tpsa_ranked = ranked(tpsa)
    logp_ranked = ranked(logp)
    mw_ranked = ranked(mw)
    h_acceptors_ranked = ranked(h_acceptors)
    h_donors_ranked = ranked(h_donors)
    ring_count_ranked = ranked(ring_count)

    lf.batch_add(fps)
    lf.index()
    cfg = tm.LayoutConfiguration()
    cfg.k = 100
    cfg.sl_repeats = 2
    cfg.mmm_repeats = 2
    cfg.node_size = 2
    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, config=cfg)

    # Define a colormap highlighting approved vs non-approved
    custom_cmap = ListedColormap(
        [
            "#2ecc71", "#9b59b6", "#ecf0f1", "#e74c3c", "#e67e22", "#f1c40f",
            "#95a5a6"
        ],
        name="custom",
    )

    bin_cmap = ListedColormap(["#e74c3c", "#2ecc71"], name="bin_cmap")

    f = Faerun(
        clear_color="#222222",
        coords=False,
        view="front",
        impress=
        'made with <a href="http://tmap.gdb.tools" target="_blank">tmap</a><br />and <a href="https://github.com/reymond-group/faerun-python" target="_blank">faerun</a><br /><a href="https://gist.github.com/daenuprobst/5cddd0159c0cf4758fb16b4b4acbef89">source</a>',
    )

    f.add_scatter(
        "Drugbank",
        {
            "x":
            x,
            "y":
            y,
            "c": [
                groups,
                is_lipinski,
                has_coc,
                has_sa,
                has_tz,
                tpsa_ranked,
                logp_ranked,
                mw_ranked,
                h_acceptors_ranked,
                h_donors_ranked,
                ring_count_ranked,
            ],
            "labels":
            labels,
        },
        shader="smoothCircle",
        colormap=[
            custom_cmap,
            bin_cmap,
            bin_cmap,
            bin_cmap,
            bin_cmap,
            "viridis",
            "viridis",
            "viridis",
            "viridis",
            "viridis",
            "viridis",
        ],
        point_scale=2.5,
        # One flag per series: five categorical series followed by the six
        # ranked descriptors (the list previously had only ten entries for
        # eleven series).
        categorical=[
            True, True, True, True, True, False, False, False, False, False,
            False
        ],
        has_legend=True,
        legend_labels=[
            labels_groups,
            [(0, "No"), (1, "Yes")],
            [(0, "No"), (1, "Yes")],
            [(0, "No"), (1, "Yes")],
            [(0, "No"), (1, "Yes")],
        ],
        selected_labels=["SMILES", "Drugbank ID", "Name"],
        series_title=[
            "Group",
            "Lipinski",
            "Ethers",
            "Sulfonamides",
            "Tetrazoles",
            "TPSA",
            "logP",
            "Mol Weight",
            "H Acceptors",
            "H Donors",
            "Ring Count",
        ],
        max_legend_label=[
            None,
            None,
            None,
            None,
            None,
            str(round(max(tpsa))),
            str(round(max(logp))),
            str(round(max(mw))),
            str(round(max(h_acceptors))),
            str(round(max(h_donors))),
            str(round(max(ring_count))),
        ],
        min_legend_label=[
            None,
            None,
            None,
            None,
            None,
            str(round(min(tpsa))),
            str(round(min(logp))),
            str(round(min(mw))),
            str(round(min(h_acceptors))),
            str(round(min(h_donors))),
            str(round(min(ring_count))),
        ],
        title_index=2,
        legend_title="",
    )

    f.add_tree("drugbanktree", {"from": s, "to": t}, point_helper="Drugbank")

    f.plot("drugbank", template="smiles")
예제 #21
0
def main():
    """Plot a pre-computed ChEMBL LSH forest coloured by target class.

    Loads the SMILES, target classes and ChEMBL ids from ``chembl.pickle``,
    restores the LSH forest from ``chembl.dat``, computes the tmap layout
    (printing the elapsed time) and writes the plot as ``chembl``.
    """

    dims = 512
    lf = tm.LSHForest(dims, 128, store=True)

    # Due to the large data size (> 1GB) the following files are not provided directly
    # NOTE(security): unpickling can execute arbitrary code; only load a
    # chembl.pickle that comes from a trusted source.
    with open("chembl.pickle", "rb") as handle:
        # The activity values are part of the pickle but are not plotted.
        smiles, target_class, _activity, chembl_id = pickle.load(handle)

    # Three-part label: SMILES, ChEMBL id, link to the compound report card.
    labels = []
    for i, smi in enumerate(smiles):
        labels.append(
            smi + "__" + chembl_id[i] + "__" +
            f'<a target="_blank" href="https://www.ebi.ac.uk/chembl/compound_report_card/{chembl_id[i]}">{chembl_id[i]}</a>'
        )

    lf.restore("chembl.dat")

    classes = [
        "enzyme",
        "kinase",
        "protease",
        "cytochrome p450",
        "ion channel",
        "transporter",
        "transcription factor",
        "membrane receptor",
        "epigenetic regulator",
    ]

    # Number the known classes consecutively in alphabetical order, skipping
    # 7, which is reserved for every class not listed above ("Other").
    target_class_map = {}
    next_code = 0
    for name in sorted(set(target_class)):
        if name not in classes:
            target_class_map[name] = 7
            continue
        target_class_map[name] = next_code
        next_code += 1
        if next_code == 7:
            next_code = 8

    cfg = tm.LayoutConfiguration()
    cfg.node_size = 1 / 70
    cfg.mmm_repeats = 2
    cfg.sl_repeats = 2

    start = timer()
    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, cfg)
    end = timer()
    print(end - start)

    legend_labels = [
        (0, "Cytochrome p450"),
        (1, "Other Enzyme"),
        (2, "Epigenetic Regulator"),
        (3, "Ion Channel"),
        (4, "Kinase"),
        (5, "Membrane Receptor"),
        (6, "Protease"),
        (8, "Transcription Factor"),
        (9, "Transporter"),
        (7, "Other"),
    ]

    vals = [int(target_class_map[name]) for name in target_class]

    faerun = Faerun(view="front", coords=False)
    faerun.add_scatter(
        "chembl",
        {
            "x": x,
            "y": y,
            "c": vals,
            "labels": labels
        },
        colormap="tab10",
        point_scale=1.0,
        max_point_size=10,
        has_legend=True,
        categorical=True,
        shader="smoothCircle",
        legend_labels=legend_labels,
        title_index=1,
    )
    faerun.add_tree("chembl_tree", {
        "from": s,
        "to": t
    },
                    point_helper="chembl",
                    color="#222222")

    faerun.plot("chembl", template="smiles")
예제 #22
0
    pca = PCA(n_components=3)
    result = pca.fit_transform(mqns)
    return result, mqns, smiles


# Only the SMILES list is used below; it is repeated 150 times as labels.
coords, mqns, smiles = load()

# Four columns of normally distributed values (mean 0, std 6), one value
# per repeated SMILES string, drawn in the order x, y, z, c.
data = {
    axis: np.random.normal(0.0, 6.0, len(smiles) * 150)
    for axis in ('x', 'y', 'z', 'c')
}
data['smiles'] = smiles * 150

# Print the length of every column, one per line.
print(
    *(len(data[column]) for column in ('x', 'y', 'z', 'c', 'smiles')),
    sep="\n",
)

df = pd.DataFrame.from_dict(data)

faerun = Faerun(view='free', shader='sphere')
faerun.plot(df, colormap='viridis')