예제 #1
0
    def export_figure(self, output_file):
        """
        Export the main figure and its colorbar, laid out in a row, to an image file.

        Requires a working *selenium* driver (https://www.selenium.dev/selenium/docs/api/py/).

        Related attributes are `selenium_webdriver`, `figure_export_width` and `figure_export_height`.

        The output format is selected from the extension of `output_file`; only
        ``.png`` and ``.svg`` are handled. Errors raised during the export
        (including the `NotImplementedError` for any other extension) are not
        propagated: the traceback is printed and the 'figure' export status is
        set to 'failed'. On success the status is set to 'done'.

        When `selenium_webdriver` is set, it is called to create a driver for
        this export and that driver is quit once the export has finished.
        """
        export_kwargs = {}
        if self.figure_export_width is not None:
            export_kwargs['width'] = self.figure_export_width
        if self.figure_export_height is not None:
            export_kwargs['height'] = self.figure_export_height
        # The driver is created last so that nothing can fail between starting
        # the browser and entering the try/finally that shuts it down again.
        driver = None
        if self.selenium_webdriver is not None:
            driver = self.selenium_webdriver()
            export_kwargs['webdriver'] = driver
        #self.set_export_status('figure', 'exporting...')
        try:
            doc = row(self.main_figure, self.colorbar_figure)
            from bokeh.io import export
            if output_file.endswith('.png'):
                export.export_png(doc, filename=output_file, **export_kwargs)
            elif output_file.endswith('.svg'):
                export.export_svg(doc, filename=output_file, **export_kwargs)
            else:
                raise NotImplementedError("format '{}' not supported".format(
                    os.path.splitext(output_file)[1]))
            self.set_export_status('figure', 'done')
        except Exception:
            # KeyboardInterrupt/SystemExit are not Exception subclasses and
            # therefore still propagate to the caller.
            traceback.print_exc()
            self.set_export_status('figure', 'failed')
        finally:
            if driver is not None:
                # A driver supplied by the caller of the export functions is
                # not closed by them; quit it here so browsers do not pile up.
                try:
                    driver.quit()
                except Exception:
                    traceback.print_exc()
예제 #2
0
    def export_figure(self, output_file):
        """
        Export the main figure and its colorbar, laid out in a row, to an image file.

        Requires a working *selenium* driver (https://www.selenium.dev/selenium/docs/api/py/).

        Related attributes are `selenium_webdriver`, `figure_export_width` and `figure_export_height`.

        When `selenium_webdriver` is set, a driver is created for this export,
        with the `headless` flag set on its options when the matching
        ``options`` module can be found; otherwise the driver is created
        without options (an `ImportWarning` is emitted unless the driver is
        Safari or Edge). The driver is quit once the export has finished.

        The output format is selected from the extension of `output_file`; only
        ``.png`` and ``.svg`` are handled. Errors raised during the export
        (including the `NotImplementedError` for any other extension) are not
        propagated: the traceback is printed and the 'figure' export status is
        set to 'failed'. On success the status is set to 'done'.
        """
        # Imported unconditionally: importing these inside a branch makes the
        # names function-local, and they are then unbound (UnboundLocalError)
        # in the final error handler whenever that branch was skipped.
        import traceback
        import warnings

        export_kwargs = {}
        if self.figure_export_width is not None:
            export_kwargs["width"] = self.figure_export_width
        if self.figure_export_height is not None:
            export_kwargs["height"] = self.figure_export_height
        driver = None
        if self.selenium_webdriver is not None:
            try:
                from importlib import import_module

                # Drop the last 9 characters of the driver's module path
                # (presumably the trailing 'webdriver' component - verify)
                # and append 'options' to reach the sibling options module.
                options_module = import_module(
                    self.selenium_webdriver.__module__[:-9] + "options")
                options = options_module.Options()
                # NOTE(review): the `headless` attribute is deprecated in
                # recent Selenium 4 releases - confirm against the version in use.
                options.headless = True
                driver = self.selenium_webdriver(options=options)
            except (ImportError, AttributeError):
                import selenium.webdriver

                # Safari and Edge are silently accepted without options.
                if self.selenium_webdriver not in (
                        selenium.webdriver.Safari,
                        selenium.webdriver.Edge,
                ):
                    warnings.warn(
                        "could not access the webdriver"
                        "s options:\n" + traceback.format_exc(),
                        ImportWarning,
                    )
                driver = self.selenium_webdriver()
            export_kwargs["webdriver"] = driver
        # self.set_export_status('figure', 'exporting...')
        try:
            doc = row(self.main_figure, self.colorbar_figure)
            from bokeh.io import export

            if output_file.endswith(".png"):
                export.export_png(doc, filename=output_file, **export_kwargs)
            elif output_file.endswith(".svg"):
                export.export_svg(doc, filename=output_file, **export_kwargs)
            else:
                raise NotImplementedError("format '{}' not supported".format(
                    os.path.splitext(output_file)[1]))
            self.set_export_status("figure", "done")
        except Exception:
            # KeyboardInterrupt/SystemExit are not Exception subclasses and
            # therefore still propagate to the caller.
            traceback.print_exc()
            self.set_export_status("figure", "failed")
        finally:
            if driver is not None:
                # Shut the browser down so repeated exports do not leak processes.
                try:
                    driver.quit()
                except Exception:
                    traceback.print_exc()
예제 #3
0
def draw_2d_chart(idx, x, cluster_output_path, k):
    """Embed `x` in 2-D with t-SNE and export the scatter plot as SVG and PNG.

    Args:
        idx: cluster label for each row of `x`; every label is looked up in
            `colormap`, which is defined outside this function.
        x: samples handed to ``TSNE.fit_transform``.
        cluster_output_path: directory that receives the two image files.
        k: value interpolated into the file names ``cluster2d-{k}.svg`` and
            ``cluster2d-{k}.png``.

    A Chrome webdriver is started for the export and is always quit before
    returning, including when an error occurs.
    """
    driver = webdriver.Chrome(os.path.join(BASE_DIR, "chromedriver"),
                              options=opts)
    try:
        points = TSNE(random_state=42).fit_transform(x)
        t_df = pd.DataFrame(points, index=range(len(x)), columns=["x", "y"])
        t_df["cluster_no"] = idx
        t_df["color"] = [colormap[label] for label in t_df["cluster_no"]]
        plot_data = ColumnDataSource(data=t_df.to_dict(orient="list"))

        p = figure(
            plot_width=1200,
            plot_height=1200,
            active_scroll="wheel_zoom",
            output_backend="svg",
        )
        # NOTE(review): the data source built above has no "title" column -
        # confirm this tooltip is intended.
        p.add_tools(HoverTool(tooltips="@title"))
        p.circle(
            source=plot_data,
            x="x",
            y="y",
            line_alpha=0.9,
            fill_alpha=0.9,
            size=8,
            fill_color="color",
            line_color="color",
        )
        p.title.text_font_size = value("16pt")
        p.xaxis.visible = True
        p.yaxis.visible = True
        # Transparent background, no grid/outline and no toolbar in the output.
        p.background_fill_color = None
        p.border_fill_color = None
        p.grid.grid_line_color = None
        p.outline_line_color = None
        p.toolbar.logo = None
        p.toolbar_location = None

        export_svg(
            p,
            filename=os.path.join(cluster_output_path, f"cluster2d-{k}.svg"),
            webdriver=driver,
        )
        export_png(
            p,
            filename=os.path.join(cluster_output_path, f"cluster2d-{k}.png"),
            webdriver=driver,
        )
    finally:
        # The export functions do not close a driver they were handed.
        driver.quit()
예제 #4
0
def write_chart(output_file_dir, title, p, driver):
    """Export plot `p` as ``<title>.png`` and ``<title>.svg`` under ``output_file_dir/<title>/``.

    Args:
        output_file_dir: parent directory for the per-chart output directory.
        title: name of the output sub-directory and base name of both files.
        p: the object passed to ``export_png`` / ``export_svg``.
        driver: webdriver passed through to both export calls; it is not
            closed here.

    The output directory, including any missing parent directories, is created
    if needed. Both exports use ``timeout=500``.
    """
    output_dir = os.path.join(output_file_dir, title)
    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(output_dir, exist_ok=True)
    export_png(
        p,
        webdriver=driver,
        timeout=500,
        filename=os.path.join(output_dir, f"{title}.png"),
    )
    export_svg(
        p,
        webdriver=driver,
        timeout=500,
        filename=os.path.join(output_dir, f"{title}.svg"),
    )
예제 #5
0
def write_cross_chart(df, cluster_output_path, k):
    """Export a jittered scatter of ``df.c3`` (x) against ``df.c1`` (y) as SVG and PNG.

    Args:
        df: frame providing the ``c3``, ``c1`` and ``cluster`` columns; each
            cluster value is looked up in `colormap`, which is defined outside
            this function.
        cluster_output_path: directory that receives the two image files.
        k: value interpolated into the file names ``cross-{k}.svg`` and
            ``cross-{k}.png``.

    Every point is displaced by a random offset drawn from inside a disc of
    radius ``sqrt(0.5)``. Both axes are drawn through the origin and the four
    axis ends are labelled "C1", "-C1", "C3" and "-C3".

    A Chrome webdriver is started for the export and is always quit before
    returning, including when an error occurs.
    """
    height = 1600
    width = 1600
    driver = webdriver.Chrome(os.path.join(BASE_DIR, "chromedriver"),
                              options=opts)
    try:
        x = df.c3.to_list()
        y = df.c1.to_list()
        clusters = df.cluster.to_list()
        plot = figure(
            plot_width=width,
            plot_height=height,
            active_scroll="wheel_zoom",
            output_backend="svg",
        )
        plot.add_tools(HoverTool(tooltips="@title"))

        max_offset = 0.5**0.5
        new_x = []
        new_y = []
        for x_coord, y_coord in zip(x, y):
            x_rand = random.uniform(-max_offset, max_offset)
            # Clamp at zero: at the very edge of the interval, rounding can
            # make 0.5 - x_rand**2 slightly negative, and a negative float
            # raised to 0.5 yields a complex number.
            y_rand_range = max(0.0, 0.5 - x_rand**2)**0.5
            y_rand = random.uniform(-y_rand_range, y_rand_range)
            new_x.append(x_coord + x_rand)
            new_y.append(y_coord + y_rand)
        colors = [colormap[label] for label in clusters]
        source = ColumnDataSource(
            data={"x": new_x, "y": new_y, "color": colors})
        plot.scatter(
            source=source,
            x="x",
            y="y",
            line_alpha=0.6,
            fill_alpha=0.6,
            size=10,
            color="color",
        )

        plot.yaxis.axis_label_text_font_size = "25pt"
        plot.yaxis.major_label_text_font_size = "25pt"
        plot.xaxis.axis_label_text_font_size = "25pt"
        plot.xaxis.major_label_text_font_size = "25pt"
        plot.title.text_font_size = value("32pt")
        plot.xaxis.visible = True
        plot.yaxis.visible = True

        # (text, x_offset, y_offset) for the four axis-end captions.
        axis_captions = (
            ("C1", 0, 750),
            ("-C1", 0, -750),
            ("C3", 750, 0),
            ("-C3", -750, 0),
        )
        for text, x_offset, y_offset in axis_captions:
            caption = Label(
                text=text,
                x_offset=x_offset,
                y_offset=y_offset,
                text_font_size="30px",
            )
            plot.add_layout(caption, "center")

        # Transparent background, no grid/outline, axes crossing at 0 and no
        # toolbar in the output.
        plot.background_fill_color = None
        plot.border_fill_color = None
        plot.grid.grid_line_color = None
        plot.outline_line_color = None
        plot.yaxis.fixed_location = 0
        plot.xaxis.fixed_location = 0
        plot.toolbar.logo = None
        plot.toolbar_location = None

        export_svg(
            plot,
            filename=os.path.join(cluster_output_path, f"cross-{k}.svg"),
            webdriver=driver,
            height=height,
            width=width,
        )
        export_png(
            plot,
            filename=os.path.join(cluster_output_path, f"cross-{k}.png"),
            webdriver=driver,
            height=height,
            width=width,
        )
    finally:
        # The export functions do not close a driver they were handed.
        driver.quit()
예제 #6
0
def draw_vectors():
    """Export one scatter plot per pair of vector columns as ``svgs/{n}.svg`` and ``pngs/{n}.png``.

    Reads ``normalized_future_vectors.csv`` from ``OUTPUTS_DIR`` and pairs up
    the columns from index 5 onwards, skipping a fixed set of index pairs.
    Each column name is expected to have the form ``"<start> | <end>"``; the
    two parts are used as captions at the negative and positive end of the
    corresponding axis. Only rows whose ``cluster`` value is 3 are plotted.

    The ``svgs`` and ``pngs`` directories are created in the current working
    directory if they do not exist. A Chrome webdriver is started for the
    exports and is always quit before returning, including when an error
    occurs.
    """
    driver = webdriver.Chrome(os.path.join(BASE_DIR, "chromedriver"),
                              options=options)
    try:
        df = pd.read_csv(os.path.join(OUTPUTS_DIR,
                                      "normalized_future_vectors.csv"))
        excluded_pairs = {
            (5, 6),
            (5, 7),
            (6, 7),
            (8, 9),
            (8, 10),
            (9, 10),
            (11, 12),
            (11, 13),
            (12, 13),
        }
        column_pairs = [
            pair for pair in combinations(range(5, len(df.columns)), 2)
            if pair not in excluded_pairs
        ]
        # To highlight a single cluster, map the other clusters to a neutral
        # grey such as "#bdbdbd".
        colormap = {3: "#ffee33", 2: "#00a152", 1: "#2979ff", 0: "#d500f9"}

        # The export calls below write into these relative directories.
        os.makedirs("svgs", exist_ok=True)
        os.makedirs("pngs", exist_ok=True)

        for idx, (x, y) in enumerate(column_pairs, 1):
            X = df[df.columns[x]].to_list()
            Y = df[df.columns[y]].to_list()
            tsne_df = pd.DataFrame(zip(X, Y),
                                   index=range(len(X)),
                                   columns=["x_coord", "y_coord"])
            tsne_df["title"] = df["title"].to_list()
            tsne_df["cluster_no"] = df["cluster"].to_list()

            only_one_cluster = pd.DataFrame(
                tsne_df.loc[tsne_df.cluster_no == 3])
            only_one_cluster["color"] = [
                colormap[label] for label in only_one_cluster["cluster_no"]
            ]
            plot_data = ColumnDataSource(data=only_one_cluster.to_dict(
                orient="list"))

            plot = figure(
                plot_width=1600,
                plot_height=1600,
                active_scroll="wheel_zoom",
                output_backend="svg",
                x_range=(-1.1, 1.1),
                y_range=(-1.1, 1.1),
            )
            plot.add_tools(HoverTool(tooltips="@title"))
            plot.circle(
                source=plot_data,
                x="x_coord",
                y="y_coord",
                line_alpha=0.6,
                fill_alpha=0.6,
                size=20,
                fill_color="color",
                line_color="color",
            )
            plot.yaxis.axis_label_text_font_size = "25pt"
            plot.yaxis.major_label_text_font_size = "25pt"
            plot.xaxis.axis_label_text_font_size = "25pt"
            plot.xaxis.major_label_text_font_size = "25pt"

            # Unpacking raises ValueError unless the name has exactly one "|".
            start_x, end_x = (
                part.strip() for part in df.columns[x].split("|"))
            start_y, end_y = (
                part.strip() for part in df.columns[y].split("|"))

            plot.title.text_font_size = value("32pt")
            plot.xaxis.visible = True
            plot.yaxis.visible = True

            # (text, x_offset, y_offset) for the four axis-end captions.
            axis_captions = (
                (end_y, 0, 750),
                (start_y, 0, -750),
                (end_x, 600, 0),
                (start_x, -750, 0),
            )
            for text, x_offset, y_offset in axis_captions:
                caption = Label(
                    text=text,
                    x_offset=x_offset,
                    y_offset=y_offset,
                    text_font_size="30px",
                )
                plot.add_layout(caption, "center")

            # Transparent background, no grid/outline, axes crossing at 0 and
            # no toolbar in the output.
            plot.background_fill_color = None
            plot.border_fill_color = None
            plot.grid.grid_line_color = None
            plot.outline_line_color = None
            plot.yaxis.fixed_location = 0
            plot.xaxis.fixed_location = 0
            plot.toolbar.logo = None
            plot.toolbar_location = None
            print(idx)
            export_svg(
                plot,
                filename=f"svgs/{idx}.svg",
                webdriver=driver,
                height=1600,
                width=1600,
            )
            export_png(
                plot,
                filename=f"pngs/{idx}.png",
                webdriver=driver,
                height=1600,
                width=1600,
            )
    finally:
        # The export functions do not close a driver they were handed.
        driver.quit()
예제 #7
0
def draw_chart(df):
    """Plot a t-SNE embedding of ``df["wv"]`` and export it as ``cluster.svg`` / ``cluster.png``.

    Args:
        df: frame providing the ``wv``, ``cluster``, ``title`` and
            ``tokens_len`` columns. Cluster values must be keys of the local
            colormap (0-3).

    The embedding is cached in ``tsne3000.pkl`` in the current working
    directory: it is computed and written when the file is missing and loaded
    from it otherwise. Marker sizes are ``5 + 10 * n`` where ``n`` is the
    result of ``min_max_normalize`` on ``tokens_len``. The plot is also opened
    with ``show`` before the toolbar is removed for the exported images.

    A Chrome webdriver is started for the export and is always quit before
    returning, including when an error occurs.
    """
    driver = webdriver.Chrome(os.path.join(BASE_DIR, "chromedriver"))
    try:
        X = df["wv"].to_list()
        y = df["cluster"].to_list()
        tsne_filepath = "tsne3000.pkl"

        if not os.path.exists(tsne_filepath):
            tsne_points = TSNE(random_state=42).fit_transform(X)
            with open(tsne_filepath, "wb") as f:
                pickle.dump(tsne_points, f)
        else:  # Cache hit.
            # SECURITY: unpickling can execute arbitrary code; this is only
            # safe while the cache file is one this script wrote itself.
            with open(tsne_filepath, "rb") as f:
                tsne_points = pickle.load(f)
        tsne_df = pd.DataFrame(
            tsne_points, index=range(len(X)), columns=["x_coord", "y_coord"]
        )

        tsne_df["title"] = df["title"].to_list()
        tsne_df["tokens_len"] = df["tokens_len"].to_list()
        tsne_df["cluster_no"] = y
        colormap = {0: "#ffee33", 1: "#00a152", 2: "#2979ff", 3: "#d500f9"}
        tsne_df["color"] = [colormap[label] for label in tsne_df["cluster_no"]]
        normalized = min_max_normalize(tsne_df.tokens_len.to_list())
        tsne_df["radius"] = [5 + scaled * 10 for scaled in normalized]
        print(tsne_df.to_dict(orient="list"))
        plot_data = ColumnDataSource(data=tsne_df.to_dict(orient="list"))
        print(plot_data)

        tsne_plot = figure(
            plot_width=1200,
            plot_height=1200,
            active_scroll="wheel_zoom",
            output_backend="svg",
        )
        tsne_plot.add_tools(HoverTool(tooltips="@title"))
        tsne_plot.circle(
            source=plot_data,
            x="x_coord",
            y="y_coord",
            line_alpha=0.6,
            fill_alpha=0.6,
            size="radius",
            fill_color="color",
            line_color="color",
        )
        tsne_plot.title.text_font_size = value("16pt")
        tsne_plot.xaxis.visible = True
        tsne_plot.yaxis.visible = True
        tsne_plot.background_fill_color = None
        tsne_plot.border_fill_color = None
        tsne_plot.grid.grid_line_color = None
        tsne_plot.outline_line_color = None
        # Shown with the toolbar still in place; it is removed only for the
        # exported images below.
        show(tsne_plot)
        tsne_plot.toolbar.logo = None
        tsne_plot.toolbar_location = None
        export_svg(tsne_plot, filename="cluster.svg", webdriver=driver)
        export_png(tsne_plot, filename="cluster.png", webdriver=driver)
    finally:
        # The export functions do not close a driver they were handed.
        driver.quit()
예제 #8
0
def calculate_cluster_number():
    """Print the k in 2..10 with the highest silhouette score and export a plot per k.

    Reads ``happiness.csv`` from ``H_IN_DIRS`` and uses every column except
    the first and the last as features. For each k, KMeans is fitted once and
    the resulting labels are used both for the silhouette score and for
    colouring a 2-D t-SNE scatter plot, which is exported as
    ``cluster-number{k}.svg`` and ``cluster-number{k}.png`` in the current
    working directory.

    Prints 0 if no k achieves a positive silhouette score.

    A Chrome webdriver is started for the exports and is always quit before
    returning, including when an error occurs.
    """
    driver = webdriver.Chrome(
        os.path.join(BASE_DIR, "chromedriver"), options=opts
    )
    try:
        df = pd.read_csv(os.path.join(H_IN_DIRS, "happiness.csv"))
        # Feature matrix: all columns except the first and the last.
        x = df.iloc[:, 1:-1].values.tolist()

        # The embedding depends only on `x` and a fixed seed, so it is
        # computed once instead of once per k.
        points = TSNE(random_state=42).fit_transform(x)
        colormap = {
            0: "#f44336",
            1: "#673ab7",
            2: "#9c27b0",
            3: "#e91e63",
            4: "#3f51b5",
            5: "#2196f3",
            6: "#03a9f4",
            7: "#00bcd4",
            8: "#009688",
            9: "#cddc39",
        }

        best_k = 0
        best_score = 0
        kmax = 10
        for k in range(2, kmax + 1):
            # Fit once: fitting a second time without a fixed seed can give a
            # different labelling than the one that was scored.
            labels = KMeans(n_clusters=k).fit_predict(x)
            score = silhouette_score(x, labels, metric="euclidean")

            t_df = pd.DataFrame(
                points, index=range(len(x)), columns=["x", "y"])
            t_df["cluster_no"] = labels
            t_df["color"] = [colormap[label] for label in t_df["cluster_no"]]
            plot_data = ColumnDataSource(data=t_df.to_dict(orient="list"))

            p = figure(
                plot_width=1200,
                plot_height=1200,
                active_scroll="wheel_zoom",
                output_backend="svg",
            )
            p.add_tools(HoverTool(tooltips="@title"))
            p.circle(
                source=plot_data,
                x="x",
                y="y",
                line_alpha=0.9,
                fill_alpha=0.9,
                fill_color="color",
                line_color="color",
            )
            p.title.text_font_size = value("16pt")
            p.xaxis.visible = True
            p.yaxis.visible = True
            p.background_fill_color = None
            p.border_fill_color = None
            p.grid.grid_line_color = None
            p.outline_line_color = None
            p.toolbar.logo = None
            p.toolbar_location = None
            export_svg(
                p, filename=f"cluster-number{k}.svg", webdriver=driver,
            )
            export_png(
                p, filename=f"cluster-number{k}.png", webdriver=driver,
            )

            if score > best_score:
                best_score = score
                best_k = k
        print(best_k)
    finally:
        # The export functions do not close a driver they were handed.
        driver.quit()
예제 #9
0
# Colour mapper for the bars, keyed on the 'cyl_mfr' column. The factors are
# the sorted unique values of `df.cyl`; `end=1` is the end slice index applied
# to the column's multi-level factors (see the Bokeh `factor_cmap` docs).
index_cmap = factor_cmap(
    'cyl_mfr',
    factors=sorted(df.cyl.unique()),
    palette=Spectral5,
    end=1,
)

# Categorical bar chart over `group`, without a toolbar, with hover tooltips
# showing the mean MPG and the (cylinders, manufacturer) factor.
p = figure(
    title="Mean MPG by # Cylinders and Manufacturer",
    x_range=group,
    width=800,
    height=300,
    tooltips=[("MPG", "@mpg_mean"), ("Cyl, Mfr", "@cyl_mfr")],
    toolbar_location=None,
)

p.vbar(
    source=group,
    x='cyl_mfr',
    top='mpg_mean',
    width=1,
    fill_color=index_cmap,
    line_color="white",
)

# Axis and frame styling.
p.outline_line_color = None
p.xgrid.grid_line_color = None
p.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
p.xaxis.major_label_orientation = 1.2
p.x_range.range_padding = 0.05
p.y_range.start = 0

export_svg(p, filename="plot.svg")
예제 #10
0
 def export_as_svg(self, filename):
     """Export the child of the currently active tab to `filename` as SVG.

     The child is first passed to `_change_backend_to_svg` and then handed
     to `export_svg`.
     """
     active_tab = self._tabs.tabs[self._tabs.active]
     active_figure = active_tab.child
     self._change_backend_to_svg(active_figure)
     export_svg(active_figure, filename=filename)