def export_figure(self, output_file):
    """
    Export the main figure and its colorbar, laid out in a row, to
    `output_file`.

    The format is selected from the file name: names ending with '.png'
    are exported as PNG, names ending with '.svg' as SVG. Any other name
    is reported as a failure.

    Requires a working *selenium* driver
    (https://www.selenium.dev/selenium/docs/api/py/).
    Related attributes are `selenium_webdriver`, `figure_export_width`
    and `figure_export_height`.

    The outcome is reported through `set_export_status('figure', ...)`
    with 'done' or 'failed'; export errors are printed, not raised.
    """
    export_kwargs = {}
    driver = None
    if self.selenium_webdriver is not None:
        # the driver is instantiated here, so it must also be closed here
        driver = self.selenium_webdriver()
        export_kwargs['webdriver'] = driver
    if self.figure_export_width is not None:
        export_kwargs['width'] = self.figure_export_width
    if self.figure_export_height is not None:
        export_kwargs['height'] = self.figure_export_height
    try:
        doc = row(self.main_figure, self.colorbar_figure)
        from bokeh.io import export
        if output_file.endswith('.png'):
            export.export_png(doc, filename=output_file, **export_kwargs)
        elif output_file.endswith('.svg'):
            export.export_svg(doc, filename=output_file, **export_kwargs)
        else:
            raise NotImplementedError("format '{}' not supported".format(
                os.path.splitext(output_file)[1]))
        self.set_export_status('figure', 'done')
    except Exception:
        # KeyboardInterrupt and SystemExit do not derive from Exception
        # and therefore still propagate to the caller
        traceback.print_exc()
        self.set_export_status('figure', 'failed')
    finally:
        if driver is not None:
            # best effort: a failing shutdown must not mask the export status
            try:
                driver.quit()
            except Exception:
                traceback.print_exc()
def export_figure(self, output_file):
    """
    Export the main figure and its colorbar, laid out in a row, to
    `output_file`.

    The format is selected from the file name: names ending with ".png"
    are exported as PNG, names ending with ".svg" as SVG. Any other name
    is reported as a failure.

    Requires a working *selenium* driver
    (https://www.selenium.dev/selenium/docs/api/py/).
    Related attributes are `selenium_webdriver`, `figure_export_width`
    and `figure_export_height`.

    When `selenium_webdriver` is set, the driver is first instantiated
    with headless options; if the matching options module cannot be
    loaded or used, the driver is instantiated without arguments instead.

    The outcome is reported through `set_export_status("figure", ...)`
    with "done" or "failed"; export errors are printed, not raised.
    """
    # Imported once at function scope: an `import traceback` nested in a
    # branch would make the name local to the whole function and leave it
    # unbound in the final error handler whenever that branch did not run.
    import traceback
    import warnings

    export_kwargs = {}
    driver = None
    if self.selenium_webdriver is not None:
        try:
            from importlib import import_module
            # drop the trailing 9 characters (the length of "webdriver")
            # of the driver's module path and load the sibling "options"
            # module -- presumably e.g. selenium.webdriver.chrome.options
            options_module = import_module(
                self.selenium_webdriver.__module__[:-9] + "options")
            options = options_module.Options()
            # NOTE(review): the `headless` attribute is reported as
            # deprecated in recent Selenium 4 releases -- confirm against
            # the Selenium version in use.
            options.headless = True
            driver = self.selenium_webdriver(options=options)
        except (ImportError, AttributeError):
            import selenium.webdriver
            # no warning is raised for Safari and Edge
            if self.selenium_webdriver not in (
                selenium.webdriver.Safari,
                selenium.webdriver.Edge,
            ):
                warnings.warn(
                    "could not access the webdriver's options:\n"
                    + traceback.format_exc(),
                    ImportWarning,
                )
            driver = self.selenium_webdriver()
        export_kwargs["webdriver"] = driver
    if self.figure_export_width is not None:
        export_kwargs["width"] = self.figure_export_width
    if self.figure_export_height is not None:
        export_kwargs["height"] = self.figure_export_height
    try:
        doc = row(self.main_figure, self.colorbar_figure)
        from bokeh.io import export
        if output_file.endswith(".png"):
            export.export_png(doc, filename=output_file, **export_kwargs)
        elif output_file.endswith(".svg"):
            export.export_svg(doc, filename=output_file, **export_kwargs)
        else:
            raise NotImplementedError("format '{}' not supported".format(
                os.path.splitext(output_file)[1]))
        self.set_export_status("figure", "done")
    except Exception:
        # KeyboardInterrupt and SystemExit do not derive from Exception
        # and therefore still propagate to the caller
        traceback.print_exc()
        self.set_export_status("figure", "failed")
    finally:
        if driver is not None:
            # best effort: a failing shutdown must not mask the export status
            try:
                driver.quit()
            except Exception:
                traceback.print_exc()
def draw_2d_chart(idx, x, cluster_output_path, k):
    """
    Project `x` to two dimensions with t-SNE and export a scatter plot
    coloured by cluster.

    Writes `cluster2d-<k>.svg` and `cluster2d-<k>.png` into
    `cluster_output_path`.

    :param idx: cluster labels, stored as the `cluster_no` column and used
        as keys into the module-level `colormap`
    :param x: samples handed to `TSNE.fit_transform`
    :param cluster_output_path: directory receiving the exported files
    :param k: value interpolated into the output file names
    """
    driver = webdriver.Chrome(os.path.join(BASE_DIR, "chromedriver"), options=opts)
    try:
        tsne = TSNE(random_state=42)
        points = tsne.fit_transform(x)

        t_df = pd.DataFrame(points, index=range(len(x)), columns=["x", "y"])
        t_df["cluster_no"] = idx
        t_df["color"] = [colormap[cluster] for cluster in t_df["cluster_no"]]
        plot_data = ColumnDataSource(data=t_df.to_dict(orient="list"))

        p = figure(
            plot_width=1200,
            plot_height=1200,
            active_scroll="wheel_zoom",
            output_backend="svg",
        )
        # NOTE(review): the data source built above has no "title" column,
        # although the tooltip refers to one -- confirm this is intended.
        p.add_tools(HoverTool(tooltips="@title"))
        p.circle(
            source=plot_data,
            x="x",
            y="y",
            line_alpha=0.9,
            fill_alpha=0.9,
            size=8,
            fill_color="color",
            line_color="color",
        )

        p.title.text_font_size = value("16pt")
        p.xaxis.visible = True
        p.yaxis.visible = True
        # no background, border, grid, outline or toolbar in the export
        p.background_fill_color = None
        p.border_fill_color = None
        p.grid.grid_line_color = None
        p.outline_line_color = None
        p.toolbar.logo = None
        p.toolbar_location = None

        export_svg(
            p,
            filename=os.path.join(cluster_output_path, f"cluster2d-{k}.svg"),
            webdriver=driver,
        )
        export_png(
            p,
            filename=os.path.join(cluster_output_path, f"cluster2d-{k}.png"),
            webdriver=driver,
        )
    finally:
        # the browser is started by this function, so it is closed here
        # even when the projection or an export fails
        driver.quit()
def write_chart(output_file_dir, title, p, driver):
    """
    Export the plot `p` as `<title>.png` and `<title>.svg` into the
    directory `<output_file_dir>/<title>`.

    The directory, including missing parent directories, is created when
    it does not exist yet.

    :param output_file_dir: parent directory of the per-chart directory
    :param title: name of the per-chart directory and stem of both files
    :param p: object handed to `export_png` / `export_svg`
    :param driver: webdriver forwarded to both export calls; it is not
        closed here
    """
    output_dir = os.path.join(output_file_dir, title)
    # exist_ok avoids the check-then-create race of isdir() + mkdir()
    os.makedirs(output_dir, exist_ok=True)

    export_png(
        p,
        webdriver=driver,
        timeout=500,
        filename=os.path.join(output_dir, f"{title}.png"),
    )
    export_svg(
        p,
        webdriver=driver,
        timeout=500,
        filename=os.path.join(output_dir, f"{title}.svg"),
    )
def write_cross_chart(df, cluster_output_path, k):
    """
    Export a jittered scatter plot of `df.c3` (x axis) against `df.c1`
    (y axis), coloured by `df.cluster`, with both axes drawn through the
    origin.

    Writes `cross-<k>.svg` and `cross-<k>.png` into `cluster_output_path`.

    Every point is displaced by a random offset taken from a disc of
    radius sqrt(0.5) around its coordinates. The offsets come from the
    `random` module, so the output is only reproducible if the caller
    seeds it.

    :param df: data frame providing the `c1`, `c3` and `cluster` columns;
        the cluster values are used as keys into the module-level
        `colormap`
    :param cluster_output_path: directory receiving the exported files
    :param k: value interpolated into the output file names
    """
    height = 1600
    width = 1600
    max_offset = 0.5 ** 0.5

    driver = webdriver.Chrome(os.path.join(BASE_DIR, "chromedriver"), options=opts)
    try:
        x = df.c3.to_list()
        y = df.c1.to_list()
        clusters = df.cluster.to_list()

        plot = figure(
            plot_width=width,
            plot_height=height,
            active_scroll="wheel_zoom",
            output_backend="svg",
        )
        plot.add_tools(HoverTool(tooltips="@title"))

        new_x = []
        new_y = []
        for x_coord, y_coord in zip(x, y):
            x_rand = random.uniform(-max_offset, max_offset)
            # Clamp at 0: rounding can make x_rand**2 exceed 0.5 by one
            # ulp, and a negative base raised to 0.5 is a complex number.
            y_rand_range = max(0.0, 0.5 - x_rand ** 2) ** 0.5
            y_rand = random.uniform(-y_rand_range, y_rand_range)
            new_x.append(x_coord + x_rand)
            new_y.append(y_coord + y_rand)

        colors = [colormap[cluster] for cluster in clusters]
        source = ColumnDataSource(data={"x": new_x, "y": new_y, "color": colors})
        plot.scatter(
            source=source,
            x="x",
            y="y",
            line_alpha=0.6,
            fill_alpha=0.6,
            size=10,
            color="color",
        )

        plot.yaxis.axis_label_text_font_size = "25pt"
        plot.yaxis.major_label_text_font_size = "25pt"
        plot.xaxis.axis_label_text_font_size = "25pt"
        plot.xaxis.major_label_text_font_size = "25pt"
        plot.title.text_font_size = value("32pt")
        plot.xaxis.visible = True
        plot.yaxis.visible = True

        # captions for the four axis ends: (text, x_offset, y_offset)
        captions = (
            ("C1", 0, 750),
            ("-C1", 0, -750),
            ("C3", 750, 0),
            ("-C3", -750, 0),
        )
        for text, x_offset, y_offset in captions:
            caption = Label(
                text=text,
                x_offset=x_offset,
                y_offset=y_offset,
                text_font_size="30px",
            )
            plot.add_layout(caption, "center")

        # no background, border, grid, outline or toolbar in the export
        plot.background_fill_color = None
        plot.border_fill_color = None
        plot.grid.grid_line_color = None
        plot.outline_line_color = None
        # draw both axes through the origin
        plot.yaxis.fixed_location = 0
        plot.xaxis.fixed_location = 0
        plot.toolbar.logo = None
        plot.toolbar_location = None

        export_svg(
            plot,
            filename=os.path.join(cluster_output_path, f"cross-{k}.svg"),
            webdriver=driver,
            height=height,
            width=width,
        )
        export_png(
            plot,
            filename=os.path.join(cluster_output_path, f"cross-{k}.png"),
            webdriver=driver,
            height=height,
            width=width,
        )
    finally:
        # the browser is started by this function, so it is closed here
        # even when plotting or an export fails
        driver.quit()
def draw_vectors():
    """
    Export one scatter plot per pair of vector columns found in
    `normalized_future_vectors.csv` (read from `OUTPUTS_DIR`).

    All pairs of column positions from 5 onwards are plotted, except the
    pairs listed in `excluded_pairs`. Only the rows whose `cluster` value
    is 3 are drawn. The plots are numbered from 1 and written to
    `svgs/<n>.svg` and `pngs/<n>.png` relative to the current working
    directory; both directories are created when missing. The number of
    each plot is printed before it is exported.

    The name of every plotted column must have the form
    "<start> | <end>"; the two parts label the negative and the positive
    end of the corresponding axis.
    """
    driver = webdriver.Chrome(os.path.join(BASE_DIR, "chromedriver"), options=options)
    try:
        df = pd.read_csv(os.path.join(OUTPUTS_DIR, "normalized_future_vectors.csv"))

        excluded_pairs = {
            (5, 6), (5, 7), (6, 7),
            (8, 9), (8, 10), (9, 10),
            (11, 12), (11, 13), (12, 13),
        }
        column_pairs = [
            pair
            for pair in combinations(range(5, len(df.columns)), 2)
            if pair not in excluded_pairs
        ]

        # identical for every plot, therefore prepared once
        colormap = {3: "#ffee33", 2: "#00a152", 1: "#2979ff", 0: "#d500f9"}
        titles = df["title"].to_list()
        cluster_numbers = df["cluster"].to_list()
        for output_dir in ("svgs", "pngs"):
            os.makedirs(output_dir, exist_ok=True)

        for idx, (x, y) in enumerate(column_pairs, 1):
            x_values = df[df.columns[x]].to_list()
            y_values = df[df.columns[y]].to_list()
            tsne_df = pd.DataFrame(
                zip(x_values, y_values),
                index=range(len(x_values)),
                columns=["x_coord", "y_coord"],
            )
            tsne_df["title"] = titles
            tsne_df["cluster_no"] = cluster_numbers

            only_one_cluster = pd.DataFrame(tsne_df.loc[tsne_df.cluster_no == 3])
            only_one_cluster["color"] = [
                colormap[cluster] for cluster in only_one_cluster["cluster_no"]
            ]
            plot_data = ColumnDataSource(
                data=only_one_cluster.to_dict(orient="list"))

            plot = figure(
                plot_width=1600,
                plot_height=1600,
                active_scroll="wheel_zoom",
                output_backend="svg",
                x_range=(-1.1, 1.1),
                y_range=(-1.1, 1.1),
            )
            plot.add_tools(HoverTool(tooltips="@title"))
            plot.circle(
                source=plot_data,
                x="x_coord",
                y="y_coord",
                line_alpha=0.6,
                fill_alpha=0.6,
                size=20,
                fill_color="color",
                line_color="color",
            )

            plot.yaxis.axis_label_text_font_size = "25pt"
            plot.yaxis.major_label_text_font_size = "25pt"
            plot.xaxis.axis_label_text_font_size = "25pt"
            plot.xaxis.major_label_text_font_size = "25pt"

            # raises ValueError unless the name contains exactly one "|"
            start_x, end_x = (part.strip() for part in df.columns[x].split("|"))
            start_y, end_y = (part.strip() for part in df.columns[y].split("|"))

            plot.title.text_font_size = value("32pt")
            plot.xaxis.visible = True
            plot.yaxis.visible = True

            # captions for the four axis ends: (text, x_offset, y_offset)
            captions = (
                (end_y, 0, 750),
                (start_y, 0, -750),
                (end_x, 600, 0),
                (start_x, -750, 0),
            )
            for text, x_offset, y_offset in captions:
                caption = Label(
                    text=text,
                    x_offset=x_offset,
                    y_offset=y_offset,
                    text_font_size="30px",
                )
                plot.add_layout(caption, "center")

            # no background, border, grid, outline or toolbar in the export
            plot.background_fill_color = None
            plot.border_fill_color = None
            plot.grid.grid_line_color = None
            plot.outline_line_color = None
            # draw both axes through the origin
            plot.yaxis.fixed_location = 0
            plot.xaxis.fixed_location = 0
            plot.toolbar.logo = None
            plot.toolbar_location = None

            print(idx)
            export_svg(
                plot,
                filename=f"svgs/{idx}.svg",
                webdriver=driver,
                height=1600,
                width=1600,
            )
            export_png(
                plot,
                filename=f"pngs/{idx}.png",
                webdriver=driver,
                height=1600,
                width=1600,
            )
    finally:
        # the browser is started by this function, so it is closed here
        # even when reading the data or an export fails
        driver.quit()
def draw_chart(df):
    """
    Plot a two-dimensional t-SNE projection of `df["wv"]`, show it, and
    export it as `cluster.svg` and `cluster.png` in the current working
    directory.

    Points are coloured by `df["cluster"]` (values 0 to 3) and sized by
    the normalised `df["tokens_len"]`.

    The projection is cached in `tsne3000.pkl` in the current working
    directory. The cache is reused whenever the file exists, regardless
    of the content of `df`; delete the file to force a recomputation.

    :param df: data frame providing the `wv`, `cluster`, `title` and
        `tokens_len` columns
    """
    driver = webdriver.Chrome(os.path.join(BASE_DIR, "chromedriver"))
    try:
        X = df["wv"].to_list()
        y = df["cluster"].to_list()

        tsne_filepath = "tsne3000.pkl"
        if not os.path.exists(tsne_filepath):
            tsne = TSNE(random_state=42)
            tsne_points = tsne.fit_transform(X)
            with open(tsne_filepath, "wb") as f:
                pickle.dump(tsne_points, f)
        else:
            # Cache hit.
            # NOTE: unpickling runs code embedded in the file; this is only
            # acceptable because the file is a cache written by this function.
            with open(tsne_filepath, "rb") as f:
                tsne_points = pickle.load(f)

        tsne_df = pd.DataFrame(
            tsne_points, index=range(len(X)), columns=["x_coord", "y_coord"]
        )
        tsne_df["title"] = df["title"].to_list()
        tsne_df["tokens_len"] = df["tokens_len"].to_list()
        tsne_df["cluster_no"] = y

        colormap = {0: "#ffee33", 1: "#00a152", 2: "#2979ff", 3: "#d500f9"}
        tsne_df["color"] = [colormap[cluster] for cluster in tsne_df["cluster_no"]]

        normalized = min_max_normalize(tsne_df.tokens_len.to_list())
        tsne_df["radius"] = [5 + share * 10 for share in normalized]

        print(tsne_df.to_dict(orient="list"))
        plot_data = ColumnDataSource(data=tsne_df.to_dict(orient="list"))
        print(plot_data)

        tsne_plot = figure(
            plot_width=1200,
            plot_height=1200,
            active_scroll="wheel_zoom",
            output_backend="svg",
        )
        tsne_plot.add_tools(HoverTool(tooltips="@title"))
        tsne_plot.circle(
            source=plot_data,
            x="x_coord",
            y="y_coord",
            line_alpha=0.6,
            fill_alpha=0.6,
            size="radius",
            fill_color="color",
            line_color="color",
        )

        tsne_plot.title.text_font_size = value("16pt")
        tsne_plot.xaxis.visible = True
        tsne_plot.yaxis.visible = True
        tsne_plot.background_fill_color = None
        tsne_plot.border_fill_color = None
        tsne_plot.grid.grid_line_color = None
        tsne_plot.outline_line_color = None

        show(tsne_plot)

        # the toolbar is removed only after show(), i.e. for the exports
        tsne_plot.toolbar.logo = None
        tsne_plot.toolbar_location = None

        export_svg(
            tsne_plot,
            filename="cluster.svg",
            webdriver=driver,
        )
        export_png(
            tsne_plot,
            filename="cluster.png",
            webdriver=driver,
        )
    finally:
        # the browser is started by this function, so it is closed here
        # even when the projection or an export fails
        driver.quit()
def calculate_cluster_number():
    """
    Run k-means on `happiness.csv` (read from `H_IN_DIRS`) for every
    cluster count from 2 to 10, export a t-SNE scatter plot of each
    clustering, and print the count with the highest silhouette score.

    The feature matrix consists of all columns except the first and the
    last one. For every count k the files `cluster-number<k>.svg` and
    `cluster-number<k>.png` are written to the current working directory.

    The printed value is 0 when no clustering reaches a silhouette score
    above 0.
    """
    kmax = 10
    colormap = {
        0: "#f44336",
        1: "#673ab7",
        2: "#9c27b0",
        3: "#e91e63",
        4: "#3f51b5",
        5: "#2196f3",
        6: "#03a9f4",
        7: "#00bcd4",
        8: "#009688",
        9: "#cddc39",
    }

    driver = webdriver.Chrome(
        os.path.join(BASE_DIR, "chromedriver"), options=opts
    )
    try:
        df = pd.read_csv(os.path.join(H_IN_DIRS, "happiness.csv"))
        # one row per sample, without the first and the last column
        x = df.iloc[:, 1:-1].values.tolist()

        # The projection depends only on the data and the fixed
        # random_state, not on k, so it is computed once.
        points = TSNE(random_state=42).fit_transform(x)
        embedding = pd.DataFrame(points, index=range(len(x)), columns=["x", "y"])

        ok = 0
        maximum = 0
        for k in range(2, kmax + 1):
            # A single unseeded fit provides the labels for both the
            # score and the plot, so the two always describe the same
            # clustering.
            labels = KMeans(n_clusters=k).fit_predict(x)
            score = silhouette_score(x, labels, metric="euclidean")

            t_df = embedding.copy()
            t_df["cluster_no"] = labels
            t_df["color"] = [colormap[label] for label in t_df["cluster_no"]]
            plot_data = ColumnDataSource(data=t_df.to_dict(orient="list"))

            p = figure(
                plot_width=1200,
                plot_height=1200,
                active_scroll="wheel_zoom",
                output_backend="svg",
            )
            p.add_tools(HoverTool(tooltips="@title"))
            p.circle(
                source=plot_data,
                x="x",
                y="y",
                line_alpha=0.9,
                fill_alpha=0.9,
                fill_color="color",
                line_color="color",
            )

            p.title.text_font_size = value("16pt")
            p.xaxis.visible = True
            p.yaxis.visible = True
            # no background, border, grid, outline or toolbar in the export
            p.background_fill_color = None
            p.border_fill_color = None
            p.grid.grid_line_color = None
            p.outline_line_color = None
            p.toolbar.logo = None
            p.toolbar_location = None

            export_svg(
                p,
                filename=f"cluster-number{k}.svg",
                webdriver=driver,
            )
            export_png(
                p,
                filename=f"cluster-number{k}.png",
                webdriver=driver,
            )

            if score > maximum:
                maximum = score
                ok = k

        print(ok)
    finally:
        # the browser is started by this function, so it is closed here
        # even when clustering or an export fails
        driver.quit()
# Bar chart of the mean MPG per (cylinders, manufacturer) group, exported
# as "plot.svg". `df` and `group` are expected to be defined beforehand.

# colour mapping over the "cyl_mfr" field, built from the sorted unique
# cylinder values
index_cmap = factor_cmap(
    "cyl_mfr",
    palette=Spectral5,
    factors=sorted(df.cyl.unique()),
    end=1,
)

p = figure(
    title="Mean MPG by # Cylinders and Manufacturer",
    x_range=group,
    width=800,
    height=300,
    tooltips=[("MPG", "@mpg_mean"), ("Cyl, Mfr", "@cyl_mfr")],
    toolbar_location=None,
)

p.vbar(
    source=group,
    x="cyl_mfr",
    top="mpg_mean",
    width=1,
    fill_color=index_cmap,
    line_color="white",
)

# frame and grid
p.outline_line_color = None
p.xgrid.grid_line_color = None

# x axis
p.xaxis.major_label_orientation = 1.2
p.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
p.x_range.range_padding = 0.05

# bars start at zero
p.y_range.start = 0

export_svg(p, filename="plot.svg")
def export_as_svg(self, filename):
    """
    Export the content of the currently active tab as SVG.

    The child of the active tab is first passed to
    `_change_backend_to_svg` and then written to `filename`.
    """
    panels = self._tabs.tabs
    active_panel = panels[self._tabs.active]
    content = active_panel.child
    self._change_backend_to_svg(content)
    export_svg(content, filename=filename)