def export_figure(self, output_file):
    """
    Export the main figure and its colorbar, laid out in a row, to a file.

    The output format is selected from the extension of `output_file`;
    only ``.png`` and ``.svg`` are supported.  The outcome is reported with
    ``self.set_export_status('figure', ...)`` as ``'done'`` or ``'failed'``.
    Export errors are printed with their traceback and are not propagated.

    Requires a working *selenium* driver
    (https://www.selenium.dev/selenium/docs/api/py/).
    Related attributes are `selenium_webdriver`, `figure_export_width` and
    `figure_export_height`.
    """
    export_kwargs = {}
    driver = None
    if self.selenium_webdriver is not None:
        # a fresh driver is instantiated for each export; it is released in
        # the `finally` clause below
        driver = self.selenium_webdriver()
        export_kwargs['webdriver'] = driver
    if self.figure_export_width is not None:
        export_kwargs['width'] = self.figure_export_width
    if self.figure_export_height is not None:
        export_kwargs['height'] = self.figure_export_height
    try:
        doc = row(self.main_figure, self.colorbar_figure)
        from bokeh.io import export
        if output_file.endswith('.png'):
            export.export_png(doc, filename=output_file, **export_kwargs)
        elif output_file.endswith('.svg'):
            export.export_svg(doc, filename=output_file, **export_kwargs)
        else:
            raise NotImplementedError("format '{}' not supported".format(
                os.path.splitext(output_file)[1]))
        self.set_export_status('figure', 'done')
    except Exception:
        # KeyboardInterrupt and SystemExit are not Exception subclasses and
        # therefore still propagate
        traceback.print_exc()
        self.set_export_status('figure', 'failed')
    finally:
        if driver is not None:
            try:
                driver.quit()
            except Exception:
                # a failing shutdown must not hide the export outcome
                traceback.print_exc()
def save(self, format='html', file_name='Parallel-Coordinates', path=None) -> None:
    """
    Save the plot in the requested format.

    :param format: One of 'html', 'png', 'svg' or 'all'; 'all' writes one
        file per supported format.
    :param file_name: Base name of the output file; the full name is built
        by ``self.file_name_with_ext_and_path``.
    :param path: Where to store the plot. When None, the value returned by
        ``self.my_path()`` is used.
    :raises ValueError: If `format` is not one of the supported values.
    :return:
    """
    if path is None:
        path = self.my_path()
    valid_format = ('html', 'png', 'svg', 'all')
    if format not in valid_format:
        raise ValueError('The format is incorrect')

    export_all = format == 'all'
    # Each target gets its own variable: the base `file_name` must stay
    # untouched so that 'all' derives every output from the same base name.
    if export_all or format == 'html':
        html_file = self.file_name_with_ext_and_path(file_name, 'html', path)
        save(self.parallel_plot, filename=html_file)
    if export_all or format == 'png':
        png_file = self.file_name_with_ext_and_path(file_name, 'png', path)
        be.export_png(self.parallel_plot, filename=png_file)
    if export_all or format == 'svg':
        svg_file = self.file_name_with_ext_and_path(file_name, 'svg', path)
        previous_backend = self.parallel_plot.output_backend
        self.parallel_plot.output_backend = "svg"
        try:
            be.export_svgs(self.parallel_plot, filename=svg_file)
        finally:
            # do not leave the plot switched to the SVG backend
            self.parallel_plot.output_backend = previous_backend
def export_figure(self, output_file):
    """
    Export the main figure and its colorbar, laid out in a row, to a file.

    The output format is selected from the extension of `output_file`;
    only ``.png`` and ``.svg`` are supported.  The outcome is reported with
    ``self.set_export_status("figure", ...)`` as ``"done"`` or ``"failed"``.
    Export errors are printed with their traceback and are not propagated.

    Requires a working *selenium* driver
    (https://www.selenium.dev/selenium/docs/api/py/).
    Related attributes are `selenium_webdriver`, `figure_export_width` and
    `figure_export_height`.
    """
    # Imported once at function scope: importing `traceback` inside a nested
    # branch would make the name local and unbound on the other code paths.
    import traceback
    import warnings

    export_kwargs = {}
    webdriver = None
    if self.selenium_webdriver is not None:
        try:
            from importlib import import_module
            # e.g. 'selenium.webdriver.firefox.webdriver'
            #   -> 'selenium.webdriver.firefox.options'
            options_module = import_module(
                self.selenium_webdriver.__module__[:-9] + "options")
            options = options_module.Options()
            options.headless = True
            webdriver = self.selenium_webdriver(options=options)
        except (ImportError, AttributeError):
            import selenium
            # Safari and Edge are skipped silently; for any other driver
            # the failure to set options is reported as a warning
            if self.selenium_webdriver not in (
                selenium.webdriver.Safari,
                selenium.webdriver.Edge,
            ):
                warnings.warn(
                    "could not access the webdrivers options:\n"
                    + traceback.format_exc(),
                    ImportWarning,
                )
            # fall back to a driver with default options
            webdriver = self.selenium_webdriver()
        export_kwargs["webdriver"] = webdriver
    if self.figure_export_width is not None:
        export_kwargs["width"] = self.figure_export_width
    if self.figure_export_height is not None:
        export_kwargs["height"] = self.figure_export_height
    try:
        doc = row(self.main_figure, self.colorbar_figure)
        from bokeh.io import export
        if output_file.endswith(".png"):
            export.export_png(doc, filename=output_file, **export_kwargs)
        elif output_file.endswith(".svg"):
            export.export_svg(doc, filename=output_file, **export_kwargs)
        else:
            raise NotImplementedError("format '{}' not supported".format(
                os.path.splitext(output_file)[1]))
        self.set_export_status("figure", "done")
    except Exception:
        # KeyboardInterrupt and SystemExit are not Exception subclasses and
        # therefore still propagate
        traceback.print_exc()
        self.set_export_status("figure", "failed")
    finally:
        if webdriver is not None:
            try:
                webdriver.quit()
            except Exception:
                # a failing shutdown must not hide the export outcome
                traceback.print_exc()
def draw_2d_chart(idx, x, cluster_output_path, k):
    """Project `x` to 2D with t-SNE and export the scatter as SVG and PNG.

    Arguments
    ---------
    idx:
        Cluster number of each row of `x`; used as key into the module-level
        `colormap` to colour the points.
    x:
        Input passed to ``TSNE.fit_transform``.
    cluster_output_path: str
        Directory receiving ``cluster2d-{k}.svg`` and ``cluster2d-{k}.png``.
    k:
        Value interpolated into the output file names.
    """
    driver = webdriver.Chrome(os.path.join(BASE_DIR, "chromedriver"),
                              options=opts)
    try:
        tsne = TSNE(random_state=42)
        points = tsne.fit_transform(x)
        t_df = pd.DataFrame(points, index=range(len(x)), columns=["x", "y"])
        t_df["cluster_no"] = idx
        t_df["color"] = [colormap[cluster] for cluster in t_df["cluster_no"]]
        plot_data = ColumnDataSource(data=t_df.to_dict(orient="list"))

        p = figure(
            plot_width=1200,
            plot_height=1200,
            active_scroll="wheel_zoom",
            output_backend="svg",
        )
        p.add_tools(HoverTool(tooltips="@title"))
        p.circle(
            source=plot_data,
            x="x",
            y="y",
            line_alpha=0.9,
            fill_alpha=0.9,
            size=8,
            fill_color="color",
            line_color="color",
        )
        p.title.text_font_size = value("16pt")
        p.xaxis.visible = True
        p.yaxis.visible = True
        # transparent background, no grid, no frame, no toolbar
        p.background_fill_color = None
        p.border_fill_color = None
        p.grid.grid_line_color = None
        p.outline_line_color = None
        p.toolbar.logo = None
        p.toolbar_location = None

        export_svg(
            p,
            filename=os.path.join(cluster_output_path, f"cluster2d-{k}.svg"),
            webdriver=driver,
        )
        export_png(
            p,
            filename=os.path.join(cluster_output_path, f"cluster2d-{k}.png"),
            webdriver=driver,
        )
    finally:
        # release the browser process even when the export fails
        driver.quit()
def save_png(model, filename):
    """
    Saves a bokeh model to png

    The webdriver is created on first use with ``create_webdriver()`` and
    cached on ``state.webdriver`` so that later calls reuse it.

    Arguments
    ---------
    model: bokeh.model.Model
      Model to save to png
    filename: str
      Filename to save to
    """
    if not state.webdriver:
        state.webdriver = create_webdriver()
    # `filename` is passed by keyword: it is keyword-only in recent bokeh
    # releases, and the keyword form is accepted by older ones as well.
    export_png(model, filename=filename, webdriver=state.webdriver)
def save_png(model, filename):
    """
    Render a bokeh model into a png file.

    A webdriver is obtained from ``webdriver_control.create()`` the first
    time it is needed and kept on ``state.webdriver`` for subsequent calls.

    Arguments
    ---------
    model: bokeh.model.Model
      Model to render
    filename: str
      Destination file name
    """
    from bokeh.io.webdriver import webdriver_control

    driver_missing = not state.webdriver
    if driver_missing:
        state.webdriver = webdriver_control.create()
    export_png(model, filename=filename, webdriver=state.webdriver)
def save_png(model, filename, template=None, template_variables=None):
    """
    Saves a bokeh model to png

    The webdriver is created on first use and cached on ``state.webdriver``.
    When `template` is given, ``bokeh.io.export.get_layout_html`` is
    temporarily replaced by a function rendering the model through
    ``file_html`` with that template; the original function is restored
    afterwards, whether or not the export succeeds.

    Arguments
    ---------
    model: bokeh.model.Model
      Model to save to png
    filename: str
      Filename to save to
    template:
      template file, as used by bokeh.file_html. If None will use bokeh defaults
    template_variables:
      template_variables file dict, as used by bokeh.file_html
    """
    from bokeh.io.webdriver import webdriver_control

    if not state.webdriver:
        state.webdriver = webdriver_control.create()
    webdriver = state.webdriver

    if not template:
        export_png(model, filename=filename, webdriver=webdriver)
        return

    def get_layout_html(obj, resources, width, height):
        # `width` and `height` are part of the replaced function's signature
        # but are not forwarded to `file_html`
        return file_html(obj, resources, title="", template=template,
                         template_variables=template_variables,
                         suppress_callback_warning=True, _always_new=True)

    # Patch outside the `try` so the `finally` clause only ever runs with
    # `old_layout_fn` bound.
    old_layout_fn = bokeh.io.export.get_layout_html
    bokeh.io.export.get_layout_html = get_layout_html
    try:
        export_png(model, filename=filename, webdriver=webdriver)
    finally:
        bokeh.io.export.get_layout_html = old_layout_fn
def show_manhattan_plot(df, group_by, x_axis, y_axis,
                        title='Manhattan Plot', save_to=None):
    """Draw a Manhattan plot, one group of points per value of `group_by`.

    For every distinct value of ``df[group_by]`` the rows matching it are
    drawn as circles at ``x_axis + offset`` versus ``-log10(y_axis)``, where
    the offset is 0.5 for the first group and grows by 1 per group.  Groups
    alternate between orange and gray.

    Arguments
    ---------
    df:
        Data frame exposing ``query`` and DLPack export (``to_dlpack``) on
        its columns.
    group_by: str
        Column whose distinct values define the groups and the x ticks.
    x_axis: str
        Column holding the x positions.
    y_axis: str
        Column holding the values plotted as ``-log10``.
    title: str
        Figure title.
    save_to: str, optional
        When given, the figure is exported to this PNG file; otherwise it is
        shown in the notebook.

    Returns
    -------
    The bokeh figure.
    """
    chroms = df[group_by].unique().to_array()

    manhattan_fig = figure(title=title)
    manhattan_fig.xaxis.axis_label = 'Chromosomes'
    manhattan_fig.yaxis.axis_label = '-log10(p)'
    manhattan_fig.xaxis.ticker = FixedTicker(ticks=list(chroms))

    for position, chrom in enumerate(chroms):
        offset = position + 0.5
        cdf = df.query('%s == %s' % (group_by, chrom))
        x_array = cupy.fromDlpack(cdf[x_axis].to_dlpack()) + offset
        y_array = -cupy.log10(cupy.fromDlpack(cdf[y_axis].to_dlpack()))
        manhattan_fig.circle(
            x_array.get(), y_array.get(),
            size=2,
            color='orange' if position % 2 == 0 else 'gray',
            alpha=0.5)

    if save_to:
        export_png(manhattan_fig, filename=save_to)
    else:
        manhattan_handle = show(manhattan_fig, notebook_handle=True)
        push_notebook(handle=manhattan_handle)
    return manhattan_fig
def show_qq_plot(df, x_axis, y_axis, title="QQ", save_to=None,
                 x_max=None, y_max=None):
    """Draw a QQ plot of ``-log10`` of two columns of `df`.

    Both columns are negated-log10 transformed and plotted against each
    other, together with an orange line from the origin to
    ``(x_max, y_max)``.  Axis maxima default to the largest transformed
    value of the respective column.  When a maximum is infinite, a message
    asking for an explicit value is printed and None is returned.

    Arguments
    ---------
    df:
        Data frame whose columns support DLPack export (``to_dlpack``).
    x_axis, y_axis: str
        Names of the columns to plot.
    title: str
        Figure title.
    save_to: str, optional
        When given, the figure is exported to this PNG file; otherwise it is
        shown in the notebook.
    x_max, y_max: optional
        Upper ends of the axis ranges.

    Returns
    -------
    The bokeh figure, or None when an axis maximum is infinite.
    """
    raw_x = cupy.fromDlpack(df[x_axis].to_dlpack())
    raw_y = cupy.fromDlpack(df[y_axis].to_dlpack())
    x_values = -cupy.log10(raw_x)
    y_values = -cupy.log10(raw_y)

    x_max = cupy.max(x_values).tolist() if x_max is None else x_max
    y_max = cupy.max(y_values).tolist() if y_max is None else y_max

    # the y axis is checked first, then the x axis
    if y_max == cupy.inf:
        print("Please pass y_max. Input contains inf.")
        return
    if x_max == cupy.inf:
        print("Please pass x_max. Input contains inf.")
        return

    qq_fig = figure(x_range=(0, x_max), y_range=(0, y_max), title=title)
    qq_fig.circle(x_values.get(), y_values.get(), size=1)
    qq_fig.line([0, x_max], [0, y_max], line_color='orange', line_width=2)

    if not save_to:
        qq_handle = show(qq_fig, notebook_handle=True)
        push_notebook(handle=qq_handle)
    else:
        export_png(qq_fig, filename=save_to)
    return qq_fig
def Violin_SHM_Plot(clone_df, png = None, title = "", vshm_col = "V_SHM", jshm_col = "J_SHM", split_col = None, quads = True,
                    violin_width = 0.8, line_width = 0.4, figsize = (1000, 600), hover_tooltip = True):
    """Creates a SHM violin plot that can be used to compare multiple categories in a Repertoire.

    When both SHM columns are present, each violin shows the V gene SHM
    density on its left half and the J gene SHM density on its right half;
    with a single column the density is mirrored into a symmetric violin.

    Parameters
    ----------
    clone_df: pandas DataFrame
    png: str, optional
        When given, the plot is also exported to this PNG file.
    title: str
    vshm_col, jshm_col: str or None
        Columns holding the V / J gene SHM values; None disables that half.
    split_col: str or None
        Column to group by; one violin is drawn per group.
    quads: bool
        Currently unused; accepted for backward compatibility.
    violin_width: float
        Full width of one violin in x axis units.
    line_width: float
        Outline width of the violins.
    figsize: tuple
        (width, height) of the plot.
    hover_tooltip: bool
        Whether to add a hover tool showing mean, max and quartiles.

    Returns
    ----------
    plot: bokeh figure
    """
    figure_params = {
        "plot_width": figsize[0],
        "plot_height": figsize[1],
        "y_range": Range1d(-0.005, 0.3, bounds = (-0.01, 0.31)),
        "title": title,
        "tools": "pan, wheel_zoom, box_zoom, save, reset, help",
        "active_scroll": "wheel_zoom",
        "toolbar_location": "right"
    }

    plot = figure(**figure_params)
    plot.grid.visible = False
    plot.xaxis.minor_tick_line_color = None
    plot.xaxis.major_label_text_font_size = "10pt"
    plot.yaxis.axis_label = "V/J Gene SHM"
    plot.yaxis.major_label_text_font_size = "10pt"
    plot.yaxis.formatter = NumeralTickFormatter(format = "0.00%")

    if hover_tooltip:
        hover_tooltips = [("Mean SHM", "@mean{(0.00%)}"), ("Max SHM", "@max{(0.00%)}"),
                          ("25th Percentile", "@quantile25{(0.00%)}"), ("75th Percentile", "@quantile75{(0.00%)}")]
        hover_tool = HoverTool(point_policy = "follow_mouse", tooltips = hover_tooltips)
        plot.add_tools(hover_tool)

    shm_cols = [col for col in (vshm_col, jshm_col) if col is not None]

    #To compare samples, add the sample column to split on to the DataFrame
    if split_col is not None:
        shm_cols.append(split_col)
        samples = []
        shm_dfs = []
        #Group by the bare column name: a one-element list makes newer pandas yield 1-tuples as group keys
        for sample, df in clone_df[shm_cols].groupby(split_col):
            samples.append(sample)
            shm_dfs.append(df)
    else:
        samples = ["Repertoire"]
        shm_dfs = [clone_df[shm_cols]]

    vshm_violin_color = "lightgreen"
    jshm_violin_color = "slateblue"

    violin_xs = []
    violin_ys = []
    violin_colors = []
    violin_legends = []
    hover_means = []
    hover_maxes = []
    hover_25quantiles = []
    hover_75quantiles = []

    x_location_to_category = {}
    violin_x_offset = 0
    for sample, df in zip(samples, shm_dfs):
        has_vshm = vshm_col in df.columns
        has_jshm = jshm_col in df.columns

        #(column, side, mirrored, color, legend) for every half to draw; V goes left, J goes right
        halves = []
        if has_vshm:
            halves.append((vshm_col, -1.0, not has_jshm, vshm_violin_color, "V Gene SHM"))
        if has_jshm:
            halves.append((jshm_col, 1.0, not has_vshm, jshm_violin_color, "J Gene SHM"))

        for shm_col, side, mirrored, color, legend in halves:
            shm_values = df[shm_col]
            hover_means.append([shm_values.mean()])
            hover_maxes.append([shm_values.max()])
            hover_25quantiles.append([shm_values.quantile(0.25)])
            hover_75quantiles.append([shm_values.quantile(0.75)])

            x_points, y_points = _violin_half_outline(shm_values, side, mirrored, violin_width)
            violin_xs.append(x_points + violin_x_offset)
            violin_ys.append(y_points)
            violin_colors.append(color)
            violin_legends.append(legend)

        #Axis label overrides must be strings
        x_location_to_category[violin_x_offset] = str(sample)
        violin_x_offset += violin_width * 1.2

    violin_data = {
        "xs": violin_xs,
        "ys": violin_ys,
        "fill_color": violin_colors,
        "legend": violin_legends,
        "mean": hover_means,
        "max": hover_maxes,
        "quantile25": hover_25quantiles,
        "quantile75": hover_75quantiles
    }
    violin_source = ColumnDataSource(violin_data)
    plot.patches(xs = "xs", ys = "ys", fill_color = "fill_color", line_color = "black", line_width = line_width,
                 legend = "legend", source = violin_source)

    #Replace / remap the X axis tickers to the categorical samples
    plot.xaxis.ticker = FixedTicker(ticks = list(x_location_to_category))
    plot.xaxis.major_label_overrides = x_location_to_category
    plot.x_range.bounds = (min(x_location_to_category) - 1, max(x_location_to_category) + 1)

    if png is not None:
        #filename is keyword-only in recent bokeh releases
        export_png(plot, filename = png)

    return plot


def _violin_half_outline(shm_values, side, mirrored, violin_width):
    """Return the (x, y) patch outline of one violin half for `shm_values`.

    The density is a Gaussian KDE (Scott bandwidth) evaluated on 300 points
    from 0 to the maximum value, scaled so its peak spans half of
    `violin_width`.  `side` is -1.0 for the left half and 1.0 for the right
    half.  When `mirrored`, the outline is reflected onto the other side to
    form a full violin; otherwise one closing point is appended.
    """
    y_points = numpy.linspace(0.0, shm_values.max(), 300)
    density = gaussian_kde(shm_values, "scott")(y_points)
    x_points = side * density / density.max() * violin_width / 2.0
    if mirrored:
        #Walk back down along the reflected outline
        x_points = numpy.append(x_points, numpy.flipud(-x_points))
        y_points = numpy.append(y_points, numpy.flipud(y_points))
    else:
        #Return to the patch starting point; the other half is drawn separately
        x_points = numpy.append(x_points, abs(x_points).min())
        y_points = numpy.append(y_points, y_points.min())
    return x_points, y_points
def Rarefaction_Plot(align_df, png = None, title = "", cdr_col = "CDR3_AA", split_col = None, cdr_identity = 0.96,
                     steps = 50, reads = None, figsize = (800, 600), hover_tooltip = True, save_to_file = False):
    """Creates a rarefaction plot: clonotype counts at increasing read subsample sizes.

    For every sample the reads are randomly subsampled at growing sizes and
    each subsample is clonotyped with ``Clonotype_Usearch``; the resulting
    curve is drawn as a line with markers.

    Parameters
    ----------
    align_df: pandas DataFrame
    png: str, optional
        When given, the plot is also exported to this PNG file.
    title: str
    cdr_col: str
        Column holding the sequences passed to ``Clonotype_Usearch``.
    split_col: str or None
        Column to group by; one curve is drawn per group.
    cdr_identity: float
        Forwarded to ``Clonotype_Usearch`` as `identity`.
    steps: int
        Number of subsampling steps, used when `reads` is None.
    reads: int or None
        Fixed number of reads between two subsample sizes.
    figsize: tuple
        (width, height) of the plot.
    hover_tooltip: bool
        Whether to add a hover tool.
    save_to_file: bool
        When True, the curve of each sample is also written to
        ``<sample>_Rarefaction_Data.txt`` in the working directory.

    Returns
    ----------
    plot: bokeh figure
    """
    from itertools import cycle

    figure_params = {
        "plot_width": figsize[0],
        "plot_height": figsize[1],
        "title": title,
        "tools": "save, help",
        "toolbar_location": "right"
    }

    plot = figure(**figure_params)
    plot.xgrid.grid_line_color = None
    plot.xaxis.axis_label = "Total Sampled Reads"
    plot.yaxis.axis_label = "Total Clonotypes"
    plot.axis.formatter = NumeralTickFormatter(format = "0")

    #If comparing multiple samples, add the sample column to split on to the DataFrame
    if split_col is not None:
        samples = []
        reads_dfs = []
        #Group by the bare column name: a one-element list makes newer pandas yield 1-tuples as group keys
        for sample, df in align_df[[cdr_col, split_col]].groupby(split_col):
            samples.append(sample)
            reads_dfs.append(df)
    else:
        samples = ["Repertoire"]
        reads_dfs = [align_df[[cdr_col]]]

    if hover_tooltip:
        #The tooltip fields are the column names of the data source built below.
        #The list is completed before the tool is created so the sample entry is part of it.
        tooltips = [("Total Sampled Reads", "@reads"), ("Total Clones", "@clones")]
        if len(samples) > 1:
            tooltips.append(("Sample", "@sample"))
        hover_tool = HoverTool(point_policy = "snap_to_data", tooltips = tooltips, mode = "hline", names = ["rar_line"])
        plot.add_tools(hover_tool)

    sample_colors = ["#1EA078", "#DC5A00", "#786EB4", "#E6288C", "#B4D28C", "#A028B4"]
    #Colors repeat when there are more samples than colors, so no sample is dropped
    for sample, df, color in zip(samples, reads_dfs, cycle(sample_colors)):
        total_reads = len(df)

        if reads is not None:
            subsamp_steps = reads
        else:
            subsamp_steps = math.floor(total_reads / steps)
        #A step below 1 (fewer reads than steps) would never reach total_reads
        subsamp_steps = max(1, subsamp_steps)

        #Create the list of all read subsample counts to clonotype
        subsamp_sizes = []
        cur_total = subsamp_steps
        while cur_total < total_reads:
            subsamp_sizes.append(cur_total)
            cur_total += subsamp_steps
        subsamp_sizes.append(total_reads)

        subsamp_clones = [Clonotype_Usearch(df.sample(n)[cdr_col], identity = cdr_identity) for n in subsamp_sizes]

        rarefaction_data = {
            "reads": subsamp_sizes,
            "clones": subsamp_clones,
            "sample": [sample if len(samples) > 1 else None] * len(subsamp_sizes)
        }
        rar_source = ColumnDataSource(rarefaction_data)
        plot.line(x = "reads", y = "clones", color = color, line_width = 3, source = rar_source, legend = "sample",
                  name = "rar_line")
        plot.scatter(x = "reads", y = "clones", color = color, source = rar_source)

        if save_to_file:
            with open("{0}_Rarefaction_Data.txt".format(sample), "w") as rarefaction_text_file:
                rarefaction_text_file.write("Reads\tClones\n")
                for read_count, clone_count in zip(subsamp_sizes, subsamp_clones):
                    rarefaction_text_file.write("{0}\t{1}\n".format(read_count, clone_count))

    if png is not None:
        #filename is keyword-only in recent bokeh releases
        export_png(plot, filename = png)

    return plot
def CDR_Length_Histogram_Plot(clone_df, png = None, title = "", cdr_col = "CDR3_AA", split_col = None,
                              quantile_boundries = (0.0001, 0.9999), figsize = (800, 600)):
    """Creates a histogram of CDR3 lengths, optionally one set of bars per sample.

    Every length gets its own unit-wide bin, normalized to a density; with
    several samples the bars of one bin are drawn side by side.

    Parameters
    ----------
    clone_df: pandas DataFrame
    png: str, optional
        When given, the plot is also exported to this PNG file.
    title: str
    cdr_col: str
        Column holding the CDR3 sequences; their string lengths are plotted.
    split_col: str or None
        Column to group by; one set of bars is drawn per group.
    quantile_boundries: tuple or None
        (lower, upper) quantiles of the CDR3 lengths used as the initial
        x axis range; None leaves the range untouched.
    figsize: tuple
        (width, height) of the plot.

    Returns
    ----------
    plot: bokeh figure
    """
    figure_params = {
        "plot_width": figsize[0],
        "plot_height": figsize[1],
        "title": title,
        "tools": "pan, wheel_zoom, box_zoom, save, reset, help",
        "active_scroll": "wheel_zoom",
        "toolbar_location": "right"
    }

    plot = figure(**figure_params)
    plot.grid.visible = False
    plot.xaxis.minor_tick_line_color = None
    plot.xaxis.axis_label = "CDR3 Length"
    plot.xaxis.axis_label_text_font_size = "12pt"
    plot.xaxis.major_label_text_font_size = "12pt"
    plot.yaxis.axis_label = "P(x)"
    plot.yaxis.axis_label_text_font_size = "12pt"
    plot.yaxis.major_label_text_font_size = "12pt"

    #To compare samples, add the sample column to split on to the DataFrame
    if split_col is not None:
        samples = []
        cdr3_lens = []
        #Group by the bare column name: a one-element list makes newer pandas yield 1-tuples as group keys
        for sample, df in clone_df[[cdr_col, split_col]].groupby(split_col):
            samples.append(sample)
            cdr3_lens.append(df[cdr_col].str.len().dropna())
    else:
        samples = ["Repertoire"]
        cdr3_lens = [clone_df[cdr_col].str.len().dropna()]

    bin_min = int(min(cdr_len_series.min() for cdr_len_series in cdr3_lens))
    bin_max = int(max(cdr_len_series.max() for cdr_len_series in cdr3_lens)) + 1
    #One unit-wide bin per length: the edges must include bin_max, otherwise the
    #two longest lengths share the last (closed) bin
    bin_edges = list(range(bin_min, bin_max + 1))

    bar_colors = ["#A0C8E6", "#32A032", "#1E78B4", "#B4DC8C"]
    bar_offset = 0.0
    bar_width = 1 / len(samples)
    upper_y = 0.0
    for idx, (sample, cdr_len_series) in enumerate(zip(samples, cdr3_lens)):
        heights, lefts = numpy.histogram(cdr_len_series, density = True, bins = bin_edges)

        #Ensure proper Y axis scrolling boundaries are set
        if heights.max() > upper_y:
            upper_y = heights.max()

        #Shift bars if multiple samples are being plotted
        bar_lefts = lefts.astype(float)[:-1] + bar_offset
        bar_rights = bar_lefts + bar_width

        #Colors repeat when there are more samples than colors
        plot.quad(top = heights, bottom = 0, left = bar_lefts, right = bar_rights,
                  fill_color = bar_colors[idx % len(bar_colors)], line_color = None, legend = str(sample))
        bar_offset += bar_width

    plot.y_range.start = -0.001
    plot.y_range.end = upper_y
    plot.y_range.bounds = (-0.05, upper_y * 1.5)

    if quantile_boundries is not None:
        all_lengths = clone_df[cdr_col].str.len()
        plot.x_range.start = all_lengths.quantile(quantile_boundries[0])
        plot.x_range.end = all_lengths.quantile(quantile_boundries[1])
    plot.x_range.bounds = (0, bin_max + 4)

    if png is not None:
        #filename is keyword-only in recent bokeh releases
        export_png(plot, filename = png)

    return plot
def VJ_Gene_Plot(clone_df, png=None, title="", vgene_col="VGene", jgene_col="JGene", count_col="Clustered",
                 vgene_colors=vgene_colors, vfamily_colors=vfamily_colors, jgene_colors=jgene_colors,
                 vj_gap=0.008, vgene_gap=0.0, line_width=0.4, figsize=(800, 800), hover_tooltip=True):
    """Creates a donut chart for prevalence of all V/J gene pairs in a Repertoire.

    The inner ring has one wedge per V gene, sized by its share of the
    total counts.  The outer ring subdivides every V gene wedge by J gene.
    A select widget switches the inner ring between per-gene and per-family
    colors.

    Parameters
    ----------
    clone_df: pandas DataFrame
    png: str, optional
        When given, the plot (without the select widget) is also exported
        to this PNG file.
    title: str
    vgene_col, jgene_col: str
        Columns holding the V / J gene names.
    count_col: str
        Column holding the counts summed per V/J pair.
    vgene_colors, vfamily_colors, jgene_colors: mapping
        Color lookups keyed by V gene, V family (the part of the V gene
        name before the first "-") and J gene; the colors must provide a
        ``darken`` method.
    vj_gap: float
        Radial gap between the V and J rings.
    vgene_gap: float
        Angular gap, in degrees, between neighboring V genes.
    line_width: float
        Outline width of the wedges.
    figsize: tuple
        (width, height) of the plot.
    hover_tooltip: bool
        Whether to add a hover tool showing gene and percentage.

    Returns
    ----------
    plot_layout: bokeh layout (column of the select widget and the plot)
    """
    figure_params = {
        "plot_width": figsize[0],
        "plot_height": figsize[1],
        "x_range": Range1d(-0.5, 1.5, bounds=(-1.5, 2.5)),
        "y_range": Range1d(-0.5, 1.5, bounds=(-1.5, 2.5)),
        "title": title,
        "tools": "pan, wheel_zoom, box_zoom, tap, save, reset, help",
        "active_scroll": "wheel_zoom",
        "toolbar_location": "right"
    }

    plot = figure(**figure_params)
    plot.grid.visible = False
    plot.axis.visible = False

    if hover_tooltip:
        hover_tool = HoverTool(tooltips=[("Gene", "@legend"), ("Percent", "@percent{(0.00%)}")],
                               point_policy="snap_to_data")
        plot.add_tools(hover_tool)

    gene_df = clone_df[[vgene_col, jgene_col, count_col]].groupby([vgene_col, jgene_col]).agg({count_col: "sum"})
    #Sort by V gene ascending, then J gene ascending
    gene_df = gene_df.sort_index().reset_index()

    total_vgenes = len(gene_df[vgene_col].drop_duplicates())
    gap_size = float(vgene_gap)
    remaining_size = 360.0 - float(total_vgenes * vgene_gap)
    total_counts = gene_df[count_col].sum()
    gene_df["Arc_Length"] = gene_df[count_col] / total_counts * remaining_size

    #Starting at 90 degrees (top center of the circle) plus half the gap size
    cur_v_start = 90.0 + (gap_size / 2.0)

    v_start_angles = []
    v_end_angles = []
    vgene_facecolors = []
    vgene_hover_colors = []
    vfamily_facecolors = []
    vfamily_hover_colors = []
    v_legend_text = []
    v_legend_percent = []

    j_start_angles = []
    j_end_angles = []
    jgene_facecolors = []
    jgene_hover_colors = []
    j_legend_text = []
    j_legend_percent = []
    for vgene in gene_df[vgene_col].drop_duplicates():
        cur_vgene_df = gene_df[gene_df[vgene_col] == vgene]

        vfamily = vgene.split("-")[0]
        vgene_color = vgene_colors[vgene]
        vfamily_color = vfamily_colors[vfamily]

        cur_v_end = cur_v_start + cur_vgene_df["Arc_Length"].sum()
        v_start_angles.append(cur_v_start)
        v_end_angles.append(cur_v_end)
        vgene_facecolors.append(vgene_color)
        vgene_hover_colors.append(vgene_color.darken(0.05))
        vfamily_facecolors.append(vfamily_color)
        vfamily_hover_colors.append(vfamily_color.darken(0.05))
        v_legend_text.append(vgene)

        #Counts come from count_col, not from a fixed column name
        cur_vgene_counts = cur_vgene_df[count_col].sum()
        v_legend_percent.append(cur_vgene_counts / total_counts)

        #Every V/J pair is a single row after the groupby, so its count is the row value
        cur_j_start = cur_v_start
        for jgene, jgene_arc_length, cur_jgene_counts in zip(cur_vgene_df[jgene_col], cur_vgene_df["Arc_Length"],
                                                             cur_vgene_df[count_col]):
            cur_j_end = cur_j_start + jgene_arc_length
            jgene_color = jgene_colors[jgene]

            j_start_angles.append(cur_j_start)
            j_end_angles.append(cur_j_end)
            jgene_facecolors.append(jgene_color)
            jgene_hover_colors.append(jgene_color.darken(0.05))
            j_legend_text.append(jgene)
            #J gene percentages are relative to their V gene
            j_legend_percent.append(cur_jgene_counts / cur_vgene_counts)

            cur_j_start = cur_j_end

        cur_v_start = cur_v_end + gap_size

    #Glyph settings shared by both rings
    wedge_style = {
        "x": 0.5,
        "y": 0.5,
        "start_angle": "start_angle",
        "end_angle": "end_angle",
        "fill_color": "fill_color",
        "selection_fill_color": "fill_color",
        "nonselection_fill_color": "fill_color",
        "selection_fill_alpha": 1.0,
        "nonselection_fill_alpha": 0.2,
        "hover_fill_color": "hover_fill_color",
        "line_color": "black",
        "line_width": line_width,
        "legend": "legend",
        "start_angle_units": "deg",
        "end_angle_units": "deg"
    }

    v_wedge_data = {
        "start_angle": v_start_angles,
        "end_angle": v_end_angles,
        "fill_color": vgene_facecolors,
        "legend": v_legend_text,
        "percent": v_legend_percent,
        "vgene_facecolors": vgene_facecolors,
        "vfamily_facecolors": vfamily_facecolors,
        "hover_fill_color": vgene_hover_colors,
        "vgene_hover_colors": vgene_hover_colors,
        "vfamily_hover_colors": vfamily_hover_colors
    }
    v_source = ColumnDataSource(v_wedge_data)
    v_inner_rad = 0.4
    v_outer_rad = 0.692
    plot.annular_wedge(inner_radius=v_inner_rad, outer_radius=v_outer_rad, source=v_source, **wedge_style)

    j_wedge_data = {
        "start_angle": j_start_angles,
        "end_angle": j_end_angles,
        "fill_color": jgene_facecolors,
        "legend": j_legend_text,
        "percent": j_legend_percent,
        "hover_fill_color": jgene_hover_colors
    }
    j_source = ColumnDataSource(j_wedge_data)
    j_inner_rad = v_outer_rad + vj_gap
    j_outer_rad = j_inner_rad + 0.15
    plot.annular_wedge(inner_radius=j_inner_rad, outer_radius=j_outer_rad, source=j_source, **wedge_style)

    if png is not None:
        #filename is keyword-only in recent bokeh releases
        export_png(plot, filename=png)

    #The loop variable is declared: an undeclared variable is an error in strict mode JavaScript
    change_v_color = CustomJS(args={"source": v_source}, code="""
        var selection = cb_obj.value;
        var new_color_array;
        var new_hover_array;
        if(selection.toLowerCase().indexOf("gene") !== -1) {
            new_color_array = source.data["vgene_facecolors"];
            new_hover_array = source.data["vgene_hover_colors"];
        } else {
            new_color_array = source.data["vfamily_facecolors"];
            new_hover_array = source.data["vfamily_hover_colors"];
        }
        var fill_color = source.data["fill_color"];
        var hover_fill_color = source.data["hover_fill_color"];
        for(var idx = 0; idx < fill_color.length; idx++) {
            fill_color[idx] = new_color_array[idx];
            hover_fill_color[idx] = new_hover_array[idx];
        }
        source.change.emit();
    """)
    v_data_color_by = Select(title="Color by:", options=["V Gene", "V Family"], value="V Gene")
    #Attached with js_on_change, which works without the widget `callback` argument
    v_data_color_by.js_on_change("value", change_v_color)

    plot_layout = column(v_data_color_by, plot)
    return plot_layout
def write_cross_chart(df, cluster_output_path, k):
    """Export a jittered scatter of ``df.c3`` versus ``df.c1`` as SVG and PNG.

    Every point is displaced by a random offset drawn inside a circle of
    radius ``sqrt(0.5)`` and coloured through the module-level `colormap`
    by its ``df.cluster`` value.  Both axes cross at the origin and carry
    the captions "C1", "-C1", "C3" and "-C3".

    Arguments
    ---------
    df:
        Data frame with the columns ``c1``, ``c3`` and ``cluster``.
    cluster_output_path: str
        Directory receiving ``cross-{k}.svg`` and ``cross-{k}.png``.
    k:
        Value interpolated into the output file names.
    """
    height = 1600
    width = 1600
    jitter_radius = 0.5**0.5

    driver = webdriver.Chrome(os.path.join(BASE_DIR, "chromedriver"),
                              options=opts)
    try:
        x = df.c3.to_list()
        y = df.c1.to_list()
        clusters = df.cluster.to_list()

        plot = figure(
            plot_width=width,
            plot_height=height,
            active_scroll="wheel_zoom",
            output_backend="svg",
        )
        plot.add_tools(HoverTool(tooltips="@title"))

        new_x = []
        new_y = []
        for x_coord, y_coord in zip(x, y):
            # pick the x offset first, then a y offset that keeps the point
            # inside the jitter circle
            x_rand = random.uniform(-jitter_radius, jitter_radius)
            y_rand_range = (0.5 - x_rand**2)**0.5
            y_rand = random.uniform(-y_rand_range, y_rand_range)
            new_x.append(x_coord + x_rand)
            new_y.append(y_coord + y_rand)

        colors = [colormap[cluster] for cluster in clusters]
        source = ColumnDataSource(
            data={"x": new_x, "y": new_y, "color": colors})
        plot.scatter(
            source=source,
            x="x",
            y="y",
            line_alpha=0.6,
            fill_alpha=0.6,
            size=10,
            color="color",
        )

        plot.yaxis.axis_label_text_font_size = "25pt"
        plot.yaxis.major_label_text_font_size = "25pt"
        plot.xaxis.axis_label_text_font_size = "25pt"
        plot.xaxis.major_label_text_font_size = "25pt"
        plot.title.text_font_size = value("32pt")
        plot.xaxis.visible = True
        plot.yaxis.visible = True

        # (text, x_offset, y_offset) of the captions at the axis ends
        captions = (
            ("C1", 0, 750),
            ("-C1", 0, -750),
            ("C3", 750, 0),
            ("-C3", -750, 0),
        )
        for text, x_offset, y_offset in captions:
            caption = Label(text=text, x_offset=x_offset, y_offset=y_offset,
                            text_font_size="30px")
            plot.add_layout(caption, "center")

        # transparent background, no grid, no frame, no toolbar
        plot.background_fill_color = None
        plot.border_fill_color = None
        plot.grid.grid_line_color = None
        plot.outline_line_color = None
        # both axes pass through the origin
        plot.yaxis.fixed_location = 0
        plot.xaxis.fixed_location = 0
        plot.toolbar.logo = None
        plot.toolbar_location = None

        export_svg(
            plot,
            filename=os.path.join(cluster_output_path, f"cross-{k}.svg"),
            webdriver=driver,
            height=height,
            width=width,
        )
        export_png(
            plot,
            filename=os.path.join(cluster_output_path, f"cross-{k}.png"),
            webdriver=driver,
            height=height,
            width=width,
        )
    finally:
        # release the browser process even when the export fails
        driver.quit()
f.legend.label_text_font_size = font_size f.legend.glyph_width = 100 f.legend.glyph_height = 40 f.legend.spacing = 20 for f in [f_pme_vs_ni, f_pse_vs_ni]: f.legend.location = 'bottom_left' f_kl_vs_ni.legend.location = 'center_left' f_pe_vs_ni.legend.location = 'center_left' if dnms[k] == 'delays10k': f_pme_vs_ni.yaxis.ticker = FixedTicker(ticks=[0.1, 0.4, 0.7]) elif dnms[k] == 'airfoil': f_pe_vs_ni.yaxis.ticker = FixedTicker(ticks=[0.3, 0.6, 0.9]) f_pse_vs_ni.yaxis.ticker = FixedTicker(ticks=[0.1, 0.5, 1]) #bkp.show(bkl.gridplot([[f_pe_vs_ni, f_pme_vs_ni, f_pse_vs_ni], [f_pe_vs_cput, f_pme_vs_cput, f_pse_vs_cput]])) #figs = [f_kl_vs_ni, f_pe_vs_ni, f_pme_vs_ni, f_pme_best_vs_ni, f_pme_obj_vs_ni, f_pse_vs_ni, f_pse_best_vs_ni, f_pse_obj_vs_ni, hypchg_vs_ni] figs = [ f_kl_vs_ni, f_pe_vs_ni, f_pme_vs_ni, f_pme_obj_vs_ni, f_pse_vs_ni, f_pse_obj_vs_ni ] if pngs: for i, f in enumerate(figs): export_png(f, 'figures/%s%d.png' % (dnms[k], i + 1)) else: bkp.output_file('figures/' + dnms[k] + '.html') bki.save(bkl.gridplot([figs])) #bkp.show(bkl.gridplot([figs]))
def draw_vectors():
    """Export one scatter plot per pair of vector columns as SVG and PNG.

    Reads ``normalized_future_vectors.csv`` from ``OUTPUTS_DIR`` and, for
    every pair of columns from index 5 onwards that is not in the exclusion
    set, plots the rows of cluster 3 with the first column on x and the
    second on y.  Column names are expected to have the form
    ``"<start> | <end>"``; the two parts caption the ends of the axis.
    Plots are written to ``svgs/<n>.svg`` and ``pngs/<n>.png``, numbered
    from 1 in pair order, and the number is printed before each export.
    """
    # column index pairs that are not plotted
    excluded_pairs = {
        (5, 6),
        (5, 7),
        (6, 7),
        (8, 9),
        (8, 10),
        (9, 10),
        (11, 12),
        (11, 13),
        (12, 13),
    }
    colormap = {3: "#ffee33", 2: "#00a152", 1: "#2979ff", 0: "#d500f9"}
    size = 1600

    driver = webdriver.Chrome(os.path.join(BASE_DIR, "chromedriver"),
                              options=options)
    try:
        df = pd.read_csv(
            os.path.join(OUTPUTS_DIR, "normalized_future_vectors.csv"))
        pairs = [
            pair
            for pair in combinations(range(5, len(df.columns)), 2)
            if pair not in excluded_pairs
        ]

        # the exports below do not create missing directories themselves
        os.makedirs("svgs", exist_ok=True)
        os.makedirs("pngs", exist_ok=True)

        for idx, (x, y) in enumerate(pairs, 1):
            x_name = df.columns[x]
            y_name = df.columns[y]
            tsne_df = pd.DataFrame({
                "x_coord": df[x_name].to_list(),
                "y_coord": df[y_name].to_list(),
            })
            tsne_df["title"] = df["title"].to_list()
            tsne_df["cluster_no"] = df["cluster"].to_list()

            # only the rows of cluster 3 are drawn
            only_one_cluster = pd.DataFrame(
                tsne_df.loc[tsne_df.cluster_no == 3])
            only_one_cluster["color"] = [
                colormap[cluster]
                for cluster in only_one_cluster["cluster_no"]
            ]
            plot_data = ColumnDataSource(
                data=only_one_cluster.to_dict(orient="list"))

            plot = figure(
                plot_width=size,
                plot_height=size,
                active_scroll="wheel_zoom",
                output_backend="svg",
                x_range=(-1.1, 1.1),
                y_range=(-1.1, 1.1),
            )
            plot.add_tools(HoverTool(tooltips="@title"))
            plot.circle(
                source=plot_data,
                x="x_coord",
                y="y_coord",
                line_alpha=0.6,
                fill_alpha=0.6,
                size=20,
                fill_color="color",
                line_color="color",
            )
            plot.yaxis.axis_label_text_font_size = "25pt"
            plot.yaxis.major_label_text_font_size = "25pt"
            plot.xaxis.axis_label_text_font_size = "25pt"
            plot.xaxis.major_label_text_font_size = "25pt"
            plot.title.text_font_size = value("32pt")
            plot.xaxis.visible = True
            plot.yaxis.visible = True

            start_x, end_x = (part.strip() for part in x_name.split("|"))
            start_y, end_y = (part.strip() for part in y_name.split("|"))

            # (text, x_offset, y_offset) of the captions at the axis ends
            captions = (
                (end_y, 0, 750),
                (start_y, 0, -750),
                (end_x, 600, 0),
                (start_x, -750, 0),
            )
            for text, x_offset, y_offset in captions:
                caption = Label(text=text, x_offset=x_offset,
                                y_offset=y_offset, text_font_size="30px")
                plot.add_layout(caption, "center")

            # transparent background, no grid, no frame, no toolbar
            plot.background_fill_color = None
            plot.border_fill_color = None
            plot.grid.grid_line_color = None
            plot.outline_line_color = None
            # both axes pass through the origin
            plot.yaxis.fixed_location = 0
            plot.xaxis.fixed_location = 0
            plot.toolbar.logo = None
            plot.toolbar_location = None

            print(idx)
            export_svg(
                plot,
                filename=f"svgs/{idx}.svg",
                webdriver=driver,
                height=size,
                width=size,
            )
            export_png(
                plot,
                filename=f"pngs/{idx}.png",
                webdriver=driver,
                height=size,
                width=size,
            )
    finally:
        # release the browser process even when an export fails
        driver.quit()
def Mosaic_Plot(clone_df,
                png=None,
                title="",
                top_clones=5000,
                count_col="Clustered",
                vgene_col="VGene",
                jgene_col="JGene",
                isotype_col="Isotype",
                vshm_col="V_SHM",
                jshm_col="J_SHM",
                vgene_colors=vgene_colors,
                vfamily_colors=vfamily_colors,
                jgene_colors=jgene_colors,
                isotype_colors=isotype_colors,
                line_width=0.3,
                figsize=(600, 600),
                hover_tooltip=True):
    """Creates a mosaic (treemap) plot in which every clone is a rectangle
    whose area is its share of the summed ``count_col``.

    A "Color by:" drop-down lets the viewer recolor the rectangles by any of
    the annotation columns that were supplied.

    Parameters
    ----------
    clone_df: pandas DataFrame
        Clones to plot. Must contain ``count_col`` and every optional column
        whose name is not None. The tooltip reads ``@CloneID`` - presumably
        the name of the DataFrame index; verify against the caller.
    png: str
        Filename handed to ``export_png``, or None to skip the export;
        default is None
    title: str
        Title of the output graph; default is ""
    top_clones: int
        Only the ``top_clones`` largest clones are drawn; a falsy value keeps
        all clones; default is 5000
    count_col: str
        Column of clone counts; default is "Clustered"
    vgene_col, jgene_col, isotype_col, vshm_col, jshm_col: str or None
        Optional annotation columns. Passing None removes the corresponding
        tooltip and coloring option.
    vgene_colors, vfamily_colors, jgene_colors, isotype_colors: mapping
        Passed to ``Series.map`` to turn annotation values into colors
    line_width: float
        Outline width of the rectangles; default is 0.3
    figsize: tuple of (int, int)
        The width and height of the output plot; default is (600, 600)
    hover_tooltip: bool
        Whether to add the hover tool; default is True

    Returns
    ----------
    plot_layout: bokeh layout
        A column holding the color-selection widget above the figure
    """
    figure_params = {
        "plot_width": figsize[0],
        "plot_height": figsize[1],
        "x_range": Range1d(-0.1, 1.1, bounds=(-1.0, 2.0)),
        "y_range": Range1d(-0.1, 1.1, bounds=(-1.0, 2.0)),
        "title": title,
        "tools": "pan, wheel_zoom, box_zoom, save, reset, help",
        "active_scroll": "wheel_zoom",
        "toolbar_location": "right"
    }

    plot = figure(**figure_params)
    plot.grid.visible = False
    plot.axis.visible = False

    #Collect the columns to keep and the matching tooltip rows in one pass
    hover_tooltips = [("Clone ID", "@CloneID")]
    info_cols = [count_col]
    optional_cols = [
        (vgene_col, "V Gene", ""),
        (jgene_col, "J Gene", ""),
        (isotype_col, "Isotype", ""),
        (vshm_col, "V Gene SHM", "{(0.00%)}"),
        (jshm_col, "J Gene SHM", "{(0.00%)}"),
    ]
    for col, label, number_format in optional_cols:
        if col is not None:
            info_cols.append(col)
            hover_tooltips.append((label, "@" + col + number_format))
    #The tooltip list must be complete before the HoverTool is built; entries
    #appended to the Python list afterwards are not guaranteed to reach the tool
    hover_tooltips.append(("Clone Frequency", "@Clone_Frequencies{(0.00%)}"))

    if hover_tooltip:
        hover_tool = HoverTool(point_policy="snap_to_data",
                               tooltips=hover_tooltips)
        plot.add_tools(hover_tool)

    mosaic_df = clone_df[info_cols].sort_values([count_col],
                                                ascending=[False])
    if top_clones:
        mosaic_df = mosaic_df.head(top_clones)
    #Work on an independent copy so the column assignments below neither warn
    #about chained assignment nor touch the caller's DataFrame
    mosaic_df = mosaic_df.copy()

    total_area = float(mosaic_df[count_col].sum())
    mosaic_df["Clone_Frequencies"] = mosaic_df[count_col].astype(
        float) / total_area

    mosaic_rects = squarify(mosaic_df["Clone_Frequencies"].tolist(), 0.0, 0.0,
                            1.0, 1.0)

    #Add half width/height to x/y position for center points
    mosaic_df["x"] = [rect["x"] + rect["dx"] / 2.0 for rect in mosaic_rects]
    mosaic_df["y"] = [rect["y"] + rect["dy"] / 2.0 for rect in mosaic_rects]
    mosaic_df["width"] = [rect["dx"] for rect in mosaic_rects]
    mosaic_df["height"] = [rect["dy"] for rect in mosaic_rects]

    #By default there is no legend text, since colors are alternating and non-informative
    mosaic_df["legend"] = ""
    mosaic_df["Empty_Legend"] = ""

    alternating_colors = [
        RGB(102, 194, 165),
        RGB(252, 141, 98),
        RGB(141, 160, 203)
    ]
    alt2_color_cycle = cycle(alternating_colors[0:2])
    alt3_color_cycle = cycle(alternating_colors)
    mosaic_df["alternating2_colors"] = [
        next(alt2_color_cycle) for _ in mosaic_rects
    ]
    mosaic_df["alternating3_colors"] = [
        next(alt3_color_cycle) for _ in mosaic_rects
    ]
    #Default color scheme is alternating 3 colors
    mosaic_df["fill_color"] = mosaic_df["alternating3_colors"]

    #Set up various mosaic coloring options and associated legends
    color_select_options = ["Alternating (2)", "Alternating (3)"]
    if vgene_col in mosaic_df.columns:
        vfamilies = mosaic_df[vgene_col].str.split("-").str[0]
        mosaic_df["vgene_colors"] = mosaic_df[vgene_col].map(vgene_colors)
        mosaic_df["vfamily_colors"] = vfamilies.map(vfamily_colors)
        color_select_options.append("V Gene")
        color_select_options.append("V Family")
        mosaic_df["VGene_Legend"] = mosaic_df[vgene_col]
        mosaic_df["VFamily_Legend"] = vfamilies
    if jgene_col in mosaic_df.columns:
        mosaic_df["jgene_colors"] = mosaic_df[jgene_col].map(jgene_colors)
        color_select_options.append("J Gene")
        mosaic_df["JGene_Legend"] = mosaic_df[jgene_col]
    if isotype_col in mosaic_df.columns:
        mosaic_df["isotype_colors"] = mosaic_df[isotype_col].map(
            isotype_colors)
        color_select_options.append("Isotype")
        mosaic_df["Isotype_Legend"] = mosaic_df[isotype_col]

    #Using viridis as a quantitative heatmap color scheme for SHM values
    #The SHM values are binned into 180 groups; viridis in >180 bins uses some
    #values twice, which pandas.cut can't use
    shm_viridis = list(viridis(180))
    colorbar_tick_formatter = NumeralTickFormatter(format="0.00%")

    #One ColorBar per available SHM column, keyed by the name the JS callback
    #uses. A column that was not supplied simply has no entry here, instead of
    #crashing a later plot.select(...)[0] lookup.
    shm_colorbars = {}
    shm_schemes = ((vshm_col, "vshm", "V Gene SHM"),
                   (jshm_col, "jshm", "J Gene SHM"))
    for shm_col, prefix, option_name in shm_schemes:
        if shm_col not in mosaic_df.columns:
            continue
        shm_min = mosaic_df[shm_col].min()
        shm_max = mosaic_df[shm_col].max()
        #Use pandas.cut to bin the SHM values into the heatmap colors
        mosaic_df[prefix + "_colors"] = pandas.cut(mosaic_df[shm_col],
                                                   bins=180,
                                                   labels=shm_viridis)
        color_select_options.append(option_name)
        shm_color_mapper = LinearColorMapper(palette=shm_viridis,
                                             low=shm_min,
                                             high=shm_max)
        shm_ticks = FixedTicker(ticks=numpy.linspace(shm_min, shm_max, 8))
        shm_colorbar = ColorBar(color_mapper=shm_color_mapper,
                                location=(0, 0),
                                name=prefix + "_colorbar",
                                label_standoff=12,
                                formatter=colorbar_tick_formatter,
                                ticker=shm_ticks)
        plot.add_layout(shm_colorbar, "right")
        shm_colorbars[prefix + "_colorbar_obj"] = shm_colorbar

    mosaic_source = ColumnDataSource(mosaic_df)
    plot.rect(x="x",
              y="y",
              width="width",
              height="height",
              fill_color="fill_color",
              legend="legend",
              line_color="black",
              line_width=line_width,
              source=mosaic_source)

    #By default, the plot legend and ColorBar should be turned off (since the color is repeating and uninformative)
    plot.legend[0].visible = False
    for shm_colorbar in shm_colorbars.values():
        shm_colorbar.visible = False

    if png is not None:
        export_png(plot, png)

    change_args = {"source": mosaic_source, "legend_obj": plot.legend[0]}
    change_args.update(shm_colorbars)
    #The callback picks the color/legend columns for the chosen scheme, then
    #copies them into the columns the glyph is bound to. Colorbars are looked
    #up with typeof because either may be absent from the callback args.
    change_rect_color = CustomJS(args=change_args,
                                 code="""
        var selection = cb_obj.value.toLowerCase();
        var color_key = "alternating3_colors";
        var legend_key = "Empty_Legend";
        var shm_bar = "";

        if(selection.indexOf("v gene shm") !== -1) {
            color_key = "vshm_colors";
            shm_bar = "vshm";
        } else if(selection.indexOf("j gene shm") !== -1) {
            color_key = "jshm_colors";
            shm_bar = "jshm";
        } else if(selection.indexOf("v gene") !== -1) {
            color_key = "vgene_colors";
            legend_key = "VGene_Legend";
        } else if(selection.indexOf("v family") !== -1) {
            color_key = "vfamily_colors";
            legend_key = "VFamily_Legend";
        } else if(selection.indexOf("j gene") !== -1) {
            color_key = "jgene_colors";
            legend_key = "JGene_Legend";
        } else if(selection.indexOf("isotype") !== -1) {
            color_key = "isotype_colors";
            legend_key = "Isotype_Legend";
        } else if(selection.indexOf("2") !== -1) {
            color_key = "alternating2_colors";
        }

        legend_obj.visible = (legend_key !== "Empty_Legend");
        if(typeof vshm_colorbar_obj !== "undefined") {
            vshm_colorbar_obj.visible = (shm_bar === "vshm");
        }
        if(typeof jshm_colorbar_obj !== "undefined") {
            jshm_colorbar_obj.visible = (shm_bar === "jshm");
        }

        var new_color_array = source.data[color_key];
        var new_legend_array = source.data[legend_key];
        var fill_color = source.data["fill_color"];
        var legend = source.data["legend"];
        for(var idx = 0; idx < fill_color.length; idx++) {
            fill_color[idx] = new_color_array[idx];
            legend[idx] = new_legend_array[idx];
        }
        source.change.emit();
    """)

    patch_coloring_select = Select(title="Color by:",
                                   options=color_select_options,
                                   value="Alternating (3)",
                                   callback=change_rect_color)

    plot_layout = column(patch_coloring_select, plot)
    return plot_layout
def Burtin_VGene_SHM_Plot(clone_df,
                          png=None,
                          title="",
                          vgene_col="VGene",
                          vshm_col="V_SHM",
                          split_col=None,
                          vfamily_colors=vfamily_colors,
                          label_arc=20,
                          figsize=(900, 900)):
    """Creates a circular (Burtin-style) bar plot of the mean SHM of each V gene.

    Each V gene gets an equal arc of the circle, shaded by V family; the bar
    inside the arc extends outward in proportion to the mean of ``vshm_col``
    for that gene. When ``split_col`` is given, one bar per sample is drawn
    side by side inside every arc.

    Parameters
    ----------
    clone_df: pandas DataFrame
        DataFrame holding ``vgene_col``, ``vshm_col`` and, if given,
        ``split_col``
    png: str
        Filename handed to ``export_png``, or None to skip the export;
        default is None
    title: str
        Title of the output graph; default is ""
    vgene_col: str
        Column of V gene names; the family is the text before the first "-";
        default is "VGene"
    vshm_col: str
        Column of V gene SHM values, formatted as percentages on the radial
        axis; default is "V_SHM"
    split_col: str
        Column separating samples, or None for a single sample; default is None
    vfamily_colors: mapping
        Passed to ``Series.map`` to turn a V family into its arc color
    label_arc: int
        Degrees at the top of the circle left free for the radial axis labels;
        default is 20
    figsize: tuple of (int, int)
        The width and height of the output plot; default is (900, 900)

    Returns
    ----------
    plot: bokeh figure
        The figure object for the Burtin plot
    """
    figure_params = {
        "plot_width": figsize[0],
        "plot_height": figsize[1],
        "x_axis_type": None,
        "y_axis_type": None,
        "x_range": Range1d(-45, 45, bounds=(-50, 50)),
        "y_range": Range1d(-45, 45, bounds=(-50, 50)),
        "title": title,
        "tools": "pan, wheel_zoom, box_zoom, save, reset, help",
        "active_scroll": "wheel_zoom",
        "toolbar_location": "right",
        "background_fill_color": RGB(216, 216, 216)
    }

    plot = figure(**figure_params)
    plot.grid.visible = False
    plot.axis.visible = False

    label_offset = 90  #Offset the SHM % labels to the top of the plot
    plot_data_degrees = 360 - label_arc
    initial_angle = label_offset + label_arc / 2
    ending_angle = label_offset + 360 - label_arc / 2

    plot_inner_rad = 10
    plot_outer_rad = 35
    plot_thickness = plot_outer_rad - plot_inner_rad

    df_cols = [vgene_col, vshm_col]
    #If comparing multiple samples, add the sample column to split on to the DataFrame
    if split_col is not None:
        df_cols.append(split_col)
    vgene_shm_df = clone_df[df_cols].sort_values([vgene_col])

    #Create and color arc backgrounds by V family
    vgene_family_df = vgene_shm_df[[vgene_col
                                    ]].drop_duplicates().reset_index(drop=True)
    total_vgenes = len(vgene_family_df)
    vgene_arc_degrees = plot_data_degrees / total_vgenes

    vgene_family_df["VFamily"] = vgene_family_df[vgene_col].str.split(
        "-").str[0]
    vgene_family_df["fill_color"] = vgene_family_df["VFamily"].map(
        vfamily_colors)
    vgene_family_df[
        "start_angle"] = vgene_family_df.index * vgene_arc_degrees + initial_angle
    vgene_family_df[
        "end_angle"] = vgene_family_df["start_angle"] + vgene_arc_degrees

    vfamily_source = ColumnDataSource(vgene_family_df)
    plot.annular_wedge(x=0,
                       y=0,
                       start_angle="start_angle",
                       end_angle="end_angle",
                       fill_color="fill_color",
                       inner_radius=plot_inner_rad,
                       outer_radius=plot_outer_rad,
                       line_color=None,
                       source=vfamily_source,
                       start_angle_units="deg",
                       end_angle_units="deg")

    if split_col is not None:
        samples = []
        grouped_vgene_shm_dfs = []
        #Group on the bare column name (not a one-element list) so the group
        #key is a scalar on every pandas version rather than a 1-tuple
        for sample, sample_df in vgene_shm_df.groupby(split_col):
            samples.append(sample)
            grouped_vgene_shm_dfs.append(
                sample_df.groupby([vgene_col])[vshm_col].agg(["mean"]))

        #Add the V genes that may be present in one sample but not in the current one
        all_vgenes = vgene_family_df[vgene_col].tolist()
        grouped_vgene_shm_dfs = [
            df.reindex(all_vgenes).reset_index()
            for df in grouped_vgene_shm_dfs
        ]
        vshm_max = max([df["mean"].max() for df in grouped_vgene_shm_dfs])
    else:
        grouped_vgene_shm_df = vgene_shm_df.groupby(
            [vgene_col])[vshm_col].agg(["mean"]).reset_index()
        grouped_vgene_shm_df = grouped_vgene_shm_df.sort_values(
            [vgene_col]).reset_index(drop=True)
        vshm_max = grouped_vgene_shm_df["mean"].max()

        samples = ["All"]
        grouped_vgene_shm_dfs = [grouped_vgene_shm_df]

    #Create the labels and radial axis lines for the SHM data.
    #The bars below are scaled as mean / vshm_max, i.e. the inner radius is 0
    #and the outer radius is vshm_max, so the ring labels must span 0..vshm_max
    #for a bar's tip to line up with the value printed on the ring it reaches.
    shm_labels = [
        "{0:.1%}".format(shm) for shm in numpy.linspace(0.0, vshm_max, 7)
    ]
    shm_label_radii = numpy.linspace(plot_inner_rad, plot_outer_rad, 7)
    plot.circle(x=0,
                y=0,
                radius=shm_label_radii,
                fill_color=None,
                line_color="white")
    plot.text(x=0,
              y=shm_label_radii[1:],
              text=shm_labels[1:],
              text_font_size="10pt",
              text_align="center",
              text_baseline="middle")

    #Create line-width annular wedges to separate V genes
    sep_angles = numpy.linspace(initial_angle, ending_angle, total_vgenes + 1)
    sep_inner_radius = plot_inner_rad - 1
    sep_outer_radius = plot_outer_rad + 1
    plot.annular_wedge(x=0,
                       y=0,
                       start_angle=sep_angles,
                       end_angle=sep_angles,
                       fill_color=None,
                       inner_radius=sep_inner_radius,
                       outer_radius=sep_outer_radius,
                       line_color="black",
                       start_angle_units="deg",
                       end_angle_units="deg")

    #Gene text labels; text angle location is the midpoint of the V gene separation lines
    text_radius = plot_outer_rad + 3.5
    text_radian_locs = numpy.deg2rad((sep_angles[1:] + sep_angles[:-1]) / 2)
    text_x = text_radius * numpy.cos(text_radian_locs)
    text_y = text_radius * numpy.sin(text_radian_locs)
    #Angle the text based on the position around the circle; reverse the left half so the text isn't upside-down
    mid_graph_radian = numpy.deg2rad(label_offset + 180)
    text_angles = [
        rad if rad > mid_graph_radian else rad + numpy.pi
        for rad in text_radian_locs
    ]
    plot.text(x=text_x,
              y=text_y,
              text=vgene_family_df[vgene_col],
              angle=text_angles,
              text_font_size="10pt",
              text_align="center",
              text_baseline="middle")

    #Finally draw the bars and legend for the mean SHM values for all clones of a specific V gene
    total_samples = len(grouped_vgene_shm_dfs)
    vgene_arc_radians = numpy.deg2rad(vgene_arc_degrees)
    bar_width = vgene_arc_radians / (total_samples + 1)
    spacer_width = bar_width / (total_samples + 1)
    sample_colors = (RGB(60, 60, 60), RGB(130, 40, 40), RGB(60, 60, 130),
                     RGB(10, 50, 100), RGB(150, 100, 20))
    sample_label_ys = numpy.linspace(-total_samples, total_samples,
                                     total_samples)
    arc_starts = text_radian_locs - (vgene_arc_radians / 2) + spacer_width
    #Avoid dividing by zero when every mean is 0; all bars then have no height
    shm_scale = vshm_max if vshm_max > 0 else 1.0

    for sample_idx, cur_df in enumerate(grouped_vgene_shm_dfs):
        #Reuse the palette from the start when there are more samples than colors
        sample_color = sample_colors[sample_idx % len(sample_colors)]
        bar_start_angles = arc_starts + sample_idx * (bar_width + spacer_width)
        bar_end_angles = bar_start_angles + bar_width
        cur_df["Normalized_SHM"] = cur_df["mean"] / shm_scale
        shm_bars = cur_df["Normalized_SHM"] * plot_thickness + plot_inner_rad
        plot.annular_wedge(x=0,
                           y=0,
                           start_angle=bar_start_angles,
                           end_angle=bar_end_angles,
                           line_color=None,
                           inner_radius=plot_inner_rad,
                           outer_radius=shm_bars,
                           fill_color=sample_color)

        #A hand-built legend in the hollow center, only needed to tell samples apart
        if total_samples > 1:
            plot.rect(x=-2,
                      y=sample_label_ys[sample_idx],
                      width=2.5,
                      height=1.5,
                      color=sample_color)
            plot.text(x=0,
                      y=sample_label_ys[sample_idx],
                      text={"value": samples[sample_idx]},
                      text_font_size="10pt",
                      text_baseline="middle")

    if png is not None:
        export_png(plot, png)

    return plot
def draw_chart(df):
    """Project the word vectors in *df* to 2-D with t-SNE and draw the clusters.

    Reads the ``wv``, ``cluster``, ``title`` and ``tokens_len`` columns of
    *df*, shows the scatter plot, and writes it to ``cluster.svg`` and
    ``cluster.png`` in the current working directory. Marker size grows with
    the min-max-normalized ``tokens_len``; marker color comes from the cluster
    number, which must be one of 0-3.

    The t-SNE coordinates are cached in ``tsne3000.pkl``.
    """
    X = df["wv"].to_list()
    y = df["cluster"].to_list()

    # NOTE(review): the cache is keyed by file name only, so a stale file is
    # reused even when `df` has changed - delete it to force a recompute.
    tsne_filepath = "tsne3000.pkl"
    if os.path.exists(tsne_filepath):
        # Cache hit.
        # NOTE: unpickling runs arbitrary code; only safe because this file is
        # produced locally by the branch below.
        with open(tsne_filepath, "rb") as f:
            tsne_points = pickle.load(f)
    else:
        tsne_points = TSNE(random_state=42).fit_transform(X)
        with open(tsne_filepath, "wb") as f:
            pickle.dump(tsne_points, f)

    tsne_df = pd.DataFrame(
        tsne_points, index=range(len(X)), columns=["x_coord", "y_coord"]
    )
    tsne_df["title"] = df["title"].to_list()
    tsne_df["tokens_len"] = df["tokens_len"].to_list()
    tsne_df["cluster_no"] = y

    colormap = {0: "#ffee33", 1: "#00a152", 2: "#2979ff", 3: "#d500f9"}
    tsne_df["color"] = [colormap[cluster] for cluster in tsne_df["cluster_no"]]

    normalized = min_max_normalize(tsne_df.tokens_len.to_list())
    tsne_df["radius"] = [5 + weight * 10 for weight in normalized]

    # NOTE(review): the two prints below look like leftover debug output.
    columns = tsne_df.to_dict(orient="list")
    print(columns)
    plot_data = ColumnDataSource(data=columns)
    print(plot_data)

    tsne_plot = figure(
        plot_width=1200,
        plot_height=1200,
        active_scroll="wheel_zoom",
        output_backend="svg",
    )
    tsne_plot.add_tools(HoverTool(tooltips="@title"))
    tsne_plot.circle(
        source=plot_data,
        x="x_coord",
        y="y_coord",
        line_alpha=0.6,
        fill_alpha=0.6,
        size="radius",
        fill_color="color",
        line_color="color",
    )

    tsne_plot.title.text_font_size = value("16pt")
    tsne_plot.xaxis.visible = True
    tsne_plot.yaxis.visible = True
    tsne_plot.background_fill_color = None
    tsne_plot.border_fill_color = None
    tsne_plot.grid.grid_line_color = None
    tsne_plot.outline_line_color = None

    show(tsne_plot)

    # The interactive view keeps its toolbar; the exported images do not.
    tsne_plot.toolbar.logo = None
    tsne_plot.toolbar_location = None

    # Start the browser only once it is needed and always shut it down, so a
    # failed export does not leave a Chrome/chromedriver process behind.
    driver = webdriver.Chrome(os.path.join(BASE_DIR, "chromedriver"))
    try:
        export_svg(tsne_plot, filename="cluster.svg", webdriver=driver)
        export_png(tsne_plot, filename="cluster.png", webdriver=driver)
    finally:
        driver.quit()
def calculate_cluster_number():
    """Pick the number of k-means clusters with the best silhouette score.

    Loads ``happiness.csv`` from ``H_IN_DIRS`` and uses every column except
    the first and the last as features. For each k from 2 to 10 it clusters
    the rows with k-means, scores the labels with the euclidean silhouette
    score, and exports a t-SNE scatter plot colored by cluster to
    ``cluster-number{k}.svg`` / ``cluster-number{k}.png``.

    The best k is printed and also returned.
    """
    df = pd.read_csv(os.path.join(H_IN_DIRS, "happiness.csv"))
    features = df.iloc[:, 1:-1].values.tolist()

    # The embedding depends only on the features and a fixed seed, so it is
    # the same for every k - compute it once instead of once per iteration.
    points = TSNE(random_state=42).fit_transform(features)
    t_df = pd.DataFrame(points, index=range(len(features)), columns=["x", "y"])

    colormap = {
        0: "#f44336",
        1: "#673ab7",
        2: "#9c27b0",
        3: "#e91e63",
        4: "#3f51b5",
        5: "#2196f3",
        6: "#03a9f4",
        7: "#00bcd4",
        8: "#009688",
        9: "#cddc39",
    }

    kmax = 10
    best_k = 0
    # Silhouette scores can be negative, so start below any possible score;
    # otherwise an all-negative run would report the invalid k = 0.
    best_score = float("-inf")

    driver = webdriver.Chrome(
        os.path.join(BASE_DIR, "chromedriver"), options=opts
    )
    try:
        for k in range(2, kmax + 1):
            # Fit once and use the same labels for both the score and the
            # plot; refitting an unseeded KMeans can yield a different
            # clustering than the one that was scored.
            labels = KMeans(n_clusters=k).fit_predict(features)
            score = silhouette_score(features, labels, metric="euclidean")

            t_df["cluster_no"] = labels
            t_df["color"] = [colormap[label] for label in t_df["cluster_no"]]
            plot_data = ColumnDataSource(data=t_df.to_dict(orient="list"))

            p = figure(
                plot_width=1200,
                plot_height=1200,
                active_scroll="wheel_zoom",
                output_backend="svg",
            )
            p.add_tools(HoverTool(tooltips="@title"))
            p.circle(
                source=plot_data,
                x="x",
                y="y",
                line_alpha=0.9,
                fill_alpha=0.9,
                fill_color="color",
                line_color="color",
            )

            p.title.text_font_size = value("16pt")
            p.xaxis.visible = True
            p.yaxis.visible = True
            p.background_fill_color = None
            p.border_fill_color = None
            p.grid.grid_line_color = None
            p.outline_line_color = None
            p.toolbar.logo = None
            p.toolbar_location = None

            export_svg(
                p,
                filename=f"cluster-number{k}.svg",
                webdriver=driver,
            )
            export_png(
                p,
                filename=f"cluster-number{k}.png",
                webdriver=driver,
            )

            if score > best_score:
                best_score = score
                best_k = k
    finally:
        # Always release the browser, even when clustering or an export fails.
        driver.quit()

    print(best_k)
    return best_k
def Diversity_Plot(clone_df,
                   png=None,
                   title="",
                   count_col="Clustered",
                   split_col=None,
                   line_width=3,
                   add_control_diversities=True,
                   figsize=(1000, 700)):
    """Creates a plot comparing clonal repertoire diversity rates, using the Hill Diversity metric.

    Parameters
    ----------
    clone_df: pandas DataFrame
        DataFrame of the repertoire(s) to plot
    png: str
        Filename of the output PNG or None if none should be made; default is None
    title: str
        Title of the output graph; default is ""
    count_col: str
        Column name in clone_df of the clone counts/frequencies; default is "Clustered"
    split_col: str
        Column separating various repertoire subsets in clone_df or None if single repertoire; default is None
    line_width: int
        Width for the plot lines; default is 3
    add_control_diversities: bool
        Whether to add dashed lines for artificial control repertoires in which the
        top 20 clones hold 20%, 15%, 10% and 5% of the total counts; default is True
    figsize: tuple of (int, int)
        The width and height of the output plot; default is (1000, 700)

    Returns
    ----------
    plot: bokeh figure
        The figure object for the diversity plot
    """
    #Local import keeps this function self-contained
    from itertools import cycle

    figure_params = {
        "plot_width": figsize[0],
        "plot_height": figsize[1],
        "x_range": Range1d(0, 10),
        "y_axis_type": "log",
        "title": title,
        "tools": "save, help",
        "toolbar_location": "right"
    }

    plot = figure(**figure_params)
    plot.xgrid.grid_line_alpha = 0.0
    plot.xaxis.axis_label = "Order (N)"
    plot.yaxis.axis_label = "Hill Diversity Constant"
    plot.yaxis.formatter = BasicTickFormatter()

    #If comparing multiple samples, add the sample column to split on to the DataFrame
    if split_col is not None:
        diversity_df = clone_df[[count_col, split_col]]
        samples = []
        diversity_dfs = []
        #Group on the bare column name (not a one-element list) so each key is
        #a scalar usable as a legend label on every pandas version
        for sample, df in diversity_df.groupby(split_col):
            samples.append(sample)
            diversity_dfs.append(df)
    else:
        samples = ["Repertoire"]
        diversity_dfs = [clone_df[[count_col]]]

    sample_colors = (RGB(30, 160, 120), RGB(220, 90, 0), RGB(120, 110, 180),
                     RGB(230, 40, 140))
    #Cycle the palette so samples beyond the fourth are still drawn instead of
    #being silently dropped by zip()
    for sample, df, line_color in zip(samples, diversity_dfs,
                                      cycle(sample_colors)):
        hill_indices = Hill_Diversity_Index(df[count_col])
        n_orders = [i[0] for i in hill_indices]
        order_diversities = [i[1] for i in hill_indices]
        #TODO: add more line styles (dotted, etc.)
        plot.line(x=n_orders,
                  y=order_diversities,
                  color=line_color,
                  line_width=line_width,
                  legend=sample)

    if add_control_diversities:
        total_clones = max([len(i) for i in diversity_dfs])
        total_counts = max([df[count_col].sum() for df in diversity_dfs])
        rest_clones = total_clones - 20

        #Each control repertoire gives its top 20 clones a fixed share of the
        #counts and spreads the remainder evenly over all other clones:
        #(top-20 share, remaining share, line color, legend label)
        controls = (
            (0.2, 0.8, RGB(160, 200, 230),
             "Very Highly Polarized (Top 20 Clones 20%)"),
            (0.15, 0.85, RGB(30, 120, 180),
             "Highly Polarized (Top 20 Clones 15%)"),
            (0.1, 0.9, RGB(180, 220, 140),
             "Moderately Polarized (Top 20 Clones 10%)"),
            (0.05, 0.95, RGB(50, 160, 40),
             "Lowly Polarized (Top 20 Clones 5%)"),
        )
        for top_share, rest_share, control_color, control_label in controls:
            control_data = [total_counts * top_share / 20] * 20
            #With 20 or fewer clones there is no remainder to distribute
            if rest_clones > 0:
                control_data += [total_counts * rest_share / rest_clones
                                 ] * rest_clones

            control_indices = Hill_Diversity_Index(control_data)
            #Take the orders from the control's own result rather than from
            #whichever sample happened to be processed last
            control_orders = [i[0] for i in control_indices]
            control_diversities = [i[1] for i in control_indices]
            plot.line(x=control_orders,
                      y=control_diversities,
                      color=control_color,
                      alpha=0.8,
                      line_dash=(12, ),
                      line_width=line_width,
                      legend=control_label)

    if png is not None:
        export_png(plot, png)

    return plot
# Bar chart of mean MPG per (cylinder count, manufacturer) group, written to
# plot.png. `df` and `group` are defined earlier in the script.
p = figure(
    title="Mean MPG by # Cylinders and Manufacturer",
    x_range=group,
    width=800,
    height=300,
    toolbar_location=None,
    tooltips=[("MPG", "@mpg_mean"), ("Cyl, Mfr", "@cyl_mfr")],
)

# Map the 'cyl_mfr' factors onto the Spectral5 palette using the sorted
# cylinder counts; end=1 presumably limits the lookup to the first factor
# level (the cylinder count) - confirm against the Bokeh factor_cmap docs.
index_cmap = factor_cmap(
    'cyl_mfr',
    palette=Spectral5,
    factors=sorted(df.cyl.unique()),
    end=1,
)

p.vbar(
    source=group,
    x='cyl_mfr',
    top='mpg_mean',
    width=1,
    fill_color=index_cmap,
    line_color="white",
)

# Cosmetics: no frame or vertical grid lines, bars start at zero, and the
# x labels are rotated and given a little padding.
p.outline_line_color = None
p.xgrid.grid_line_color = None
p.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
p.xaxis.major_label_orientation = 1.2
p.x_range.range_padding = 0.05
p.y_range.start = 0

export_png(p, filename="plot.png")