Ejemplo n.º 1
0
    def export_figure(self, output_file):
        """
        Export the main figure and the colorbar figure, laid out in a row, to an image file.

        Requires a working *selenium* driver (https://www.selenium.dev/selenium/docs/api/py/).

        Related attributes are `selenium_webdriver`, `figure_export_width` and `figure_export_height`.

        The format is chosen from the extension of `output_file` (``.png`` or ``.svg``).
        Export errors, including an unsupported extension, are not propagated: the traceback
        is printed and the 'figure' export status is set to 'failed'. On success the status
        is set to 'done'.
        """
        export_kwargs = {}
        if self.figure_export_width is not None:
            export_kwargs['width'] = self.figure_export_width
        if self.figure_export_height is not None:
            export_kwargs['height'] = self.figure_export_height
        webdriver = None
        if self.selenium_webdriver is not None:
            # instantiated last so that nothing can fail between start-up and the
            # try/finally that shuts the driver down again
            webdriver = self.selenium_webdriver()
            export_kwargs['webdriver'] = webdriver
        #self.set_export_status('figure', 'exporting...')
        try:
            doc = row(self.main_figure, self.colorbar_figure)
            from bokeh.io import export
            if output_file.endswith('.png'):
                export.export_png(doc, filename=output_file, **export_kwargs)
            elif output_file.endswith('.svg'):
                export.export_svg(doc, filename=output_file, **export_kwargs)
            else:
                raise NotImplementedError("format '{}' not supported".format(
                    os.path.splitext(output_file)[1]))
            self.set_export_status('figure', 'done')
        except Exception:
            # best effort: report the failure instead of interrupting the caller;
            # KeyboardInterrupt and SystemExit are not Exception subclasses and still propagate
            traceback.print_exc()
            self.set_export_status('figure', 'failed')
        finally:
            if webdriver is not None:
                # the driver was started by this call, so this call has to stop it
                try:
                    webdriver.quit()
                except Exception:
                    traceback.print_exc()
Ejemplo n.º 2
0
 def save(self,
          format='html',
          file_name='Parallel-Coordinates',
          path=None) -> None:
     """
     Save the plot in one or all of the supported formats.

     :param format: one of 'html', 'png', 'svg' or 'all' ('all' writes the three files)
     :param file_name: base name of the output file; extension and directory are added
      by `file_name_with_ext_and_path`
     :param path: the directory where the plot is stored; when None, the value returned by
      `my_path()` is used
     :raises ValueError: if `format` is not one of the supported values
     :return: None

     Note that exporting to svg switches the plot's `output_backend` to "svg".
     """
     if path is None:
         path = self.my_path()
     valid_format = ('html', 'png', 'svg', 'all')
     if format not in valid_format:
         raise ValueError('The format is incorrect')
     # Each branch derives its own target from the untouched base name; reusing a single
     # variable would hand the already-resolved html path to the png/svg name builder
     # when format == 'all'.
     if format in ('html', 'all'):
         html_target = self.file_name_with_ext_and_path(
             file_name, 'html', path)
         save(self.parallel_plot, filename=html_target)
     if format in ('png', 'all'):
         png_target = self.file_name_with_ext_and_path(
             file_name, 'png', path)
         be.export_png(self.parallel_plot, filename=png_target)
     if format in ('svg', 'all'):
         svg_target = self.file_name_with_ext_and_path(
             file_name, 'svg', path)
         self.parallel_plot.output_backend = "svg"
         be.export_svgs(self.parallel_plot, filename=svg_target)
Ejemplo n.º 3
0
    def export_figure(self, output_file):
        """
        Export the main figure and the colorbar figure, laid out in a row, to an image file.

        Requires a working *selenium* driver (https://www.selenium.dev/selenium/docs/api/py/).

        Related attributes are `selenium_webdriver`, `figure_export_width` and `figure_export_height`.

        The driver is started headless when the options module that sits next to the driver
        module can be loaded; otherwise it is started with its defaults (with a warning,
        except for Safari and Edge).

        The format is chosen from the extension of `output_file` (``.png`` or ``.svg``).
        Export errors, including an unsupported extension, are not propagated: the traceback
        is printed and the 'figure' export status is set to 'failed'. On success the status
        is set to 'done'.
        """
        # Imported once at the top of the function: an import nested in a branch makes the
        # name local to the whole function, so using it on a path where that branch did
        # not run raises UnboundLocalError.
        import traceback
        import warnings

        export_kwargs = {}
        if self.figure_export_width is not None:
            export_kwargs["width"] = self.figure_export_width
        if self.figure_export_height is not None:
            export_kwargs["height"] = self.figure_export_height
        webdriver = None
        if self.selenium_webdriver is not None:
            try:
                from importlib import import_module

                # swaps the trailing 9 characters of the driver's module name for "options";
                # presumably that suffix is "webdriver" (e.g. '...chrome.webdriver') - verify
                # for custom driver classes
                options = import_module(
                    self.selenium_webdriver.__module__[:-9] + "options")
                options = options.Options()
                options.headless = True
                webdriver = self.selenium_webdriver(options=options)
            except (ImportError, AttributeError):
                import selenium.webdriver

                if self.selenium_webdriver not in (
                        selenium.webdriver.Safari,
                        selenium.webdriver.Edge,
                ):
                    warnings.warn(
                        "could not access the webdriver"
                        "s options:\n" + traceback.format_exc(),
                        ImportWarning,
                    )
                # fall back to a driver with default options
                webdriver = self.selenium_webdriver()
            export_kwargs["webdriver"] = webdriver
        # self.set_export_status('figure', 'exporting...')
        try:
            doc = row(self.main_figure, self.colorbar_figure)
            from bokeh.io import export

            if output_file.endswith(".png"):
                export.export_png(doc, filename=output_file, **export_kwargs)
            elif output_file.endswith(".svg"):
                export.export_svg(doc, filename=output_file, **export_kwargs)
            else:
                raise NotImplementedError("format '{}' not supported".format(
                    os.path.splitext(output_file)[1]))
            self.set_export_status("figure", "done")
        except Exception:
            # best effort: report the failure instead of interrupting the caller;
            # KeyboardInterrupt and SystemExit are not Exception subclasses and still propagate
            traceback.print_exc()
            self.set_export_status("figure", "failed")
        finally:
            if webdriver is not None:
                # the driver was started by this call, so this call has to stop it
                try:
                    webdriver.quit()
                except Exception:
                    traceback.print_exc()
Ejemplo n.º 4
0
def draw_2d_chart(idx, x, cluster_output_path, k):
    """
    Project `x` to 2-D with t-SNE and export the coloured scatter plot as SVG and PNG.

    :param idx: cluster number of each row of `x`; each value is looked up in `colormap`
    :param x: data handed to `TSNE.fit_transform`
    :param cluster_output_path: directory that receives the exported files
    :param k: value embedded in the file names (``cluster2d-{k}.svg`` / ``cluster2d-{k}.png``)
    """
    points = TSNE(random_state=42).fit_transform(x)
    t_df = pd.DataFrame(points, index=range(len(x)), columns=["x", "y"])
    t_df["cluster_no"] = idx
    t_df["color"] = [colormap[cluster] for cluster in t_df["cluster_no"]]
    plot_data = ColumnDataSource(data=t_df.to_dict(orient="list"))

    p = figure(
        plot_width=1200,
        plot_height=1200,
        active_scroll="wheel_zoom",
        output_backend="svg",
    )
    p.add_tools(HoverTool(tooltips="@title"))
    p.circle(
        source=plot_data,
        x="x",
        y="y",
        line_alpha=0.9,
        fill_alpha=0.9,
        size=8,
        fill_color="color",
        line_color="color",
    )
    p.title.text_font_size = value("16pt")
    p.xaxis.visible = True
    p.yaxis.visible = True
    # no fills, grid, outline or toolbar in the exported image
    p.background_fill_color = None
    p.border_fill_color = None
    p.grid.grid_line_color = None
    p.outline_line_color = None
    p.toolbar.logo = None
    p.toolbar_location = None

    # The browser is only needed for the export itself: start it as late as possible and
    # always shut it down, otherwise every call leaves a Chrome process behind.
    driver = webdriver.Chrome(os.path.join(BASE_DIR, "chromedriver"),
                              options=opts)
    try:
        export_svg(
            p,
            filename=os.path.join(cluster_output_path, f"cluster2d-{k}.svg"),
            webdriver=driver,
        )
        export_png(
            p,
            filename=os.path.join(cluster_output_path, f"cluster2d-{k}.png"),
            webdriver=driver,
        )
    finally:
        driver.quit()
Ejemplo n.º 5
0
def save_png(model, filename):
    """
    Saves a bokeh model to png

    The webdriver stored on `state` is reused; it is created on first use.

    Arguments
    ---------
    model: bokeh.model.Model
      Model to save to png
    filename: str
      Filename to save to
    """
    if not state.webdriver:
        state.webdriver = create_webdriver()

    webdriver = state.webdriver
    # filename is passed by keyword so the call does not depend on its position in
    # export_png's signature
    export_png(model, filename=filename, webdriver=webdriver)
Ejemplo n.º 6
0
def save_png(model, filename):
    """
    Render a bokeh model into a PNG file.

    Arguments
    ---------
    model: bokeh.model.Model
      The model to render
    filename: str
      Path of the PNG file to write
    """
    from bokeh.io.webdriver import webdriver_control

    # The webdriver kept on `state` is created lazily, on the first export.
    if not state.webdriver:
        state.webdriver = webdriver_control.create()

    export_png(model, webdriver=state.webdriver, filename=filename)
Ejemplo n.º 7
0
def save_png(model, filename, template=None, template_variables=None):
    """
    Saves a bokeh model to png

    When a template is given, `bokeh.io.export.get_layout_html` is temporarily replaced
    by a function that renders the model through `file_html` with that template; the
    original function is restored afterwards, whether or not the export succeeded.

    Arguments
    ---------
    model: bokeh.model.Model
      Model to save to png
    filename: str
      Filename to save to
    template:
      template file, as used by bokeh.file_html. If None will use bokeh defaults
    template_variables:
      template_variables file dict, as used by bokeh.file_html
    """
    from bokeh.io.webdriver import webdriver_control
    if not state.webdriver:
        state.webdriver = webdriver_control.create()

    webdriver = state.webdriver

    if not template:
        export_png(model, filename=filename, webdriver=webdriver)
        return

    def get_layout_html(obj, resources, width, height):
        # width and height are accepted for signature compatibility but not used
        return file_html(obj,
                         resources,
                         title="",
                         template=template,
                         template_variables=template_variables,
                         suppress_callback_warning=True,
                         _always_new=True)

    # The patch is installed before entering the try block, so the cleanup below can
    # never run without a saved original to restore.
    old_layout_fn = bokeh.io.export.get_layout_html
    bokeh.io.export.get_layout_html = get_layout_html
    try:
        export_png(model, filename=filename, webdriver=webdriver)
    finally:
        bokeh.io.export.get_layout_html = old_layout_fn
Ejemplo n.º 8
0
def show_manhattan_plot(df,
                        group_by,
                        x_axis,
                        y_axis,
                        title='Manhattan Plot',
                        save_to=None):
    """
    Draw a Manhattan plot with one scatter series per distinct value of `group_by`.

    For every group the `x_axis` column is shifted by the group's position + 0.5 and
    plotted against -log10 of the `y_axis` column; groups alternate between orange
    and gray.

    :param df: dataframe holding the columns; it must support `query`, and its columns
        `unique().to_array()` and `to_dlpack()` (presumably a cudf DataFrame - confirm)
    :param group_by: name of the grouping column (labelled 'Chromosomes' on the plot)
    :param x_axis: name of the column with the x values
    :param y_axis: name of the column whose -log10 is plotted on y
    :param title: figure title
    :param save_to: if truthy, file name the figure is exported to as PNG; otherwise
        the figure is shown through a notebook handle
    :return: the bokeh figure
    """
    chroms = df[group_by].unique().to_array()

    manhattan_fig = figure(title=title)
    manhattan_fig.xaxis.axis_label = 'Chromosomes'
    manhattan_fig.yaxis.axis_label = '-log10(p)'

    manhattan_fig.xaxis.ticker = FixedTicker(ticks=list(chroms))

    for position, chrom in enumerate(chroms):
        cdf = df.query('%s == %s' % (group_by, chrom))

        # shift applied to this group's x values
        offset = position + 0.5
        x_array = cupy.fromDlpack(cdf[x_axis].to_dlpack()) + offset
        y_array = -cupy.log10(cupy.fromDlpack(cdf[y_axis].to_dlpack()))

        manhattan_fig.circle(x_array.get(),
                             y_array.get(),
                             size=2,
                             color='orange' if position % 2 == 0 else 'gray',
                             alpha=0.5)

    if save_to:
        export_png(manhattan_fig, filename=save_to)
    else:
        manhattan_handle = show(manhattan_fig, notebook_handle=True)
        push_notebook(handle=manhattan_handle)

    return manhattan_fig
Ejemplo n.º 9
0
def show_qq_plot(df,
                 x_axis,
                 y_axis,
                 title="QQ",
                 save_to=None,
                 x_max=None,
                 y_max=None):
    """
    Scatter -log10 of column `x_axis` against -log10 of column `y_axis`, with an orange
    reference line from the origin to (x_max, y_max).

    `x_max` / `y_max` default to the largest transformed value of their column. If either
    limit is infinite, a message is printed and None is returned without plotting.

    The figure is exported as PNG to `save_to` when that is truthy, otherwise it is shown
    through a notebook handle. Returns the bokeh figure.
    """
    neg_log_x = -cupy.log10(cupy.fromDlpack(df[x_axis].to_dlpack()))
    neg_log_y = -cupy.log10(cupy.fromDlpack(df[y_axis].to_dlpack()))

    if x_max is None:
        x_max = cupy.max(neg_log_x).tolist()
    if y_max is None:
        y_max = cupy.max(neg_log_y).tolist()

    # y is checked before x, so only the first offending axis is reported
    limit_checks = (
        (y_max, "Please pass y_max. Input contains inf."),
        (x_max, "Please pass x_max. Input contains inf."),
    )
    for limit, message in limit_checks:
        if limit == cupy.inf:
            print(message)
            return

    qq_fig = figure(x_range=(0, x_max), y_range=(0, y_max), title=title)
    qq_fig.circle(neg_log_x.get(), neg_log_y.get(), size=1)
    qq_fig.line([0, x_max], [0, y_max], line_color='orange', line_width=2)

    if not save_to:
        qq_handle = show(qq_fig, notebook_handle=True)
        push_notebook(handle=qq_handle)
    else:
        export_png(qq_fig, filename=save_to)

    return qq_fig
Ejemplo n.º 10
0
def _violin_outline(values, violin_width, left, mirror):
	"""Return the (xs, ys) patch outline of one kernel-density violin half.

	The density of `values` is evaluated at 300 points between 0.0 and the maximum value and
	scaled so that its peak is half of `violin_width`. With `left` the half is drawn on the
	negative x side. With `mirror` the outline is reflected to form a complete violin;
	otherwise a single closing point is appended so the patch returns to its starting height.
	"""
	y_points = numpy.linspace(0.0, values.max(), 300)
	density = gaussian_kde(values, "scott")(y_points)

	#Normalize the x range to standard width
	x_points = density / density.max() * violin_width / 2.0
	if left:
		x_points = -x_points

	if mirror:
		xs = numpy.append(x_points, numpy.flipud(-x_points))
		ys = numpy.append(y_points, numpy.flipud(y_points))
	else:
		xs = numpy.append(x_points, abs(x_points).min())
		ys = numpy.append(y_points, y_points.min())

	return xs, ys

def Violin_SHM_Plot(clone_df, png = None, title = "", vshm_col = "V_SHM", jshm_col = "J_SHM", split_col = None,
					quads = True, violin_width = 0.8, line_width = 0.4, figsize = (1000, 600), hover_tooltip = True):
	"""Creates a SHM violin plot that can be used to compare multiple categories in a Repertoire.

	When both SHM columns are given, each violin shows the V column on its left half and the
	J column on its right half; with only one column the violin is symmetric.

	Parameters
	----------
	clone_df: pandas DataFrame
		Must contain the SHM columns and, if given, `split_col`.
	png: str, optional
		File name the plot is exported to as PNG.
	title: str
		Figure title.
	vshm_col, jshm_col: str or None
		Names of the V and J SHM columns; pass None to leave one out.
	split_col: str, optional
		Column whose distinct values each get their own violin.
	quads: bool
		Accepted but currently has no effect.
	violin_width: float
		Full width of one violin on the x axis.
	line_width: float
		Outline width of the violins.
	figsize: tuple
		(width, height) of the figure.
	hover_tooltip: bool
		Whether to add a hover tool showing mean, max and quartiles.

	Returns
	----------
	plot: the bokeh figure
	"""

	figure_params = {
		"plot_width": figsize[0],
		"plot_height": figsize[1],
		"y_range": Range1d(-0.005, 0.3, bounds = (-0.01, 0.31)),
		"title": title,
		"tools": "pan, wheel_zoom, box_zoom, save, reset, help",
		"active_scroll": "wheel_zoom",
		"toolbar_location": "right"
	}

	plot = figure(**figure_params)
	plot.grid.visible = False
	plot.xaxis.minor_tick_line_color = None
	plot.xaxis.major_label_text_font_size = "10pt"
	plot.yaxis.axis_label = "V/J Gene SHM"
	plot.yaxis.major_label_text_font_size = "10pt"
	plot.yaxis.formatter = NumeralTickFormatter(format = "0.00%")

	if hover_tooltip:
		hover_tooltips = [("Mean SHM", "@mean{(0.00%)}"), ("Max SHM", "@max{(0.00%)}"),
						  ("25th Percentile", "@quantile25{(0.00%)}"), ("75th Percentile", "@quantile75{(0.00%)}")]
		hover_tool = HoverTool(point_policy = "follow_mouse", tooltips = hover_tooltips)
		plot.add_tools(hover_tool)

	shm_cols = [col for col in (vshm_col, jshm_col) if col is not None]

	#To compare samples, add the sample column to split on to the DataFrame
	if split_col is not None:
		shm_cols.append(split_col)

		samples = []
		shm_dfs = []
		#Group by the bare column name: a one-element list would yield 1-tuple keys on
		#current pandas, and those keys end up as the x axis labels
		for sample, df in clone_df[shm_cols].groupby(split_col):
			samples.append(sample)
			shm_dfs.append(df)

	else:
		samples = ["Repertoire"]
		shm_dfs = [clone_df[shm_cols]]

	vshm_violin_color = "lightgreen"
	jshm_violin_color = "slateblue"
	violin_xs = []
	violin_ys = []
	violin_colors = []
	violin_legends = []
	hover_means = []
	hover_maxes = []
	hover_25quantiles = []
	hover_75quantiles = []
	x_location_to_category = {}
	violin_x_offset = 0

	for sample, df in zip(samples, shm_dfs):
		has_vshm = vshm_col in df.columns
		has_jshm = jshm_col in df.columns

		#(column, drawn on the left side, mirrored into a full violin, color, legend label)
		halves = []
		if has_vshm:
			halves.append((vshm_col, True, not has_jshm, vshm_violin_color, "V Gene SHM"))
		if has_jshm:
			halves.append((jshm_col, False, not has_vshm, jshm_violin_color, "J Gene SHM"))

		for shm_col, left, mirror, color, legend_label in halves:
			shm_values = df[shm_col]
			hover_means.append([shm_values.mean()])
			hover_maxes.append([shm_values.max()])
			hover_25quantiles.append([shm_values.quantile(0.25)])
			hover_75quantiles.append([shm_values.quantile(0.75)])

			xs, ys = _violin_outline(shm_values, violin_width, left, mirror)
			violin_xs.append(xs + violin_x_offset)
			violin_ys.append(ys)
			violin_colors.append(color)
			violin_legends.append(legend_label)

		x_location_to_category[violin_x_offset] = sample
		violin_x_offset += violin_width * 1.2

	violin_data = {
		"xs": violin_xs,
		"ys": violin_ys,
		"fill_color": violin_colors,
		"legend": violin_legends,
		"mean": hover_means,
		"max": hover_maxes,
		"quantile25": hover_25quantiles,
		"quantile75": hover_75quantiles
	}
	violin_source = ColumnDataSource(violin_data)

	plot.patches(xs = "xs", ys = "ys", fill_color = "fill_color", line_color = "black", line_width = line_width,
				 legend = "legend", source = violin_source)

	#Replace / remap the X axis tickers to the categorical samples
	plot.xaxis.ticker = FixedTicker(ticks = list(x_location_to_category))
	plot.xaxis.major_label_overrides = x_location_to_category
	plot.x_range.bounds = (min(x_location_to_category) - 1, max(x_location_to_category) + 1)

	if png is not None:
		export_png(plot, png)

	return plot
Ejemplo n.º 11
0
def Rarefaction_Plot(align_df, png = None, title = "", cdr_col = "CDR3_AA", split_col = None, cdr_identity = 0.96,
					 steps = 50, reads = None, figsize = (800, 600), hover_tooltip = True, save_to_file = False):
	"""Creates a rarefaction plot: clonotype count against the number of randomly sampled reads.

	For every sample, random subsamples of increasing size are drawn and each one is passed to
	`Clonotype_Usearch` (with `identity = cdr_identity`); the returned value is plotted against
	the subsample size. The last point always uses all reads of the sample.

	Parameters
	----------
	align_df: pandas DataFrame
		Must contain `cdr_col` and, if given, `split_col`.
	png: str, optional
		File name the plot is exported to as PNG.
	title: str
		Figure title.
	cdr_col: str
		Column handed to `Clonotype_Usearch`.
	split_col: str, optional
		Column whose distinct values are plotted as separate samples.
	cdr_identity: float
		Passed to `Clonotype_Usearch` as `identity`.
	steps: int
		Number of subsampling steps per sample; used when `reads` is None.
	reads: int, optional
		Fixed number of reads between two subsamples; overrides `steps`.
	figsize: tuple
		(width, height) of the figure.
	hover_tooltip: bool
		Whether to add a hover tool to the lines.
	save_to_file: bool
		If True, also write each sample's points to "<sample>_Rarefaction_Data.txt".

	Returns
	----------
	plot: the bokeh figure

	Raises
	----------
	ValueError: if `reads` is given but is not positive
	"""
	if reads is not None and reads <= 0:
		#A non-positive step never reaches the total read count
		raise ValueError("reads must be a positive number of reads per step")

	figure_params = {
		"plot_width": figsize[0],
		"plot_height": figsize[1],
		"title": title,
		"tools": "save, help",
		"toolbar_location": "right"
	}
	plot = figure(**figure_params)
	plot.xgrid.grid_line_color = None
	plot.xaxis.axis_label = "Total Sampled Reads"
	plot.yaxis.axis_label = "Total Clonotypes"
	plot.axis.formatter = NumeralTickFormatter(format = "0")

	#If comparing multiple samples, add the sample column to split on to the DataFrame
	if split_col is not None:
		samples = []
		reads_dfs = []
		#Group by the bare column name: a one-element list would yield 1-tuple keys on
		#current pandas
		for sample, df in align_df[[cdr_col, split_col]].groupby(split_col):
			samples.append(sample)
			reads_dfs.append(df)

	else:
		samples = ["Repertoire"]
		reads_dfs = [align_df[[cdr_col]]]

	if hover_tooltip:
		#The tooltip fields must name columns of the data source built below. The list is
		#completed before the tool is created so the "Sample" entry is part of it.
		tooltips = [("Total Sampled Reads", "@reads"), ("Total Clones", "@clones")]
		if len(samples) > 1:
			tooltips.append(("Sample", "@sample"))

		hover_tool = HoverTool(point_policy = "snap_to_data", tooltips = tooltips, mode = "hline", names = ["rar_line"])
		plot.add_tools(hover_tool)

	sample_colors = ["#1EA078", "#DC5A00", "#786EB4", "#E6288C", "#B4D28C", "#A028B4"]
	for sample_idx, (sample, df) in enumerate(zip(samples, reads_dfs)):
		#Colors repeat when there are more samples than colors, so no sample is left out
		color = sample_colors[sample_idx % len(sample_colors)]
		total_reads = len(df)

		if reads is not None:
			subsamp_steps = reads
		else:
			#At least one read per step, otherwise the loop below never advances
			subsamp_steps = max(1, total_reads // steps)

		#Create the list of all read subsample counts to clonotype
		subsamp_sizes = []
		cur_total = subsamp_steps
		while cur_total < total_reads:
			subsamp_sizes.append(cur_total)
			cur_total += subsamp_steps

		subsamp_sizes.append(total_reads)

		subsamp_clones = [Clonotype_Usearch(df.sample(n)[cdr_col], identity = cdr_identity) for n in subsamp_sizes]

		rarefaction_data = {
			"reads": subsamp_sizes,
			"clones": subsamp_clones,
			"sample": [sample if len(samples) > 1 else None] * len(subsamp_sizes)
		}
		rar_source = ColumnDataSource(rarefaction_data)

		plot.line(x = "reads", y = "clones", color = color, line_width = 3, source = rar_source,
				  legend = "sample", name = "rar_line")
		plot.scatter(x = "reads", y = "clones", color = color, source = rar_source)

		if save_to_file:
			#format() rather than "+" so that non-string sample values work as well
			with open("{0}_Rarefaction_Data.txt".format(sample), "w") as rarefaction_text_file:
				rarefaction_text_file.write("Reads\tClones\n")
				for read_count, clone_count in zip(subsamp_sizes, subsamp_clones):
					rarefaction_text_file.write("{0}\t{1}\n".format(read_count, clone_count))

	if png is not None:
		export_png(plot, png)

	return plot
Ejemplo n.º 12
0
def CDR_Length_Histogram_Plot(clone_df, png = None, title = "", cdr_col = "CDR3_AA", split_col = None,
							  quantile_boundries = (0.0001, 0.9999), figsize = (800, 600)):
	"""Creates a normalized histogram of the string lengths in `cdr_col`, one bar per length.

	With `split_col`, every distinct value of that column gets its own set of bars, drawn
	side by side inside each unit-wide length bin.

	Parameters
	----------
	clone_df: pandas DataFrame
		Must contain `cdr_col` and, if given, `split_col`.
	png: str, optional
		File name the plot is exported to as PNG.
	title: str
		Figure title.
	cdr_col: str
		Column of strings whose lengths are counted.
	split_col: str, optional
		Column whose distinct values are plotted as separate samples.
	quantile_boundries: tuple or None
		(lower, upper) quantiles of the lengths used as the initial x range; None leaves the
		x range untouched.
	figsize: tuple
		(width, height) of the figure.

	Returns
	----------
	plot: the bokeh figure
	"""
	figure_params = {
		"plot_width": figsize[0],
		"plot_height": figsize[1],
		"title": title,
		"tools": "pan, wheel_zoom, box_zoom, save, reset, help",
		"active_scroll": "wheel_zoom",
		"toolbar_location": "right"
	}

	plot = figure(**figure_params)
	plot.grid.visible = False
	plot.xaxis.minor_tick_line_color = None
	plot.xaxis.axis_label = "CDR3 Length"
	plot.xaxis.axis_label_text_font_size = "12pt"
	plot.xaxis.major_label_text_font_size = "12pt"
	plot.yaxis.axis_label = "P(x)"
	plot.yaxis.axis_label_text_font_size = "12pt"
	plot.yaxis.major_label_text_font_size = "12pt"

	#To compare samples, add the sample column to split on to the DataFrame
	if split_col is not None:
		samples = []
		cdr3_lens = []
		#Group by the bare column name: a one-element list would yield 1-tuple keys on
		#current pandas, and those keys are used as legend entries
		for sample, df in clone_df[[cdr_col, split_col]].groupby(split_col):
			samples.append(sample)
			cdr3_lens.append(df[cdr_col].str.len())

	else:
		samples = ["Repertoire"]
		cdr3_lens = [clone_df[cdr_col].str.len()]

	bin_min = int(min(cdr_len_series.min() for cdr_len_series in cdr3_lens))
	bin_max = int(max(cdr_len_series.max() for cdr_len_series in cdr3_lens)) + 1
	#The values are bin EDGES: n + 1 edges give n bins, and numpy closes the last bin on
	#both sides. One more edge than the longest length is needed so that the two longest
	#lengths do not share a bin (and so that a single distinct length still has a bin).
	bin_edges = list(range(bin_min, bin_max + 1))

	bar_colors = ["#A0C8E6", "#32A032", "#1E78B4", "#B4DC8C"]
	bar_offset = 0.0
	bar_width = 1 / len(samples)

	upper_y = 0.0

	for idx, (sample, cdr_len_series) in enumerate(zip(samples, cdr3_lens)):
		heights, lefts = numpy.histogram(cdr_len_series, density = True, bins = bin_edges)

		#Ensure proper Y axis scrolling boundaries are set
		upper_y = max(upper_y, heights.max())

		#Shift bars if multiple samples are being plotted
		bar_lefts = lefts[:-1].astype(float) + bar_offset
		bar_rights = bar_lefts + bar_width

		#Colors repeat when there are more samples than colors
		plot.quad(top = heights, bottom = 0, left = bar_lefts, right = bar_rights,
				  fill_color = bar_colors[idx % len(bar_colors)], line_color = None, legend = sample)

		bar_offset += bar_width

	plot.y_range.start = -0.001
	plot.y_range.end = upper_y
	plot.y_range.bounds = (-0.05, upper_y * 1.5)

	if quantile_boundries is not None:
		all_lens = clone_df[cdr_col].str.len()
		plot.x_range.start = all_lens.quantile(quantile_boundries[0])
		plot.x_range.end = all_lens.quantile(quantile_boundries[1])

	plot.x_range.bounds = (0, bin_max + 4)

	if png is not None:
		export_png(plot, png)

	return plot
Ejemplo n.º 13
0
def VJ_Gene_Plot(clone_df,
                 png=None,
                 title="",
                 vgene_col="VGene",
                 jgene_col="JGene",
                 count_col="Clustered",
                 vgene_colors=vgene_colors,
                 vfamily_colors=vfamily_colors,
                 jgene_colors=jgene_colors,
                 vj_gap=0.008,
                 vgene_gap=0.0,
                 line_width=0.4,
                 figsize=(800, 800),
                 hover_tooltip=True):
    """Creates a two-ring donut chart for prevalence of all V/J gene pairs in a Repertoire.

    The inner ring has one wedge per V gene, sized by its share of the summed `count_col`.
    The outer ring splits every V gene wedge into the J genes paired with it. A select
    widget above the plot switches the inner ring between per-gene and per-family colors.

    Parameters
    ----------
    clone_df: pandas DataFrame
        Must contain `vgene_col`, `jgene_col` and `count_col`.
    png: str, optional
        File name the plot (without the widget) is exported to as PNG.
    title: str
        Figure title.
    vgene_col, jgene_col: str
        Names of the V gene and J gene columns.
    count_col: str
        Name of the column that is summed per V/J pair.
    vgene_colors, vfamily_colors, jgene_colors: mapping
        Color per V gene, per V family (the part of the V gene name before the first "-")
        and per J gene. The colors must provide a `darken` method.
    vj_gap: float
        Radial gap between the two rings.
    vgene_gap: float
        Gap, in degrees, between neighbouring V gene wedges.
    line_width: float
        Outline width of the wedges.
    figsize: tuple
        (width, height) of the figure.
    hover_tooltip: bool
        Whether to add a hover tool showing gene name and percentage.

    Returns
    ----------
    plot_layout: bokeh column layout holding the select widget and the figure
    """

    figure_params = {
        "plot_width": figsize[0],
        "plot_height": figsize[1],
        "x_range": Range1d(-0.5, 1.5, bounds=(-1.5, 2.5)),
        "y_range": Range1d(-0.5, 1.5, bounds=(-1.5, 2.5)),
        "title": title,
        "tools": "pan, wheel_zoom, box_zoom, tap, save, reset, help",
        "active_scroll": "wheel_zoom",
        "toolbar_location": "right"
    }

    plot = figure(**figure_params)
    plot.grid.visible = False
    plot.axis.visible = False

    if hover_tooltip:
        hover_tool = HoverTool(tooltips=[("Gene", "@legend"),
                                         ("Percent", "@percent{(0.00%)}")],
                               point_policy="snap_to_data")
        plot.add_tools(hover_tool)

    gene_df = clone_df[[vgene_col, jgene_col,
                        count_col]].groupby([vgene_col,
                                             jgene_col]).agg({count_col: sum})
    #Sort by V gene ascending, then J gene ascending
    gene_df = gene_df.sort_index()
    gene_df = gene_df.reset_index()

    total_vgenes = len(gene_df[vgene_col].drop_duplicates())
    total_gapsize = total_vgenes * vgene_gap
    remaining_size = 360.0 - float(total_gapsize)
    gap_size = float(vgene_gap)

    total_counts = gene_df[count_col].sum()
    gene_df["Arc_Length"] = gene_df[count_col] / total_counts * remaining_size
    #Starting at 90 degrees (top center of the circle) plus half the gap size
    cur_v_start = 90.0 + (gap_size / 2.0)

    v_start_angles = []
    v_end_angles = []
    vgene_facecolors = []
    vgene_hover_colors = []
    vfamily_facecolors = []
    vfamily_hover_colors = []
    v_legend_text = []
    v_legend_percent = []

    j_start_angles = []
    j_end_angles = []
    jgene_facecolors = []
    jgene_hover_colors = []
    j_legend_text = []
    j_legend_percent = []

    for vgene in gene_df[vgene_col].drop_duplicates():
        cur_vgene_df = gene_df[gene_df[vgene_col] == vgene]
        vfamily = vgene.split("-")[0]

        vgene_color = vgene_colors[vgene]
        vfamily_color = vfamily_colors[vfamily]

        cur_v_end = cur_v_start + cur_vgene_df["Arc_Length"].sum()

        v_start_angles.append(cur_v_start)
        v_end_angles.append(cur_v_end)

        vgene_facecolors.append(vgene_color)
        vgene_hover_colors.append(vgene_color.darken(0.05))
        vfamily_facecolors.append(vfamily_color)
        vfamily_hover_colors.append(vfamily_color.darken(0.05))

        v_legend_text.append(vgene)
        #Counts are read from count_col, not from a fixed column name, so that a
        #non-default count_col works
        cur_vgene_counts = cur_vgene_df[count_col].sum()
        v_legend_percent.append(cur_vgene_counts / total_counts)

        #The J wedges of this V gene are laid end to end across the V gene's own arc.
        #After the groupby above every row is a distinct V/J pair, so the row's count
        #is the pair's count.
        cur_j_start = cur_v_start
        for jgene, jgene_arc_length, cur_jgene_counts in zip(
                cur_vgene_df[jgene_col], cur_vgene_df["Arc_Length"],
                cur_vgene_df[count_col]):
            cur_j_end = cur_j_start + jgene_arc_length

            jgene_color = jgene_colors[jgene]

            j_start_angles.append(cur_j_start)
            j_end_angles.append(cur_j_end)

            jgene_facecolors.append(jgene_color)
            jgene_hover_colors.append(jgene_color.darken(0.05))

            cur_j_start = cur_j_end

            j_legend_text.append(jgene)
            #Share of this J gene within its V gene, not within the whole repertoire
            j_legend_percent.append(cur_jgene_counts / cur_vgene_counts)

        cur_v_start = cur_v_end + gap_size

    #Glyph settings shared by both rings
    wedge_style = {
        "x": 0.5,
        "y": 0.5,
        "start_angle": "start_angle",
        "end_angle": "end_angle",
        "fill_color": "fill_color",
        "selection_fill_color": "fill_color",
        "nonselection_fill_color": "fill_color",
        "selection_fill_alpha": 1.0,
        "nonselection_fill_alpha": 0.2,
        "hover_fill_color": "hover_fill_color",
        "line_color": "black",
        "line_width": line_width,
        "legend": "legend",
        "start_angle_units": "deg",
        "end_angle_units": "deg"
    }

    #"fill_color" / "hover_fill_color" are the columns the glyph reads; the gene and
    #family variants are kept alongside so the callback below can copy either set in
    v_wedge_data = {
        "start_angle": v_start_angles,
        "end_angle": v_end_angles,
        "fill_color": vgene_facecolors,
        "legend": v_legend_text,
        "percent": v_legend_percent,
        "vgene_facecolors": vgene_facecolors,
        "vfamily_facecolors": vfamily_facecolors,
        "hover_fill_color": vgene_hover_colors,
        "vgene_hover_colors": vgene_hover_colors,
        "vfamily_hover_colors": vfamily_hover_colors
    }
    v_source = ColumnDataSource(v_wedge_data)

    v_inner_rad = 0.4
    v_outer_rad = 0.692

    plot.annular_wedge(inner_radius=v_inner_rad,
                       outer_radius=v_outer_rad,
                       source=v_source,
                       **wedge_style)

    j_wedge_data = {
        "start_angle": j_start_angles,
        "end_angle": j_end_angles,
        "fill_color": jgene_facecolors,
        "legend": j_legend_text,
        "percent": j_legend_percent,
        "hover_fill_color": jgene_hover_colors
    }

    j_source = ColumnDataSource(j_wedge_data)

    j_inner_rad = v_outer_rad + vj_gap
    j_outer_rad = j_inner_rad + 0.15

    plot.annular_wedge(inner_radius=j_inner_rad,
                       outer_radius=j_outer_rad,
                       source=j_source,
                       **wedge_style)

    if png is not None:
        export_png(plot, png)

    #The loop variable is declared with "var": an undeclared name is an implicit global,
    #which is an error when the callback code runs in strict mode
    change_v_color = CustomJS(args={"source": v_source},
                              code="""
		var selection = cb_obj.value;
		var new_color_array;
		var new_hover_array;
		if(selection.toLowerCase().indexOf("gene") !== -1) {
			new_color_array = source.data["vgene_facecolors"];
			new_hover_array = source.data["vgene_hover_colors"];
		} else {
			new_color_array = source.data["vfamily_facecolors"];
			new_hover_array = source.data["vfamily_hover_colors"];
		}
		var fill_color = source.data["fill_color"];
		var hover_fill_color = source.data["hover_fill_color"];
		for(var idx = 0; idx < fill_color.length; idx++) {
			fill_color[idx] = new_color_array[idx];
			hover_fill_color[idx] = new_hover_array[idx];
		}
		source.change.emit();
	""")

    v_data_color_by = Select(title="Color by:",
                             options=["V Gene", "V Family"],
                             value="V Gene",
                             callback=change_v_color)

    plot_layout = column(v_data_color_by, plot)
    return plot_layout
Ejemplo n.º 14
0
def write_cross_chart(df, cluster_output_path, k):
    """Export a jittered C3-vs-C1 cluster scatter plot as SVG and PNG.

    Every row of ``df`` is drawn at (``c3``, ``c1``) plus a random offset and
    colored through the module-level ``colormap`` keyed by its ``cluster``
    value. The files ``cross-{k}.svg`` and ``cross-{k}.png`` are written into
    ``cluster_output_path``.

    :param df: DataFrame providing the ``c1``, ``c3`` and ``cluster`` columns.
    :param cluster_output_path: Directory the two image files are written to.
    :param k: Value interpolated into the output file names.
    :return: None
    """
    height = 1600
    width = 1600

    clusters = df.cluster.to_list()
    plot = figure(
        plot_width=width,
        plot_height=height,
        active_scroll="wheel_zoom",
        output_backend="svg",
    )
    # NOTE(review): the data source built below has no "title" column, so this
    # tooltip has nothing to display -- confirm whether it is still wanted.
    plot.add_tools(HoverTool(tooltips="@title"))

    # Spread coincident points: each one is moved by a random offset that
    # lies inside a circle of radius sqrt(0.5) around its true position.
    jitter_radius = 0.5 ** 0.5
    new_x = []
    new_y = []
    for x_coord, y_coord in zip(df.c3.to_list(), df.c1.to_list()):
        x_rand = random.uniform(-jitter_radius, jitter_radius)
        # Clamp at zero: rounding can make the radicand slightly negative,
        # and a negative float raised to 0.5 yields a complex number.
        y_rand_range = max(0.0, 0.5 - x_rand ** 2) ** 0.5
        y_rand = random.uniform(-y_rand_range, y_rand_range)
        new_x.append(x_coord + x_rand)
        new_y.append(y_coord + y_rand)

    colors = [colormap[cluster] for cluster in clusters]
    source = ColumnDataSource(data={"x": new_x, "y": new_y, "color": colors})
    plot.scatter(
        source=source,
        x="x",
        y="y",
        line_alpha=0.6,
        fill_alpha=0.6,
        size=10,
        color="color",
    )

    plot.yaxis.axis_label_text_font_size = "25pt"
    plot.yaxis.major_label_text_font_size = "25pt"
    plot.xaxis.axis_label_text_font_size = "25pt"
    plot.xaxis.major_label_text_font_size = "25pt"
    plot.title.text_font_size = value("32pt")
    plot.xaxis.visible = True
    plot.yaxis.visible = True

    # Axis-end captions as (text, x_offset, y_offset), added in this order.
    caption_specs = (
        ("C1", 0, 750),
        ("-C1", 0, -750),
        ("C3", 750, 0),
        ("-C3", -750, 0),
    )
    for text, x_offset, y_offset in caption_specs:
        caption = Label(
            text=text,
            x_offset=x_offset,
            y_offset=y_offset,
            text_font_size="30px",
        )
        plot.add_layout(caption, "center")

    # Transparent, frameless canvas with both axes crossing at the origin.
    plot.background_fill_color = None
    plot.border_fill_color = None
    plot.grid.grid_line_color = None
    plot.outline_line_color = None
    plot.yaxis.fixed_location = 0
    plot.xaxis.fixed_location = 0
    plot.toolbar.logo = None
    plot.toolbar_location = None

    driver = webdriver.Chrome(os.path.join(BASE_DIR, "chromedriver"),
                              options=opts)
    try:
        export_svg(
            plot,
            filename=os.path.join(cluster_output_path, f"cross-{k}.svg"),
            webdriver=driver,
            height=height,
            width=width,
        )
        export_png(
            plot,
            filename=os.path.join(cluster_output_path, f"cross-{k}.png"),
            webdriver=driver,
            height=height,
            width=width,
        )
    finally:
        # Always shut the browser down; otherwise every call leaks a
        # chromedriver/Chrome process.
        driver.quit()
Ejemplo n.º 15
0
            f.legend.label_text_font_size = font_size
            f.legend.glyph_width = 100
            f.legend.glyph_height = 40
            f.legend.spacing = 20

        for f in [f_pme_vs_ni, f_pse_vs_ni]:
            f.legend.location = 'bottom_left'

        f_kl_vs_ni.legend.location = 'center_left'
        f_pe_vs_ni.legend.location = 'center_left'

    if dnms[k] == 'delays10k':
        f_pme_vs_ni.yaxis.ticker = FixedTicker(ticks=[0.1, 0.4, 0.7])
    elif dnms[k] == 'airfoil':
        f_pe_vs_ni.yaxis.ticker = FixedTicker(ticks=[0.3, 0.6, 0.9])
        f_pse_vs_ni.yaxis.ticker = FixedTicker(ticks=[0.1, 0.5, 1])

    #bkp.show(bkl.gridplot([[f_pe_vs_ni, f_pme_vs_ni, f_pse_vs_ni], [f_pe_vs_cput, f_pme_vs_cput, f_pse_vs_cput]]))
    #figs = [f_kl_vs_ni, f_pe_vs_ni, f_pme_vs_ni, f_pme_best_vs_ni, f_pme_obj_vs_ni, f_pse_vs_ni, f_pse_best_vs_ni, f_pse_obj_vs_ni, hypchg_vs_ni]
    figs = [
        f_kl_vs_ni, f_pe_vs_ni, f_pme_vs_ni, f_pme_obj_vs_ni, f_pse_vs_ni,
        f_pse_obj_vs_ni
    ]
    if pngs:
        for i, f in enumerate(figs):
            export_png(f, 'figures/%s%d.png' % (dnms[k], i + 1))
    else:
        bkp.output_file('figures/' + dnms[k] + '.html')
        bki.save(bkl.gridplot([figs]))
        #bkp.show(bkl.gridplot([figs]))
Ejemplo n.º 16
0
def draw_vectors():
    """Export one scatter plot per pair of vector columns as SVG and PNG.

    Reads ``normalized_future_vectors.csv`` from ``OUTPUTS_DIR`` and, for
    every 2-combination of column positions from 5 onwards that is not in the
    exclusion set below, plots the rows whose ``cluster`` equals 3. Column
    names are expected to have the form ``"<start> | <end>"``; the two halves
    become the captions at the ends of the corresponding axis. Plots are
    written to ``svgs/{idx}.svg`` and ``pngs/{idx}.png`` with ``idx`` counting
    from 1.
    """
    df = pd.read_csv(os.path.join(OUTPUTS_DIR,
                                  "normalized_future_vectors.csv"))
    # Column-position pairs that are deliberately not plotted.
    filter_list = {
        (5, 6),
        (5, 7),
        (6, 7),
        (8, 9),
        (8, 10),
        (9, 10),
        (11, 12),
        (11, 13),
        (12, 13),
    }
    comb = [
        c for c in combinations(range(5, len(df.columns)), 2)
        if c not in filter_list
    ]
    # Loop-invariant, so built once instead of once per plot.
    colormap = {3: "#ffee33", 2: "#00a152", 1: "#2979ff", 0: "#d500f9"}

    # The exporters do not create missing directories.
    os.makedirs("svgs", exist_ok=True)
    os.makedirs("pngs", exist_ok=True)

    driver = webdriver.Chrome(os.path.join(BASE_DIR, "chromedriver"),
                              options=options)
    try:
        for idx, (x, y) in enumerate(comb, 1):
            X = df[df.columns[x]].to_list()
            Y = df[df.columns[y]].to_list()
            tsne_df = pd.DataFrame(zip(X, Y),
                                   index=range(len(X)),
                                   columns=["x_coord", "y_coord"])
            tsne_df["title"] = df["title"].to_list()
            tsne_df["cluster_no"] = df["cluster"].to_list()

            # Only cluster 3 is drawn; copy so that adding the color column
            # never writes through to tsne_df.
            only_one_cluster = tsne_df.loc[tsne_df.cluster_no == 3].copy()
            only_one_cluster["color"] = [
                colormap[cluster] for cluster in only_one_cluster["cluster_no"]
            ]
            plot_data = ColumnDataSource(
                data=only_one_cluster.to_dict(orient="list"))

            plot = figure(
                plot_width=1600,
                plot_height=1600,
                active_scroll="wheel_zoom",
                output_backend="svg",
                x_range=(-1.1, 1.1),
                y_range=(-1.1, 1.1),
            )
            plot.add_tools(HoverTool(tooltips="@title"))
            plot.circle(
                source=plot_data,
                x="x_coord",
                y="y_coord",
                line_alpha=0.6,
                fill_alpha=0.6,
                size=20,
                fill_color="color",
                line_color="color",
            )
            plot.yaxis.axis_label_text_font_size = "25pt"
            plot.yaxis.major_label_text_font_size = "25pt"
            plot.xaxis.axis_label_text_font_size = "25pt"
            plot.xaxis.major_label_text_font_size = "25pt"

            # Each column name carries the labels of both axis ends.
            start_x, end_x = (
                part.strip() for part in df.columns[x].split("|"))
            start_y, end_y = (
                part.strip() for part in df.columns[y].split("|"))

            plot.title.text_font_size = value("32pt")
            plot.xaxis.visible = True
            plot.yaxis.visible = True

            # Axis-end captions as (text, x_offset, y_offset), added in this
            # order. The right-hand caption sits at 600 rather than 750.
            caption_specs = (
                (end_y, 0, 750),
                (start_y, 0, -750),
                (end_x, 600, 0),
                (start_x, -750, 0),
            )
            for text, x_offset, y_offset in caption_specs:
                caption = Label(
                    text=text,
                    x_offset=x_offset,
                    y_offset=y_offset,
                    text_font_size="30px",
                )
                plot.add_layout(caption, "center")

            # Transparent, frameless canvas with axes crossing at the origin.
            plot.background_fill_color = None
            plot.border_fill_color = None
            plot.grid.grid_line_color = None
            plot.outline_line_color = None
            plot.yaxis.fixed_location = 0
            plot.xaxis.fixed_location = 0
            plot.toolbar.logo = None
            plot.toolbar_location = None

            print(idx)
            export_svg(
                plot,
                filename=f"svgs/{idx}.svg",
                webdriver=driver,
                height=1600,
                width=1600,
            )
            export_png(
                plot,
                filename=f"pngs/{idx}.png",
                webdriver=driver,
                height=1600,
                width=1600,
            )
    finally:
        # Always shut the browser down, even if an export fails part-way.
        driver.quit()
Ejemplo n.º 17
0
def Mosaic_Plot(clone_df,
                png=None,
                title="",
                top_clones=5000,
                count_col="Clustered",
                vgene_col="VGene",
                jgene_col="JGene",
                isotype_col="Isotype",
                vshm_col="V_SHM",
                jshm_col="J_SHM",
                vgene_colors=vgene_colors,
                vfamily_colors=vfamily_colors,
                jgene_colors=jgene_colors,
                isotype_colors=isotype_colors,
                line_width=0.3,
                figsize=(600, 600),
                hover_tooltip=True):
    """Creates an interactive mosaic (treemap) plot of clone frequencies.

    Each plotted clone is one rectangle laid out by ``squarify`` inside the
    unit square, sized by its share of ``count_col`` among the plotted clones.
    A "Color by:" dropdown recolors the rectangles client-side and toggles the
    legend and the SHM color bars accordingly.

    Parameters
    ----------
    clone_df: pandas DataFrame
        DataFrame of the clones to plot
    png: str
        Output PNG filename or None if none should be made; default is None.
        The PNG is exported with the default coloring, before the dropdown is
        attached.
    title: str
        Title of the output graph; default is ""
    top_clones: int
        Number of clones kept after sorting by ``count_col`` descending; a
        falsy value keeps all clones; default is 5000
    count_col: str
        Column name in clone_df of the clone counts; default is "Clustered"
    vgene_col, jgene_col, isotype_col, vshm_col, jshm_col: str
        Column names in clone_df of the respective annotation, or None to
        leave that annotation out of the tooltip and the coloring options
    vgene_colors, vfamily_colors, jgene_colors, isotype_colors: mapping
        Lookups passed to ``Series.map`` to turn an annotation value into a
        color. The V family key is the part of the V gene name before the
        first "-".
    line_width: float
        Outline width of the rectangles; default is 0.3
    figsize: tuple of (int, int)
        The width and height of the output plot; default is (600, 600)
    hover_tooltip: bool
        Whether to add a hover tool to the plot; default is True

    Returns
    ----------
    plot_layout: bokeh column layout
        The dropdown widget stacked above the mosaic figure
    """
    figure_params = {
        "plot_width": figsize[0],
        "plot_height": figsize[1],
        #"sizing_mode": "scale_both",
        "x_range": Range1d(-0.1, 1.1, bounds=(-1.0, 2.0)),
        "y_range": Range1d(-0.1, 1.1, bounds=(-1.0, 2.0)),
        #"outline_line_alpha": 0.0,
        "title": title,
        "tools": "pan, wheel_zoom, box_zoom, save, reset, help",
        "active_scroll": "wheel_zoom",
        "toolbar_location": "right"
    }

    plot = figure(**figure_params)
    plot.grid.visible = False
    plot.axis.visible = False

    # NOTE(review): "@CloneID" presumably resolves to the DataFrame index,
    # which ColumnDataSource exports as a column -- confirm the index name.
    hover_tooltips = [("Clone ID", "@CloneID")]

    info_cols = [count_col]
    if vgene_col is not None:
        info_cols.append(vgene_col)
        hover_tooltips.append(("V Gene", "@" + vgene_col))
    if jgene_col is not None:
        info_cols.append(jgene_col)
        hover_tooltips.append(("J Gene", "@" + jgene_col))
    if isotype_col is not None:
        info_cols.append(isotype_col)
        hover_tooltips.append(("Isotype", "@" + isotype_col))
    if vshm_col is not None:
        info_cols.append(vshm_col)
        hover_tooltips.append(("V Gene SHM", "@" + vshm_col + "{(0.00%)}"))
    if jshm_col is not None:
        info_cols.append(jshm_col)
        hover_tooltips.append(("J Gene SHM", "@" + jshm_col + "{(0.00%)}"))

    # The tooltip list must be complete before the HoverTool is created:
    # entries appended to this list afterwards may not reach the tool, because
    # bokeh wraps list-valued properties when they are assigned.
    hover_tooltips.append(("Clone Frequency", "@Clone_Frequencies{(0.00%)}"))

    if hover_tooltip:
        hover_tool = HoverTool(point_policy="snap_to_data",
                               tooltips=hover_tooltips)
        plot.add_tools(hover_tool)

    mosaic_df = clone_df[info_cols]
    mosaic_df = mosaic_df.sort_values([count_col], ascending=[False])

    if top_clones:
        # Copy so the column assignments below act on an independent frame
        # rather than on a slice of the sorted one.
        mosaic_df = mosaic_df.head(top_clones).copy()

    total_area = float(mosaic_df[count_col].sum())
    mosaic_df["Clone_Frequencies"] = mosaic_df[count_col].astype(
        float) / total_area

    mosaic_rects = squarify(mosaic_df["Clone_Frequencies"].tolist(), 0.0, 0.0,
                            1.0, 1.0)
    #Add half width/height to x/y position for center points
    mosaic_df["x"] = [rect["x"] + rect["dx"] / 2.0 for rect in mosaic_rects]
    mosaic_df["y"] = [rect["y"] + rect["dy"] / 2.0 for rect in mosaic_rects]
    mosaic_df["width"] = [rect["dx"] for rect in mosaic_rects]
    mosaic_df["height"] = [rect["dy"] for rect in mosaic_rects]

    #By default there is no legend text, since colors are alternating and non-informative
    mosaic_df["legend"] = ""
    mosaic_df["Empty_Legend"] = ""

    alternating_colors = [
        RGB(102, 194, 165),
        RGB(252, 141, 98),
        RGB(141, 160, 203)
    ]
    alt2_color_cycle = cycle(alternating_colors[0:2])
    alt3_color_cycle = cycle(alternating_colors)
    mosaic_df["alternating2_colors"] = [
        next(alt2_color_cycle) for _ in mosaic_rects
    ]
    mosaic_df["alternating3_colors"] = [
        next(alt3_color_cycle) for _ in mosaic_rects
    ]
    #Default color scheme is alternating 3 colors
    mosaic_df["fill_color"] = mosaic_df["alternating3_colors"]

    #Set up various mosaic coloring options and associated legends
    color_select_options = ["Alternating (2)", "Alternating (3)"]
    if vgene_col in mosaic_df.columns:
        mosaic_df["vgene_colors"] = mosaic_df[vgene_col].map(vgene_colors)
        vfamilies = mosaic_df[vgene_col].str.split("-").str[0]
        mosaic_df["vfamily_colors"] = vfamilies.map(vfamily_colors)
        color_select_options.append("V Gene")
        color_select_options.append("V Family")
        mosaic_df["VGene_Legend"] = mosaic_df[vgene_col]
        mosaic_df["VFamily_Legend"] = mosaic_df[vgene_col].str.split(
            "-").str[0]
    if jgene_col in mosaic_df.columns:
        mosaic_df["jgene_colors"] = mosaic_df[jgene_col].map(jgene_colors)
        color_select_options.append("J Gene")
        mosaic_df["JGene_Legend"] = mosaic_df[jgene_col]
    if isotype_col in mosaic_df.columns:
        mosaic_df["isotype_colors"] = mosaic_df[isotype_col].map(
            isotype_colors)
        color_select_options.append("Isotype")
        mosaic_df["Isotype_Legend"] = mosaic_df[isotype_col]

    #Using viridis as a quantitative heatmap color scheme for SHM values
    #The SHM values are binned into 180 groups; viridis in >180 bins uses some values twice, which pandas.cut can't use
    shm_viridis = list(viridis(180))
    colorbar_tick_formatter = NumeralTickFormatter(format="0.00%")

    if vshm_col in mosaic_df.columns:
        vshm_min = mosaic_df[vshm_col].min()
        vshm_max = mosaic_df[vshm_col].max()
        #Use pandas.cut to bin the V gene SHM values into the heatmap colors
        mosaic_df["vshm_colors"] = pandas.cut(mosaic_df[vshm_col],
                                              bins=180,
                                              labels=shm_viridis)
        color_select_options.append("V Gene SHM")

        vshm_color_mapper = LinearColorMapper(palette=shm_viridis,
                                              low=vshm_min,
                                              high=vshm_max)
        vshm_ticks = FixedTicker(ticks=numpy.linspace(vshm_min, vshm_max, 8))
        vshm_colorbar = ColorBar(color_mapper=vshm_color_mapper,
                                 location=(0, 0),
                                 name="vshm_colorbar",
                                 label_standoff=12,
                                 formatter=colorbar_tick_formatter,
                                 ticker=vshm_ticks)
        plot.add_layout(vshm_colorbar, "right")

    if jshm_col in mosaic_df.columns:
        jshm_min = mosaic_df[jshm_col].min()
        jshm_max = mosaic_df[jshm_col].max()
        #Use pandas.cut to bin the J gene SHM values into the heatmap colors
        mosaic_df["jshm_colors"] = pandas.cut(mosaic_df[jshm_col],
                                              bins=180,
                                              labels=shm_viridis)
        color_select_options.append("J Gene SHM")

        jshm_color_mapper = LinearColorMapper(palette=shm_viridis,
                                              low=jshm_min,
                                              high=jshm_max)
        jshm_ticks = FixedTicker(ticks=numpy.linspace(jshm_min, jshm_max, 8))
        jshm_colorbar = ColorBar(color_mapper=jshm_color_mapper,
                                 location=(0, 0),
                                 name="jshm_colorbar",
                                 label_standoff=12,
                                 formatter=colorbar_tick_formatter,
                                 ticker=jshm_ticks)
        plot.add_layout(jshm_colorbar, "right")

    mosaic_source = ColumnDataSource(mosaic_df)

    plot.rect(x="x",
              y="y",
              width="width",
              height="height",
              fill_color="fill_color",
              legend="legend",
              line_color="black",
              line_width=line_width,
              source=mosaic_source)

    #By default, the plot legend and ColorBar should be turned off (since the color is repeating and uninformative)
    plot.legend[0].visible = False
    # NOTE(review): both lookups assume the color bars were added above; with
    # vshm_col or jshm_col set to None (or missing from clone_df) the [0]
    # raises IndexError. The callback below also needs both objects, so the
    # SHM columns are effectively required -- confirm and guard if not.
    vshm_colorbar = plot.select("vshm_colorbar")[0]
    jshm_colorbar = plot.select("jshm_colorbar")[0]
    vshm_colorbar.visible = False
    jshm_colorbar.visible = False

    if png is not None:
        export_png(plot, png)

    change_args = {
        "source": mosaic_source,
        "legend_obj": plot.legend[0],
        "vshm_colorbar_obj": vshm_colorbar,
        "jshm_colorbar_obj": jshm_colorbar
    }
    # The SHM options are tested before the plain "v gene"/"j gene" ones
    # because the shorter names are substrings of the SHM option names.
    # The loop index is declared with "var" so it does not become an implicit
    # global (which is an error when the callback runs in strict mode).
    change_rect_color = CustomJS(args=change_args,
                                 code="""
		var selection = cb_obj.value.toLowerCase();
		var new_color_array;
		var new_legend_array;

		if(selection.indexOf("v gene shm") !== -1) {
			new_color_array = source.data["vshm_colors"];
			new_legend_array = source.data["Empty_Legend"];
			legend_obj.visible = false;
			vshm_colorbar_obj.visible = true;
			jshm_colorbar_obj.visible = false;
		} else if(selection.indexOf("j gene shm") !== -1) {
			new_color_array = source.data["jshm_colors"];
			new_legend_array = source.data["Empty_Legend"];
			legend_obj.visible = false;
			vshm_colorbar_obj.visible = false;
			jshm_colorbar_obj.visible = true;
		} else if(selection.indexOf("v gene") !== -1) {
			new_color_array = source.data["vgene_colors"];
			new_legend_array = source.data["VGene_Legend"];
			legend_obj.visible = true;
			vshm_colorbar_obj.visible = false;
			jshm_colorbar_obj.visible = false;
		} else if(selection.indexOf("v family") !== -1) {
			new_color_array = source.data["vfamily_colors"];
			new_legend_array = source.data["VFamily_Legend"];
			legend_obj.visible = true;
			vshm_colorbar_obj.visible = false;
			jshm_colorbar_obj.visible = false;
		} else if(selection.indexOf("j gene") !== -1) {
			new_color_array = source.data["jgene_colors"];
			new_legend_array = source.data["JGene_Legend"];
			legend_obj.visible = true;
			vshm_colorbar_obj.visible = false;
			jshm_colorbar_obj.visible = false;
		} else if(selection.indexOf("isotype") !== -1) {
			new_color_array = source.data["isotype_colors"];
			new_legend_array = source.data["Isotype_Legend"];
			legend_obj.visible = true;
			vshm_colorbar_obj.visible = false;
			jshm_colorbar_obj.visible = false;
		} else if(selection.indexOf("2") !== -1) {
			new_color_array = source.data["alternating2_colors"];
			new_legend_array = source.data["Empty_Legend"];
			legend_obj.visible = false;
			vshm_colorbar_obj.visible = false;
			jshm_colorbar_obj.visible = false;
		} else {
			new_color_array = source.data["alternating3_colors"];
			new_legend_array = source.data["Empty_Legend"];
			legend_obj.visible = false;
			vshm_colorbar_obj.visible = false;
			jshm_colorbar_obj.visible = false;
		}

		var fill_color = source.data["fill_color"];
		var legend = source.data["legend"];
		for(var idx = 0; idx < fill_color.length; idx++) {
			fill_color[idx] = new_color_array[idx];
			legend[idx] = new_legend_array[idx];
		}
		source.change.emit();
	""")

    patch_coloring_select = Select(title="Color by:",
                                   options=color_select_options,
                                   value="Alternating (3)",
                                   callback=change_rect_color)

    plot_layout = column(patch_coloring_select, plot)

    return plot_layout
Ejemplo n.º 18
0
def Burtin_VGene_SHM_Plot(clone_df,
                          png=None,
                          title="",
                          vgene_col="VGene",
                          vshm_col="V_SHM",
                          split_col=None,
                          vfamily_colors=vfamily_colors,
                          label_arc=20,
                          figsize=(900, 900)):
    """Creates a circular (Burtin-style) bar plot of mean V gene SHM.

    Every distinct V gene gets an equal arc of the circle, shaded by its V
    family, and one radial bar per sample whose length is the mean of
    ``vshm_col`` over that gene's clones. An arc of ``label_arc`` degrees at
    the top of the circle is left free for the SHM axis labels.

    Parameters
    ----------
    clone_df: pandas DataFrame
        DataFrame of the clones to plot
    png: str
        Output PNG filename or None if none should be made; default is None
    title: str
        Title of the output graph; default is ""
    vgene_col: str
        Column name in clone_df of the V gene names; default is "VGene"
    vshm_col: str
        Column name in clone_df of the V gene SHM values; default is "V_SHM"
    split_col: str
        Column separating samples in clone_df, or None to treat all clones as
        one sample; default is None
    vfamily_colors: mapping
        Lookup from V family (the part of the gene name before the first "-")
        to the background color of its arcs
    label_arc: number
        Degrees of the circle reserved for the axis labels; default is 20
    figsize: tuple of (int, int)
        The width and height of the output plot; default is (900, 900)

    Returns
    ----------
    plot: bokeh figure
        The figure object for the Burtin plot
    """
    figure_params = {
        "plot_width": figsize[0],
        "plot_height": figsize[1],
        "x_axis_type": None,
        "y_axis_type": None,
        "x_range": Range1d(-45, 45, bounds=(-50, 50)),
        "y_range": Range1d(-45, 45, bounds=(-50, 50)),
        "title": title,
        "tools": "pan, wheel_zoom, box_zoom, save, reset, help",
        "active_scroll": "wheel_zoom",
        "toolbar_location": "right",
        "background_fill_color": RGB(216, 216, 216)
    }

    plot = figure(**figure_params)
    plot.grid.visible = False
    plot.axis.visible = False

    label_offset = 90  #Offset the SHM % labels to the top of the plot
    plot_data_degrees = 360 - label_arc
    initial_angle = label_offset + label_arc / 2
    ending_angle = label_offset + 360 - label_arc / 2
    plot_inner_rad = 10
    plot_outer_rad = 35
    plot_thickness = plot_outer_rad - plot_inner_rad

    df_cols = [vgene_col, vshm_col]
    #If comparing multiple samples, add the sample column to split on to the DataFrame
    if split_col is not None:
        df_cols.append(split_col)

    vgene_shm_df = clone_df[df_cols].sort_values([vgene_col])

    total_vgenes = len(vgene_shm_df[vgene_col].drop_duplicates())
    vgene_arc_degrees = plot_data_degrees / total_vgenes

    #Create and color arc backgrounds by V family
    vgene_family_df = vgene_shm_df[[vgene_col
                                    ]].drop_duplicates().reset_index(drop=True)
    vgene_family_df["VFamily"] = vgene_family_df[vgene_col].str.split(
        "-").str[0]
    vgene_family_df["fill_color"] = vgene_family_df["VFamily"].map(
        vfamily_colors)
    vfamily_arc_length = plot_data_degrees / total_vgenes
    vgene_family_df[
        "start_angle"] = vgene_family_df.index * vfamily_arc_length + initial_angle
    vgene_family_df[
        "end_angle"] = vgene_family_df["start_angle"] + vfamily_arc_length

    vfamily_source = ColumnDataSource(vgene_family_df)
    plot.annular_wedge(x=0,
                       y=0,
                       start_angle="start_angle",
                       end_angle="end_angle",
                       fill_color="fill_color",
                       inner_radius=plot_inner_rad,
                       outer_radius=plot_outer_rad,
                       line_color=None,
                       source=vfamily_source,
                       start_angle_units="deg",
                       end_angle_units="deg")

    if split_col in vgene_shm_df:
        samples = []
        grouped_vgene_shm_dfs = []
        # Group by the bare column name: grouping by a one-element list makes
        # newer pandas yield 1-tuples as keys, which would then be used as the
        # sample label text below.
        for sample_name, sample_df in vgene_shm_df.groupby(split_col):
            samples.append(sample_name)
            grouped_vgene_shm_dfs.append(
                sample_df.groupby(vgene_col)[vshm_col].agg(["mean"]))

        #Add the V genes that may be present in one sample but not in the current one
        all_vgenes = vgene_shm_df[vgene_col].drop_duplicates().tolist()
        grouped_vgene_shm_dfs = [
            df.reindex(all_vgenes).reset_index()
            for df in grouped_vgene_shm_dfs
        ]

        vshm_min = min([df["mean"].min() for df in grouped_vgene_shm_dfs])
        vshm_max = max([df["mean"].max() for df in grouped_vgene_shm_dfs])

    else:
        grouped_vgene_shm_df = vgene_shm_df.groupby(vgene_col)[vshm_col].agg(
            ["mean"]).reset_index()
        grouped_vgene_shm_df = grouped_vgene_shm_df.sort_values(
            [vgene_col]).reset_index(drop=True)

        vshm_min = grouped_vgene_shm_df["mean"].min()
        vshm_max = grouped_vgene_shm_df["mean"].max()

        samples = ["All"]
        grouped_vgene_shm_dfs = [grouped_vgene_shm_df]

    #Create the labels and radial axis lines for the SHM data
    # NOTE(review): the labels run from vshm_min to vshm_max, but the bars
    # below are scaled as mean / vshm_max (i.e. from 0), so labels and bar
    # lengths only agree when vshm_min is 0 -- confirm the intended scale.
    shm_labels = [
        "{0:.1%}".format(shm) for shm in numpy.linspace(vshm_min, vshm_max, 7)
    ]
    shm_label_radii = numpy.linspace(plot_inner_rad, plot_outer_rad, 7)
    plot.circle(x=0,
                y=0,
                radius=shm_label_radii,
                fill_color=None,
                line_color="white")
    # The innermost ring is left unlabeled.
    plot.text(x=0,
              y=shm_label_radii[1:],
              text=shm_labels[1:],
              text_font_size="10pt",
              text_align="center",
              text_baseline="middle")

    #Create line-width annular wedges to separate V genes
    sep_angles = numpy.linspace(initial_angle, ending_angle, total_vgenes + 1)
    sep_inner_radius = plot_inner_rad - 1
    sep_outer_radius = plot_outer_rad + 1
    plot.annular_wedge(x=0,
                       y=0,
                       start_angle=sep_angles,
                       end_angle=sep_angles,
                       fill_color=None,
                       inner_radius=sep_inner_radius,
                       outer_radius=sep_outer_radius,
                       line_color="black",
                       start_angle_units="deg",
                       end_angle_units="deg")

    #Gene text labels; text angle location is the midpoint of the V gene separation lines
    text_radius = plot_outer_rad + 3.5
    text_radian_locs = numpy.deg2rad((sep_angles[1:] + sep_angles[:-1]) / 2)
    text_x = text_radius * numpy.cos(text_radian_locs)
    text_y = text_radius * numpy.sin(text_radian_locs)
    #Angle the text based on the position around the circle; reverse the left half so the text isn't upside-down
    mid_graph_radian = numpy.deg2rad(label_offset + 180)
    text_angles = [
        rad if rad > mid_graph_radian else rad + numpy.pi
        for rad in text_radian_locs
    ]
    plot.text(x=text_x,
              y=text_y,
              text=vgene_family_df[vgene_col],
              angle=text_angles,
              text_font_size="10pt",
              text_align="center",
              text_baseline="middle")

    #Finally draw the bars and legend for the mean SHM values for all clones of a specific V gene
    total_samples = len(grouped_vgene_shm_dfs)
    vgene_arc_radians = numpy.deg2rad(vgene_arc_degrees)
    bar_width = vgene_arc_radians / (total_samples + 1)
    spacer_width = bar_width / (total_samples + 1)
    sample_colors = (RGB(60, 60, 60), RGB(130, 40, 40), RGB(60, 60, 130),
                     RGB(10, 50, 100), RGB(150, 100, 20))
    sample_label_ys = numpy.linspace(-total_samples, total_samples,
                                     total_samples)
    arc_starts = text_radian_locs - (vgene_arc_radians / 2) + spacer_width

    for sample_idx, cur_df in enumerate(grouped_vgene_shm_dfs):
        # Reuse the palette cyclically so more samples than colors do not
        # raise IndexError.
        sample_color = sample_colors[sample_idx % len(sample_colors)]
        bar_start_angles = arc_starts + sample_idx * (bar_width + spacer_width)
        bar_end_angles = bar_start_angles + bar_width
        cur_df["Normalized_SHM"] = cur_df["mean"] / vshm_max

        shm_bars = cur_df["Normalized_SHM"] * plot_thickness + plot_inner_rad
        # No angle units are given here, unlike the wedges above: these
        # angles were already converted to radians.
        plot.annular_wedge(x=0,
                           y=0,
                           start_angle=bar_start_angles,
                           end_angle=bar_end_angles,
                           line_color=None,
                           inner_radius=plot_inner_rad,
                           outer_radius=shm_bars,
                           fill_color=sample_color)

        # A color key in the middle of the circle, only when comparing samples
        if total_samples > 1:
            plot.rect(x=-2,
                      y=sample_label_ys[sample_idx],
                      width=2.5,
                      height=1.5,
                      color=sample_color)
            plot.text(x=0,
                      y=sample_label_ys[sample_idx],
                      text={"value": samples[sample_idx]},
                      text_font_size="10pt",
                      text_baseline="middle")

    if png is not None:
        export_png(plot, png)

    return plot
Ejemplo n.º 19
0
def draw_chart(df):
    """Plot a 2-D t-SNE embedding of ``df["wv"]`` and export it as SVG and PNG.

    The embedding is cached in ``tsne3000.pkl`` in the working directory: it
    is computed and pickled when the file is missing and loaded from it
    otherwise. Points are colored by ``df["cluster"]`` and sized from the
    min-max-normalized ``df["tokens_len"]``. The plot is first opened with
    ``show`` and then written to ``cluster.svg`` and ``cluster.png``.

    :param df: DataFrame providing the ``wv``, ``cluster``, ``title`` and
        ``tokens_len`` columns; ``cluster`` values must be keys 0-3 of the
        color map below.
    :return: None
    """
    X = df["wv"].to_list()
    y = df["cluster"].to_list()
    tsne_filepath = "tsne3000.pkl"

    if not os.path.exists(tsne_filepath):
        tsne = TSNE(random_state=42)
        tsne_points = tsne.fit_transform(X)
        with open(tsne_filepath, "wb") as f:
            pickle.dump(tsne_points, f)
    else:  # Cache Hits!
        # NOTE(review): the cache is not tied to the contents of ``df``; a
        # stale file with a different row count will break the DataFrame
        # construction below. Unpickling also executes whatever the file
        # contains, so the cache file must come from a trusted location.
        with open(tsne_filepath, "rb") as f:
            tsne_points = pickle.load(f)
    tsne_df = pd.DataFrame(
        tsne_points, index=range(len(X)), columns=["x_coord", "y_coord"]
    )

    tsne_df["title"] = df["title"].to_list()
    tsne_df["tokens_len"] = df["tokens_len"].to_list()
    tsne_df["cluster_no"] = y
    colormap = {0: "#ffee33", 1: "#00a152", 2: "#2979ff", 3: "#d500f9"}
    tsne_df["color"] = [colormap[cluster] for cluster in tsne_df["cluster_no"]]
    # Marker size grows linearly from 5 to 15 with the normalized token count.
    normalized = min_max_normalize(tsne_df.tokens_len.to_list())
    tsne_df["radius"] = [5 + norm * 10 for norm in normalized]
    print(tsne_df.to_dict(orient="list"))
    plot_data = ColumnDataSource(data=tsne_df.to_dict(orient="list"))
    print(plot_data)

    tsne_plot = figure(
        plot_width=1200,
        plot_height=1200,
        active_scroll="wheel_zoom",
        output_backend="svg",
    )
    tsne_plot.add_tools(HoverTool(tooltips="@title"))
    tsne_plot.circle(
        source=plot_data,
        x="x_coord",
        y="y_coord",
        line_alpha=0.6,
        fill_alpha=0.6,
        size="radius",
        fill_color="color",
        line_color="color",
    )
    tsne_plot.title.text_font_size = value("16pt")
    tsne_plot.xaxis.visible = True
    tsne_plot.yaxis.visible = True
    tsne_plot.background_fill_color = None
    tsne_plot.border_fill_color = None
    tsne_plot.grid.grid_line_color = None
    tsne_plot.outline_line_color = None

    # The interactive view keeps the toolbar; it is removed only afterwards,
    # for the exported images.
    show(tsne_plot)
    tsne_plot.toolbar.logo = None
    tsne_plot.toolbar_location = None

    driver = webdriver.Chrome(os.path.join(BASE_DIR, "chromedriver"))
    try:
        export_svg(tsne_plot, filename="cluster.svg", webdriver=driver)
        export_png(tsne_plot, filename="cluster.png", webdriver=driver)
    finally:
        # Always shut the browser down; otherwise every call leaks a
        # chromedriver/Chrome process.
        driver.quit()
Ejemplo n.º 20
0
def calculate_cluster_number():
    """Print the k in 2..10 whose KMeans clustering has the best silhouette.

    Reads ``happiness.csv`` from ``H_IN_DIRS`` and uses every column except
    the first and the last as features. For each k the rows are clustered
    with KMeans, scored with the Euclidean silhouette score, and drawn on a
    shared 2-D t-SNE embedding colored by cluster; each plot is exported as
    ``cluster-number{k}.svg`` and ``cluster-number{k}.png``. The k with the
    highest score is printed; 0 is printed if no score exceeds 0.
    """
    df = pd.read_csv(os.path.join(H_IN_DIRS, "happiness.csv"))
    # Feature matrix: all columns but the first and the last, one row per
    # CSV row.
    x = df.iloc[:, 1:-1].values.tolist()

    # The embedding depends only on the features and a fixed seed, so it is
    # computed once rather than once per k.
    points = TSNE(random_state=42).fit_transform(x)

    colormap = {
        0: "#f44336",
        1: "#673ab7",
        2: "#9c27b0",
        3: "#e91e63",
        4: "#3f51b5",
        5: "#2196f3",
        6: "#03a9f4",
        7: "#00bcd4",
        8: "#009688",
        9: "#cddc39",
    }

    best_k = 0
    best_score = 0
    kmax = 10
    driver = webdriver.Chrome(
        os.path.join(BASE_DIR, "chromedriver"), options=opts
    )
    try:
        for k in range(2, kmax + 1):
            # Fit once and reuse the labels for both the score and the plot;
            # a second fit could converge to a different clustering than the
            # one that was scored.
            labels = KMeans(n_clusters=k).fit_predict(x)
            score = silhouette_score(x, labels, metric="euclidean")

            t_df = pd.DataFrame(
                points, index=range(len(x)), columns=["x", "y"]
            )
            t_df["cluster_no"] = labels
            t_df["color"] = [colormap[label] for label in t_df["cluster_no"]]
            plot_data = ColumnDataSource(data=t_df.to_dict(orient="list"))

            p = figure(
                plot_width=1200,
                plot_height=1200,
                active_scroll="wheel_zoom",
                output_backend="svg",
            )
            # NOTE(review): the data source has no "title" column, so this
            # tooltip has nothing to display -- confirm whether it is wanted.
            p.add_tools(HoverTool(tooltips="@title"))
            p.circle(
                source=plot_data,
                x="x",
                y="y",
                line_alpha=0.9,
                fill_alpha=0.9,
                fill_color="color",
                line_color="color",
            )
            p.title.text_font_size = value("16pt")
            p.xaxis.visible = True
            p.yaxis.visible = True
            p.background_fill_color = None
            p.border_fill_color = None
            p.grid.grid_line_color = None
            p.outline_line_color = None
            p.toolbar.logo = None
            p.toolbar_location = None
            export_svg(
                p, filename=f"cluster-number{k}.svg", webdriver=driver,
            )
            export_png(
                p, filename=f"cluster-number{k}.png", webdriver=driver,
            )

            if score > best_score:
                best_score = score
                best_k = k
    finally:
        # Always shut the browser down, even if clustering or an export fails.
        driver.quit()
    print(best_k)
Ejemplo n.º 21
0
def Diversity_Plot(clone_df,
                   png=None,
                   title="",
                   count_col="Clustered",
                   split_col=None,
                   line_width=3,
                   add_control_diversities=True,
                   figsize=(1000, 700)):
    """Create a plot comparing clonal repertoire diversity using the Hill Diversity metric.

    One line is drawn per repertoire (or per subset when ``split_col`` is
    given). Optionally, four dashed reference lines are added for artificial
    repertoires in which the top 20 clones hold 20%, 15%, 10% and 5% of the
    total counts.

    Parameters
    ----------
    clone_df: pandas DataFrame
        DataFrame of the repertoire(s) to plot
    png: str
        Filename of the output PNG, or None if none should be made; default is None
    title: str
        Title of the output graph; default is ""
    count_col: str
        Column name in clone_df of the clone counts/frequencies; default is "Clustered"
    split_col: str
        Column separating various repertoire subsets in clone_df, or None for a
        single repertoire; default is None
    line_width: int
        Width for the plot lines; default is 3
    add_control_diversities: bool
        Whether to add lines for control diversities of artificial polarity; default is True
    figsize: tuple of (int, int)
        The width and height of the output plot; default is (1000, 700)

    Returns
    ----------
    plot: bokeh figure
        The figure object for the diversity plot
    """

    figure_params = {
        "plot_width": figsize[0],
        "plot_height": figsize[1],
        "x_range": Range1d(0, 10),
        "y_axis_type": "log",
        "title": title,
        "tools": "save, help",
        "toolbar_location": "right"
    }

    plot = figure(**figure_params)
    plot.xgrid.grid_line_alpha = 0.0
    plot.xaxis.axis_label = "Order (N)"
    plot.yaxis.axis_label = "Hill Diversity Constant"
    plot.yaxis.formatter = BasicTickFormatter()

    # If comparing multiple samples, split the DataFrame on the sample column
    if split_col is not None:
        diversity_df = clone_df[[count_col, split_col]]

        samples = []
        diversity_dfs = []
        # Group by the bare column name rather than a one-element list so each
        # key is a scalar label (a list key yields 1-tuples on recent pandas).
        for sample, df in diversity_df.groupby(split_col):
            samples.append(sample)
            diversity_dfs.append(df)

    else:
        samples = ["Repertoire"]
        diversity_dfs = [clone_df[[count_col]]]

    sample_colors = (RGB(30, 160, 120), RGB(220, 90, 0),
                     RGB(120, 110, 180), RGB(230, 40, 140))
    for position, (sample, df) in enumerate(zip(samples, diversity_dfs)):
        # Cycle through the palette so samples beyond the fourth are still drawn
        line_color = sample_colors[position % len(sample_colors)]
        hill_indices = list(Hill_Diversity_Index(df[count_col]))

        # TODO: add more line styles (dotted, etc.)
        # NOTE(review): `legend=` is the older Bokeh keyword; newer releases
        # spell it `legend_label=` -- confirm against the targeted version.
        plot.line(x=[i[0] for i in hill_indices],
                  y=[i[1] for i in hill_indices],
                  color=line_color,
                  line_width=line_width,
                  legend=sample)

    # Skip the controls when there is no repertoire to size them from
    if add_control_diversities and diversity_dfs:
        total_clones = max(len(df) for df in diversity_dfs)
        total_counts = max(df[count_col].sum() for df in diversity_dfs)

        # Clones left after the top 20; never negative. With 20 or fewer clones
        # the control data consists of the 20 top clones only.
        remaining_clones = max(total_clones - 20, 0)

        # (share held by the top 20 clones, share held by the rest, colour, label)
        controls = (
            (0.2, 0.8, RGB(160, 200, 230),
             "Very Highly Polarized (Top 20 Clones 20%)"),
            (0.15, 0.85, RGB(30, 120, 180),
             "Highly Polarized (Top 20 Clones 15%)"),
            (0.1, 0.9, RGB(180, 220, 140),
             "Moderately Polarized (Top 20 Clones 10%)"),
            (0.05, 0.95, RGB(50, 160, 40),
             "Lowly Polarized (Top 20 Clones 5%)"),
        )
        for top_share, rest_share, control_color, label in controls:
            # Top 20 clones split `top_share` of the counts evenly; the
            # remaining clones split `rest_share` evenly.
            control_data = [total_counts * top_share / 20] * 20
            if remaining_clones:
                control_data += (
                    [total_counts * rest_share / remaining_clones] *
                    remaining_clones)

            # Take x and y from the same result so the two always line up
            control_indices = list(Hill_Diversity_Index(control_data))
            plot.line(x=[i[0] for i in control_indices],
                      y=[i[1] for i in control_indices],
                      color=control_color,
                      alpha=0.8,
                      line_dash=(12, ),
                      line_width=line_width,
                      legend=label)

    if png is not None:
        # Pass the filename by keyword, as the other export calls here do
        export_png(plot, filename=png)

    return plot
Ejemplo n.º 22
0
# Colour mapper keyed on the 'cyl_mfr' field, with one factor per distinct
# cylinder value found in df.
cylinder_levels = sorted(df.cyl.unique())
index_cmap = factor_cmap(
    'cyl_mfr', palette=Spectral5, factors=cylinder_levels, end=1)

# Hover read-outs: the aggregated mean MPG and the (cylinders, manufacturer) pair.
hover_fields = [("MPG", "@mpg_mean"), ("Cyl, Mfr", "@cyl_mfr")]

# `group` supplies both the categorical x-range and the data source
# (presumably a pandas GroupBy built earlier -- it is not defined in this snippet).
p = figure(
    width=800, height=300,
    title="Mean MPG by # Cylinders and Manufacturer",
    x_range=group,
    toolbar_location=None,
    tooltips=hover_fields,
)

# One bar per (cylinders, manufacturer) pair, as tall as its mean MPG.
p.vbar(x='cyl_mfr', top='mpg_mean', width=1, source=group,
       line_color="white", fill_color=index_cmap)

# Ranges: bars start at zero, with a little padding at either end of the x-axis.
p.x_range.range_padding = 0.05
p.y_range.start = 0

# Axis labelling and visual clean-up.
p.xaxis.major_label_orientation = 1.2
p.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
p.outline_line_color = None
p.xgrid.grid_line_color = None

export_png(p, filename="plot.png")