def sequence_logo(score_matrix, order="value", width=1.0, ax=None,
                  sequence_type=Genome, font_properties=None,
                  color_scheme=None, **kwargs):
    """
    Plots a sequence logo for visualizing motifs.

    Parameters
    ----------
    score_matrix : np.ndarray
        An :math:`L \\times N` array (where :math:`L` is the length of
        the sequence, and :math:`N` is the size of the alphabet)
        containing the scores for each base occurring at each position.
        The input is copied and is never modified.
    order : {'alpha', 'value'}, optional
        Default is 'value'. The manner by which to sort the bases
        stacked at each position in the sequence logo plot.

            * 'alpha' - Bases go in the order they are found in the
              sequence alphabet.
            * 'value' - Bases go in the order of their value. When every
              score is non-negative, the largest value is drawn at the
              bottom of each stack. When the matrix contains any
              negative score, each position is sorted in ascending order
              instead, with negative values stacked below zero and
              non-negative values stacked above it.

    width : float, optional
        Default is 1. The width of each character in the plotted logo.
        A value of 1 will mean that there is no gap between the
        characters at each position. A value of 0 will not draw any
        characters.
    ax : matplotlib.pyplot.Axes or None, optional
        Default is `None`. The axes to plot on. If left as `None`, a new
        axis will be created. Artists already present on a supplied
        axis are left untouched.
    sequence_type : class, optional
        Default is `selene_sdk.sequences.Genome`. The type of sequence
        that the scores are associated with. It must expose `BASES_ARR`
        (the alphabet) and `BASE_TO_INDEX` (a mapping from each base to
        its index in the alphabet). This should generally be a subclass
        of `selene_sdk.sequences.Sequence`.
    font_properties : matplotlib.font_manager.FontProperties or None, optional
        Default is `None`. A `matplotlib.font_manager.FontProperties`
        object that specifies the properties of the font to use for
        plotting the motif. If `None`, no font will be used, and the
        text will be rendered by a path. This method of rendering paths
        is preferred, as it ensures all character heights correspond to
        the actual values, and that there are no extra gaps between
        the tops and bottoms of characters at each position in the
        sequence logo. If the user opts to use a value other
        than `None`, then no such guarantee can be made.
    color_scheme : list(str) or None, optional
        Default is `None`. A list containing the hex codes or names of
        colors to use, appearing in the order of the bases of the
        sequence type. If left as `None`, a default palette will be made
        with `seaborn.color_palette`, and will have as many colors as
        there are characters in the input sequence alphabet.
    **kwargs
        Accepted for call compatibility; not used by this function.

    Returns
    -------
    matplotlib.pyplot.Axes
        The axes containing the sequence logo plot.

    Raises
    ------
    ValueError
        If `order` is neither 'alpha' nor 'value'.
    ValueError
        If `color_scheme` contains fewer colors than there are
        characters in the alphabet of `sequence_type`.
    ValueError
        If the number of columns in `score_matrix` does not match the
        number of characters in the alphabet of `sequence_type`.

    Examples
    --------
    We have included an example of the output from a `sequence_logo`
    plot below:

    .. image:: ../../docs/source/_static/img/sequence_logo_example.png

    """
    # Work on a private copy: the "value" ordering below rewrites columns
    # in place, and the caller's data must not be modified. After the
    # transpose, rows are bases and columns are sequence positions.
    score_matrix = np.asarray(deepcopy(score_matrix)).transpose()

    if font_properties is not None:
        warnings.warn(
            "Specifying a value for `font_properties` (other than `None`) "
            "will use the `matplotlib`-based character paths, and causes "
            "distortions in the plotted motif. We recommend leaving "
            "`font_properties=None`. See the documentation for details.",
            UserWarning)

    # Fancy indexing is used on the alphabet below, so make sure it is an
    # array even if the sequence type stores it as a plain sequence.
    alphabet = np.asarray(sequence_type.BASES_ARR)
    base_to_index = sequence_type.BASE_TO_INDEX

    # Validate everything before any figure is created.
    if order not in ("alpha", "value"):
        raise ValueError(
            "Unrecognized value for `order`: {0!r}. "
            "Expected 'alpha' or 'value'.".format(order))
    if color_scheme is None:
        color_scheme = sns.color_palette("Set1", n_colors=len(alphabet))
        color_scheme = color_scheme.as_hex()
    if len(color_scheme) < len(alphabet):
        raise ValueError(
            "Color scheme is shorter than number of bases in sequence.")
    if score_matrix.shape[0] != len(alphabet):
        raise ValueError(
            "Got score with {0} bases for sequence with {1} bases.".format(
                score_matrix.shape[0], len(alphabet)))

    if ax is None:
        # NOTE(review): after the transpose, `shape` is (alphabet size,
        # sequence length), so the default figure is N wide and L tall.
        # Kept as-is to preserve existing output; confirm this is intended.
        _, ax = plt.subplots(figsize=score_matrix.shape)

    n_bases, n_positions = score_matrix.shape

    # `bases[i, j]` is the character drawn in stacking slot `i` at
    # sequence position `j`.
    bases = np.empty(score_matrix.shape, dtype=object)
    bases[:, :] = "?"  # This ensures blanks are visually obvious.

    # Change ordering of things based on input arguments.
    if order == "alpha":
        for i in range(n_bases):
            bases[i, :] = alphabet[i]
    elif (score_matrix < 0).any():
        # order == "value" with at least one negative score: per position,
        # negatives (ascending) fill the first slots and non-negatives
        # (ascending) fill the remaining ones.
        sorted_scores = np.zeros_like(score_matrix)
        for j in range(n_positions):
            column = score_matrix[:, j]

            negative_idx = np.argwhere(column < 0.).flatten()
            negative_idx = negative_idx[np.argsort(column[negative_idx])]
            div = negative_idx.size
            sorted_scores[:div, j] = column[negative_idx]
            bases[:div, j] = alphabet[negative_idx]

            positive_idx = np.argwhere(column >= 0.).flatten()
            positive_idx = positive_idx[np.argsort(column[positive_idx])]
            sorted_scores[div:, j] = column[positive_idx]
            bases[div:, j] = alphabet[positive_idx]
        score_matrix = sorted_scores
    else:
        # order == "value" with no negative scores: descending order, so
        # the largest value occupies the bottom slot.
        for j in range(n_positions):
            sort_idx = np.argsort(score_matrix[:, j], axis=None)[::-1]
            bases[:, j] = alphabet[sort_idx]
            score_matrix[:, j] = score_matrix[sort_idx, j]

    # Create offsets for each bar. Row `i` of each offsets array holds the
    # running total of the same-signed scores in slots 0..i-1, i.e. where
    # the bar in slot `i` has to start.
    positive_offsets = np.zeros_like(score_matrix)
    negative_offsets = np.zeros_like(score_matrix)
    for i in range(n_bases - 1):
        y_coords = score_matrix[i, :]
        is_negative = y_coords < 0.
        is_positive = y_coords >= 0.
        negative_offsets[i + 1, :] = negative_offsets[i, :]
        positive_offsets[i + 1, :] = positive_offsets[i, :]
        negative_offsets[i + 1, is_negative] += y_coords[is_negative]
        positive_offsets[i + 1, is_positive] += y_coords[is_positive]

    # Draw one row of bars per stacking slot. Only the bars created here
    # are remembered, so artists that were already on a caller-supplied
    # axis are never mistaken for logo bars.
    x_coords = np.arange(n_positions) + 0.5
    letter_bars = []
    for i in range(n_bases):
        y_coords = score_matrix[i, :]

        # Manage negatives and positives separately.
        offsets = np.zeros(n_positions)
        is_negative = y_coords < 0.
        is_positive = y_coords >= 0.
        offsets[is_negative] = negative_offsets[i, is_negative]
        offsets[is_positive] = positive_offsets[i, is_positive]

        bars = ax.bar(x_coords, y_coords, color="black", width=width,
                      bottom=offsets)
        for j, bar in enumerate(bars):
            base = bases[i, j]
            bar.set_color(color_scheme[base_to_index[base]])
            bar.set_edgecolor(None)
            letter_bars.append((bar, base))

    # Turn each bar into a letter.
    new_patches = []
    for bar, base in letter_bars:
        # We construct a text path that tracks the bars in the barplot.
        # Thus, the barplot takes care of scaling and translation,
        # and we just copy it.
        if font_properties is None:
            text = Path(_SVG_PATHS[base][0], _SVG_PATHS[base][1])
        else:
            text = TextPath((0., 0.), base, fontproperties=font_properties)
        b_x, b_y, b_w, b_h = bar.get_extents().bounds
        t_x, t_y, t_w, t_h = text.get_extents().bounds
        scale = (b_w / t_w, b_h / t_h)
        translation = (b_x - t_x, b_y - t_y)
        text = PathPatch(text, facecolor=bar.get_facecolor(), lw=0.)
        # Hide the bar itself; only the letter that replaces it is shown.
        bar.set_facecolor("none")
        text.set_path_effects([_TextPathRenderingEffect(bar)])
        transform = transforms.Affine2D().translate(
            *translation).scale(*scale)
        text.set_transform(transform)
        new_patches.append(text)

    for patch in new_patches:
        ax.add_patch(patch)
    ax.set_xlim(0, n_positions)
    ax.set_xticks(np.arange(n_positions) + 0.5)
    ax.set_xticklabels(np.arange(n_positions))
    return ax