def chromosome_collections(df, y_positions, height, **kwargs):
    """
    Yields BrokenBarHCollection of features that can be added to an Axes
    object. Two collections are yielded per sample: the first one is built
    from the 'colors1' column, the second one from the 'colors2' column and
    anchored 0.4 below the first.

    Parameters
    ----------
    df : pandas.DataFrame
        Must at least have columns ['sample', 'start', 'end', 'colors1',
        'colors2']. If no column 'width', it will be calculated from
        start/end and removed again when the generator finishes, is closed,
        or raises.
    y_positions : dict
        Keys are sample names (values of the 'sample' column), values are
        the y-value at which to anchor the first BrokenBarHCollection of
        that sample
    height : float
        Height of each BrokenBarHCollection
    Additional kwargs are passed to BrokenBarHCollection
    """
    del_width = 'width' not in df.columns
    if del_width:
        df['width'] = df['end'] - df['start']
    try:
        for s, group in df.groupby('sample'):
            # Function-call form prints the same text on Python 2 and 3.
            print(s)
            yrange = (y_positions[s], height)
            xranges = group[['start', 'width']].values
            yield BrokenBarHCollection(xranges, yrange,
                                       facecolors=group['colors1'], **kwargs)
            yrange = (y_positions[s] - 0.4, height)
            yield BrokenBarHCollection(xranges, yrange,
                                       facecolors=group['colors2'], **kwargs)
    finally:
        # Never leave the helper column behind in the caller's frame, even
        # when iteration stops early or a lookup fails.
        if del_width:
            del df['width']
def regions_to_hbar(region_list_chr):
    """Make matplotlib 'BrokenBarHCollection' pairs from UPD sites data.

    Every entry of ``region_list_chr`` is indexed with the keys "xranges",
    "upper", "lower" and "chr". For each entry an upper bar (y-range
    ``(0.52, 1)``, coloured by "upper") and a lower bar (y-range
    ``(0, 0.48)``, coloured by "lower") are created over the same x-ranges,
    both labelled with "chr". Isodisomy will have one block and one color;
    heterodisomy will be two adjacent bars with two colors. The gap between
    the two bars leaves a transparent middle line for aesthetics.

    Returns a list with one ``[upper, lower]`` pair per entry.
    """
    def bar_pair(region):
        upper = BrokenBarHCollection(region["xranges"], (0.52, 1),
                                     facecolors=region["upper"],
                                     label=region["chr"])
        lower = BrokenBarHCollection(region["xranges"], (0, 0.48),
                                     facecolors=region["lower"],
                                     label=region["chr"])
        return [upper, lower]

    return [bar_pair(region) for region in region_list_chr]
def plot(self, ax, chrom_region, region_start, region_end):
    """Draw the bands overlapping a genomic region as one row of bars.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on; also stored as ``self.ax``.
    chrom_region : str
        Chromosome name. If it is not a key of ``self.interval_tree`` the
        alternative naming from ``GenomeRange.change_chrom_names`` is used.
    region_start, region_end : int
        Boundaries of the region; also used as the x-limits of ``ax``.
    """
    self.ax = ax
    grange = GenomeRange(chrom_region, region_start, region_end)
    if grange.chrom not in self.interval_tree:
        grange.change_chrom_names()
    bands_in_region = sorted(
        self.interval_tree[grange.chrom][grange.start:grange.end])
    band_height = self.properties['height']

    xranges, colors = [], []
    for itv in bands_in_region:
        start, end = itv.begin, itv.end
        band_name, band_type = itv.data[:2]
        band_color = self.lookup_band_color(band_type)
        # BrokenBarHCollection expects (left edge, width) pairs, not
        # (left edge, right edge).
        xranges.append((start, end - start))
        colors.append(band_color)
        # Band names are only drawn for regions shorter than 80 Mb.
        if (self.properties['show_band_name'] != 'no'
                and grange.length < 80_000_000):
            self.plot_text(band_name, start, end, band_color)

    coll = BrokenBarHCollection(xranges, (0, band_height),
                                facecolors=colors,
                                linewidths=self.properties['border_width'],
                                edgecolors=self.properties['border_color'])
    ax.add_collection(coll)
    ax.set_ylim(-0.1, band_height + 0.1)
    ax.set_xlim(region_start, region_end)
    self.plot_label()
def _chromosome_collections(df, y_positions, height, **kwargs): """ Yields BrokenBarHCollection of features that can be added to an Axes object. Parameters ---------- df : pandas.DataFrame Must at least have columns ['chrom', 'start', 'end', 'color']. If no column 'width', it will be calculated from start/end. y_positions : dict Keys are chromosomes, values are y-value at which to anchor the BrokenBarHCollection height : float Height of each BrokenBarHCollection Additional kwargs are passed to BrokenBarHCollection """ del_width = False if "width" not in df.columns: del_width = True df["width"] = df["end"] - df["start"] for chrom, group in df.groupby("chrom"): yrange = (y_positions["chr" + chrom], height) xranges = group[["start", "width"]].values yield BrokenBarHCollection(xranges, yrange, facecolors=group["colors"], **kwargs) if del_width: del df["width"]
def plot_bars(self, ax):
    '''
    Once xranges are determined, create bars for each chr and sample.

    Adds one BrokenBarHCollection per (chromosome, dataset row) to `ax`,
    stacking the rows upwards, and returns ``(yticks, yticklabels)`` with
    the vertical centre and the label of every row.
    '''
    def chrom_sort_key(name):
        # Zero-pad numeric names so the order is 1, 2, ..., 10, M.
        bare = name.replace('chr', '')
        return bare.rjust(2, '0') if bare.isdigit() else bare

    ordered_chroms = sorted(self.xranges, key=chrom_sort_key)
    colors = self.get_colors(len(self.datasets))
    ystart = 0
    yticks, yticklabels = [], []
    for chrom in ordered_chroms:
        rows = self.xranges[chrom]
        topmost = len(rows) - 1
        for i, xranges in enumerate(rows):
            yrange = (ystart, self.height)
            ax.add_collection(
                BrokenBarHCollection(xranges, yrange, color=colors[i]))

            # save ticks and labels for plotting
            yticks.append(yrange[0] + yrange[1] / 2.)

            # Only label the topmost row with the chromosome.
            yticklabels.append(chrom if i == topmost and chrom else '')

            # Increment ystart for next iteration
            ystart += self.height + self.spacing
    return yticks, yticklabels
def chromosome_collections(df: pandas.DataFrame, y_positions: dict,
                           height: float, to_log: bool=False, **kwargs):
    """
    Yields BrokenBarHCollection of features that can be added to an Axes
    object

    :param bool to_log: whether to print each chromosome name as it is
        processed
    :param DataFrame df: must at least have columns
        ['chrom', 'chromStart', 'chromEnd', 'colors'].
        If no column 'width', it will be calculated from start/end and
        removed again when the generator finishes, is closed, or raises
    :param dict y_positions: keys are chromosomes, value are y-value at
        which to anchor the BrokenBarHCollection
    :param float height: height of each BrokenBarHCollection
    :param kwargs: are passed to BrokenBarHCollection
    :return: BrokenBarHCollection
    """
    del_width = 'width' not in df.columns
    if del_width:
        df['width'] = df['chromEnd'] - df['chromStart']
    try:
        for chrom, group in df.groupby('chrom'):
            if to_log:
                print(chrom)
            yrange = (y_positions[chrom], height)
            xranges = group[['chromStart', 'width']].values
            yield BrokenBarHCollection(
                xranges, yrange, facecolors=group['colors'], **kwargs)
    finally:
        # The helper column must not survive in the caller's frame when a
        # chromosome is missing from y_positions or iteration stops early.
        if del_width:
            del df['width']
def plot_thresholded_envelope(self, ax_main: Axes, ax_dist: Axes):
    """Plot the envelope with thresholds and segment bands on `ax_main`,
    and the envelope distribution on `ax_dist` (sharing the y-limits of
    `ax_main`)."""
    # e_t, with segment bands, and summary stats to the right :)
    logger.info("Plotting thresholded envelope..")
    self.plot_signal(self.e_t, ax=ax_main, color=envelope_color, lw=thicc_lw)
    rm = self.reference_maker
    ax_main.hlines(rm.ripple_threshold_high, *self.time_range, lw=thin_lw)
    ax_main.hlines(rm.ripple_threshold_low, *self.time_range, lw=thin_lw)
    add_scalebar(ax_main)
    add_title(ax_main, "Thresholds $T$", threshold_color, y=0.58)

    # Shade the reference segments that fall inside the current x-limits,
    # from y=0 up to the lower threshold.
    visible_segs = self.reference_segs_test.intersection(ax_main.get_xlim())
    segment_spans = list(zip(visible_segs.start, visible_segs.duration))
    ax_main.add_collection(
        BrokenBarHCollection(
            xranges=segment_spans,
            yrange=(0, rm.ripple_threshold_low),
            facecolors=segment_color,
            alpha=segment_alpha,
        )
    )

    # Find and plot crossings of lower threshold
    above_low = self.e_t > rm.ripple_threshold_low
    crossings_ix = nonzero(diff(above_low))[0]
    crossings_t = crossings_ix / self.fs
    crossings_y = [rm.ripple_threshold_low] * len(crossings_t)
    ax_main.plot(crossings_t, crossings_y, ".", c="black")
    logger.info("Done")

    logger.info("Plotting envelope density..")
    self.plot_envelope_dist(ax_dist)
    logger.info("Done")
    ax_dist.set_ylim(ax_main.get_ylim())
def render(self, ax, chrom, pos=None):
    """Draw the ideogram of one chromosome on `ax`.

    The bands come from ``self.xranges[chrom]`` / ``self.colors[chrom]``
    and the two centromere halves from ``self.centromeres[chrom]``; the
    centromere is drawn as two triangles meeting at mid-height.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on.
    chrom : str
        Chromosome name. If it is unknown an error message is printed and
        nothing is drawn.
    pos : number, optional
        If truthy, a red vertical marker line is drawn at this x position.
    """
    try:
        xranges = self.xranges[chrom]
        colors = self.colors[chrom]
        centromeres = self.centromeres[chrom]
    except LookupError:
        # Only a missing chromosome is reported; any other failure is a
        # real error and is allowed to propagate.
        print("Error: No chromosome named: {}".format(chrom))
        return

    yranges = (0, 0.5)
    coll = BrokenBarHCollection(xranges, yranges, facecolors=colors,
                                edgecolors='black', linewidths=0.5)
    ax.add_collection(coll)

    if pos:
        ax.axvline(pos, color='red', lw=4)

    # Right edge of the last band, padded by 5% on both sides.
    w = xranges[-1][0] + xranges[-1][1]
    pad = w * 0.05
    ax.set_xlim(0 - pad, w + pad)
    ax.xaxis.set_visible(False)

    center = yranges[0] + yranges[1] / 2.

    # First centromere half: vertical edge on the left, tip on the right.
    x0, y0 = centromeres[0][0], yranges[0]
    x1, y1 = centromeres[0][0], yranges[1]
    x2, y2 = centromeres[0][0] + centromeres[0][1], center
    cent = Polygon(np.array([[x0, y0], [x1, y1], [x2, y2]]), closed=True,
                   fc=color_lookup['acen'], ec='black', linewidth=0.5)
    ax.add_patch(cent)

    # Second centromere half: tip on the left, vertical edge on the right.
    x0, y0 = centromeres[1][0], center
    x1, y1 = centromeres[1][0] + centromeres[1][1], yranges[1]
    x2, y2 = centromeres[1][0] + centromeres[1][1], yranges[0]
    cent = Polygon(np.array([[x0, y0], [x1, y1], [x2, y2]]), closed=True,
                   fc=color_lookup['acen'], ec='black', linewidth=0.5)
    ax.add_patch(cent)

    ax.set_yticks([center])
    ax.set_yticklabels([chrom])
    ax.set_ylim(-0.2, 0.7)
    for loc in ['top', 'left', 'right', 'bottom']:
        ax.spines[loc].set_color('none')
def plot_sample_chrom(i, sample):
    """Draw the given coordinates and colors as a horizontal series.

    `i` is the row index: the bars occupy the vertical band from ``i`` to
    ``i + 1``. `sample` provides ``start``/``end`` coordinates and a
    "color" entry with the face color of every bar. The collection is
    added to the ``axis`` object of the enclosing scope.
    """
    xranges = [(start, end - start)
               for start, end in zip(sample.start, sample.end)]
    # yrange is (ymin, height), so a row spanning i..i+1 has height 1.
    bars = BrokenBarHCollection(xranges, (i, 1), edgecolors="none",
                                facecolors=sample["color"])
    axis.add_collection(bars)
def _chromosome_collections(df, y_positions, height, print_names=False, **kwargs): """ Yields BrokenBarHCollection of features that can be added to an Axes object. Parameters ---------- df : pandas.DataFrame Must at least have columns ['chrom', 'start', 'end', 'color']. If no column 'width', it will be calculated from start/end. y_positions : dict Keys are chromosomes, values are y-value at which to anchor the BrokenBarHCollection height : float Height of each BrokenBarHCollection Additional kwargs are passed to BrokenBarHCollection """ del_width = False if 'width' not in df.columns: del_width = True df['width'] = df['end'] - df['start'] for chrom, group in df.groupby('chrom'): yrange = (y_positions[chrom], height) xranges = group[['start', 'width']].values if print_names: names = group[['name', 'colors']].values names = [i for i in names if i[1] != '#ffffff'] ax = plt.gca() t = ax.transData canvas = ax.figure.canvas # Plot names with different colors and spaced for i, n in enumerate(names): text = ax.text(0, y_positions[chrom] - 2, f'{n[0]}', color=n[1], transform=t, fontsize=10) text.draw(canvas.get_renderer()) ex = text.get_window_extent() t = transforms.offset_copy(text._transform, x=ex.width, units='dots') yield BrokenBarHCollection(xranges, yrange, facecolors=group['colors'], **kwargs) if del_width: del df['width']
def contiguity_plot():
    """Yield one BrokenBarHCollection per chromosome.

    Rows of ``chrs_contigs_regions`` are grouped by 'chrom'; each group
    becomes a bar row anchored at ``chrom_centers[chrom]`` with height
    0.5, spanning the ('chrom_start', 'width') pairs and coloured by the
    'colors' column.
    """
    for chrom, regions in chrs_contigs_regions.groupby('chrom'):
        bar_yrange = (chrom_centers[chrom], 0.5)
        # Identity mapping over the widths, kept as in the original code.
        regions['width'] = regions['width'].apply(lambda value: value)
        bar_xranges = regions[['chrom_start', 'width']].values
        yield BrokenBarHCollection(bar_xranges, bar_yrange,
                                   facecolors=regions['colors'])
def plot_enso_background(self, ensoindices=None, lag=0, **optinfo):
    """
    Plots colored stripes in the background of plot to represent ENSO phases.

    Parameters
    ----------
    ensoindices : {array-like}, optional
        Array of ENSO indices (``+1`` for El Niño, ``0`` for Neutral and
        ``-1`` for La Niña episodes).
        If None, the ENSO indices of the underlying series are used instead.
    lag : {int}, optional
        Offset added to the starting date of every stripe.
    optinfo : optional
        Accepted for compatibility; not used by this method.

    Raises
    ------
    ValueError
        If no ENSO indices are available, or if the dates of the current
        plot (``self.xdata``) are undefined.
    """
    if ensoindices is None:
        series = self._series
        if series is None or getattr(series, 'ensoindices', None) is None:
            raise ValueError("Undefined ENSO indices!")
        ensoindices = series.ensoindices
    if self.xdata is None:
        errmsg = "Unable to retrieve the dates of the current plot!"
        raise ValueError(errmsg)
    #
    dates = self.xdata
    clust_indices = Cluster(ensoindices.filled(0), 0)
    # One extra slot so that an end index one past the last date is valid.
    # NOTE(review): the extra slot repeats the second-to-last date
    # (dates[-2]) -- confirm this is intended rather than dates[-1].
    _dates = np.empty(len(dates) + 1, int)
    _dates[:-1] = dates
    _dates[-1] = dates[-2]
    # list(zip(...)) keeps the (start, end) pairs reusable on Python 3.
    episodes = dict([(k, list(zip(_dates[v[:, 0]], _dates[v[:, 1]])))
                     for (k, v) in clust_indices.grouped_limits().items()])
    #
    colors = ENSOcolors['polygons']
    # Make the colors reachable by numeric index as well as by letter code.
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
    for (key, idx) in {'C': -1, 'N': 0, 'W': +1}.items():
        colors[idx] = colors[key]
    #
    # x in data coordinates, y in axes coordinates: stripes span full height.
    trans = blended_transform_factory(self.transData, self.transAxes)
    for (k, lim) in episodes.items():
        _bbc = BrokenBarHCollection([(x + lag, y - x) for (x, y) in lim],
                                    (0, 1),
                                    facecolors=colors[k],
                                    edgecolors=colors[k],)
        _bbc.set_alpha(0.2)
        _bbc.set_transform(trans)
        self.add_collection(_bbc)
def bed_collections_generator(df, y_positions, height):
    """
    Iterate the BED dataframe chromosome by chromosome.

    `df` needs the columns 'chrom', 'start', 'width' and 'colors'.
    `y_positions` is accepted but not used: every collection is anchored
    at y = 0 with the given `height`.

    Yields:
        BrokenBarHCollection -- one per chromosome, labelled with the
        chromosome name, ready to be plotted
    """
    anchored_yrange = (0, height)
    for chrom_name, chrom_rows in df.groupby('chrom'):
        yield BrokenBarHCollection(
            chrom_rows[['start', 'width']].values,
            anchored_yrange,
            facecolors=chrom_rows['colors'],
            label=chrom_name,
        )
def bed_collections_generator(dataframe, height):
    """
    Iterate the BED dataframe, grouped by its "chrom" column.

    Yields:
        BrokenBarHCollection -- one per chromosome, spanning the
        ("start", "width") pairs of that chromosome from y = 0 with the
        given height, coloured by the "colors" column and labelled with
        the chromosome name
    """
    for name, rows in dataframe.groupby("chrom"):
        spans = rows[["start", "width"]].values
        face_colors = rows["colors"]
        yield BrokenBarHCollection(spans, (0, height),
                                   facecolors=face_colors, label=name)
def make_collection(self, df, chrom, ypos=0.25, height=0.5, **kwargs):
    """Build the BrokenBarHCollection for one chromosome of `df`.

    A 'width' column (end - start) is added to `df` if it is missing.
    The rows whose 'chrom' equals `chrom` form the bars, anchored at
    `ypos` with the given `height` and coloured by the 'colors' column;
    extra keyword arguments go to BrokenBarHCollection.

    Returns the collection together with the smallest start and the
    largest end of the selected rows.
    """
    if 'width' not in df.columns:
        df['width'] = df.end - df.start
    chrom_rows = df[df['chrom'] == chrom]
    bbhc = BrokenBarHCollection(
        chrom_rows[['start', 'width']].values,
        (ypos, height),
        facecolors=chrom_rows['colors'],
        **kwargs
    )
    return bbhc, chrom_rows.start.min(), chrom_rows.end.max()
def chromosome_collections(df, y_positions, height, **kwargs):
    """
    Yields one black-edged BrokenBarHCollection per chromosome that can be
    added to an Axes object.

    Parameters
    ----------
    df : pandas.DataFrame
        Must at least have columns ['chrom', 'start', 'end', 'colors'].
        If no column 'width', it will be calculated from start/end and
        removed again when the generator finishes, is closed, or raises.
    y_positions : dict
        Keys are chromosomes, values are y-value at which to anchor the
        BrokenBarHCollection
    height : float
        Height of each BrokenBarHCollection
    Additional kwargs are passed to BrokenBarHCollection
    """
    del_width = 'width' not in df.columns
    if del_width:
        df['width'] = df['end'] - df['start']
    try:
        for chrom, group in df.groupby('chrom'):
            yrange = (y_positions[chrom], height)
            xranges = group[['start', 'width']].values
            yield BrokenBarHCollection(xranges, yrange,
                                       edgecolors=("black", ),
                                       facecolors=group['colors'], **kwargs)
    finally:
        # Clean up the helper column even on error or early exit.
        if del_width:
            del df['width']
def bed_collections_generator_combine(dataframe, y_positions, height):
    """
    Iterate the dataframe per chromosome, printing each chromosome name.

    Args:
        dataframe(pandas dataframe): needs the columns "chrom", "start",
            "width" and "colors"
        y_positions(dict): y-value at which to anchor each chromosome,
            looked up by chromosome name
        height: height of every collection
    Yields:
        BrokenBarHCollection labelled with the chromosome name
    """
    for name, rows in dataframe.groupby("chrom"):
        print("chrom: {}".format(name))
        anchor = y_positions[name]
        spans = rows[["start", "width"]].values
        yield BrokenBarHCollection(spans, (anchor, height),
                                   facecolors=rows["colors"], label=name)
def coverage_generator_combine(dataframe, height):
    """Iterate the dataframe and yield per chromosome, like
    coverage_generator().

    Args:
        dataframe -- needs the columns "chrom", "start", "width", "colors"
        height -- height of every collection; all are anchored at y = 0
    Yields:
        BrokenBarHCollection labelled with the chromosome name
    """
    grouped = dataframe.groupby("chrom")
    for chrom_label, chrom_rows in grouped:
        bar_kwargs = dict(facecolors=chrom_rows["colors"], label=chrom_label)
        yield BrokenBarHCollection(
            chrom_rows[["start", "width"]].values, (0, height), **bar_kwargs
        )
def bed_collections_generatorCombine(df, y_positions, height, **kwargs):
    """
    Iterate the dataframe per chromosome, printing each chromosome name.

    Args:
        df(pandas dataframe): needs the columns 'chrom', 'start', 'width'
            and 'colors'
        y_positions(dict): y-value at which to anchor each chromosome,
            looked up by chromosome name
        height: height of every collection
        kwargs: accepted but currently not forwarded to the collection
    Yields:
        BrokenBarHCollection labelled with the chromosome name
    """
    for chrom_name, chrom_rows in df.groupby('chrom'):
        print("chrom: {}".format(chrom_name))
        bar_yrange = (y_positions[chrom_name], height)
        bar_xranges = chrom_rows[['start', 'width']].values
        collection = BrokenBarHCollection(bar_xranges, bar_yrange,
                                          facecolors=chrom_rows['colors'],
                                          label=chrom_name)
        yield collection
def coverage_generatorCombine(df, y_positions, height):
    """Iterate the dataframe and yield per chromosome, like
    coverage_generator().

    Args:
        df -- needs the columns 'chrom', 'start', 'width' and 'colors'
        y_positions -- accepted but not used; every collection is
            anchored at y = 0
        height -- height of every collection
    Yields:
        BrokenBarHCollection labelled with the chromosome name
    """
    base_yrange = (0, height)
    for label, rows in df.groupby('chrom'):
        spans = rows[['start', 'width']].values
        yield BrokenBarHCollection(spans, base_yrange,
                                   facecolors=rows['colors'], label=label)
def plot(self, ax, gr: GenomeRange, **kwargs):
    """Draw the bands returned by ``self.fetch_data(gr)`` as one row of bars.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on; also stored as ``self.ax``.
    gr : GenomeRange
        Region to plot; its ``start``/``end`` become the x-limits of `ax`.
    kwargs
        Accepted but not used by this method.
    """
    self.ax = ax
    df = self.fetch_data(gr)
    band_height = self.properties['height']

    xranges, colors = [], []
    for _, row in df.iterrows():
        start, end = row['start'], row['end']
        band_name, band_type = row['band_name'], row['band_type']
        band_color = self.lookup_band_color(band_type)
        # BrokenBarHCollection expects (left edge, width) pairs, not
        # (left edge, right edge).
        xranges.append((start, end - start))
        colors.append(band_color)
        # Band names are only drawn for regions shorter than 80 Mb.
        if (self.properties['show_band_name'] != 'no'
                and gr.length < 80_000_000):
            self.plot_text(band_name, start, end, band_color)

    coll = BrokenBarHCollection(xranges, (0, band_height),
                                facecolors=colors,
                                linewidths=self.properties['border_width'],
                                edgecolors=self.properties['border_color'])
    ax.add_collection(coll)
    ax.set_ylim(-0.1, band_height + 0.1)
    ax.set_xlim(gr.start, gr.end)
    self.plot_label()
def plot_enso_background(self, ensoindices=None, lag=0, **optinfo):
    """
    Plots colored stripes in the background of plot to represent ENSO phases.

    Parameters
    ----------
    ensoindices : {array-like}, optional
        Array of ENSO indices (``+1`` for El Niño, ``0`` for Neutral and
        ``-1`` for La Niña episodes).
        If None, the ENSO indices of the underlying series are used instead.
    lag : {int}, optional
        Offset added to the starting date of every stripe.
    optinfo : optional
        Accepted for compatibility; not used by this method.

    Raises
    ------
    ValueError
        If no ENSO indices are available, or if the dates of the current
        plot (``self.xdata``) are undefined.
    """
    if ensoindices is None:
        series = self._series
        no_indices = (series is None
                      or not hasattr(series, 'ensoindices')
                      or series.ensoindices is None)
        if no_indices:
            raise ValueError("Undefined ENSO indices!")
        ensoindices = series.ensoindices
    if self.xdata is None:
        raise ValueError("Unable to retrieve the dates of the current plot!")
    #
    dates = self.xdata
    clust_indices = Cluster(ensoindices.filled(0), 0)
    # One extra slot so that an end index one past the last date is valid.
    # NOTE(review): the extra slot repeats dates[-2], not dates[-1] --
    # confirm this is intended.
    _dates = np.empty(len(dates) + 1, int)
    _dates[:-1] = dates
    _dates[-1] = dates[-2]
    # Materialise the (start, end) pairs: zip() is lazy on Python 3.
    episodes = {
        k: list(zip(_dates[v[:, 0]], _dates[v[:, 1]]))
        for (k, v) in clust_indices.grouped_limits().items()
    }
    #
    colors = ENSOcolors['polygons']
    # Alias the letter-coded colors by numeric phase index. items() is
    # available on Python 2 and 3, unlike iteritems().
    for (key, idx) in {'C': -1, 'N': 0, 'W': +1}.items():
        colors[idx] = colors[key]
    #
    # x in data coordinates, y in axes coordinates: stripes span full height.
    trans = blended_transform_factory(self.transData, self.transAxes)
    for (k, lim) in episodes.items():
        _bbc = BrokenBarHCollection(
            [(x + lag, y - x) for (x, y) in lim],
            (0, 1),
            facecolors=colors[k],
            edgecolors=colors[k],
        )
        _bbc.set_alpha(0.2)
        _bbc.set_transform(trans)
        self.add_collection(_bbc)
def get_steps(self, sigs, weights, cutoff_hz=1.0, order=2, window=4.0,
              x_threshold=2.0, figname=None, min_step_window=2.0):
    """
    Find the steady steps shared by two signals.

    Both signals are passed through ``self.conditioning``; the absolute
    values of the first regression column of each, scaled by the matching
    weight, are summed and handed to ``self.select`` with `x_threshold`.
    ``self.steady_steps`` then turns the selection mask into step indices
    on the down-sampled time base, which are rescaled by
    ``self.sps / self.freq_ds`` to the sampling of the original signal.

    Parameters
    ----------
    sigs : list(ndarray(n), ndarray(n))
        List of signals (1D-arrays)
    weights : sequence of two numbers
        Multipliers applied to the regression of the first and second
        signal before they are combined.
    cutoff_hz, order, window
        Forwarded to ``self.conditioning``.
    x_threshold : float
        Threshold forwarded to ``self.select``.
    figname : str, optional
        When given, the steps are saved next to it as text
        (``'.png'`` replaced by ``'_steps.txt'``) and a three-panel
        diagnostic figure is saved under this name. When None nothing is
        written.
    min_step_window : float
        Forwarded to ``self.steady_steps`` as its ``step_lenght`` argument.

    Returns
    -------
    steps : ndarray of int
        Step indices in the sampling of the original signal.
    """
    x = sigs[0]
    y = sigs[1]

    xf, xf_ds, xf_ds_dt, x_regress = self.conditioning(x, cutoff_hz=cutoff_hz,
                                                       order=order,
                                                       window=window)
    xf_ds_regress = x_regress[:, 0]
    yf, yf_ds, yf_ds_dt, y_regress = self.conditioning(y, cutoff_hz=cutoff_hz,
                                                       order=order,
                                                       window=window)
    yf_ds_regress = y_regress[:, 0]

    # select on the weighted sum of |xf_ds_regress| and |yf_ds_regress|,
    # both need to be steady!
    xw = weights[0]
    yw = weights[1]
    xy_ds = np.abs(xf_ds_regress*xw) + np.abs(yf_ds_regress*yw)
    xf_sel_mask, xf_sel_arg = self.select(xy_ds, x_threshold)
    # "step_lenght" is the keyword name expected by steady_steps.
    step_ds_mask, steps_ds = self.steady_steps(xf_sel_mask,
                                               step_lenght=min_step_window)
    # save steps in high-res sampling of the original signal
    # (builtin int: the np.int alias no longer exists in current NumPy)
    steps = np.round(steps_ds * self.sps / self.freq_ds, 0).astype(int)

    if figname is not None:
        # Only touch the file system when an output name was requested;
        # with the default figname=None there is nothing to derive a
        # file name from.
        np.savetxt(figname.replace('.png', '_steps.txt'), steps)

        print('start plotting...')
        fig, axes = plotting.subplots(nrows=3, ncols=1, figsize=(8, 9),
                                      dpi=120)

        # Panel 1: raw (faint) and filtered signals, y on a twin axis.
        ax = axes[0, 0]
        ax.set_title('original and filtered signals')
        ax.plot(self.time, x, 'r-', alpha=0.3)
        ax.plot(self.time, xf, 'r-')
        ax.grid()
        axr = ax.twinx()
        axr.plot(self.time, y, 'g-', alpha=0.3)
        axr.plot(self.time, yf, 'g-')

        # Panel 2: down-sampled signals with the selected samples marked.
        ax = axes[1, 0]
        ax.set_title('lin regr window: %1.02f sec' % window)
        t_mask = self.t_ds.copy()
        t_mask[~xf_sel_mask] = np.nan
        x_mask = xf_ds.copy()
        x_mask[~xf_sel_mask] = np.nan
        ax.plot(self.t_ds, xf_ds, 'r-', alpha=1.0, label='xf ds')
        ax.plot(self.t_ds, x_mask, 'k-+', alpha=0.7, label='xf select')
        ax.grid()

        axr = ax.twinx()
        axr.plot(self.t_ds, yf_ds, 'g-', alpha=0.8, label='yx ds')
        y_mask = yf_ds.copy()
        y_mask[~xf_sel_mask] = np.nan
        axr.plot(self.t_ds, y_mask, 'k-+', alpha=0.7, label='yf select')

        # Shade the selected samples over the full height of the axis.
        ymin = axr.get_ylim()[0]
        ymax = axr.get_ylim()[1]
        collection = region.span_where(self.t_ds, ymin=ymin, ymax=ymax,
                                       where=xf_sel_mask, facecolor='grey',
                                       alpha=0.4)
        axr.add_collection(collection)

        leg = plotting.one_legend(ax, axr, loc='best')
        leg.get_frame().set_alpha(0.5)

        # Panel 3: regression magnitudes, threshold and retained steps.
        ax = axes[2, 0]
        rpl = (x_threshold, min_step_window)
        ax.set_title('threshold: %1.02f, min step window: %1.2f sec' % rpl)
        ax.plot(self.t_ds, np.abs(xf_ds_regress), 'r-',
                label='xf lin regress', alpha=0.9)
        ax.plot(self.t_ds, np.abs(yf_ds_regress), 'g-',
                label='yf lin regress', alpha=0.9)
        ax.plot(self.t_ds, np.abs(xy_ds), 'k-', label='xy*w', alpha=0.7)
        ax.axhline(y=x_threshold, linewidth=1, color='k', linestyle='--',
                   aa=False)
        ax.set_ylim([0, 5])

        ymin = ax.get_ylim()[0]
        ymax = ax.get_ylim()[1]
        collection = region.span_where(self.t_ds, ymin=ymin, ymax=ymax,
                                       where=step_ds_mask, facecolor='grey',
                                       alpha=0.4)
        ax.add_collection(collection)

        ax.grid()
        leg = ax.legend(loc='best')
        leg.get_frame().set_alpha(0.5)

        fig.tight_layout()
        fig.savefig(figname)
        print(figname)

    return steps
def draw(self, ax):
    """Draw the gene model on `ax`.

    The drawing consists of a grey base line from ``self.st`` to
    ``self.ed``, direction arrows inside sufficiently wide introns (and
    near both ends when the gene starts right of ``self.st``), thin bars
    for the exon parts outside the ``tst``..``ted`` interval of
    ``self.attr``, thick bars for the exon parts inside it, and a text
    label with the gene name and the drawn width in kb.

    ``self.attr`` provides 'st', 'ed', 'estarts', 'esizes', 'strand',
    'tst', 'ted' and 'name'; exon starts are relative to 'st'. Bars use
    ``self.color`` for face and edge, updated with ``self.kwargs``.
    """
    a = self.attr
    st = a['st']
    xmin = self.st
    xmax = self.ed
    width = xmax - xmin
    xmid = (xmin + xmax) / 2.
    ymax = 0.8
    h = 0.4
    ymid = ymax - h / 2.

    # draw genome line
    ax.plot([xmin, xmax], [ymid, ymid], 'grey')  # base

    # draw arrows in introns
    # Materialise the pairs: a zip object cannot be sliced on Python 3.
    estsi = list(zip(a['estarts'], a['esizes']))
    ists = [st + est + esi for est, esi in estsi[:-1]]
    ieds = [st + est for est, esi in estsi[1:]]
    if a['strand'] == '+':
        dx, shape, hw = 1, 'right', 0.25
    else:
        dx, shape, hw = -1, 'right', 0.4
    hl = 0.05 * width
    arrowargs = dict(y=ymid, dx=dx, dy=0, shape=shape, fc='grey',
                     linewidth=0, head_width=hw, head_length=hl)
    for ist, ied in zip(ists, ieds):
        iwidth = ied - ist
        imid = (ist + ied) / 2.
        # Skip introns narrower than 15% of the drawn width.
        if iwidth < 0.15 * width:
            continue
        ax.arrow(imid - dx * (hl + 1) / 2., **arrowargs)

    # put arrow at xmin and xmax
    if st > xmin:
        if dx < 0:
            axmin = xmin + hl - dx
            axmax = xmax - dx
        else:
            axmin = xmin - dx
            axmax = xmax - hl - dx
        ax.arrow(axmin, **arrowargs)
        ax.arrow(axmax, **arrowargs)

    # draw exons => BrokenBarHCollection
    # draw st=>TSS & TSE=>ed
    tss = a['tst'] - st  # exon coords are st based
    tse = a['ted'] - st
    if tss == tse:
        # No tst..ted interval: every exon is drawn as a thick bar.
        estsi1 = estsi3 = []
        estsi2 = estsi
    else:
        estsi1 = [(x, y) for x, y in estsi if x < tss]
        estsi2 = [(x, y) for x, y in estsi if (x + y) >= tss and x <= tse]
        estsi3 = [(x, y) for x, y in estsi if (x + y) > tse]
        if estsi1:
            x0, y0 = estsi1[-1]  # last size needs fixing
            if (x0 + y0) > tss:
                estsi1[-1] = (x0, tss - x0)
        if estsi2:
            x0, y0 = estsi2[0]
            if x0 < tss:  # first start and size need fixing
                estsi2[0] = (tss, y0 - (tss - x0))
            x0, y0 = estsi2[-1]
            if (x0 + y0) > tse:
                estsi2[-1] = (x0, tse - x0)
        if estsi3:
            x0, y0 = estsi3[0]
            if x0 < tse:
                estsi3[0] = (tse, y0 - (tse - x0))

    c = self.color
    cargs = dict(facecolor=c, edgecolor=c)
    cargs.update(self.kwargs)

    # draw UTR
    if (len(estsi1) + len(estsi3)) > 0:
        yrange = (ymid - h / 4., h / 2.)
        xranges = ([(st + x, y) for x, y in estsi1]
                   + [(st + x, y) for x, y in estsi3])
        bbhc = BrokenBarHCollection(xranges, yrange, **cargs)
        ax.add_collection(bbhc)

    # draw coding
    yrange = (ymid - h / 2., h)
    xranges = [(st + x, y) for x, y in estsi2]
    bbhc = BrokenBarHCollection(xranges, yrange, **cargs)
    ax.add_collection(bbhc)

    # draw gene name
    # Float divisor so the one-decimal kb value is not truncated by
    # integer division on Python 2.
    txt = '%s (%.1fkb)' % (a['name'], width / 1000.)
    ax.text(xmid, 0, txt, ha='center', va='bottom', fontsize=FONTSIZE)
    PP.setp(ax, xticks=[], yticks=[], frame_on=False,
            xlim=(xmin, xmax), ylim=(0, 1))
# Collect one record per row of every chromosome's "compress.out" file and
# draw them as ideogram-style bars.
# Chromosome directory suffixes, processed in this order.
chrRng = ['R','7','6','5','4','3','2','1']
mydata = []
for ichr in chrRng:
    # Each chromosome lives in its own "chr<name>" directory; the file is
    # read relative to it, then the working directory is restored.
    os.chdir("chr%s"%ichr)
    tcom = np.genfromtxt("compress.out",skip_header=1)
    for itemcom in tcom:
        # Column 0 is used as the bin index, column 2 as the bin value.
        i = itemcom[0]
        c = itemcom[2]
        # Record: [chromosome, bin start, bin end, 0, value]; bins are
        # 50000 wide and offset by 12500.
        mydata.append(["chr%s"%ichr,12500+i*50000,12500+(i+1)*50000,0,c])
    os.chdir("..")
#mydata = [["chr1",0,50000,0,1],
#["chr1",50000,100000,0,2],
#["chr1",100000,150000,0,1],["chr2",150000,200000,0,3],["chr2",200000,250000,0,4]]
# One bar collection per item produced by ideograms(); the y tick of each
# row sits at its vertical centre.
for xranges, yrange, colors, label in ideograms(mydata):
    coll = BrokenBarHCollection(xranges, yrange, facecolors=colors)
    ax.add_collection(coll)
    center = yrange[0] + yrange[1]/2.
    yticks.append(center)
    yticklabels.append(label)
    d[label] = xranges
ax.axis('tight')
ax.set_yticks(yticks)
ax.set_yticklabels(yticklabels)
ax.set_xticks([])
plt.savefig("compress.pdf")
plt.show()
### plot compressibilities out as curve
if False:
def details(file):
    """Plot one horizontal bar per chromosome block of a segment table.

    `file` is the path of a whitespace-separated table whose first line is
    a header and whose remaining lines hold six fields:
    chrom, start, stop, specie, population, stain. Consecutive lines with
    the same chrom form one bar; each segment is coloured through the
    ``colors`` lookup by its stain and annotated with its length in bp.
    The y tick label of a bar is "<specie> <population>" of the last line
    of its block. The figure is shown without blocking.
    """
    height = 0.9
    spacing = 0.9

    def ideograms(fn):
        """Yield (xranges, yrange, colors, chrom, specie, population) for
        every run of consecutive lines sharing the same chrom."""
        last_chrom = last_specie = last_population = None
        xranges, color = [], []
        ymin = 0
        # The context manager closes the file when the generator ends.
        with open(fn) as fin:
            fin.readline()  # skip the header line
            for line in fin:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                chrom, start, stop, specie, population, stain = fields
                start = int(start)
                width = int(stop) - start
                if last_chrom is not None and chrom != last_chrom:
                    # Chromosome changed: emit the finished block.
                    ymin += height + spacing
                    yield (xranges, (ymin, height), color,
                           last_chrom, last_specie, last_population)
                    xranges, color = [], []
                xranges.append((start, width))
                color.append(colors[stain])
                last_chrom = chrom
                last_specie = specie
                last_population = population
        # last one (nothing to emit when the table has no data lines)
        if xranges:
            ymin += height + spacing
            yield (xranges, (ymin, height), color,
                   last_chrom, last_specie, last_population)

    fig = plt.figure(figsize=(15, 15))
    ax = fig.add_subplot(111)
    yticks = []
    yticklabels = []

    for xranges, yrange, color, chms, species, populations in ideograms(file):
        coll = BrokenBarHCollection(xranges, yrange, facecolors=color)
        ax.add_collection(coll)
        center = yrange[0] + yrange[1] / 2.
        yticks.append(center)
        yticklabels.append('%s %s' % (species, populations))

        # Segment starts, plus the end of the last segment as a sentinel,
        # so consecutive pairs delimit where each label is centred.
        values = [seg_start for seg_start, _ in xranges]
        bp = [seg_width for _, seg_width in xranges]
        values.append(bp[-1] + values[-1])
        bar_top = yrange[0] + yrange[1]
        for left, right, length in zip(values, values[1:], bp):
            xlabel = '%d bp' % (length)
            label_x = right / 2 + left / 2
            ax.annotate(xlabel, xy=(label_x, bar_top),
                        xytext=(label_x, bar_top))

    ax.set_title('Chromosomes')
    ax.axis('tight')
    ax.set_yticks(yticks)
    ax.set_yticklabels(yticklabels)
    ax.set_xticks([])
    plt.show(block=False)
def main(args, parser):
    """Count reads per window, attach haplotype counts and plot them.

    Reads the contig lengths from ``args.fai``, cuts every contig into
    windows of ``args.binsize`` bp, counts the non-secondary alignments of
    ``args.bam`` per window, updates the windows with the entries of the
    tab/space separated ``args.human`` file (header line skipped; columns
    2, 3, 4 and 6 are used as contig, position, haplotype and count),
    writes the per-window table to ``args.output + '.wins'`` and saves an
    area plot to ``args.output + '.pdf'``. `parser` is not used.
    """
    # Get the contig length list
    ctglens = dict()
    with open(args.fai, 'r') as fai:
        for l in fai:
            s = l.rstrip().split()
            if len(s) < 2:
                continue
            # Lengths are used in range() and arithmetic below, so they
            # must be integers, not the raw text.
            ctglens[s[0]] = int(s[1])

    # Create windows
    winlist = defaultdict(list)
    # offset bp to add for stitching contigs together in one line
    ctgoffset = dict()
    lastbp = 0
    for c in ctglens:
        ctgoffset[c] = lastbp + 100
        for i in range(0, ctglens[c], args.binsize):
            winlist[c].append(window(c, i, i + args.binsize))
        lastbp += ctglens[c]

    # read each sam region and count the reads
    with pysam.AlignmentFile(args.bam, 'rb') as bamfile:
        for c, w in winlist.items():
            for i, win in enumerate(w):
                count = 0
                for s in bamfile.fetch(c, win.start, win.end):
                    if s.is_secondary:
                        continue
                    count += 1
                winlist = updateWin(winlist, c, i, count)

    # Now, read in the human readable text file and process that
    hapset = set()
    with open(args.human, 'r') as human:
        human.readline()
        for l in human:
            s = l.rstrip().split()
            pos = int(s[3])
            # determine where the contig start falls
            # (.get avoids creating an empty entry for an unknown contig)
            for i, win in enumerate(winlist.get(s[2], ())):
                if win.start <= pos < win.end:
                    winlist = updateWin(winlist, s[2], i, int(s[6]), s[4])
                    print(f'Updating window: {s[2]} {win.start} {win.end} to {s[6]} for Hap {s[4]}')
            hapset.add(s[4])

    # OK, data is in! Let's try plotting
    raw = defaultdict(list)
    bars = list()
    for c, w in winlist.items():
        bars.append([ctgoffset[c], ctglens[c]])
        # Iterate this contig's windows (not the contig names).
        for win in w:
            for h in hapset:
                raw["contig"].append(c)
                raw["start"].append(win.start + ctgoffset[c])
                raw["end"].append(win.end + ctgoffset[c])
                raw["hap"].append(h)
                raw["count"].append(win.getCount(h))

    df = pandas.DataFrame(raw)
    df.to_csv(args.output + '.wins', sep='\t', header=True)

    fig = plt.figure(figsize=(6,8))
    ax = df[['start', 'hap', 'count']].plot.area(x='start', y='count', colormap='viridis')

    # facecolors needs a sequence of colors, not the Colormap object.
    ax.add_collection(BrokenBarHCollection(bars, [-1, 1], facecolors=plt.get_cmap('tab20').colors))
    ax.axis('tight')
    plt.savefig(args.output + '.pdf')
def labeled_intervals(intervals, labels, label_set=None,
                      base=None, height=None, extend_labels=True,
                      ax=None, tick=True, **kwargs):
    '''Plot labeled intervals with each label on its own row.

    Parameters
    ----------
    intervals : np.ndarray, shape=(n, 2)
        segment intervals, in the format returned by
        :func:`mir_eval.io.load_intervals` or
        :func:`mir_eval.io.load_labeled_intervals`.

    labels : list, shape=(n,)
        reference segment labels, in the format returned by
        :func:`mir_eval.io.load_labeled_intervals`.

    label_set : list
        An (ordered) list of labels to determine the plotting order.
        If not provided, the labels will be inferred from
        ``ax.get_yticklabels()``.
        If no ``yticklabels`` exist, then the sorted set of unique values
        in ``labels`` is taken as the label set.

    base : np.ndarray, shape=(n,), optional
        Vertical positions of each label.
        By default, labels are positioned at consecutive integers, one per
        row that is plotted.

    height : scalar or np.ndarray, shape=(n,), optional
        Height for each label.
        If scalar, the same value is applied to all labels.
        By default, each label has ``height=1``.

    extend_labels : bool
        If ``False``, only values of ``labels`` that also exist in
        ``label_set`` will be shown.

        If ``True``, all labels are shown, with those in `labels` but
        not in `label_set` appended to the top of the plot.
        A horizontal line is drawn to indicate the separation between
        values in or out of ``label_set``.

    ax : matplotlib.pyplot.axes
        An axis handle on which to draw the intervals.
        If none is provided, a new set of axes is created.

    tick : bool
        If ``True``, sets tick positions and labels on the y-axis.

    kwargs
        Additional keyword arguments to pass to
        `matplotlib.collections.BrokenBarHCollection`.

    Returns
    -------
    ax : matplotlib.pyplot.axes._subplots.AxesSubplot
        A handle to the (possibly constructed) plot axes
    '''
    # Get the axes handle
    ax, _ = __get_axes(ax=ax)

    # Make sure we have a numpy array
    intervals = np.atleast_2d(intervals)

    if label_set is not None:
        label_set = list(label_set)
    else:
        # If we have non-empty pre-existing tick labels, use them
        label_set = [tick_label.get_text()
                     for tick_label in ax.get_yticklabels()]
        # If none of the label strings have content, treat it as empty
        if not any(label_set):
            label_set = []

    # Put additional labels at the end, in order
    if extend_labels:
        ticks = label_set + sorted(set(labels) - set(label_set))
    elif label_set:
        ticks = label_set
    else:
        ticks = sorted(set(labels))

    style = dict(linewidth=1)
    style.update(next(ax._get_patches_for_fill.prop_cycler))
    # Swap color -> facecolor here so we preserve edgecolor on rects
    style['facecolor'] = style.pop('color')
    style.update(kwargs)

    if base is None:
        base = np.arange(len(ticks))

    if height is None:
        height = 1

    if np.isscalar(height):
        height = height * np.ones_like(base)

    # Vertical placement (bottom, height) of every plotted label
    seg_y = {lab: (ybase, yheight)
             for ybase, yheight, lab in zip(base, height, ticks)}

    # Horizontal (start, duration) spans, grouped by label
    xvals = defaultdict(list)
    for ival, lab in zip(intervals, labels):
        if lab in seg_y:
            xvals[lab].append((ival[0], ival[1] - ival[0]))

    for lab, row_yrange in seg_y.items():
        ax.add_collection(BrokenBarHCollection(xvals[lab], row_yrange,
                                               **style))
        # Pop the label after the first time we see it, so we only get
        # one legend entry
        style.pop('label', None)

    # Draw a line separating the new labels from pre-existing labels
    if label_set != ticks:
        ax.axhline(len(label_set), color='k', alpha=0.5)

    if tick:
        ax.grid(True, axis='y')
        ax.set_yticks([])
        ax.set_yticks(base)
        ax.set_yticklabels(ticks, va='bottom')
        ax.yaxis.set_major_formatter(IntervalFormatter(base, ticks))

    if base.size:
        __expand_limits(ax, [base.min(), (base + height).max()], which='y')
    if intervals.size:
        __expand_limits(ax, [intervals.min(), intervals.max()], which='x')

    return ax
def _exit_with_usage(message):
    """Print ``message``, show the usage text and exit with status 2."""
    print(message)
    usage()
    sys.exit(2)


def _junction_file(dir_post, library, kind, extension):
    """Return the path of a library's ``kind`` junction file.

    ``kind`` is ``"Legitimate"`` or ``"Illegitimate"``; ``dir_post`` must
    already end with a ``/``.
    """
    return dir_post + library + "/" + library + "_" + kind + extension


def _sequence_length(value):
    """Return ``len(value)``, or 0 when the metadata cell is null."""
    return 0 if pd.isnull(value) else len(value)


def _count_junction_distances(tlx_path, bait_offset):
    """Count how many lines of a tab-separated file fall at each distance.

    The first line is skipped as a header. For every other line the distance
    is ``int(field[9]) - int(field[8]) - bait_offset``. Returns a dict
    mapping distance -> number of lines.
    """
    counts = {}
    with open(tlx_path, 'r') as f_tlx:
        f_tlx.readline()  # header line
        for line in f_tlx:
            fields = line.split("\t")
            distance = int(fields[9]) - int(fields[8]) - bait_offset
            counts[distance] = counts.get(distance, 0) + 1
    return counts


def _add_distance_maxima(counts):
    """Add the ``max_distance`` / ``max_distance_count`` summary keys.

    Both are 0 for an empty dict. The maxima are computed before the string
    keys are inserted, so only the integer distance keys take part.
    """
    max_distance = max(counts) if counts else 0
    max_distance_count = max(counts.values()) if counts else 0
    counts["max_distance"] = max_distance
    counts["max_distance_count"] = max_distance_count


def main(argv):
    """Plot, per library, where junctions fall relative to the bait primer.

    Command-line options (read from ``sys.argv[1:]``):

    * ``-m/--file_metadata``  tab-separated metadata file; the columns
      ``Assembly``, ``Library``, ``MID`` and ``Primer`` are read.
    * ``-g/--genome``         assembly name used to filter the metadata.
    * ``-p/--dir_post``       postprocess directory holding one
      sub-directory per library.
    * ``-o/--output_mark``    mark appended to the output file name.
    * ``-e/--method``         0 or 1; selects the plot title and is passed
      on to ``manageArray``.
    * ``-i/--input_mark``     comma-separated marks identifying the input
      files (optional; the raw ``.tlx`` files are used when omitted).
    * ``-w/--libraries``      comma-separated library names or ``all``.
    * ``-t/--type_junction``  ``all``, ``legitimate`` or ``illegitimate``.
    * ``-h/--help``           print the usage text.

    For every selected library the ``<library>_Legitimate<ext>`` and
    ``<library>_Illegitimate<ext>`` files are read, the distance of each
    line from the end of MID + primer is counted, the counts are passed
    through ``manageArray`` and the result is drawn as one horizontal track
    per library and junction type. The values returned by ``manageArray``
    are used as the alpha channel of the bar colours (presumably normalised
    to [0, 1] -- confirm against ``manageArray``). The figure is saved to
    ``dir_post + genome + <output extension>`` as a PNG.

    Parameters
    ----------
    argv : list
        Accepted for interface compatibility; the options are parsed from
        ``sys.argv[1:]``, exactly as before.

    Any invalid or missing option prints an error followed by the usage
    text and exits with status 2.
    """
    file_metadata = ""
    dir_post = ""
    output_mark = ""
    genome = ""
    method = -1
    # Must be initialised: it is optional on the command line and is read
    # unconditionally below.
    input_mark = ""
    libraries = 'all'
    type_junction = 'all'

    try:
        # 'h' is listed so that the short help flag reaches the handler below.
        opts, _ = getopt.getopt(sys.argv[1:], 'hm:g:p:o:e:i:w:t:', [
            'file_metadata=', 'genome=', 'dir_post=', 'output_mark=',
            'method=', 'input_mark=', 'libraries=', 'type_junction=', 'help'
        ])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    ##############################OPTIONS##############################
    print("\n")
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            sys.exit(2)
        elif opt in ('-m', '--file_metadata'):
            file_metadata = arg
        elif opt in ('-g', '--genome'):
            genome = arg
        elif opt in ('-p', '--dir_post'):
            dir_post = arg
        elif opt in ('-o', '--output_mark'):
            output_mark = arg
        elif opt in ('-e', '--method'):
            method = arg
        elif opt in ('-i', '--input_mark'):
            input_mark = arg
        elif opt in ('-w', '--libraries'):
            libraries = arg
        elif opt in ('-t', '--type_junction'):
            type_junction = arg
        else:
            _exit_with_usage("Error : Bad option -> " + opt)

    ##############################CHECK UP/SET UP##############################
    # CHECK METADATA FILE
    if file_metadata == "" or not os.path.exists(file_metadata):
        _exit_with_usage("Error : You have to set a metadata file !\n")

    # READ METADATA FILE AND KEEP ONLY THE REQUESTED GENOME
    metadata = pd.read_table(file_metadata, sep='\t')
    if genome == "":
        _exit_with_usage("Error : You have to set a genome name !\n")
    metadata = metadata.loc[metadata['Assembly'] == genome]
    if metadata.empty:
        _exit_with_usage(
            "Error : This assembly does not exist in the metadata file !\n")

    # CHECK POSTPROCESS DIRECTORY
    if not os.path.exists(dir_post):
        _exit_with_usage(
            "Error : You have to set a postprocess directory !\n")
    if dir_post[-1] != "/":
        dir_post += "/"

    # CHECK METHOD
    # Only the conversion is guarded: a bare ``except`` around the whole
    # check used to swallow the SystemExit raised for out-of-range values
    # and print a second, misleading message.
    try:
        method = int(method)
    except ValueError:
        _exit_with_usage(
            "Error : You have to set an integer (0 or 1) to method option !\n")
    if method == 0:
        title_label = "Number of double stranded breaks after the primer at each position divided by the maximum of breaks by library"
    elif method == 1:
        title_label = "Log2 of number of double stranded breaks after the primer at each position divided by log2 of the maximum of breaks by library"
    else:
        _exit_with_usage("Error : Method option needs to be 0 or 1 !\n")

    # CHECK INPUT MARKS HISTORY
    if input_mark == "":
        print("Warning : You will process the raw file !\n")

    # CHECK OUTPUT MARK
    if output_mark == "":
        _exit_with_usage("Error : You have to set an output mark !\n")

    # SELECT INPUT FILES
    if input_mark == "":
        file_input_extension = ".tlx"
    else:
        file_input_extension = "_" + "_".join(input_mark.split(",")) + ".tlx"

    # TEST IF INPUT EXISTS IN AT LEAST ONE LIBRARY
    known_libraries = metadata['Library'].tolist()
    check_input_mark = any(
        os.path.exists(_junction_file(dir_post, library, "Legitimate",
                                      file_input_extension))
        or os.path.exists(_junction_file(dir_post, library, "Illegitimate",
                                         file_input_extension))
        for library in known_libraries)
    if not check_input_mark:
        _exit_with_usage(
            "Error : Your input marks can not localize a good legitimate or illegitimate file !\n"
        )

    # CHECK LIBRARIES
    array_libraries = []
    if libraries == 'all':
        array_libraries = known_libraries
    else:
        for library in libraries.split(","):
            if library in known_libraries:
                array_libraries.append(library)
            else:
                print("Warning : {" + library +
                      "} does not exist in metadata file with this genome !")
                print("Warning : {" + library + "} will not be used !")

    if type_junction not in ('all', 'legitimate', 'illegitimate'):
        _exit_with_usage(
            "Error : Type junction only stand for all, legitimate or illegitimate value !\n"
        )

    # SELECT OUTPUT FILES
    # The input extension always ends with ".tlx"; swap it for the mark.
    file_output_extension = (file_input_extension[:-4] + "_" + output_mark +
                             ".png")

    ##############################PRINTS##############################
    print('\n-----------------------------------------')
    print('Metadata file : ' + file_metadata)
    print('Genome : ' + genome)
    print('Postprocess directory : ' + dir_post)
    print('Method : ' + str(method))
    print('Libraries : ' + libraries)
    print('Type junction : ' + type_junction)
    print('Input file extension: ' + file_input_extension)
    print('Output file extension : ' + file_output_extension)
    print('-----------------------------------------\n')

    ##############################PROGRAMS##############################
    fig = plt.figure()
    ax = fig.add_subplot(111)

    # CHECK EACH LIBRARY; libraries without a directory are skipped (as the
    # warning announces) instead of crashing later when their files are
    # opened. A present directory with a missing input file is fatal.
    usable_libraries = []
    for library in array_libraries:
        if not os.path.exists(dir_post + library):
            print("Warning : " + dir_post + " does not contains {" + library +
                  "}")
            print("Warning : {" + library + "} will not be filtered")
            continue
        for kind in ("Legitimate", "Illegitimate"):
            if not os.path.exists(
                    _junction_file(dir_post, library, kind,
                                   file_input_extension)):
                _exit_with_usage("Error : The " + kind + " file for " +
                                 library + " is missing !\n")
        usable_libraries.append(library)

    # Keep only the usable libraries, in the order they were requested.
    metadata = metadata[metadata['Library'].isin(usable_libraries)]
    ordered_rows = [
        metadata[metadata['Library'] == library]
        for library in usable_libraries
    ]
    if ordered_rows:  # pd.concat rejects an empty list
        metadata = pd.concat(ordered_rows)
    track_labels = metadata['Library'].tolist()

    # DICTIONARY WITH ILLEGITIMATE AND LEGITIMATE JUNCTIONS
    distance_dict = {"illegitimates": {}, "legitimates": {}}
    track_yranges = []  # (y, height) of every track, in drawing order
    track_number = 0
    coff_leg_ille_graph = 0

    # READ THE TLX FILES OF EACH LIBRARY
    for label in track_labels:
        # Y RANGES USED AT THE END TO SPACE THE HORIZONTAL BARS
        base_y = (track_number * 1.8) + (track_number * 1.8)
        track_yranges.append((base_y + 1.8 + coff_leg_ille_graph, 1))
        if type_junction == 'all':
            track_yranges.append((base_y + 3.6 + coff_leg_ille_graph, 1))
            coff_leg_ille_graph += 1

        # Length of MID + primer is constant for a library, so it is looked
        # up once here rather than for every line of the files.
        library_rows = metadata.loc[metadata['Library'] == label]
        bait_offset = (_sequence_length(library_rows['MID'].values[0]) +
                       _sequence_length(library_rows['Primer'].values[0]))

        legitimate_counts = _count_junction_distances(
            _junction_file(dir_post, label, "Legitimate",
                           file_input_extension), bait_offset)
        illegitimate_counts = _count_junction_distances(
            _junction_file(dir_post, label, "Illegitimate",
                           file_input_extension), bait_offset)

        # ADD MAX_DISTANCE AND MAX_DISTANCE_COUNT TO BOTH DICTIONARIES
        _add_distance_maxima(illegitimate_counts)
        _add_distance_maxima(legitimate_counts)
        distance_dict["illegitimates"][label] = illegitimate_counts
        distance_dict["legitimates"][label] = legitimate_counts

        track_number += 1

    # LARGEST DISTANCE OVER ALL LIBRARIES (X AXIS EXTENT)
    max_distance_all = 0
    for label_dict in distance_dict.values():
        for counts in label_dict.values():
            max_distance_all = max(max_distance_all,
                                   int(counts['max_distance']))

    yticks = []
    yticklabels = []
    yrange_count = 0
    for label in track_labels:
        # MANAGE ARRAYS: both junction types of a library share the same
        # reference maximum.
        max_label_leg_ille = max(
            distance_dict["legitimates"][label]["max_distance_count"],
            distance_dict["illegitimates"][label]["max_distance_count"])
        distance_dict["illegitimates"][label] = manageArray(
            distance_dict["illegitimates"][label], max_label_leg_ille, method)
        distance_dict["legitimates"][label] = manageArray(
            distance_dict["legitimates"][label], max_label_leg_ille, method)

        distance_dict["illegitimates"][label] = collections.OrderedDict(
            sorted(distance_dict["illegitimates"][label].items()))
        distance_dict["legitimates"][label] = collections.OrderedDict(
            sorted(distance_dict["legitimates"][label].items()))

        # One unit-wide bar per distance; the value is the alpha channel
        # (black for legitimate, red for illegitimate).
        xranges_leg = []
        colors_leg = []
        for key, value in distance_dict["legitimates"][label].items():
            xranges_leg.append((key, 1))
            colors_leg.append((0.0, 0.0, 0.0, value))
        xranges_illeg = []
        colors_illeg = []
        for key, value in distance_dict["illegitimates"][label].items():
            xranges_illeg.append((key, 1))
            colors_illeg.append((1.0, 0.0, 0.0, value))

        legitimate_track = ("_legi", xranges_leg, colors_leg)
        illegitimate_track = ("_illegi", xranges_illeg, colors_illeg)
        if type_junction == 'all':
            tracks = [legitimate_track, illegitimate_track]
        elif type_junction == 'legitimate':
            tracks = [legitimate_track]
        else:  # 'illegitimate' (validated above)
            tracks = [illegitimate_track]

        for suffix, xranges, colors in tracks:
            track_yrange = track_yranges[yrange_count]
            ax.add_collection(
                BrokenBarHCollection(xranges,
                                     track_yrange,
                                     facecolors=colors,
                                     edgecolors=colors))
            yticks.append(track_yrange[0] + track_yrange[1] / 2.0)
            yticklabels.append(label + suffix)
            yrange_count += 1

    # SET UP DISPLAY BACKGROUND
    ax.axis('tight')
    ax.set_xlim([0, max_distance_all + 10])
    ax.set_yticks(yticks)
    ax.set_yticklabels(yticklabels)
    # Floor division keeps the step an integer on both Python 2 and 3
    # (range() rejects floats).
    step_length = (max_distance_all // 100) * 100 // 20
    if step_length == 0:
        step_length = 5
    ax.set_xticks(list(range(0, max_distance_all + 10, step_length)))
    ax.get_xaxis().get_major_formatter().set_scientific(False)
    ax.set_xlabel("Position from bait primer")
    # Use the existing axes: a bare plt.axes() creates a new, empty Axes on
    # current matplotlib versions.
    ax.xaxis.set_minor_locator(MultipleLocator(1))

    # SET UP TITLE
    ax.set_title(title_label, fontdict=None, loc='center')
    fig.set_size_inches(22, 10)

    # SAVE DISPLAY
    fig.savefig(dir_post + genome + file_output_extension,
                format='png',
                bbox_inches='tight')