Ejemplo n.º 1
0
    def goto(self, gr: GR = None, gr2: GR = None):
        """
        Go to the range on the genome.

        Parameters
        ----------
        gr : {str, GenomeRange}, optional
            The range string,
            like "chr1:1000000-2000000", or GenomeRange object.
            Stored in ``current_range``; when falsy, ``current_range``
            is left untouched.

        gr2 : {str, GenomeRange}, optional
            Second range, accepted in the same forms as ``gr``.
            Stored in ``current_range2``; when falsy, ``current_range2``
            is left untouched.

        Examples
        --------
        >>> frame = Frame()
        >>> frame.goto("chrX:3000000-5000000")
        >>> str(frame.current_range)
        'chrX:3000000-5000000'
        >>> frame.goto(GenomeRange("chr1", 1000, 2000))
        >>> str(frame.current_range)
        'chr1:1000-2000'
        """
        if gr:
            self.current_range = gr if isinstance(
                gr, GenomeRange) else GenomeRange(gr)
        if gr2:
            # Keep gr2 itself (not gr) when it is already a GenomeRange.
            self.current_range2 = gr2 if isinstance(
                gr2, GenomeRange) else GenomeRange(gr2)
Ejemplo n.º 2
0
    def fetch_intervals(self, gr: GenomeRange):
        """
        Fetch the annotation records overlapping a genome range.

        The file ``self.bgz_file`` is queried through ``tabix_query``. When
        the query yields nothing, ``gr.change_chrom_names()`` is called on
        the caller's ``gr`` object and the query is repeated once.

        Parameters
        ----------
        gr : GenomeRange
            Range to query; its ``chrom``, ``start`` and ``end`` are read.

        Returns
        -------
        intervals : pandas.core.frame.DataFrame
            Annotation interval table with the columns 'seqname', 'source',
            'feature', 'start', 'end', 'score', 'strand', 'frame',
            'attribute' and 'gene_name'. 'start' and 'end' are cast to int;
            'gene_name' is an empty string where the attribute text has no
            ``gene_name`` entry.
        """
        rows = list(tabix_query(self.bgz_file, gr.chrom, gr.start, gr.end))
        if not rows:
            # Retry once with the alternative chromosome naming.
            gr.change_chrom_names()
            rows = list(
                tabix_query(self.bgz_file, gr.chrom, gr.start, gr.end))

        columns = [
            'seqname', 'source', 'feature', 'start', 'end', 'score', 'strand',
            'frame', 'attribute'
        ]
        df = pd.DataFrame(rows, columns=columns)
        df['start'] = df['start'].astype(int)
        df['end'] = df['end'].astype(int)
        gene_name = df['attribute'].str.extract(
            r".*gene_name (.*?) ").iloc[:, 0].str.strip('\";')
        # Assign the filled series back instead of calling
        # fillna(inplace=True) on df['gene_name']: the chained in-place form
        # does not update df when pandas Copy-on-Write is active.
        df['gene_name'] = gene_name.fillna("")
        return df
Ejemplo n.º 3
0
 def plot(self, ax, chrom_region, region_start, region_end):
     """
     Draw the bands overlapping a region as horizontal bars on ``ax``.

     Parameters
     ----------
     ax
         Axes object that receives the bar collection and axis limits;
         also stored on ``self.ax``.
     chrom_region, region_start, region_end
         Chromosome name and bounds used to build the ``GenomeRange``
         that selects bands from ``self.interval_tree``.
     """
     self.ax = ax
     grange = GenomeRange(chrom_region, region_start, region_end)
     if grange.chrom not in self.interval_tree:
         grange.change_chrom_names()
     bands_in_region = sorted(
         self.interval_tree[grange.chrom][grange.start:grange.end])
     band_height = self.properties['height']
     xranges, colors = [], []
     for itv in bands_in_region:
         start, end = itv.begin, itv.end
         band_name, band_type = itv.data[:2]
         band_color = self.lookup_band_color(band_type)
         # The bar collection takes (left edge, width) pairs, so the width
         # must be end - start rather than the right edge itself.
         xranges.append((start, end - start))
         colors.append(band_color)
         # Band names are only drawn for regions shorter than 80 Mb.
         if (self.properties['show_band_name'] != 'no'
                 and grange.length < 80_000_000):
             self.plot_text(band_name, start, end, band_color)
     # NOTE(review): BrokenBarHCollection is deprecated in recent matplotlib
     # releases - confirm the supported matplotlib version.
     coll = BrokenBarHCollection(xranges, (0, band_height),
                                 facecolors=colors,
                                 linewidths=self.properties['border_width'],
                                 edgecolors=self.properties['border_color'])
     ax.add_collection(coll)
     ax.set_ylim(-0.1, band_height + 0.1)
     ax.set_xlim(region_start, region_end)
     self.plot_label()
Ejemplo n.º 4
0
    def fetch_data(self, gr: GenomeRange, **kwargs):
        """
        Return the intervals stored around a genome range.

        ``gr`` is first normalised with ``to_gr``. If its chromosome is not
        a key of ``self.interval_tree``, ``change_chrom_names()`` is called
        on it before the lookup. The query window is widened by 10000 on
        each side.

        Returns
        -------
        list of tuple
            ``(begin, end, data)`` for every interval found, in sorted order.
        """
        gr = to_gr(gr)
        known_chroms = list(self.interval_tree)
        if gr.chrom not in known_chroms:
            gr.change_chrom_names()

        chrom_tree = self.interval_tree[gr.chrom]
        hits = sorted(chrom_tree[gr.start - 10000 : gr.end + 10000])
        return [(hit.begin, hit.end, hit.data) for hit in hits]
Ejemplo n.º 5
0
 def fetch_data(self, gr: GenomeRange, **kwargs):
     """
     Return the bands overlapping ``gr`` as a table.

     When ``gr.chrom`` is not a key of ``self.interval_tree``,
     ``gr.change_chrom_names()`` is called before the lookup.

     Returns
     -------
     pandas.DataFrame
         One row per band, in sorted interval order, with the columns
         'chrom', 'start', 'end', 'band_name' and 'band_type'.
     """
     if gr.chrom not in self.interval_tree:
         gr.change_chrom_names()
     chrom_tree = self.interval_tree[gr.chrom]
     records = []
     for band in sorted(chrom_tree[gr.start:gr.end]):
         # The first two items of the interval payload are name and type.
         name, kind = band.data[:2]
         records.append([gr.chrom, band.begin, band.end, name, kind])
     return pd.DataFrame(
         records,
         columns=['chrom', 'start', 'end', 'band_name', 'band_type'],
     )
Ejemplo n.º 6
0
    def goto(self, gr1=None, gr2=None):
        """
        Set the pair of current ranges.

        Parameters
        ----------
        gr1 : optional
            First range, passed to ``GenomeRange``. When None, the first
            entry of the existing ``current_range`` is reused.
        gr2 : optional
            Second range, passed to ``GenomeRange``. When None, it takes the
            same value as the first range.

        Raises
        ------
        ValueError
            If no range could be resolved.
        """
        first = None if gr1 is None else GenomeRange(gr1)
        second = None if gr2 is None else GenomeRange(gr2)

        # Fall back to history for the first range, then mirror it.
        if first is None:
            first = self.current_range[0]
        if second is None:
            second = first

        if first is None or second is None:
            raise ValueError("No history gr found.")
        self.current_range = [first, second]
Ejemplo n.º 7
0
    def fetch_data(self, gr: GenomeRange):
        """
        Collect the vertical-line positions around a genome range.

        When ``gr.chrom`` is not among the keys of
        ``self.vlines_intval_tree``, ``gr.change_chrom_names()`` is called
        first. The query window is ``gr.start - 1`` to ``gr.end + 1``.

        Returns
        -------
        list
            For each interval found (in sorted order) its ``begin``,
            followed by its ``end`` when the two differ.
        """
        if gr.chrom not in list(self.vlines_intval_tree):
            gr.change_chrom_names()

        chrom_tree = self.vlines_intval_tree[gr.chrom]
        positions = []
        for hit in sorted(chrom_tree[gr.start - 1:gr.end + 1]):
            if hit.end != hit.begin:
                positions.extend((hit.begin, hit.end))
            else:
                positions.append(hit.begin)
        return positions
Ejemplo n.º 8
0
    def goto(self, genome_range, who=None):
        """
        Move the browser and its frame to ``genome_range``.

        Parameters
        ----------
        genome_range : {str, GenomeRange}
            Target range; a string is converted with ``GenomeRange``.
        who : optional
            Forwarded to ``self.widgets.refresh_widgets``.

        Notes
        -----
        When ``self.chrom_lengthes.check_range`` rejects the range, a
        warning is logged and nothing is changed.
        """
        if isinstance(genome_range, str):
            genome_range = GenomeRange(genome_range)

        range_ok = self.chrom_lengthes.check_range(genome_range)
        if not range_ok:
            log.warning("The genome range {} is not valid.".format(genome_range))
            return

        self.current_range = genome_range

        # NOTE: frame's start is zero based
        chrom = genome_range.chrom
        zero_based_start = genome_range.start - 1
        self.frame.goto(GenomeRange(chrom, zero_based_start, genome_range.end))

        self.widgets.refresh_widgets(who=who)
Ejemplo n.º 9
0
 def __intervaltree_from_list(self, region_list):
     """
     Build one ``IntervalTree`` per chromosome from a list of regions.

     Parameters
     ----------
     region_list : iterable
         Items may be a range string, a tuple whose first three items are
         passed to ``GenomeRange``, or a ``GenomeRange``.

     Returns
     -------
     dict
         Chromosome name -> ``IntervalTree``; each range is stored under
         the slice ``start:end + 1`` with the range itself as payload.

     Raises
     ------
     ValueError
         If an item is none of the accepted types.
     """
     trees = {}
     for item in region_list:
         if isinstance(item, str):
             grange = GenomeRange(item)
         elif isinstance(item, tuple):
             grange = GenomeRange(item[0], item[1], item[2])
         elif not isinstance(item, GenomeRange):
             raise ValueError("position must be a tuple or string.")
         else:
             grange = item
         chrom_tree = trees.setdefault(grange.chrom, IntervalTree())
         chrom_tree[grange.start:grange.end + 1] = grange
     return trees
Ejemplo n.º 10
0
    def __init__(self, frame, reference_genome='hg19',
                 init_range=None, widgets_box='simple',
                 dpi=None, img_format='svg'):
        """
        Set up the browser around a frame.

        Parameters
        ----------
        frame : coolbox.core.Frame
            Browser's main frame.

        reference_genome : str, optional
            Reference genome. A key of ``BUILT_IN_GENOMES``
            (built-in references: 'hg19', 'hg38', 'mm9', 'mm10') or,
            for any other genome, a "chromosome length file": a
            tab-separated file whose first column is the chromosome and
            whose second column is its length. Default 'hg19'.

        init_range : str, optional
            Initial browser range. When None, ``get_init_range()`` is used.

        widgets_box : {'simple', 'full'}, optional
            WidgetsBox sub class, default SimpleWidgets.

        dpi : int, optional
            The dpi of frame's image.

        img_format : str, optional
            Frame image format, default svg.

        Raises
        ------
        IOError
            If the chromosome length object built from ``reference_genome``
            is empty.
        NotImplementedError
            If ``widgets_box`` is neither 'simple' nor 'full'.
        """

        self.dpi = dpi
        self.img_format = img_format
        self.frame = frame

        if reference_genome not in BUILT_IN_GENOMES:
            self.chrom_lengthes = GenomeLength(reference_genome)
            if len(self.chrom_lengthes) == 0:
                raise IOError("chromosome lengthes file is not include any useful information."
                              "Please check file \"{}\".".format(reference_genome))
        else:
            self.chrom_lengthes = BUILT_IN_GENOMES[reference_genome]

        self.current_range = (
            self.get_init_range() if init_range is None
            else GenomeRange(init_range)
        )

        if widgets_box not in ('simple', 'full'):
            raise NotImplementedError("widgets type {} not support, please use 'simple' or 'full'".format(widgets_box))
        widgets_cls = SimpleWidgets if widgets_box == 'simple' else FullWidgets
        self.widgets = widgets_cls(self)

        self.goto(self.current_range)
        self.fig = None

        # Rendered figures are cached to speed up display.
        #   key: genome range
        #   value: fig image bytes
        self.fig_cache = {}
Ejemplo n.º 11
0
 def __init__(self,
              hic_track_or_file,
              genome_position,
              args_hic=None,
              **kwargs):
     """
     Create the track from a Hi-C source and a viewpoint position.

     Parameters
     ----------
     hic_track_or_file
         A string is passed to ``HiCMat`` (with ``args_hic`` as keyword
         arguments); any other value is used as the Hi-C track directly.
     genome_position
         Stored as the 'genome_position' property and converted with
         ``GenomeRange`` into ``self.position``.
     args_hic : dict, optional
         Keyword arguments for ``HiCMat``; only used for a string source.
     **kwargs
         Override the default properties.
     """
     if not isinstance(hic_track_or_file, str):
         hic_track = hic_track_or_file
     else:
         hic_track = HiCMat(hic_track_or_file, **(args_hic or {}))

     defaults = {
         'hic': hic_track,
         'color': Virtual4C.DEFAULT_COLOR,
         'height': Virtual4C.DEFAULT_HEIGHT,
         'genome_position': genome_position,
         'bin_width': 3,
         'max_value': 'auto',
         'min_value': 'auto',
         'show_data_range': True,
         'data_range_style': 'y-axis',
         'style': 'line:1',
         'title': '',
     }
     # Caller-supplied keyword arguments win over the defaults.
     super().__init__({**defaults, **kwargs})

     props = self.properties
     self.hic = props['hic']
     self.position = GenomeRange(props['genome_position'])
     self.bin_width = props['bin_width']
     props['type'] = props['style']
Ejemplo n.º 12
0
 def fetch_data(self, gr: GenomeRange, **kwargs):
     """
     Load the rows for ``gr`` and return them as a DataFrame.

     When ``self.load_range(gr)`` returns nothing,
     ``gr.change_chrom_names()`` is called and the load is repeated once.

     Returns
     -------
     pandas.DataFrame
         When non-empty, columns are named ``col_<i>`` except the ones at
         the 'col_chrom', 'col_pos' and 'col_pval' property indexes, which
         become "chrom", "pos" and "score".
     """
     named_columns = (
         ("chrom", self.properties['col_chrom']),
         ("pos", self.properties['col_pos']),
         ("score", self.properties['col_pval']),
     )
     rows = self.load_range(gr)
     if len(rows) == 0:
         gr.change_chrom_names()
         rows = self.load_range(gr)
     df = pd.DataFrame(rows)
     if df.shape[0] <= 0:
         return df
     names = [f'col_{i}' for i in range(df.shape[1])]
     for label, index in named_columns:
         names[index] = label
     df.columns = names
     return df
Ejemplo n.º 13
0
    def plot(self, ax, chrom_region, start_region, end_region):
        """
        Plot the binned scores of a region on ``ax``.

        Scores are drawn as a filled area when the 'type' property is
        absent or equal to 'fill', otherwise as line or points. The data
        range is drawn unless the 'show_data_range' property is 'no'.

        Returns
        -------
        The axes stored on ``self.ax``.
        """
        self.ax = ax

        genome_range = GenomeRange(chrom_region, start_region, end_region)
        log.debug("plotting {}".format(self.properties['file']))

        num_bins = self.__get_bins_num()
        self.__check_chrom_name(genome_range)
        scores_per_bin = self.__get_scores_per_bin(genome_range, num_bins)

        # One x coordinate per bin, spread evenly over the region.
        x_values = np.linspace(genome_range.start, genome_range.end, num_bins)

        draw_fill = not ('type' in self.properties
                         and self.properties['type'] != 'fill')
        if draw_fill:
            self.__plot_fill(scores_per_bin, x_values)
        else:
            self.__plot_line_or_points(scores_per_bin, x_values)

        ymin, ymax = self.__adjust_plot(genome_range)

        hide_data_range = ("show_data_range" in self.properties
                           and self.properties["show_data_range"] == 'no')
        if not hide_data_range:
            self.genome_range = genome_range
            self.plot_data_range(ymin, ymax, self.properties['data_range_style'])

        self.plot_label()

        return self.ax
Ejemplo n.º 14
0
 def plot(self, ax, chrom_region, start_region, end_region):
     """
     Plot the annotation features of a region on ``ax``.

     Rows come from ``self.fetch_intervals``. They are optionally narrowed
     by the ';'-separated 'row_filter' property and by the
     'length_ratio_thresh' property (minimum feature length as a ratio of
     the region length). Each remaining row becomes a ``GraphicFeature``
     with a colour picked at random from ``self.colors``.
     """
     self.ax = ax
     genome_range = GenomeRange(chrom_region, start_region, end_region)
     itv_df = self.fetch_intervals(genome_range)
     df = itv_df
     if self.has_prop("row_filter"):
         filters = self.properties["row_filter"]
         # NOTE(review): each filter is interpolated into eval(); confirm
         # 'row_filter' never carries untrusted text. The local names used
         # up to this point are kept as-is because eval() can see them.
         for filter_ in filters.split(";"):
             try:
                 # Split at the first comparison character.
                 op_idx = list(re.finditer("[=><!]", filter_))[0].start()
                 l_ = filter_[:op_idx].strip()
                 r_ = filter_[op_idx:]
                 df = eval(f'df[df["{l_}"]{r_}]')
             except IndexError:
                 log.warning(f"row filter {filter_} is not valid.")

     region_length = end_region - start_region
     if self.has_prop("length_ratio_thresh"):
         min_length = region_length * self.properties["length_ratio_thresh"]
         df = df[(df["end"] - df["start"]) > min_length]

     features = [
         GraphicFeature(
             start=row['start'],
             end=row['end'],
             strand=(1 if row['strand'] == '+' else -1),
             label=row['gene_name'],
             color=random.choice(self.colors),
         )
         for _, row in df.iterrows()
     ]
     record = GraphicRecord(sequence_length=region_length,
                            features=features,
                            first_index=start_region)
     record.plot(ax=ax, with_ruler=False, draw_line=False)
     self.plot_label()
Ejemplo n.º 15
0
 def __intervaltree_from_list(self, vlines_list):
     """
     Build one ``IntervalTree`` per chromosome from vertical-line positions.

     Parameters
     ----------
     vlines_list : iterable
         Items may be a range string, a tuple ``(chrom, position, ...)``
         (the position is used as both start and end), or a
         ``GenomeRange``.

     Returns
     -------
     dict
         Chromosome name -> ``IntervalTree``; each range is stored under
         the slice ``start:end + 1`` with the range itself as payload.

     Raises
     ------
     ValueError
         If an item is none of the accepted types.
     """
     from intervaltree import IntervalTree
     trees = {}
     for item in vlines_list:
         if isinstance(item, str):
             grange = GenomeRange(item)
         elif isinstance(item, tuple):
             # A tuple describes a single position: start == end.
             grange = GenomeRange(item[0], item[1], item[1])
         elif not isinstance(item, GenomeRange):
             raise ValueError("position must be a tuple or string.")
         else:
             grange = item
         chrom_tree = trees.setdefault(grange.chrom, IntervalTree())
         chrom_tree[grange.start:grange.end + 1] = grange
     return trees
Ejemplo n.º 16
0
    def plot(self, ax, chrom_region, start_region, end_region):
        """
        Fetch the contact matrix of a region and draw it on ``ax``.

        In window style the matrix comes with its fetch region, which is
        stored on ``self.fetch_region``. A colour bar is added when the
        'color_bar' property is 'yes'; it is vertical only when the track
        has a ``y_ax`` attribute and uses the window style.
        """
        self.ax = ax
        self._out_of_bound = False

        log.debug("plotting {}".format(self.properties['file']))

        genome_range = GenomeRange(chrom_region, start_region, end_region)

        # Fetch the matrix (already transformed by the fetch helpers).
        if self.style != STYLE_WINDOW:
            matrix = self.__fetch_matrix(genome_range)
        else:
            matrix, self.fetch_region = self.__fetch_window_matrix(genome_range)
        self.matrix = matrix

        # Draw the matrix, then tidy the figure.
        img = self.__plot_matrix(genome_range)
        self.__adjust_figure(genome_range)

        # Optional colour bar.
        if self.properties['color_bar'] == 'yes':
            vertical = hasattr(self, 'y_ax') and self.style == STYLE_WINDOW
            self.__plot_colorbar(
                img, orientation='vertical' if vertical else 'horizontal')

        self.plot_label()
Ejemplo n.º 17
0
    def __init__(self, *args, **kwargs):
        """
        Create an empty frame.

        Recognised keyword arguments
        ----------------------------
        genome_range : {str, GenomeRange}
            Initial range, e.g. ``Frame(genome_range="chr1:1000-2000")``.
            Without it ``current_range`` is None.
        width, width_ratios, margins
            Stored as properties; default to ``Frame.DEFAULT_WIDTH``,
            ``Frame.DEFAULT_WIDTH_RATIOS`` and ``Frame.DEFAULT_MARGINS``.
        title
            Stored as a property only when given.
        """
        super().__init__({}, OrderedDict())

        # Initial range: keep a GenomeRange, convert anything else.
        if 'genome_range' not in kwargs:
            self.current_range = None
        else:
            range_ = kwargs['genome_range']
            self.current_range = (
                range_ if isinstance(range_, GenomeRange)
                else GenomeRange(range_)
            )

        # Properties that always exist, with their fallbacks.
        fallbacks = (
            ('width', Frame.DEFAULT_WIDTH),
            ('width_ratios', Frame.DEFAULT_WIDTH_RATIOS),
            ('margins', Frame.DEFAULT_MARGINS),
        )
        for key, fallback in fallbacks:
            self.properties[key] = kwargs.get(key, fallback)

        if 'title' in kwargs:
            self.properties['title'] = kwargs['title']
Ejemplo n.º 18
0
    def fetch_intervals(self, genome_range: Union[str, GenomeRange]):
        """
        Fetch intervals within input chromosome range.

        The range is split with ``split_genome_range`` and loaded; when
        nothing is found, the load is repeated once with the chromosome
        name converted by ``change_chrom_names``.

        Returns
        -------
        pandas.DataFrame
            Columns 'chromsome', 'start', 'end' and 'score'.
        """
        chrom, start, end = split_genome_range(genome_range)

        rows = self.__load(GenomeRange(chrom, start, end))
        if len(rows) == 0:
            alt_chrom = change_chrom_names(chrom)
            rows = self.__load(GenomeRange(alt_chrom, start, end))

        return pd.DataFrame(
            rows, columns=['chromsome', 'start', 'end', 'score'])
Ejemplo n.º 19
0
    def fetch_intervals(self, gr: GenomeRange):
        """
        Fetch the annotation records overlapping a genome range.

        The file ``self.bgz_file`` is queried through ``tabix_query``; when
        nothing is returned, ``gr.change_chrom_names()`` is called and the
        query is repeated once.

        Parameters
        ----------
        gr : GenomeRange
            Range to query; its ``chrom``, ``start`` and ``end`` are read.

        Returns
        -------
        intervals : pandas.core.frame.DataFrame
            Annotation interval table with an added 'feature_name' column.
            With the 'name_attr' property left at "auto", the name is the
            ``gene_name`` attribute, falling back to ``gene_id`` and then
            to a "seqname:start-end" string; otherwise it is the attribute
            named by 'name_attr'.
        """
        rows = list(tabix_query(self.bgz_file, gr.chrom, gr.start, gr.end))
        if not rows:
            gr.change_chrom_names()
            rows.extend(
                tabix_query(self.bgz_file, gr.chrom, gr.start, gr.end))

        df = pd.DataFrame(rows, columns=[
            'seqname', 'source', 'feature', 'start', 'end', 'score', 'strand',
            'frame', 'attribute'
        ])
        for coord in ('start', 'end'):
            df[coord] = df[coord].astype(int)

        name_attr = self.properties.get("name_attr", "auto")
        if name_attr != "auto":
            # Explicit attribute requested: no fallback chain.
            df['feature_name'] = df['attribute'].str.extract(
                f".*{name_attr} (.*?) ").iloc[:, 0].str.strip('\";')
            return df

        gene_name = df['attribute'].str.extract(
            ".*gene_name (.*?) ").iloc[:, 0].str.strip('\";')
        if gene_name.hasnans:
            gene_id = df['attribute'].str.extract(
                ".*gene_id (.*?) ").iloc[:, 0].str.strip('\";')
            gene_name = gene_name.fillna(gene_id)
            if gene_name.hasnans:
                # Last resort: label the record by its position.
                pos_str = df['seqname'].astype(str) + ":" +\
                          df['start'].astype(str) + "-" +\
                          df['end'].astype(str)
                gene_name = gene_name.fillna(pos_str)
        df['feature_name'] = gene_name
        return df
Ejemplo n.º 20
0
 def chrom_dropdown_val_change(change):
     """
     Move the browser to the newly selected chromosome.

     The start and end of the browser's current range are kept; only the
     chromosome changes. The result is passed through
     ``browser.chrom_lengthes.bound_range`` before navigating.
     """
     chosen_chrom = change['new']
     previous = browser.current_range
     target = GenomeRange(chosen_chrom, previous.start, previous.end)
     bounded = browser.chrom_lengthes.bound_range(target)
     browser.goto(bounded, who='chromosomes_list')
     browser.refresh()
Ejemplo n.º 21
0
 def plot(self, ax, chrom_region, start_region, end_region):
     """
     Plot the region on ``ax`` as alignments or as coverage.

     The 'plot_type' property selects the drawing: "alignment" (the
     default) uses ``plot_align``, any other value uses ``plot_coverage``.
     """
     gr = GenomeRange(chrom_region, start_region, end_region)
     ptype = self.properties.get("plot_type", "alignment")
     self.ax = ax
     draw = self.plot_align if ptype == "alignment" else self.plot_coverage
     draw(ax, gr)
Ejemplo n.º 22
0
    def fetch_data(self, gr: GenomeRange, **kwargs) -> pd.DataFrame:
        """
        Load the rows for ``gr`` and return them as a DataFrame.

        When ``self.load(gr)`` returns nothing, ``gr.chrom`` is replaced by
        ``change_chrom_names(gr.chrom)`` (the caller's object is modified)
        and the load is repeated once.

        Returns
        -------
        pandas.DataFrame
            Columns 'chromsome', 'start', 'end' and 'score'.
        """
        records = self.load(gr)
        nothing_found = len(records) == 0
        if nothing_found:
            gr.chrom = change_chrom_names(gr.chrom)
            records = self.load(gr)

        table_columns = ['chromsome', 'start', 'end', 'score']
        return pd.DataFrame(records, columns=table_columns)
Ejemplo n.º 23
0
 def go_left(self, step_ratio=0.5, dry_run=False):
     """
     Shift the current range to the left.

     Parameters
     ----------
     step_ratio : float, optional
         Shift size as a ratio of ``self.window_size`` (truncated to int).
     dry_run : bool, optional
         When true, return the bounded target range without navigating.

     Returns
     -------
     The target range when ``dry_run`` is true, otherwise None.
     """
     shift = int(self.window_size * step_ratio)
     shifted_start = self.current_range.start - shift
     shifted_end = self.current_range.end - shift
     target = self.chrom_lengthes.bound_range(
         GenomeRange(self.current_range.chrom, shifted_start, shifted_end))
     if not dry_run:
         self.goto(target)
         return None
     return target
Ejemplo n.º 24
0
    def get_init_range(self, chrom=None):
        """
        Generate an initial range within a chromosome.

        The range starts at 1 and ends at 10**7, or at the chromosome
        length when the chromosome is not longer than that.

        Args:
            chrom (str, optional): initial chromosome. Defaults to the
                first key of ``self.chrom_lengthes``.

        Return:
            (:obj:`GenomeRange`)
        """

        if chrom is None:
            chrom = list(self.chrom_lengthes.keys())[0]

        default_length = 10**7

        longer_than_default = self.chrom_lengthes[chrom] > default_length
        end = default_length if longer_than_default else self.chrom_lengthes[chrom]
        return GenomeRange(chrom, 1, end)
Ejemplo n.º 25
0
    def plot(self, ax, chrom_region, start_region, end_region):
        """
        Draw the vertical lines of a region across the full height of ``ax``.

        Positions come from ``self.fetch_data``; line style, width, colour
        and alpha are read from the track properties.
        """
        region = GenomeRange(chrom_region, start_region, end_region)
        positions = self.fetch_data(region)

        # Span the current y-limits of the axes.
        bottom, top = ax.get_ylim()

        line_kwargs = {
            'linestyle': self.properties['line_style'],
            'linewidth': self.properties['line_width'],
            'color': self.properties['color'],
            'alpha': self.properties['alpha'],
        }
        ax.vlines(positions, bottom, top, **line_kwargs)
Ejemplo n.º 26
0
        def range_slider_val_change(change):
            """
            Move the browser to the range selected on the slider.

            When the new selection has no positive length, the end is
            recomputed from the new start and the previous length. The
            range is passed through ``browser.chrom_lengthes.bound_range``
            before navigating.
            """
            previous_start, previous_end = change['old']
            previous_length = previous_end - previous_start

            new_start, new_end = change['new']
            chrom = browser.current_range.chrom
            if new_end - new_start <= 0:
                new_end = new_start + previous_length
            target = browser.chrom_lengthes.bound_range(
                GenomeRange(chrom, new_start, new_end))
            browser.goto(target, who='range_slider')
            browser.refresh()
Ejemplo n.º 27
0
 def plot(self, ax, chrom_region, start_region, end_region):
     """
     Plot the coverage of a region on ``ax``.

     ``self.fetch_plot_data`` may return None (nothing is drawn), a tuple
     ``(scores_per_bin, x_values)``, or the scores alone, in which case
     ``x_values`` is None. The label is drawn in every case.
     """
     self.ax = ax
     genome_range = GenomeRange(chrom_region, start_region, end_region)
     self.genome_range = genome_range
     plot_data = self.fetch_plot_data(genome_range)
     if plot_data is not None:
         scores_per_bin, x_values = (
             plot_data if isinstance(plot_data, tuple)
             else (plot_data, None)
         )
         self.plot_coverage(ax, genome_range, scores_per_bin, x_values)
     self.plot_label()
Ejemplo n.º 28
0
 def zoom_out(self, zoom_ratio=2, dry_run=False):
     """
     Widen the current window around its center.

     Parameters
     ----------
     zoom_ratio : number, optional
         Factor applied to ``self.window_size``.
     dry_run : bool, optional
         When true, return the bounded target range without navigating.

     Returns
     -------
     The target range when ``dry_run`` is true, otherwise None.
     """
     window_size = self.window_size * zoom_ratio
     start = self.center - window_size // 2
     end = start + window_size
     genome_range = GenomeRange(self.current_range.chrom, start, end)
     genome_range = self.chrom_lengthes.bound_range(genome_range)
     # Navigate only on a real run, and only once: a dry run must not
     # move the browser.
     if dry_run:
         return genome_range
     self.goto(genome_range)
Ejemplo n.º 29
0
    def plot(self, ax, gr: GenomeRange, **kwargs):
        """
        Draw the vertical lines of ``gr`` across the full height of ``ax``.

        ``gr`` is passed through ``GenomeRange`` first. Positions come from
        ``self.fetch_data``; line style, width, colour and alpha are read
        from the track properties.
        """
        gr = GenomeRange(gr)
        positions = self.fetch_data(gr)

        # Span the current y-limits of the axes.
        bottom, top = ax.get_ylim()

        line_kwargs = {
            'linestyle': self.properties['line_style'],
            'linewidth': self.properties['line_width'],
            'color': self.properties['color'],
            'alpha': self.properties['alpha'],
        }
        ax.vlines(positions, bottom, top, **line_kwargs)
Ejemplo n.º 30
0
 def __init__(self,
              hicmat: Union[str, HicMatBase],
              genome_position: str,
              args_hic: dict = None,
              **kwargs):
     """
     Create the track from a Hi-C source and a viewpoint position.

     Parameters
     ----------
     hicmat : {str, HicMatBase}
         Forwarded to the parent initialiser.
     genome_position : str
         Stored as the 'genome_position' property and converted with
         ``GenomeRange`` into ``self.position``.
     args_hic : dict, optional
         Forwarded to the parent initialiser.
     **kwargs
         Override ``Virtual4C.DEFAULT_PROPERTIES`` and 'genome_position'.
     """
     # Later entries win: defaults < genome_position < caller kwargs.
     merged = {
         **Virtual4C.DEFAULT_PROPERTIES,
         "genome_position": genome_position,
         **kwargs,
     }
     super().__init__(hicmat, args_hic, **merged)
     self.position = GenomeRange(self.properties['genome_position'])
     self.bin_width = self.properties['bin_width']