Beispiel #1
0
    def plot(self, regions):
        """
        Plot a pair of regions (x axis, y axis) on this plot's axes.

        :param regions: Either a single region, which is then used for
                        both axes, or a tuple ``(x_region, y_region)``.
                        Each region may be a region string or a
                        :class:`~GenomicRegion`.
        :return: The return value of ``_plot``; if that is None, the
                 tuple ``(self.fig, self.ax)`` instead.
        """
        # A lone region doubles as both the x and the y region.
        if isinstance(regions, tuple):
            pair = regions
        else:
            pair = (regions, regions)
        x_region, y_region = pair

        # Region strings are parsed; anything else is used as given.
        x_region, y_region = [
            GenomicRegion.from_string(r) if isinstance(r, string_types) else r
            for r in (x_region, y_region)
        ]

        self._current_chromosome_x = x_region.chromosome
        self._current_chromosome_y = y_region.chromosome

        # Fall back to matplotlib's current axes if none were assigned.
        if self.ax is None:
            self.ax = plt.gca()

        region_pair = (x_region, y_region)
        self._before_plot(region_pair)
        result = self._plot(region_pair)
        self._after_plot(region_pair)

        return (self.fig, self.ax) if result is None else result
Beispiel #2
0
    def _subset_rows(self, key):
        """
        Iterate over a subset of regions given the specified key.

        :param key: A :class:`~GenomicRegion` object or region string,
                    or a list of the former. Also accepts slices,
                    integers and lists of integers. Slices may be
                    open-ended (``start`` and/or ``stop`` of None);
                    the slice step is not taken into account.
        :return: Iterator over the specified subset of regions
        """
        if isinstance(key, slice):
            # Only constrain the bounds that are actually set, so that
            # open-ended slices do not put a literal "None" in the query.
            bounds = []
            if key.start is not None:
                bounds.append("(ix >= {})".format(key.start))
            if key.stop is not None:
                bounds.append("(ix < {})".format(key.stop))

            if bounds:
                rows = self._regions.where(" & ".join(bounds))
            else:
                rows = self._regions
            for row in rows:
                yield row
        elif isinstance(key, int):
            yield self._regions[key]
        elif isinstance(key, list) and len(key) > 0 and isinstance(
                key[0], int):
            for ix in key:
                yield self._regions[ix]
        else:
            if isinstance(key, string_types):
                key = GenomicRegion.from_string(key)

            if isinstance(key, GenomicRegion):
                keys = [key]
            else:
                keys = key

            for k in keys:
                if isinstance(k, string_types):
                    k = GenomicRegion.from_string(k)

                # Overlap query: a row matches when it starts at or before
                # the key's end and ends at or after the key's start.
                conditions = []
                if k.chromosome is not None:
                    conditions.append(
                        "(chromosome == b'%s')" % k.chromosome)
                if k.end is not None:
                    conditions.append("(start <= %d)" % k.end)
                if k.start is not None:
                    conditions.append("(end >= %d)" % k.start)

                if conditions:
                    query = "(" + " & ".join(conditions) + ")"
                    rows = self._regions.where(query)
                else:
                    # A key without any constraint selects every region.
                    rows = self._regions
                for row in rows:
                    yield row
Beispiel #3
0
    def _convert_key(self, key, region_trees):
        """
        Translate a region key into a slice of region indices.

        :param key: A region string, a :class:`~GenomicRegion`, or any
                    other key. Strings are parsed into regions first.
        :param region_trees: Mapping from chromosome name to a sliceable
                             container of intervals; each interval's
                             ``data`` attribute is used as a region index
                             (presumably interval trees -- confirm).
        :return: ``slice(start, stop, 1)`` covering all indices found for
                 the region, or ``key`` unchanged if it is not a region.
        :raises ValueError: If the chromosome is not in ``region_trees``
                            or no interval matches the region.
        """
        if isinstance(key, string_types):
            key = GenomicRegion.from_string(key)

        if not isinstance(key, GenomicRegion):
            return key

        try:
            # Shift the start down by one, clamped at 0, before querying.
            lower = 0 if key.start is None else max(0, key.start - 1)
            upper = key.end
            indices = [
                interval.data
                for interval in region_trees[key.chromosome][lower:upper]
            ]
        except KeyError:
            raise ValueError("Requested chromosome {} was not "
                             "found in this matrix.".format(
                                 key.chromosome))

        if not indices:
            raise ValueError(
                "Requested region {} was not found in this matrix.".format(
                    key))

        # The stop index is exclusive, hence one past the largest index.
        return slice(min(indices), max(indices) + 1, 1)
Beispiel #4
0
    def test_get_node_x_by_region(self):
        """Check how many regions ``self.hic.regions`` yields per query."""
        expected_counts = [
            ('chr1', 5),
            ('chr2', 3),
            ('chr3', 4),
            ('chr1:3452-6000', 2),
            ('chr1:1-51000', 5),
        ]

        for region_string, expected in expected_counts:
            region = GenomicRegion.from_string(region_string)
            nodes = list(self.hic.regions(region))
            assert len(nodes) == expected
Beispiel #5
0
 def plot(self, region):
     """
     Plot the given region, optionally toggling its 'chr' prefix.

     :param region: Region string or :class:`~GenomicRegion`.
     """
     if isinstance(region, string_types):
         region = GenomicRegion.from_string(region)

     if self.fix_chromosome:
         name = region.chromosome
         # Toggle the prefix: strip a leading 'chr', or add one if absent.
         name = name[3:] if name.startswith('chr') else 'chr' + name
         region = GenomicRegion(chromosome=name,
                                start=region.start,
                                end=region.end)

     self._plot(region)
Beispiel #6
0
    def plot(self, region):
        """
        Plot the given region on this plot's axes.

        :param region: Region string or :class:`~GenomicRegion`.
        :return: The return value of ``_plot``; if that is None, the
                 tuple ``(self.fig, self.ax)`` instead.
        """
        if isinstance(region, string_types):
            region = GenomicRegion.from_string(region)

        if self.fix_chromosome:
            # Work on a copy so the caller's region object is untouched.
            region = region.fix_chromosome(copy=True)

        # Fall back to matplotlib's current axes if none were assigned.
        if self.ax is None:
            self.ax = plt.gca()

        self._before_plot(region)
        result = self._plot(region)
        self._after_plot(region)

        return (self.fig, self.ax) if result is None else result
Beispiel #7
0
    def __init__(self, hic_data, slice_region, names=None,
                 colors=None, fill=None,
                 buffering_strategy="relative", buffering_arg=1,
                 weight_field=None, default_value=None, **kwargs):
        """
        :param hic_data: :class:`~fanc.Hic` or :class:`~fanc.RegionMatrix`. Can be list of
                         multiple Hi-C datasets.
        :param slice_region: String ("2L:1000000-1500000") or :class:`~GenomicRegion`.
                             All interactions involving this region are shown.
        :param names: If multiple Hi-C datasets are provided, can pass a list of names.
                      Are used as names in the legend of the plot.
        :param colors: A single color string or a list of colors. Defaults to the
                       colors of matplotlib's ``axes.prop_cycle``.
        :param fill: Stored on the instance as ``self.fill``.
        :param buffering_strategy: A valid buffering strategy for class:`~BufferedMatrix`
        :param buffering_arg: Adjust range of buffering for class:`~BufferedMatrix`
        :param weight_field: Passed through to ``prepare_hic_buffer``.
        :param default_value: Passed through to ``prepare_hic_buffer``.
        :param kwargs: Forwarded to the parent constructor; ``aspect``
                       defaults to 0.3 if not given.
        """
        kwargs.setdefault("aspect", .3)
        super(HicSlicePlot, self).__init__(**kwargs)

        # A single dataset is treated as a one-element list.
        if isinstance(hic_data, (list, tuple)):
            datasets = hic_data
        else:
            datasets = [hic_data]

        self.hic_buffers = [
            prepare_hic_buffer(dataset,
                               buffering_strategy=buffering_strategy,
                               buffering_arg=buffering_arg,
                               weight_field=weight_field,
                               default_value=default_value)
            for dataset in datasets
        ]
        self.names = names

        if isinstance(slice_region, string_types):
            slice_region = GenomicRegion.from_string(slice_region)
        self.slice_region = slice_region

        self.x = None
        self.y = None
        self.lines = []
        self.fill = fill

        if colors is None:
            # No colors given: use matplotlib's default color cycle.
            colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
        elif isinstance(colors, string_types):
            colors = [colors]
        self.colors = colors
Beispiel #8
0
    def sub_sequence(self, chromosome, start=None, end=None):
        """
        Extract the chromosome DNA sequence between start and end.

        :param chromosome: Name of chromosome, a region string, or a
                           :class:`~GenomicRegion`
        :param start: start position in bp (1-based, inclusive). If
                      omitted, the sequence is taken from the start given
                      by the region, or from the chromosome start.
        :param end: end position in bp (1-based, inclusive). If omitted,
                    the sequence runs to the end given by the region, or
                    to the chromosome end.
        :return: str
        """
        if start is not None:
            selection_region = GenomicRegion(chromosome=chromosome,
                                             start=start,
                                             end=end)
        elif isinstance(chromosome, GenomicRegion):
            selection_region = chromosome
        else:
            selection_region = GenomicRegion.from_string(chromosome)

        sequence = self[selection_region.chromosome].sequence

        first = selection_region.start
        last = selection_region.end
        if last is None:
            # Honour an explicit ``end`` even when no ``start`` was given;
            # it used to be silently ignored in that case.
            last = end

        if first is None and last is None:
            return sequence

        # 1-based inclusive coordinates -> 0-based half-open slice.
        offset = 0 if first is None else first - 1
        return sequence[offset:last]
Beispiel #9
0
def hic_contact_plot_linear(hic, regions, output=None, window_size=1000000):
    """
    Plot Hi-C contacts around the centers of the given regions.

    For every region, the bin containing the region's center is looked up
    and its contacts with the bins up to half a window to the left and to
    the right are collected, labelled by distance in multiples of the
    bin size.

    :param hic: Hi-C object providing ``bin_size``, ``get_node`` and
                region-based matrix indexing
    :param regions: Iterable of region strings or
                    :class:`~GenomicRegion` objects
    :param output: Path to save the figure to. If None, the figure is
                   shown instead.
    :param window_size: Total width in bp of the window around each center
    :return: :class:`pandas.DataFrame` with the columns "distance",
             "contacts", "region" and "type"
    """
    contact_list = []
    half_window = int(window_size / 2)
    bin_size = hic.bin_size
    for i, feature_region in enumerate(regions):
        if isinstance(feature_region, string_types):
            feature_region = GenomicRegion.from_string(feature_region)
        center = feature_region.start + int(
            (feature_region.end - feature_region.start) / 2)
        center_region = GenomicRegion(chromosome=feature_region.chromosome,
                                      start=center,
                                      end=center)

        center_node = hic.get_node(center_region)

        # The left window ends at the center bin's start (clamped to
        # position 1); the right window begins just after the bin's end.
        left_region = GenomicRegion(chromosome=feature_region.chromosome,
                                    start=max(1,
                                              center_node.start - half_window),
                                    end=center_node.start)

        right_region = GenomicRegion(chromosome=feature_region.chromosome,
                                     start=center_node.end + 1,
                                     end=center_node.end + half_window)

        # Walk the left row backwards so the distance grows away from
        # the center bin.
        hic_left = hic[center_region, left_region][0]
        n_left = len(hic_left)
        for j in range(n_left):
            label = -1 * bin_size * j
            val = hic_left[n_left - j - 1]
            contact_list.append([label, val, str(i), 'data'])

        hic_right = hic[center_region, right_region][0]
        for j in range(len(hic_right)):
            label = bin_size * (j + 1)
            val = hic_right[j]
            contact_list.append([label, val, str(i), 'data'])

    df = pandas.DataFrame(contact_list,
                          columns=["distance", "contacts", "region", "type"])

    save_to_file = output is not None
    if save_to_file:
        # Remember the current matplotlib state so it can be restored
        # afterwards, even if plotting or saving fails.
        old_backend = plt.get_backend()
        was_interactive = plt.isinteractive()
        plt.switch_backend('pdf')
        plt.ioff()

    try:
        # NOTE(review): sns.tsplot has been deprecated since seaborn 0.9
        # and is absent from recent releases -- confirm the pinned version.
        tsplot = sns.tsplot(data=df,
                            time="distance",
                            unit="region",
                            condition="type",
                            value="contacts",
                            estimator=np.median,
                            err_style="unit_traces",
                            err_palette="Reds")

        if save_to_file:
            tsplot.figure.savefig(output)
            plt.close(tsplot.figure)
        else:
            plt.show()
    finally:
        if save_to_file:
            # Only re-enable interactive mode if it was on before.
            if was_interactive:
                plt.ion()
            plt.switch_backend(old_backend)

    return df