def plot(self, regions):
    """
    Plot a pair of genomic regions (x and y axis).

    :param regions: Either a single region, which is then used for both
                    axes, or a tuple ``(x_region, y_region)``. Each region
                    may be a :class:`~GenomicRegion` or a region string,
                    which is parsed with ``GenomicRegion.from_string``.
    :return: Whatever ``self._plot`` returns; if that is None, the tuple
             ``(self.fig, self.ax)`` instead
    """
    # a lone region serves as both the x and the y region
    if isinstance(regions, tuple):
        first, second = regions
    else:
        first, second = regions, regions

    # parse region strings, x first, then y
    region_pair = tuple(
        GenomicRegion.from_string(r) if isinstance(r, string_types) else r
        for r in (first, second)
    )

    # remember which chromosomes are currently being displayed
    self._current_chromosome_x = region_pair[0].chromosome
    self._current_chromosome_y = region_pair[1].chromosome

    # fall back to the current matplotlib axes if none were assigned
    if self.ax is None:
        self.ax = plt.gca()

    self._before_plot(region_pair)
    result = self._plot(region_pair)
    self._after_plot(region_pair)

    return (self.fig, self.ax) if result is None else result
def _subset_rows(self, key):
    """
    Iterate over a subset of regions given the specified key.

    :param key: A :class:`~GenomicRegion` object or region string, or a
                list of the former. Also accepts slices, integers, and
                lists of integers, which are interpreted as region
                indexes. For slices, only ``start`` and ``stop`` are
                used (``step`` is never read) and either bound may be
                None for an open-ended slice.
    :return: Iterator over the specified subset of regions
    """
    if isinstance(key, slice):
        # Open-ended slices carry None bounds. Formatting those into the
        # query would produce an invalid condition such as "(ix >= None)",
        # so only the bounds that are actually present are constrained.
        bounds = []
        if key.start is not None:
            bounds.append("(ix >= {})".format(key.start))
        if key.stop is not None:
            bounds.append("(ix < {})".format(key.stop))

        if bounds:
            rows = self._regions.where(" & ".join(bounds))
        else:
            # slice(None, None): no restriction, iterate everything
            rows = self._regions
        for row in rows:
            yield row
    elif isinstance(key, int):
        yield self._regions[key]
    elif isinstance(key, list) and len(key) > 0 and isinstance(key[0], int):
        for ix in key:
            yield self._regions[ix]
    else:
        if isinstance(key, string_types):
            key = GenomicRegion.from_string(key)

        if isinstance(key, GenomicRegion):
            keys = [key]
        else:
            keys = key

        for k in keys:
            if isinstance(k, string_types):
                k = GenomicRegion.from_string(k)

            # Overlap query: a row matches if it is on the same chromosome,
            # starts no later than the region end and ends no earlier than
            # the region start. Unset attributes impose no constraint.
            conditions = []
            if k.chromosome is not None:
                conditions.append("(chromosome == b'%s')" % k.chromosome)
            if k.end is not None:
                conditions.append("(start <= %d)" % k.end)
            if k.start is not None:
                conditions.append("(end >= %d)" % k.start)

            if conditions:
                rows = self._regions.where('(' + ' & '.join(conditions) + ')')
            else:
                # completely unconstrained region: return every row
                rows = self._regions
            for row in rows:
                yield row
def _convert_key(self, key, region_trees):
    """
    Translate a genomic region key into a slice of region indexes.

    :param key: A :class:`~GenomicRegion`, a region string (parsed with
                ``GenomicRegion.from_string``), or any other object,
                which is returned untouched
    :param region_trees: Mapping from chromosome name to an object that
                         can be sliced with ``[start:end]`` and yields
                         intervals carrying a region index in ``.data``
                         (presumably interval trees - confirm with caller)
    :return: ``slice(first_index, last_index + 1, 1)`` covering all
             intervals found for the region, or ``key`` itself if it is
             not a region
    :raises ValueError: if the chromosome is not in ``region_trees`` or
                        no interval is found for the region
    """
    if isinstance(key, string_types):
        key = GenomicRegion.from_string(key)

    # anything that is not a region is passed through unchanged
    if not isinstance(key, GenomicRegion):
        return key

    try:
        # query from one position before the region start, but never
        # below 0; a missing start means "from the beginning"
        lower = max(0, key.start - 1) if key.start is not None else 0
        chromosome_tree = region_trees[key.chromosome]
        indexes = [interval.data for interval in chromosome_tree[lower:key.end]]
    except KeyError:
        raise ValueError("Requested chromosome {} was not "
                         "found in this matrix.".format(key.chromosome))

    if not indexes:
        raise ValueError(
            "Requested region {} was not found in this matrix.".format(key))

    # stop is exclusive, hence the + 1 on the largest index
    return slice(min(indexes), max(indexes) + 1, 1)
def test_get_node_x_by_region(self):
    """
    Check how many regions ``self.hic.regions`` returns for whole
    chromosomes and for sub-chromosome intervals.
    """
    # (region string, expected number of returned regions)
    expectations = [
        ('chr1', 5),
        ('chr2', 3),
        ('chr3', 4),
        ('chr1:3452-6000', 2),
        ('chr1:1-51000', 5),
    ]

    for region_string, expected_count in expectations:
        query = GenomicRegion.from_string(region_string)
        found = list(self.hic.regions(query))
        assert len(found) == expected_count
def plot(self, region):
    """
    Plot the given region via ``self._plot``.

    :param region: A :class:`~GenomicRegion` or a region string, which
                   is parsed with ``GenomicRegion.from_string``. If
                   ``self.fix_chromosome`` is set, a leading 'chr' is
                   removed from the chromosome name, or added if it is
                   absent, before plotting.
    """
    if isinstance(region, string_types):
        region = GenomicRegion.from_string(region)

    if self.fix_chromosome:
        # toggle the 'chr' prefix on a fresh region object; only
        # chromosome, start and end are carried over
        name = region.chromosome
        toggled = name[3:] if name.startswith('chr') else 'chr' + name
        region = GenomicRegion(chromosome=toggled,
                               start=region.start,
                               end=region.end)

    self._plot(region)
def plot(self, region):
    """
    Plot the given region on this plot's axes.

    :param region: A :class:`~GenomicRegion` or a region string, which
                   is parsed with ``GenomicRegion.from_string``. If
                   ``self.fix_chromosome`` is set, the region is replaced
                   by ``region.fix_chromosome(copy=True)`` first.
    :return: Whatever ``self._plot`` returns; if that is None, the tuple
             ``(self.fig, self.ax)`` instead
    """
    target = region
    if isinstance(target, string_types):
        target = GenomicRegion.from_string(target)
    if self.fix_chromosome:
        # work on a copy so the caller's region object is left alone
        target = target.fix_chromosome(copy=True)

    # fall back to the current matplotlib axes if none were assigned
    if self.ax is None:
        self.ax = plt.gca()

    self._before_plot(target)
    result = self._plot(target)
    self._after_plot(target)

    return (self.fig, self.ax) if result is None else result
def __init__(self, hic_data, slice_region, names=None, colors=None,
             fill=None, buffering_strategy="relative", buffering_arg=1,
             weight_field=None, default_value=None, **kwargs):
    """
    :param hic_data: :class:`~fanc.Hic` or :class:`~fanc.RegionMatrix`.
                     Can be list of multiple Hi-C datasets.
    :param slice_region: String ("2L:1000000-1500000") or
                         :class:`~GenomicRegion`. All interactions
                         involving this region are shown.
    :param names: If multiple Hi-C datasets are provided, can pass a
                  list of names. Are used as names in the legend of
                  the plot.
    :param colors: A single color string or a list of colors. If None,
                   the colors of matplotlib's ``axes.prop_cycle`` are
                   used.
    :param fill: Stored as ``self.fill``; not used in the constructor
    :param buffering_strategy: A valid buffering strategy for
                               class:`~BufferedMatrix`
    :param buffering_arg: Adjust range of buffering for
                          class:`~BufferedMatrix`
    :param weight_field: Passed on to ``prepare_hic_buffer``
    :param default_value: Passed on to ``prepare_hic_buffer``
    :param kwargs: Passed on to the parent constructor; ``aspect``
                   defaults to 0.3 if not given
    """
    kwargs.setdefault("aspect", .3)
    super(HicSlicePlot, self).__init__(**kwargs)

    # a single dataset is treated as a one-element list
    datasets = hic_data if isinstance(hic_data, (list, tuple)) else [hic_data]
    self.hic_buffers = [
        prepare_hic_buffer(dataset,
                           buffering_strategy=buffering_strategy,
                           buffering_arg=buffering_arg,
                           weight_field=weight_field,
                           default_value=default_value)
        for dataset in datasets
    ]
    self.names = names

    if isinstance(slice_region, string_types):
        slice_region = GenomicRegion.from_string(slice_region)
    self.slice_region = slice_region

    self.x = None
    self.y = None
    self.lines = []
    self.fill = fill

    if isinstance(colors, string_types):
        # a lone color name becomes a one-element list
        colors = [colors]
    elif colors is None:
        colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
    self.colors = colors
def sub_sequence(self, chromosome, start=None, end=None):
    """
    Extract the chromosome DNA sequence between start and end.

    :param chromosome: Name of chromosome. If ``start`` is not given,
                       this can also be a :class:`~GenomicRegion` or a
                       region string, which is parsed with
                       ``GenomicRegion.from_string``.
    :param start: start position in bp (1-based, inclusive). If None,
                  the sequence is taken from the chromosome start.
    :param end: end position in bp (1-based, inclusive). If None, the
                sequence is taken up to the chromosome end. Only used
                when the region itself does not define an end.
    :return: str
    """
    if start is not None:
        selection_region = GenomicRegion(chromosome=chromosome,
                                         start=start, end=end)
    elif isinstance(chromosome, GenomicRegion):
        selection_region = chromosome
    else:
        selection_region = GenomicRegion.from_string(chromosome)

    res_chromosome = self[selection_region.chromosome]

    region_start = selection_region.start
    region_end = selection_region.end
    if region_end is None:
        # An explicit `end` without `start` used to be dropped silently
        # because the region was built without it; honour it here.
        region_end = end

    if region_start is None and region_end is None:
        return res_chromosome.sequence

    # Convert the 1-based inclusive start to a 0-based slice offset. A
    # missing start means "from the first base". The inclusive 1-based
    # end already equals the exclusive 0-based slice stop.
    first = 0 if region_start is None else region_start - 1
    return res_chromosome.sequence[first:region_end]
def hic_contact_plot_linear(hic, regions, output=None, window_size=1000000):
    """
    Plot Hi-C contacts of the bins surrounding the center of each region.

    For every region, the bin containing the region's center is looked up
    with ``hic.get_node``. The contacts between that bin and the bins up
    to half a window to its left and right are collected, labelled by
    their distance from the center bin in multiples of ``hic.bin_size``.

    :param hic: Hi-C object providing ``bin_size``, ``get_node`` and
                matrix access via ``hic[row_region, col_region]``
    :param regions: Iterable of :class:`~GenomicRegion` objects or region
                    strings (parsed with ``GenomicRegion.from_string``)
    :param output: Path the figure is saved to. If None, the figure is
                   shown with ``plt.show()`` instead.
    :param window_size: Total size in bp of the window around each center
    :return: :class:`pandas.DataFrame` with the columns "distance",
             "contacts", "region" and "type"
    """
    contact_list = []
    half_window = int(window_size / 2)
    bin_size = hic.bin_size

    for i, feature_region in enumerate(regions):
        if isinstance(feature_region, string_types):
            feature_region = GenomicRegion.from_string(feature_region)

        center = feature_region.start + int(
            (feature_region.end - feature_region.start) / 2)
        center_region = GenomicRegion(chromosome=feature_region.chromosome,
                                      start=center, end=center)
        center_node = hic.get_node(center_region)

        # the left window ends at the start of the center bin, the right
        # window begins right after the center bin
        left_region = GenomicRegion(
            chromosome=feature_region.chromosome,
            start=max(1, center_node.start - half_window),
            end=center_node.start)
        right_region = GenomicRegion(
            chromosome=feature_region.chromosome,
            start=center_node.end + 1,
            end=center_node.end + half_window)

        # walk the left window from its last bin outwards so that
        # distances run 0, -bin_size, -2 * bin_size, ...
        hic_left = hic[center_region, left_region][0]
        n_left = len(hic_left)
        for j in range(n_left):
            contact_list.append(
                [-1 * bin_size * j, hic_left[n_left - j - 1], str(i), 'data'])

        hic_right = hic[center_region, right_region][0]
        for j in range(len(hic_right)):
            contact_list.append(
                [bin_size * (j + 1), hic_right[j], str(i), 'data'])

    df = pandas.DataFrame(
        contact_list, columns=["distance", "contacts", "region", "type"])

    save_to_file = output is not None
    if save_to_file:
        # render off-screen with the PDF backend while saving
        old_backend = plt.get_backend()
        was_interactive = plt.isinteractive()
        plt.switch_backend('pdf')
        plt.ioff()

    try:
        # NOTE(review): sns.tsplot was deprecated and later removed from
        # seaborn; this call needs an older seaborn release. Confirm the
        # pinned version before upgrading.
        tsplot = sns.tsplot(data=df, time="distance", unit="region",
                            condition="type", value="contacts",
                            estimator=np.median, err_style="unit_traces",
                            err_palette="Reds")
        if save_to_file:
            tsplot.figure.savefig(output)
            plt.close(tsplot.figure)
        else:
            plt.show()
    finally:
        if save_to_file:
            # Restore the caller's plotting state even if plotting or
            # saving failed. Interactive mode is only switched back on
            # if it was on before.
            if was_interactive:
                plt.ion()
            plt.switch_backend(old_backend)

    return df