def cluster(self, strand=None, by=None, **kwargs):
    """Return a new PyRanges whose ``Cluster`` ids are unique across keys.

    Parameters
    ----------
    strand : bool or None
        Passed on to ``pyrange_apply_single``; ``None`` falls back to
        ``self.stranded``.
    by : optional
        When truthy it is stored as ``kwargs["by"]`` and ``_cluster_by`` is
        applied instead of ``_cluster``.
    **kwargs
        Extra options; run through ``fill_kwargs`` before use.

    Returns
    -------
    PyRanges
        Built from the per-key frames after their ``Cluster`` column has been
        offset by the maximum id of the previously visited frame.
    """
    was_stranded = self.stranded
    if strand is None:
        strand = was_stranded

    kwargs = fill_kwargs(kwargs)
    kwargs["sparse"] = {"self": False}

    # Stranded data clustered without regard to strand: stash the strand in
    # "Strand2" so it can be renamed back to "Strand" after clustering.
    hide_strand = not strand and was_stranded
    if hide_strand:
        # NOTE(review): this attribute assignment is made on the caller's
        # object, before `self` is rebound to the unstranded copy.
        self.Strand2 = self.Strand
        self = self.unstrand()

    if by:
        from pyranges.methods.cluster import _cluster_by
        kwargs["by"] = by
        clustered = pyrange_apply_single(_cluster_by, self, strand, kwargs)
    else:
        from pyranges.methods.cluster import _cluster
        clustered = pyrange_apply_single(_cluster, self, strand, kwargs)

    # Ids are assigned independently per key, so shift every frame after the
    # first by the running maximum to keep ids from colliding.
    offset = 0
    shifted = {}
    for position, (key, frame) in enumerate(PyRanges(clustered).items()):
        if position:
            frame.loc[:, "Cluster"] += offset
        offset = frame.Cluster.max()
        shifted[key] = frame

    if hide_strand:
        shifted = {
            key: frame.rename(columns={"Strand2": "Strand"})
            for key, frame in shifted.items()
        }

    return PyRanges(shifted)
def _to_rle(ranges, value_col=None, strand=True, rpm=False, **kwargs):
    """Apply pyrle's ``coverage`` to ``ranges`` and wrap the result in ``PyRles``.

    Parameters
    ----------
    ranges
        Object handed to ``pyrange_apply_single``; ``len(ranges)`` is used
        for the ``rpm`` scaling.
    value_col, strand
        Stored in ``kwargs`` (overriding same-named entries) before the call.
    rpm : bool
        When truthy, every per-key result is multiplied by
        ``1e6 / len(ranges)``.
    **kwargs
        Forwarded as keyword arguments to ``pyrange_apply_single``.

    Raises
    ------
    Exception
        If ``pyrle`` cannot be imported.
    """
    try:
        from pyrle.methods import coverage
        from pyrle import PyRles
    except ImportError:
        raise Exception(
            "Using the coverage method requires that pyrle is installed.")

    # Explicit arguments win over anything the caller put in **kwargs.
    kwargs["strand"] = strand
    kwargs["value_col"] = value_col
    kwargs["sparse"] = {"self": False}  # already sparse

    runs = pyrange_apply_single(coverage, ranges, **kwargs)

    if not rpm:
        return PyRles(runs)

    scale = 1e6 / len(ranges)
    return PyRles({key: run * scale for key, run in runs.items()})
def slack(self, slack):
    """Apply ``_slack`` with the given ``slack`` and return a new PyRanges.

    ``slack`` is wrapped as ``{"slack": slack}``, completed by
    ``fill_kwargs``, and the function is applied using ``self.stranded`` as
    the strand flag.
    """
    options = fill_kwargs({"slack": slack})
    extended = pyrange_apply_single(_slack, self, self.stranded, options)
    return PyRanges(extended)
def _coverage(ranges, value_col=None, strand=True, rpm=False, **kwargs):
    """Apply pyrle's ``coverage`` to ``ranges`` and wrap the result in ``PyRles``.

    Parameters
    ----------
    ranges
        Object handed to ``pyrange_apply_single``; ``len(ranges)`` is used
        for the ``rpm`` scaling.
    value_col
        Forwarded to ``coverage`` as ``kwargs["value_col"]``.
    strand
        Strand flag passed positionally to ``pyrange_apply_single``.
    rpm : bool
        When truthy, every per-key result is multiplied by
        ``1e6 / len(ranges)``.
    **kwargs
        Accepted for signature compatibility; see the note below.

    Raises
    ------
    Exception
        If ``pyrle`` cannot be imported (the ImportError is chained as the
        cause).
    """
    try:
        from pyrle.methods import coverage
        from pyrle import PyRles
    except ImportError as err:
        raise Exception(
            "Using the coverage method requires that pyrle is installed."
        ) from err

    # NOTE(review): caller-supplied **kwargs are replaced here rather than
    # merged, exactly as before; confirm whether they should be forwarded.
    kwargs = {"value_col": value_col, "sparse": {"self": False}}  # already sparse

    result = pyrange_apply_single(coverage, ranges, strand, kwargs)

    if rpm:
        multiplier = 1e6 / len(ranges)
        result = {k: v * multiplier for k, v in result.items()}

    return PyRles(result)
def sort(self, by=None, **kwargs):
    """Apply ``_sort`` to every frame and return the result as a PyRanges.

    Parameters
    ----------
    by : optional
        Stored as ``kwargs["by"]`` only when truthy.
    **kwargs
        Extra options; ``"sparse"`` is always overwritten with
        ``{"self": False}`` and the dict is completed by ``fill_kwargs``.
    """
    from pyranges.methods.sort import _sort

    kwargs.update(sparse={"self": False})
    if by:
        kwargs.update(by=by)

    options = fill_kwargs(kwargs)
    ordered = pyrange_apply_single(_sort, self, self.stranded, options)
    return PyRanges(ordered)
def merge(self, strand=None, **kwargs):
    """Apply ``_merge`` to every frame and return the result as a PyRanges.

    ``strand`` is forwarded unchanged (including ``None``) and
    ``kwargs["sparse"]`` is set to ``{"self": True}`` before the call.
    """
    from pyranges.methods.merge import _merge

    kwargs.update(sparse={"self": True})
    merged = pyrange_apply_single(_merge, self, strand, kwargs)
    return PyRanges(merged)
def merge(self, strand=None, count=False, **kwargs):
    """Merge via ``_merge`` or ``_merge_by`` and return a new PyRanges.

    Parameters
    ----------
    strand : bool or None
        ``None`` falls back to ``self.stranded``.
    count : bool
        When falsy, the ``"Count"`` column is dropped from every resulting
        frame.
    **kwargs
        If it contains ``"by"``, ``_merge_by`` is used with
        ``sparse={"self": False}``; otherwise ``_merge`` is used with
        ``sparse={"self": True}``.
    """
    if strand is None:
        strand = self.stranded

    if "by" in kwargs:
        kwargs["sparse"] = {"self": False}
        from pyranges.methods.merge import _merge_by
        merged = pyrange_apply_single(_merge_by, self, strand, kwargs)
    else:
        kwargs["sparse"] = {"self": True}
        from pyranges.methods.merge import _merge
        merged = pyrange_apply_single(_merge, self, strand, kwargs)

    if count:
        return PyRanges(merged)

    without_count = {
        key: frame.drop("Count", axis=1) for key, frame in merged.items()
    }
    return PyRanges(without_count)
def tile(self, tile_size, strand=None, **kwargs):
    """Apply ``_tiles`` with ``tile_size`` and return the result as a PyRanges.

    ``strand`` is forwarded unchanged (including ``None``);
    ``kwargs["sparse"]`` is set to ``{"self": False}`` and
    ``kwargs["tile_size"]`` to ``tile_size`` before the call.
    """
    from pyranges.methods.windows import _tiles

    kwargs.update(sparse={"self": False}, tile_size=tile_size)
    tiled = pyrange_apply_single(_tiles, self, strand, kwargs)
    return PyRanges(tiled)
def window(self, window_size, strand=None, **kwargs):
    """Apply ``_windows`` with ``window_size`` and return a new PyRanges.

    ``strand`` is forwarded unchanged (including ``None``);
    ``kwargs["sparse"]`` is set to ``{"self": False}`` and
    ``kwargs["window_size"]`` to ``window_size`` before the call.
    """
    from pyranges.methods.windows import _windows

    kwargs.update(sparse={"self": False}, window_size=window_size)
    windowed = pyrange_apply_single(_windows, self, strand, kwargs)
    return PyRanges(windowed)
def apply(self, f, strand=False, as_pyranges=True, **kwargs):
    """Apply ``f`` through ``pyrange_apply_single``.

    Parameters
    ----------
    f : callable
        Function handed to ``pyrange_apply_single``.
    strand : bool
        Strand flag; also stored as ``kwargs["strand"]``.
    as_pyranges : bool
        When truthy the result is wrapped in ``PyRanges``; otherwise the raw
        result of ``pyrange_apply_single`` is returned.
    **kwargs
        Extra options, completed by ``fill_kwargs``.
    """
    kwargs["strand"] = strand
    options = fill_kwargs(kwargs)

    outcome = pyrange_apply_single(f, self, strand, options)

    if as_pyranges:
        return PyRanges(outcome)
    return outcome
def slack(self, slack):
    """Apply ``_slack`` with the given ``slack`` and return a new PyRanges.

    Parameters
    ----------
    slack
        Wrapped as ``{"slack": slack}`` and completed by ``fill_kwargs``.
        When it is a ``dict``, ``self`` must be stranded.

    Raises
    ------
    AssertionError
        If ``slack`` is a dict and ``self.stranded`` is falsy.
    """
    end_specific = isinstance(slack, dict)
    if end_specific:
        assert self.stranded, "PyRanges must be stranded to add 5/3-end specific slack."

    options = fill_kwargs({"slack": slack})
    extended = pyrange_apply_single(_slack, self, self.stranded, options)
    return PyRanges(extended)
def split(self, strand=None, **kwargs):
    """Apply ``_split`` to every frame and return a ``pr.PyRanges``.

    ``strand=None`` falls back to ``self.stranded``; ``kwargs`` is completed
    by ``fill_kwargs`` before the call.
    """
    use_strand = self.stranded if strand is None else strand
    options = fill_kwargs(kwargs)

    from pyranges.methods.split import _split

    pieces = pyrange_apply_single(_split, self, use_strand, options)
    return pr.PyRanges(pieces)
def drop_duplicate_positions(self, strand=None, **kwargs):
    """Apply ``_drop_duplicate_positions`` and return the result as a PyRanges.

    Parameters
    ----------
    strand : bool or None
        ``None`` falls back to ``self.stranded``. The value is passed to
        ``pyrange_apply_single``; ``kwargs["strand"]`` additionally receives
        ``strand and self.stranded``.
    **kwargs
        Extra options; ``"sparse"`` is set to ``{"self": False}`` and the
        dict is completed by ``fill_kwargs``.
    """
    from pyranges.methods.drop_duplicates import _drop_duplicate_positions

    use_strand = self.stranded if strand is None else strand

    kwargs["sparse"] = {"self": False}
    options = fill_kwargs(kwargs)
    # Set after fill_kwargs so this value is the one that ends up in options.
    options["strand"] = use_strand and self.stranded

    deduplicated = pyrange_apply_single(
        _drop_duplicate_positions, self, use_strand, options)
    return PyRanges(deduplicated)
def eval(self, eval_cmd, strand=True, as_pyranges=True, **kwargs):
    """Evaluate the expression string ``eval_cmd`` against every frame.

    Parameters
    ----------
    eval_cmd : str
        Python expression; it is evaluated with the frame bound to the name
        ``df``.
    strand : bool
        Strand flag passed to ``pyrange_apply_single``.
    as_pyranges : bool
        When truthy the result is wrapped in ``PyRanges``; otherwise the raw
        result of ``pyrange_apply_single`` is returned.
    **kwargs
        Extra options, completed by ``fill_kwargs``.

    Warnings
    --------
    SECURITY: ``eval_cmd`` is executed with the builtin ``eval`` and can run
    arbitrary code. Never pass a string that originates from untrusted input.
    """

    def _evaluate(df):
        # `df` must stay in scope under exactly this name: expressions in
        # eval_cmd refer to it. SECURITY: arbitrary code execution if
        # eval_cmd is attacker-controlled.
        return eval(eval_cmd)

    kwargs = fill_kwargs(kwargs)

    result = pyrange_apply_single(_evaluate, self, strand, kwargs)

    if not as_pyranges:
        return result
    return PyRanges(result)
def new_position(self, new_pos, strand=None, **kwargs):
    """Apply ``_new_position`` with ``new_pos`` and return a ``pr.PyRanges``.

    ``strand=None`` falls back to ``self.stranded``. ``kwargs["sparse"]`` is
    set to ``{"self": False}`` and ``kwargs["new_pos"]`` to ``new_pos``
    before the dict is completed by ``fill_kwargs``.
    """
    from pyranges.methods.new_position import _new_position

    kwargs.update(sparse={"self": False}, new_pos=new_pos)
    options = fill_kwargs(kwargs)

    use_strand = self.stranded if strand is None else strand

    repositioned = pyrange_apply_single(
        _new_position, self, use_strand, options)
    return pr.PyRanges(repositioned)
def apply(self, f, strand=False, as_pyranges=True, kwargs=None):
    """Wrap ``f`` with ``ray.remote`` and apply it via ``pyrange_apply_single``.

    Parameters
    ----------
    f : callable
        Function to apply; it is passed through ``ray.remote`` first.
    strand : bool
        Strand flag passed to ``pyrange_apply_single``.
    as_pyranges : bool
        When truthy the result is wrapped in ``PyRanges``; otherwise the raw
        result is returned.
    kwargs : dict or None
        Options dict; any falsy value is replaced by an empty dict before
        ``fill_kwargs`` is applied.
    """
    options = fill_kwargs(kwargs or {})
    remote_f = ray.remote(f)

    outcome = pyrange_apply_single(remote_f, self, strand, options)

    if as_pyranges:
        return PyRanges(outcome)
    return outcome
def apply(self, f, strand=None, as_pyranges=True, **kwargs):
    """Apply ``f`` through ``pyrange_apply_single``.

    Parameters
    ----------
    f : callable
        Function handed to ``pyrange_apply_single``.
    strand : bool or None
        ``None`` falls back to ``self.stranded``; the value is also stored
        as ``kwargs["strand"]``.
    as_pyranges : bool
        When truthy the result is wrapped in ``PyRanges``; otherwise the raw
        result is returned.
    **kwargs
        Extra options. A nested ``kwargs["kwargs"]`` mapping, if present, is
        merged on top of the outer entries before ``fill_kwargs`` runs.
    """
    use_strand = self.stranded if strand is None else strand

    kwargs["strand"] = use_strand
    # Entries of a nested "kwargs" mapping override the outer ones.
    nested = kwargs.get("kwargs", {})
    kwargs.update(nested)
    options = fill_kwargs(kwargs)

    outcome = pyrange_apply_single(f, self, use_strand, options)

    if as_pyranges:
        return PyRanges(outcome)
    return outcome
def set_columns(self, value):
    """Assign ``value`` as the column labels of every frame.

    Parameters
    ----------
    value : sequence
        New column labels; must have the same length as ``self.columns``.

    Returns
    -------
    pr.PyRanges
        Built from the frames returned by the per-frame relabelling.

    Raises
    ------
    AssertionError
        If ``len(value) != len(self.columns)``.
    """
    old_width = len(self.columns)
    assert len(value) == old_width, "New and old columns must be same length"

    def _relabel(df):
        # Assigns on the frame it receives and hands the same frame back.
        df.columns = value
        return df

    options = {"sparse": {"self": False}}
    relabelled = pyrange_apply_single(
        _relabel, self, strand=None, kwargs=options)
    return pr.PyRanges(relabelled)
def assign(self, col, function, strand=False, **kwargs):
    """Return a copy of ``self`` with column ``col`` set from ``function``.

    Parameters
    ----------
    col : str
        Attribute name that receives the per-key results on the copy.
    function : callable
        Applied through ``pyrange_apply_single``; must produce a
        ``pandas.Series`` (or subclass) per key.
    strand : bool
        Strand flag passed to ``pyrange_apply_single``.
    **kwargs
        Extra options, completed by ``fill_kwargs``.

    Returns
    -------
    pr.PyRanges
        A new object built from copies of the frames in ``self``, with
        ``col`` assigned.

    Raises
    ------
    AssertionError
        If the first per-key result is not a ``pandas.Series``.
    """
    kwargs = fill_kwargs(kwargs)

    result = pyrange_apply_single(function, self, strand, kwargs)

    # Only the first per-key result is type-checked.
    first_result = next(iter(result.values()))

    # isinstance (rather than an exact type comparison) also accepts Series
    # subclasses.
    assert isinstance(
        first_result, pd.Series
    ), "result of assign function must be Series, but is {}".format(
        type(first_result))

    # Copy every frame so the assignment below does not touch `self`.
    new_self = pr.PyRanges({k: v.copy() for k, v in self.items()})
    setattr(new_self, col, result)

    return new_self
def _coverage(ranges, value_col=None, strand=True, rpm=False):
    """Apply pyrle's ``coverage`` to ``ranges`` and wrap the result in ``PyRles``.

    Parameters
    ----------
    ranges
        Object handed to ``pyrange_apply_single``; ``len(ranges)`` is used
        for the ``rpm`` scaling.
    value_col
        Forwarded as ``kwargs["value_col"]``. Only when it is ``None`` is
        ``kwargs["sparse"]`` set (to ``{"self": True}``).
    strand
        Strand flag passed to ``pyrange_apply_single``.
    rpm : bool
        When truthy, every per-key result is multiplied by
        ``1e6 / len(ranges)``.

    Raises
    ------
    Exception
        If ``pyrle`` cannot be imported.
    """
    try:
        from pyrle.methods import coverage
        from pyrle import PyRles
    except ImportError:
        raise Exception(
            "Using the coverage method requires that pyrle is installed.")

    options = {"value_col": value_col}
    if value_col is None:
        options["sparse"] = {"self": True}

    runs = pyrange_apply_single(coverage, ranges, strand, options)

    if not rpm:
        return PyRles(runs)

    scale = 1e6 / len(ranges)
    return PyRles({key: run * scale for key, run in runs.items()})
def subset(self, function, strand=None, **kwargs):
    """Index ``self`` with the per-key boolean results of ``function``.

    Parameters
    ----------
    function : callable
        Applied through ``pyrange_apply_single``; its results must have
        ``dtype == bool``.
    strand : bool or None
        ``None`` falls back to ``self.stranded``. If ``self`` is stranded
        but ``strand`` is falsy, the unstranded version of ``self`` is used
        for both the apply step and the final indexing.
    **kwargs
        Extra options, completed by ``fill_kwargs``.

    Returns
    -------
    An empty ``pr.PyRanges()`` when the apply step yields a falsy result,
    otherwise ``self[result]``.

    Raises
    ------
    AssertionError
        If the first per-key result does not have ``dtype == bool``.
    """
    options = fill_kwargs(kwargs)

    if strand is None:
        strand = self.stranded

    if self.stranded and not strand:
        self = self.unstrand()

    mask_by_key = pyrange_apply_single(function, self, strand, options)

    if not mask_by_key:
        return pr.PyRanges()

    # Only the first per-key result is dtype-checked.
    sample = next(iter(mask_by_key.values()))
    assert sample.dtype == bool, "result of subset function must be bool, but is {}".format(
        sample.dtype)

    return self[mask_by_key]
def tesify(self, slack=0):
    """Apply ``_tes`` with the given ``slack`` and return a new PyRanges.

    ``slack`` is wrapped as ``{"slack": slack}``, completed by
    ``fill_kwargs``, and the function is applied using ``self.stranded`` as
    the strand flag.
    """
    options = fill_kwargs({"slack": slack})
    ends = pyrange_apply_single(_tes, self, self.stranded, options)
    return PyRanges(ends)
def sort(self, columns=("Start", "End"), **kwargs):
    """Apply ``_sort`` to every frame and return the result as a PyRanges.

    Parameters
    ----------
    columns : tuple
        NOTE(review): accepted but never read or forwarded in this body;
        confirm whether it was meant to reach ``_sort``.
    **kwargs
        Extra options; ``"sparse"`` is set to ``{"self": False}`` and the
        dict is completed by ``fill_kwargs``.
    """
    from pyranges.methods.sort import _sort

    kwargs.update(sparse={"self": False})
    options = fill_kwargs(kwargs)

    ordered = pyrange_apply_single(_sort, self, self.stranded, options)
    return PyRanges(ordered)
def three_end(self, slack=0):
    """Apply ``_tes`` with the given ``slack`` on a stranded PyRanges.

    Parameters
    ----------
    slack
        Wrapped as ``{"slack": slack}`` and completed by ``fill_kwargs``.

    Returns
    -------
    PyRanges
        Built from the result of applying ``_tes`` with ``self.stranded`` as
        the strand flag.

    Raises
    ------
    AssertionError
        If ``self.stranded`` is falsy.
    """
    assert self.stranded, "Need stranded pyrange to find 3'."

    options = fill_kwargs({"slack": slack})
    ends = pyrange_apply_single(_tes, self, self.stranded, options)
    return PyRanges(ends)