def pyrange_apply_single(function, self, strand, kwargs):
    """Apply a unary ``function`` to every frame of ``self`` and collect results.

    Three iteration modes are used:

    * ``strand`` truthy: one call per ``(chromosome, strand)`` item of
      ``self.items()``; both ``"chromosome"`` and ``"strand"`` are passed on.
    * ``strand`` falsy and ``self`` unstranded: one call per chromosome item
      of ``self.items()``; only ``"chromosome"`` is passed on.
    * ``strand`` falsy and ``self`` stranded: one call per entry of
      ``self.chromosomes``; when the chromosome has two sub-frames they are
      combined with ``merge_dfs.remote`` first, otherwise the first (only)
      sub-frame is used.

    Args:
        function: Callable forwarded to ``call_f_single``.
        self: PyRanges-like object; must expose ``stranded``, ``items()``,
            ``keys()``, ``chromosomes`` and chromosome indexing.
        strand: Truthy to run the operation per strand.
        kwargs: Dict of options forwarded to ``make_unary_sparse`` and
            ``call_f_single``. It is *not* modified; every call receives its
            own shallow copy extended with the location keys.

    Returns:
        Whatever ``process_results`` returns for the gathered results and the
        matching keys.

    Raises:
        AssertionError: If ``strand`` is truthy but ``self`` is not stranded.
    """
    # Raised explicitly (instead of ``assert``) so the check is not stripped
    # when Python runs with -O; the exception type and message are unchanged.
    if strand and not self.stranded:
        raise AssertionError(
            "Can only do stranded operation when PyRange contains strand info")

    results = []

    def submit(df, **location):
        # Each task gets a private snapshot of the options: the caller's dict
        # is left untouched and no two submitted tasks share a mutable dict.
        task_kwargs = dict(kwargs)
        task_kwargs.update(location)
        df = make_unary_sparse(task_kwargs, df)
        results.append(call_f_single(function, df, task_kwargs))

    if strand:
        for (chromosome, strand_key), df in self.items():
            submit(df, chromosome=chromosome, strand=strand_key)
        # NOTE(review): relies on self.keys() having the same order as
        # self.items() -- confirm against the PyRanges implementation.
        keys = self.keys()

    elif not self.stranded:
        keys = []
        for chromosome, df in self.items():
            submit(df, chromosome=chromosome)
            keys.append(chromosome)

    else:
        # Stranded data, strand-agnostic operation: work per chromosome.
        keys = []
        for chromosome in self.chromosomes:
            per_strand = self[chromosome]

            if len(per_strand.keys()) == 2:
                first, second = per_strand.values()
                # merge strands
                df = merge_dfs.remote(first, second)
            else:
                df = per_strand.values()[0]

            submit(df, chromosome=chromosome)
            keys.append(chromosome)

    # The collected entries are resolved through ray.get before
    # post-processing (presumably Ray object references -- depends on
    # call_f_single, which is defined elsewhere).
    results = ray.get(results)

    return process_results(results, keys)
def pyrange_apply(function, self, other, **kwargs):
    """Apply a binary ``function`` to paired frames of ``self`` and ``other``.

    For every frame of ``self`` (in ``natsorted`` key order) a partner frame
    is picked from ``other`` and both are handed to ``call_f``:

    * ``strandedness`` truthy: the partner is the frame of ``other`` on the
      same chromosome and on the same (``"same"``) or opposite
      (``"opposite"``) strand.
    * ``strandedness`` falsy: the partner is chosen per chromosome only; when
      ``other`` is stranded its sub-frames are combined with
      ``merge_dfs.remote``.

    Whenever ``other`` has no matching data, an empty ``DataFrame`` with the
    columns ``Chromosome``, ``Start`` and ``End`` is used as the partner.

    Args:
        function: Callable forwarded to ``call_f``.
        self: PyRanges-like object exposing ``stranded`` and ``dfs``.
        other: PyRanges-like object exposing ``stranded``, ``dfs``,
            ``keys()``, ``chromosomes`` and indexing.
        **kwargs: Options forwarded to ``make_binary_sparse`` and ``call_f``.
            Must contain ``"strandedness"``, one of ``"same"``,
            ``"opposite"``, ``False`` or ``None``.

    Returns:
        Whatever ``process_results`` returns for the gathered results and the
        sorted keys of ``self.dfs``.

    Raises:
        KeyError: If ``"strandedness"`` is missing from ``kwargs``.
        AssertionError: If ``strandedness`` has an unsupported value, or if it
            is truthy while either object lacks strand information.
    """
    strandedness = kwargs["strandedness"]

    # Validate before the value is used. Raised explicitly (instead of
    # ``assert``) so the checks are not stripped when Python runs with -O;
    # the exception type is unchanged.
    if strandedness not in ["same", "opposite", False, None]:
        raise AssertionError(
            "strandedness must be one of 'same', 'opposite', False or None")

    if strandedness and not (self.stranded and other.stranded):
        raise AssertionError(
            "Can only do stranded operations when both PyRanges contain strand info")

    if strandedness == "opposite":
        strand_dict = {"+": "-", "-": "+"}
    else:
        strand_dict = {"+": "+", "-": "-"}

    results = []

    items = natsorted(self.dfs.items())
    keys = natsorted(self.dfs.keys())

    def empty_frame():
        # A fresh object per use, so no two tasks share the same placeholder.
        return pd.DataFrame(columns=["Chromosome", "Start", "End"])

    def submit(df, odf):
        df, odf = make_binary_sparse(kwargs, df, odf)
        results.append(call_f(function, df, odf, kwargs))

    if strandedness:
        other_keys = other.keys()

        for (chromosome, strand), df in items:
            partner_strand = strand_dict[strand]

            if (chromosome, partner_strand) in other_keys:
                candidates = other[chromosome, partner_strand].values()
            else:
                candidates = []

            odf = candidates[0] if len(candidates) != 0 else empty_frame()
            submit(df, odf)

    else:
        other_chromosomes = other.chromosomes

        if self.stranded and not other.stranded:
            for (chromosome, _), df in items:
                if chromosome in other_chromosomes:
                    odf = other.dfs[chromosome]
                else:
                    odf = empty_frame()
                submit(df, odf)

        elif not self.stranded and other.stranded:
            for chromosome, df in items:
                if chromosome in other_chromosomes:
                    # NOTE(review): the indexing results are passed on as-is,
                    # whereas the strand-aware branch unwraps them with
                    # ``.values()[0]`` -- confirm merge_dfs accepts both, and
                    # that both strands always exist for the chromosome.
                    odf = merge_dfs.remote(
                        other[chromosome, "+"], other[chromosome, "-"])
                else:
                    odf = empty_frame()
                submit(df, odf)

        elif self.stranded and other.stranded:
            # Iterate the same sorted ``items`` as every other branch so the
            # results stay aligned with ``keys``.
            for (chromosome, _), df in items:
                if chromosome in other_chromosomes:
                    odfs = other[chromosome].values()
                else:
                    # Kept from the original; this is expected to report a
                    # length of 0 and fall through to the empty-frame case
                    # below -- TODO confirm PyRanges.__len__ semantics.
                    odfs = pr.PyRanges(empty_frame())

                if len(odfs) == 2:
                    # NOTE(review): this call passes ``kwargs`` as a third
                    # argument while the other merge_dfs.remote call sites
                    # pass two -- check against the merge_dfs signature.
                    odf = merge_dfs.remote(*odfs, kwargs)
                elif len(odfs) == 1:
                    odf = odfs[0]
                else:
                    odf = empty_frame()
                submit(df, odf)

        else:
            # Neither object carries strand information.
            for chromosome, df in items:
                if chromosome in other_chromosomes:
                    odf = other.dfs[chromosome]
                else:
                    odf = empty_frame()
                submit(df, odf)

    results = ray.get(results)

    return process_results(results, keys)