Esempio n. 1
0
def pyrange_apply_single(function, self, strand, kwargs):
    """Run a unary ``function`` over each frame of ``self`` and collect results.

    Parameters
    ----------
    function
        Passed unchanged to ``call_f_single`` together with each frame.
    self
        Object exposing ``stranded``, ``items()``, ``keys()``,
        ``chromosomes`` and chromosome indexing (presumably a PyRanges --
        confirm against callers).
    strand
        Truthy to process every (chromosome, strand) frame separately;
        falsy to process one frame per chromosome.
    kwargs
        Mapping forwarded to the helpers. It is mutated in place:
        ``"chromosome"`` is overwritten on every iteration, and ``"strand"``
        as well when ``strand`` is truthy.

    Returns
    -------
    Whatever ``process_results`` returns for the fetched results and the
    keys they belong to.

    Raises
    ------
    AssertionError
        If ``strand`` is truthy but ``self.stranded`` is falsy.
    """

    def submit(frame):
        # Relies on the caller having just written the current
        # chromosome/strand into ``kwargs``.
        sparse = make_unary_sparse(kwargs, frame)
        return call_f_single(function, sparse, kwargs)

    pending = []

    if strand:
        assert self.stranded, \
            "Can only do stranded operation when PyRange contains strand info"

        # One task per (chromosome, strand) frame.
        for (chromosome, strand_symbol), frame in self.items():
            kwargs["chromosome"] = chromosome
            kwargs["strand"] = strand_symbol
            pending.append(submit(frame))

        keys = self.keys()

    elif self.stranded:
        # Stranded data, unstranded operation: collapse each chromosome's
        # strand frames into a single frame before submitting.
        keys = []
        for chromosome in self.chromosomes:
            kwargs["chromosome"] = chromosome

            per_strand = self[chromosome]
            if len(per_strand.keys()) == 2:
                first, second = per_strand.values()
                frame = merge_dfs.remote(first, second)
            else:
                frame = per_strand.values()[0]

            pending.append(submit(frame))
            keys.append(chromosome)

    else:
        # Unstranded data: items are already one frame per chromosome.
        keys = []
        for chromosome, frame in self.items():
            kwargs["chromosome"] = chromosome
            pending.append(submit(frame))
            keys.append(chromosome)

    # ``ray.get`` resolves the collected handles before post-processing.
    return process_results(ray.get(pending), keys)
Esempio n. 2
0
def pyrange_apply(function, self, other, **kwargs):
    """Run a binary ``function`` on each frame of ``self`` paired with ``other``.

    For every key of ``self.dfs`` (in natural-sort order) the matching frame
    of ``other`` is looked up; when ``other`` has nothing for that key an
    empty frame with columns ``Chromosome``, ``Start``, ``End`` is used
    instead. Each pair goes through ``make_binary_sparse`` and ``call_f``;
    the collected results are fetched with ``ray.get`` and handed to
    ``process_results`` together with the sorted keys.

    Parameters
    ----------
    function
        Passed unchanged to ``call_f`` together with each frame pair.
    self, other
        Objects exposing ``dfs``, ``stranded``, ``chromosomes``, ``keys()``
        and indexing (presumably PyRanges -- confirm against callers).
    **kwargs
        Forwarded to the helpers. Must contain ``"strandedness"``, one of
        ``"same"``, ``"opposite"``, ``False`` or ``None``.

    Returns
    -------
    Whatever ``process_results`` returns for the fetched results and keys.

    Raises
    ------
    KeyError
        If ``"strandedness"`` is not in ``kwargs``.
    AssertionError
        If ``strandedness`` is not an accepted value, or it is truthy while
        either ``self`` or ``other`` is not stranded.
    """
    strandedness = kwargs["strandedness"]

    # Validate before the value is used to choose a strand mapping.
    assert strandedness in ["same", "opposite", False, None]

    if strandedness:
        assert self.stranded and other.stranded, \
            "Can only do stranded operations when both PyRanges contain strand info"

    # Maps a strand in ``self`` to the strand to look up in ``other``.
    if strandedness == "opposite":
        strand_dict = {"+": "-", "-": "+"}
    else:
        strand_dict = {"+": "+", "-": "-"}

    def empty_frame():
        # Built fresh on every call so no two pairs share one placeholder.
        return pd.DataFrame(columns="Chromosome Start End".split())

    results = []

    def submit(df, odf):
        df, odf = make_binary_sparse(kwargs, df, odf)
        results.append(call_f(function, df, odf, kwargs))

    # Both are sorted the same way so results[i] belongs to keys[i]; every
    # branch below must therefore iterate ``items`` and nothing else.
    items = natsorted(self.dfs.items())
    keys = natsorted(self.dfs.keys())

    if strandedness:

        for (c, s), df in items:
            other_strand = strand_dict[s]

            if (c, other_strand) not in other.keys():
                odf = empty_frame()
            else:
                matches = other[c, other_strand].values()
                odf = empty_frame() if len(matches) == 0 else matches[0]

            submit(df, odf)

    elif self.stranded and not other.stranded:

        for (c, _), df in items:
            odf = other.dfs[c] if c in other.chromosomes else empty_frame()
            submit(df, odf)

    elif not self.stranded and other.stranded:

        for c, df in items:
            if c not in other.chromosomes:
                odf = empty_frame()
            else:
                # NOTE(review): ``merge_dfs`` gets two arguments here but
                # three (with ``kwargs``) in the both-stranded branch below,
                # and receives ``other[c, strand]`` directly whereas the
                # stranded branch above takes ``.values()[0]`` of it.
                # Confirm the intended signature and argument type.
                odf = merge_dfs.remote(other[c, "+"], other[c, "-"])

            submit(df, odf)

    elif self.stranded and other.stranded:

        for (c, _), df in items:
            # No frames at all when ``other`` lacks the chromosome.
            odfs = other[c].values() if c in other.chromosomes else []

            if len(odfs) == 2:
                # Both strands present: merge them into one frame.
                odf = merge_dfs.remote(*odfs, kwargs)
            elif len(odfs) == 1:
                odf = odfs[0]
            else:
                odf = empty_frame()

            submit(df, odf)

    else:
        # Neither side is stranded: keys are plain chromosomes.
        for c, df in items:
            odf = other.dfs[c] if c in other.chromosomes else empty_frame()
            submit(df, odf)

    return process_results(ray.get(results), keys)