コード例 #1
0
def main(args):
    """Compute coverage for the input files and write the requested bigwigs.

    Parameters
    ----------
    args : dict
        Run configuration. ``"treatment"`` and ``"chromsizes_"`` are always
        read. ``"control"`` is optional. The output options ``"bigwig"``,
        ``"individual_log2fc_bigwigs"``, ``"log2fc_bigwig"``,
        ``"chip_bigwig"`` and ``"input_bigwig"`` are each acted on only when
        present and truthy.

    Raises
    ------
    ValueError
        If an output that divides by, or consists of, the control coverage
        is requested but no control files were given.
    """
    # TODO: need to create coverage of file if raw
    # else cluster Scores of binned

    # An output only *requires* control data when it was actually requested
    # (truthy), not merely because its key exists in ``args``.
    control_outputs = ("individual_log2fc_bigwigs", "input_bigwig",
                       "log2fc_bigwig")
    requires_control = any(args.get(n) for n in control_outputs)
    has_control = bool(args.get("control"))
    if requires_control and not has_control:
        raise ValueError("Missing control data!")

    treatment_ranges = files_to_coverage(args["treatment"], args)

    control_ranges = None
    control_sum = None
    if has_control:
        control_ranges = files_to_coverage(args["control"], args)
        control_sum = pr.concat(control_ranges.values())

    treatment_sum = pr.concat(treatment_ranges.values())

    chromsizes = args["chromsizes_"]

    def write_per_file(ranges_by_file, out_dir, suffix, **bigwig_kwargs):
        # One bigwig per input file, named after the input file's stem.
        for name, ranges in ranges_by_file.items():
            stem = splitext(basename(name))[0]
            ranges.to_bigwig(join(out_dir, stem + suffix), chromsizes,
                             **bigwig_kwargs)

    if args.get("bigwig"):
        path = args["bigwig"]
        _create_path(path)
        write_per_file(treatment_ranges, path, ".bw")
        if has_control:
            write_per_file(control_ranges, path, ".bw")

    if args.get("individual_log2fc_bigwigs"):
        path = args["individual_log2fc_bigwigs"]
        _create_path(path)
        write_per_file(treatment_ranges, path, "_log2fc.bw",
                       divide_by=control_sum)

    if args.get("log2fc_bigwig"):
        path = args["log2fc_bigwig"]
        _create_path(dirname(path))
        treatment_sum.to_bigwig(path, chromsizes, divide_by=control_sum)

    if args.get("chip_bigwig"):
        path = args["chip_bigwig"]
        _create_path(dirname(path))
        treatment_sum.to_bigwig(path, chromsizes)

    if args.get("input_bigwig"):
        path = args["input_bigwig"]
        _create_path(dirname(path))
        control_sum.to_bigwig(path, chromsizes)
コード例 #2
0
def main(args):
    """Build coverage for the treatment (and optional control) files and print it.

    ``args`` is read for ``"treatment"`` and, when present and truthy,
    ``"control"``; it is also forwarded to ``files_to_coverage``.
    """
    # TODO: need to create coverage of file if raw
    # else cluster Scores of binned

    coverage_by_file = files_to_coverage(args["treatment"], args)
    print(coverage_by_file)

    control_files = args.get("control")
    if control_files:
        control_ranges = files_to_coverage(control_files, args)
        control_sum = pr.concat(control_ranges.values())

    combined_treatment = pr.concat(coverage_by_file.values())
    print(combined_treatment)
コード例 #3
0
ファイル: test_unary.py プロジェクト: endrebak/pyranges-rtd
def test_cluster_by(gr, strand):
    """Check ``cluster(by="ID")`` against clustering each ID group on its own.

    The expected frame is built by grouping on chromosome (plus strand when
    ``strand`` is truthy) and ID, clustering every group separately and
    numbering the groups consecutively from 1.
    """
    result = gr.cluster(by="ID", strand=strand).df
    print(result)

    groupby = ["Chromosome", "Strand", "ID"] if strand else ["Chromosome", "ID"]

    per_group = [
        pr.PyRanges(group_df)
        for _, group_df in natsorted(gr.df.groupby(groupby))
    ]
    clusters = [sub.cluster(strand=strand) for sub in per_group]

    new_clusters = []
    for cluster_id, clustered in enumerate(clusters, start=1):
        print("c")
        print(clustered)
        # Every interval in one group shares a single cluster number.
        clustered.Cluster = cluster_id
        new_clusters.append(clustered)

    expected = pr.concat(new_clusters).df
    expected.loc[:, "Cluster"] = expected.Cluster.astype(np.int32)

    print(expected)
    print(result)

    assert_df_equal(result, expected)
コード例 #4
0
ファイル: pyranges.py プロジェクト: xie186/pyranges
    def set_union(self, other, **kwargs):
        """Concatenate ``self`` and ``other`` and merge the result.

        Strand is taken into account when the ``strandedness`` entry of the
        filled keyword arguments is truthy.
        """
        kwargs = fill_kwargs(kwargs)
        use_strand = bool(kwargs["strandedness"])

        combined = pr.concat([self, other], use_strand)
        return combined.merge(strand=use_strand, **kwargs)
コード例 #5
0
    def unstrand(self):
        """Return the ranges without the ``Strand`` column.

        An object that is not stranded is returned as is.
        """
        if self.stranded:
            both_strands = pr.concat([self["+"], self["-"]])
            return both_strands.apply(lambda df: df.drop("Strand", axis=1))

        return self
コード例 #6
0
    def unstrand(self):
        """Return the ranges with the ``Strand`` column dropped.

        An object that is not stranded is returned as is.
        """
        if self.stranded:
            plus_and_minus = pr.concat([self["+"], self["-"]])
            return plus_and_minus.drop("Strand", drop_strand=True)

        return self
コード例 #7
0
ファイル: pyranges.py プロジェクト: pedrotomazsilva/pyranges
    def unstrand(self):
        """Return a PyRanges built from the data with ``Strand`` removed.

        Each frame has its ``Strand`` column dropped and its index reset.
        An object that is not stranded is returned as is.
        """
        if not self.stranded:
            return self

        def without_strand(df):
            return df.drop("Strand", axis=1).reset_index(drop=True)

        stripped = pr.concat([self["+"], self["-"]]).apply(without_strand)

        return pr.PyRanges(stripped.dfs)
コード例 #8
0
def assert_equal_length_before_after(gr1, gr2):
    """Assert that concatenating ``gr1`` and ``gr2`` keeps every row.

    Also asserts that the concatenation is unstranded unless both inputs
    are stranded.
    """
    print("in test")
    expected_rows = len(gr1) + len(gr2)
    combined = pr.concat([gr1, gr2])

    both_stranded = gr1.stranded and gr2.stranded
    if not both_stranded:
        assert not combined.stranded

    assert expected_rows == len(combined)
コード例 #9
0
def update_pr(changed_id, removed_id):
    """Replace the rows for two IDs in the global ``ERVs`` with one new row.

    The new row is built from ``elements[changed_id]`` and carries
    ``changed_id`` as its ID. Rows whose ID equals either ``changed_id`` or
    ``removed_id`` are removed from ``ERVs`` before the new row is appended.
    """
    global ERVs
    element = elements[changed_id]
    new_elem = element.span().pr()
    new_elem.ID = changed_id
    new_elem.Struct = element.meta_str()

    print("Merging {} into {}".format(removed_id, changed_id))

    stale = ERVs.df['ID'].isin([changed_id, removed_id])
    kept = pr.PyRanges(ERVs.df.loc[~stale])
    ERVs = pr.concat([kept, new_elem])
コード例 #10
0
ファイル: pyranges.py プロジェクト: pedrotomazsilva/pyranges
    def to_example(self, nrows=10):
        """Return the columns as a dict mapping column name to a list of values.

        When the object has more than ``nrows`` rows, only the first and
        last ``int(nrows / 2)`` rows are included.
        """
        total = len(self)
        nrows_half = int(min(nrows, total) / 2)

        example = self
        if nrows < total:
            example = pr.concat([self.head(nrows_half), self.tail(nrows_half)])

        return {col: list(getattr(example, col)) for col in example.columns}
コード例 #11
0
    def unstrand(self):
        """Return the ranges with strand information dropped.

        An object that is not stranded is returned as is.
        """
        if self.stranded:
            merged_strands = pr.concat([self["+"], self["-"]])
            return merged_strands.drop("Strand", drop_strand=True)

        return self
コード例 #12
0
    def get_target_proximal_ranges(self):
        """Return the flanking windows on both sides of every target range.

        Each window is ``get_target_proximity() + 1`` wide, measured from one
        position outside the target. The combined windows are clipped with
        ``genome_bounds`` against the reference ranges.
        """
        downstream = self.get_target_ranges()
        upstream = self.get_target_ranges()

        # Upstream window: ends just before the target start.
        # there may be edge exceptions where the Start coordinate < 0?
        upstream.End = upstream.Start - 1
        upstream.Start -= self.get_target_proximity() + 1

        # Downstream window: starts just after the target end.
        # there may be edge exceptions where End coordinate drops off chromo.
        downstream.Start = downstream.End + 1
        downstream.End += self.get_target_proximity() + 1

        flanks = pr.concat([upstream, downstream])
        reference = self.ref.get_reference_ranges()
        return pr.gf.genome_bounds(flanks, reference, clip=True)
コード例 #13
0
ファイル: test_unary.py プロジェクト: endrebak/pyranges-rtd
def test_merge_by(gr, strand):
    """Check ``merge(by="ID")`` against merging each ID group on its own."""
    print(gr)
    result = gr.merge(by="ID").df.drop("ID", axis=1)

    per_id = [pr.PyRanges(id_df) for _, id_df in gr.df.groupby("ID")]
    expected = pr.concat([sub.merge() for sub in per_id]).df

    print(expected)
    print(result)

    assert_df_equal(result, expected)
コード例 #14
0
def lojs_overlap(feature_files, compare_pr):
    """
    Function to run left outer join in features to all_regions_file

    Regions from the feature files are only kept when they are supported by
    at least ``n`` files: ``n`` is 1 for one or two files, 2 for three to
    seven files, and a quarter of the file count otherwise.

    Args:
            :param feature_files: list of paths to file to run intersection with all_regions_file
            :param compare_pr: pyranges object containing all regions of interest. Should have column
                'idx'. Added in function epitome.functions.bed2Pyranges.

    :return arr: array same size as the number of genomic regions in all_regions_file;
        all zeros when no feature files are given, otherwise 1 where a region
        overlaps a consensus region and 0 elsewhere
    """

    n_files = len(feature_files)

    if n_files == 0:
        # Logger.warn is a deprecated alias; pass arguments lazily.
        logger.warning("WARN: lojs_overlap failed for all files %s with 0 lines",
                       ','.join(feature_files))
        return np.zeros(len(compare_pr))

    #### Number of files that must share a consensus ####
    if n_files <= 2:
        n = 1  # if there are 1-2 files just include all
    elif n_files <= 7:
        n = 2
    else:
        n = int(n_files / 4)  # in 25% of files

    # Very slow: concatenate all bed files and only take regions with n overlap
    group_pr = pr.concat([pr.read_bed(i).merge() for i in feature_files])
    group_df = group_pr.merge(count=True).df

    # Keep the consensus regions and remove the count column. A non-inplace
    # drop avoids mutating a filtered view of the frame.
    group_df = group_df[group_df['Count'] >= n].drop('Count', axis=1)

    type_ = (compare_pr.Start.dtype == 'int64')
    pr1 = pr.PyRanges(group_df, int64=type_)

    intersected = compare_pr.count_overlaps(pr1)
    arr = intersected.df.sort_values(by='idx')['NumberOverlaps'].values
    arr[arr > 0] = 1  # binarize: any overlap counts as a hit
    return arr
コード例 #15
0
    def set_union(self, other, **kwargs):
        """Concatenate ``self`` and ``other`` and merge the result.

        When the ``strandedness`` entry of the filled keyword arguments is
        falsy, both inputs are unstranded before they are combined.
        """
        kwargs = fill_kwargs(kwargs)
        strand = bool(kwargs["strandedness"])

        left, right = self, other
        if not strand:
            left = self.unstrand()
            right = other.unstrand()

        combined = pr.concat([left, right], strand)
        return combined.merge(strand=strand, **kwargs)
コード例 #16
0
def count_overlaps(grs, features=None, how=None, nb_cpu=1, strandedness=None):
    """Add one overlap-count column per entry of ``grs`` to ``features``.

    When ``features`` is None, it is built by concatenating and splitting
    all PyRanges in ``grs``. Missing counts are filled with 0. The ``how``
    argument is accepted but not used by this implementation.
    """
    if features is None:
        features = pr.concat(grs.values()).split()

    from pyranges.methods.intersection import _count_overlaps

    pair_options = {
        "as_pyranges": False,
        "nb_cpu": nb_cpu,
        "strandedness": strandedness,
    }

    for name in grs:
        query = grs[name].drop()
        counts = features.apply_pair(query, _count_overlaps, **pair_options)

        setattr(features, name, counts)
        filled = getattr(features, name).fillna(0)
        setattr(features, name, filled)

    return features
コード例 #17
0
def parse_bed_files(bed_files):
    """Load BED files into a single unstranded PyRanges keeping only ``Name``.

    Returns None when ``bed_files`` is empty. Asserts that every file has a
    Name column and that the names are unique across all files.
    """
    # Nothing to parse
    if len(bed_files) == 0:
        return None

    loaded = list(map(pr.read_bed, bed_files))

    # Every BED file needs at least the first four columns
    for bed_file, bed in zip(bed_files, loaded):
        assert "Name" in bed.columns, f"Name (column 4) missing from {bed_file}."

    # Combine all files, drop strand information and keep only the names
    merged = pr.concat(loaded).unstrand()[["Name"]]

    assert merged.Name.is_unique, "Names (column 4) not unique across BED files."

    return merged
コード例 #18
0
ファイル: statistics.py プロジェクト: bmill3r/pyranges
def mcc(grs, genome, labels=None, strand=False, verbose=False):
    """Compute confusion counts and the MCC for every pair of interval sets.

    Parameters
    ----------
    grs : list
        The interval sets to compare. Each one is merged before comparison.

    genome : number or object with an ``End`` column
        The genome length. Anything ``int()`` accepts is used directly;
        otherwise the length is the sum of ``genome.End``.

    labels : list, default None
        One label per entry of ``grs``. Defaults to the positions
        ``0 .. len(grs) - 1``.

    strand : bool, default False
        Whether to compute the counts separately for "+" and "-".

    verbose : bool, default False
        Whether to print progress (and chromosomes found in the genome but
        in none of ``grs``) to stderr.

    Returns
    -------
    pandas.DataFrame
        Columns T, F, (Strand when ``strand``), TP, FP, TN, FN and MCC,
        sorted on T and F. A pair of identical labels yields one row (per
        strand) with an MCC of 1; every other pair yields two rows, one for
        each ordering. The frame is empty when ``grs`` is empty.
    """
    import sys
    from itertools import combinations_with_replacement, chain

    try:
        genome_length = int(genome)
    except (TypeError, ValueError):
        genome_length = int(genome.End.sum())

    if labels is None:
        _labels = list(range(len(grs)))
        _labels = combinations_with_replacement(_labels, r=2)
    else:
        assert len(labels) == len(grs)
        _labels = combinations_with_replacement(labels, r=2)

    # The chromosome check needs a genome object; a plain number has no
    # chromosomes to compare against.
    if verbose and hasattr(genome, "chromosomes"):
        # check that genome definition does not have many more
        # chromosomes than datafiles
        gr_cs = set(chain(*[gr.chromosomes for gr in grs]))

        g_cs = set(genome.chromosomes)
        surplus = g_cs - gr_cs
        if len(surplus):
            print(
                "The following chromosomes are in the genome, but not the PyRanges:",
                ", ".join(surplus),
                file=sys.stderr)

    # remove all non-loc columns before computation
    grs = [gr.merge(strand=strand) for gr in grs]

    strandedness = "same" if strand else None
    strands = ("+", "-")

    def make_row(t_label, f_label, tp, fp, tn, fn, score, strand_label=None):
        # Key order fixes the column order of the resulting frame.
        row = {"T": t_label, "F": f_label}
        if strand_label is not None:
            row["Strand"] = strand_label
        row.update({"TP": tp, "FP": fp, "TN": tn, "FN": fn, "MCC": score})
        return row

    rowdicts = []
    for (lt, lf), (t, f) in zip(_labels,
                                combinations_with_replacement(grs, r=2)):
        if verbose:
            print(lt, lf, file=sys.stderr)

        if lt == lf:
            # A set compared with itself: no false calls, perfect score.
            if not strand:
                tp = t.length
                rowdicts.append(
                    make_row(lt, lf, tp, 0, genome_length - tp, 0, 1))
            else:
                # ``s`` (not ``strand``) so the parameter is not overwritten.
                for s in strands:
                    tp = t[s].length
                    rowdicts.append(
                        make_row(lt, lf, tp, 0, genome_length - tp, 0, 1, s))
            continue

        c = pr.concat([t, f]).merge(strand=strand)
        j = t.join(f, strandedness=strandedness)
        tp_gr = j.new_position("intersection").merge(strand=strand)

        if strand:
            for s in strands:
                tp = tp_gr[s].length
                fp = f[s].length - tp
                fn = t[s].length - tp
                tn = genome_length - c[s].length
                score = _mcc(tp, fp, tn, fn)
                rowdicts.append(make_row(lt, lf, tp, fp, tn, fn, score, s))
                # Swapping the roles of T and F swaps FP and FN.
                rowdicts.append(make_row(lf, lt, tp, fn, tn, fp, score, s))
        else:
            tp = tp_gr.length
            fp = f.length - tp
            fn = t.length - tp
            tn = genome_length - c.length
            score = _mcc(tp, fp, tn, fn)
            rowdicts.append(make_row(lt, lf, tp, fp, tn, fn, score))
            # Swapping the roles of T and F swaps FP and FN.
            rowdicts.append(make_row(lf, lt, tp, fn, tn, fp, score))

    if not rowdicts:
        # Nothing to compare: return an empty frame with the usual columns
        # instead of failing on the sort below.
        columns = ["T", "F"] + (["Strand"] if strand else [])
        columns += ["TP", "FP", "TN", "FN", "MCC"]
        return pd.DataFrame(columns=columns)

    df = pd.DataFrame(rowdicts).sort_values(["T", "F"])

    return df
コード例 #19
0
# Per-panel size accumulators, filled in the order of ``panels``.
total_sizes = []
cds_sizes = []
exon_sizes = []
panel_prs = []

for panel in panels:
    print(panel)
    # Rows of this panel on the selected chromosomes, as merged ranges.
    panel_pr = pr.PyRanges(
        genie.loc[
            (genie['SEQ_ASSAY_ID'] == panel) & genie['Chromosome'].isin(chromosomes),
            'Chromosome':'End_Position',
        ].rename(columns={'Start_Position': 'Start', 'End_Position': 'End'})
    ).merge()
    # Every interval contributes its length plus one.
    total_sizes.append(sum(length + 1 for length in panel_pr.lengths()))
    cds_sizes.append(sum(length + 1 for length in panel_pr.intersect(gff_cds_pr).lengths()))
    exon_sizes.append(sum(length + 1 for length in panel_pr.intersect(gff_exon_pr).lengths()))
    panel_prs.append(panel_pr)


# Count, for every MAF range, the overlaps with CDS, exons and each panel.
grs = dict(zip(['CDS', 'exon'] + list(panels), [gff_cds_pr, gff_exon_pr] + panel_prs))
result = pr.count_overlaps(grs, pr.concat({'maf': maf_pr}.values())).df

tcga_maf = pd.merge(tcga_maf, result.iloc[:, 3:], how='left', on='index')


panel_df['total'] = total_sizes
panel_df['cds'] = cds_sizes
panel_df['exon'] = exon_sizes

##get assumed size of the most common kit: https://bitbucket.org/cghub/cghub-capture-kit-info/src/master/BI/vendor/Agilent/whole_exome_agilent_1.1_refseq_plus_3_boosters.targetIntervals.bed
agilent_df = pd.read_csv(
    file_path / 'whole_exome_agilent_1.1_refseq_plus_3_boosters.targetIntervals.bed',
    sep='\t',
    low_memory=False,
    header=None,
)
kit_pr = pr.PyRanges(agilent_df.rename(columns={0: 'Chromosome', 1: 'Start', 2: 'End'})).merge()
kit_total = sum(length + 1 for length in kit_pr.lengths())
kit_cds = sum(length + 1 for length in kit_pr.intersect(gff_cds_pr).merge().lengths())
kit_exon = sum(length + 1 for length in kit_pr.intersect(gff_exon_pr).merge().lengths())
コード例 #20
0
ファイル: multioverlap.py プロジェクト: endrebak/pyranges-rtd
def count_overlaps(grs, features=None, strandedness=None, how=None, nb_cpu=1):
    """Count overlaps in multiple pyranges.

    Parameters
    ----------
    grs : dict of PyRanges

        The PyRanges to use as queries.

    features : PyRanges, default None

        The PyRanges to use as subject in the query. If None, the PyRanges themselves are used as a query.

    strandedness : {None, "same", "opposite", False}, default None, i.e. auto

        Whether to compare PyRanges on the same strand, the opposite or ignore strand
        information. The default, None, means use "same" if both PyRanges are stranded,
        otherwise ignore the strand information.

    how : {None, "all", "containment"}, default None, i.e. all

        What intervals to report. By default reports all overlapping intervals. "containment"
        reports intervals where the overlapping is contained within it.

    nb_cpu : int, default 1

        How many cpus to use. Can at most use 1 per chromosome or chromosome/strand tuple.
        Will only lead to speedups on large datasets.

    Returns
    -------
    PyRanges

        The features with one integer count column added per key in ``grs``.

    Examples
    --------

    >>> a = '''Chromosome Start End
    ... chr1    6    12
    ... chr1    10    20
    ... chr1    22    27
    ... chr1    24    30'''

    >>> b = '''Chromosome Start End
    ... chr1    12    32
    ... chr1    14    30'''

    >>> c = '''Chromosome Start End
    ... chr1    8    15
    ... chr1    10    14
    ... chr1    32    34'''

    >>> grs = {n: pr.from_string(s) for n, s in zip(["a", "b", "c"], [a, b, c])}
    >>> for k, v in grs.items():
    ...     print("Name: " + k)
    ...     print(v)
    Name: a
    +--------------+-----------+-----------+
    | Chromosome   |     Start |       End |
    | (category)   |   (int32) |   (int32) |
    |--------------+-----------+-----------|
    | chr1         |         6 |        12 |
    | chr1         |        10 |        20 |
    | chr1         |        22 |        27 |
    | chr1         |        24 |        30 |
    +--------------+-----------+-----------+
    Unstranded PyRanges object has 4 rows and 3 columns from 1 chromosomes.
    For printing, the PyRanges was sorted on Chromosome.
    Name: b
    +--------------+-----------+-----------+
    | Chromosome   |     Start |       End |
    | (category)   |   (int32) |   (int32) |
    |--------------+-----------+-----------|
    | chr1         |        12 |        32 |
    | chr1         |        14 |        30 |
    +--------------+-----------+-----------+
    Unstranded PyRanges object has 2 rows and 3 columns from 1 chromosomes.
    For printing, the PyRanges was sorted on Chromosome.
    Name: c
    +--------------+-----------+-----------+
    | Chromosome   |     Start |       End |
    | (category)   |   (int32) |   (int32) |
    |--------------+-----------+-----------|
    | chr1         |         8 |        15 |
    | chr1         |        10 |        14 |
    | chr1         |        32 |        34 |
    +--------------+-----------+-----------+
    Unstranded PyRanges object has 3 rows and 3 columns from 1 chromosomes.
    For printing, the PyRanges was sorted on Chromosome.

    >>> pr.count_overlaps(grs)
    +--------------+-----------+-----------+-----------+-----------+-----------+
    | Chromosome   | Start     | End       | a         | b         | c         |
    | (object)     | (int32)   | (int32)   | (int32)   | (int32)   | (int32)   |
    |--------------+-----------+-----------+-----------+-----------+-----------|
    | chr1         | 6         | 8         | 1         | 0         | 0         |
    | chr1         | 8         | 10        | 1         | 0         | 1         |
    | chr1         | 10        | 12        | 2         | 0         | 2         |
    | chr1         | 12        | 14        | 1         | 1         | 2         |
    | ...          | ...       | ...       | ...       | ...       | ...       |
    | chr1         | 24        | 27        | 2         | 2         | 0         |
    | chr1         | 27        | 30        | 1         | 2         | 0         |
    | chr1         | 30        | 32        | 0         | 1         | 0         |
    | chr1         | 32        | 34        | 0         | 0         | 1         |
    +--------------+-----------+-----------+-----------+-----------+-----------+
    Unstranded PyRanges object has 12 rows and 6 columns from 1 chromosomes.
    For printing, the PyRanges was sorted on Chromosome.

    >>> gr = pr.PyRanges(chromosomes=["chr1"] * 4, starts=[0, 10, 20, 30], ends=[10, 20, 30, 40])
    >>> gr
    +--------------+-----------+-----------+
    | Chromosome   |     Start |       End |
    | (category)   |   (int32) |   (int32) |
    |--------------+-----------+-----------|
    | chr1         |         0 |        10 |
    | chr1         |        10 |        20 |
    | chr1         |        20 |        30 |
    | chr1         |        30 |        40 |
    +--------------+-----------+-----------+
    Unstranded PyRanges object has 4 rows and 3 columns from 1 chromosomes.
    For printing, the PyRanges was sorted on Chromosome.

    >>> pr.count_overlaps(grs, gr)
    +--------------+-----------+-----------+-----------+-----------+-----------+
    | Chromosome   |     Start |       End |         a |         b |         c |
    | (category)   |   (int32) |   (int32) |   (int32) |   (int32) |   (int32) |
    |--------------+-----------+-----------+-----------+-----------+-----------|
    | chr1         |         0 |        10 |         1 |         0 |         1 |
    | chr1         |        10 |        20 |         2 |         2 |         2 |
    | chr1         |        20 |        30 |         2 |         2 |         0 |
    | chr1         |        30 |        40 |         0 |         1 |         1 |
    +--------------+-----------+-----------+-----------+-----------+-----------+
    Unstranded PyRanges object has 4 rows and 6 columns from 1 chromosomes.
    For printing, the PyRanges was sorted on Chromosome.
    """

    kwargs = {
        "as_pyranges": False,
        "nb_cpu": nb_cpu,
        "strandedness": strandedness,
        "how": how,
    }
    names = list(grs.keys())

    if features is None:
        features = pr.concat(grs.values()).split(between=True)

    from pyranges.methods.intersection import _count_overlaps

    for name, gr in grs.items():

        # Only the location columns of the query are needed for counting.
        gr = gr.drop()

        res = features.apply_pair(gr, _count_overlaps, **kwargs)

        setattr(features, name, res)

        # Features without any counted overlap get a count of 0.
        setattr(features, name, getattr(features, name).fillna(0))

    def to_int(df):
        # astype returns the converted frame; assigning through .loc may keep
        # the existing (float) dtype of the columns.
        return df.astype({name: np.int32 for name in names})

    features = features.apply(to_int)

    return features
コード例 #21
0
ファイル: pyranges.py プロジェクト: pedrotomazsilva/pyranges
    def k_nearest(self, other, k=1, **kwargs):
        """Find the k nearest intervals in ``other`` for each interval in ``self``.

        Parameters
        ----------
        other : PyRanges
            The intervals to search in.

        k : int or array-like, default 1
            Number of neighbours to find. An object with a ``values``
            attribute (e.g. a Series) is replaced by its ``values``.

        **kwargs
            Passed through ``fill_kwargs``. The options read here are
            ``overlap`` (default True; when truthy, overlapping intervals are
            included with a Distance of 0), ``ties`` (default False; handled
            values are "first", "last", "different" or a falsy value) and
            ``suffix`` (suffix of the columns coming from ``other``).

        Returns
        -------
        PyRanges
            The matches with a ``Distance`` column; the sign of the distance
            is flipped for part of the rows depending on strand and on
            whether the match ends before the interval starts. An empty
            PyRanges is returned when there are no matches.
        """
        from pyranges.methods.k_nearest import _nearest
        from sorted_nearest import get_all_ties, get_different_ties

        kwargs = fill_kwargs(kwargs)
        kwargs["stranded"] = self.stranded and other.stranded

        overlap = kwargs.get("overlap", True)
        ties = kwargs.get("ties", False)

        # Work on a copy so the helper columns added below never end up in
        # the caller's data.
        self = pr.PyRanges({key: df.copy() for key, df in self.dfs.items()})

        try:  # if k is an array
            k = k.values
        except AttributeError:
            pass

        # Helper columns: the requested k per row and the original row order.
        self.__k__ = k
        self.__IX__ = np.arange(len(self))

        dfs = pyrange_apply(_nearest, self, other, **kwargs)

        nearest = PyRanges(dfs)

        if not overlap:
            result = nearest
        else:
            overlap_kwargs = dict(kwargs)
            # Only "first" and "last" are forwarded to the join; every other
            # ties value maps to None.
            overlap_kwargs["how"] = {
                "first": "first",
                "last": "last"
            }.get(kwargs.get("ties"))
            overlaps = self.join(other, **overlap_kwargs)
            overlaps.Distance = 0

            result = pr.concat([overlaps, nearest])

        if not len(result):
            return pr.PyRanges()

        new_result = {}
        if ties in ["first", "last"]:

            for c, df in result:

                df = df.sort_values(["__IX__", "Distance"])
                grpby = df.groupby("__k__", sort=False)
                dfs = []
                for group_k, kdf in grpby:
                    # Keep the group_k closest rows of every original row.
                    grpby2 = kdf.groupby("__IX__", sort=False)
                    _df = grpby2.head(group_k)
                    dfs.append(_df)

                if dfs:
                    new_result[c] = pd.concat(dfs)

        elif ties == "different" or not ties:
            for c, df in result:

                if df.empty:
                    continue
                dfs = []

                df = df.sort_values(["__IX__", "Distance"])
                grpby = df.groupby("__k__", sort=False)

                # for each index
                # want to keep until we have k
                # then keep all with same distance
                for group_k, kdf in grpby:
                    if ties:
                        lx = get_different_ties(
                            kdf.index.values, kdf.__IX__.values,
                            kdf.Distance.astype(np.int64).values, group_k)
                    else:
                        lx = get_all_ties(
                            kdf.index.values, kdf.__IX__.values,
                            kdf.Distance.astype(np.int64).values, group_k)

                    _df = kdf.reindex(lx)
                    dfs.append(_df)

                if dfs:
                    new_result[c] = pd.concat(dfs)

        result = pr.PyRanges(new_result)

        # Restore the original row order if grouping changed it.
        if not result.__IX__.is_monotonic_increasing:
            result = result.sort("__IX__")

        result = result.drop(like="__IX__|__k__")

        def prev_to_neg(df, kwargs):

            strand = df.Strand.iloc[0] if "Strand" in df else "+"

            suffix = kwargs["suffix"]

            # Rows whose match ends before the interval starts; on any
            # strand other than "+" the selection is inverted.
            bools = df["End" + suffix] < df.Start
            if not strand == "+":
                bools = ~bools

            df.loc[bools, "Distance"] = -df.loc[bools, "Distance"]
            return df

        result = result.apply(prev_to_neg, suffix=kwargs["suffix"])

        return result