예제 #1
0
def overlap_2d_bedtools(target, reference, margin, return_ref=False):
    """Return the rows of ``target`` whose two anchors both hit ``reference``.

    For each side ('1' and '2') the ``chrom<side>``/``pos<side>`` columns of
    both tables are extracted, passed through ``insert_margin`` and
    intersected with ``bedtools.intersect``. A (reference, target) pair is
    kept only when it shows up in the intersection of both sides.

    Parameters
    ----------
    target, reference
        Tables providing ``chrom1``, ``pos1``, ``chrom2`` and ``pos2``
        columns and supporting label lookup through ``.loc``.
    margin
        Forwarded unchanged to ``insert_margin``.
        NOTE(review): presumably the padding applied around each position;
        ``insert_margin`` is not visible here - confirm.
    return_ref : bool, optional
        When true, also return the matching rows of ``reference``.

    Returns
    -------
    The selected ``target`` rows, sorted by index and de-duplicated; or a
    ``(target_rows, reference_rows)`` tuple when ``return_ref`` is true.

    Notes
    -----
    The code requires ``insert_margin`` to add a ``dot_id`` column; its
    values are used as ``.loc`` labels into the input tables, so they are
    presumably the original index labels (TODO confirm).
    ``drop_duplicates()`` compares row values, not index labels, so distinct
    rows holding identical values are collapsed into one.
    """

    def _anchor_hits(side):
        # Intersect the prepared anchors of one side; the result is indexed
        # by the (reference dot_id, target dot_id) pair.
        anchor_cols = ['chrom' + side, 'pos' + side]

        def _prepare(frame):
            # Drop the side digit so both sides share the same column names.
            anchors = frame[anchor_cols].rename(
                columns=lambda name: name.replace(side, ''))
            return insert_margin(anchors, margin)

        tgt = _prepare(target)
        ref = _prepare(reference)

        with tsv(ref) as ref_file, tsv(tgt) as tgt_file:
            hits = bedtools.intersect(a=ref_file.name,
                                      b=tgt_file.name,
                                      wa=True,
                                      wb=True)
            # Output carries the reference columns followed by the target
            # columns; suffix them so the two sets stay distinguishable.
            ref_names = [name + '_r' for name in ref.columns]
            tgt_names = [name + '_t' for name in tgt.columns]
            hits.columns = ref_names + tgt_names
            hits.set_index(['dot_id_r', 'dot_id_t'], inplace=True)
        return hits

    def _select(frame, labels):
        # A label may repeat when one row matched several partners.
        return frame.loc[labels].copy().sort_index().drop_duplicates()

    left_hits = _anchor_hits('1')
    right_hits = _anchor_hits('2')

    # Inner join on the pair index keeps pairs overlapping on both sides.
    paired = left_hits.merge(right_hits,
                             how='inner',
                             left_index=True,
                             right_index=True).reset_index()

    target_result = _select(target, paired['dot_id_t'].values)
    if not return_ref:
        return target_result

    reference_result = _select(reference, paired['dot_id_r'].values)
    return target_result, reference_result
예제 #2
0
def pair_sites(sites, separation, slop):
    """
    Pair sites by intersecting their right "hands" with their left "hands".

    Every site gets two intervals of width ``2 * slop``: a left hand centred
    ``separation`` below the site midpoint and a right hand centred
    ``separation`` above it. A site is dropped when either of its hands does
    not start strictly above 0. The right hands are then intersected with
    the left hands via ``bedtools.intersect``.

    Parameters
    ----------
    sites
        Table with ``chrom``, ``start``, ``end`` and ``strand`` columns.
    separation
        Offset from the site midpoint to the centre of each hand.
    slop
        Half-width of each hand.

    Returns
    -------
    The table returned by ``bedtools.intersect``, with the right-hand columns
    suffixed ``_r`` followed by the left-hand columns suffixed ``_l``.
    """
    from bioframe.tools import bedtools, tsv

    midpoints = (sites["start"] + sites["end"]) // 2

    def _make_hand(label, center):
        # One hand per site; keyword order fixes the column order that is
        # later used to name the intersect output.
        return sites[["chrom"]].assign(
            start=center - slop,
            end=center + slop,
            site_id=sites.index,
            direction=label,
            snip_mid=midpoints,
            snip_strand=sites["strand"],
        )

    left_hand = _make_hand("L", midpoints - separation)
    right_hand = _make_hand("R", midpoints + separation)

    # ignore out-of-bounds hands: both hands of a site must start above 0
    in_bounds = left_hand["start"].gt(0) & right_hand["start"].gt(0)
    left_hand = left_hand.loc[in_bounds].copy()
    right_hand = right_hand.loc[in_bounds].copy()

    # intersect right hands (left anchor site)
    # with left hands (right anchor site)
    with tsv(right_hand) as right_file, tsv(left_hand) as left_file:
        pairs = bedtools.intersect(
            a=right_file.name, b=left_file.name, wa=True, wb=True
        )
        right_names = [name + "_r" for name in right_hand.columns]
        left_names = [name + "_l" for name in left_hand.columns]
        pairs.columns = right_names + left_names
    return pairs
예제 #3
0
def pair_sites(sites, separation, slop):
    """
    Build left/right "hand" intervals around each site and join them.

    The left hand spans ``slop`` on either side of ``midpoint - separation``
    and the right hand spans ``slop`` on either side of
    ``midpoint + separation``. Sites for which either hand does not start
    strictly above 0 are discarded. Right hands are then intersected with
    left hands through ``bedtools.intersect`` to pair sites together.

    Parameters
    ----------
    sites
        Table with 'chrom', 'start', 'end' and 'strand' columns.
    separation
        Distance from a site midpoint to the centre of each of its hands.
    slop
        Half-width of each hand.

    Returns
    -------
    The ``bedtools.intersect`` output, whose columns are renamed to the
    right-hand columns plus '_r' followed by the left-hand columns plus '_l'.
    """
    from bioframe.tools import bedtools, tsv

    centers = (sites['start'] + sites['end']) // 2

    hands = {}
    directions = (('L', centers - separation), ('R', centers + separation))
    for label, hand_center in directions:
        hand = sites[['chrom']].copy()
        hand['start'] = hand_center - slop
        hand['end'] = hand_center + slop
        hand['site_id'] = hand.index
        hand['direction'] = label
        hand['snip_mid'] = centers
        hand['snip_strand'] = sites['strand']
        hands[label] = hand

    # ignore out-of-bounds hands: a site stays only if both hands start > 0
    in_bounds = (hands['L']['start'] > 0) & (hands['R']['start'] > 0)
    left_hand = hands['L'][in_bounds].copy()
    right_hand = hands['R'][in_bounds].copy()

    # intersect right hands (left anchor site)
    # with left hands (right anchor site)
    with tsv(right_hand) as r_file, tsv(left_hand) as l_file:
        joined = bedtools.intersect(a=r_file.name,
                                    b=l_file.name,
                                    wa=True,
                                    wb=True)
        suffixed = [col + '_r' for col in right_hand.columns]
        suffixed += [col + '_l' for col in left_hand.columns]
        joined.columns = suffixed
    return joined