# Example no. 1
# 0
def _overlap(scdf, ocdf, kwargs):
    """Filter ``scdf`` by querying an NCLS index built from ``ocdf``.

    The index is built from ``ocdf``'s ``Start``/``End`` columns and its
    index labels, then queried with the same three arrays taken from ``scdf``.

    Args:
        scdf: DataFrame to filter; read via ``.Start``, ``.End`` and ``.index``.
        ocdf: DataFrame the NCLS index is built from.
        kwargs: Plain dict of options:
            ``"invert"`` (required) -- when truthy, keep the labels of
            ``scdf`` that the query did *not* report;
            ``"return_indexes"`` (optional, default ``False``) -- return the
            labels instead of the selected rows;
            ``"how"`` (required once both frames are non-empty) --
            ``"containment"`` queries ``has_containment``, ``"first"`` queries
            ``has_overlaps``, ``False``/``None`` query ``all_overlaps_self``.

    Returns:
        ``None`` when either frame is empty; otherwise the label collection
        (if ``return_indexes``) or ``scdf.reindex(labels)``.

    Raises:
        KeyError: If ``"invert"`` (or, for non-empty input, ``"how"``) is
            missing from ``kwargs``.
        AssertionError: If ``how`` is not one of the accepted values.
    """
    invert = kwargs["invert"]
    want_labels = kwargs.get("return_indexes", False)

    # Nothing to query with, or nothing to query against.
    if scdf.empty or ocdf.empty:
        return None

    how = kwargs["how"]
    assert how in ("containment", "first", False, None)

    query = (scdf.Start.values, scdf.End.values, scdf.index.values)
    tree = NCLS(ocdf.Start.values, ocdf.End.values, ocdf.index.values)

    if how == "containment":
        hits = tree.has_containment(*query)
    elif how:
        # The only other truthy value the assertion admits is "first".
        hits = tree.has_overlaps(*query)
    else:
        hits = tree.all_overlaps_self(*query)

    if invert:
        hits = scdf.index.difference(hits)

    return hits if want_labels else scdf.reindex(hits)
# Example no. 2
# 0
def _first_df(scdf, ocdf, how=False, invert=False, n_jobs=1, **kwargs):
    """Select the rows of ``scdf`` reported by an NCLS query against ``ocdf``.

    An NCLS index is built from ``ocdf``'s ``Start``/``End`` columns and its
    index labels, then queried with the same three arrays taken from ``scdf``.

    Args:
        scdf: DataFrame to filter; read via ``.Start``, ``.End`` and ``.index``.
        ocdf: DataFrame the NCLS index is built from.
        how: ``"containment"`` queries ``has_containments``; ``"first"``,
            ``False`` and ``None`` all query ``has_overlaps``.
        invert: When truthy, return the rows of ``scdf`` whose labels were
            *not* reported by the query.
        n_jobs: When greater than 1, ``scdf`` is deep-copied before the rows
            are selected.
        **kwargs: Accepted and ignored.

    Returns:
        ``scdf.reindex(labels)``, or, with ``invert``, the rows of ``scdf``
        whose index label is not among the reported labels.

    Raises:
        AssertionError: If ``how`` is not one of the accepted values.
    """
    assert how in ("containment", "first", False, None), (
        f"how must be 'containment', 'first', False or None, got {how!r}"
    )

    starts = scdf.Start.values
    ends = scdf.End.values
    indexes = scdf.index.values

    if n_jobs > 1:
        # NOTE(review): presumably so parallel workers do not share one
        # frame -- confirm against the caller.
        scdf = scdf.copy(deep=True)

    it = NCLS(ocdf.Start.values, ocdf.End.values, ocdf.index.values)

    if how == "containment":
        # NOTE(review): _overlap in this file calls ``has_containment``
        # (singular); confirm which spelling the installed NCLS exposes.
        _indexes = it.has_containments(starts, ends, indexes)
    else:
        # Covers False/None and "first". Previously "first" passed the
        # assertion but matched no branch, leaving ``_indexes`` unbound.
        _indexes = it.has_overlaps(starts, ends, indexes)

    if invert:
        return scdf.loc[~scdf.index.isin(_indexes)]
    return scdf.reindex(_indexes)


from ncls import NCLS

import pickle
import pandas as pd
import numpy as np

# starts = np.random.randint(0, int(1e8), int(1e3))
# Demo: index 100 intervals (start i, end i + 100) and query them with four
# hand-written intervals.
# All arrays use an explicit int64 dtype. The original ``np.long`` alias is not
# a reliable dtype name: NumPy 1.24 removed it (AttributeError) and NumPy 2.x
# makes it the platform's C ``long``, which is not 64-bit everywhere.
starts = np.arange(100, dtype=np.int64)
ends = starts + 100
ids = starts

ncls = NCLS(starts, ends, ids)

starts2 = np.array([0, 10, 20, 40000], dtype=np.int64)
ends2 = np.array([5, 15, 25, 50000], dtype=np.int64)
indexes2 = np.array([0, 1, 2, 3], dtype=np.int64)


print(starts)
print(ncls.has_overlaps(starts2, ends2, indexes2))

# for i in range(0, 100):
#     for j in ncls.find_overlap_list(i, i + 10):
#         print(j)