def _overlap(scdf, ocdf, kwargs):
    """Return the rows (or index labels) of ``scdf`` selected against ``ocdf``.

    Both frames are read through their ``Start`` and ``End`` columns and
    their index. ``ocdf`` is loaded into an ``NCLS`` structure, which is then
    queried with the intervals of ``scdf``.

    Args:
        scdf: Frame whose intervals are used as queries.
        ocdf: Frame whose intervals are indexed by ``NCLS``.
        kwargs: Plain dict of options (not ``**kwargs``):
            ``"invert"`` (required) -- when truthy, keep the labels of
            ``scdf`` that are *not* among the hits;
            ``"how"`` (required unless a frame is empty) -- one of
            ``"containment"``, ``"first"``, ``False`` or ``None``;
            ``"return_indexes"`` (optional, default ``False``) -- return
            the index labels instead of a reindexed frame.

    Returns:
        ``None`` when either frame is empty; otherwise the selected index
        labels if ``return_indexes`` is truthy, else ``scdf`` reindexed by
        those labels.
    """
    invert = kwargs["invert"]
    want_labels_only = kwargs.get("return_indexes", False)

    # With no rows on either side there is nothing to compare.
    if scdf.empty or ocdf.empty:
        return None

    how = kwargs["how"]
    assert how in ["containment", "first", False, None]

    query = (scdf.Start.values, scdf.End.values, scdf.index.values)
    tree = NCLS(ocdf.Start.values, ocdf.End.values, ocdf.index.values)

    # Choose the NCLS query that corresponds to the requested mode.
    if not how:
        finder_name = "all_overlaps_self"
    elif how == "containment":
        finder_name = "has_containment"
    else:
        finder_name = "has_overlaps"
    hits = getattr(tree, finder_name)(*query)

    if invert:
        # Keep only the labels of scdf that the query did not report.
        hits = scdf.index.difference(hits)

    return hits if want_labels_only else scdf.reindex(hits)
def _first_df(scdf, ocdf, how=False, invert=False, n_jobs=1, **kwargs):
    """Return the rows of ``scdf`` that do (or do not) hit an interval in ``ocdf``.

    ``ocdf`` is loaded into an ``NCLS`` structure and queried with the
    ``Start``/``End``/index values of ``scdf``.

    Args:
        scdf: Frame whose intervals are used as queries.
        ocdf: Frame whose intervals are indexed by ``NCLS``.
        how: ``"containment"`` queries ``has_containments``; ``"first"``,
            ``False`` and ``None`` all query ``has_overlaps``.
        invert: When truthy, return the rows of ``scdf`` whose index labels
            were *not* reported by the query.
        n_jobs: When greater than 1, ``scdf`` is deep-copied before it is
            indexed for the result.
        **kwargs: Accepted and ignored.

    Returns:
        ``scdf.reindex(hits)`` when ``invert`` is falsy, otherwise the rows
        of ``scdf`` whose labels are not in ``hits``.

    Raises:
        AssertionError: If ``how`` is not one of the accepted values.
    """
    assert how in "containment first".split() + [False, None]

    starts = scdf.Start.values
    ends = scdf.End.values
    indexes = scdf.index.values

    if n_jobs > 1:
        # NOTE(review): presumably guards against sharing the frame between
        # parallel workers -- confirm with the caller.
        scdf = scdf.copy(deep=True)

    it = NCLS(ocdf.Start.values, ocdf.End.values, ocdf.index.values)

    if how == "containment":
        # NOTE(review): confirm the installed NCLS exposes this method under
        # exactly this name.
        _indexes = it.has_containments(starts, ends, indexes)
    else:
        # Covers False/None and "first". Previously "first" passed the
        # validation above but matched no branch, leaving `_indexes` unbound.
        _indexes = it.has_overlaps(starts, ends, indexes)

    if invert:
        return scdf.loc[~scdf.index.isin(_indexes)]
    return scdf.reindex(_indexes)
"""Scratch script: build an NCLS over 100 intervals and query it."""

import pickle

import numpy as np
import pandas as pd
from ncls import NCLS

# Alternative input: random starts instead of a fixed range.
# starts = np.random.randint(0, int(1e8), int(1e3))

# Indexed intervals: [0, 100), [1, 101), ..., [99, 199); ids equal the starts.
# A fixed 64-bit dtype keeps the arrays identical on every platform.
starts = np.arange(100, dtype=np.int64)
ends = starts + 100
ids = starts

ncls = NCLS(starts, ends, ids)

# Query intervals. `np.long` is not available on every NumPy release, so the
# dtype is spelled explicitly as int64.
starts2 = np.array([0, 10, 20, 40000], dtype=np.int64)
ends2 = np.array([5, 15, 25, 50000], dtype=np.int64)
indexes2 = np.array([0, 1, 2, 3], dtype=np.int64)

print(starts)
# The first three queries lie inside the indexed range; the last one
# (40000-50000) lies far beyond it.
print(ncls.has_overlaps(starts2, ends2, indexes2))

# Alternative: list overlaps for a sliding 10-wide window.
# for i in range(0, 100):
#     for j in ncls.find_overlap_list(i, i + 10):
#         print(j)