def _intersection(scdf, ocdf, kwargs):
    """Clip the intervals in ``scdf`` to their overlap with intervals in ``ocdf``.

    Pairs of rows are found by building an ``NCLS`` from ``ocdf``'s
    ``Start``/``End``/index values and querying it with ``scdf``'s. Both
    frames are then reindexed to those pairs, and each resulting row gets the
    larger of the two ``Start`` values and the smaller of the two ``End``
    values.

    Parameters
    ----------
    scdf : pandas.DataFrame
        Frame with ``Start`` and ``End`` columns; its rows are the ones
        returned (clipped).
    ocdf : pandas.DataFrame
        Frame with ``Start`` and ``End`` columns to intersect with.
    kwargs : dict
        Must contain the key ``"how"``: one of ``False``/``None`` (all
        overlaps), ``"containment"``, ``"first"`` or ``"last"``.

    Returns
    -------
    pandas.DataFrame or None
        ``scdf`` reindexed to the matching rows with ``Start``/``End``
        replaced by the clipped coordinates (cast to ``ocdf.Start``'s dtype),
        or ``None`` when either input is empty or nothing matched.

    Raises
    ------
    AssertionError
        If both frames are non-empty and ``how`` is not one of the accepted
        values.
    """
    how = kwargs["how"]

    # The emptiness check deliberately comes before validation of ``how``:
    # empty input short-circuits to None whatever ``how`` is.
    if ocdf.empty or scdf.empty:
        return None

    assert how in "containment first last".split() + [False, None]

    starts = scdf.Start.values
    ends = scdf.End.values
    indexes = scdf.index.values

    # Captured before ``ocdf`` is rebound to its reindexed version below.
    in_dtype = ocdf.Start.dtype

    oncls = NCLS(ocdf.Start.values, ocdf.End.values, ocdf.index.values)

    # ``not how`` already covers both False and None.
    if not how:
        find_pairs = oncls.all_overlaps_both
    elif how == "containment":
        find_pairs = oncls.all_containments_both
    elif how == "first":
        find_pairs = oncls.first_overlap_both
    elif how == "last":
        find_pairs = oncls.last_overlap_both

    _self_indexes, _other_indexes = find_pairs(starts, ends, indexes)

    # After this, row i of ``scdf`` and row i of ``ocdf`` form one pair.
    scdf, ocdf = scdf.reindex(_self_indexes), ocdf.reindex(_other_indexes)

    new_starts = pd.Series(
        np.where(scdf.Start.values > ocdf.Start.values, scdf.Start, ocdf.Start),
        index=scdf.index,
        dtype=in_dtype)

    new_ends = pd.Series(
        np.where(scdf.End.values < ocdf.End.values, scdf.End, ocdf.End),
        index=scdf.index,
        dtype=in_dtype)

    # Silence chained-assignment warnings only for these two writes. The
    # context manager restores whatever setting the caller had (instead of
    # forcing 'warn') and also restores it if an assignment raises.
    with pd.option_context("mode.chained_assignment", None):
        scdf.loc[:, "Start"] = new_starts
        scdf.loc[:, "End"] = new_ends

    if scdf.empty:
        return None

    return scdf
def _both_indexes(scdf, ocdf, how=False):
    """Return paired index labels of matching rows in ``scdf`` and ``ocdf``.

    An ``NCLS`` is built from ``ocdf``'s ``Start``/``End``/index values and
    queried with ``scdf``'s. For the join modes (``"outer"``, ``"left"``,
    ``"right"``) the labels that did not appear in any pair are appended,
    with ``-1`` in the corresponding position of the other array.

    NOTE(review): this file contains a later definition of ``_both_indexes``
    without the join modes; being defined later, that one is what the module
    name refers to. Confirm which definition is intended.

    Parameters
    ----------
    scdf : pandas.DataFrame
        Frame with ``Start`` and ``End`` columns used as the query.
    ocdf : pandas.DataFrame
        Frame with ``Start`` and ``End`` columns that is indexed by NCLS.
    how : {False, None, "containment", "first", "last", "outer", "left", "right"}
        Which NCLS query to run. Falsy values and the three join modes use
        ``all_overlaps_both``.

    Returns
    -------
    tuple
        ``(_self_indexes, _other_indexes)``: two equally long arrays of index
        labels from ``scdf`` and ``ocdf``. In the join modes ``-1`` fills the
        slots that have no counterpart (presumably a sentinel the caller
        interprets; verify against the caller).

    Raises
    ------
    AssertionError
        If ``how`` is neither an accepted value nor an ``int``.
    ValueError
        If ``how`` passes the assertion but has no implementation (any truthy
        ``int``, including ``True``). This previously surfaced as an
        ``UnboundLocalError`` at the ``return`` statement.
    """
    assert (how in "containment first last outer right left".split() +
            [False, None]) or isinstance(how, int)

    join_modes = ("outer", "left", "right")

    # Pick the query before building the NCLS so unsupported values fail
    # fast, without touching the input frames.
    if not how or how in join_modes:
        query = "all_overlaps_both"
    elif how == "containment":
        query = "all_containments_both"
    elif how == "first":
        query = "first_overlap_both"
    elif how == "last":
        query = "last_overlap_both"
    else:
        raise ValueError("Unsupported value for how: {!r}".format(how))

    it = NCLS(ocdf.Start.values, ocdf.End.values, ocdf.index.values)

    _self_indexes, _other_indexes = getattr(it, query)(
        scdf.Start.values, scdf.End.values, scdf.index.values)

    if how in join_modes:
        self_parts = [_self_indexes]
        other_parts = [_other_indexes]

        # Rows of scdf with no match are kept for "outer" and "left".
        if how != "right":
            missing_in_s = scdf.index.difference(_self_indexes)
            self_parts.append(missing_in_s)
            other_parts.append(np.full(len(missing_in_s), -1, dtype=int))

        # Rows of ocdf with no match are kept for "outer" and "right".
        if how != "left":
            missing_in_o = ocdf.index.difference(_other_indexes)
            self_parts.append(np.full(len(missing_in_o), -1, dtype=int))
            other_parts.append(missing_in_o)

        _self_indexes = np.concatenate(self_parts)
        _other_indexes = np.concatenate(other_parts)

    return _self_indexes, _other_indexes
def _both_indexes(scdf, ocdf, how=False):
    """Return paired index labels of matching rows in ``scdf`` and ``ocdf``.

    An ``NCLS`` is built from ``ocdf``'s ``Start``/``End``/index values and
    queried with ``scdf``'s ``Start``/``End``/index values.

    NOTE(review): this file contains an earlier definition of
    ``_both_indexes`` that also accepts ``"outer"``/``"left"``/``"right"``;
    this later definition replaces it at import time. Confirm which one is
    intended.

    Parameters
    ----------
    scdf : pandas.DataFrame
        Frame with ``Start`` and ``End`` columns used as the query.
    ocdf : pandas.DataFrame
        Frame with ``Start`` and ``End`` columns that is indexed by NCLS.
    how : {False, None, "containment", "first", "last"}
        Which NCLS query to run; falsy values use ``all_overlaps_both``.

    Returns
    -------
    tuple
        ``(_self_indexes, _other_indexes)`` as produced by the selected NCLS
        query.

    Raises
    ------
    AssertionError
        If ``how`` is neither an accepted value nor an ``int``.
    ValueError
        If ``how`` passes the assertion but has no implementation (any truthy
        ``int``, including ``True``). This previously surfaced as an
        ``UnboundLocalError`` at the ``return`` statement.
    """
    assert (how in "containment first last".split() +
            [False, None]) or isinstance(how, int)

    # Pick the query before building the NCLS so unsupported values fail
    # fast, without touching the input frames.
    if not how:
        query = "all_overlaps_both"
    elif how == "containment":
        query = "all_containments_both"
    elif how == "first":
        query = "first_overlap_both"
    elif how == "last":
        query = "last_overlap_both"
    else:
        raise ValueError("Unsupported value for how: {!r}".format(how))

    it = NCLS(ocdf.Start.values, ocdf.End.values, ocdf.index.values)

    _self_indexes, _other_indexes = getattr(it, query)(
        scdf.Start.values, scdf.End.values, scdf.index.values)

    return _self_indexes, _other_indexes