def at_time(self, start, end=None, sort_diff=True, **window_kwargs):
    """Return the rows of this frame that overlap a requested time range.

    The range can be given in several ways:

    * a metaperiod object (anything for which ``is_mp`` is true); its
      ``start_date`` and ``end_date`` are used,
    * a single date string, parsed with ``parse_date_tz``,
    * two date strings: the ``start_date`` of the first and the
      ``end_date`` of the second are used,
    * a single time plus ``window_kwargs``: the range is that instant,
      widened on both sides by the window,
    * explicit ``start`` and ``end`` values, used as given.

    Args:
        start: Metaperiod, date string, or start time.
        end: Optional date string or end time.
        sort_diff: When true and the result is non-empty, the result is
            sorted by the time column.
        **window_kwargs: Keyword arguments forwarded to ``pd.Timedelta``;
            the resulting delta is subtracted from ``start`` and added to
            ``end``.

    Returns:
        A new instance of ``self.__class__`` wrapping the selected rows.

    Raises:
        ValueError: If only ``start`` is given, it is neither a metaperiod
            nor a string, and no ``window_kwargs`` are supplied.
    """
    if is_mp(start):
        # Keep the metaperiod under its own name: rebinding ``start`` first
        # and then reading ``start.end_date`` would look up ``end_date`` on
        # the start timestamp instead of on the metaperiod.
        mp = start
        start = mp.start_date
        end = mp.end_date
    elif isinstance(start, str) and end is None:
        mp = parse_date_tz(start)
        start = mp.start_date
        end = mp.end_date
    elif isinstance(start, str) and isinstance(end, str):
        start = parse_date_tz(start).start_date
        end = parse_date_tz(end).end_date
    elif end is None and window_kwargs:
        # A single instant; the window below turns it into a range.
        end = start
    elif end is None:
        raise ValueError("Either a metaperiod, a date string, 2 times, or time + window_kwargs.")

    # NOTE(review): presumably this sets _time_col / _start_col / _end_col,
    # which are read below -- confirm against infer_time.
    self.infer_time()

    if window_kwargs:
        margin = pd.Timedelta(**window_kwargs)
        start = start - margin
        end = end + margin

    if self._start_col is None:
        # Only a single time column is available.
        res = self[ab_overlap_c(start, end, self[self._time_col])]
    else:
        # Rows carry their own start/end columns.
        res = self[ab_overlap_cd(self[self._start_col], self[self._end_col], start, end)]

    if not res.empty and sort_diff:
        # Sorts by the time column. An earlier variant that ranked rows by
        # closeness to the midpoint of the range is not active.
        res["sort_score"] = res[self._time_col]
        res = res.sort_values("sort_score").drop("sort_score", axis=1)
    return self.__class__(res)
def load(cls, nrows=None):
    """Build an instance of ``cls`` from the semicolon-separated CSV exports.

    Every file matching ``~/nostalgia_data/input/mijn_chipkaart/*.csv`` is
    read and the results are concatenated. The ``Bedrag`` column is converted
    from strings with a decimal comma to floats, and ``Datum`` is replaced by
    the ``start_date`` obtained from parsing ``Datum`` and ``Check-uit``
    joined with a space.

    Args:
        nrows: Forwarded to ``pd.read_csv`` for each file.

    Returns:
        ``cls(data)`` for the combined, converted frame.
    """
    csv_paths = just.glob("~/nostalgia_data/input/mijn_chipkaart/*.csv")
    frames = [pd.read_csv(path, sep=";", nrows=nrows) for path in csv_paths]
    data = pd.concat(frames)

    # Amounts use a decimal comma in the export; swap it before float().
    amounts = []
    for raw_amount in data["Bedrag"]:
        amounts.append(float(raw_amount.replace(",", ".")))
    data["Bedrag"] = amounts

    timestamps = []
    for day, checkout in zip(data["Datum"], data["Check-uit"]):
        timestamps.append(parse_date_tz(day + " " + checkout).start_date)
    data["Datum"] = timestamps

    return cls(data)
def _select_at_day(self, day_or_class):
    """Return a boolean mask over ``self.time`` selecting the given day(s).

    Args:
        day_or_class: One of
            * a ``pd.DataFrame`` with a ``time`` column: rows whose date
              appears among that frame's dates are selected;
            * a list, tuple, set or ``pd.Series`` of dates: rows whose date
              is a member are selected;
            * anything else: passed to ``parse_date_tz``; rows whose date is
              on or after the parsed start date and strictly before the
              parsed end date are selected.

    Returns:
        A boolean Series aligned with ``self.time``.
    """
    if isinstance(day_or_class, pd.DataFrame):
        other_days = day_or_class.time.dt.date.unique()
        return self.time.dt.date.isin(other_days)

    if isinstance(day_or_class, (list, tuple, set, pd.Series)):
        wanted_days = set(day_or_class)
        return self.time.dt.date.isin(wanted_days)

    period = parse_date_tz(day_or_class)
    own_days = self.time.dt.date
    first_day = period.start_date.date()
    end_day = period.end_date.date()
    # Half-open range: the end date itself is excluded.
    return (own_days >= first_day) & (own_days < end_day)
def at(self, time_or_place):
    """Select rows by either a time expression or a place.

    Args:
        time_or_place: Either an ``NDF`` whose ``df_name`` ends with
            ``"places"``, or a string. A string is first tried as a time
            expression via ``parse_date_tz``; if that yields a falsy result
            it is looked up as a place through the ``"places"`` registry
            entry's ``containing`` method.

    Returns:
        The result of ``self.when_at(...)`` for places, or of
        ``self.at_time(start, end)`` for a parsed time expression.

    Raises:
        ValueError: If the argument is neither a places ``NDF`` nor a string.
    """
    is_places_frame = isinstance(time_or_place, NDF) and time_or_place.df_name.endswith("places")
    if is_places_frame:
        return self.when_at(time_or_place)

    if not isinstance(time_or_place, str):
        raise ValueError("neither time nor place was passed")

    period = parse_date_tz(time_or_place)
    if not period:
        # Not parseable as a time: treat the string as a place query.
        places = get_type_from_registry("places")
        return self.when_at(places.containing(time_or_place))

    return self.at_time(period.start_date, period.end_date)
def find_entities(sentence):
    """Find registered entities in ``sentence``, plus a time filter if present.

    The module-level ``ts`` is initialised through ``get_ts()`` on first use.
    When ``parse_date_tz`` recognises a metaperiod in the sentence, entity
    matches whose ``start`` or ``end`` falls inside one of the metaperiod's
    spans are discarded, and a ``ResultInfo`` "MP" filter entry for the
    metaperiod is appended.

    Args:
        sentence: The text to search.

    Returns:
        A list of entity matches, possibly ending with the "MP" entry.

    Raises:
        AttributeError: If ``ts.findall`` raises ``AttributeError``; re-raised
            with the message "No entities have been registered".
    """
    global ts
    if ts is None:
        ts = get_ts()

    mp = parse_date_tz(sentence)

    try:
        ents = ts.findall(sentence)
    except AttributeError:
        raise AttributeError("No entities have been registered")

    if mp is None:
        return ents

    # Character positions covered by the metaperiod's own tokens; entities
    # touching these would otherwise be matched a second time.
    covered = {pos for begin, stop in mp.spans for pos in range(begin, stop)}
    ents = [x for x in ents if not (x.start in covered or x.end in covered)]

    mp_entry = ResultInfo("MP", "filter", at_time_wrapper(mp), orig_word=" ".join(mp.matches))
    ents.append(mp_entry)
    return ents
def at(self, time):
    """Return the entries whose index overlaps the parsed time expression.

    Args:
        time: A value accepted by ``parse_date_tz``; its ``start_date`` and
            ``end_date`` bound the query interval.

    Returns:
        ``self`` indexed by the overlap mask.
    """
    period = parse_date_tz(time)
    window = pd.Interval(period.start_date, period.end_date)
    # NOTE(review): assumes self.index is interval-based so that
    # ``overlaps`` is available -- confirm against the class definition.
    hits = self.index.overlaps(window)
    return self[hits]