def handle_missing(self, fltr):
    """Swap ``fltr`` for a "missing values" filter when the config asks for it.

    :param fltr: filter definition produced by the caller (may be ``None``)
    :return: ``fltr`` unchanged when ``self.cfg`` is ``None`` or has no truthy
             ``"missing"`` entry; otherwise a dict with ``missing=True`` and a
             query comparing the column to itself with ``!=``
    """
    cfg = self.cfg
    missing_requested = cfg is not None and cfg.get("missing", False)
    if missing_requested:
        col_key = build_col_key(self.column)
        # Self-inequality only holds for NaN-like values, so this selects
        # the rows where the column is missing.
        return {
            "missing": True,
            "query": "{col} != {col}".format(col=col_key),
        }
    return fltr
def build_filter(self):
    """Build the filter definition for a date column from ``self.cfg``.

    Reads the optional ``"start"`` and ``"end"`` entries.  A truthy start
    yields a ``>=`` clause, a truthy end yields a ``<=`` clause, and when
    both are set to the same value a single ``==`` clause is used instead.
    Values are wrapped in single quotes inside the query.

    :return: the result of ``handle_missing`` for a dict holding ``start``,
             ``end`` and ``query``, or for ``None`` when there is no config
             or neither bound is set
    """
    parent = super(DateFilter, self)
    if self.cfg is None:
        return parent.handle_missing(None)

    start = self.cfg.get("start")
    end = self.cfg.get("end")

    if start and end and start == end:
        # Identical bounds: a single equality replaces the two range clauses.
        clauses = ["{} == '{}'".format(build_col_key(self.column), start)]
    else:
        clauses = []
        if start:
            clauses.append(
                "{} >= '{}'".format(build_col_key(self.column), start)
            )
        if end:
            clauses.append(
                "{} <= '{}'".format(build_col_key(self.column), end)
            )

    if not clauses:
        return parent.handle_missing(None)

    definition = dict(start=start, end=end)
    definition["query"] = " and ".join(clauses)
    return parent.handle_missing(definition)
def build_filter(self):
    """Build the filter definition for a numeric column from ``self.cfg``.

    The ``"operand"`` entry of the config selects the kind of filter:

    * ``"="`` / ``"ne"``: ``==`` / ``!=`` against a single value, or
      ``in`` / ``not in`` against several values taken from ``"value"``.
    * ``"<"``, ``">"``, ``"<="``, ``">="``: comparison against ``"value"``.
    * ``"[]"`` / ``"()"``: range built from ``"min"`` and/or ``"max"``;
      ``"[]"`` uses ``>=``/``<=`` and ``"()"`` uses ``>``/``<``.  When both
      bounds are given and equal, the query collapses to a single ``==``.

    Any other operand, or a missing value/bound, produces no filter.

    :return: the result of ``handle_missing`` for the built filter dict, or
             for ``None`` when the config does not describe a usable filter
    """
    parent = super(NumericFilter, self)
    if self.cfg is None:
        return parent.handle_missing(None)

    cfg_val, cfg_operand, cfg_min, cfg_max = (
        self.cfg.get(p) for p in ["value", "operand", "min", "max"]
    )

    if cfg_operand in ["=", "ne"]:
        # A scalar 0 is falsy but is a perfectly valid number to filter on,
        # so it must not be mistaken for "no value supplied".  Every other
        # falsy input (None, empty list, ...) is still treated as empty.
        is_numeric_zero = (
            isinstance(cfg_val, (int, float))
            and not isinstance(cfg_val, bool)
            and cfg_val == 0
        )
        state = make_list(cfg_val if (cfg_val or is_numeric_zero) else [])
        if not len(state):
            return parent.handle_missing(None)
        fltr = dict(value=cfg_val, operand=cfg_operand)
        if len(state) == 1:
            fltr["query"] = "{} {} {}".format(
                build_col_key(self.column),
                "==" if cfg_operand == "=" else "!=",
                state[0],
            )
        else:
            fltr["query"] = "{} {} ({})".format(
                build_col_key(self.column),
                "in" if cfg_operand == "=" else "not in",
                ", ".join(map(str, state)),
            )
        return parent.handle_missing(fltr)

    if cfg_operand in ["<", ">", "<=", ">="]:
        if cfg_val is None:
            return parent.handle_missing(None)
        fltr = dict(
            value=cfg_val,
            operand=cfg_operand,
            query="{} {} {}".format(
                build_col_key(self.column), cfg_operand, cfg_val
            ),
        )
        return parent.handle_missing(fltr)

    if cfg_operand in ["[]", "()"]:
        fltr = dict(operand=cfg_operand)
        # Inclusive ranges append "=" to the comparison operators.
        inclusive_suffix = "=" if cfg_operand == "[]" else ""
        queries = []
        if cfg_min is not None:
            fltr["min"] = cfg_min
            queries.append("{} >{} {}".format(
                build_col_key(self.column), inclusive_suffix, cfg_min
            ))
        if cfg_max is not None:
            fltr["max"] = cfg_max
            queries.append("{} <{} {}".format(
                build_col_key(self.column), inclusive_suffix, cfg_max
            ))
        if len(queries) == 2 and cfg_max == cfg_min:
            queries = [
                "{} == {}".format(build_col_key(self.column), cfg_max)
            ]
        if not len(queries):
            return parent.handle_missing(None)
        fltr["query"] = " and ".join(queries)
        return parent.handle_missing(fltr)

    # Unknown or absent operand: nothing to filter on.
    return parent.handle_missing(None)
def build_filter(self):
    """Build the filter definition for a string-like column from ``self.cfg``.

    ``"value"`` holds the selected values and ``"operand"`` (default ``"="``)
    chooses between matching (``==`` / ``in``) and excluding (``!=`` /
    ``not in``) them.  Values are wrapped in single quotes only when
    ``self.classification`` is ``"S"``.

    :return: the result of ``handle_missing`` for a dict holding ``value``
             and ``query``, or for ``None`` when there is no config or no
             values were selected
    """
    parent = super(StringFilter, self)
    if self.cfg is None:
        return parent.handle_missing(None)

    # ``value`` may be present but null; treat that like an empty selection
    # instead of failing on ``len(None)``.
    state = self.cfg.get("value") or []
    if not len(state):
        return parent.handle_missing(None)

    operand = self.cfg.get("operand", "=")
    quoted = self.classification == "S"
    fltr = dict(value=state)

    if len(state) == 1:
        val_str = ("'{}'" if quoted else "{}").format(state[0])
        fltr["query"] = "{} {} {}".format(
            build_col_key(self.column),
            "==" if operand == "=" else "!=",
            val_str,
        )
    else:
        if quoted:
            val_str = "'{}'".format("', '".join(state))
        else:
            # Unquoted values are not necessarily strings; format each one
            # the same way the single-value branch does so join() cannot
            # fail on e.g. integers.
            val_str = ",".join("{}".format(val) for val in state)
        fltr["query"] = "{} {} ({})".format(
            build_col_key(self.column),
            "in" if operand == "=" else "not in",
            val_str,
        )
    return parent.handle_missing(fltr)
def group_filter_handler(col_def, group_val, group_classifier):
    """Translate one group value into a ``(query, label)`` pair.

    ``col_def`` is either a plain column name or ``"<column>|<freq>"``.
    With a frequency suffix the query targets the matching ``.dt``
    attribute(s) of the column:

    * ``WD`` -> ``dt.dayofweek``, ``H2`` -> ``dt.hour``
    * ``H`` -> ``dt.date`` and ``dt.hour``, ``D`` -> ``dt.date``
    * ``W`` / ``M`` / ``Q`` -> ``dt.year`` plus ``dt.week`` / ``dt.month`` /
      ``dt.quarter``, ``Y`` -> ``dt.year``

    An unrecognised suffix falls through and the whole ``col_def`` is used
    as the column name.  More than one ``"|"`` in ``col_def`` raises
    ``ValueError`` from the two-value unpacking.

    :param col_def: column name, optionally followed by ``|`` and a frequency
    :param group_val: value of the group; the string ``"nan"`` selects rows
                      where the column is unequal to itself
    :param group_classifier: ``"I"``, ``"F"`` or ``"B"`` leave the value
                             unquoted, ``"D"`` reformats it as ``%Y%m%d``
                             and quotes it, anything else quotes it as-is
    :return: tuple of (query string, human-readable label)
    """
    segments = col_def.split("|")
    if len(segments) > 1:
        col, freq = segments

        if group_val == "nan":
            nan_query = "{col} != {col}".format(col=build_col_key(col))
            return nan_query, "{}: NaN".format(col)

        if freq == "WD":
            query = "{}.dt.dayofweek == {}".format(
                build_col_key(col), group_val
            )
            label = "{}.dt.dayofweek: {}".format(col, group_val)
            return query, label

        if freq == "H2":
            query = "{}.dt.hour == {}".format(build_col_key(col), group_val)
            label = "{}.dt.hour: {}".format(col, group_val)
            return query, label

        if freq == "H":
            stamp = pd.Timestamp(group_val)
            fields = dict(day=stamp.strftime("%Y%m%d"), hour=stamp.hour)
            query = (
                "{col}.dt.date == '{day}' and {col}.dt.hour == {hour}".format(
                    col=build_col_key(col), **fields
                )
            )
            label = "{col}.dt.date: {day}, {col}.dt.hour: {hour}".format(
                col=col, **fields
            )
            return query, label

        if freq == "D":
            day = convert_date_val_to_date(group_val).strftime("%Y%m%d")
            query = "{col}.dt.date == '{day}'".format(
                col=build_col_key(col), day=day
            )
            label = "{}.dt.date: {}".format(col, day)
            return query, label

        if freq == "W":
            stamp = convert_date_val_to_date(group_val)
            fields = dict(year=stamp.year, week=stamp.week)
            query = (
                "{col}.dt.year == {year} and {col}.dt.week == {week}".format(
                    col=build_col_key(col), **fields
                )
            )
            label = "{col}.dt.year: {year}, {col}.dt.week: {week}".format(
                col=col, **fields
            )
            return query, label

        if freq == "M":
            stamp = convert_date_val_to_date(group_val)
            fields = dict(year=stamp.year, month=stamp.month)
            query = (
                "{col}.dt.year == {year} and {col}.dt.month == {month}".format(
                    col=build_col_key(col), **fields
                )
            )
            label = "{col}.dt.year: {year}, {col}.dt.month: {month}".format(
                col=col, **fields
            )
            return query, label

        if freq == "Q":
            stamp = convert_date_val_to_date(group_val)
            fields = dict(year=stamp.year, quarter=stamp.quarter)
            query = (
                "{col}.dt.year == {year} and {col}.dt.quarter == {quarter}".
                format(col=build_col_key(col), **fields)
            )
            label = "{col}.dt.year: {year}, {col}.dt.quarter: {quarter}".format(
                col=col, **fields
            )
            return query, label

        if freq == "Y":
            year = convert_date_val_to_date(group_val).year
            query = "{col}.dt.year == {year}".format(
                col=build_col_key(col), year=year
            )
            label = "{}.dt.year: {}".format(col, year)
            return query, label

    # No (recognised) frequency: filter directly on the column definition.
    if group_val == "nan":
        nan_query = "{col} != {col}".format(col=build_col_key(col_def))
        return nan_query, "{}: NaN".format(col_def)

    if group_classifier in ["I", "F", "B"]:
        query = "{col} == {val}".format(
            col=build_col_key(col_def), val=group_val
        )
        return query, "{}: {}".format(col_def, group_val)

    if group_classifier == "D":
        group_val = convert_date_val_to_date(group_val).strftime("%Y%m%d")
    query = "{col} == '{val}'".format(
        col=build_col_key(col_def), val=group_val
    )
    return query, "{}: {}".format(col_def, group_val)
def build_filter(self):
    """Build the filter definition for a string-like column from ``self.cfg``.

    The ``"action"`` entry (default ``"equals"``) selects the kind of filter:

    * ``"equals"``: match/exclude the values listed under ``"value"``
      (``==`` / ``!=`` for one value, ``in`` / ``not in`` for several).
      Values are single-quoted only when ``self.classification`` is ``"S"``.
    * ``"startswith"`` / ``"endswith"``: ``.str.<action>`` against ``"raw"``;
      unless ``"caseSensitive"`` is set both sides are lower-cased.
    * ``"contains"``: ``.str.contains`` against ``"raw"`` with ``case`` set
      from ``"caseSensitive"``.
    * ``"length"``: ``"raw"`` is either a single length or ``"start,end"``
      for an inclusive length range.

    For every action other than ``"equals"`` the query is passed through
    ``handle_ne`` together with the operand.

    :return: the result of ``handle_missing`` for the built filter dict, or
             for ``None`` when there is no config, no selected values
             (``"equals"``) or no ``"raw"`` text (other actions)
    """
    parent = super(StringFilter, self)
    if self.cfg is None:
        return parent.handle_missing(None)

    action = self.cfg.get("action", "equals")
    state = self.cfg.get("value", [])
    raw = self.cfg.get("raw")

    # ``value`` may be present but null; treat that like an empty selection
    # instead of failing on ``len(None)``.
    if action == "equals" and not len(state or []):
        return parent.handle_missing(None)
    if action != "equals" and not raw:
        return parent.handle_missing(None)

    case_sensitive = self.cfg.get("caseSensitive", False)
    operand = self.cfg.get("operand", "=")
    fltr = dict(
        value=state,
        operand=operand,
        caseSensitive=case_sensitive,
        action=action,
        raw=raw,
    )

    if action == "equals":
        quoted = self.classification == "S"
        if len(state) == 1:
            val_str = ("'{}'" if quoted else "{}").format(state[0])
            fltr["query"] = "{} {} {}".format(
                build_col_key(self.column),
                "==" if operand == "=" else "!=",
                val_str,
            )
        else:
            if quoted:
                val_str = "'{}'".format("', '".join(state))
            else:
                # Unquoted values are not necessarily strings; format each
                # one the same way the single-value branch does so join()
                # cannot fail on e.g. integers.
                val_str = ",".join("{}".format(val) for val in state)
            fltr["query"] = "{} {} ({})".format(
                build_col_key(self.column),
                "in" if operand == "=" else "not in",
                val_str,
            )
    elif action in ["startswith", "endswith"]:
        case_insensitive_conversion = "" if case_sensitive else ".str.lower()"
        fltr["query"] = "{}{}.str.{}('{}', na=False)".format(
            build_col_key(self.column),
            case_insensitive_conversion,
            action,
            raw if case_sensitive else raw.lower(),
        )
        fltr["query"] = handle_ne(fltr["query"], operand)
    elif action == "contains":
        fltr["query"] = "{}.str.contains('{}', na=False, case={})".format(
            build_col_key(self.column),
            raw,
            "True" if case_sensitive else "False",
        )
        fltr["query"] = handle_ne(fltr["query"], operand)
    elif action == "length":
        if "," in raw:
            # "start,end" describes an inclusive range of lengths.
            start, end = raw.split(",")
            fltr["query"] = "{start} <= {col}.str.len() <= {end}".format(
                col=build_col_key(self.column),
                start=start,
                end=end,
            )
        else:
            fltr["query"] = "{}.str.len() == {}".format(
                build_col_key(self.column), raw
            )
        fltr["query"] = handle_ne(fltr["query"], operand)
    return parent.handle_missing(fltr)