def _like_filter(df, q):
    """Build a row mask for a ``like`` / ``ilike`` clause.

    ``q`` is unpacked as ``(op, column, raw_expr)``. ``raw_expr`` must be a
    quoted string. After unquoting, one leading ``%`` and/or one trailing
    ``%`` is treated as a wildcard and stripped; an end without a wildcard
    is anchored with ``^`` / ``$``. The rest of the pattern is handed to
    pandas as a regular expression, without escaping.

    Returns the result of ``df[column].str.contains(...)``. Missing values
    in the column are reported as ``False`` (non-matching) so the result
    can be used directly as a boolean mask.

    Malformed input (pattern not quoted, or a column that has no ``.str``
    accessor) is reported through ``raise_malformed``, which is defined
    elsewhere and presumably raises.
    """
    assert_len(q, 3)
    op, column, raw_expr = q

    if not is_quoted(raw_expr):
        raise_malformed("like expects a quoted string as second argument", q)

    regexp = unquote(raw_expr)

    # Leading wildcard: drop it; otherwise anchor the match at the start.
    if regexp.startswith('%'):
        regexp = regexp[1:]
    else:
        regexp = '^' + regexp

    # Trailing wildcard: drop it; otherwise anchor the match at the end.
    if regexp.endswith('%'):
        regexp = regexp[:-1]
    else:
        regexp += '$'

    # 'like' is case sensitive, 'ilike' is case insensitive
    case = op == 'like'

    try:
        # na=False: without it missing values yield NaN in the result, which
        # cannot be used for boolean indexing.
        return df[column].str.contains(regexp, case=case, na=False)
    except AttributeError:
        raise_malformed("Invalid column type for (i)like", q)
def _prepare_arg(df, arg):
    """Resolve a query argument to a literal value or to data from ``df``.

    * Non-string arguments are returned unchanged.
    * Quoted strings are returned unquoted, as literals.
    * Any other string is treated as a name to look up on ``df``: the
      column of that name if one exists, otherwise the attribute of that
      name (``AttributeError`` propagates if there is none).
    """
    if not isinstance(arg, basestring):
        return arg

    if is_quoted(arg):
        return unquote(arg)

    # Prefer an explicit column lookup: attribute access hands back the
    # DataFrame's own attribute/method when a column name collides with one
    # (e.g. a column called "count" or "size").
    if arg in df:
        return df[arg]

    # Not a column: keep the attribute-lookup behaviour, including the
    # AttributeError raised for unknown names.
    return getattr(df, arg)
def _add_stand_in_columns(df, stand_in_columns): if not stand_in_columns: return df for column_name, stand_in_value in stand_in_columns: if column_name not in df: if stand_in_value in df: df.loc[:, column_name] = df[stand_in_value] else: dtype = _get_dtype(stand_in_value) stand_in_value = unquote(stand_in_value) arr = numpy.full(len(df), stand_in_value, dtype=dtype) df.loc[:, column_name] = pandas.Series(arr, index=df.index)