コード例 #1
0
ファイル: ingredients.py プロジェクト: juiceinc/recipe
    def __init__(self, **kwargs):
        """Initialize the attributes common to ingredients.

        Recognized keyword arguments are removed from ``kwargs`` and stored
        as attributes of the same name. Anything left over afterwards is
        made available through ``self.meta``.

        Raises:
            BadIngredient: If ``formatters`` is neither a list nor a tuple,
                or if ``column_suffixes`` is supplied with a length that
                differs from the number of ``columns``.
        """
        # (attribute, default) pairs consumed from kwargs, in this order.
        # The tuple is rebuilt on each call, so the mutable defaults are
        # fresh objects for every instance.
        leading_options = (
            ("id", uuid4().hex[:12]),
            ("columns", []),
            ("filters", []),
            ("havings", []),
            ("group_by", []),
            ("formatters", []),
            ("quickselects", []),
            ("column_suffixes", None),
            ("cache_context", ""),
            ("datatype", None),
            ("datatype_by_role", {}),
        )
        for attr_name, fallback in leading_options:
            setattr(self, attr_name, kwargs.pop(attr_name, fallback))

        # State that is never taken from kwargs
        self.anonymize = False
        self.roles = {}
        self._labels = []

        trailing_options = (
            ("error", None),
            # What order should this be in
            ("ordering", "asc"),
            ("group_by_strategy", "labels"),
        )
        for attr_name, fallback in trailing_options:
            setattr(self, attr_name, kwargs.pop(attr_name, fallback))

        if not isinstance(self.formatters, (list, tuple)):
            raise BadIngredient(
                "formatters passed to an ingredient must be a list or tuple"
            )

        # Explicit suffixes, when given, must pair up one-to-one with columns
        suffixes = self.column_suffixes
        if suffixes is not None:
            if len(suffixes) != len(self.columns):
                raise BadIngredient(
                    "column_suffixes must be the same length as columns"
                )

        # Whatever was not consumed above is exposed through self.meta
        self.meta = AttrDict(kwargs)
コード例 #2
0
def ingredient_from_unvalidated_dict(unvalidated_ingr, selectable):
    """Validate a raw ingredient definition and build an ingredient from it.

    The definition is normalized against ``ingredient_schema`` (unknown keys
    are allowed) and the result is passed to
    ``create_ingredient_from_config`` together with ``selectable``.

    Raises:
        BadIngredient: If normalization fails. The schema error is attached
            as the exception's ``__cause__``.
    """
    try:
        ingr_dict = normalize_schema(
            ingredient_schema, unvalidated_ingr, allow_unknown=True
        )
    except E.SureError as e:
        # Chain explicitly so the underlying schema error stays visible
        raise BadIngredient(str(e)) from e
    return create_ingredient_from_config(ingr_dict, selectable)
コード例 #3
0
def parse_unvalidated_condition(cond, selectable):
    """Validate a raw condition definition and parse it.

    The condition is normalized against ``condition_schema`` (unknown keys
    are rejected) and the result is passed to ``parse_validated_condition``
    together with ``selectable``.

    Returns:
        ``None`` when ``cond`` is ``None``; otherwise whatever
        ``parse_validated_condition`` returns.

    Raises:
        BadIngredient: If normalization fails. The schema error is attached
            as the exception's ``__cause__``.
    """
    if cond is None:
        return None
    try:
        cond = normalize_schema(condition_schema, cond, allow_unknown=False)
    except E.SureError as e:
        # Chain explicitly so the underlying schema error stays visible
        raise BadIngredient(str(e)) from e
    return parse_validated_condition(cond, selectable)
コード例 #4
0
def parse_unvalidated_field(unvalidated_fld, selectable, aggregated=True):
    """Validate a raw field definition and parse it.

    The field is wrapped in a minimal ingredient definition (kind "Metric"
    when ``aggregated`` is true, "Dimension" otherwise) so that it can be
    normalized against ``ingredient_schema``. The normalized ``field`` entry
    is then passed to ``parse_validated_field`` together with ``selectable``.

    Raises:
        BadIngredient: If normalization fails. The schema error is attached
            as the exception's ``__cause__``.
    """
    kind = "Metric" if aggregated else "Dimension"
    ingr = {"field": unvalidated_fld, "kind": kind}
    try:
        ingr_dict = normalize_schema(ingredient_schema, ingr, allow_unknown=True)
    except E.SureError as e:
        # Chain explicitly so the underlying schema error stays visible
        raise BadIngredient(str(e)) from e
    return parse_validated_field(ingr_dict["field"], selectable)
コード例 #5
0
def create_ingredient_from_config(ingr_dict, selectable):
    """Build an ingredient from an already validated config dict.

    ``ingr_dict`` is consumed in place: the ``kind``, ``field``,
    ``divide_by``, ``quickselects`` and ``extra_fields`` entries are popped,
    parsed against ``selectable`` and the remaining entries are passed to
    the ingredient class as keyword arguments.

    Returns the constructed ingredient, or an ``InvalidIngredient`` when the
    ingredient class raises ``BadIngredient``. Raises ``BadIngredient``
    directly when no ingredient class exists for the requested kind.
    """
    kind = ingr_dict.pop("kind", "metric")
    ingredient_cls = ingredient_class_for_name(kind.title())
    if ingredient_cls is None:
        raise BadIngredient("Unknown ingredient kind")

    field_defn = ingr_dict.pop("field", None)
    divide_by_defn = ingr_dict.pop("divide_by", None)

    expression = parse_validated_field(
        field_defn, selectable, use_bucket_labels=True
    )
    has_buckets = isinstance(field_defn, dict) and "buckets" in field_defn
    if has_buckets:
        # Bucketed fields are parsed a second time without bucket labels
        # to provide the expression used for ordering.
        ingr_dict["order_by_expression"] = parse_validated_field(
            field_defn, selectable, use_bucket_labels=False
        )

    if divide_by_defn is not None:
        # Divide-by-zero safe division: a missing denominator becomes 0.0
        # and a small epsilon is added before dividing.
        denominator = parse_validated_field(divide_by_defn, selectable)
        safe_denominator = (
            func.coalesce(cast(denominator, Float), 0.0) + SAFE_DIVISON_EPSILON
        )
        expression = cast(expression, Float) / safe_denominator

    raw_quickselects = ingr_dict.pop("quickselects", None) or ()
    ingr_dict["quickselects"] = [
        {
            "name": qs["name"],
            "condition": parse_validated_condition(
                qs.get("condition", None), selectable
            ),
        }
        for qs in raw_quickselects
    ]

    # Every extra field carries a name and a field definition; the parsed
    # field is handed to the ingredient as a keyword argument of that name.
    for extra in ingr_dict.pop("extra_fields", []):
        ingr_dict[extra.get("name")] = parse_validated_field(
            extra.get("field"), selectable
        )

    try:
        return ingredient_cls(expression, **ingr_dict)
    except BadIngredient as exc:
        details = {"details": str(exc)}
        return InvalidIngredient(
            error={"type": "bad_ingredient", "extra": details}
        )
コード例 #6
0
ファイル: ingredients.py プロジェクト: juiceinc/recipe
    def make_column_suffixes(self):
        """Return the suffixes appended to `id` when generating the query.

        Explicitly configured ``column_suffixes`` win. Otherwise no columns
        yield an empty tuple, and a single column yields ``("_raw",)`` when
        formatters are present or ``("",)`` when they are not.

        Developers note: These are generated when the query runs because the
        recipe may be run with anonymization on or off, which will inject
        a formatter.

        Raises:
            BadIngredient: If there is more than one column and no explicit
                suffixes were supplied.
        """
        explicit_suffixes = self.column_suffixes
        if explicit_suffixes:
            return explicit_suffixes

        column_count = len(self.columns)
        if column_count > 1:
            raise BadIngredient(
                "column_suffixes must be supplied if there is more than one column"
            )
        if column_count == 0:
            return ()

        # Exactly one column
        return ("_raw",) if self.formatters else ("",)
コード例 #7
0
ファイル: ingredients.py プロジェクト: juiceinc/recipe
    def __init__(self, expression, **kwargs):
        """Create a dimension from a value expression and optional roles.

        Args:
            expression: The expression stored under the ``value`` role.
            **kwargs: Forwarded to the parent constructor. In addition,
                every ``<role>_expression`` keyword defines an extra role,
                and ``lookup`` (a dict) with an optional ``lookup_default``
                installs a formatter that maps values through the lookup.

        Raises:
            BadIngredient: If a ``raw_expression`` keyword is supplied
                (``raw`` is reserved) or if ``lookup`` is not a dictionary.
        """
        super(Dimension, self).__init__(**kwargs)
        if self.datatype is None:
            self.datatype = datatype_from_column_expression(expression)

        # We must always have a value role
        self.roles = {"value": expression}

        role_suffix = "_expression"
        for k, v in kwargs.items():
            if not k.endswith(role_suffix):
                continue
            # Remove _expression to get the role
            role = k[: -len(role_suffix)]
            if not role:
                continue
            if role == "raw":
                raise BadIngredient("raw is a reserved role in dimensions")
            self.roles[role] = v

        if not self.datatype_by_role:
            for k, expr in self.roles.items():
                self.datatype_by_role[k] = datatype_from_column_expression(expr)

        # Column order is: "id" (if present), "value", every other role in
        # sorted order, and finally "order_by" (if present).
        # For instance, if the following are passed
        # expression, id_expression, order_by_expression, zed_expression
        # the order of columns would be "id", "value", "zed", "order_by"
        # When using group_bys for ordering we put them in reverse order.
        self.role_keys = [k for k in ("id", "value") if k in self.roles]
        self.role_keys.extend(
            sorted(
                k for k in self.roles if k not in ("id", "value", "order_by")
            )
        )
        if "order_by" in self.roles:
            self.role_keys.append("order_by")

        self.columns = [self.roles[k] for k in self.role_keys]
        self._group_by = list(self.columns)

        if "lookup" in kwargs:
            self.lookup = kwargs.get("lookup")
            if not isinstance(self.lookup, dict):
                raise BadIngredient("lookup must be a dictionary")

            # Build a formatter that performs the lookup. It reads
            # self.lookup / self.lookup_default at call time.
            if "lookup_default" in kwargs:
                self.lookup_default = kwargs.get("lookup_default")

                def lookup_formatter(value):
                    return self.lookup.get(value, self.lookup_default)

            else:

                def lookup_formatter(value):
                    return self.lookup.get(value, value)

            # Put the lookup first in a *new* list. The parent constructor
            # accepts tuples (which have no insert()), and the list passed
            # in by the caller must not be modified behind its back.
            self.formatters = [lookup_formatter] + list(self.formatters)
コード例 #8
0
def create_ingredient_from_parsed(ingr_dict, builder, debug=False):
    """Create an ingredient from a config version 2 object.

    The ``kind`` entry of ``ingr_dict`` selects the ingredient class. Field
    and condition definitions are converted to expressions with
    ``builder.parse`` and the ingredient class is then instantiated with the
    remaining entries of ``ingr_dict`` as keyword arguments.

    Note that ``ingr_dict`` is modified in place: consumed keys are popped
    and parsed values are written back into it.

    :param ingr_dict: The ingredient definition; ``kind`` defaults to
        "metric".
    :param builder: Object providing ``parse(...)``, which returns an
        ``(expression, datatype)`` pair, and a ``drivername`` string.
    :param debug: Passed through to every ``builder.parse`` call.
    :return: An instance of the ingredient class, or an
        ``InvalidIngredient`` describing the problem when a definition can
        not be parsed, when a metric's datatype is not "num", or when the
        ingredient constructor raises ``BadIngredient``.
    :raises BadIngredient: If no ingredient class exists for ``kind``.
    """
    kind = ingr_dict.pop("kind", "metric")
    IngredientClass = ingredient_class_for_name(kind.title())
    if IngredientClass is None:
        raise BadIngredient(f"Unknown ingredient kind {kind}")

    args = []

    # For some formats, we will automatically convert dates.
    # A format wrapped in angle brackets is looked up without them; the
    # "format" entry in ingr_dict itself is left untouched.
    format_str = ingr_dict.get("format")
    if (
        isinstance(format_str, str)
        and format_str.startswith("<")
        and format_str.endswith(">")
    ):
        format_str = format_str[1:-1]
    convert_dates_lookup = {"%Y": "year_conv", "%B %Y": "month_conv"}
    convert_datetimes_lookup = {
        "%Y": "dt_year_conv",
        "%B %Y": "dt_month_conv",
        "%B %-d, %Y": "dt_day_conv",
        "%-d %b %Y": "dt_day_conv",
        "%-m/%-d/%Y": "dt_day_conv",
        "%-m-%-d-%Y": "dt_day_conv",
    }
    # Keyword arguments shared by every builder.parse call below
    parse_kwargs = {
        "debug": debug,
        "convert_dates_with": convert_dates_lookup.get(format_str),
        "convert_datetimes_with": convert_datetimes_lookup.get(format_str),
    }

    if builder.drivername.startswith("mssql"):
        # SQLServer can not use aliases in group bys and also
        # does not support date/time conversions due to an issue with pyodbc
        # parameters in queries
        # https://github.com/mkleehammer/pyodbc/issues/479
        default_group_by_strategy = "direct"
    else:
        default_group_by_strategy = "labels"

    try:
        if kind in ("metric", "dimension"):
            if kind == "metric":
                fld_defn = ingr_dict.pop("field", None)
                # SQLAlchemy ingredient with required aggregation
                expr, datatype = builder.parse(
                    fld_defn, enforce_aggregation=True, **parse_kwargs
                )
                # Save the data type in the ingredient
                ingr_dict["datatype"] = datatype
                if datatype != "num":
                    error = {
                        "type": "Can not parse field",
                        "extra": {"details": "A string can not be aggregated"},
                    }
                    return InvalidIngredient(error=error)
                args = [expr]
            else:
                ingr_dict.setdefault("group_by_strategy", default_group_by_strategy)
                fld_defn = ingr_dict.pop("field", None)
                buckets = ingr_dict.pop("buckets", None)
                buckets_default_label = ingr_dict.pop("buckets_default_label", None)
                if buckets:
                    # Buckets replace the field definition and contribute an
                    # extra field that is used for ordering.
                    fld_defn, order_by_fld = _convert_bucket_to_field(
                        fld_defn, buckets, buckets_default_label, builder
                    )
                    ingr_dict.setdefault("extra_fields", []).append(
                        {"name": "order_by_expression", "field": order_by_fld}
                    )
                expr, datatype = builder.parse(
                    fld_defn, forbid_aggregation=True, **parse_kwargs
                )
                # Save the data type in the ingredient
                ingr_dict["datatype"] = datatype
                args = [expr]

                # Convert extra fields to sqlalchemy expressions and add them
                # directly to the kwargs, saving datatypes
                role_suffix = "_expression"
                datatype_by_role = {"value": datatype}
                for extra in ingr_dict.pop("extra_fields", []):
                    raw_role = extra.get("name")
                    if raw_role.endswith(role_suffix):
                        # Remove _expression to get the role
                        role = raw_role[: -len(role_suffix)]
                    else:
                        role = raw_role

                    extra_expr, extra_datatype = builder.parse(
                        extra.get("field"), forbid_aggregation=True, **parse_kwargs
                    )
                    datatype_by_role[role] = extra_datatype
                    ingr_dict[raw_role] = extra_expr
                ingr_dict["datatype_by_role"] = datatype_by_role

            parsed_quickselects = []
            for qs in ingr_dict.pop("quickselects", []):
                condition_expr, _ = builder.parse(
                    qs.get("condition"), forbid_aggregation=True, **parse_kwargs
                )
                parsed_quickselects.append(
                    {"name": qs["name"], "condition": condition_expr}
                )
            ingr_dict["quickselects"] = parsed_quickselects

        elif kind == "filter":
            # The condition is read, not popped, so it is also passed on to
            # the ingredient class as a keyword argument.
            expr, _ = builder.parse(
                ingr_dict.get("condition"), forbid_aggregation=True, **parse_kwargs
            )
            args = [expr]
        elif kind == "having":
            expr, _ = builder.parse(
                ingr_dict.get("condition"), forbid_aggregation=False, **parse_kwargs
            )
            args = [expr]

    except (GrammarError, LarkError) as e:
        error_msg = str(e)
        # Only the text before the "Expecting:" marker is reported
        if "Expecting:" in error_msg:
            error_msg = error_msg.split("Expecting:")[0]

        error = {"type": "Can not parse field", "extra": {"details": error_msg}}
        return InvalidIngredient(error=error)

    try:
        return IngredientClass(*args, **ingr_dict)
    except BadIngredient as e:
        # Some internal error while running the Ingredient constructor
        error = {"type": "bad_ingredient", "extra": {"details": str(e)}}
        return InvalidIngredient(error=error)
コード例 #9
0
    def brew_query_parts(self, order_by_keys=()):
        """Make columns, group_bys, filters, havings and order_bys.

        Args:
            order_by_keys: Iterable of ingredient ids to order by; a leading
                "-" is the marker used for descending order. The iterable
                is copied and never modified.

        Returns:
            A dict with the keys ``columns`` (list), ``group_bys`` (list),
            ``filters`` (set), ``havings`` (set) and ``order_bys`` (list).

        Raises:
            InvalidColumnError: If an ingredient carries an error of type
                "invalid_column".
            BadIngredient: If an ingredient carries any other error.
        """
        columns, group_bys, filters, havings = [], [], set(), set()
        order_by_keys = list(order_by_keys)
        seen_filter_strs = set()

        for ingredient in self.ingredients():
            if ingredient.error:
                error_type = ingredient.error.get("type")
                if error_type == "invalid_column":
                    extra = ingredient.error.get("extra", {})
                    column_name = extra.get("column_name")
                    ingredient_name = extra.get("ingredient_name")
                    error_msg = 'Invalid column "{0}" in ingredient "{1}"'.format(
                        column_name, ingredient_name
                    )
                    raise InvalidColumnError(error_msg, column_name=column_name)
                raise BadIngredient(str(ingredient.error))
            if ingredient.query_columns:
                columns.extend(ingredient.query_columns)
            if ingredient.group_by:
                group_bys.extend(ingredient.group_by)
            if ingredient.filters:
                # Kept as a local import, but executed once per ingredient
                # instead of once per filter.
                # NOTE(review): presumably local to avoid a circular import
                # - confirm before moving it to module level.
                from recipe.utils import filter_to_string

                # Ensure we don't add duplicate filters: filters are
                # compared by their string form.
                for new_f in ingredient.filters:
                    new_f_str = filter_to_string(new_f)
                    if new_f_str not in seen_filter_strs:
                        filters.add(new_f)
                        seen_filter_strs.add(new_f_str)
            if ingredient.havings:
                havings.update(ingredient.havings)

            # If there is an order_by key on one of the ingredients, make sure
            # the recipe orders by this ingredient
            if "order_by" in ingredient.roles:
                if (
                    ingredient.id not in order_by_keys
                    and "-" + ingredient.id not in order_by_keys
                ):
                    if ingredient.ordering == "desc":
                        order_by_keys.append("-" + ingredient.id)
                    else:
                        order_by_keys.append(ingredient.id)

        order_bys = []
        # String forms of the columns already in order_bys, so that each
        # duplicate check is a set lookup rather than a rescan of the list.
        seen_order_by_strs = set()
        for key in order_by_keys:
            try:
                ingr = self.find(key, (Dimension, Metric))
                for c in ingr.order_by_columns:
                    # Avoid duplicate order by columns
                    c_str = str(c)
                    if c_str not in seen_order_by_strs:
                        seen_order_by_strs.add(c_str)
                        order_bys.append(c)
            except BadRecipe:
                # Ignore order_by if the dimension/metric is not used.
                # TODO: Add structlog warning
                pass

        return {
            "columns": columns,
            "group_bys": group_bys,
            "filters": filters,
            "havings": havings,
            "order_bys": order_bys,
        }
コード例 #10
0
    def from_config(
        cls,
        obj,
        selectable,
        ingredient_constructor=ingredient_from_validated_dict,
        metadata=None,
    ):
        """Create a shelf using a dict shelf definition.

        :param obj: A Python dictionary describing a Shelf.
        :param selectable: A SQLAlchemy Table, a Recipe, a table name, or a
            SQLAlchemy join to select from. A table name may be prefixed
            with a schema (``schema.table``); the name is split on its last
            dot, so everything before that dot is used as the schema.
        :param ingredient_constructor: Callable that builds one ingredient
            from a validated definition and the selectable. When it is the
            default constructor, definitions whose ``_version`` is not "1"
            are additionally given a shared ``builder`` keyword argument.
        :param metadata: If `selectable` is passed as a table name, then in
            order to introspect its schema, we must have the SQLAlchemy
            MetaData object to associate it with.
        :return: A shelf that contains the ingredients defined in obj.
        :raises BadIngredient: If ``obj`` does not satisfy the shelf schema.
        """
        from recipe import Recipe

        if isinstance(selectable, Recipe):
            selectable = selectable.subquery()
        elif isinstance(selectable, str):
            if "." in selectable:
                # Split on the last dot only: a plain split() fails to
                # unpack when the name contains more than one dot.
                schema, tablename = selectable.rsplit(".", 1)
            else:
                schema, tablename = None, selectable

            selectable = Table(
                tablename,
                metadata,
                schema=schema,
                extend_existing=True,
                autoload=True,
            )

        try:
            validated_shelf = normalize_schema(shelf_schema, obj, allow_unknown=True)
        except E.SureError as e:
            # Chain explicitly so the underlying schema error stays visible
            raise BadIngredient(str(e)) from e

        d = {}
        # Created lazily, and shared by all ingredients that need one
        builder = None
        # Only the default constructor is version-aware
        uses_default_constructor = (
            ingredient_constructor == ingredient_from_validated_dict
        )

        for k, v in validated_shelf.items():
            if uses_default_constructor and str(v.get("_version", "1")) != "1":
                if builder is None:
                    builder = SQLAlchemyBuilder.get_builder(selectable=selectable)
                ingredient = ingredient_constructor(v, selectable, builder=builder)
            else:
                ingredient = ingredient_constructor(v, selectable)

            if isinstance(ingredient, InvalidIngredient):
                # Record which shelf entry the invalid ingredient came from
                if not ingredient.error.get("extra"):
                    ingredient.error["extra"] = {}
                ingredient.error["extra"]["ingredient_name"] = k
            d[k] = ingredient

        return cls(d, select_from=selectable)