예제 #1
0
    def _transform_widget(self, widget, dataframe, dimensions, references,
                          operations):
        """
        Run a single widget's transformer over the widget's columns of the data frame.

        :param widget:
            The widget to render.  Its ``metrics`` select the columns and its ``transformer`` produces the result.
        :param dataframe:
            The data frame to take the widget's columns from.
        :param dimensions:
            The dimensions forwarded to the display schema.
        :param references:
            The references forwarded to the display schema.  When truthy, the columns are selected with two-level
            (reference key, column) labels, where the empty string is used as the first reference key.
        :param operations:
            The operations forwarded to the display schema.  Every operation whose key differs from ``Totals.key``
            contributes a ``<metric_key>_<key>`` column.
        :return:
            The result of ``widget.transformer.transform`` for the selected subset and the display schema.
        """
        manager = self.widget_group.slicer.manager
        schema = manager.display_schema(
            metrics=widget.metrics,
            dimensions=dimensions,
            references=references,
            operations=operations,
        )

        # Temporary fix to enable operations to get output properly. Can be removed when the Fireant API is
        # refactored.
        operation_columns = []
        for operation in operations:
            if operation.key != Totals.key:
                operation_columns.append(
                    '{}_{}'.format(operation.metric_key, operation.key))

        selected = utils.flatten(widget.metrics) + operation_columns

        if not references:
            return widget.transformer.transform(dataframe[selected], schema)

        # This escapes a pandas bug where a data frame subset of columns still returns the columns of the
        # original data frame
        reference_keys = ['']
        reference_keys.extend(ref.key for ref in references)
        column_index = pd.MultiIndex.from_product([reference_keys, selected])
        subset = pd.DataFrame(dataframe[column_index], columns=column_index)

        return widget.transformer.transform(subset, schema)
예제 #2
0
    def prevalidate_request(self, slicer, metrics, dimensions, metric_filters,
                            dimension_filters, references, operations):
        """
        Reject requests that this chart type cannot render.

        A request is rejected when it carries any references or operations, or when it asks for more than one
        metric (counted after flattening ``metrics``).  The remaining parameters are accepted but not inspected.

        :raises TransformationException:
            If one of the constraints above is violated.
        """
        has_references = len(references) > 0
        if has_references or len(operations) > 0:
            message = ('References and Operations cannot be used with '
                       '{} charts')
            raise TransformationException(message.format(self.chart_type))

        metric_count = len(utils.flatten(metrics))
        if metric_count > 1:
            message = ('Only one metric can be specified when using '
                       '{} charts')
            raise TransformationException(message.format(self.chart_type))
예제 #3
0
 def _schema(self,
             dimensions=None,
             metric_filters=None,
             dimension_filters=None,
             references=None,
             operations=None,
             pagination=None):
     """
     Build the query keyword arguments for the whole widget group.

     :param dimensions:
         Passed through unchanged.
     :param metric_filters:
         Optional iterable of extra metric filters, appended after the widget group's own metric filters.
     :param dimension_filters:
         Optional iterable of extra dimension filters, appended after the widget group's own dimension filters.
     :param references:
         Passed through unchanged.
     :param operations:
         Passed through unchanged.
     :param pagination:
         Passed through unchanged.
     :return:
         A dict with the keys ``metrics``, ``dimensions``, ``metric_filters``, ``dimension_filters``,
         ``references``, ``operations`` and ``pagination``.  ``metrics`` holds the flattened metrics of every
         widget in the group, in widget order.
     """
     group = self.widget_group

     metrics = [
         metric
         for widget in group.widgets
         for metric in utils.flatten(widget.metrics)
     ]

     # Both operands are copied into lists before concatenating so that tuples (the default container used by
     # the sibling query APIs) can be combined with the group's filters without raising a TypeError, and so
     # that the group's own filter collections are never aliased by the result.
     combined_metric_filters = list(group.metric_filters) + list(metric_filters or ())
     combined_dimension_filters = list(group.dimension_filters) + list(dimension_filters or ())

     return dict(metrics=metrics,
                 dimensions=dimensions,
                 metric_filters=combined_metric_filters,
                 dimension_filters=combined_dimension_filters,
                 references=references,
                 operations=operations,
                 pagination=pagination)
예제 #4
0
    def get_query(self,
                  metrics=(),
                  dimensions=(),
                  metric_filters=(),
                  dimension_filters=(),
                  references=(),
                  operations=(),
                  pagination=None):
        """
        Returns the PyPika query object after building the query from the given params.

        Metrics are flattened and de-duplicated and dimensions are de-duplicated before the query schema is built;
        the remaining parameters are handed to ``data_query_schema`` unchanged.
        """
        unique_metrics = utils.filter_duplicates(utils.flatten(metrics))
        unique_dimensions = utils.filter_duplicates(dimensions)

        schema = self.data_query_schema(metrics=unique_metrics,
                                        dimensions=unique_dimensions,
                                        metric_filters=metric_filters,
                                        dimension_filters=dimension_filters,
                                        references=references,
                                        operations=operations,
                                        pagination=pagination)

        # The schema is a mapping of keyword arguments for the query builder.
        return self._build_data_query(**schema)
예제 #5
0
    def _select_dimensions(query, dimensions, rollup):
        """
        Add the dimensions to the query, grouping the plain ones and rolling up the rest.

        :param query:
            The query to extend.
        :param dimensions:
            A mapping of dimension key to dimension term; each term is aliased with its key via ``as_``.
        :param rollup:
            A list of lists of dimension keys.  Keys that share an inner list are rolled up together.
        :return:
            The query with the dimension selections, the GROUP BY for dimensions that are not rolled up, and the
            ROLLUP for those that are.
        """
        # Collect the rolled-up keys once instead of rebuilding the chain for every dimension key.
        rolled_up_keys = tuple(chain.from_iterable(rollup))

        grouped_dims = [
            dimension.as_(key)
            for key, dimension in dimensions.items()
            if key not in rolled_up_keys
        ]
        if grouped_dims:
            query = query.select(*grouped_dims).groupby(*grouped_dims)

        # Rollup is passed in as a list of lists so that multiple columns can be rolled up together (such as for
        # Unique dimensions)
        rollup_dims = [
            [
                dimension.as_(dimension_key)
                for dimension_key, dimension in dimensions.items()
                if dimension_key in keys
            ]
            for keys in rollup
        ]

        # Remove the nesting levels: the select takes a flat list of terms, the rollup takes the nested one.
        flattened_rollup_dims = utils.flatten(rollup_dims)

        if flattened_rollup_dims:
            query = query.select(*flattened_rollup_dims).rollup(*rollup_dims)

        return query
예제 #6
0
def make_slicer_query(
        database: Database,
        base_table: Table,
        joins: Iterable[Join] = (),
        dimensions: Iterable[Field] = (),
        metrics: Iterable[Field] = (),
        filters: Iterable[Filter] = (),
        orders: Iterable = (),
):
    """
    Build a pypika/SQL query from a collection of slicer elements.

    This is the common base for two callers: the query that fetches data for a slicer request and the query that
    fetches the choices for a dimension.  It covers joins, dimensions (select + group by), filters (where/having),
    metrics (select) and ordering.

    :param database:
        The database the query is built for.  Its ``query_cls`` creates the query and its ``trunc_date`` is passed
        on when building dimension terms.
    :param base_table:
        pypika.Table - The base table of the query, the one in the FROM clause
    :param joins:
        A collection of joins available in the slicer. This should include all slicer joins. Only joins required
        for the query will be used.
    :param dimensions:
        A collection of dimensions to use in the query.
    :param metrics:
        A collection of metrics to use in the query.
    :param filters:
        A collection of filters to apply to the query.
    :param orders:
        A collection of orders as tuples of the metric/dimension to order by and the direction to order in.

    :return:
        The query with all of the above applied.
    """
    query = database.query_cls.from_(base_table, immutable=False)

    # Joins: work out which tables the requested elements need and attach only those joins.
    required_tables = find_required_tables_to_join(
        flatten([metrics, dimensions, filters]), base_table)
    for join in find_joins_for_tables(joins, base_table, required_tables):
        joiner = query.join(join.table, how=join.join_type)
        query = joiner.on(join.criterion)

    # Dimensions: every dimension is selected, but rollup dimensions are left out of the GROUP BY.
    for dimension in dimensions:
        term = make_term_for_field(dimension, database.trunc_date)
        query = query.select(term)
        if isinstance(dimension, Rollup):
            continue
        query = query.groupby(term)

    # Filters: aggregate filters are applied with HAVING, all others with WHERE.
    for fltr in filters:
        if fltr.is_aggregate:
            query = query.having(fltr.definition)
        else:
            query = query.where(fltr.definition)

    # Metrics
    metric_terms = [make_term_for_field(metric) for metric in metrics]
    if metric_terms:
        query = query.select(*metric_terms)

    # Ordering: a field used for ordering that is not yet selected as a metric or dimension has to be added to the
    # selection as well.  The aliases are captured once, before any ordering term is added.
    selected_aliases = {selected.alias for selected in query._selects}
    for field, orientation in orders:
        order_term = make_term_for_field(field)
        query = query.orderby(order_term, order=orientation)

        if order_term.alias in selected_aliases:
            continue
        query = query.select(order_term)

    return query
예제 #7
0
def make_slicer_query(database: Database,
                      base_table: Table,
                      joins: Iterable[Join] = (),
                      dimensions: Iterable[Dimension] = (),
                      metrics: Iterable[Metric] = (),
                      filters: Iterable[Filter] = (),
                      orders: Iterable = ()):
    """
    Build a pypika/SQL query from a collection of slicer elements.

    This is the common base for two callers: the query that fetches data for a slicer request and the query that
    fetches the choices for a dimension.  It covers joins, dimensions (select + group by), filters (where/having),
    metrics (select) and ordering.

    :param database:
        The database the query is built for.  Its ``query_cls`` creates the query and its ``trunc_date`` is passed
        on when building dimension terms.
    :param base_table:
        pypika.Table - The base table of the query, the one in the FROM clause
    :param joins:
        A collection of joins available in the slicer. This should include all slicer joins. Only joins required
        for the query will be used.
    :param dimensions:
        A collection of dimensions to use in the query.
    :param metrics:
        A collection of metrics to use in the query.
    :param filters:
        A collection of filters to apply to the query.
    :param orders:
        A collection of orders as tuples of the term to order by and the direction to order in.

    :return:
        The query with all of the above applied.
    """
    query = database.query_cls.from_(base_table)

    # Joins: work out which tables the requested elements need and attach only those joins.
    required_tables = find_required_tables_to_join(flatten([metrics, dimensions, filters]), base_table)
    for join in find_joins_for_tables(joins, base_table, required_tables):
        joiner = query.join(join.table, how=join.join_type)
        query = joiner.on(join.criterion)

    # Dimensions: all terms of a dimension are selected; totals dimensions are left out of the GROUP BY.
    for dimension in dimensions:
        dimension_terms = make_terms_for_dimension(dimension, database.trunc_date)
        query = query.select(*dimension_terms)
        if isinstance(dimension, TotalsDimension):
            continue
        query = query.groupby(*dimension_terms)

    # Filters: dimension filters are applied with WHERE, every other filter with HAVING.
    for filter_ in filters:
        if isinstance(filter_, DimensionFilter):
            query = query.where(filter_.definition)
        else:
            query = query.having(filter_.definition)

    # Metrics
    metric_terms = make_terms_for_metrics(metrics)
    if metric_terms:
        query = query.select(*metric_terms)

    # Ordering: terms used for sorting that are missing from the selection are added to it.  The aliases are
    # captured once, before any ordering term is added.
    selected_aliases = {selected.alias for selected in query._selects}
    for order_term, orientation in orders:
        query = query.orderby(order_term, order=orientation)

        if order_term.alias in selected_aliases:
            continue
        query = query.select(order_term)

    return query
예제 #8
0
def make_slicer_query(database: Database,
                      base_table: Table,
                      joins: Iterable[Join] = (),
                      dimensions: Iterable[Dimension] = (),
                      metrics: Iterable[Metric] = (),
                      filters: Iterable[Filter] = (),
                      orders: Iterable = ()):
    """
    Assemble the pypika/SQL query for a list of slicer elements.

    Two queries are built on top of this function: the one fetching the data for a slicer request and the one
    fetching the available choices of a dimension.  The steps are applied in a fixed order: joins, dimensions
    (select + group by), filters (where/having), metrics (select) and finally the ordering.

    :param database:
        The database the query is built for.  Its ``query_cls`` creates the query and its ``trunc_date`` is passed
        on when building dimension terms.
    :param base_table:
        pypika.Table - The base table of the query, the one in the FROM clause
    :param joins:
        A collection of joins available in the slicer. This should include all slicer joins. Only joins required
        for the query will be used.
    :param dimensions:
        A collection of dimensions to use in the query.
    :param metrics:
        A collection of metrics to use in the query.
    :param filters:
        A collection of filters to apply to the query.
    :param orders:
        A collection of orders as tuples of the term to order by and the direction to order in.

    :return:
        The query with all of the above applied.
    """
    query = database.query_cls.from_(base_table)

    # Step 1 - joins.  Only the tables needed by the requested elements are joined.
    all_elements = flatten([metrics, dimensions, filters])
    tables_to_join = find_required_tables_to_join(all_elements, base_table)
    needed_joins = find_joins_for_tables(joins, base_table, tables_to_join)
    for join in needed_joins:
        query = query.join(join.table, how=join.join_type).on(join.criterion)

    # Step 2 - dimensions.  Everything is selected, everything except a TotalsDimension is grouped.
    for dimension in dimensions:
        dimension_terms = make_terms_for_dimension(dimension, database.trunc_date)
        query = query.select(*dimension_terms)

        is_totals = isinstance(dimension, TotalsDimension)
        if not is_totals:
            query = query.groupby(*dimension_terms)

    # Step 3 - filters.  WHERE for dimension filters, HAVING for the rest.
    for filter_ in filters:
        if not isinstance(filter_, DimensionFilter):
            query = query.having(filter_.definition)
        else:
            query = query.where(filter_.definition)

    # Step 4 - metrics.
    metric_terms = make_terms_for_metrics(metrics)
    if metric_terms:
        query = query.select(*metric_terms)

    # Step 5 - ordering.  Terms used for sorting must be part of the selection, so the ones whose alias was not
    # selected by the steps above are added.
    known_aliases = {element.alias for element in query._selects}
    for order_term, orientation in orders:
        query = query.orderby(order_term, order=orientation)

        needs_select = order_term.alias not in known_aliases
        if needs_select:
            query = query.select(order_term)

    return query
예제 #9
0
def make_slicer_query(
    database: Database,
    base_table: Table,
    joins: Sequence[Join] = (),
    dimensions: Sequence[Field] = (),
    metrics: Sequence[Field] = (),
    filters: Sequence[Filter] = (),
    orders: Sequence = (),
) -> QueryBuilder:
    """
    Creates a pypika/SQL query from a list of slicer elements.

    This is the base implementation shared by two implementations: the query to fetch data for a slicer request and
    the query to fetch choices for dimensions.

    It handles joins, dimensions (select + group by), filters (where/having), metrics (select) and ordering.

    :param database:
        The database the query is built for.  Its ``query_cls`` creates the query, its ``trunc_date`` is passed on
        when building dimension terms and its ``can_group_static_value`` flag decides whether rollup dimensions may
        be grouped.
    :param base_table:
        pypika.Table - The base table of the query, the one in the FROM clause
    :param joins:
        A collection of joins available in the slicer. This should include all slicer joins. Only joins required for
        the query will be used.
    :param dimensions:
        A collection of dimensions to use in the query.
    :param metrics:
        A collection of metrics to use in the query.
    :param filters:
        A collection of filters to apply to the query.
    :param orders:
        A collection of orders as tuples of the metric/dimension to order by and the direction to order in.

    :return:
        The query builder object (not the builder class) with all of the above applied.
    """
    query = database.query_cls.from_(base_table, immutable=False)
    elements = flatten([metrics, dimensions, filters])

    # Add joins
    join_tables_needed_for_query = find_required_tables_to_join(elements, base_table)

    for join in find_joins_for_tables(joins, base_table, join_tables_needed_for_query):
        query = query.join(join.table, how=join.join_type).on(join.criterion)

    # Add dimensions
    for dimension in dimensions:
        dimension_term = make_term_for_field(dimension, database.trunc_date)
        query = query.select(dimension_term)

        # Some database platforms like MSSQL do not support grouping by static value columns.
        # Fireant uses static value columns for totals placeholders.
        # TODO this can be reverted once an issue with data blending attaching unnecessary subqueries
        # is removed as in some cases the left join can cause duplicate rows to appear when not grouping a rollup column
        ungroupable_rollup = isinstance(dimension, Rollup) and not database.can_group_static_value

        # Aggregate dimensions are selected but never grouped.
        if not dimension.is_aggregate and not ungroupable_rollup:
            query = query.groupby(dimension_term)

    # Add filters: aggregate filters are applied with HAVING, all others with WHERE.
    for fltr in filters:
        query = (
            query.having(fltr.definition)
            if fltr.is_aggregate
            else query.where(fltr.definition)
        )

    # Add metrics
    metric_terms = [make_term_for_field(metric) for metric in metrics]
    if metric_terms:
        query = query.select(*metric_terms)

    # In the case that the orders are determined by a field that is not selected as a metric or dimension, then it needs
    # to be added to the query.  The aliases are captured once, before any ordering term is added.
    select_aliases = {el.alias for el in query._selects}
    for (orderby_field, orientation) in orders:
        orderby_term = make_term_for_field(orderby_field)
        query = query.orderby(orderby_term, order=orientation)

        if orderby_term.alias not in select_aliases:
            query = query.select(orderby_term)

    return query
예제 #10
0
    def _get_and_transform_data(self,
                                tx,
                                metrics=(),
                                dimensions=(),
                                metric_filters=(),
                                dimension_filters=(),
                                references=(),
                                operations=(),
                                pagination=None):
        """
        Validate a request with a transformer, load its data and return the transformed result.

        The steps are: ``tx.prevalidate_request`` checks the request, the manager's ``data`` loads the data frame,
        the manager's ``display_schema`` describes how to present it, and ``tx.transform`` produces the result.

        :param tx:
            The transformer to use

        :param metrics:
            See ``fireant.slicer.SlicerManager``
            A list of metrics to include in the query.

        :param dimensions:
            See ``fireant.slicer.SlicerManager``
            A list of dimensions to include in the query.

        :param metric_filters:
            See ``fireant.slicer.SlicerManager``
            A list of metrics filters to apply to the query.

        :param dimension_filters:
            See ``fireant.slicer.SlicerManager``
            A list of dimension filters to apply to the query.

        :param references:
            See ``fireant.slicer.SlicerManager``
            A list of references to include in the query

        :param operations:
            See ``fireant.slicer.SlicerManager``
            A list of post-operations to apply to the result before transformation.

        :param pagination:
            See: ``fireant.slicer.pagination.Paginator`` object
            An object detailing the pagination to apply to the query

        :return:
            The transformed result of the request.
        """
        # The transformer validates against the dimensions after ``utils.slice_first`` has been applied to each;
        # the data and display-schema calls below receive the dimensions as given.
        tx.prevalidate_request(
            self.manager.slicer,
            metrics=metrics,
            dimensions=[utils.slice_first(dimension) for dimension in dimensions],
            metric_filters=metric_filters,
            dimension_filters=dimension_filters,
            references=references,
            operations=operations,
        )

        # Load the data; the query takes the metrics flattened.
        manager = self.manager
        dataframe = manager.data(
            metrics=utils.flatten(metrics),
            dimensions=dimensions,
            metric_filters=metric_filters,
            dimension_filters=dimension_filters,
            references=references,
            operations=operations,
            pagination=pagination,
        )

        # The display schema receives the metrics as given (not flattened).
        display_schema = self.manager.display_schema(metrics, dimensions, references, operations)

        return tx.transform(dataframe, display_schema)
예제 #11
0
    def data(self,
             metrics=(),
             dimensions=(),
             metric_filters=(),
             dimension_filters=(),
             references=(),
             operations=(),
             pagination=None):
        """
        Query the data for a request, post-process it and return only the requested columns.

        :param metrics:
            Type: list or tuple
            A set of metrics to include in the query.

        :param dimensions:
            Type: list or tuple
            A set of dimensions to split the metrics into groups.

        :param metric_filters:
            Type: list or tuple
            A set of filters to constrain the data with by metric thresholds.

        :param dimension_filters:
            Type: list or tuple
            A set of filters to constrain the data with by dimension.

        :param references:
            Type: list or tuple
            A set of comparisons to include in the query.

        :param operations:
            Type: list or tuple
            A set of operations to perform on the response.

        :param pagination:
            Type: ``fireant.slicer.pagination.Paginator`` object
            An object detailing the pagination to apply to the query

        :raises SlicerException:
            If both operations and pagination are given.

        :return:
            The post-processed data frame restricted to the requested metric columns plus one
            ``<metric>_<key>`` column per operation schema entry.  With references, the columns are selected as
            (reference key, column) pairs, using the empty string as the first reference key.
        """
        if operations and pagination:
            raise SlicerException(
                'Pagination cannot be used when operations are defined!')

        metrics = utils.filter_duplicates(utils.flatten(metrics))
        dimensions = utils.filter_duplicates(dimensions)

        query_schema = self.data_query_schema(metrics=metrics,
                                              dimensions=dimensions,
                                              metric_filters=metric_filters,
                                              dimension_filters=dimension_filters,
                                              references=references,
                                              operations=operations,
                                              pagination=pagination)
        operation_schema = self.operation_schema(operations)

        raw_dataframe = self.query_data(**query_schema)
        dataframe = self.post_process(raw_dataframe, operation_schema)

        # Filter additional metrics from the dataframe that were needed for operations
        operation_columns = [
            '%s_%s' % (entry['metric'], entry['key'])
            for entry in operation_schema
        ]
        final_columns = metrics + operation_columns

        if references:
            reference_columns = [''] + [reference.key for reference in references]
            column_pairs = list(itertools.product(reference_columns, final_columns))
            return dataframe[column_pairs]

        return dataframe[final_columns]