Ejemplo n.º 1
0
 def test_create_grouping_invalid_grouping(self):
     """Calling create_grouping with 'invalid_grouping' is expected to raise."""
     # Callable form of assertRaises: invokes create_grouping with the given
     # argument and fails the test unless an Exception propagates.
     self.assertRaises(Exception, create_grouping, 'invalid_grouping')
Ejemplo n.º 2
0
 def test_create_grouping_by_attribute_no_attribute(self):
     """create_grouping('byAttribute') without further arguments must raise."""
     with self.assertRaises(Exception):
         # The return value is irrelevant; only the raised exception matters.
         create_grouping('byAttribute')
Ejemplo n.º 3
0
 def test_create_grouping_by_time_no_date_type(self):
     """create_grouping('byMonth') without further arguments must raise."""
     # Callable form of assertRaises: the test fails unless the call raises.
     self.assertRaises(Exception, create_grouping, 'byMonth')
Ejemplo n.º 4
0
    def test_create_grouping_by_attribute(self):
        """An attribute grouping is returned as a dict with type and attribute."""
        self.assertDictEqual(
            create_grouping('byAttribute', attribute='Country'),
            {'type': 'byAttribute', 'attribute': 'Country'},
        )
Ejemplo n.º 5
0
    def aggregate(self,
                  log_id: str,
                  metric: str,
                  grouping: Optional[str] = None,
                  secondary_grouping: Optional[str] = None,
                  max_amount_attributes: int = 10,
                  trace_filter_sequence: Optional[list] = None,
                  activity_exclusion_filter: Optional[list] = None,
                  value_sorting: str = 'caseCount',
                  sorting_order: str = 'descending',
                  values_from: str = 'allCases',
                  aggregation_function: Optional[str] = None,
                  percentile: Optional[float] = None,
                  attribute: Optional[str] = None,
                  secondary_attribute: Optional[str] = None,
                  activities: Optional[list] = None,
                  secondary_activities: Optional[list] = None,
                  date_type: Optional[str] = None,
                  secondary_date_type: Optional[str] = None,
                  **kwargs) -> pd.DataFrame:
        """
        An aggregation function for the computation of KPIs and grouping by
        metrics allowing the creation of bar-charts, line-charts, and other
        visualizations.

        The request is posted to '/api/v2/aggregate-data'. The chart values
        of the response are extracted into a DataFrame whose 'xAxis',
        'yAxis' and 'zAxis' columns are renamed to the grouping (or
        attribute), the metric, and the secondary grouping (or secondary
        attribute), respectively.

        Args:
            log_id:
                A string denoting the id of the log to aggregate.
            metric:
                A string denoting the metric to use. For the value
                "frequency", a frequency metric is returned and for "duration" a
                duration metric is returned. Otherwise the value is interpreted
                as a numeric attribute metric.
            grouping:
                An optional string denoting the grouping to use. For the value
                "byDuration", a duration grouping is returned. For "byAttribute"
                a grouping by a categorical attribute (the variable attribute
                needs to be passed) is returned. For one of ["byYear", "byMonth",
                "byQuarter", "byDayOfWeek", "byDayOfYear","byHourOfDay"] a time
                grouping is returned (date_type also needs to be set). If the
                activity aggregation "byActivity" is used, the activities to
                aggregate over need to be passed as list.
            secondary_grouping:
                A string denoting the optional secondary grouping to use.
                For the value "byDuration", a duration grouping is returned. For
                "byAttribute" a grouping by a categorical attribute (the variable
                secondary_attribute needs to be passed) is returned. For one of
                ["byYear", "byMonth", "byQuarter", "byDayOfWeek", "byDayOfYear",
                "byHourOfDay"] a time grouping is returned (secondary_date_type
                also needs to be set). If the activity aggregation "byActivity"
                is used, the secondary_activities to aggregate over need to be
                passed as list.
            max_amount_attributes:
                An integer denoting the maximum amount of attributes to return.
            trace_filter_sequence:
                A list containing the sequence of filters to apply. If omitted
                (or None), an empty list is sent.
            activity_exclusion_filter:
                A list containing the activities to exclude. If omitted (or
                None), an empty list is sent.
            value_sorting:
                A string denoting the metric to sort the aggregation by.
            sorting_order:
                A string denoting the order of the sorting.
            values_from:
                A string denoting which values to consider for the aggregation.
            aggregation_function:
                An optional string denoting the aggregation function to use for
                numeric attribute metrics.
                Can be one of ["min", "max", "sum", "mean", "median", "variance",
                "standardDeviation"].
            percentile:
                An optional float denoting the percentile to use if instead of
                the available aggregation types listed for aggregation_function
                a percentile aggregation should be used.
            attribute:
                An optional string denoting the attribute to use when grouping
                is set to 'byAttribute'.
            activities:
                An optional list denoting the activities to use when grouping
                is set to 'byActivity'.
            date_type:
                An optional string denoting the date type to use when a time
                grouping is used. It has to be 'startDate' or 'endDate'.
            secondary_attribute:
                An optional string denoting the attribute to use when secondary
                grouping is set to 'byAttribute'.
            secondary_activities:
                An optional list denoting the activities to use when secondary
                grouping is set to 'byActivity'.
            secondary_date_type:
                An optional string denoting the date type to use when a secondary
                time grouping is used. It has to be 'startDate' or 'endDate'.
            **kwargs:
                Keyword arguments passed to requests functions.

        Returns:
            A pandas DataFrame containing the aggregated data. An empty
            DataFrame is returned if the response status code is 400 or
            higher.
        """
        # Use a fresh list per call instead of a mutable default argument,
        # which would be one shared object across every invocation.
        if trace_filter_sequence is None:
            trace_filter_sequence = []
        if activity_exclusion_filter is None:
            activity_exclusion_filter = []

        request_data = {
            'metric': create_metric(metric, aggregation_function, percentile),
            'valuesFrom': {
                'type': values_from
            },
            'miningRequest': {
                'logId': log_id,
                'activityExclusionFilter': activity_exclusion_filter,
                'traceFilterSequence': trace_filter_sequence
            },
            'options': {
                'maxAmountAttributes': max_amount_attributes,
                'valueSorting': value_sorting,
                'sortingOrder': sorting_order
            }
        }

        # Groupings are optional; the keys are only sent when requested.
        if grouping is not None:
            request_data['grouping'] = create_grouping(grouping, date_type,
                                                       activities, attribute)

        if secondary_grouping is not None:
            request_data['secondaryGrouping'] = create_grouping(
                secondary_grouping, secondary_date_type, secondary_activities,
                secondary_attribute)

        aggregate_response = self.post('/api/v2/aggregate-data',
                                       json=request_data,
                                       **kwargs)

        # Error responses are reported to the caller as an empty frame
        # rather than as an exception.
        if aggregate_response.status_code >= 400:
            return pd.DataFrame()

        response_df = extract_chart_values(aggregate_response.json())
        if secondary_grouping is not None:
            response_df = normalise_chart_values(response_df, 'values')

        # Prefer the concrete attribute name as column label and fall back to
        # the grouping name when no attribute was given.
        x_label = attribute if attribute is not None else grouping
        z_label = (secondary_attribute
                   if secondary_attribute is not None else secondary_grouping)

        response_df = response_df.rename(columns={
            'xAxis': x_label,
            'yAxis': metric,
            'zAxis': z_label
        })
        return response_df