Esempio n. 1
0
    def test_create_metric_attribute_with_agg_function(self):
        """Check the metric built for 'Cost' with the 'sum' aggregator.

        The result is expected to be an attribute metric that carries the
        attribute name and the aggregator as a plain string.
        """
        metric = create_metric('Cost', 'sum')
        wanted = {
            'type': 'attribute',
            'attribute': 'Cost',
            'aggregationFunction': 'sum',
        }
        self.assertDictEqual(metric, wanted)
Esempio n. 2
0
    def test_create_metric_duration_with_percentile(self):
        """Check the 'duration' metric built with a percentile and no aggregator.

        The percentile is expected to show up as a nested
        ``aggregationFunction`` mapping of type 'percentile'.
        """
        percentile_spec = {'type': 'percentile', 'percentile': 12}
        wanted = {'type': 'duration', 'aggregationFunction': percentile_spec}

        produced = create_metric('duration', None, 12)

        self.assertDictEqual(produced, wanted)
Esempio n. 3
0
    def test_create_metric_attribute_with_percentile(self):
        """Check the metric built for 'Cost' with a percentile and no aggregator.

        The result is expected to be an attribute metric whose
        ``aggregationFunction`` is a nested percentile mapping.
        """
        percentile_spec = {'type': 'percentile', 'percentile': 12}
        wanted = {
            'type': 'attribute',
            'attribute': 'Cost',
            'aggregationFunction': percentile_spec,
        }

        produced = create_metric('Cost', aggregator=None, percentile=12)

        self.assertDictEqual(produced, wanted)
Esempio n. 4
0
 def test_create_metric_attribute_with_agg_function_and_percentile(self):
     """Expect an exception when an aggregator and a percentile are both given."""
     conflicting_options = {'aggregator': 'sum', 'percentile': 12}
     with self.assertRaises(Exception):
         create_metric('Cost', **conflicting_options)
Esempio n. 5
0
    def test_create_metric_frequency(self):
        """A bare 'frequency' metric is expected to contain only its type."""
        self.assertDictEqual(create_metric('frequency'), {'type': 'frequency'})
Esempio n. 6
0
 def test_create_metric_attribute(self):
     """Expect an exception for 'Cost' without aggregator or percentile."""
     metric_name = 'Cost'
     with self.assertRaises(Exception):
         create_metric(metric_name)
Esempio n. 7
0
    def test_create_metric_duration_with_agg_function(self):
        """Check the 'duration' metric built with the 'sum' aggregator.

        The aggregator is expected to be stored as a plain string.
        """
        wanted = {
            'type': 'duration',
            'aggregationFunction': 'sum',
        }
        produced = create_metric('duration', 'sum')
        self.assertDictEqual(produced, wanted)
Esempio n. 8
0
 def test_create_metric_duration(self):
     """Expect an exception for 'duration' without aggregator or percentile."""
     metric_name = 'duration'
     with self.assertRaises(Exception):
         create_metric(metric_name)
Esempio n. 9
0
    def test_create_metric_frequency_with_percentile(self):
        """A percentile passed with 'frequency' should not appear in the result."""
        produced = create_metric('frequency', None, 12)
        self.assertDictEqual(produced, {'type': 'frequency'})
Esempio n. 10
0
    def test_create_metric_frequency_with_agg_function(self):
        """An aggregator passed with 'frequency' should not appear in the result."""
        produced = create_metric('frequency', 'sum')
        self.assertDictEqual(produced, {'type': 'frequency'})
Esempio n. 11
0
    def aggregate(self,
                  log_id: str,
                  metric: str,
                  grouping: Optional[str] = None,
                  secondary_grouping: Optional[str] = None,
                  max_amount_attributes: int = 10,
                  trace_filter_sequence: Optional[list] = None,
                  activity_exclusion_filter: Optional[list] = None,
                  value_sorting: str = 'caseCount',
                  sorting_order: str = 'descending',
                  values_from: str = 'allCases',
                  aggregation_function: Optional[str] = None,
                  percentile: Optional[float] = None,
                  attribute: Optional[str] = None,
                  secondary_attribute: Optional[str] = None,
                  activities: Optional[list] = None,
                  secondary_activities: Optional[list] = None,
                  date_type: Optional[str] = None,
                  secondary_date_type: Optional[str] = None,
                  **kwargs) -> pd.DataFrame:
        """
        An aggregation function for the computation of KPIs and grouping by
        metrics allowing the creation of bar-charts, line-charts, and other
        visualizations.

        The request is posted to '/api/v2/aggregate-data' and the chart
        values of the response are returned as a DataFrame whose axis
        columns are renamed after the metric and the grouping(s) used.

        Args:
            log_id:
                A string denoting the id of the log to aggregate.
            metric:
                A string denoting the metric to use. For the value
                "frequency", a frequency metric is returned and for "duration" a
                duration metric is returned. Otherwise the value is interpreted
                as a numeric attribute metric.
            grouping:
                A string denoting the grouping to use. For the value
                "byDuration", a duration grouping is returned. For "byAttribute"
                a grouping by a categorical attribute (the variable attribute
                needs to be passed) is returned. For one of ["byYear", "byMonth",
                "byQuarter", "byDayOfWeek", "byDayOfYear", "byHourOfDay"] a time
                grouping is returned (date_type also needs to be set). If the
                activity aggregation "byActivity" is used, the activities to
                aggregate over need to be passed as list.
            secondary_grouping:
                A string denoting the optional secondary grouping to use.
                For the value "byDuration", a duration grouping is returned. For
                "byAttribute" a grouping by a categorical attribute (the variable
                secondary_attribute needs to be passed) is returned. For one of
                ["byYear", "byMonth", "byQuarter", "byDayOfWeek", "byDayOfYear",
                "byHourOfDay"] a time grouping is returned (secondary_date_type
                also needs to be set). If the activity aggregation "byActivity"
                is used, the secondary_activities to aggregate over need to be
                passed as list.
            max_amount_attributes:
                An integer denoting the maximum amount of attributes to return.
            trace_filter_sequence:
                A list containing the sequence of filters to apply. If omitted
                (or None), an empty list is sent.
            activity_exclusion_filter:
                A list containing the activities to exclude. If omitted (or
                None), an empty list is sent.
            value_sorting:
                A string denoting the metric to sort the aggregation by.
            sorting_order:
                A string denoting the order of the sorting.
            values_from:
                A string denoting which values to consider for the aggregation.
            aggregation_function:
                An optional string denoting the aggregation function to use for
                numeric attribute metrics.
                Can be one of ["min", "max", "sum", "mean", "median", "variance",
                "standardDeviation"].
            percentile:
                An optional float denoting the percentile to use if instead of
                the available aggregation types listed for aggregation_function
                a percentile aggregation should be used.
            attribute:
                An optional string denoting the attribute to use when grouping
                is set to 'byAttribute'.
            secondary_attribute:
                An optional string denoting the attribute to use when secondary
                grouping is set to 'byAttribute'.
            activities:
                An optional list denoting the activities to use when grouping
                is set to 'byActivity'.
            secondary_activities:
                An optional list denoting the activities to use when secondary
                grouping is set to 'byActivity'.
            date_type:
                An optional string denoting the date type to use when a time
                grouping is used. It has to be 'startDate' or 'endDate'.
            secondary_date_type:
                An optional string denoting the date type to use when a secondary
                time grouping is used. It has to be 'startDate' or 'endDate'.
            **kwargs:
                Additional keyword arguments forwarded to ``self.post``.

        Returns:
            A pandas DataFrame containing the aggregated data. An empty
            DataFrame is returned when the response status code is 400 or
            above.
        """
        # Build fresh lists per call. A literal ``[]`` default is a single
        # object shared by every call, and it would be embedded by reference
        # in the request payload below.
        if trace_filter_sequence is None:
            trace_filter_sequence = []
        if activity_exclusion_filter is None:
            activity_exclusion_filter = []

        request_data = {
            'metric': create_metric(metric, aggregation_function, percentile),
            'valuesFrom': {
                'type': values_from
            },
            'miningRequest': {
                'logId': log_id,
                'activityExclusionFilter': activity_exclusion_filter,
                'traceFilterSequence': trace_filter_sequence
            },
            'options': {
                'maxAmountAttributes': max_amount_attributes,
                'valueSorting': value_sorting,
                'sortingOrder': sorting_order
            }
        }

        # Groupings are optional; their keys are only sent when requested.
        if grouping is not None:
            request_data['grouping'] = create_grouping(grouping, date_type,
                                                       activities, attribute)

        if secondary_grouping is not None:
            request_data['secondaryGrouping'] = create_grouping(
                secondary_grouping, secondary_date_type, secondary_activities,
                secondary_attribute)

        aggregate_response = self.post('/api/v2/aggregate-data',
                                       json=request_data,
                                       **kwargs)

        # Error responses are not raised; callers receive an empty frame.
        if aggregate_response.status_code >= 400:
            return pd.DataFrame()

        response_df = extract_chart_values(aggregate_response.json())
        if secondary_grouping is not None:
            response_df = normalise_chart_values(response_df, 'values')

        # Prefer the attribute name over the grouping name for axis labels.
        # NOTE(review): if neither is given, the target label is None.
        x_label = attribute if attribute is not None else grouping
        z_label = (secondary_attribute
                   if secondary_attribute is not None else secondary_grouping)

        response_df = response_df.rename(columns={
            'xAxis': x_label,
            'yAxis': metric,
            'zAxis': z_label
        })
        return response_df