def test_create_metric_attribute_with_agg_function(self):
    """A numeric attribute metric with an aggregator yields a flat aggregationFunction."""
    result = create_metric('Cost', 'sum')
    self.assertDictEqual(result, {
        'type': 'attribute',
        'attribute': 'Cost',
        'aggregationFunction': 'sum'
    })
def test_create_metric_duration_with_percentile(self):
    """A duration metric with a percentile yields a nested percentile aggregation."""
    result = create_metric('duration', None, 12)
    self.assertDictEqual(result, {
        'type': 'duration',
        'aggregationFunction': {
            'type': 'percentile',
            'percentile': 12
        }
    })
def test_create_metric_attribute_with_percentile(self):
    """An attribute metric with a percentile yields a nested percentile aggregation."""
    result = create_metric('Cost', aggregator=None, percentile=12)
    self.assertDictEqual(result, {
        'type': 'attribute',
        'attribute': 'Cost',
        'aggregationFunction': {
            'type': 'percentile',
            'percentile': 12
        }
    })
def test_create_metric_attribute_with_agg_function_and_percentile(self):
    """Passing both an aggregator and a percentile is rejected."""
    with self.assertRaises(Exception):
        _ = create_metric('Cost', aggregator='sum', percentile=12)
def test_create_metric_frequency(self):
    """A frequency metric needs no aggregation function."""
    result = create_metric('frequency')
    self.assertDictEqual(result, {'type': 'frequency'})
def test_create_metric_attribute(self):
    """An attribute metric without any aggregation is rejected."""
    with self.assertRaises(Exception):
        _ = create_metric('Cost')
def test_create_metric_duration_with_agg_function(self):
    """A duration metric with an aggregator yields a flat aggregationFunction."""
    result = create_metric('duration', 'sum')
    self.assertDictEqual(result, {'type': 'duration', 'aggregationFunction': 'sum'})
def test_create_metric_duration(self):
    """A duration metric without any aggregation is rejected."""
    with self.assertRaises(Exception):
        _ = create_metric('duration')
def test_create_metric_frequency_with_percentile(self):
    """A percentile passed with a frequency metric is silently ignored."""
    result = create_metric('frequency', None, 12)
    self.assertDictEqual(result, {'type': 'frequency'})
def test_create_metric_frequency_with_agg_function(self):
    """An aggregator passed with a frequency metric is silently ignored."""
    result = create_metric('frequency', 'sum')
    self.assertDictEqual(result, {'type': 'frequency'})
def aggregate(self,
              log_id: str,
              metric: str,
              grouping: Optional[str] = None,
              secondary_grouping: Optional[str] = None,
              max_amount_attributes: int = 10,
              trace_filter_sequence: Optional[list] = None,
              activity_exclusion_filter: Optional[list] = None,
              value_sorting: str = 'caseCount',
              sorting_order: str = 'descending',
              values_from: str = 'allCases',
              aggregation_function: Optional[str] = None,
              percentile: Optional[float] = None,
              attribute: Optional[str] = None,
              secondary_attribute: Optional[str] = None,
              activities: Optional[list] = None,
              secondary_activities: Optional[list] = None,
              date_type: Optional[str] = None,
              secondary_date_type: Optional[str] = None,
              **kwargs) -> pd.DataFrame:
    """An aggregation function for the computation of KPIs and grouping by
    metrics allowing the creation of bar-charts, line-charts, and other
    visualizations.

    Args:
        log_id: A string denoting the id of the log to aggregate.
        metric: A string denoting the metric to use. For the value
            "frequency", a frequency metric is returned and for "duration" a
            duration metric is returned. Otherwise the value is interpreted
            as a numeric attribute metric.
        grouping: A string denoting the grouping to use. For the value
            "byDuration", a duration grouping is returned. For "byAttribute"
            a grouping by a categorical attribute (the variable attribute
            needs to be passed) is returned. For one of ["byYear", "byMonth",
            "byQuarter", "byDayOfWeek", "byDayOfYear", "byHourOfDay"] a time
            grouping is returned (date_type also needs to be set). If the
            activity aggregation "byActivity" is used, the activities to
            aggregate over need to be passed as list.
        secondary_grouping: A string denoting the optional secondary grouping
            to use. For the value "byDuration", a duration grouping is
            returned. For "byAttribute" a grouping by a categorical attribute
            (the variable secondary_attribute needs to be passed) is
            returned. For one of ["byYear", "byMonth", "byQuarter",
            "byDayOfWeek", "byDayOfYear", "byHourOfDay"] a time grouping is
            returned (secondary_date_type also needs to be set). If the
            activity aggregation "byActivity" is used, the
            secondary_activities to aggregate over need to be passed as list.
        max_amount_attributes: An integer denoting the maximum amount of
            attributes to return.
        trace_filter_sequence: A list containing the sequence of filters to
            apply. Defaults to an empty sequence when omitted.
        activity_exclusion_filter: A list containing the activities to
            exclude. Defaults to an empty list when omitted.
        value_sorting: A string denoting the metric to sort the aggregation by.
        sorting_order: A string denoting the order of the sorting.
        values_from: A string denoting which values to consider for the
            aggregation.
        aggregation_function: An optional string denoting the aggregation
            function to use for numeric attribute metrics. Can be one of
            ["min", "max", "sum", "mean", "median", "variance",
            "standardDeviation"].
        percentile: An optional float denoting the percentile to use if
            instead of the available aggregation types listed for
            aggregation_function a percentile aggregation should be used.
        attribute: An optional string denoting the attribute to use when
            grouping is set to 'byAttribute'.
        secondary_attribute: An optional string denoting the attribute to use
            when secondary grouping is set to 'byAttribute'.
        activities: An optional list denoting the activities to use when
            grouping is set to 'byActivity'.
        secondary_activities: An optional list denoting the activities to use
            when secondary grouping is set to 'byActivity'.
        date_type: An optional string denoting the date type to use when a
            time grouping is used. It has to be 'startDate' or 'endDate'.
        secondary_date_type: An optional string denoting the date type to use
            when a secondary time grouping is used. It has to be 'startDate'
            or 'endDate'.
        **kwargs: Keyword arguments passed to requests functions.

    Returns:
        A pandas DataFrame containing the aggregated data. An empty DataFrame
        is returned when the request fails with a status code >= 400.
    """
    # Avoid the mutable-default-argument pitfall: a shared list default would
    # leak state between calls if a callee ever mutated it. None is the real
    # default and is replaced by a fresh list per call.
    if trace_filter_sequence is None:
        trace_filter_sequence = []
    if activity_exclusion_filter is None:
        activity_exclusion_filter = []

    request_data = {
        'metric': create_metric(metric, aggregation_function, percentile),
        'valuesFrom': {
            'type': values_from
        },
        'miningRequest': {
            'logId': log_id,
            'activityExclusionFilter': activity_exclusion_filter,
            'traceFilterSequence': trace_filter_sequence
        },
        'options': {
            'maxAmountAttributes': max_amount_attributes,
            'valueSorting': value_sorting,
            'sortingOrder': sorting_order
        }
    }
    # Groupings are optional; only attach them when requested so the backend
    # does not receive null grouping entries.
    if grouping is not None:
        request_data['grouping'] = create_grouping(grouping, date_type, activities, attribute)
    if secondary_grouping is not None:
        request_data['secondaryGrouping'] = create_grouping(
            secondary_grouping, secondary_date_type, secondary_activities, secondary_attribute)

    aggregate_response = self.post('/api/v2/aggregate-data', json=request_data, **kwargs)
    # Best-effort API: failures yield an empty frame instead of raising.
    if aggregate_response.status_code >= 400:
        return pd.DataFrame()

    response_df = extract_chart_values(aggregate_response.json())
    # A secondary grouping nests values one level deeper; flatten before
    # renaming the axes.
    if secondary_grouping is not None:
        response_df = normalise_chart_values(response_df, 'values')
    response_df = response_df.rename(
        columns={
            'xAxis': attribute if attribute is not None else grouping,
            'yAxis': metric,
            'zAxis': secondary_attribute if secondary_attribute is not None else secondary_grouping
        })
    return response_df