def discover_dfg(log: Union[EventLog, pd.DataFrame]) -> Tuple[dict, dict, dict]:
    """
    Discovers a directly-follows graph (DFG) from a log

    Parameters
    --------------
    log
        Event log

    Returns
    --------------
    dfg
        DFG (dictionary of directly-follows pairs with their frequency)
    start_activities
        Start activities along with their count
    end_activities
        End activities along with their count
    """
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        # dataframe variant: dedicated pandas-based retrieval/statistics modules
        from pm4py.objects.dfg.retrieval.pandas import get_dfg_graph
        from pm4py.statistics.start_activities.pandas import get as start_activities_module
        from pm4py.statistics.end_activities.pandas import get as end_activities_module
        dfg = get_dfg_graph(log)
    else:
        from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
        from pm4py.statistics.start_activities.log import get as start_activities_module
        from pm4py.statistics.end_activities.log import get as end_activities_module
        dfg = dfg_discovery.apply(log)
    start_activities = start_activities_module.get_start_activities(log)
    end_activities = end_activities_module.get_end_activities(log)
    return dfg, start_activities, end_activities
def filter_trace_attribute_values(log: Union[EventLog, pd.DataFrame], attribute_key: str, values: List[str],
                                  retain: bool = True) -> Union[EventLog, pd.DataFrame]:
    """
    Filter a log on the values of a trace attribute

    Parameters
    --------------
    log
        Event log
    attribute_key
        Attribute to filter
    values
        Values to filter (list of)
    retain
        Boolean value (keep/discard matching traces)

    Returns
    --------------
    filtered_log
        Filtered event log
    """
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.attributes import attributes_filter
        params = {attributes_filter.Parameters.ATTRIBUTE_KEY: attribute_key,
                  attributes_filter.Parameters.POSITIVE: retain}
        return attributes_filter.apply(log, values, parameters=params)
    from pm4py.algo.filtering.log.attributes import attributes_filter
    params = {attributes_filter.Parameters.ATTRIBUTE_KEY: attribute_key,
              attributes_filter.Parameters.POSITIVE: retain}
    return attributes_filter.apply_trace_attribute(log, values, parameters=params)
def get_variants(log: Union[EventLog, pd.DataFrame]) -> Dict[str, List[Trace]]:
    """
    Gets the variants from the log

    Parameters
    --------------
    log
        Event log

    Returns
    --------------
    variants
        Dictionary of variants along with their count
    """
    import pm4py
    # string-typed variants are deprecated; steer users to the tuple-based API
    spec = pm4py.util.variants_util.VARIANT_SPECIFICATION
    if spec == pm4py.util.variants_util.VariantsSpecifications.STRING:
        import warnings
        warnings.warn('pm4py.get_variants is deprecated. Please use pm4py.get_variants_as_tuples instead.')
    if spec == pm4py.util.variants_util.VariantsSpecifications.LIST:
        raise Exception('Please use pm4py.get_variants_as_tuples')
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.statistics.variants.pandas import get
        return get.get_variants_count(log)
    from pm4py.statistics.variants.log import get
    return get.get_variants(log)
def get_variants_as_tuples(log: Union[EventLog, pd.DataFrame]) -> Dict[Tuple[str], List[Trace]]:
    """
    Gets the variants from the log (where the keys are tuples and not strings)

    Parameters
    --------------
    log
        Event log

    Returns
    --------------
    variants
        Dictionary of variants along with their count
    """
    import pm4py
    # switch pm4py's global variant representation so tuples are produced
    pm4py.util.variants_util.VARIANT_SPECIFICATION = pm4py.util.variants_util.VariantsSpecifications.LIST
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.statistics.variants.pandas import get
        return get.get_variants_count(log)
    from pm4py.statistics.variants.log import get
    return get.get_variants(log)
def filter_end_activities(log, activities, retain=True):
    """
    Filter cases having an end activity in the provided list

    Parameters
    ---------------
    log
        Log object
    activities
        List of admitted end activities
    retain
        if True, we retain the traces containing the given activities, if false, we drop the traces

    Returns
    ---------------
    filtered_log
        Filtered log object
    """
    # pick the implementation matching the log representation, then apply once
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.end_activities import end_activities_filter
    else:
        from pm4py.algo.filtering.log.end_activities import end_activities_filter
    return end_activities_filter.apply(log, activities,
                                       parameters={end_activities_filter.Parameters.POSITIVE: retain})
def filter_start_activities(
        log: Union[EventLog, pd.DataFrame], activities: List[str],
        retain: bool = True) -> Union[EventLog, pd.DataFrame]:
    """
    Filter cases having a start activity in the provided list

    Parameters
    --------------
    log
        Log object
    activities
        List start activities
    retain
        if True, we retain the traces containing the given activities, if false, we drop the traces

    Returns
    --------------
    filtered_log
        Filtered log object
    """
    # pick the implementation matching the log representation, then apply once
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.start_activities import start_activities_filter
    else:
        from pm4py.algo.filtering.log.start_activities import start_activities_filter
    return start_activities_filter.apply(log, activities,
                                         parameters={start_activities_filter.Parameters.POSITIVE: retain})
def filter_variants(log, variants, retain=True):
    """
    Filter a log on a specified set of variants

    Parameters
    ---------------
    log
        Event log
    variants
        collection of variants to filter; A variant should be specified as a list of activity names,
        e.g., ['a','b','c']
    retain
        boolean; if True all traces conforming to the specified variants are retained; if False,
        all those traces are removed

    Returns
    --------------
    filtered_log
        Filtered log object
    """
    # the underlying filters expect comma-joined variant strings
    joined_variants = [",".join(variant) for variant in variants]
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.variants import variants_filter
    else:
        from pm4py.algo.filtering.log.variants import variants_filter
    return variants_filter.apply(log, joined_variants,
                                 parameters={variants_filter.Parameters.POSITIVE: retain})
def filter_trace_attribute(log, attribute, values, positive=True):
    """
    Filter a log on the values of a trace attribute

    Parameters
    --------------
    log
        Event log
    attribute
        Attribute to filter
    values
        Values to filter (list of)
    positive
        Boolean value (keep/discard cases)

    Returns
    --------------
    filtered_log
        Filtered event log
    """
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.attributes import attributes_filter
        params = {attributes_filter.Parameters.ATTRIBUTE_KEY: attribute,
                  attributes_filter.Parameters.POSITIVE: positive}
        return attributes_filter.apply(log, values, parameters=params)
    from pm4py.algo.filtering.log.attributes import attributes_filter
    params = {attributes_filter.Parameters.ATTRIBUTE_KEY: attribute,
              attributes_filter.Parameters.POSITIVE: positive}
    return attributes_filter.apply_trace_attribute(log, values, parameters=params)
def discover_handover_of_work_network(log: Union[EventLog, pd.DataFrame], beta=0):
    """
    Calculates the handover of work network of the event log.

    The handover of work network is essentially the DFG of the event log, however, using the
    resource as a node of the graph, instead of the activity.
    As such, to use this, resource information should be present in the event log.

    Parameters
    ---------------
    log
        Event log or Pandas dataframe
    beta
        beta parameter for Handover metric

    Returns
    ---------------
    metric_values
        Values of the metric
    """
    from pm4py.algo.organizational_mining.sna import algorithm as sna
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        variant = sna.Variants.HANDOVER_PANDAS
    else:
        variant = sna.Variants.HANDOVER_LOG
    return sna.apply(log, variant=variant, parameters={"beta": beta})
def filter_event_attribute_values(log: Union[EventLog, pd.DataFrame], attribute_key: str, values: List[str],
                                  level: str = "case", retain: bool = True) -> Union[EventLog, pd.DataFrame]:
    """
    Filter a log object on the values of some event attribute

    Parameters
    --------------
    log
        Log object
    attribute_key
        Attribute to filter
    values
        Admitted (or forbidden) values
    level
        Specifies how the filter should be applied ('case' filters the cases where at least one
        occurrence happens, 'event' filter the events eventually trimming the cases)
    retain
        Specified if the values should be kept or removed

    Returns
    --------------
    filtered_log
        Filtered log object

    Raises
    --------------
    ValueError
        If ``level`` is neither 'event' nor 'case' (previously the function silently returned None)
    """
    if level not in ("event", "case"):
        # fail loudly instead of silently returning None for an unsupported level
        raise ValueError("level should be either 'event' or 'case'")
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.attributes import attributes_filter
    else:
        from pm4py.algo.filtering.log.attributes import attributes_filter
    parameters = {constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute_key,
                  attributes_filter.Parameters.POSITIVE: retain}
    if level == "event":
        return attributes_filter.apply_events(log, values, parameters=parameters)
    return attributes_filter.apply(log, values, parameters=parameters)
def filter_attribute_values(log, attribute, values, how="cases", positive=True):
    """
    Filter a log object on the values of some attribute

    Parameters
    --------------
    log
        Log object
    attribute
        Attribute to filter
    values
        Admitted (or forbidden) values
    how
        Specifies how the filter should be applied ('events' filters the events eventually
        trimming the cases; any other value filters at the case level, where at least one
        occurrence must happen)
    positive
        Specified if the values should be kept or removed

    Returns
    --------------
    filtered_log
        Filtered log object
    """
    # BUGFIX: the dataframe branch previously returned None for any `how` other than
    # "events"/"cases", while the log branch treated every non-"events" value as case-level.
    # Both branches now share the log branch's behavior: "events" -> event-level filtering,
    # anything else -> case-level filtering.
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.attributes import attributes_filter
    else:
        from pm4py.algo.filtering.log.attributes import attributes_filter
    parameters = {constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute,
                  attributes_filter.Parameters.POSITIVE: positive}
    if how == "events":
        return attributes_filter.apply_events(log, values, parameters=parameters)
    return attributes_filter.apply(log, values, parameters=parameters)
def get_attributes(log):
    """
    Returns the attributes at the event level of the log

    Parameters
    ---------------
    log
        Log object

    Returns
    ---------------
    attributes_list
        List of attributes contained in the log
    """
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        # for a dataframe, every column is an event-level attribute
        return list(log.columns)
    from pm4py.statistics.attributes.log import get
    return list(get.get_all_event_attributes_from_log(log))
def discover_activity_based_resource_similarity(log: Union[EventLog, pd.DataFrame]):
    """
    Calculates similarity between the resources in the event log, based on their activity profiles.

    Parameters
    ---------------
    log
        Event log or Pandas dataframe

    Returns
    ---------------
    metric_values
        Values of the metric
    """
    from pm4py.algo.organizational_mining.sna import algorithm as sna
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        variant = sna.Variants.JOINTACTIVITIES_PANDAS
    else:
        variant = sna.Variants.JOINTACTIVITIES_LOG
    return sna.apply(log, variant=variant)
def discover_eventually_follows_graph(log: Union[EventLog, pd.DataFrame]) -> Dict[Tuple[str, str], int]:
    """
    Gets the eventually follows graph from a log object

    Parameters
    ---------------
    log
        Log object

    Returns
    ---------------
    eventually_follows_graph
        Dictionary of tuples of activities that eventually follows each other; along with the
        number of occurrences
    """
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.statistics.eventually_follows.pandas import get
    else:
        from pm4py.statistics.eventually_follows.log import get
    return get.apply(log)
def get_case_arrival_average(log: Union[EventLog, pd.DataFrame]) -> float:
    """
    Gets the average difference between the start times of two consecutive cases

    Parameters
    ---------------
    log
        Log object

    Returns
    ---------------
    case_arrival_average
        Average difference between the start times of two consecutive cases
    """
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.statistics.traces.pandas import case_arrival
    else:
        from pm4py.statistics.traces.log import case_arrival
    return case_arrival.get_case_arrival_avg(log)
def discover_working_together_network(log: Union[EventLog, pd.DataFrame]):
    """
    Calculates the working together network of the process.

    Two nodes resources are connected in the graph if the resources collaborate on an instance
    of the process.

    Parameters
    ---------------
    log
        Event log or Pandas dataframe

    Returns
    ---------------
    metric_values
        Values of the metric
    """
    from pm4py.algo.organizational_mining.sna import algorithm as sna
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        variant = sna.Variants.WORKING_TOGETHER_PANDAS
    else:
        variant = sna.Variants.WORKING_TOGETHER_LOG
    return sna.apply(log, variant=variant)
def get_end_activities(log):
    """
    Returns the end activities of a log

    Parameters
    ---------------
    log
        Log object

    Returns
    ---------------
    end_activities
        Dictionary of end activities along with their count
    """
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.statistics.end_activities.pandas import get
    else:
        from pm4py.statistics.end_activities.log import get
    return get.get_end_activities(log)
def get_start_activities(log):
    """
    Returns the start activities from a log object

    Parameters
    ---------------
    log
        Log object

    Returns
    ---------------
    start_activities
        Dictionary of start activities along with their count
    """
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.statistics.start_activities.pandas import get
    else:
        from pm4py.statistics.start_activities.log import get
    return get.get_start_activities(log)
def get_trace_attributes(log):
    """
    Gets the attributes at the trace level of a log object

    Parameters
    ----------------
    log
        Log object

    Returns
    ---------------
    trace_attributes_list
        List of attributes at the trace level
    """
    from pm4py.util import constants
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        # trace-level attributes in a dataframe are the columns carrying the case prefix
        prefix = constants.CASE_ATTRIBUTE_PREFIX
        return [col for col in log.columns if col.startswith(prefix)]
    from pm4py.statistics.attributes.log import get
    return list(get.get_all_trace_attributes_from_log(log))
def get_variants(log):
    """
    Gets the variants from the log

    Parameters
    --------------
    log
        Event log

    Returns
    --------------
    variants
        Dictionary of variants along with their count
    """
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.statistics.variants.pandas import get
        return get.get_variants_count(log)
    from pm4py.statistics.variants.log import get
    return get.get_variants(log)
def view_events_per_time_graph(log: Union[EventLog, pd.DataFrame], format: str = "png"):
    """
    Visualizes the events per time graph

    Parameters
    -----------------
    log
        Log object
    format
        Format of the visualization (png, svg, ...)
    """
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.statistics.attributes.pandas import get as attributes_get
    else:
        from pm4py.statistics.attributes.log import get as attributes_get
    # graph is a (x-values, y-values) pair produced by the KDE estimation
    graph = attributes_get.get_kde_date_attribute(log)
    from pm4py.visualization.graphs import visualizer as graphs_visualizer
    graph_vis = graphs_visualizer.apply(graph[0], graph[1],
                                        variant=graphs_visualizer.Variants.DATES,
                                        parameters={"format": format})
    graphs_visualizer.view(graph_vis)
def view_case_duration_graph(log: Union[EventLog, pd.DataFrame], format: str = "png"):
    """
    Visualizes the case duration graph

    Parameters
    -----------------
    log
        Log object
    format
        Format of the visualization (png, svg, ...)
    """
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.statistics.traces.pandas import case_statistics
    else:
        from pm4py.statistics.traces.log import case_statistics
    # graph is a (x-values, y-values) pair produced by the KDE estimation
    graph = case_statistics.get_kde_caseduration(log)
    from pm4py.visualization.graphs import visualizer as graphs_visualizer
    graph_vis = graphs_visualizer.apply(graph[0], graph[1],
                                        variant=graphs_visualizer.Variants.CASES,
                                        parameters={"format": format})
    graphs_visualizer.view(graph_vis)
def discover_subcontracting_network(log: Union[EventLog, pd.DataFrame], n=2):
    """
    Calculates the subcontracting network of the process.

    Parameters
    ---------------
    log
        Event log or Pandas dataframe
    n
        n parameter for Subcontracting metric

    Returns
    ---------------
    metric_values
        Values of the metric
    """
    from pm4py.algo.organizational_mining.sna import algorithm as sna
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        variant = sna.Variants.SUBCONTRACTING_PANDAS
    else:
        variant = sna.Variants.SUBCONTRACTING_LOG
    return sna.apply(log, variant=variant, parameters={"n": n})
def get_attribute_values(log, attribute):
    """
    Returns the values for a specified attribute

    Parameters
    ---------------
    log
        Log object
    attribute
        Attribute

    Returns
    ---------------
    attribute_values
        Dictionary of values along with their count
    """
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.statistics.attributes.pandas import get
    else:
        from pm4py.statistics.attributes.log import get
    return get.get_attribute_values(log, attribute)
def save_vis_events_per_time_graph(log: Union[EventLog, pd.DataFrame], file_path: str):
    """
    Saves the events per time graph in the specified path

    Parameters
    ----------------
    log
        Log object
    file_path
        Destination path (the image format is inferred from the file extension)
    """
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.statistics.attributes.pandas import get as attributes_get
    else:
        from pm4py.statistics.attributes.log import get as attributes_get
    # graph is a (x-values, y-values) pair produced by the KDE estimation
    graph = attributes_get.get_kde_date_attribute(log)
    # BUGFIX: use the LAST dot to find the extension; the previous index(".") broke paths
    # containing earlier dots such as "./out/graph.png" or "report.v1.png"
    format = file_path[file_path.rindex(".") + 1:].lower()
    from pm4py.visualization.graphs import visualizer as graphs_visualizer
    graph_vis = graphs_visualizer.apply(graph[0], graph[1],
                                        variant=graphs_visualizer.Variants.DATES,
                                        parameters={"format": format})
    graphs_visualizer.save(graph_vis, file_path)
def save_vis_case_duration_graph(log: Union[EventLog, pd.DataFrame], file_path: str):
    """
    Saves the case duration graph in the specified path

    Parameters
    ----------------
    log
        Log object
    file_path
        Destination path (the image format is inferred from the file extension)
    """
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.statistics.traces.pandas import case_statistics
    else:
        from pm4py.statistics.traces.log import case_statistics
    # graph is a (x-values, y-values) pair produced by the KDE estimation
    graph = case_statistics.get_kde_caseduration(log)
    # BUGFIX: use the LAST dot to find the extension; the previous index(".") broke paths
    # containing earlier dots such as "./out/graph.png" or "report.v1.png"
    format = file_path[file_path.rindex(".") + 1:].lower()
    from pm4py.visualization.graphs import visualizer as graphs_visualizer
    graph_vis = graphs_visualizer.apply(graph[0], graph[1],
                                        variant=graphs_visualizer.Variants.CASES,
                                        parameters={"format": format})
    graphs_visualizer.save(graph_vis, file_path)
def get_trace_attribute_values(log: Union[EventLog, pd.DataFrame], attribute: str) -> Dict[str, int]:
    """
    Returns the values for a specified trace attribute

    Parameters
    ---------------
    log
        Log object
    attribute
        Attribute

    Returns
    ---------------
    attribute_values
        Dictionary of values along with their count
    """
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        # in a dataframe, trace attributes are plain columns: reuse the generic getter
        from pm4py.statistics.attributes.pandas import get
        return get.get_attribute_values(log, attribute)
    from pm4py.statistics.attributes.log import get
    return get.get_trace_attribute_values(log, attribute)
def filter_start_activities(log, admitted_start_activities):
    """
    Filter cases having a start activity in the provided list

    Parameters
    --------------
    log
        Log object
    admitted_start_activities
        List of admitted start activities

    Returns
    --------------
    filtered_log
        Filtered log object
    """
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.start_activities import start_activities_filter
    else:
        from pm4py.algo.filtering.log.start_activities import start_activities_filter
    return start_activities_filter.apply(log, admitted_start_activities)
def filter_variants(log, admitted_variants):
    """
    Filter a log on a specified set of variants

    Parameters
    ---------------
    log
        Event log
    admitted_variants
        List of variants to filter

    Returns
    --------------
    filtered_log
        Filtered log object
    """
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.variants import variants_filter
    else:
        from pm4py.algo.filtering.log.variants import variants_filter
    return variants_filter.apply(log, admitted_variants)
def discover_organizational_roles(log: Union[EventLog, pd.DataFrame]):
    """
    Mines the organizational roles

    Parameters
    ---------------
    log
        Event log or Pandas dataframe

    Returns
    ---------------
    roles
        Organizational roles. List where each role is a sublist with two elements:
        - The first element of the sublist is the list of activities belonging to a role.
          Each activity belongs to a single role
        - The second element of the sublist is a dictionary containing the resources of the role
          and the number of times they executed activities belonging to the role.
    """
    from pm4py.algo.organizational_mining.roles import algorithm as roles
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        variant = roles.Variants.PANDAS
    else:
        variant = roles.Variants.LOG
    return roles.apply(log, variant=variant)