def save_vis_events_per_time_graph(log: Union[EventLog, pd.DataFrame], file_path: str):
    """
    Stores the events per time graph at the given location

    Parameters
    ----------------
    log
        Log object
    file_path
        Destination path (the extension decides the image format)
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception(
            "the method can be applied only to a traditional event log!")
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.attributes.pandas import get as attributes_get
    else:
        from pm4py.statistics.attributes.log import get as attributes_get
    kde = attributes_get.get_kde_date_attribute(log, parameters=get_properties(log))
    # the target image format is derived from the file extension
    extension = os.path.splitext(file_path)[1][1:]
    from pm4py.visualization.graphs import visualizer as graphs_visualizer
    viz = graphs_visualizer.apply(kde[0], kde[1], variant=graphs_visualizer.Variants.DATES,
                                  parameters={"format": extension})
    graphs_visualizer.save(viz, file_path)
def view_events_per_time_graph(log: Union[EventLog, pd.DataFrame], format: str = "png"):
    """
    Shows the events per time graph on the screen

    Parameters
    -----------------
    log
        Log object
    format
        Format of the visualization (png, svg, ...)
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception(
            "the method can be applied only to a traditional event log!")
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.attributes.pandas import get as attributes_get
    else:
        from pm4py.statistics.attributes.log import get as attributes_get
    kde = attributes_get.get_kde_date_attribute(log, parameters=get_properties(log))
    from pm4py.visualization.graphs import visualizer as graphs_visualizer
    viz = graphs_visualizer.apply(kde[0], kde[1], variant=graphs_visualizer.Variants.DATES,
                                  parameters={"format": format})
    graphs_visualizer.view(viz)
def discover_organizational_roles(log: Union[EventLog, pd.DataFrame]):
    """
    Mines the organizational roles

    Parameters
    ---------------
    log
        Event log or Pandas dataframe

    Returns
    ---------------
    roles
        Organizational roles. List where each role is a sublist with two elements:
        - the list of activities belonging to the role (each activity belongs to a single role);
        - a dictionary with the resources of the role and the number of times they executed
          activities belonging to the role.
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception("the method can be applied only to a traditional event log!")
    from pm4py.algo.organizational_mining.roles import algorithm as roles
    # pick the variant matching the input representation
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        variant = roles.Variants.PANDAS
    else:
        variant = roles.Variants.LOG
    return roles.apply(log, variant=variant, parameters=get_properties(log))
def get_cycle_time(log: Union[EventLog, pd.DataFrame]) -> float:
    """
    Computes the cycle time of the event log.

    The definition that has been followed is the one proposed in:
    https://www.presentationeze.com/presentations/lean-manufacturing-just-in-time/lean-manufacturing-just-in-time-full-details/process-cycle-time-analysis/calculate-cycle-time/#:~:text=Cycle%20time%20%3D%20Average%20time%20between,is%2024%20minutes%20on%20average.

    So: Cycle time = Average time between completion of units.

    Example taken from the website: consider a manufacturing facility producing 100 units of
    product per 40 hour week. The average throughput rate is 1 unit per 0.4 hours, i.e. one
    unit every 24 minutes. Therefore the cycle time is 24 minutes on average.

    Parameters
    -----------------
    log
        Log object

    Returns
    -----------------
    cycle_time
        Cycle time (calculated with the aforementioned formula).
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception("the method can be applied only to a traditional event log!")
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.traces.cycle_time.pandas import get as cycle_time
    else:
        from pm4py.statistics.traces.cycle_time.log import get as cycle_time
    return cycle_time.apply(log, parameters=get_properties(log))
def get_rework_cases_per_activity(log: Union[EventLog, pd.DataFrame]) -> Dict[str, int]:
    """
    Finds the activities of the log for which rework occurs (more than one occurrence of the
    activity inside the same trace).

    Parameters
    ------------------
    log
        Log object

    Returns
    ------------------
    rework_dictionary
        Dictionary associating to each of the aforementioned activities the number of cases
        for which the rework occurred.
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception("the method can be applied only to a traditional event log!")
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.rework.pandas import get as rework_get
    else:
        from pm4py.statistics.rework.log import get as rework_get
    return rework_get.apply(log, parameters=get_properties(log))
def get_variants_as_tuples(log: Union[EventLog, pd.DataFrame]) -> Dict[Tuple[str], List[Trace]]:
    """
    Gets the variants from the log (where the keys are tuples and not strings)

    Parameters
    --------------
    log
        Event log

    Returns
    --------------
    variants
        Dictionary of variants along with their count
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception("the method can be applied only to a traditional event log!")
    import pm4py
    # switch the global variant representation of pm4py to lists so that
    # variants can be keyed by tuples instead of strings
    pm4py.util.variants_util.VARIANT_SPECIFICATION = pm4py.util.variants_util.VariantsSpecifications.LIST
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.variants.pandas import get
        return get.get_variants_count(log, parameters=get_properties(log))
    from pm4py.statistics.variants.log import get
    return get.get_variants(log, parameters=get_properties(log))
def get_variants(log: Union[EventLog, pd.DataFrame]) -> Dict[str, List[Trace]]:
    """
    Gets the variants from the log

    Parameters
    --------------
    log
        Event log

    Returns
    --------------
    variants
        Dictionary of variants along with their count
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception("the method can be applied only to a traditional event log!")
    import pm4py
    spec = pm4py.util.variants_util.VARIANT_SPECIFICATION
    if spec == pm4py.util.variants_util.VariantsSpecifications.STRING:
        # string-based variants are deprecated in favor of tuple-based ones
        import warnings
        warnings.warn('pm4py.get_variants is deprecated. Please use pm4py.get_variants_as_tuples instead.')
    if spec == pm4py.util.variants_util.VariantsSpecifications.LIST:
        raise Exception('Please use pm4py.get_variants_as_tuples')
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.variants.pandas import get
        return get.get_variants_count(log, parameters=get_properties(log))
    from pm4py.statistics.variants.log import get
    return get.get_variants(log, parameters=get_properties(log))
def discover_performance_dfg(log: Union[EventLog, pd.DataFrame], business_hours: bool = False, worktiming: List[int] = [7, 17], weekends: List[int] = [6, 7], workcalendar=constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR) -> Tuple[dict, dict, dict]:
    """
    Discovers a performance directly-follows graph from an event log

    Parameters
    ---------------
    log
        Event log
    business_hours
        Enables/disables the computation based on the business hours (default: False)
    worktiming
        (If the business hours are enabled) The hour range in which the resources of the log are working (default: 7 to 17)
    weekends
        (If the business hours are enabled) The weekends days (default: Saturday (6), Sunday (7))
    workcalendar
        (If the business hours are enabled) The work calendar to use (default: constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR)

    Returns
    ---------------
    performance_dfg
        Performance DFG
    start_activities
        Start activities
    end_activities
        End activities
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception("the method can be applied only to a traditional event log!")
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.util import constants
        properties = get_properties(log)
        from pm4py.algo.discovery.dfg.adapters.pandas.df_statistics import get_dfg_graph
        # fall back to the XES default keys when the log properties do not override them
        activity_key = properties[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in properties else xes_constants.DEFAULT_NAME_KEY
        timestamp_key = properties[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in properties else xes_constants.DEFAULT_TIMESTAMP_KEY
        case_id_key = properties[constants.PARAMETER_CONSTANT_CASEID_KEY] if constants.PARAMETER_CONSTANT_CASEID_KEY in properties else constants.CASE_CONCEPT_NAME
        dfg = get_dfg_graph(log, activity_key=activity_key, timestamp_key=timestamp_key, case_id_glue=case_id_key,
                            measure="performance", perf_aggregation_key="all",
                            business_hours=business_hours, worktiming=worktiming, weekends=weekends,
                            workcalendar=workcalendar)
        from pm4py.statistics.start_activities.pandas import get as start_activities_module
        from pm4py.statistics.end_activities.pandas import get as end_activities_module
        start_activities = start_activities_module.get_start_activities(log, parameters=properties)
        end_activities = end_activities_module.get_end_activities(log, parameters=properties)
    else:
        from pm4py.algo.discovery.dfg.variants import performance as dfg_discovery
        properties = get_properties(log)
        # "all" keeps every aggregation (mean, median, min, max, ...) in the resulting DFG
        properties[dfg_discovery.Parameters.AGGREGATION_MEASURE] = "all"
        properties[dfg_discovery.Parameters.BUSINESS_HOURS] = business_hours
        properties[dfg_discovery.Parameters.WORKTIMING] = worktiming
        properties[dfg_discovery.Parameters.WEEKENDS] = weekends
        # NOTE(review): unlike the Pandas branch, `workcalendar` is not forwarded here —
        # confirm whether dfg_discovery exposes a WORKCALENDAR parameter and forward it if so.
        dfg = dfg_discovery.apply(log, parameters=properties)
        from pm4py.statistics.start_activities.log import get as start_activities_module
        from pm4py.statistics.end_activities.log import get as end_activities_module
        start_activities = start_activities_module.get_start_activities(log, parameters=properties)
        end_activities = end_activities_module.get_end_activities(log, parameters=properties)
    return dfg, start_activities, end_activities
def conformance_diagnostics_alignments(log: EventLog, *args, multi_processing: bool = False) -> List[Dict[str, Any]]:
    """
    Apply the alignments algorithm between a log and a process model.
    The methods return the full alignment diagnostics.

    Parameters
    -------------
    log
        Event log
    args
        Specification of the process model: either (net, initial_marking, final_marking)
        for a Petri net, (dfg, start_activities, end_activities) for a DFG, a single
        ProcessTree, or any other model specification convertible to a Petri net
    multi_processing
        Boolean value that enables the multiprocessing (default: False)

    Returns
    -------------
    aligned_traces
        A list of alignments for each trace of the log (in the same order as the traces in the event log)
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception("the method can be applied only to a traditional event log!")
    # dispatch on the number and type of the model arguments
    if len(args) == 3:
        if type(args[0]) is PetriNet:
            # Petri net alignments: args = (net, initial_marking, final_marking)
            from pm4py.algo.conformance.alignments.petri_net import algorithm as alignments
            if multi_processing:
                return alignments.apply_multiprocessing(log, args[0], args[1], args[2], parameters=get_properties(log))
            else:
                return alignments.apply(log, args[0], args[1], args[2], parameters=get_properties(log))
        elif type(args[0]) is dict or type(args[0]) is Counter:
            # DFG alignments: args = (dfg, start_activities, end_activities)
            from pm4py.algo.conformance.alignments.dfg import algorithm as dfg_alignment
            return dfg_alignment.apply(log, args[0], args[1], args[2], parameters=get_properties(log))
    elif len(args) == 1:
        if type(args[0]) is ProcessTree:
            # process tree alignments
            from pm4py.algo.conformance.alignments.process_tree.variants import search_graph_pt
            if multi_processing:
                return search_graph_pt.apply_multiprocessing(log, args[0], parameters=get_properties(log))
            else:
                return search_graph_pt.apply(log, args[0], parameters=get_properties(log))
    # fallback (no branch above returned): try to convert the model specification
    # to a Petri net and align against that
    import pm4py
    from pm4py.algo.conformance.alignments.petri_net import algorithm as alignments
    net, im, fm = pm4py.convert_to_petri_net(*args)
    if multi_processing:
        return alignments.apply_multiprocessing(log, net, im, fm, parameters=get_properties(log))
    else:
        return alignments.apply(log, net, im, fm, parameters=get_properties(log))
def filter_directly_follows_relation(log: Union[EventLog, pd.DataFrame], relations: List[str], retain: bool = True) -> \
        Union[EventLog, pd.DataFrame]:
    """
    Retains the traces that contain any of the specified 'directly follows' relations.
    For example, if relations == [('a','b'),('a','c')] and log [<a,b,c>,<a,c,b>,<a,d,b>]
    the resulting log will contain traces describing [<a,b,c>,<a,c,b>].

    Parameters
    ---------------
    log
        Log object
    relations
        List of activity name pairs, which are allowed/forbidden paths
    retain
        Parameter that says whether the paths should be kept/removed

    Returns
    ----------------
    filtered_log
        Filtered log object
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception(
            "the method can be applied only to a traditional event log!")
    parameters = get_properties(log)
    if check_is_pandas_dataframe(log):
        from pm4py.algo.filtering.pandas.paths import paths_filter
    else:
        from pm4py.algo.filtering.log.paths import paths_filter
    parameters[paths_filter.Parameters.POSITIVE] = retain
    return paths_filter.apply(log, relations, parameters=parameters)
def filter_variants_percentage(
        log: Union[EventLog, pd.DataFrame],
        threshold: float = 0.8) -> Union[EventLog, pd.DataFrame]:
    """
    Filters a log on the percentage of variants

    Parameters
    ---------------
    log
        Event log
    threshold
        Percentage (scale 0.1) of admitted variants

    Returns
    --------------
    filtered_log
        Filtered log object
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception(
            "the method can be applied only to a traditional event log!")
    if check_is_pandas_dataframe(log):
        # the percentage-based variants filter is implemented only on EventLog objects
        raise Exception(
            "filtering variants percentage on Pandas dataframe is currently not available! please convert the dataframe to event log with the method: log = pm4py.convert_to_event_log(df)"
        )
    from pm4py.algo.filtering.log.variants import variants_filter
    return variants_filter.filter_log_variants_percentage(
        log, percentage=threshold, parameters=get_properties(log))
def discover_handover_of_work_network(log: Union[EventLog, pd.DataFrame], beta=0):
    """
    Calculates the handover of work network of the event log.

    The handover of work network is essentially the DFG of the event log, however, using the
    resource as a node of the graph instead of the activity. As such, resource information
    should be present in the event log.

    Parameters
    ---------------
    log
        Event log or Pandas dataframe
    beta
        beta parameter for Handover metric

    Returns
    ---------------
    metric_values
        Values of the metric
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception("the method can be applied only to a traditional event log!")
    from pm4py.algo.organizational_mining.sna import algorithm as sna
    parameters = get_properties(log)
    parameters["beta"] = beta
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        variant = sna.Variants.HANDOVER_PANDAS
    else:
        variant = sna.Variants.HANDOVER_LOG
    return sna.apply(log, variant=variant, parameters=parameters)
def view_dfg(dfg: dict, start_activities: dict, end_activities: dict, format: str = "png", log: Optional[EventLog] = None):
    """
    Views a (composite) DFG

    Parameters
    -------------
    dfg
        DFG object
    start_activities
        Start activities
    end_activities
        End activities
    format
        Format of the output picture (default: png)
    log
        (Optional) event log used to decorate the visualization and as the
        source of the properties passed to the visualizer
    """
    from pm4py.visualization.dfg import visualizer as dfg_visualizer
    dfg_parameters = dfg_visualizer.Variants.FREQUENCY.value.Parameters
    parameters = get_properties(log)
    parameters[dfg_parameters.FORMAT] = format
    parameters[dfg_parameters.START_ACTIVITIES] = start_activities
    parameters[dfg_parameters.END_ACTIVITIES] = end_activities
    gviz = dfg_visualizer.apply(dfg, log=log, variant=dfg_visualizer.Variants.FREQUENCY,
                                parameters=parameters)
    dfg_visualizer.view(gviz)
def insert_artificial_start_end(
        log: Union[EventLog, pd.DataFrame]) -> Union[EventLog, pd.DataFrame]:
    """
    Inserts the artificial start/end activities in an event log / Pandas dataframe

    Parameters
    ------------------
    log
        Event log / Pandas dataframe

    Returns
    ------------------
    log
        Event log / Pandas dataframe with artificial start / end activities
    """
    # consistency fix: apply the same input-type guard used by the other
    # methods of this module
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception("the method can be applied only to a traditional event log!")
    properties = get_properties(log)
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.objects.log.util import dataframe_utils
        return dataframe_utils.insert_artificial_start_end(
            log, parameters=properties)
    else:
        from pm4py.objects.log.util import artificial
        return artificial.insert_artificial_start_end(log, parameters=properties)
def get_all_case_durations(log: Union[EventLog, pd.DataFrame], business_hours: bool = False, worktiming: List[int] = [7, 17], weekends: List[int] = [6, 7]) -> List[float]:
    """
    Gets the durations of the cases in the event log

    Parameters
    ---------------
    log
        Event log
    business_hours
        Enables/disables the computation based on the business hours (default: False)
    worktiming
        (If the business hours are enabled) The hour range in which the resources of the log are working (default: 7 to 17)
    weekends
        (If the business hours are enabled) The weekends days (default: Saturday (6), Sunday (7))

    Returns
    ---------------
    durations
        Case durations (as list)
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception("the method can be applied only to a traditional event log!")
    # work on a copy so the shared properties of the log are not polluted
    properties = copy(get_properties(log))
    properties["business_hours"] = business_hours
    properties["worktiming"] = worktiming
    properties["weekends"] = weekends
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.traces.generic.pandas import case_statistics
        descriptions = case_statistics.get_cases_description(log, parameters=properties)
        return sorted(d["caseDuration"] for d in descriptions.values())
    from pm4py.statistics.traces.generic.log import case_statistics
    return case_statistics.get_all_case_durations(log, parameters=properties)
def filter_variants_top_k(log: Union[EventLog, pd.DataFrame], k: int) -> Union[EventLog, pd.DataFrame]:
    """
    Keeps the top-k variants of the log

    Parameters
    -------------
    log
        Event log
    k
        Number of variants that should be kept

    Returns
    -------------
    filtered_log
        Filtered log
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")
    parameters = get_properties(log)
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.variants import variants_filter
        return variants_filter.filter_variants_top_k(log, k, parameters=parameters)
    else:
        from pm4py.algo.filtering.log.variants import variants_filter
        return variants_filter.filter_variants_top_k(log, k, parameters=parameters)
def discover_subcontracting_network(log: Union[EventLog, pd.DataFrame], n=2):
    """
    Calculates the subcontracting network of the process.

    Parameters
    ---------------
    log
        Event log or Pandas dataframe
    n
        n parameter for Subcontracting metric

    Returns
    ---------------
    metric_values
        Values of the metric
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception("the method can be applied only to a traditional event log!")
    from pm4py.algo.organizational_mining.sna import algorithm as sna
    parameters = get_properties(log)
    parameters["n"] = n
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        variant = sna.Variants.SUBCONTRACTING_PANDAS
    else:
        variant = sna.Variants.SUBCONTRACTING_LOG
    return sna.apply(log, variant=variant, parameters=parameters)
def filter_activities_rework(
        log: Union[EventLog, pd.DataFrame], activity: str,
        min_occurrences: int = 2) -> Union[EventLog, pd.DataFrame]:
    """
    Filters the event log, keeping the cases where the specified activity occurs at least
    min_occurrences times.

    Parameters
    -----------------
    log
        Event log / Pandas dataframe
    activity
        Activity
    min_occurrences
        Minimum desired number of occurrences

    Returns
    -----------------
    filtered_log
        Log with cases having at least min_occurrences occurrences of the given activity
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception(
            "the method can be applied only to a traditional event log!")
    parameters = get_properties(log)
    parameters["min_occurrences"] = min_occurrences
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.rework import rework_filter
    else:
        from pm4py.algo.filtering.log.rework import rework_filter
    return rework_filter.apply(log, activity, parameters=parameters)
def get_event_attribute_values(log: Union[EventLog, pd.DataFrame], attribute: str, count_once_per_case=False) -> Dict[str, int]:
    """
    Returns the values for a specified attribute

    Parameters
    ---------------
    log
        Log object
    attribute
        Attribute
    count_once_per_case
        If True, consider only one occurrence of the given attribute value inside a case
        (if there are multiple events sharing the same attribute value, count only 1 occurrence)

    Returns
    ---------------
    attribute_values
        Dictionary of values along with their count
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception("the method can be applied only to a traditional event log!")
    parameters = get_properties(log)
    parameters["keep_once_per_case"] = count_once_per_case
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.attributes.pandas import get
    else:
        from pm4py.statistics.attributes.log import get
    return get.get_attribute_values(log, attribute, parameters=parameters)
def fitness_alignments(log: EventLog, petri_net: PetriNet, initial_marking: Marking, final_marking: Marking, multi_processing: bool = False) -> \
        Dict[str, float]:
    """
    Calculates the fitness using alignments

    Parameters
    --------------
    log
        Event log
    petri_net
        Petri net object
    initial_marking
        Initial marking
    final_marking
        Final marking
    multi_processing
        Boolean value that enables the multiprocessing (default: False)

    Returns
    ---------------
    fitness_dictionary
        dictionary describing average fitness (key: average_trace_fitness) and the percentage
        of fitting traces (key: percentage_of_fitting_traces)
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception("the method can be applied only to a traditional event log!")
    from pm4py.algo.evaluation.replay_fitness import algorithm as replay_fitness
    params = get_properties(log)
    params["multiprocessing"] = multi_processing
    return replay_fitness.apply(log, petri_net, initial_marking, final_marking,
                                variant=replay_fitness.Variants.ALIGNMENT_BASED,
                                parameters=params)
def precision_alignments(log: EventLog, petri_net: PetriNet, initial_marking: Marking,
                         final_marking: Marking, multi_processing: bool = False) -> float:
    """
    Calculates the precision of the model w.r.t. the event log using alignments

    Parameters
    --------------
    log
        Event log
    petri_net
        Petri net object
    initial_marking
        Initial marking
    final_marking
        Final marking
    multi_processing
        Boolean value that enables the multiprocessing (default: False)

    Returns
    --------------
    precision
        float representing the precision value
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception("the method can be applied only to a traditional event log!")
    from pm4py.algo.evaluation.precision import algorithm as precision_evaluator
    params = get_properties(log)
    params["multiprocessing"] = multi_processing
    return precision_evaluator.apply(log, petri_net, initial_marking, final_marking,
                                     variant=precision_evaluator.Variants.ALIGN_ETCONFORMANCE,
                                     parameters=params)
def discover_heuristics_net(log: Union[EventLog, pd.DataFrame], dependency_threshold: float = 0.5, and_threshold: float = 0.65, loop_two_threshold: float = 0.5) -> HeuristicsNet:
    """
    Discovers an heuristics net

    Parameters
    ---------------
    log
        Event log
    dependency_threshold
        Dependency threshold (default: 0.5)
    and_threshold
        AND threshold (default: 0.65)
    loop_two_threshold
        Loop two threshold (default: 0.5)

    Returns
    --------------
    heu_net
        Heuristics net
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception("the method can be applied only to a traditional event log!")
    from pm4py.algo.discovery.heuristics import algorithm as heuristics_miner
    heu_parameters = heuristics_miner.Variants.CLASSIC.value.Parameters
    params = get_properties(log)
    params.update({
        heu_parameters.DEPENDENCY_THRESH: dependency_threshold,
        heu_parameters.AND_MEASURE_THRESH: and_threshold,
        heu_parameters.LOOP_LENGTH_TWO_THRESH: loop_two_threshold,
    })
    return heuristics_miner.apply_heu(log, variant=heuristics_miner.Variants.CLASSIC, parameters=params)
def save_vis_performance_spectrum(log: Union[EventLog, pd.DataFrame], activities: List[str], file_path: str):
    """
    Saves the visualization of the performance spectrum to a file

    Parameters
    ---------------
    log
        Event log
    activities
        List of activities (in order) that is used to build the performance spectrum
    file_path
        Destination path (including the extension)
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception(
            "the method can be applied only to a traditional event log!")
    from pm4py.algo.discovery.performance_spectrum import algorithm as performance_spectrum
    spectrum = performance_spectrum.apply(log, activities, parameters=get_properties(log))
    from pm4py.visualization.performance_spectrum import visualizer as perf_spectrum_visualizer
    from pm4py.visualization.performance_spectrum.variants import neato
    # the target image format is derived from the file extension
    extension = os.path.splitext(file_path)[1][1:]
    gviz = perf_spectrum_visualizer.apply(spectrum, parameters={neato.Parameters.FORMAT.value: extension})
    perf_spectrum_visualizer.save(gviz, file_path)
def view_performance_spectrum(log: Union[EventLog, pd.DataFrame], activities: List[str], format: str = "png"):
    """
    Displays the performance spectrum

    Parameters
    ----------------
    log
        Event log
    activities
        List of activities (in order) that is used to build the performance spectrum
    format
        Format of the visualization (png, svg ...)
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")
    from pm4py.algo.discovery.performance_spectrum import algorithm as performance_spectrum
    perf_spectrum = performance_spectrum.apply(log, activities, parameters=get_properties(log))
    from pm4py.visualization.performance_spectrum import visualizer as perf_spectrum_visualizer
    from pm4py.visualization.performance_spectrum.variants import neato
    gviz = perf_spectrum_visualizer.apply(
        perf_spectrum, parameters={neato.Parameters.FORMAT.value: format})
    perf_spectrum_visualizer.view(gviz)
def __builds_events_distribution_graph(log: Union[EventLog, pd.DataFrame], distr_type: str = "days_week"):
    """
    Internal method to build the events distribution graph

    Returns the plot title, the axis labels, and the (x, y) series for the
    requested distribution type.
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception(
            "the method can be applied only to a traditional event log!")
    # (title, x axis label, y axis label) for each supported distribution type
    metadata = {
        "days_month": ("Distribution of the Events over the Days of a Month", "Day of month", "Number of Events"),
        "months": ("Distribution of the Events over the Months", "Month", "Number of Events"),
        "years": ("Distribution of the Events over the Years", "Year", "Number of Events"),
        "hours": ("Distribution of the Events over the Hours", "Hour (of day)", "Number of Events"),
        "days_week": ("Distribution of the Events over the Days of a Week", "Day of the Week", "Number of Events"),
        "weeks": ("Distribution of the Events over the Weeks of a Year", "Week of the Year", "Number of Events"),
    }
    if distr_type not in metadata:
        raise Exception("unsupported distribution specified.")
    title, x_axis, y_axis = metadata[distr_type]
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.attributes.pandas import get as attributes_get
    else:
        from pm4py.statistics.attributes.log import get as attributes_get
    x, y = attributes_get.get_events_distribution(log, distr_type=distr_type, parameters=get_properties(log))
    return title, x_axis, y_axis, x, y
def discover_dfg(log: Union[EventLog, pd.DataFrame]) -> Tuple[dict, dict, dict]:
    """
    Discovers a DFG from a log

    Parameters
    --------------
    log
        Event log

    Returns
    --------------
    dfg
        DFG
    start_activities
        Start activities
    end_activities
        End activities
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception("the method can be applied only to a traditional event log!")
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.util import constants
        properties = get_properties(log)
        from pm4py.algo.discovery.dfg.adapters.pandas.df_statistics import get_dfg_graph
        # fall back to the XES default keys when the log properties do not override them
        activity_key = properties.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes_constants.DEFAULT_NAME_KEY)
        timestamp_key = properties.get(constants.PARAMETER_CONSTANT_TIMESTAMP_KEY, xes_constants.DEFAULT_TIMESTAMP_KEY)
        case_id_key = properties.get(constants.PARAMETER_CONSTANT_CASEID_KEY, constants.CASE_CONCEPT_NAME)
        dfg = get_dfg_graph(log, activity_key=activity_key, timestamp_key=timestamp_key,
                            case_id_glue=case_id_key)
        from pm4py.statistics.start_activities.pandas import get as start_activities_module
        from pm4py.statistics.end_activities.pandas import get as end_activities_module
    else:
        from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
        properties = get_properties(log)
        dfg = dfg_discovery.apply(log, parameters=properties)
        from pm4py.statistics.start_activities.log import get as start_activities_module
        from pm4py.statistics.end_activities.log import get as end_activities_module
    start_activities = start_activities_module.get_start_activities(log, parameters=properties)
    end_activities = end_activities_module.get_end_activities(log, parameters=properties)
    return dfg, start_activities, end_activities
def filter_event_attribute_values(
        log: Union[EventLog, pd.DataFrame], attribute_key: str,
        values: Union[Set[str], List[str]], level: str = "case",
        retain: bool = True) -> Union[EventLog, pd.DataFrame]:
    """
    Filters a log object on the values of some event attribute

    Parameters
    --------------
    log
        Log object
    attribute_key
        Attribute to filter
    values
        Admitted (or forbidden) values
    level
        Specifies how the filter should be applied ('case' filters the cases where at least
        one occurrence happens, 'event' filters the events eventually trimming the cases)
    retain
        Specifies if the values should be kept or removed

    Returns
    --------------
    filtered_log
        Filtered log object (None is returned for an unrecognized `level` value)
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception(
            "the method can be applied only to a traditional event log!")
    parameters = get_properties(log)
    parameters[constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = attribute_key
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.attributes import attributes_filter
    else:
        from pm4py.algo.filtering.log.attributes import attributes_filter
    parameters[attributes_filter.Parameters.POSITIVE] = retain
    if level == "event":
        return attributes_filter.apply_events(log, values, parameters=parameters)
    elif level == "case":
        return attributes_filter.apply(log, values, parameters=parameters)
def filter_time_range(log: Union[EventLog, pd.DataFrame], dt1: str, dt2: str, mode="events") -> Union[EventLog, pd.DataFrame]:
    """
    Filter a log on a time interval

    Parameters
    ----------------
    log
        Log object
    dt1
        Left extreme of the interval
    dt2
        Right extreme of the interval
    mode
        Modality of filtering (events, traces_contained, traces_intersecting)
        events: any event that fits the time frame is retained
        traces_contained: any trace completely contained in the timeframe is retained
        traces_intersecting: any trace intersecting with the time-frame is retained.

    Returns
    ----------------
    filtered_log
        Filtered log
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")
    # pick the backend matching the log representation; the mode dispatch
    # afterwards is identical for both
    if check_is_pandas_dataframe(log):
        from pm4py.algo.filtering.pandas.timestamp import timestamp_filter
    else:
        from pm4py.algo.filtering.log.timestamp import timestamp_filter
    props = get_properties(log)
    if mode == "events":
        return timestamp_filter.apply_events(log, dt1, dt2, parameters=props)
    if mode == "traces_contained":
        return timestamp_filter.filter_traces_contained(log, dt1, dt2, parameters=props)
    if mode == "traces_intersecting":
        return timestamp_filter.filter_traces_intersecting(log, dt1, dt2, parameters=props)
    warnings.warn('mode provided: ' + mode + ' is not recognized; original log returned!')
    return log
def get_end_activities(log: Union[EventLog, pd.DataFrame]) -> Dict[str, int]:
    """
    Returns the end activities of a log

    Parameters
    ---------------
    log
        Log object

    Returns
    ---------------
    end_activities
        Dictionary of end activities along with their count
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception("the method can be applied only to a traditional event log!")
    # same entry point for both backends; only the import path differs
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.end_activities.pandas import get as end_activities_get
    else:
        from pm4py.statistics.end_activities.log import get as end_activities_get
    return end_activities_get.get_end_activities(log, parameters=get_properties(log))
def discover_eventually_follows_graph(log: Union[EventLog, pd.DataFrame]) -> Dict[Tuple[str, str], int]:
    """
    Gets the eventually follows graph from a log object

    Parameters
    ---------------
    log
        Log object

    Returns
    ---------------
    eventually_follows_graph
        Dictionary of tuples of activities that eventually follows each other;
        along with the number of occurrences
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception("the method can be applied only to a traditional event log!")
    # both backends expose the same apply() entry point
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.eventually_follows.pandas import get as efg_get
    else:
        from pm4py.statistics.eventually_follows.log import get as efg_get
    return efg_get.apply(log, parameters=get_properties(log))