def test_prefiltering_dataframe(self):
    """Smoke-test the CSV pre-filtering pipeline: import, keep the most
    frequent activities, keep a bounded number of cases, convert
    timestamps, sort, and convert the result to an event log."""
    # touching an instance attribute keeps unittest from flagging this
    # as a candidate static method
    self.dummy_variable = "dummy_value"
    csv_path = os.path.join(INPUT_DATA_DIR, "running-example.csv")
    df = csv_import_adapter.import_dataframe_from_path_wo_timeconversion(csv_path, sep=',')
    df = attributes_filter.filter_df_keeping_spno_activities(df, activity_key="concept:name")
    df = case_filter.filter_on_ncases(df, case_id_glue="case:concept:name")
    df = csv_import_adapter.convert_timestamp_columns_in_df(df)
    df = df.sort_values('time:timestamp')
    stream = log_conv_fact.apply(df, variant=log_conv_fact.TO_EVENT_STREAM)
    converted_log = log_conv_fact.apply(stream)
    # result is intentionally discarded: the test only checks the chain runs
    del converted_log
def execute_script():
    """Benchmark script: imports a CSV log into a dataframe, then applies a
    sequence of filters (case performance, attribute values, start/end
    activities), rendering a frequency+performance process schema after each
    step and printing wall-clock timings.

    Relies on module-level configuration (inputLog, CASEID_GLUE, TIMEST_KEY,
    ACTIVITY_KEY, MAX_NO_ACTIVITIES, ENABLE_* flags, GENERATED_IMAGES, ...)
    defined elsewhere in this file.
    """
    # --- import & normalization ---------------------------------------
    aa = time.time()
    dataframe = csv_import_adapter.import_dataframe_from_path_wo_timeconversion(
        inputLog, sep=',')
    dataframe = csv_import_adapter.convert_caseid_column_to_str(
        dataframe, case_id_glue=CASEID_GLUE)
    dataframe = csv_import_adapter.convert_timestamp_columns_in_df(
        dataframe, timest_format=TIMEST_FORMAT, timest_columns=TIMEST_COLUMNS)
    # sort by case then timestamp so downstream DFG code can skip re-sorting
    dataframe = dataframe.sort_values([CASEID_GLUE, TIMEST_KEY])
    # keep only the MAX_NO_ACTIVITIES most frequent activities for rendering
    dataframe_fa = attributes_filter.filter_df_keeping_spno_activities(
        dataframe, activity_key=ACTIVITY_KEY,
        max_no_activities=MAX_NO_ACTIVITIES)
    bb = time.time()
    print("importing log time=", (bb - aa))
    # --- case-duration statistics -------------------------------------
    parameters_cde = {
        constants.PARAMETER_CONSTANT_CASEID_KEY: CASEID_GLUE,
        constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: TIMEST_KEY,
        "sort_by_column": "caseDuration",
        "sort_ascending": False,
        "max_ret_cases": 1000
    }
    cases_desc = case_statistics.get_cases_description(
        dataframe, parameters=parameters_cde)
    print(cases_desc)
    bb2 = time.time()
    print("calculating and printing cases_desc = ", (bb2 - bb))
    # --- schema on the unfiltered (activity-capped) log ---------------
    calculate_process_schema_from_df(dataframe_fa, "NOFILTERS_FREQUENCY.svg",
                                     "NOFILTERS_PERFORMANCE.svg")
    GENERATED_IMAGES.append("NOFILTERS_FREQUENCY.svg")
    GENERATED_IMAGES.append("NOFILTERS_PERFORMANCE.svg")
    # optionally free intermediates to keep peak memory down
    if DELETE_VARIABLES:
        del dataframe_fa
    cc = time.time()
    print(
        "saving initial Inductive Miner process schema along with frequency metrics=",
        (cc - bb2))
    # --- filter on case performance (duration window, in seconds) -----
    dataframe_cp = case_filter.filter_on_case_performance(
        dataframe, case_id_glue=CASEID_GLUE, timestamp_key=TIMEST_KEY,
        min_case_performance=100000, max_case_performance=10000000)
    dataframe_cp_fa = attributes_filter.filter_df_keeping_spno_activities(
        dataframe_cp, activity_key=ACTIVITY_KEY,
        max_no_activities=MAX_NO_ACTIVITIES)
    # NOTE(review): dataframe_cp is rebound to None before the conditional
    # del — the large frame is released either way; the del only drops the name
    dataframe_cp = None
    if DELETE_VARIABLES:
        del dataframe_cp
    calculate_process_schema_from_df(dataframe_cp_fa, "FILTER_CP_FREQUENCY.svg",
                                     "FILTER_CP_PERFORMANCE.svg")
    GENERATED_IMAGES.append("FILTER_CP_FREQUENCY.svg")
    GENERATED_IMAGES.append("FILTER_CP_PERFORMANCE.svg")
    if DELETE_VARIABLES:
        del dataframe_cp_fa
    dd = time.time()
    print("filtering on case performance and generating process schema=",
          (dd - cc))
    # --- optional filter on a configured attribute value ---------------
    if ENABLE_ATTRIBUTE_FILTER:
        parameters_att = {
            constants.PARAMETER_CONSTANT_CASEID_KEY: CASEID_GLUE,
            constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: ATTRIBUTE_TO_FILTER,
            constants.PARAMETER_CONSTANT_ACTIVITY_KEY: ATTRIBUTE_TO_FILTER,
            "positive": True
        }
        dataframe_att = attributes_filter.apply(dataframe,
                                                ATTRIBUTE_VALUES_TO_FILTER,
                                                parameters=parameters_att)
        # dataframe_att = attributes_filter.apply_auto_filter(dataframe, parameters=parameters_att)
        print(
            "all the activities in the log",
            attributes_filter.get_attribute_values(dataframe_att,
                                                   ACTIVITY_KEY))
        dataframe_att_fa = attributes_filter.filter_df_keeping_spno_activities(
            dataframe_att, activity_key=ACTIVITY_KEY,
            max_no_activities=MAX_NO_ACTIVITIES)
        if DELETE_VARIABLES:
            del dataframe_att
        calculate_process_schema_from_df(dataframe_att_fa,
                                         "FILTER_ATT_FREQUENCY.svg",
                                         "FILTER_ATT_PERFORMANCE.svg")
        GENERATED_IMAGES.append("FILTER_ATT_FREQUENCY.svg")
        GENERATED_IMAGES.append("FILTER_ATT_PERFORMANCE.svg")
        if DELETE_VARIABLES:
            del dataframe_att_fa
    ee = time.time()
    print("filtering on attribute values and generating process schema=",
          (ee - dd))
    # ee is deliberately re-read so the next timing excludes the print above
    ee = time.time()
    # --- start / end activity statistics -------------------------------
    parameters_sa = {
        constants.PARAMETER_CONSTANT_CASEID_KEY: CASEID_GLUE,
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: ACTIVITY_KEY
    }
    parameters_ea = {
        constants.PARAMETER_CONSTANT_CASEID_KEY: CASEID_GLUE,
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: ACTIVITY_KEY
    }
    start_act = start_activities_filter.get_start_activities(
        dataframe, parameters=parameters_sa)
    print("start activities in the log = ", start_act)
    end_act = end_activities_filter.get_end_activities(
        dataframe, parameters=parameters_ea)
    print("end activities in the log = ", end_act)
    ff = time.time()
    print("finding start and end activities along with their count", (ff - ee))
    # --- optional filter on start activities ---------------------------
    if ENABLE_STARTACT_FILTER:
        dataframe_sa = start_activities_filter.apply(dataframe,
                                                     STARTACT_TO_FILTER,
                                                     parameters=parameters_sa)
        # dataframe_sa = start_activities_filter.apply_auto_filter(dataframe, parameters=parameters_sa)
        start_act = start_activities_filter.get_start_activities(
            dataframe_sa, parameters=parameters_sa)
        print("start activities in the filtered log = ", start_act)
        dataframe_sa_fa = attributes_filter.filter_df_keeping_spno_activities(
            dataframe_sa, activity_key=ACTIVITY_KEY,
            max_no_activities=MAX_NO_ACTIVITIES)
        if DELETE_VARIABLES:
            del dataframe_sa
        calculate_process_schema_from_df(dataframe_sa_fa,
                                         "FILTER_SA_FREQUENCY.svg",
                                         "FILTER_SA_PERFORMANCE.svg")
        GENERATED_IMAGES.append("FILTER_SA_FREQUENCY.svg")
        GENERATED_IMAGES.append("FILTER_SA_PERFORMANCE.svg")
        if DELETE_VARIABLES:
            del dataframe_sa_fa
    gg = time.time()
    if ENABLE_STARTACT_FILTER:
        print("filtering start activities time=", (gg - ff))
    # --- optional filter on end activities -----------------------------
    if ENABLE_ENDACT_FILTER:
        dataframe_ea = end_activities_filter.apply(dataframe,
                                                   ENDACT_TO_FILTER,
                                                   parameters=parameters_ea)
        # dataframe_ea = end_activities_filter.apply_auto_filter(dataframe, parameters=parameters_ea)
        end_act = end_activities_filter.get_end_activities(
            dataframe_ea, parameters=parameters_ea)
        print("end activities in the filtered log = ", end_act)
        dataframe_ea_fa = attributes_filter.filter_df_keeping_spno_activities(
            dataframe_ea, activity_key=ACTIVITY_KEY,
            max_no_activities=MAX_NO_ACTIVITIES)
        if DELETE_VARIABLES:
            del dataframe_ea
        calculate_process_schema_from_df(dataframe_ea_fa,
                                         "FILTER_EA_FREQUENCY.svg",
                                         "FILTER_EA_PERFORMANCE.svg")
        GENERATED_IMAGES.append("FILTER_EA_FREQUENCY.svg")
        GENERATED_IMAGES.append("FILTER_EA_PERFORMANCE.svg")
        if DELETE_VARIABLES:
            del dataframe_ea_fa
    hh = time.time()
    if ENABLE_ENDACT_FILTER:
        print("filtering end activities time=", (hh - gg))
    # --- cleanup of rendered images ------------------------------------
    if REMOVE_GENERATED_IMAGES:
        for image in GENERATED_IMAGES:
            os.remove(image)
def apply(dataframe, parameters=None):
    """
    Gets the performance DFG

    Parameters
    ------------
    dataframe
        Dataframe
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}
    # resolve tunables, falling back to the library-wide defaults
    dec_factor = parameters.get("decreasingFactor", constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)
    timestamp_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   xes.DEFAULT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(pm4_constants.PARAMETER_CONSTANT_CASEID_KEY,
                                  CASE_CONCEPT_NAME)
    # ask the auto filter to also return the end-activities count dictionary
    parameters[pm4_constants.RETURN_EA_COUNT_DICT_AUTOFILTER] = True
    dataframe = attributes_filter.filter_df_keeping_spno_activities(
        dataframe,
        activity_key=activity_key,
        max_no_activities=constants.MAX_NO_ACTIVITIES)
    dataframe, end_activities = auto_filter.apply_auto_filter(
        dataframe, parameters=parameters)
    end_activities = list(end_activities.keys())
    # one pass over the dataframe yields both frequency and performance DFGs
    dfg, dfg_perf = df_statistics.get_dfg_graph(
        dataframe,
        activity_key=activity_key,
        timestamp_key=timestamp_key,
        case_id_glue=case_id_glue,
        sort_caseid_required=False,
        sort_timestamp_along_case_id=False,
        measure="both")
    activities_count = attributes_filter.get_attribute_values(
        dataframe, activity_key, parameters=parameters)
    activities = list(activities_count.keys())
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        dec_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    # keep performance annotations only for edges that survived cleaning
    dfg_perf = {edge: perf for edge, perf in dfg_perf.items() if edge in dfg}
    start_activities = list(
        start_activities_filter.get_start_activities(
            dataframe, parameters=parameters).keys())
    gviz = dfg_vis_factory.apply(
        dfg_perf,
        activities_count=activities_count,
        variant="performance",
        parameters={
            "format": "svg",
            "start_activities": start_activities,
            "end_activities": end_activities
        })
    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))
    ret_graph = get_graph.get_graph_from_dfg(dfg, start_activities,
                                             end_activities)
    # a Petri net is derived from the cleaned DFG for the PNML export
    net, im, fm = dfg_conv_factory.apply(
        dfg,
        parameters={
            "start_activities": start_activities,
            "end_activities": end_activities
        })
    return get_base64_from_gviz(gviz), export_petri_as_string(
        net, im, fm
    ), ".pnml", "parquet", activities, start_activities, end_activities, gviz_base64, ret_graph, "dfg", "perf", None, "", activity_key
def apply(dataframe, parameters=None):
    """
    Gets the Petri net through Inductive Miner, decorated by performance metric

    Parameters
    ------------
    dataframe
        Dataframe
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}
    # resolve tunables, falling back to the library-wide defaults
    dec_factor = parameters.get("decreasingFactor", constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)
    timestamp_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   xes.DEFAULT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(pm4_constants.PARAMETER_CONSTANT_CASEID_KEY,
                                  CASE_CONCEPT_NAME)
    # ask the auto filter to also return the end-activities count dictionary
    parameters[pm4_constants.RETURN_EA_COUNT_DICT_AUTOFILTER] = True
    dataframe = attributes_filter.filter_df_keeping_spno_activities(
        dataframe,
        activity_key=activity_key,
        max_no_activities=constants.MAX_NO_ACTIVITIES)
    dataframe, end_activities = auto_filter.apply_auto_filter(
        dataframe, parameters=parameters)
    end_activities = list(end_activities.keys())
    activities_count = attributes_filter.get_attribute_values(
        dataframe, activity_key, parameters=parameters)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            dataframe, parameters=parameters).keys())
    # one pass over the dataframe yields both frequency and performance DFGs
    dfg, dfg_perf = df_statistics.get_dfg_graph(
        dataframe,
        activity_key=activity_key,
        timestamp_key=timestamp_key,
        case_id_glue=case_id_glue,
        sort_caseid_required=False,
        sort_timestamp_along_case_id=False,
        measure="both")
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        dec_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    # keep performance annotations only for edges that survived cleaning
    dfg_perf = {edge: perf for edge, perf in dfg_perf.items() if edge in dfg}
    net, im, fm = inductive_miner.apply_dfg(dfg,
                                            parameters,
                                            activities=activities,
                                            start_activities=start_activities,
                                            end_activities=end_activities)
    spaths = get_shortest_paths(net)
    bpmn_graph, el_corr, inv_el_corr, el_corr_keys_map = petri_to_bpmn.apply(
        net, im, fm)
    aggregated_statistics = get_decorations_from_dfg_spaths_acticount(
        net, dfg_perf, spaths, activities_count, variant="performance")
    # computed for completeness; the embedding step below is disabled
    bpmn_aggreg_statistics = convert_performance_map.convert_performance_map_to_bpmn(
        aggregated_statistics, inv_el_corr)
    #bpmn_graph = bpmn_embedding.embed_info_into_bpmn(bpmn_graph, bpmn_aggreg_statistics, "performance")
    bpmn_graph = bpmn_diagram_layouter.apply(bpmn_graph)
    bpmn_string = bpmn_exporter.get_string_from_bpmn(bpmn_graph)
    # SVG for display, DOT for the base64 payload
    gviz = bpmn_vis_factory.apply_petri(
        net,
        im,
        fm,
        aggregated_statistics=aggregated_statistics,
        variant="performance",
        parameters={"format": "svg"})
    gviz2 = bpmn_vis_factory.apply_petri(
        net,
        im,
        fm,
        aggregated_statistics=aggregated_statistics,
        variant="performance",
        parameters={"format": "dot"})
    gviz_base64 = get_base64_from_file(gviz2.name)
    ret_graph = get_graph.get_graph_from_petri(net, im, fm)
    return get_base64_from_file(gviz.name), export_petri_as_string(
        net, im, fm
    ), ".pnml", "parquet", activities, start_activities, end_activities, gviz_base64, ret_graph, "indbpmn", "perf", bpmn_string, ".bpmn", activity_key
def apply(dataframe, parameters=None):
    """
    Gets the Petri net through Inductive Miner, decorated by frequency metric

    Parameters
    ------------
    dataframe
        Dataframe
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}
    # resolve tunables, falling back to the library-wide defaults
    dec_factor = parameters.get("decreasingFactor", constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)
    timestamp_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   xes.DEFAULT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(pm4_constants.PARAMETER_CONSTANT_CASEID_KEY,
                                  CASE_CONCEPT_NAME)
    # ask the auto filter to also return the end-activities count dictionary
    parameters[pm4_constants.RETURN_EA_COUNT_DICT_AUTOFILTER] = True
    dataframe = attributes_filter.filter_df_keeping_spno_activities(
        dataframe,
        activity_key=activity_key,
        max_no_activities=constants.MAX_NO_ACTIVITIES)
    dataframe, end_activities = auto_filter.apply_auto_filter(
        dataframe, parameters=parameters)
    end_activities = list(end_activities.keys())
    activities_count = attributes_filter.get_attribute_values(
        dataframe, activity_key, parameters=parameters)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            dataframe, parameters=parameters).keys())
    # frequency-only DFG (default measure)
    dfg = df_statistics.get_dfg_graph(dataframe,
                                      activity_key=activity_key,
                                      timestamp_key=timestamp_key,
                                      case_id_glue=case_id_glue,
                                      sort_caseid_required=False,
                                      sort_timestamp_along_case_id=False)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        dec_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    net, im, fm = inductive_miner.apply_dfg(dfg,
                                            parameters,
                                            activities=activities,
                                            start_activities=start_activities,
                                            end_activities=end_activities)
    spaths = get_shortest_paths(net)
    aggregated_statistics = get_decorations_from_dfg_spaths_acticount(
        net, dfg, spaths, activities_count, variant="frequency")
    gviz = pn_vis_factory.apply(net,
                                im,
                                fm,
                                parameters={"format": "svg"},
                                variant="frequency",
                                aggregated_statistics=aggregated_statistics)
    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))
    ret_graph = get_graph.get_graph_from_petri(net, im, fm)
    return get_base64_from_gviz(gviz), export_petri_as_string(
        net, im, fm
    ), ".pnml", "parquet", activities, start_activities, end_activities, gviz_base64, ret_graph, "inductive", "freq", None, "", activity_key
def apply(df, parameters=None, classic_output=False):
    """
    Gets a simple model out of a Pandas dataframe

    Parameters
    -------------
    df
        Pandas dataframe
    parameters
        Parameters of the algorithm, including:
            maximum_number_activities -> Maximum number of activities to keep
            discovery_algorithm -> Discovery algorithm to use (alpha, inductive)
            desidered_output -> Desidered output of the algorithm (default: Petri)
            include_filtered_df -> Include the filtered dataframe in the output
            include_dfg_frequency -> Include the DFG of frequencies in the output
            include_dfg_performance -> Include the DFG of performance in the output
            include_filtered_dfg_frequency -> Include the filtered DFG of frequencies in the output
            include_filtered_dfg_performance -> Include the filtered DFG of performance in the output
    classic_output
        Determine if the output shall contains directly the objects (e.g. net, initial_marking, final_marking)
        or can return a more detailed dictionary
    """
    if parameters is None:
        parameters = {}
    # fill in the standard column-name parameters when the caller omitted them
    parameters.setdefault(PARAMETER_CONSTANT_CASEID_KEY, CASE_CONCEPT_NAME)
    parameters.setdefault(PARAMETER_CONSTANT_ACTIVITY_KEY, DEFAULT_NAME_KEY)
    parameters.setdefault(PARAMETER_CONSTANT_TIMESTAMP_KEY, DEFAULT_TIMESTAMP_KEY)
    # the attribute key defaults to whatever activity key is now in place
    parameters.setdefault(PARAMETER_CONSTANT_ATTRIBUTE_KEY,
                          parameters[PARAMETER_CONSTANT_ACTIVITY_KEY])
    returned_dictionary = {}
    caseid_glue = parameters[PARAMETER_CONSTANT_CASEID_KEY]
    activity_key = parameters[PARAMETER_CONSTANT_ACTIVITY_KEY]
    timest_key = parameters[PARAMETER_CONSTANT_TIMESTAMP_KEY]
    net = None
    initial_marking = None
    final_marking = None
    bpmn_graph = None
    # behavioral options with their defaults
    maximum_number_activities = parameters.get("maximum_number_activities", 20)
    discovery_algorithm = parameters.get("discovery_algorithm", "alphaclassic")
    desidered_output = parameters.get("desidered_output", "petri")
    include_filtered_df = parameters.get("include_filtered_df", True)
    include_dfg_frequency = parameters.get("include_dfg_frequency", True)
    include_dfg_performance = parameters.get("include_dfg_performance", True)
    include_filtered_dfg_frequency = parameters.get(
        "include_filtered_dfg_frequency", True)
    include_filtered_dfg_performance = parameters.get(
        "include_filtered_dfg_performance", True)
    df = attributes_filter.filter_df_keeping_spno_activities(
        df, activity_key=activity_key,
        max_no_activities=maximum_number_activities)
    # algorithm-specific pre-filtering of the dataframe
    filtered_df = None
    if "alpha" in discovery_algorithm:
        filtered_df = start_activities_filter.apply_auto_filter(
            df, parameters=parameters)
        filtered_df = end_activities_filter.apply_auto_filter(
            filtered_df, parameters=parameters)
        filtered_df = filter_topvariants_soundmodel.apply(
            filtered_df, parameters=parameters)
    elif "inductive" in discovery_algorithm:
        filtered_df = auto_filter.apply_auto_filter(df, parameters=parameters)
    # frequency + performance DFGs on both the raw and the filtered frames
    dfg_frequency, dfg_performance = dfg_util.get_dfg_graph(
        df,
        measure="both",
        perf_aggregation_key="mean",
        case_id_glue=caseid_glue,
        activity_key=activity_key,
        timestamp_key=timest_key)
    filtered_dfg_frequency, filtered_dfg_performance = dfg_util.get_dfg_graph(
        filtered_df,
        measure="both",
        perf_aggregation_key="mean",
        case_id_glue=caseid_glue,
        activity_key=activity_key,
        timestamp_key=timest_key)
    if "alpha" in discovery_algorithm:
        net, initial_marking, final_marking = alpha_miner.apply_dfg(
            filtered_dfg_frequency, parameters=parameters)
    if filtered_df is not None and include_filtered_df:
        returned_dictionary["filtered_df"] = filtered_df
    # Petri-net pieces are only included when a Petri output was requested
    if desidered_output == "petri":
        for key, value in (("net", net), ("initial_marking", initial_marking),
                           ("final_marking", final_marking)):
            if value is not None:
                returned_dictionary[key] = value
    if bpmn_graph is not None and desidered_output == "bpmn":
        returned_dictionary["bpmn_graph"] = bpmn_graph
    # DFG pieces are gated by their individual include_* flags
    for key, value, wanted in (
            ("dfg_frequency", dfg_frequency, include_dfg_frequency),
            ("dfg_performance", dfg_performance, include_dfg_performance),
            ("filtered_dfg_frequency", filtered_dfg_frequency,
             include_filtered_dfg_frequency),
            ("filtered_dfg_performance", filtered_dfg_performance,
             include_filtered_dfg_performance)):
        if value is not None and wanted:
            returned_dictionary[key] = value
    if classic_output:
        if net is not None and desidered_output == "petri":
            return net, initial_marking, final_marking
    return returned_dictionary
def apply(dataframe, parameters=None):
    """
    Gets the performance HNet

    Parameters
    ------------
    dataframe
        Dataframe
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}
    # resolve tunables, falling back to the web-service defaults
    dec_factor = parameters.get("decreasingFactor",
                                ws_constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)
    timestamp_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   xes.DEFAULT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(pm4_constants.PARAMETER_CONSTANT_CASEID_KEY,
                                  CASE_CONCEPT_NAME)
    # ask the auto filter to also return the end-activities count dictionary
    parameters[pm4_constants.RETURN_EA_COUNT_DICT_AUTOFILTER] = True
    dataframe = attributes_filter.filter_df_keeping_spno_activities(
        dataframe,
        activity_key=activity_key,
        max_no_activities=ws_constants.MAX_NO_ACTIVITIES)
    dataframe, end_activities_count = auto_filter.apply_auto_filter(
        dataframe, parameters=parameters)
    activities_count = attributes_filter.get_attribute_values(
        dataframe, activity_key, parameters=parameters)
    start_activities_count = start_activities_filter.get_start_activities(
        dataframe, parameters=parameters)
    activities = list(activities_count.keys())
    start_activities = list(start_activities_count.keys())
    end_activities = list(end_activities_count.keys())
    # one pass over the dataframe yields both frequency and performance DFGs
    dfg_frequency, dfg_performance = df_statistics.get_dfg_graph(
        dataframe,
        case_id_glue=case_id_glue,
        activity_key=activity_key,
        timestamp_key=timestamp_key,
        measure="both",
        sort_caseid_required=False,
        sort_timestamp_along_case_id=False)
    heu_net = HeuristicsNet(dfg_frequency,
                            performance_dfg=dfg_performance,
                            activities=activities,
                            start_activities=start_activities,
                            end_activities=end_activities,
                            activities_occurrences=activities_count)
    heu_net.calculate(dfg_pre_cleaning_noise_thresh=ws_constants.
                      DEFAULT_DFG_CLEAN_MULTIPLIER * dec_factor)
    # SVG for display, DOT for the base64 payload
    vis = heu_vis_factory.apply(heu_net, parameters={"format": "svg"})
    vis2 = heu_vis_factory.apply(heu_net, parameters={"format": "dot"})
    gviz_base64 = get_base64_from_file(vis2.name)
    return get_base64_from_file(vis.name), None, "", "parquet", activities, start_activities, end_activities, gviz_base64, [], "heuristics", "perf", None, "", activity_key
def apply(dataframe, parameters=None):
    """
    Gets the process tree using Inductive Miner Directly-Follows

    Parameters
    ------------
    dataframe
        Dataframe
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}
    # resolve tunables, falling back to the library-wide defaults
    dec_factor = parameters.get("decreasingFactor", constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)
    timestamp_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   xes.DEFAULT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(pm4_constants.PARAMETER_CONSTANT_CASEID_KEY,
                                  CASE_CONCEPT_NAME)
    # ask the auto filter to also return the end-activities count dictionary
    parameters[pm4_constants.RETURN_EA_COUNT_DICT_AUTOFILTER] = True
    dataframe = attributes_filter.filter_df_keeping_spno_activities(
        dataframe,
        activity_key=activity_key,
        max_no_activities=constants.MAX_NO_ACTIVITIES)
    dataframe, end_activities = auto_filter.apply_auto_filter(
        dataframe, parameters=parameters)
    end_activities = list(end_activities.keys())
    activities_count = attributes_filter.get_attribute_values(
        dataframe, activity_key, parameters=parameters)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            dataframe, parameters=parameters).keys())
    # frequency-only DFG (default measure)
    dfg = df_statistics.get_dfg_graph(dataframe,
                                      activity_key=activity_key,
                                      timestamp_key=timestamp_key,
                                      case_id_glue=case_id_glue,
                                      sort_caseid_required=False,
                                      sort_timestamp_along_case_id=False)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        dec_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    tree = inductive_miner.apply_tree_dfg(dfg,
                                          parameters,
                                          activities=activities,
                                          start_activities=start_activities,
                                          end_activities=end_activities)
    gviz = pt_vis_factory.apply(tree, parameters={"format": "svg"})
    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))
    return get_base64_from_gviz(gviz), None, "", "parquet", activities, start_activities, end_activities, gviz_base64, [], "tree", "freq", None, "", activity_key