def build_from_csv(self, path, parameters=None):
    """
    Builds the handler from the specified path to a CSV file

    Parameters
    -------------
    path
        Path to the log file
    parameters
        Parameters of the algorithm
    """
    if parameters is None:
        parameters = {}
    activity_key = parameters.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY)
    timestamp_key = parameters.get(constants.PARAMETER_CONSTANT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(constants.PARAMETER_CONSTANT_CASEID_KEY)
    recognized_format = format_recognition.get_format_from_csv(path)
    sep = parameters.get("sep", recognized_format.delimiter)
    quotechar = parameters.get("quotechar", recognized_format.quotechar)
    if quotechar is not None:
        self.dataframe = csv_import_adapter.import_dataframe_from_path(path, sep=sep, quotechar=quotechar)
    else:
        self.dataframe = csv_import_adapter.import_dataframe_from_path(path, sep=sep)
    case_id_glue1, activity_key1, timestamp_key1 = assign_column_correspondence(self.dataframe)
    if case_id_glue is None:
        case_id_glue = case_id_glue1
    if activity_key is None:
        activity_key = activity_key1
    if timestamp_key is None:
        timestamp_key = timestamp_key1
    if activity_key != xes.DEFAULT_NAME_KEY:
        self.dataframe[xes.DEFAULT_NAME_KEY] = self.dataframe[activity_key]
    if timestamp_key != xes.DEFAULT_TIMESTAMP_KEY:
        self.dataframe[xes.DEFAULT_TIMESTAMP_KEY] = self.dataframe[timestamp_key]
    if case_id_glue != CASE_CONCEPT_NAME:
        self.dataframe[CASE_CONCEPT_NAME] = self.dataframe[case_id_glue]
    self.postloading_processing_dataframe()
    self.dataframe = self.dataframe.sort_values([DEFAULT_TIMESTAMP_KEY, ws_constants.DEFAULT_EVENT_INDEX_KEY])
    if str(self.dataframe[CASE_CONCEPT_NAME].dtype) != "object":
        self.dataframe[CASE_CONCEPT_NAME] = self.dataframe[CASE_CONCEPT_NAME].astype(str)
    if ws_constants.DEFAULT_CASE_INDEX_KEY not in self.dataframe:
        self.dataframe[ws_constants.DEFAULT_CASE_INDEX_KEY] = self.dataframe.groupby(CASE_CONCEPT_NAME).ngroup()
    if not self.is_lazy:
        self.sort_dataframe_by_case_id()
        self.build_reduced_dataframe()
        self.build_variants_df()
        self.build_grouped_dataframe()
        self.build_reduced_grouped_dataframe()
        self.calculate_variants_number()
        self.calculate_cases_number()
        self.calculate_events_number()
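# Usage sketch for build_from_csv. The handler class name below is hypothetical
# (the method clearly lives on a dataframe-backed log handler); only parameter
# keys actually read by the method are passed, and the CSV path is illustrative.
handler = DataframeHandler(is_lazy=False)  # hypothetical class, for illustration only
handler.build_from_csv(
    "running-example.csv",
    parameters={constants.PARAMETER_CONSTANT_CASEID_KEY: "case:concept:name",
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name",
                constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "time:timestamp"})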
def test_attrValueDifferentPersons_neg(self):
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    attr_value_different_persons_neg = ltl_checker.attr_value_different_persons(
        df, "check ticket",
        parameters={ltl_checker.Parameters.POSITIVE: False})
def test_alpha_miner_dataframe(self):
    from pm4py.objects.log.adapters.pandas import csv_import_adapter
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    from pm4py.algo.discovery.alpha import factory as alpha_miner
    net, im, fm = alpha_miner.apply(
        df, variant=alpha_miner.ALPHA_VERSION_CLASSIC)
def execute_script():
    # loads the dataframe from the CSV file
    csv_path = os.path.join("..", "tests", "input_data", "running-example.csv")
    df = csv_import_adapter.import_dataframe_from_path(csv_path)
    # calculates the Matrix Container object
    mco = sna_transformer.apply(df)
    # calculates the Handover of Work matrix
    hw_matrix = handover_of_work.apply(mco)
    # calculates the Similar Activities matrix
    sim_act_matrix = similar_activities.apply(mco)
    # shows the Handover of Work graph
    gviz = sna_vis_factory.apply(mco, hw_matrix, parameters={"format": "svg"})
    sna_vis_factory.view(gviz)
    # shows the Similar Activities graph
    gviz = sna_vis_factory.apply(mco, sim_act_matrix,
                                 parameters={"format": "svg", "threshold": 0.0})
    sna_vis_factory.view(gviz)
    # calculates and shows the Real Handover of Work matrix
    real_hw_matrix = real_handover_of_work.apply(mco, parameters={"format": "svg"})
    gviz = sna_vis_factory.apply(mco, real_hw_matrix)
    sna_vis_factory.view(gviz)
def test_fourEeyesPrinciple_neg(self):
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    filt_foureyes_neg = ltl_checker.four_eyes_principle(
        df, "check ticket", "pay compensation",
        parameters={ltl_checker.Parameters.POSITIVE: False})
def test_AeventuallyB_neg(self):
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    filt_A_ev_B_neg = ltl_checker.A_eventually_B(
        df, "check ticket", "pay compensation",
        parameters={ltl_checker.Parameters.POSITIVE: False})
def import_event_stream(path, parameters=None):
    """
    Imports a CSV file from the given path

    Parameters
    ----------
    path:
        Input CSV file path
    parameters
        Parameters of the algorithm, including
            sep -> column separator
            quotechar -> (if specified) character that starts/ends big strings in the CSV
            nrows -> (if specified) maximum number of rows to read from the CSV
            sort -> boolean value that tells if the CSV should be ordered
            sort_field -> if the sort option is enabled, the CSV is automatically sorted by the specified column

    Returns
    -------
    stream : :class:`pm4py.log.log.EventStream`
        An event stream
    """
    if parameters is None:
        parameters = {}
    sep = parameters.get("sep", ",")
    quotechar = parameters.get("quotechar", None)
    nrows = parameters.get("nrows", None)
    sort = parameters.get("sort", False)
    sort_field = parameters.get("sort_field", "time:timestamp")
    insert_event_indexes = parameters.get("insert_event_indexes", False)
    timest_format = parameters.get("timest_format", None)
    timest_columns = parameters.get("timest_columns", None)

    df = import_dataframe_from_path(path, sep=sep, quotechar=quotechar, nrows=nrows,
                                    sort=sort, sort_field=sort_field,
                                    timest_format=timest_format, timest_columns=timest_columns)
    event_log = log_conv_fact.apply(df, variant=log_conv_fact.TO_EVENT_STREAM)
    if insert_event_indexes:
        event_log.insert_event_index_as_event_attribute()
    return event_log
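# Usage sketch for import_event_stream above (string-keyed parameters, exactly
# the keys the function reads; the CSV path is illustrative):
stream = import_event_stream(
    os.path.join("input_data", "running-example.csv"),
    parameters={"sep": ",", "sort": True, "insert_event_indexes": True})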
def generate_dataframe(filename):
    try:
        # import the CSV into a pandas dataframe, specifying sep as the separator
        dataframe = csv_import_adapter.import_dataframe_from_path(filename, sep=",")
        return dataframe
    except FileNotFoundError:
        print("Invalid file name")
        exit()
def test_petrinet_receipt_df(self):
    # to avoid static-method warnings in tests, which by construction of the
    # unittest package have to be written this way
    self.dummy_variable = "dummy_value"
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join(INPUT_DATA_DIR, "running-example.csv"))
    net, im, fm = heuristics_miner.apply(df)
    gviz = pn_vis_factory.apply(net, im, fm)
    del gviz
def test_AnextBnextC_pos(self):
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    filt_A_next_B_next_C_pos = ltl_checker.A_next_B_next_C(
        df, "check ticket", "decide", "pay compensation",
        parameters={ltl_checker.Parameters.POSITIVE: True})
def test_performance_spectrum(self):
    log = xes_importer.apply(
        os.path.join("input_data", "running-example.xes"))
    from pm4py.statistics.performance_spectrum import factory as pspectrum
    ps = pspectrum.apply(log, ["register request", "decide"])
    from pm4py.objects.log.adapters.pandas import csv_import_adapter
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    ps = pspectrum.apply(df, ["register request", "decide"])
def export_results_to_process_log_xes(self):
    """
    Exports the results of the demo to an XES file with the same name as the
    previously exported CSV file. We load this CSV file and transform it into
    an XES file.

    :return:
    """
    # Create the filename for the current self.min_category_export_score
    self.results_xes_path = self.results_log_path_prefix + "_" + \
        str(int(round(self.min_category_export_score * 100))) + "_thresh.xes"

    # Read the previously generated CSV file and transform it into a log
    dataframe = csv_import_adapter.import_dataframe_from_path(
        self.results_log_csv_path, sep=",")
    log = conversion_factory.apply(
        dataframe,
        parameters={
            constants.PARAMETER_CONSTANT_CASEID_KEY: "case:concept:name",
            constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "category_name",
            constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "time:timestamp"
        })

    # Add relevant data for ProM import
    log._classifiers = {
        'Event Name': ['concept:name'],
        '(Event Name AND Lifecycle transition)': ['concept:name', 'lifecycle:transition']
    }
    log._extensions = {
        'Time': {'prefix': 'time', 'uri': 'http://www.xes-standard.org/time.xesext'},
        'Lifecycle': {'prefix': 'lifecycle', 'uri': 'http://www.xes-standard.org/lifecycle.xesext'},
        'Concept': {'prefix': 'concept', 'uri': 'http://www.xes-standard.org/concept.xesext'}
    }
    for trace in log._list:
        # set the trace concept:name to str instead of int, also for ProM import
        trace._attributes["concept:name"] = str(trace._attributes["concept:name"])
        # set org:resource to string as well
        for item in trace._list:
            item["org:resource"] = str(item["org:resource"])

    # Export the results to XES
    xes_exporter.export_log(log, self.results_xes_path)
    logger.info("Exported demo detections to: %s" % self.results_xes_path)
def test_filtering_paths(self):
    # to avoid static-method warnings in tests, which by construction of the
    # unittest package have to be written this way
    self.dummy_variable = "dummy_value"
    input_log = os.path.join(INPUT_DATA_DIR, "running-example.csv")
    dataframe = csv_import_adapter.import_dataframe_from_path(input_log, sep=',')
    df3 = paths_filter.apply(dataframe, [("examine casually", "check ticket")], {"positive": False})
    del df3
    df3 = paths_filter.apply(dataframe, [("examine casually", "check ticket")], {"positive": True})
    del df3
def test_dfCasedurationPlotSemilogx(self):
    # to avoid static-method warnings in tests, which by construction of the
    # unittest package have to be written this way
    self.dummy_variable = "dummy_value"
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "receipt.csv"))
    x, y = pd_case_statistics.get_kde_caseduration(df)
    json = pd_case_statistics.get_kde_caseduration_json(df)
    del json
def test_dfDateAttribute(self):
    # to avoid static-method warnings in tests, which by construction of the
    # unittest package have to be written this way
    self.dummy_variable = "dummy_value"
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "receipt.csv"))
    x, y = pd_attributes_filter.get_kde_date_attribute(df)
    json = pd_attributes_filter.get_kde_date_attribute_json(df)
    del json
def test_inductiveminer_df(self):
    log = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    net, im, fm = inductive_miner.apply(log)
    aligned_traces_tr = tr_factory.apply(log, net, im, fm)
    aligned_traces_alignments = align_factory.apply(log, net, im, fm)
    evaluation = eval_factory.apply(log, net, im, fm)
    fitness = rp_fit_factory.apply(log, net, im, fm)
    precision = precision_factory.apply(log, net, im, fm)
    generalization = generalization_factory.apply(log, net, im, fm)
    simplicity = simplicity_factory.apply(net)
def test_footprints_tree_df(self):
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    from pm4py.algo.discovery.inductive import algorithm as inductive_miner
    log = converter.apply(df)
    tree = inductive_miner.apply_tree(log)
    from pm4py.algo.discovery.footprints import algorithm as footprints_discovery
    fp_df = footprints_discovery.apply(df)
    fp_tree = footprints_discovery.apply(tree)
    from pm4py.algo.conformance.footprints import algorithm as footprints_conformance
    conf = footprints_conformance.apply(fp_df, fp_tree)
def test_pandas(self):
    # to avoid static-method warnings in tests, which by construction of the
    # unittest package have to be written this way
    self.dummy_variable = "dummy_value"
    log = csv_import_adapter.import_dataframe_from_path(
        os.path.join("..", "tests", "input_data", "running-example.csv"))
    hw_values = sna_factory.apply(log, variant="handover")
    wt_values = sna_factory.apply(log, variant="working_together")
    sub_values = sna_factory.apply(log, variant="subcontracting")
def test_dfNumericAttribute(self):
    # to avoid static-method warnings in tests, which by construction of the
    # unittest package have to be written this way
    self.dummy_variable = "dummy_value"
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "roadtraffic100traces.csv"))
    x, y = pd_attributes_filter.get_kde_numeric_attribute(df, "amount")
    json = pd_attributes_filter.get_kde_numeric_attribute_json(df, "amount")
    del json
def test_filtering_timeframe(self):
    # to avoid static-method warnings in tests, which by construction of the
    # unittest package have to be written this way
    self.dummy_variable = "dummy_value"
    input_log = os.path.join(INPUT_DATA_DIR, "receipt.csv")
    df = csv_import_adapter.import_dataframe_from_path(input_log, sep=',')
    df1 = timestamp_filter.apply_events(df, "2011-03-09 00:00:00", "2012-01-18 23:59:59")
    df2 = timestamp_filter.filter_traces_intersecting(df, "2011-03-09 00:00:00", "2012-01-18 23:59:59")
    df3 = timestamp_filter.filter_traces_contained(df, "2011-03-09 00:00:00", "2012-01-18 23:59:59")
    del df1
    del df2
    del df3
def read_xes(data_dir, dataset, aggregate_type, mode="pruning"): prune_parameter_freq = 350 prune_parameter_time = -1 #keep all #read the xes file if dataset in "BPIC14": # log = csv_importer.import_event_stream(os.path.join(data_dir, dataset + ".csv")) data = csv_import_adapter.import_dataframe_from_path(os.path.join( data_dir, dataset + ".csv"), sep=";") data['case:concept:name'] = data['Incident ID'] data['time:timestamp'] = data['DateStamp'] data['concept:name'] = data['IncidentActivity_Type'] log = conversion_factory.apply(data) elif dataset == "Unrineweginfectie": data = csv_import_adapter.import_dataframe_from_path(os.path.join( data_dir, dataset + ".csv"), sep=",") data['case:concept:name'] = data['Patientnummer'] data['time:timestamp'] = data['Starttijd'] data['concept:name'] = data['Aciviteit'] log = conversion_factory.apply(data) else: log = xes_import_factory.apply(os.path.join(data_dir, dataset + ".xes")) data = get_dataframe_from_event_stream(log) # dataframe = log_converter.apply(log, variant=log_converter.Variants.TO_DATA_FRAME) # dfg_freq = dfg_factory.apply(log,variant="frequency") # dfg_time =get_dfg_time(data,aggregate_type,dataset) if aggregate_type == AggregateType.FREQ: dfg = dfg_factory.apply(log, variant="frequency") else: dfg = get_dfg_time(data, aggregate_type, dataset) """Getting Start and End activities""" # log = xes_importer.import_log(xes_file) log_start = start_activities_filter.get_start_activities(log) log_end = end_activities_filter.get_end_activities(log) # return dfg_freq,dfg_time return dfg
def test_pdimp_xesexp(self):
    log0 = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    log = log_conv_factory.apply(log0, variant=log_conv_factory.TO_EVENT_LOG)
    stream = log_conv_factory.apply(
        log0, variant=log_conv_factory.TO_EVENT_STREAM)
    df = log_conv_factory.apply(log0, variant=log_conv_factory.TO_DATAFRAME)
    xes_exporter_factory.apply(log, "ru.xes")
    xes_exporter_factory.apply(stream, "ru.xes")
    xes_exporter_factory.apply(df, "ru.xes")
    os.remove('ru.xes')
def calculate_footprint_matrix(filename):
    output = os.path.splitext(filename)[0].lower()
    df = csv_import_adapter.import_dataframe_from_path(filename, sep=";")
    pitches = constants.PITCHES + ["Pause"]
    pitches_range = np.arange(len(pitches))
    footprint_matrix = pd.DataFrame('#', index=pitches_range, columns=pitches_range)
    rename_dict = {i: pitch for i, pitch in enumerate(pitches)}
    footprint_matrix.rename(index=rename_dict, columns=rename_dict, inplace=True)

    # Consider transitions only within a case
    # d = {}
    # for index, row in df.iterrows():
    #     if row['Case_ID'] not in d.keys():
    #         d[row['Case_ID']] = []
    #     d[row['Case_ID']].append(row['Event'])

    # Consider transitions beyond a case
    d = {"0": []}
    for _, row in df.iterrows():
        d["0"].append(row["Event"])

    for _, value in d.items():
        for prev, curr, nxt in zip([None] + value[:-1], value, value[1:] + [None]):
            if curr is None:
                continue
            if curr is not None and curr[:-1] in constants.PITCHES:
                curr = curr[:-1]
            if prev is not None and prev[:-1] in constants.PITCHES:
                prev = prev[:-1]
            if nxt is not None and nxt[:-1] in constants.PITCHES:
                nxt = nxt[:-1]
            if prev is not None:
                footprint_matrix = calculate_footprint_symbol(
                    footprint_matrix, prev, curr, '<=', '=>')
            if nxt is not None:
                footprint_matrix = calculate_footprint_symbol(
                    footprint_matrix, nxt, curr, '=>', '<=')
    return footprint_matrix
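# Usage sketch for calculate_footprint_matrix. The path "melody.csv" is an
# assumption for illustration; the file must be ";"-separated with an "Event"
# column, as the function expects.
footprint = calculate_footprint_matrix("melody.csv")
print(footprint)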
def create_dataframe():
    """ create dataframe """
    dataframe = csv_import_adapter.import_dataframe_from_path(
        'concatenated_files.csv', sep=",")
    dataframe = dataframe.rename(
        columns={
            'correlationId': 'case:concept:name',
            'timestamp': 'time:timestamp',
            'label': 'concept:name',
            'approach': 'case:approach',
            'errortype': 'case:errortype',
            'status': 'case:status'
        })
    return dataframe
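# Usage sketch: after the rename, the dataframe uses pm4py's default column
# names, so it can be converted to a log directly. The conversion_factory
# import mirrors the other snippets here and is an assumption in this context.
df = create_dataframe()  # assumes concatenated_files.csv is in the working directory
log = conversion_factory.apply(df)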
def execute_script():
    df = csv_import_adapter.import_dataframe_from_path(
        "../tests/input_data/receipt.csv")
    act_count = dict(df["concept:name"].value_counts())
    dfg, performance_dfg = correlation_miner.apply(
        df, variant=correlation_miner.Variants.CLASSIC)
    gviz_freq = dfg_vis.apply(dfg, activities_count=act_count,
                              variant=dfg_vis.Variants.FREQUENCY,
                              parameters={"format": "svg"})
    dfg_vis.view(gviz_freq)
    gviz_perf = dfg_vis.apply(performance_dfg, activities_count=act_count,
                              variant=dfg_vis.Variants.PERFORMANCE,
                              parameters={"format": "svg"})
    dfg_vis.view(gviz_perf)
def execute_script():
    # import the CSV & create the log
    dataframe = csv_import_adapter.import_dataframe_from_path(
        datasourceMockdata(), sep=";")
    dataframe = dataframe.rename(columns={
        'coID': 'case:concept:name',
        'Activity': 'concept:name'
    })
    log = conversion_factory.apply(dataframe)

    # option 1: Directly-Follows Graph, representing frequency or performance
    parameters = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name"}
    variant = 'frequency'
    dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
    gviz1 = dfg_vis_factory.apply(dfg, log=log, variant=variant, parameters=parameters)
    dfg_vis_factory.view(gviz1)

    # option 2: Heuristics Miner, which acts on the Directly-Follows Graph and
    # finds common structures; output: Heuristic Net (.svg)
    heu_net = heuristics_miner.apply_heu(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.00
        })
    gviz2 = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "svg"})
    hn_vis.view(gviz2)

    # option 3: Petri Net based on the Heuristics Miner (.png)
    net, im, fm = heuristics_miner.apply(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.00
        })
    gviz3 = petri_vis.apply(
        net, im, fm,
        parameters={petri_vis.Variants.WO_DECORATION.value.Parameters.FORMAT: "png"})
    petri_vis.view(gviz3)
def read_csv(file_path, sep=",", quotechar=None, encoding=None, nrows=None, timest_format=None): """ Reads an event log in the CSV format (Pandas adapter) Parameters ---------------- file_path File path sep Separator; default: , quotechar Quote char; default: None encoding Encoding; default: default of Pandas nrows (If specified) number of rows timest_format Format of the timestamp columns Returns ---------------- dataframe Dataframe """ from pm4py.objects.log.adapters.pandas import csv_import_adapter df = csv_import_adapter.import_dataframe_from_path( file_path, sep=sep, quotechar=quotechar, encoding=encoding, nrows=nrows, timest_format=timest_format) if len(df.columns) < 2: logging.error( "Less than three columns were imported from the CSV file. Please check the specification of the separation and the quote character!" ) else: logging.warning( "Please specify the format of the dataframe: df = pm4py.format_dataframe(df, case_id='<name of the case ID column>', activity_key='<name of the activity column>', timestamp_key='<name of the timestamp column>')" ) return df
def export_to_xes(filename):
    output = os.path.splitext(filename)[0]
    df = csv_import_adapter.import_dataframe_from_path(filename, sep=";")
    df = df.rename(
        columns={
            "Case_ID": "case:concept:name",
            "Event": "concept:name",
            "Type": "org:type",
            "Order": "org:order",
            "Is_Chord": "org:is_chord"
        })
    # create an internal XES log from the pandas dataframe
    log = conversion_factory.apply(df)
    # save the XES log
    xes_exporter.export_log(log, f"{output}.xes")
def main(path0, datasetname):
    # make sure the path ends with a trailing slash
    if not path0.endswith('/'):
        path = path0 + "/"
    else:
        path = path0
    df = csv_importer.import_dataframe_from_path(
        path + datasetname + "_table2_on_file.csv", sep=";")
    patterns = create_patterns_list(path + datasetname + "_new_patterns_filtered.subs")
    rule = rules_log_manage(path + "rules_log.txt")
    data = prepare_data(patterns, df, rule)
    data.to_csv(path + datasetname + '_pattern_occurrence_matrix.csv', index=False)
    return "file created successfully!"
def import_event_stream(path, parameters=None):
    """
    Imports a CSV file from the given path

    Parameters
    ----------
    path:
        Input CSV file path
    parameters
        Parameters of the algorithm, including
            Parameters.SEP -> column separator
            Parameters.QUOTECHAR -> (if specified) character that starts/ends big strings in the CSV
            Parameters.NROWS -> (if specified) maximum number of rows to read from the CSV
            Parameters.SORT -> boolean value that tells if the CSV should be ordered
            Parameters.SORT_FIELD -> if the sort option is enabled, the CSV is automatically sorted by the specified column
            Parameters.INSERT_EVENT_INDEXES -> events get their index as an additional payload
            Parameters.TIMEST_FORMAT -> specifies the timestamp format; if not specified, auto-detection is applied
            Parameters.TIMEST_COLUMNS -> column names of data attributes that contain timestamps
            Parameters.ENCODING -> file encoding

    Returns
    -------
    stream : :class:`pm4py.log.log.EventStream`
        An event stream
    """
    parameters = {} if parameters is None else parameters
    insert_event_indexes = exec_utils.get_param_value(Parameters.INSERT_EVENT_INDEXES, parameters, False)
    df = import_dataframe_from_path(path,
                                    sep=exec_utils.get_param_value(Parameters.SEP, parameters, ","),
                                    quotechar=exec_utils.get_param_value(Parameters.QUOTECHAR, parameters, None),
                                    nrows=exec_utils.get_param_value(Parameters.NROWS, parameters, None),
                                    sort=exec_utils.get_param_value(Parameters.SORT, parameters, False),
                                    sort_field=exec_utils.get_param_value(Parameters.SORT_FIELD, parameters, 'time:timestamp'),
                                    timest_format=exec_utils.get_param_value(Parameters.TIMEST_FORMAT, parameters, None),
                                    timest_columns=exec_utils.get_param_value(Parameters.TIMEST_COLUMNS, parameters, None),
                                    encoding=exec_utils.get_param_value(Parameters.ENCODING, parameters, None))
    stream = log_conv_fact.apply(df, variant=log_conv_fact.TO_EVENT_STREAM)
    if insert_event_indexes:
        stream.insert_event_index_as_event_attribute()
    return stream
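# Usage sketch for the Parameters-based import_event_stream above (only
# members the function actually reads are passed; the path is illustrative):
stream = import_event_stream(
    os.path.join("input_data", "running-example.csv"),
    parameters={Parameters.SEP: ",", Parameters.INSERT_EVENT_INDEXES: True})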