def new_window(self, begin, end, activity=''):
    # increment the id of the window
    if activity:  # when using a detector for an attribute of the activity
        print(f'Generating model for sub-log [{begin} - {end - 1}] - window [{self.window_count[activity]}] - activity [{activity}]')
        self.window_count[activity] += 1
    else:
        print(f'Generating model for sub-log [{begin} - {end - 1}] - window [{self.window_count}]')
        self.window_count += 1

    if self.current_parameters.read_log_as == ReadLogAs.EVENT.name:
        # generate the sub-log for the window
        window = EventStream(self.event_data[begin:end])
        sub_log = log_converter.apply(window, variant=log_converter.Variants.TO_EVENT_LOG)
    elif self.current_parameters.read_log_as == ReadLogAs.TRACE.name:
        sub_log = EventLog(self.event_data[begin:end])
    else:
        print(f'Incorrect window type: {self.current_parameters.read_log_as}.')
        return  # no sub-log can be generated for an unknown window type

    # save the sub-log
    output_path = os.path.join(self.logs_path, self.current_parameters.logname, activity)
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    if activity:
        output_filename = os.path.join(output_path, f'sublog_w{self.window_count[activity]}_{begin}_{end - 1}.xes')
    else:
        output_filename = os.path.join(output_path, f'sublog_w{self.window_count}_{begin}_{end - 1}.xes')
    xes_exporter.apply(sub_log, output_filename)
    self.execute_processes_for_window(sub_log, begin, activity)
def execute_script():
    log_input_directory = "xesinput"
    all_logs_names = os.listdir(log_input_directory)
    all_logs_names = [log for log in all_logs_names if ".xe" in log]

    for logName in all_logs_names:
        # logPath = os.path.join("..", "tests", "inputData", logName)
        log_path = log_input_directory + "\\" + logName
        log = xes_importer.apply(log_path)
        print("\n\n")
        print("log loaded")
        print("Number of traces - ", len(log))
        event_log = log_conversion.apply(log, variant=log_conversion.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)

        exp_log_name = "xescert_exportlogs" + "\\" + "exp_" + logName
        print("exporting log", exp_log_name)
        xes_exporter.apply(log, exp_log_name)
        print("exported log", exp_log_name)

        log, classifier_attr_key = insert_classifier.search_act_class_attr(log)

        classifiers = list(log.classifiers.keys())
        if classifier_attr_key is None and classifiers:
            try:
                print(classifiers)
                log, classifier_attr_key = insert_classifier.insert_activity_classifier_attribute(
                    log, classifiers[0])
                print(classifier_attr_key)
            except:
                print("exception in handling classifier")

        if classifier_attr_key is None:
            classifier_attr_key = "concept:name"

        if len(event_log) > 0 and classifier_attr_key in event_log[0]:
            parameters = {
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: classifier_attr_key
            }
            dfg = dfg_algorithm.apply(log, parameters=parameters)
            gviz = dfg_vis.apply(dfg, log=log, variant="frequency", parameters=parameters)
            # dfg_vis.view(gviz)
            dfg_vis.save(gviz, "xescert_images\\" + logName.replace("xes", "png"))

        print("Reimporting log file just exported - ", exp_log_name)
        log = xes_importer.apply(exp_log_name)
        print("log loaded", exp_log_name)
        print("Number of traces - ", len(log))
        event_log = log_conversion.apply(log, variant=log_conversion.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
def cli(con):
    print("\n\nO2C XES log extractor\n\n")
    ref_type = input("Insert the central document type of the extraction (default: Invoice): ")
    if not ref_type:
        ref_type = "Invoice"
    ext_type = input("Do you want to extract the document log, or the items log (default: document):")
    if not ext_type:
        ext_type = "document"
    keep_first = ext_type == "document"
    min_extr_date = input("Insert the minimum extraction date (default: 2020-01-01 00:00:00): ")
    if not min_extr_date:
        min_extr_date = "2020-01-01 00:00:00"
    gjahr = input("Insert the fiscal year (default: 2020):")
    if not gjahr:
        gjahr = "2020"
    log = apply(con, ref_type=ref_type, keep_first=keep_first, min_extr_date=min_extr_date, gjahr=gjahr)
    path = input("Insert the path where the log should be saved (default: o2c.xes):")
    if not path:
        path = "o2c.xes"
    xes_exporter.apply(log, path)
def download_event_log():
    parameters = request.args.get("parameters")
    parameters = __process_parameters(parameters)
    log = __prepare_event_log(parameters)

    ext_type = parameters["ext_type"] if "ext_type" in parameters else "document_flow_log"
    log_type = __get_log_type_from_ext_type(ext_type)

    if log_type == 0:
        extension = ".jsonocel"
        temp_file = tempfile.NamedTemporaryFile(suffix=extension)
        temp_file.close()
        from pm4pymdl.objects.ocel.exporter import exporter as ocel_exporter
        ocel_exporter.apply(log, temp_file.name)
    elif log_type == 1:
        extension = ".csv"
        temp_file = tempfile.NamedTemporaryFile(suffix=extension)
        temp_file.close()
        log.to_csv(temp_file.name, index=False)
    elif log_type == 2:
        extension = ".xes"
        temp_file = tempfile.NamedTemporaryFile(suffix=extension)
        temp_file.close()
        from pm4py.objects.log.exporter.xes import exporter as xes_exporter
        xes_exporter.apply(log, temp_file.name)

    resp = send_file(temp_file.name,
                     mimetype="text/plain",  # use appropriate type based on file
                     as_attachment=True,
                     conditional=False)
    resp.headers["x-suggested-filename"] = "log" + extension
    return resp
def test_nonstandard_exporter(self):
    log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
    xes_exporter.apply(log, os.path.join("test_output_data", "running-example.xes"),
                       variant=xes_exporter.Variants.LINE_BY_LINE)
    os.remove(os.path.join("test_output_data", "running-example.xes"))
def cli(con):
    print("\n\nAccounting - Transactions for the single document (XES log)\n")
    log = apply(con)
    path = input("Insert the path where the log should be saved (default: bkpf.xes):")
    if not path:
        path = "bkpf.xes"
    xes_exporter.apply(log, path)
def execute_script():
    con = example_connection.get_con()
    log = sapextractor.get_o2c_classic_event_log(con, ref_type="Invoice", keep_first=True,
                                                 min_extr_date="1990-01-01 00:00:00")
    xes_exporter.apply(log, "o2c.xes")
def filterfile(sourceFile, outputFile, patternText, inclusive):
    log = importer.apply(sourceFile)
    activities = attributes_filter.get_attribute_values(log, CONCEPT_NAME)
    filteredLog = attributes_filter.apply(
        log, [patternText],
        parameters={
            attributes_filter.Parameters.ATTRIBUTE_KEY: CONCEPT_NAME,
            attributes_filter.Parameters.POSITIVE: inclusive
        })
    xes_exporter.apply(filteredLog, outputFile)
def cli(con):
    print("\n\nP2P - XES log\n")
    ref_type = input("Provide the central table for the extraction (default: EKKO):")
    if not ref_type:
        ref_type = "EKKO"
    log = apply(con, ref_type=ref_type)
    path = input("Insert the path where the log should be saved (default: p2p.xes): ")
    if not path:
        path = "p2p.xes"
    xes_exporter.apply(log, path)
def test_importExportXESfromGZIP_imp1(self):
    # to avoid static method warnings in tests,
    # that by construction of the unittest package have to be expressed in such way
    self.dummy_variable = "dummy_value"
    log = xes_importer.apply(os.path.join(COMPRESSED_INPUT_DATA, "01_running-example.xes.gz"))
    xes_exporter.apply(log, os.path.join(OUTPUT_DATA_DIR, "01-running-example.xes"),
                       parameters={xes_exporter.Variants.ETREE.value.Parameters.COMPRESS: True})
    os.remove(os.path.join(OUTPUT_DATA_DIR, "01-running-example.xes.gz"))
def test_importExportXEStoXES(self):
    # to avoid static method warnings in tests,
    # that by construction of the unittest package have to be expressed in such way
    self.dummy_variable = "dummy_value"
    log = xes_importer.apply(os.path.join(INPUT_DATA_DIR, "running-example.xes"))
    xes_exporter.apply(log, os.path.join(OUTPUT_DATA_DIR, "running-example-exported.xes"))
    log_imported_after_export = xes_importer.apply(
        os.path.join(OUTPUT_DATA_DIR, "running-example-exported.xes"))
    self.assertEqual(len(log), len(log_imported_after_export))
    os.remove(os.path.join(OUTPUT_DATA_DIR, "running-example-exported.xes"))
def cli(con):
    print("\n\nAccounting Doc Flow XES log extractor\n\n")
    ref_type = input("Insert the central document type of the extraction (default: Goods receipt): ")
    if not ref_type:
        ref_type = "Goods receipt"
    log = apply(con, ref_type=ref_type)
    path = input("Insert the path where the log should be saved (default: doc_flow.xes): ")
    if not path:
        path = "doc_flow.xes"
    xes_exporter.apply(log, path)
def apply_trans(logpath, activities, attributes, predicates, thresholds, location,
                new_activities, window, order_flag, visual_flag):
    """
    Given an event log and a set of rules, feeds them to the event derivation
    algorithm. Computes and returns evaluation metrics for the transformed log,
    together with statistics for the original and transformed logs.

    Parameters:
        logpath (str): Path of the event log
        activities (list of str): Activities in the rule
        attributes (list of str): Attributes in the rule
        predicates (list of str): Predicates in the rule
        thresholds (list of float or str): Thresholds in the rule
        location (list of str): Locations of the derived events in the rule
        new_activities (list of str): Identifiers of the derived events
        window (list of int): Time windows in the rule
        order_flag (bool): Whether the order of events in the log is considered
        visual_flag (bool): Whether the activities in the rule should be retained

    Returns:
        metrics (dict): Evaluation measures (fitness, precision, simplicity
            and generalization) for both logs
    """
    xes_log = importer.apply(logpath)
    df = log_converter.apply(xes_log, variant=log_converter.Variants.TO_DATA_FRAME)
    df["time:timestamp"] = pd.to_datetime(df["time:timestamp"], format='%Y-%m-%d', utc=True)
    transformed_df = deriving_events(df, activities, attributes, predicates, thresholds,
                                     location, new_activities, window, order_flag, visual_flag)
    parameters = {
        log_converter.Variants.TO_EVENT_LOG.value.Parameters.CASE_ID_KEY: 'case:concept:name'
    }
    transformed_xes_log = log_converter.apply(transformed_df, parameters=parameters,
                                              variant=log_converter.Variants.TO_EVENT_LOG)
    xes_exporter.apply(transformed_xes_log, logpath[:-4] + "_modified.xes")
    metrics = dict.fromkeys(np.arange(2))
    metrics[0] = evaluate_logwithmodel(logpath)
    metrics[1] = evaluate_logwithmodel(logpath[:-4] + "_modified.xes")
    return metrics
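# A minimal usage sketch for apply_trans: one rule that derives a
# "High Value Application" event after "Submit Application" events whose
# "amount" attribute exceeds 10000 within a one-hour window. The log path,
# activity names, attribute, and values here are hypothetical and only
# illustrate the expected argument shapes.
metrics = apply_trans(
    logpath="loan.xes",
    activities=["Submit Application"],
    attributes=["amount"],
    predicates=[">"],
    thresholds=[10000.0],
    location=["after"],
    new_activities=["High Value Application"],
    window=[3600],
    order_flag=True,
    visual_flag=True,
)
print(metrics[0])  # evaluation measures for the original log
print(metrics[1])  # evaluation measures for the transformed log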
def test_importExportProblematicLogs(self):
    # to avoid static method warnings in tests,
    # that by construction of the unittest package have to be expressed in such way
    self.dummy_variable = "dummy_value"
    logs = os.listdir(PROBLEMATIC_XES_DIR)
    for log in logs:
        log_full_path = os.path.join(PROBLEMATIC_XES_DIR, log)
        try:
            output_log_path = os.path.join(OUTPUT_DATA_DIR, log)
            log = xes_importer.apply(log_full_path)
            xes_exporter.apply(log, output_log_path)
            log_imported_after_export = xes_importer.apply(output_log_path)
            self.assertEqual(len(log), len(log_imported_after_export))
            os.remove(output_log_path)
        except SyntaxError as e:
            logging.info("SyntaxError on log " + str(log) + ": " + str(e))
def put_event_log(file, caseID, casePrefix) -> str:
    """Cache the event log."""
    id = uuid.uuid4().hex
    # file.save(os.path.join(cache_dir, id + '.xes'))
    filename = file.filename
    if filename.endswith('csv'):
        path = os.path.join(cache_dir, id + '.csv')
        file.save(path)
        if caseID is None:
            raise CaseIdNotFoundError
        if casePrefix is None:
            casePrefix = 'case:'
        log_csv = pd.read_csv(path, sep=',')
        # log_csv.rename(columns={'clientID': 'case:clientID'}, inplace=True)
        parameters = {
            log_conv.Variants.TO_EVENT_LOG.value.Parameters.CASE_ID_KEY: caseID,
            log_conv.Variants.TO_EVENT_LOG.value.Parameters.CASE_ATTRIBUTE_PREFIX: casePrefix
        }
        event_log = log_conv.apply(log_csv, parameters=parameters,
                                   variant=log_conv.Variants.TO_EVENT_LOG)
        xes_exporter.apply(event_log, os.path.join(cache_dir, id + '.xes'))
        with open(os.path.join(cache_dir, id + '.xes'), 'r') as f:
            content = f.read()
        event_store[id] = content
    else:
        content = file.read().decode('utf-8')
        event_store[id] = content
    print('Storing file at: ' + id)
    # __store_delete_time(id)
    return id
def write_xes(log: EventLog, file_path: str) -> None:
    """
    Exports a XES log

    Parameters
    --------------
    log
        Event log
    file_path
        Destination path

    Returns
    -------------
    void
    """
    from pm4py.objects.log.exporter.xes import exporter as xes_exporter
    xes_exporter.apply(log, file_path)
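# A minimal usage sketch for write_xes, assuming pm4py is installed;
# the input and output file names below are hypothetical.
from pm4py.objects.log.importer.xes import importer as xes_importer

log = xes_importer.apply("running-example.xes")
write_xes(log, "running-example-copy.xes")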
def write_xes(log, file_path):
    """
    Exports a XES log

    Parameters
    --------------
    log
        Event log
    file_path
        Destination path

    Returns
    -------------
    void
    """
    from pm4py.objects.log.exporter.xes import exporter as xes_exporter
    xes_exporter.apply(log, file_path)
def main():
    log_x, log_pm4py = readLogFile(inPath)
    for mode in modeRange:
        for eps in epsRange:
            for i in range(tries):
                if mode == 'df_laplace':
                    out_path = basePath + '/Out/' + logName + '/' + logName + '_' + str(eps) + '_' + mode + '_' + str(i) + ".xes"
                    private_log = privatize_df_laplace.privatize_tracevariants(log_x, log_pm4py, eps)
                    xes_exporter.apply(private_log, out_path)
                elif mode == 'df_exp':
                    for max_k in max_k_list:
                        out_path = basePath + '/Out/' + logName + '/' + logName + '_' + str(eps) + '_max_k' + str(max_k) + '_' + mode + '_' + str(i) + ".xes"
                        private_log = privatize_df_exp.privatize_tracevariants(log_x, log_pm4py, eps, max_k)
                        xes_exporter.apply(private_log, out_path)
    print("Done for all eps for all tries.")
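# main() relies on module-level configuration defined elsewhere in the script
# (inPath, modeRange, epsRange, tries, max_k_list, basePath, logName).
# A hypothetical setup; every value below is a placeholder assumption:
basePath = "."
logName = "Sepsis"
inPath = basePath + "/" + logName + ".xes"
modeRange = ["df_laplace", "df_exp"]
epsRange = [0.1, 1.0, 2.0]
max_k_list = [5, 10]
tries = 3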
def split():
    ps = process_args(sys.argv[1:])
    dpn = DPN(read_pnml_input(ps["model"]))
    (log, has_uncertainty) = read_log(ps["log"])
    print("number of traces: %d" % len(log))

    # naive_part = NaivePartitioning(list(logd.values()))
    # interval_part = IntervalPartitioning(dpn, naive_part.representatives())

    i = 0
    ts = []
    for t in log:
        tp = preprocess_trace(t, dpn)
        if tp not in ts:
            log1 = pm4py.filter_log(lambda x: x == t, log)
            print(len(log1), i)
            xes_exporter.apply(log1, 'data/hospital_billing/single_traces/' + str(i) + '.xes')
            i += 1
            ts.append(tp)
def test_importExportCSVtoXES(self):
    # to avoid static method warnings in tests,
    # that by construction of the unittest package have to be expressed in such way
    self.dummy_variable = "dummy_value"
    df = pd.read_csv(os.path.join(INPUT_DATA_DIR, "running-example.csv"))
    df = dataframe_utils.convert_timestamp_columns_in_df(df)
    event_log = log_conversion.apply(df, variant=log_conversion.TO_EVENT_STREAM)
    event_log = sorting.sort_timestamp(event_log)
    event_log = sampling.sample(event_log)
    event_log = index_attribute.insert_event_index_as_event_attribute(event_log)
    log = log_conversion.apply(event_log)
    log = sorting.sort_timestamp(log)
    log = sampling.sample(log)
    log = index_attribute.insert_trace_index_as_event_attribute(log)
    xes_exporter.apply(log, os.path.join(OUTPUT_DATA_DIR, "running-example-exported.xes"))
    log_imported_after_export = xes_importer.apply(
        os.path.join(OUTPUT_DATA_DIR, "running-example-exported.xes"))
    self.assertEqual(len(log), len(log_imported_after_export))
    os.remove(os.path.join(OUTPUT_DATA_DIR, "running-example-exported.xes"))
def test_xesimp_xesexp(self):
    log0 = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
    log = log_conversion.apply(log0, variant=log_conversion.TO_EVENT_LOG)
    stream = log_conversion.apply(log0, variant=log_conversion.TO_EVENT_STREAM)
    df = log_conversion.apply(log0, variant=log_conversion.TO_DATA_FRAME)
    xes_exporter.apply(log, "ru.xes")
    xes_exporter.apply(stream, "ru.xes")
    xes_exporter.apply(df, "ru.xes")
    os.remove('ru.xes')
def test_pdimp_xesexp(self):
    log0 = pd.read_csv(os.path.join("input_data", "running-example.csv"))
    log0 = dataframe_utils.convert_timestamp_columns_in_df(log0)
    log = log_conversion.apply(log0, variant=log_conversion.TO_EVENT_LOG)
    stream = log_conversion.apply(log0, variant=log_conversion.TO_EVENT_STREAM)
    df = log_conversion.apply(log0, variant=log_conversion.TO_DATA_FRAME)
    xes_exporter.apply(log, "ru.xes")
    xes_exporter.apply(stream, "ru.xes")
    xes_exporter.apply(df, "ru.xes")
    os.remove('ru.xes')
'''
Author : Boltenhagen Mathilde
Date : June 2020

randomSequences.py : this file has been created to get 1000 mock traces
'''

log = xes_importer.apply("<original log>")
variants = getvariants.get_variants(log)

# get activities and maximum trace length in the log
activities = list(get_attribute_values(log, "concept:name").keys())
max_len = len(max(project_traces(log), key=len))

log._list = []
for t in range(1000):
    new_sequence = Trace()
    # random length of the fake sequence
    size_of_sequence = random.randint(1, max_len - 1)
    # random activities
    for e in range(size_of_sequence):
        event = Event()
        event["concept:name"] = random.choice(activities)
        new_sequence.append(event)
    log._list.append(new_sequence)

xes_exporter.apply(log, "<1000 mock traces>")
print("Time of TV Query: " + str((endtime_tv_query - starttime_tv_query))) starttime_trace_matcher = datetime.datetime.now() traceMatcher = TraceMatcher(tv_query_log, log) matchedLog = traceMatcher.matchQueryToLog() print(len(matchedLog)) endtime_trace_matcher = datetime.datetime.now() print("Time of TraceMatcher: " + str((endtime_trace_matcher - starttime_trace_matcher))) distributionOfAttributes = traceMatcher.getAttributeDistribution() occurredTimestamps, occurredTimestampDifferences = traceMatcher.getTimeStampData( ) print(min(occurredTimestamps)) starttime_attribute_anonymizer = datetime.datetime.now() attributeAnonymizer = AttributeAnonymizer() anonymizedLog, attributeDistribution = attributeAnonymizer.anonymize( matchedLog, distributionOfAttributes, epsilon, occurredTimestampDifferences, occurredTimestamps) endtime_attribute_anonymizer = datetime.datetime.now() print("Time of attribute anonymizer: " + str(endtime_attribute_anonymizer - starttime_attribute_anonymizer)) xes_exporter.apply(anonymizedLog, result_log_path) endtime = datetime.datetime.now() print("Complete Time: " + str((endtime - starttime))) print("Time of TV Query: " + str((endtime_tv_query - starttime_tv_query))) print("Time of TraceMatcher: " + str((endtime_trace_matcher - starttime_trace_matcher))) print("Time of attribute anonymizer: " + str(endtime_attribute_anonymizer - starttime_attribute_anonymizer)) print(result_log_path) print(freq(attributeDistribution))
def execute_script():
    con = example_connection.get_con()
    log = sapextractor.get_p2p_classic_event_log(con, ref_type="EKKO")
    xes_exporter.apply(log, "p2p.xes")
def execute_script():
    con = example_connection.get_con()
    log = sapextractor.get_ap_ar_single_doc_transactions_log(con)
    xes_exporter.apply(log, "bkpf.xes")
def test_5(self):
    log = self.load_running_example_xes()
    from pm4py.objects.log.exporter.xes import exporter as xes_exporter
    path = os.path.join("test_output_data", "ru.xes")
    xes_exporter.apply(log, path)
    os.remove(path)
def apply_filter(req):
    sessions[req.session["id"]] = datetime.now()
    filters = {
        "time": True,
        "variants": True,
        "performance": True,
        "activities": True,
        "attribute": True
    }
    req.session.set_expiry(7200)
    # print(str(req.body))
    o = json.loads(req.body)
    print(str(o))

    custom_time_range = []
    for pair in o["filter1"]:
        # custom_time_range.append((dateutil.parser.parse(pair[0]), dateutil.parser.parse(pair[1])))
        custom_time_range.append((pair[0], pair[1]))
    if o["filter1"] == []:
        filters["time"] = False
    # print(o["filter1"][0])
    # print(custom_time_range[0][0])
    # print(custom_time_range)

    custom_path_range = []
    for pair in o["filter2"]:
        custom_path_range.append((float(pair[0]), float(pair[1])))
    if o["filter2"] == []:
        filters["variants"] = False
    # custom_path_range = [(0,1)]  # filter2

    custom_performance_range = []
    for pair in o["filter3"]:
        custom_performance_range.append((float(pair[0]), float(pair[1])))
    if o["filter3"] == []:
        filters["performance"] = False

    custom_activitiy_range = []
    for pair in o["filter4"]:
        custom_activitiy_range.append((float(pair[0]), float(pair[1])))
    if o["filter4"] == []:
        filters["activities"] = False
    # custom_activitiy_range = [(0,1)]  # filter3

    custom_attribute_range = []
    for pair in o["filter5"]:
        custom_attribute_range.append((float(pair[0]), float(pair[1])))
    if o["filter5"] == [] or o["filter5attribute"] == "Empty":
        filters["attribute"] = False
    additional_attribute = o["filter5attribute"]

    selected_viz = o["visualization"]
    calc_lev = o["distance"]

    # input_file = os.path.join("webapp", "static", req.session["id"] + "_l0.xes")
    input_file = os.path.join("webapp", "static", "sepsis.xes")
    input_log = xes_importer.apply(input_file)
    not_filtered_logs = {}
    flatten = lambda l: [item for sublist in l for item in sublist]

    time_timestamp_started = datetime.now()
    if filters["time"]:
        # TODO check overlapping for filter
        custom_time_range = sorted(custom_time_range, reverse=False)
        for i in range(0, len(custom_time_range) - 1):
            if custom_time_range[i][1] > custom_time_range[i + 1][0]:
                response = HttpResponse(json.dumps({'error': "Wrong intervals for time filter"}))
                response.status_code = 200
                return response
                # raise ValueError("Overlapping time ranges")
        logs = []
        for (x, y) in custom_time_range:
            logs.append(timestamp_filter.filter_traces_contained(input_log, x, y))
        # log = timestamp_filter.filter_traces_contained(input_log, custom_time_range[0][0], custom_time_range[0][1])
        log = pm4py.objects.log.log.EventLog()
        for timeslice in logs:
            for trace in timeslice:
                log.append(trace)
        print(len(input_log))
        print(len(log))
        # l2
        not_filtered_logs["timestamp_filter"] = pm4py.objects.log.log.EventLog()
        for trace in input_log:
            if trace not in log:
                not_filtered_logs["timestamp_filter"].append(trace)
        print(len(not_filtered_logs["timestamp_filter"]))
    else:
        log = input_log

    time_variants_started = datetime.now()
if filters["variants"]: variants = variants_filter.get_variants(log) variants_count = case_statistics.get_variant_statistics(log) variants_count = sorted(variants_count, key=lambda x: x['count'], reverse=False) custom_path_range = sorted(custom_path_range, reverse=False) # check overlapping for i in range(0,len(custom_path_range)-1): if(custom_path_range[i][1] > custom_path_range[i+1][0]): response = HttpResponse(json.dumps({'error': "Wrong intervals for variants filter"})) response.status_code = 200 return response #raise ValueError("Overlapping variants ranges") nr_variants = len(variants_count) custom_path_range * nr_variants idx = [(math.floor(x*nr_variants), math.ceil(y*nr_variants)) for (x,y) in custom_path_range] variants_subset = [variants_count[x:y+1] for (x,y) in idx] variants_subset = flatten(variants_subset) filtered_variants = {k:v for k,v in variants.items() if k in [x["variant"] for x in variants_subset]} #l2 not_filtered_variants = {k:v for k,v in variants.items() if k not in [x["variant"] for x in variants_subset]} filtered_log = variants_filter.apply(log, filtered_variants) #l2 not_filtered_logs["variant_filter"] = variants_filter.apply(log, not_filtered_variants) else: filtered_log = log time_variants_finished = datetime.now() # note: incl log2 generation if filters["performance"]: custom_performance_range = sorted(custom_performance_range, reverse=False) # check overlapping for i in range(0,len(custom_performance_range)-1): if(custom_performance_range[i][1] > custom_performance_range[i+1][0]): response = HttpResponse(json.dumps({'error': "Wrong intervals for performance filter"})) response.status_code = 200 return response #raise ValueError("Overlapping performance ranges") #all_case_durations = case_statistics.get_all_casedurations(log, parameters={case_statistics.Parameters.TIMESTAMP_KEY: "time:timestamp"}) #case_filter.filter_case_performance(log, 86400, 864000) performances = [] for i in range(len(filtered_log)): filtered_log[i].attributes["throughput"] = (max([event["time:timestamp"]for event in filtered_log[i]])-min([event["time:timestamp"] for event in filtered_log[i]])).total_seconds() performances.append(filtered_log[i].attributes["throughput"]) nr_cases = len(filtered_log) performances = sorted(performances, reverse=False) idx = [(math.floor(x*nr_cases), math.ceil(y*nr_cases)) for (x,y) in custom_performance_range] perf_subset = [performances[x:y+1] for (x,y) in idx] perf_subset = flatten(perf_subset) performance_log = pm4py.objects.log.log.EventLog([trace for trace in filtered_log if trace.attributes["throughput"] in perf_subset]) #l2 not_filtered_logs["performance_filter"] = pm4py.objects.log.log.EventLog([trace for trace in filtered_log if trace.attributes["throughput"] not in perf_subset]) #print(str(len(not_filtered_logs["performance_filter"]))) else: performance_log = filtered_log time_performance_finished = datetime.now() if filters["activities"]: variants = variants_filter.get_variants(performance_log) variants_count = case_statistics.get_variant_statistics(performance_log) variants_count = sorted(variants_count, key=lambda x: x['count'], reverse=False) activities = dict() for variant in variants_count: for activity in variant["variant"].split(","): if (activity not in activities.keys()): activities[activity] = variant["count"] else: activities[activity] += variant["count"] sorted_activities = {k: v for k, v in sorted(activities.items(), key=lambda item: item[1])} activities_sorted_list = list(sorted_activities) custom_activitiy_range = 
sorted(custom_activitiy_range, reverse=False) # check overlapping for i in range(0,len(custom_activitiy_range)-1): if(custom_activitiy_range[i][1] > custom_activitiy_range[i+1][0]): response = HttpResponse(json.dumps({'error': "Wrong intervals for activities filter"})) response.status_code = 200 return response #raise ValueError("Overlapping activities ranges") nr_activities = len(activities_sorted_list) idx = [(math.floor(x*nr_activities), math.ceil(y*nr_activities)) for (x,y) in custom_activitiy_range] activities_to_keep = [activities_sorted_list[x:y+1] for (x,y) in idx] activities_to_keep = flatten(activities_to_keep) variants_idx = [] for i in range(len(variants_count)): for activity in activities_to_keep: if (activity in variants_count[i]["variant"].split(",") and (i not in variants_idx)): variants_idx.append(i) variants_subset = [variants_count[i] for i in variants_idx] filtered_variants = {k:v for k,v in variants.items() if k in [x["variant"] for x in variants_subset]} #l2 not_filtered_variants = {k:v for k,v in variants.items() if k not in [x["variant"] for x in variants_subset]} filtered_log = variants_filter.apply(performance_log, filtered_variants) #l2 not_filtered_logs["activities_filter"] = variants_filter.apply(performance_log, not_filtered_variants) new_log = pm4py.objects.log.log.EventLog() #not_filtered_logs["activities_filter_traces"] = pm4py.objects.log.log.EventLog() for trace in filtered_log: new_trace = pm4py.objects.log.log.Trace() not_new_trace = pm4py.objects.log.log.Trace() for event in trace: if(event['concept:name'] in activities_to_keep): new_trace.append(event) else: not_new_trace.append(event) if(len(new_trace)>0): new_log.append(new_trace) if(len(not_new_trace)>0): not_filtered_logs["activities_filter"].append(not_new_trace) else: new_log = performance_log time_activities_finished = datetime.now() if filters["attribute"]: custom_attribute_range = sorted(custom_attribute_range, reverse=False) # check overlapping for i in range(0,len(custom_attribute_range)-1): if(custom_attribute_range[i][1] > custom_attribute_range[i+1][0]): response = HttpResponse(json.dumps({'error': "Wrong intervals for additional attribute filter"})) response.status_code = 200 return response newest_log = pm4py.objects.log.log.EventLog() not_filtered_logs["additional_filter"] = pm4py.objects.log.log.EventLog() traces_with_attr = [] not_traces_with_attr = [] for trace in new_log: if additional_attribute in trace.attributes.keys(): traces_with_attr.append(trace) else: not_traces_with_attr.append(trace) #check if trace attribute if len(traces_with_attr)>0: #check if numeric if type(traces_with_attr[0].attributes[additional_attribute]) in [int, float]: for trace in traces_with_attr: if any([trace.attributes[additional_attribute] >= x and trace.attributes[additional_attribute] <= y for (x,y) in custom_attribute_range]): newest_log.append(trace) else: not_filtered_logs["additional_filter"].append(trace) for trace in not_traces_with_attr: not_filtered_logs["additional_filter"].append(trace) else: #string attribute_frequencies = dict() for trace in traces_with_attr: if trace.attributes[additional_attribute] not in attribute_frequencies.keys(): attribute_frequencies[trace.attributes[additional_attribute]] = 0 attribute_frequencies[trace.attributes[additional_attribute]] += 1 sorted_frequencies = {k: v for k, v in sorted(attribute_frequencies.items(), key=lambda item: item[1])} frequencies_sorted_list = list(sorted_frequencies) nr_values = len(frequencies_sorted_list) idx = 
[(math.floor(x*nr_values), math.ceil(y*nr_values)) for (x,y) in custom_attribute_range] values_to_keep = [frequencies_sorted_list[x:y+1] for (x,y) in idx] values_to_keep = flatten(values_to_keep) for trace in traces_with_attr: if trace.attributes[additional_attribute] in values_to_keep: newest_log.append(trace) else: not_filtered_logs["additional_filter"].append(trace) for trace in not_traces_with_attr: not_filtered_logs["additional_filter"].append(trace) else: #event attribute if [type(event[additional_attribute]) for trace in new_log for event in trace if additional_attribute in event.keys()][0] in [int, float]: for trace in new_log: new_trace = pm4py.objects.log.log.Trace() not_new_trace = pm4py.objects.log.log.Trace() for event in trace: if(additional_attribute in event.keys() and any([event[additional_attribute] >= x and event[additional_attribute] <= y for (x,y) in custom_attribute_range ])): new_trace.append(event) else: not_new_trace.append(event) if(len(new_trace)>0): newest_log.append(new_trace) if(len(not_new_trace)>0): not_filtered_logs["additional_filter"].append(not_new_trace) else: #string attribute_frequencies = dict() for trace in new_log: for event in trace: if additional_attribute in event.keys(): if event[additional_attribute] not in attribute_frequencies.keys(): attribute_frequencies[event[additional_attribute]] = 0 attribute_frequencies[event[additional_attribute]] += 1 sorted_frequencies = {k: v for k, v in sorted(attribute_frequencies.items(), key=lambda item: item[1])} frequencies_sorted_list = list(sorted_frequencies) nr_values = len(frequencies_sorted_list) idx = [(math.floor(x*nr_values), math.ceil(y*nr_values)) for (x,y) in custom_attribute_range] values_to_keep = [frequencies_sorted_list[x:y+1] for (x,y) in idx] values_to_keep = flatten(values_to_keep) for trace in new_log: new_trace = pm4py.objects.log.log.Trace() not_new_trace = pm4py.objects.log.log.Trace() for event in trace: if(additional_attribute in event.keys() and event[additional_attribute] in values_to_keep): new_trace.append(event) else: not_new_trace.append(event) if(len(new_trace)>0): newest_log.append(new_trace) if(len(not_new_trace)>0): not_filtered_logs["additional_filter"].append(not_new_trace) else: newest_log = new_log time_attribute_finished = datetime.now() if(selected_viz=="dfgf"): dfg = dfg_discovery.apply(newest_log) gviz = dfg_visualization.apply(dfg, log=newest_log, variant=dfg_visualization.Variants.FREQUENCY) dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l1.png")) elif(selected_viz=="dfgp"): dfg = dfg_discovery.apply(newest_log) gviz = dfg_visualization.apply(dfg, log=newest_log, variant=dfg_visualization.Variants.PERFORMANCE) dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l1.png")) else: heu_net = heuristics_miner.apply_heu(newest_log, parameters={"dependency_thresh": 0.99}) gviz = hn_vis_factory.apply(heu_net) hn_vis_factory.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l1.png")) xes_exporter.apply(newest_log, os.path.join("webapp","static", req.session["id"] + "_l1.xes")) #l2 not_filtered_log = pm4py.objects.log.log.EventLog() for part in not_filtered_logs.keys(): for trace in not_filtered_logs[part]: not_filtered_log.append(trace) if(selected_viz=="dfgf"): dfg = dfg_discovery.apply(not_filtered_log) gviz = dfg_visualization.apply(dfg, log=not_filtered_log, variant=dfg_visualization.Variants.FREQUENCY) dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + 
"_l2.png")) elif(selected_viz=="dfgp"): dfg = dfg_discovery.apply(not_filtered_log) gviz = dfg_visualization.apply(dfg, log=not_filtered_log, variant=dfg_visualization.Variants.PERFORMANCE) dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l2.png")) else: heu_net = heuristics_miner.apply_heu(not_filtered_log, parameters={"dependency_thresh": 0.99}) gviz = hn_vis_factory.apply(heu_net) hn_vis_factory.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l2.png")) xes_exporter.apply(not_filtered_log, os.path.join("webapp","static", req.session["id"] + "_l2.xes")) if(calc_lev): lev_new = [0]*len(newest_log) for i in range(len(newest_log)): lev_new[i] = [hash(event['concept:name']) for event in newest_log[i]] lev_not = [0]*len(not_filtered_log) for i in range(len(not_filtered_log)): lev_not[i] = [hash(event['concept:name']) for event in not_filtered_log[i]] distances = [] for i in range(len(lev_new)): for j in range(len(lev_not)): distances.append(lev_dist(lev_new[i], lev_not[j])) lev_d = sum(distances)/len(distances) print("Levenshtein's distance: "+str(lev_d)) else: lev_d = "null" used_paths = 0 for lower, higher in custom_path_range: used_paths += round((higher-lower)*100) print(f"Using {used_paths}% of paths. {100-used_paths}% of paths are discarded.") print("Timestamp filter: {} seconds. \nVariants filter: {} seconds. \nPerformance filter: {} seconds. \nActivities filter: {} seconds. \nAttribute filter: {} seconds.".format((time_variants_started - time_timestamp_started).total_seconds(), (time_variants_finished - time_variants_started).total_seconds(), (time_performance_finished - time_variants_finished).total_seconds(), (time_activities_finished - time_performance_finished).total_seconds(), (time_attribute_finished - time_activities_finished).total_seconds())) response = HttpResponse(json.dumps({'time':(time_variants_started - time_timestamp_started).total_seconds(), 'variants':(time_variants_finished - time_variants_started).total_seconds(),'performance':(time_performance_finished - time_variants_finished).total_seconds(), 'activities':(time_activities_finished - time_performance_finished).total_seconds(), 'attribute':(time_attribute_finished - time_activities_finished).total_seconds(), 'traces':[len(newest_log), len(not_filtered_log)], 'distance':lev_d})) response.status_code = 200 return response
import os

from tqdm import tqdm

from meta_feature_extraction import sort_files
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.objects.log.exporter.xes import exporter as xes_exporter

print("Converting log files")

event_logs_path = "event_logs"
for f in tqdm(sort_files(os.listdir(event_logs_path))):
    log = xes_importer.apply(f"{event_logs_path}/{f}", parameters={"show_progress_bar": False})
    f_name = f.split(".gz")[0]
    xes_exporter.apply(log, f"{event_logs_path}/{f_name}", parameters={"show_progress_bar": False})
    os.remove(f"{event_logs_path}/{f}")
def format(self, log, outFileName):
    xes_exporter.apply(log, outFileName)