Example #1
    def new_window(self, begin, end, activity=''):
        # increment the id of the window
        if activity:  # when using a detector for an attribute of the activity
            print(
                f'Generating model for sub-log [{begin} - {end - 1}] - window [{self.window_count[activity]}] - activity [{activity}]')
            self.window_count[activity] += 1
        else:
            print(f'Generating model for sub-log [{begin} - {end - 1}] - window [{self.window_count}]')
            self.window_count += 1

        if self.current_parameters.read_log_as == ReadLogAs.EVENT.name:
            # generate the sub-log for the window
            window = EventStream(self.event_data[begin:end])
            sub_log = log_converter.apply(window, variant=log_converter.Variants.TO_EVENT_LOG)
        elif self.current_parameters.read_log_as == ReadLogAs.TRACE.name:
            sub_log = EventLog(self.event_data[begin:end])
        else:
            print(f'Incorrect window type: {self.current_parameters.read_log_as}.')
            return  # sub_log would be undefined below, so stop here

        # save the sub-log
        output_path = os.path.join(self.logs_path, self.current_parameters.logname, activity)
        if not os.path.exists(output_path):
            os.makedirs(output_path)
        if activity:
            output_filename = os.path.join(output_path, f'sublog_w{self.window_count[activity]}_{begin}_{end - 1}.xes')
        else:
            output_filename = os.path.join(output_path, f'sublog_w{self.window_count}_{begin}_{end - 1}.xes')
        xes_exporter.apply(sub_log, output_filename)

        self.execute_processes_for_window(sub_log, begin, activity)
Example #2
def execute_script():
    log_input_directory = "xesinput"
    all_logs_names = os.listdir(log_input_directory)
    all_logs_names = [log for log in all_logs_names if ".xe" in log]

    for logName in all_logs_names:
        # logPath = os.path.join("..", "tests", "inputData", logName)
        log_path = os.path.join(log_input_directory, logName)
        log = xes_importer.apply(log_path)
        print("\n\n")
        print("log loaded")
        print("Number of traces - ", len(log))
        event_log = log_conversion.apply(
            log, variant=log_conversion.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
        exp_log_name = os.path.join("xescert_exportlogs", "exp_" + logName)
        print("exporting log", exp_log_name)
        xes_exporter.apply(log, exp_log_name)
        print("exported log", exp_log_name)

        log, classifier_attr_key = insert_classifier.search_act_class_attr(log)

        classifiers = list(log.classifiers.keys())
        if classifier_attr_key is None and classifiers:
            try:
                print(classifiers)
                log, classifier_attr_key = insert_classifier.insert_activity_classifier_attribute(
                    log, classifiers[0])
                print(classifier_attr_key)
            except Exception as e:
                print("exception in handling classifier:", e)

        if classifier_attr_key is None:
            classifier_attr_key = "concept:name"

        if len(event_log) > 0 and classifier_attr_key in event_log[0]:
            parameters = {
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: classifier_attr_key
            }

            dfg = dfg_algorithm.apply(log, parameters=parameters)
            gviz = dfg_vis.apply(dfg,
                                 log=log,
                                 variant="frequency",
                                 parameters=parameters)
            # dfg_vis.view(gviz)

            dfg_vis.save(gviz,
                         os.path.join("xescert_images", logName.replace("xes", "png")))

        print("Reimporting log file just exported - ", exp_log_name)

        log = xes_importer.apply(exp_log_name)
        print("log loaded", exp_log_name)
        print("Number of traces - ", len(log))
        event_log = log_conversion.apply(
            log, variant=log_conversion.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
Example #3
def cli(con):
    print("\n\nO2C XES log extractor\n\n")
    ref_type = input(
        "Insert the central document type of the extraction (default: Invoice): "
    )
    if not ref_type:
        ref_type = "Invoice"
    ext_type = input(
        "Do you want to extract the document log, or the items log (default: document):"
    )
    if not ext_type:
        ext_type = "document"
    if ext_type == "document":
        keep_first = True
    else:
        keep_first = False
    min_extr_date = input(
        "Insert the minimum extraction date (default: 2020-01-01 00:00:00): ")
    if not min_extr_date:
        min_extr_date = "2020-01-01 00:00:00"
    gjahr = input("Insert the fiscal year (default: 2020):")
    if not gjahr:
        gjahr = "2020"
    log = apply(con,
                ref_type=ref_type,
                keep_first=keep_first,
                min_extr_date=min_extr_date,
                gjahr=gjahr)
    path = input(
        "Insert the path where the log should be saved (default: o2c.xes):")
    if not path:
        path = "o2c.xes"
    xes_exporter.apply(log, path)
Example #4
def download_event_log():
    parameters = request.args.get("parameters")
    parameters = __process_parameters(parameters)

    log = __prepare_event_log(parameters)
    ext_type = parameters[
        "ext_type"] if "ext_type" in parameters else "document_flow_log"
    log_type = __get_log_type_from_ext_type(ext_type)

    if log_type == 0:
        extension = ".jsonocel"
        temp_file = tempfile.NamedTemporaryFile(suffix=extension)
        temp_file.close()
        from pm4pymdl.objects.ocel.exporter import exporter as ocel_exporter
        ocel_exporter.apply(log, temp_file.name)
    elif log_type == 1:
        extension = ".csv"
        temp_file = tempfile.NamedTemporaryFile(suffix=extension)
        temp_file.close()
        log.to_csv(temp_file.name, index=False)
    elif log_type == 2:
        extension = ".xes"
        temp_file = tempfile.NamedTemporaryFile(suffix=extension)
        temp_file.close()
        from pm4py.objects.log.exporter.xes import exporter as xes_exporter
        xes_exporter.apply(log, temp_file.name)
    else:
        # unknown log type; fail fast instead of referencing an undefined temp_file below
        raise ValueError("unsupported log type: " + str(log_type))
    resp = send_file(
        temp_file.name,
        mimetype="text/plain",  # use appropriate type based on file
        as_attachment=True,
        conditional=False)
    resp.headers["x-suggested-filename"] = "log" + extension

    return resp
Example #5
 def test_nonstandard_exporter(self):
     log = xes_importer.apply(
         os.path.join("input_data", "running-example.xes"))
     xes_exporter.apply(log,
                        os.path.join("test_output_data",
                                     "running-example.xes"),
                        variant=xes_exporter.Variants.LINE_BY_LINE)
     os.remove(os.path.join("test_output_data", "running-example.xes"))
Example #6
def cli(con):
    print("\n\nAccounting - Transactions for the single document (XES log)\n")
    log = apply(con)
    path = input(
        "Insert the path where the log should be saved (default: bkpf.xes):")
    if not path:
        path = "bkpf.xes"
    xes_exporter.apply(log, path)
Example #7
def execute_script():
    con = example_connection.get_con()
    log = sapextractor.get_o2c_classic_event_log(
        con,
        ref_type="Invoice",
        keep_first=True,
        min_extr_date="1990-01-01 00:00:00")
    xes_exporter.apply(log, "o2c.xes")
Example #8
def filterfile(sourceFile, outputFile, patternText, inclusive):
    log = importer.apply(sourceFile)
    activities = attributes_filter.get_attribute_values(log, CONCEPT_NAME)
    filteredLog = attributes_filter.apply(
        log, [patternText],
        parameters={
            attributes_filter.Parameters.ATTRIBUTE_KEY: CONCEPT_NAME,
            attributes_filter.Parameters.POSITIVE: inclusive
        })
    xes_exporter.apply(filteredLog, outputFile)  # export the filtered log, not the original
Example #9
def cli(con):
    print("\n\nP2P - XES log\n")
    ref_type = input(
        "Provide the central table for the extraction (default: EKKO):")
    if not ref_type:
        ref_type = "EKKO"
    log = apply(con, ref_type=ref_type)
    path = input(
        "Insert the path where the log should be saved (default: p2p.xes): ")
    if not path:
        path = "p2p.xes"
    xes_exporter.apply(log, path)
Example #10
 def test_importExportXESfromGZIP_imp1(self):
     # to avoid static-method warnings in tests, which by construction of
     # the unittest package have to be expressed this way
     self.dummy_variable = "dummy_value"
     log = xes_importer.apply(
         os.path.join(COMPRESSED_INPUT_DATA, "01_running-example.xes.gz"))
     xes_exporter.apply(
         log,
         os.path.join(OUTPUT_DATA_DIR, "01-running-example.xes"),
         parameters={
             xes_exporter.Variants.ETREE.value.Parameters.COMPRESS: True
         })
     os.remove(os.path.join(OUTPUT_DATA_DIR, "01-running-example.xes.gz"))
Example #11
 def test_importExportXEStoXES(self):
     # to avoid static-method warnings in tests, which by construction of
     # the unittest package have to be expressed this way
     self.dummy_variable = "dummy_value"
     log = xes_importer.apply(
         os.path.join(INPUT_DATA_DIR, "running-example.xes"))
     xes_exporter.apply(
         log, os.path.join(OUTPUT_DATA_DIR, "running-example-exported.xes"))
     log_imported_after_export = xes_importer.apply(
         os.path.join(OUTPUT_DATA_DIR, "running-example-exported.xes"))
     self.assertEqual(len(log), len(log_imported_after_export))
     os.remove(os.path.join(OUTPUT_DATA_DIR,
                            "running-example-exported.xes"))
Example #12
def cli(con):
    print("\n\nAccounting Doc Flow XES log extractor\n\n")
    ref_type = input(
        "Insert the central document type of the extraction (default: Goods receipt): "
    )
    if not ref_type:
        ref_type = "Goods receipt"
    log = apply(con, ref_type=ref_type)
    path = input(
        "Insert the path where the log should be saved (default: doc_flow.xes): "
    )
    if not path:
        path = "doc_flow.xes"
    xes_exporter.apply(log, path)
Example #13
def apply_trans(logpath, activities, attributes, predicates, thresholds,
                location, new_activities, window, order_flag, visual_flag):
    """
    Given an event log and a set of rules, both are passed to the event
    derivation algorithm. Evaluation metrics for the transformed log, together
    with statistics for the original and transformed logs, are calculated and
    returned.

    Parameters:
        logpath (str): Path of event log
        activities (List of str): List of activities in rule
        attributes (List of str): List of attributes in rule
        predicates (List of str): List of predicates in rule
        thresholds (List of float or str): List of thresholds in rule
        location (List of str): List of locations for derived events in rule
        new_activities (List of str): List of identifiers for the derived events
        window (List of int): List of time windows in rule
        order_flag (bool): Flag to denote if order of events in log is considered
        visual_flag (bool): Flag to denote if activities in rule should be retained

    Returns:
        metrics (dict): Dictionary of evaluation measures (fitness, precision,
        simplicity and generalization) for both logs
    """
    xes_log = importer.apply(logpath)
    df = log_converter.apply(xes_log,
                             variant=log_converter.Variants.TO_DATA_FRAME)

    df["time:timestamp"] = pd.to_datetime(df["time:timestamp"],
                                          format='%Y-%m-%d',
                                          utc=True)

    transformed_df = deriving_events(df, activities, attributes, predicates,
                                     thresholds, location, new_activities,
                                     window, order_flag, visual_flag)
    parameters = {
        log_converter.Variants.TO_EVENT_LOG.value.Parameters.CASE_ID_KEY:
        'case:concept:name'
    }
    transformed_xes_log = log_converter.apply(
        transformed_df,
        parameters=parameters,
        variant=log_converter.Variants.TO_EVENT_LOG)
    xes_exporter.apply(transformed_xes_log, logpath[:-4] + "_modified.xes")

    metrics = dict.fromkeys(np.arange(2))
    metrics[0] = evaluate_logwithmodel(logpath)
    metrics[1] = evaluate_logwithmodel(logpath[:-4] + "_modified.xes")

    return metrics
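
A minimal invocation sketch for apply_trans follows. Every rule value is
hypothetical (only the parameter types are documented above), and it assumes a
log at invoices.xes whose events carry a numeric "amount" attribute:

# Hypothetical call of apply_trans; all names and values are illustrative.
metrics = apply_trans(logpath="invoices.xes",
                      activities=["check invoice"],
                      attributes=["amount"],
                      predicates=[">"],
                      thresholds=[1000.0],
                      location=["after"],
                      new_activities=["flag high amount"],
                      window=[2],
                      order_flag=True,
                      visual_flag=True)
print(metrics[0])  # evaluation measures for the original log
print(metrics[1])  # evaluation measures for the transformed log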
Example #14
 def test_importExportProblematicLogs(self):
     # to avoid static-method warnings in tests, which by construction of
     # the unittest package have to be expressed this way
     self.dummy_variable = "dummy_value"
     logs = os.listdir(PROBLEMATIC_XES_DIR)
     for log in logs:
         log_full_path = os.path.join(PROBLEMATIC_XES_DIR, log)
         try:
             output_log_path = os.path.join(OUTPUT_DATA_DIR, log)
             log = xes_importer.apply(log_full_path)
             xes_exporter.apply(log, output_log_path)
             log_imported_after_export = xes_importer.apply(output_log_path)
             self.assertEqual(len(log), len(log_imported_after_export))
             os.remove(output_log_path)
         except SyntaxError as e:
             logging.info("SyntaxError on log " + str(log) + ": " + str(e))
Example #15
def put_event_log(file, caseID, casePrefix) -> str:
    """Cache the event log."""
    id = uuid.uuid4().hex

    # file.save(os.path.join(cache_dir, id + '.xes'))

    filename = file.filename

    if filename.endswith('csv'):
        path = os.path.join(cache_dir, id + '.csv')
        file.save(path)

        if caseID is None:
            raise CaseIdNotFoundError

        if casePrefix is None:
            casePrefix = 'case:'

        log_csv = pd.read_csv(path, sep=',')
        # log_csv.rename(columns={'clientID': 'case:clientID'}, inplace=True)
        parameters = {
            log_conv.Variants.TO_EVENT_LOG.value.Parameters.CASE_ID_KEY:
            caseID,
            log_conv.Variants.TO_EVENT_LOG.value.Parameters.CASE_ATTRIBUTE_PREFIX:
            casePrefix
        }
        event_log = log_conv.apply(log_csv,
                                   parameters=parameters,
                                   variant=log_conv.Variants.TO_EVENT_LOG)

        xes_exporter.apply(event_log, os.path.join(cache_dir, id + '.xes'))

        with open(os.path.join(cache_dir, id + '.xes'), 'r') as f:
            content = f.read()

            event_store[id] = content

    else:
        content = file.read().decode('utf-8')

        event_store[id] = content

    print('Storing file at: ' + id)

    # __store_delete_time(id)

    return id
Example #16
def write_xes(log: EventLog, file_path: str) -> None:
    """
    Exports an XES log

    Parameters
    --------------
    log
        Event log
    file_path
        Destination path

    Returns
    -------------
    void
    """
    from pm4py.objects.log.exporter.xes import exporter as xes_exporter
    xes_exporter.apply(log, file_path)
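
A short usage sketch for write_xes, assuming the pm4py version of these
examples (where pm4py.read_xes returns an EventLog) and an illustrative
input path:

# Usage sketch: import a log with pm4py, then write it back out.
import pm4py
log = pm4py.read_xes("input_data/running-example.xes")
write_xes(log, "running-example-exported.xes")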
Example #17
def write_xes(log, file_path):
    """
    Exports an XES log

    Parameters
    --------------
    log
        Event log
    file_path
        Destination path

    Returns
    -------------
    void
    """
    from pm4py.objects.log.exporter.xes import exporter as xes_exporter
    xes_exporter.apply(log, file_path)
Example #18
def main():
    log_x, log_pm4py = readLogFile(inPath)
    for mode in modeRange:
        for eps in epsRange:
            for i in range(tries):
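                # one privatized log per (mode, epsilon, repetition) combination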
                if mode == 'df_laplace':
                    out_path = basePath + '/Out/' + logName + '/' + logName + '_' + str(eps) + '_' + mode + '_' + str(i) + ".xes"
                    private_log = privatize_df_laplace.privatize_tracevariants(log_x, log_pm4py, eps)
                    xes_exporter.apply(private_log, out_path)
                elif mode == 'df_exp':
                    for max_k in max_k_list:
                        out_path = basePath + '/Out/' + logName + '/' + logName + '_' + str(eps) + '_max_k' + str(
                            max_k) + '_' + mode + '_' + str(i) + ".xes"
                        private_log = privatize_df_exp.privatize_tracevariants(log_x, log_pm4py, eps, max_k)
                        xes_exporter.apply(private_log, out_path)
    print("Done for all eps for all tries.")
Example #19
def split():
    ps = process_args(sys.argv[1:])
    dpn = DPN(read_pnml_input(ps["model"]))
    (log, has_uncertainty) = read_log(ps["log"])
    print("number of traces: %d" % len(log))

    #naive_part = NaivePartitioning(list(logd.values()))
    #interval_part = IntervalPartitioning(dpn, naive_part.representatives())

    i = 0
    ts = []
    for t in log:
        tp = preprocess_trace(t, dpn)
        if tp not in ts:
            log1 = pm4py.filter_log(lambda x: x == t, log)
            print(len(log1), i)
            xes_exporter.apply(
                log1, 'data/hospital_billing/single_traces/' + str(i) + '.xes')
            i += 1
            ts.append(tp)
Example #20
 def test_importExportCSVtoXES(self):
     # to avoid static-method warnings in tests, which by construction of
     # the unittest package have to be expressed this way
     self.dummy_variable = "dummy_value"
     df = pd.read_csv(os.path.join(INPUT_DATA_DIR, "running-example.csv"))
     df = dataframe_utils.convert_timestamp_columns_in_df(df)
     event_log = log_conversion.apply(
         df, variant=log_conversion.TO_EVENT_STREAM)
     event_log = sorting.sort_timestamp(event_log)
     event_log = sampling.sample(event_log)
     event_log = index_attribute.insert_event_index_as_event_attribute(
         event_log)
     log = log_conversion.apply(event_log)
     log = sorting.sort_timestamp(log)
     log = sampling.sample(log)
     log = index_attribute.insert_trace_index_as_event_attribute(log)
     xes_exporter.apply(
         log, os.path.join(OUTPUT_DATA_DIR, "running-example-exported.xes"))
     log_imported_after_export = xes_importer.apply(
         os.path.join(OUTPUT_DATA_DIR, "running-example-exported.xes"))
     self.assertEqual(len(log), len(log_imported_after_export))
     os.remove(os.path.join(OUTPUT_DATA_DIR,
                            "running-example-exported.xes"))
Example #21
 def test_xesimp_xesexp(self):
     log0 = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
     log = log_conversion.apply(log0, variant=log_conversion.TO_EVENT_LOG)
     stream = log_conversion.apply(log0, variant=log_conversion.TO_EVENT_STREAM)
     df = log_conversion.apply(log0, variant=log_conversion.TO_DATA_FRAME)
     xes_exporter.apply(log, "ru.xes")
     xes_exporter.apply(stream, "ru.xes")
     xes_exporter.apply(df, "ru.xes")
     os.remove('ru.xes')
Example #22
 def test_pdimp_xesexp(self):
     log0 = pd.read_csv(os.path.join("input_data", "running-example.csv"))
     log0 = dataframe_utils.convert_timestamp_columns_in_df(log0)
     log = log_conversion.apply(log0, variant=log_conversion.TO_EVENT_LOG)
     stream = log_conversion.apply(log0, variant=log_conversion.TO_EVENT_STREAM)
     df = log_conversion.apply(log0, variant=log_conversion.TO_DATA_FRAME)
     xes_exporter.apply(log, "ru.xes")
     xes_exporter.apply(stream, "ru.xes")
     xes_exporter.apply(df, "ru.xes")
     os.remove('ru.xes')
Example #23
'''
Author : Boltenhagen Mathilde
Date : June 2020

randomSequences.py : this file has been created to get 1000 mock traces 
'''

# imports reconstructed so the snippet runs stand-alone; module paths follow
# pm4py as of mid-2020 and should be treated as a best guess
import random
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.objects.log.exporter.xes import exporter as xes_exporter
from pm4py.objects.log.log import Trace, Event
from pm4py.algo.filtering.log.variants import variants_filter as getvariants
from pm4py.statistics.attributes.log.get import get_attribute_values
from pm4py.objects.log.util.log import project_traces

log = xes_importer.apply("<original log>")
variants = getvariants.get_variants(log)

# get activities and maximum length in log
activities = list(get_attribute_values(log,"concept:name").keys())
max_len = len(max(project_traces(log), key=len))

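# reuse the imported log object but drop its traces, so the exported XES
# keeps the original log-level attributes and extensions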
log._list=[]
for t in range(0,1000):
    new_sequence = Trace()
    # random length of the fake sequence
    size_of_sequence = random.randint(1,max_len-1)
    # random activities
    for e in range(0,size_of_sequence):
        event = Event()
        event["concept:name"]=activities[random.randint(1,len(activities))]
        new_sequence.append(event)
    log._list.append(new_sequence)

xes_exporter.apply(log,"<1000 mock traces>")


Example #24
print("Time of TV Query: " + str((endtime_tv_query - starttime_tv_query)))
starttime_trace_matcher = datetime.datetime.now()
traceMatcher = TraceMatcher(tv_query_log, log)
matchedLog = traceMatcher.matchQueryToLog()
print(len(matchedLog))
endtime_trace_matcher = datetime.datetime.now()
print("Time of TraceMatcher: " +
      str((endtime_trace_matcher - starttime_trace_matcher)))
distributionOfAttributes = traceMatcher.getAttributeDistribution()
occurredTimestamps, occurredTimestampDifferences = traceMatcher.getTimeStampData()
print(min(occurredTimestamps))
starttime_attribute_anonymizer = datetime.datetime.now()
attributeAnonymizer = AttributeAnonymizer()
anonymizedLog, attributeDistribution = attributeAnonymizer.anonymize(
    matchedLog, distributionOfAttributes, epsilon,
    occurredTimestampDifferences, occurredTimestamps)
endtime_attribute_anonymizer = datetime.datetime.now()
print("Time of attribute anonymizer: " +
      str(endtime_attribute_anonymizer - starttime_attribute_anonymizer))
xes_exporter.apply(anonymizedLog, result_log_path)
endtime = datetime.datetime.now()
print("Complete Time: " + str((endtime - starttime)))
print("Time of TV Query: " + str((endtime_tv_query - starttime_tv_query)))
print("Time of TraceMatcher: " +
      str((endtime_trace_matcher - starttime_trace_matcher)))
print("Time of attribute anonymizer: " +
      str(endtime_attribute_anonymizer - starttime_attribute_anonymizer))
print(result_log_path)
print(freq(attributeDistribution))
Example #25
def execute_script():
    con = example_connection.get_con()
    log = sapextractor.get_p2p_classic_event_log(con, ref_type="EKKO")
    xes_exporter.apply(log, "p2p.xes")
Example #26
def execute_script():
    con = example_connection.get_con()
    log = sapextractor.get_ap_ar_single_doc_transactions_log(con)
    xes_exporter.apply(log, "bkpf.xes")
Example #27
 def test_5(self):
     log = self.load_running_example_xes()
     from pm4py.objects.log.exporter.xes import exporter as xes_exporter
     path = os.path.join("test_output_data", "ru.xes")
     xes_exporter.apply(log, path)
     os.remove(path)
Example #28
def apply_filter(req):
	sessions[req.session["id"]] = datetime.now()
	filters = {
		"time": True,
		"variants": True,
		"performance": True,
		"activities": True,
		"attribute": True
	}
	req.session.set_expiry(7200)
	#print(str(req.body))
	o = json.loads(req.body)
	print(str(o))
	custom_time_range = []
	for pair in o["filter1"]:
		#custom_time_range.append((dateutil.parser.parse(pair[0]),dateutil.parser.parse(pair[1])))
		custom_time_range.append((pair[0],pair[1]))
	if o["filter1"] == []:
		filters["time"] = False
	#print(o["filter1"][0])
	#print(custom_time_range[0][0])
	#print(custom_time_range)
	custom_path_range = []
	for pair in o["filter2"]:
		custom_path_range.append((float(pair[0]),float(pair[1])))
	if o["filter2"] == []:
		filters["variants"] = False
		#custom_path_range = [(0,1)] #filter2
	custom_performance_range = []
	for pair in o["filter3"]:
		custom_performance_range.append((float(pair[0]),float(pair[1])))
	if o["filter3"] == []:
		filters["performance"] = False
	custom_activitiy_range = []
	for pair in o["filter4"]:
		custom_activitiy_range.append((float(pair[0]),float(pair[1])))
	if o["filter4"] == []:
		filters["activities"] = False
		#custom_activitiy_range = [(0,1)] #filter3
	custom_attribute_range = []
	for pair in o["filter5"]:
		custom_attribute_range.append((float(pair[0]),float(pair[1])))
	if o["filter5"] == [] or o["filter5attribute"] == "Empty":
		filters["attribute"] = False
	additional_attribute = o["filter5attribute"]

	selected_viz = o["visualization"]
	calc_lev = o["distance"]
	#input_file = os.path.join("webapp","static", req.session["id"] + "_l0.xes")
	input_file = os.path.join("webapp","static", "sepsis.xes")
	input_log = xes_importer.apply(input_file)
	not_filtered_logs = {}
	flatten = lambda l: [item for sublist in l for item in sublist]

	time_timestamp_started = datetime.now()
	if filters["time"]:
		#TODO check overlapping for filter
		custom_time_range = sorted(custom_time_range, reverse=False)
		for i in range(0,len(custom_time_range)-1):
			if(custom_time_range[i][1] > custom_time_range[i+1][0]):
				response = HttpResponse(json.dumps({'error': "Wrong intervals for time filter"}))
				response.status_code = 200
				return response
				#raise ValueError("Overlapping time ranges")

		logs = []
		for (x,y) in custom_time_range:
			logs.append(timestamp_filter.filter_traces_contained(input_log, x, y))

		#log = timestamp_filter.filter_traces_contained(input_log, custom_time_range[0][0], custom_time_range[0][1])
		log = pm4py.objects.log.log.EventLog()
		for timeslice in logs:
			for trace in timeslice:
				log.append(trace)
		print(len(input_log))
		print(len(log))
		#l2
		not_filtered_logs["timestamp_filter"] = pm4py.objects.log.log.EventLog()
		for trace in input_log:
			if trace not in log:
				not_filtered_logs["timestamp_filter"].append(trace)
		print(len(not_filtered_logs["timestamp_filter"]))
	else:
		log = input_log

	time_variants_started = datetime.now() # where should I start?

	if filters["variants"]:
		variants = variants_filter.get_variants(log)
		variants_count = case_statistics.get_variant_statistics(log)
		variants_count = sorted(variants_count, key=lambda x: x['count'], reverse=False)

		custom_path_range = sorted(custom_path_range, reverse=False)
		# check overlapping
		for i in range(0,len(custom_path_range)-1):
			if(custom_path_range[i][1] > custom_path_range[i+1][0]):
				response = HttpResponse(json.dumps({'error': "Wrong intervals for variants filter"}))
				response.status_code = 200
				return response
				#raise ValueError("Overlapping variants ranges")

		nr_variants = len(variants_count)
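		# map each normalized (x, y) fraction pair onto variant list indices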
		idx = [(math.floor(x*nr_variants), math.ceil(y*nr_variants)) for (x,y) in custom_path_range]
		variants_subset = [variants_count[x:y+1] for (x,y) in idx]
		variants_subset = flatten(variants_subset)
		filtered_variants = {k:v for k,v in variants.items() if k in [x["variant"] for x in variants_subset]}
		#l2
		not_filtered_variants = {k:v for k,v in variants.items() if k not in [x["variant"] for x in variants_subset]}

		filtered_log = variants_filter.apply(log, filtered_variants)
		#l2
		not_filtered_logs["variant_filter"] = variants_filter.apply(log, not_filtered_variants)
	else:
		filtered_log = log

	time_variants_finished = datetime.now() # note: incl log2 generation

	if filters["performance"]:
		custom_performance_range = sorted(custom_performance_range, reverse=False)
		# check overlapping
		for i in range(0,len(custom_performance_range)-1):
			if(custom_performance_range[i][1] > custom_performance_range[i+1][0]):
				response = HttpResponse(json.dumps({'error': "Wrong intervals for performance filter"}))
				response.status_code = 200
				return response
				#raise ValueError("Overlapping performance ranges")

		#all_case_durations = case_statistics.get_all_casedurations(log, parameters={case_statistics.Parameters.TIMESTAMP_KEY: "time:timestamp"})
		#case_filter.filter_case_performance(log, 86400, 864000)
		performances = []
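		# annotate each trace with its throughput time (last minus first event, in seconds)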
		for i in range(len(filtered_log)):
			filtered_log[i].attributes["throughput"] = (max([event["time:timestamp"]for event in filtered_log[i]])-min([event["time:timestamp"] for event in filtered_log[i]])).total_seconds()
			performances.append(filtered_log[i].attributes["throughput"])

		nr_cases = len(filtered_log)
		performances = sorted(performances, reverse=False)
		idx = [(math.floor(x*nr_cases), math.ceil(y*nr_cases)) for (x,y) in custom_performance_range]
		perf_subset = [performances[x:y+1] for (x,y) in idx]
		perf_subset = flatten(perf_subset)

		performance_log = pm4py.objects.log.log.EventLog([trace for trace in filtered_log if trace.attributes["throughput"] in perf_subset])
		#l2
		not_filtered_logs["performance_filter"] = pm4py.objects.log.log.EventLog([trace for trace in filtered_log if trace.attributes["throughput"] not in perf_subset])
		#print(str(len(not_filtered_logs["performance_filter"])))

	else:
		performance_log = filtered_log

	time_performance_finished = datetime.now()

	if filters["activities"]:
		variants = variants_filter.get_variants(performance_log)
		variants_count = case_statistics.get_variant_statistics(performance_log)
		variants_count = sorted(variants_count, key=lambda x: x['count'], reverse=False)

		activities = dict()
		for variant in variants_count:
			for activity in variant["variant"].split(","):
				if (activity not in activities.keys()):
					activities[activity] = variant["count"]
				else:
					activities[activity] += variant["count"]

		sorted_activities = {k: v for k, v in sorted(activities.items(), key=lambda item: item[1])}
		activities_sorted_list = list(sorted_activities)
		custom_activitiy_range = sorted(custom_activitiy_range, reverse=False)
		# check overlapping
		for i in range(0,len(custom_activitiy_range)-1):
			if(custom_activitiy_range[i][1] > custom_activitiy_range[i+1][0]):
				response = HttpResponse(json.dumps({'error': "Wrong intervals for activities filter"}))
				response.status_code = 200
				return response
				#raise ValueError("Overlapping activities ranges")
		nr_activities = len(activities_sorted_list)
		idx = [(math.floor(x*nr_activities), math.ceil(y*nr_activities)) for (x,y) in custom_activitiy_range]
		activities_to_keep = [activities_sorted_list[x:y+1] for (x,y) in idx]
		activities_to_keep = flatten(activities_to_keep)
		variants_idx = []
		for i in range(len(variants_count)):
			for activity in activities_to_keep:
				if (activity in variants_count[i]["variant"].split(",") and (i not in variants_idx)):
					variants_idx.append(i)
		variants_subset = [variants_count[i] for i in variants_idx]
		filtered_variants = {k:v for k,v in variants.items() if k in [x["variant"] for x in variants_subset]}
		#l2
		not_filtered_variants = {k:v for k,v in variants.items() if k not in [x["variant"] for x in variants_subset]}

		filtered_log = variants_filter.apply(performance_log, filtered_variants)

		#l2
		not_filtered_logs["activities_filter"] = variants_filter.apply(performance_log, not_filtered_variants)

		new_log = pm4py.objects.log.log.EventLog()
		#not_filtered_logs["activities_filter_traces"] = pm4py.objects.log.log.EventLog()
		for trace in filtered_log:
			new_trace = pm4py.objects.log.log.Trace()
			not_new_trace = pm4py.objects.log.log.Trace()
			for event in trace:
				if(event['concept:name'] in activities_to_keep):
					new_trace.append(event)
				else:
					not_new_trace.append(event)
			if(len(new_trace)>0):
				new_log.append(new_trace)
			if(len(not_new_trace)>0):
				not_filtered_logs["activities_filter"].append(not_new_trace)
	else:
		new_log = performance_log

	time_activities_finished = datetime.now()

	if filters["attribute"]:
		custom_attribute_range = sorted(custom_attribute_range, reverse=False)
		# check overlapping
		for i in range(0,len(custom_attribute_range)-1):
			if(custom_attribute_range[i][1] > custom_attribute_range[i+1][0]):
				response = HttpResponse(json.dumps({'error': "Wrong intervals for additional attribute filter"}))
				response.status_code = 200
				return response

		newest_log = pm4py.objects.log.log.EventLog()
		not_filtered_logs["additional_filter"] = pm4py.objects.log.log.EventLog()

		traces_with_attr = []
		not_traces_with_attr = []
		for trace in new_log:
			if additional_attribute in trace.attributes.keys():
				traces_with_attr.append(trace)
			else:
				not_traces_with_attr.append(trace)
		#check if trace attribute
		if len(traces_with_attr)>0:
			#check if numeric
			if type(traces_with_attr[0].attributes[additional_attribute]) in [int, float]:
				for trace in traces_with_attr:
					if any([trace.attributes[additional_attribute] >= x and trace.attributes[additional_attribute] <= y for (x,y) in custom_attribute_range]):
						newest_log.append(trace)
					else:
						not_filtered_logs["additional_filter"].append(trace)
				for trace in not_traces_with_attr:
					not_filtered_logs["additional_filter"].append(trace)
			else: #string
				attribute_frequencies = dict()
				for trace in traces_with_attr:
					if trace.attributes[additional_attribute] not in attribute_frequencies.keys():
						attribute_frequencies[trace.attributes[additional_attribute]] = 0
					attribute_frequencies[trace.attributes[additional_attribute]] += 1

				sorted_frequencies = {k: v for k, v in sorted(attribute_frequencies.items(), key=lambda item: item[1])}
				frequencies_sorted_list = list(sorted_frequencies)

				nr_values = len(frequencies_sorted_list)
				idx = [(math.floor(x*nr_values), math.ceil(y*nr_values)) for (x,y) in custom_attribute_range]
				values_to_keep = [frequencies_sorted_list[x:y+1] for (x,y) in idx]
				values_to_keep = flatten(values_to_keep)

				for trace in traces_with_attr:
					if trace.attributes[additional_attribute] in values_to_keep:
						newest_log.append(trace)
					else:
						not_filtered_logs["additional_filter"].append(trace)
				for trace in not_traces_with_attr:
					not_filtered_logs["additional_filter"].append(trace)

		else: #event attribute
			if [type(event[additional_attribute]) for trace in new_log for event in trace if additional_attribute in event.keys()][0] in [int, float]:
				for trace in new_log:
					new_trace = pm4py.objects.log.log.Trace()
					not_new_trace = pm4py.objects.log.log.Trace()
					for event in trace:
						if(additional_attribute in event.keys() and any([event[additional_attribute] >= x and event[additional_attribute] <= y for (x,y) in custom_attribute_range ])):
							new_trace.append(event)
						else:
							not_new_trace.append(event)
					if(len(new_trace)>0):
						newest_log.append(new_trace)
					if(len(not_new_trace)>0):
						not_filtered_logs["additional_filter"].append(not_new_trace)
			else: #string
				attribute_frequencies = dict()
				for trace in new_log:
					for event in trace:
						if additional_attribute in event.keys():
							if event[additional_attribute] not in attribute_frequencies.keys():
								attribute_frequencies[event[additional_attribute]] = 0
							attribute_frequencies[event[additional_attribute]] += 1

				sorted_frequencies = {k: v for k, v in sorted(attribute_frequencies.items(), key=lambda item: item[1])}
				frequencies_sorted_list = list(sorted_frequencies)

				nr_values = len(frequencies_sorted_list)
				idx = [(math.floor(x*nr_values), math.ceil(y*nr_values)) for (x,y) in custom_attribute_range]
				values_to_keep = [frequencies_sorted_list[x:y+1] for (x,y) in idx]
				values_to_keep = flatten(values_to_keep)

				for trace in new_log:
					new_trace = pm4py.objects.log.log.Trace()
					not_new_trace = pm4py.objects.log.log.Trace()
					for event in trace:
						if(additional_attribute in event.keys() and event[additional_attribute] in values_to_keep):
							new_trace.append(event)
						else:
							not_new_trace.append(event)
					if(len(new_trace)>0):
						newest_log.append(new_trace)
					if(len(not_new_trace)>0):
						not_filtered_logs["additional_filter"].append(not_new_trace)


	else:
		newest_log = new_log

	time_attribute_finished = datetime.now()

	if(selected_viz=="dfgf"):
		dfg = dfg_discovery.apply(newest_log)
		gviz = dfg_visualization.apply(dfg, log=newest_log, variant=dfg_visualization.Variants.FREQUENCY)
		dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l1.png"))
	elif(selected_viz=="dfgp"):
		dfg = dfg_discovery.apply(newest_log)
		gviz = dfg_visualization.apply(dfg, log=newest_log, variant=dfg_visualization.Variants.PERFORMANCE)
		dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l1.png"))
	else:
		heu_net = heuristics_miner.apply_heu(newest_log, parameters={"dependency_thresh": 0.99})
		gviz = hn_vis_factory.apply(heu_net)
		hn_vis_factory.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l1.png"))

	xes_exporter.apply(newest_log, os.path.join("webapp","static", req.session["id"] + "_l1.xes"))


	#l2
	not_filtered_log = pm4py.objects.log.log.EventLog()
	for part in not_filtered_logs.keys():
		for trace in not_filtered_logs[part]:
			not_filtered_log.append(trace)

	if(selected_viz=="dfgf"):
		dfg = dfg_discovery.apply(not_filtered_log)
		gviz = dfg_visualization.apply(dfg, log=not_filtered_log, variant=dfg_visualization.Variants.FREQUENCY)
		dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l2.png"))
	elif(selected_viz=="dfgp"):
		dfg = dfg_discovery.apply(not_filtered_log)
		gviz = dfg_visualization.apply(dfg, log=not_filtered_log, variant=dfg_visualization.Variants.PERFORMANCE)
		dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l2.png"))
	else:
		heu_net = heuristics_miner.apply_heu(not_filtered_log, parameters={"dependency_thresh": 0.99})
		gviz = hn_vis_factory.apply(heu_net)
		hn_vis_factory.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l2.png"))
	xes_exporter.apply(not_filtered_log, os.path.join("webapp","static", req.session["id"] + "_l2.xes"))

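	# optionally compute the average Levenshtein distance between kept and
	# removed traces, using hashed activity names as sequence symbols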
	if(calc_lev):
		lev_new = [0]*len(newest_log)
		for i in range(len(newest_log)):
			lev_new[i] = [hash(event['concept:name']) for event in newest_log[i]]

		lev_not = [0]*len(not_filtered_log)
		for i in range(len(not_filtered_log)):
			lev_not[i] = [hash(event['concept:name']) for event in not_filtered_log[i]]

		distances = []
		for i in range(len(lev_new)):
			for j in range(len(lev_not)):
				distances.append(lev_dist(lev_new[i], lev_not[j]))
		lev_d = sum(distances)/len(distances)
		print("Levenshtein's distance: "+str(lev_d))
	else:
		lev_d = "null"

	used_paths = 0
	for lower, higher in custom_path_range:
		used_paths += round((higher-lower)*100)
	print(f"Using {used_paths}% of paths. {100-used_paths}% of paths are discarded.")

	print("Timestamp filter: {} seconds. \nVariants filter: {} seconds. \nPerformance filter: {} seconds. \nActivities filter: {} seconds. \nAttribute filter: {} seconds.".format((time_variants_started - time_timestamp_started).total_seconds(), (time_variants_finished - time_variants_started).total_seconds(), (time_performance_finished - time_variants_finished).total_seconds(), (time_activities_finished - time_performance_finished).total_seconds(), (time_attribute_finished - time_activities_finished).total_seconds()))
	response = HttpResponse(json.dumps({'time':(time_variants_started - time_timestamp_started).total_seconds(), 'variants':(time_variants_finished - time_variants_started).total_seconds(),'performance':(time_performance_finished - time_variants_finished).total_seconds(), 'activities':(time_activities_finished - time_performance_finished).total_seconds(), 'attribute':(time_attribute_finished - time_activities_finished).total_seconds(), 'traces':[len(newest_log), len(not_filtered_log)], 'distance':lev_d}))
	response.status_code = 200
	return response
Example #29
import os
from tqdm import tqdm
from meta_feature_extraction import sort_files
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.objects.log.exporter.xes import exporter as xes_exporter

print("Converting log files")
event_logs_path = "event_logs"
for f in tqdm(sort_files(os.listdir(event_logs_path))):
    log = xes_importer.apply(f"{event_logs_path}/{f}",
                             parameters={"show_progress_bar": False})
    f_name = f.split(".gz")[0]
    xes_exporter.apply(log,
                       f"{event_logs_path}/{f_name}",
                       parameters={"show_progress_bar": False})
    os.remove(f"{event_logs_path}/{f}")
Example #30
 def format(self, log, outFileName):
     xes_exporter.apply(log, outFileName)