Example #1
def save_vis_performance_dfg(dfg: dict,
                             start_activities: dict,
                             end_activities: dict,
                             file_path: str,
                             aggregation_measure="mean"):
    """
    Saves the visualization of a performance DFG

    Parameters
    ----------------
    dfg
        DFG object
    start_activities
        Start activities
    end_activities
        End activities
    file_path
        Destination path
    aggregation_measure
        Aggregation measure (default: mean): mean, median, min, max, sum, stdev
    """
    format = os.path.splitext(file_path)[1][1:]
    from pm4py.visualization.dfg import visualizer as dfg_visualizer
    from pm4py.visualization.dfg.variants import performance as dfg_perf_visualizer
    dfg_parameters = dfg_perf_visualizer.Parameters
    parameters = {}
    parameters[dfg_parameters.FORMAT] = format
    parameters[dfg_parameters.START_ACTIVITIES] = start_activities
    parameters[dfg_parameters.END_ACTIVITIES] = end_activities
    parameters[dfg_parameters.AGGREGATION_MEASURE] = aggregation_measure
    gviz = dfg_perf_visualizer.apply(dfg, parameters=parameters)
    dfg_visualizer.save(gviz, file_path)
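A minimal usage sketch for the function above (not part of the original example): it assumes pm4py 2.x and a local "running-example.xes" file, both purely illustrative.

import os
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.statistics.start_activities.log import get as start_activities_get
from pm4py.statistics.end_activities.log import get as end_activities_get

log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
# discover a performance DFG together with its start/end activities
dfg = dfg_discovery.apply(log, variant=dfg_discovery.Variants.PERFORMANCE)
start_activities = start_activities_get.get_start_activities(log)
end_activities = end_activities_get.get_end_activities(log)
save_vis_performance_dfg(dfg, start_activities, end_activities,
                         "perf_dfg.png", aggregation_measure="median")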
Example #2
def execute_script():
    log_input_directory = "xesinput"
    all_logs_names = os.listdir(log_input_directory)
    all_logs_names = [log for log in all_logs_names if ".xe" in log]

    for logName in all_logs_names:
        # logPath = os.path.join("..", "tests", "inputData", logName)
        log_path = os.path.join(log_input_directory, logName)
        log = xes_importer.apply(log_path)
        print("\n\n")
        print("log loaded")
        print("Number of traces - ", len(log))
        event_log = log_conversion.apply(
            log, variant=log_conversion.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
        exp_log_name = os.path.join("xescert_exportlogs", "exp_" + logName)
        print("exporting log", exp_log_name)
        xes_exporter.apply(log, exp_log_name)
        print("exported log", exp_log_name)

        log, classifier_attr_key = insert_classifier.search_act_class_attr(log)

        classifiers = list(log.classifiers.keys())
        if classifier_attr_key is None and classifiers:
            try:
                print(classifiers)
                log, classifier_attr_key = insert_classifier.insert_activity_classifier_attribute(
                    log, classifiers[0])
                print(classifier_attr_key)
            except Exception:
                print("exception in handling classifier")

        if classifier_attr_key is None:
            classifier_attr_key = "concept:name"

        if len(event_log) > 0 and classifier_attr_key in event_log[0]:
            parameters = {
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: classifier_attr_key
            }

            dfg = dfg_algorithm.apply(log, parameters=parameters)
            gviz = dfg_vis.apply(dfg,
                                 log=log,
                                 variant="frequency",
                                 parameters=parameters)
            # dfg_vis.view(gviz)

            dfg_vis.save(gviz,
                         os.path.join("xescert_images",
                                      logName.replace(".xes", ".png")))

        print("Reimporting log file just exported - ", exp_log_name)

        log = xes_importer.apply(exp_log_name)
        print("log loaded", exp_log_name)
        print("Number of traces - ", len(log))
        event_log = log_conversion.apply(
            log, variant=log_conversion.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
Example #3
def save_vis_dfg(dfg, start_activities, end_activities, file_path, log=None):
    """
    Saves a DFG visualization to a file

    Parameters
    --------------
    dfg
        DFG object
    start_activities
        Start activities
    end_activities
        End activities
    file_path
        Destination path
    """
    format = os.path.splitext(file_path)[1][1:].lower()
    from pm4py.visualization.dfg import visualizer as dfg_visualizer
    parameters = dfg_visualizer.Variants.FREQUENCY.value.Parameters
    gviz = dfg_visualizer.apply(dfg,
                                log=log,
                                variant=dfg_visualizer.Variants.FREQUENCY,
                                parameters={
                                    parameters.FORMAT: format,
                                    parameters.START_ACTIVITIES:
                                    start_activities,
                                    parameters.END_ACTIVITIES: end_activities
                                })
    dfg_visualizer.save(gviz, file_path)
Example #4
def save_directly_follows_graph(graph, path):
    """
    Saves a directly-follows graph to the specified path.
    :param graph: the directly-follows graph
    :param path: the path
    """
    log.info('saving directly follows graph %s to path %s', graph, path)
    dfg_vis.save(graph, path)
Example #5
    def test_45(self):
        import os
        from pm4py.objects.log.importer.xes import importer as xes_importer
        log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))

        from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
        from pm4py.visualization.dfg import visualizer as dfg_visualization

        dfg = dfg_discovery.apply(log, variant=dfg_discovery.Variants.PERFORMANCE)
        parameters = {dfg_visualization.Variants.PERFORMANCE.value.Parameters.FORMAT: "svg"}
        gviz = dfg_visualization.apply(dfg, log=log, variant=dfg_visualization.Variants.PERFORMANCE,
                                       parameters=parameters)

        dfg_visualization.save(gviz, os.path.join("test_output_data", "dfg.svg"))
        os.remove(os.path.join("test_output_data", "dfg.svg"))
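The performance visualizer also accepts an aggregation measure (see Example #1). A self-contained sketch, assuming the PERFORMANCE variant's Parameters enum exposes AGGREGATION_MEASURE as in Example #1 and that the same input file is available:

import os
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.visualization.dfg import visualizer as dfg_visualization

log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
dfg = dfg_discovery.apply(log, variant=dfg_discovery.Variants.PERFORMANCE)
params = dfg_visualization.Variants.PERFORMANCE.value.Parameters
# use the median instead of the default mean aggregation
parameters = {params.FORMAT: "svg", params.AGGREGATION_MEASURE: "median"}
gviz = dfg_visualization.apply(dfg, log=log,
                               variant=dfg_visualization.Variants.PERFORMANCE,
                               parameters=parameters)
dfg_visualization.save(gviz, os.path.join("test_output_data", "dfg_median.svg"))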
Example #6
def save_full_dfg(log):
    dfg = dfg_discovery.apply(log)

    gviz = dfg_visualization.apply(
        dfg, log=log, variant=dfg_visualization.Variants.FREQUENCY)
    dfg_visualization.view(gviz)
    parameters = {
        dfg_visualization.Variants.FREQUENCY.value.Parameters.FORMAT: "svg"
    }
    gviz = dfg_visualization.apply(
        dfg,
        log=log,
        variant=dfg_visualization.Variants.FREQUENCY,
        parameters=parameters)
    dfg_visualization.save(gviz, "dfg_full.svg")
    print('Full DFG saved as "dfg_full.svg"')
    return gviz
Example #7
def save_graph_file(type, gviz, path):
    if type == "alpha":
        pn_visualizer.save(gviz, path)
    elif type == "heuristic-heu-net":
        hn_visualizer.save(gviz, path)
    elif type == "heuristic-pet-net":
        pn_visualizer.save(gviz, path)
    elif type == "dfg-discovery-frequency":
        dfg_visualization.save(gviz, path)
    elif type == "dfg-discovery-active-time":
        dfg_visualization.save(gviz, path)
    elif type == "dfg-discovery-pet-net":
        pt_visualizer.save(gviz, path)
    elif type == "inductive-miner-tree":
        pt_visualizer.save(gviz, path)
    elif type == "inductive-miner-petri":
        pn_visualizer.save(gviz, path)
Example #8
def create_directly_follows_graph(frame: DataFrame, output_format='svg'):
    """
    Creates a Directly Follows Graph from the supplied DataFrame.
    :param frame: the DataFrame
    :param output_format: desired output format
    :return: object representing the created graph
    """
    event_log = _convert_data_frame_to_event_log(frame)
    dfg = dfg_alg.apply(log=event_log, variant=DfgAlgVariants.FREQUENCY)
    gviz = dfg_vis.apply(
        dfg,
        log=event_log,
        variant=DfgVisVariants.FREQUENCY,
        parameters={VisualisationParams.FORMAT: output_format})
    saved_dfg = tempfile.NamedTemporaryFile(prefix='pm_',
                                            suffix=f'.{output_format}',
                                            delete=False)
    dfg_vis.save(gviz, saved_dfg.name)
    # close here and delete after final use to work around file-access
    # issues in case anybody tries to run this on Windows
    saved_dfg.close()
    return saved_dfg
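A hedged usage sketch for create_directly_follows_graph: the private helper _convert_data_frame_to_event_log is project-specific and not shown, so the column names below (the standard pm4py keys) are an assumption about what it expects.

import pandas as pd

# hypothetical three-event log
frame = pd.DataFrame({
    "case:concept:name": ["1", "1", "2"],
    "concept:name": ["register", "check", "register"],
    "time:timestamp": pd.to_datetime(
        ["2021-01-01 10:00", "2021-01-01 11:00", "2021-01-02 09:00"]),
})
tmp_file = create_directly_follows_graph(frame, output_format="png")
print("DFG image written to", tmp_file.name)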
Example #9
def save_vis_dfg(dfg: dict,
                 start_activities: dict,
                 end_activities: dict,
                 file_path: str,
                 log: Optional[EventLog] = None):
    """
    Saves a DFG visualization to a file

    Parameters
    --------------
    dfg
        DFG object
    start_activities
        Start activities
    end_activities
        End activities
    file_path
        Destination path
    log
        (Optional) event log used to enrich the visualization
    """
    if log is not None:
        if type(log) not in [pd.DataFrame, EventLog, EventStream]:
            raise Exception(
                "the method can be applied only to a traditional event log!")

    format = os.path.splitext(file_path)[1][1:]
    from pm4py.visualization.dfg import visualizer as dfg_visualizer
    dfg_parameters = dfg_visualizer.Variants.FREQUENCY.value.Parameters
    parameters = get_properties(log)
    parameters[dfg_parameters.FORMAT] = format
    parameters[dfg_parameters.START_ACTIVITIES] = start_activities
    parameters[dfg_parameters.END_ACTIVITIES] = end_activities
    gviz = dfg_visualizer.apply(dfg,
                                log=log,
                                variant=dfg_visualizer.Variants.FREQUENCY,
                                parameters=parameters)
    dfg_visualizer.save(gviz, file_path)
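This function matches the pm4py simplified-interface helper reachable as pm4py.save_vis_dfg; assuming that is the case, a usage sketch with an illustrative XES path:

import pm4py
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.statistics.start_activities.log import get as sa_get
from pm4py.statistics.end_activities.log import get as ea_get

log = xes_importer.apply("running-example.xes")  # illustrative path
dfg = dfg_discovery.apply(log)
start_activities = sa_get.get_start_activities(log)
end_activities = ea_get.get_end_activities(log)
pm4py.save_vis_dfg(dfg, start_activities, end_activities, "dfg.png", log=log)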
Example #10
def api_gerar_modelo_pm():
    ramojustica = request.args.get('ramojustica')
    codtribunal = request.args.get('codtribunal')
    atuacao = request.args.get('atuacao')
    cluster = request.args.get('cluster')
    grau = request.args.get('grau')
    codorgaoj = request.args.get('codorgaoj')
    natureza = request.args.get('natureza')
    codclasse = request.args.get('codclasse')
    dtinicio = request.args.get('dtinicio')
    dtfim = request.args.get('dtfim')
    baixado = request.args.get('baixado')
    sensibilidade = request.args.get('sensibilidade')
    metrica = request.args.get('metrica')
    formato = request.args.get('formato')
    
    if ramojustica is None:
        abort(400, description="ramojustica nao informado")
    if atuacao is None:
        abort(400, description="atuacao nao informado")
    if codtribunal is None and cluster is None:
        abort(400, description="codtribunal ou cluster deve ser informado")
    
    gviz = gerar_view_dfg_model_from_params(ramojustica, codtribunal, atuacao, cluster, grau, codorgaoj, natureza, codclasse, \
               dtinicio, dtfim, baixado=baixado, sensibility=sensibilidade, metric_type=metrica, image_format=formato)
    if gviz is not None:
        file_remover = FileRemover()
        tempdir = tempfile.mkdtemp()
        path = tempdir + "/model_mp." + str(formato).lower()
        dfg_visualization.save(gviz, path)
        resp = send_file(path, as_attachment=False)
        file_remover.cleanup_once_done(resp, path)
        return resp
    else:
        print("sem dados")
        abort(404, description="Nao encontrado")
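Example #10 is a Flask view whose imports are not shown; a hedged sketch of the framework-level ones (FileRemover and gerar_view_dfg_model_from_params are project-specific helpers defined elsewhere, not Flask or pm4py APIs):

import tempfile
from flask import request, abort, send_file
from pm4py.visualization.dfg import visualizer as dfg_visualization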
Example #11
def filter_for_periods(detect_result, event_counts):
    start_element1 = 0 if CHOSEN_PERIOD1 == 1 else detect_result[CHOSEN_PERIOD1
                                                                 - 2]
    end_element1 = detect_result[CHOSEN_PERIOD1 - 1]

    start_element2 = 0 if CHOSEN_PERIOD2 == 1 else detect_result[CHOSEN_PERIOD2
                                                                 - 2]
    end_element2 = detect_result[CHOSEN_PERIOD2 - 1]

    days = list(event_counts.keys())
    #print(days[start_element1])
    start_day1 = days[start_element1]
    end_day1 = days[end_element1 - 1]
    days_count1 = end_element1 - start_element1

    start_day2 = days[start_element2]
    end_day2 = days[end_element2 - 1]
    days_count2 = end_element2 - start_element2

    # Traces that are FULLY CONTAINED in the given timeframe
    period_1_log = timestamp_filter.filter_traces_contained(
        log, start_day1 + " 00:00:00", end_day1 + " 23:59:59")
    period_2_log = timestamp_filter.filter_traces_contained(
        log, start_day2 + " 00:00:00", end_day2 + " 23:59:59")

    # Traces that INTERSECT with the given timeframe
    # period_1_log = timestamp_filter.filter_traces_intersecting(log, start_day+" 00:00:00", end_day+" 23:59:59")

    dfg1 = dfg_discovery.apply(period_1_log)
    dfg2 = dfg_discovery.apply(period_2_log)

    gviz1 = dfg_visualization.apply(
        dfg1, log=period_1_log, variant=dfg_visualization.Variants.FREQUENCY)
    dfg_visualization.view(gviz1)

    # Saving the DFG
    parameters = {
        dfg_visualization.Variants.FREQUENCY.value.Parameters.FORMAT: "svg"
    }
    gviz1 = dfg_visualization.apply(
        dfg1,
        log=period_1_log,
        variant=dfg_visualization.Variants.FREQUENCY,
        parameters=parameters)
    dfg_visualization.save(gviz1, "dfg1.svg")

    nodes_period1, edges_period1 = dot_to_df(gviz1)

    gviz2 = dfg_visualization.apply(
        dfg2, log=period_2_log, variant=dfg_visualization.Variants.FREQUENCY)
    dfg_visualization.view(gviz2)

    # Saving the DFG
    parameters = {
        dfg_visualization.Variants.FREQUENCY.value.Parameters.FORMAT: "svg"
    }
    gviz2 = dfg_visualization.apply(
        dfg2,
        log=period_2_log,
        variant=dfg_visualization.Variants.FREQUENCY,
        parameters=parameters)
    dfg_visualization.save(gviz2, "dfg2.svg")

    return days_count1, days_count2, period_1_log, period_2_log, gviz1, gviz2
Example #12
def apply_filter(req):
	sessions[req.session["id"]] = datetime.now()
	filters = {
		"time": True,
		"variants": True,
		"performance": True,
		"activities": True,
		"attribute": True
	}
	req.session.set_expiry(7200)
	#print(str(req.body))
	o = json.loads(req.body)
	print(str(o))
	custom_time_range = []
	for pair in o["filter1"]:
		#custom_time_range.append((dateutil.parser.parse(pair[0]),dateutil.parser.parse(pair[1])))
		custom_time_range.append((pair[0],pair[1]))
	if o["filter1"] == []:
		filters["time"] = False
	#print(o["filter1"][0])
	#print(custom_time_range[0][0])
	#print(custom_time_range)
	custom_path_range = []
	for pair in o["filter2"]:
		custom_path_range.append((float(pair[0]),float(pair[1])))
	if o["filter2"] == []:
		filters["variants"] = False
		#custom_path_range = [(0,1)] #filter2
	custom_performance_range = []
	for pair in o["filter3"]:
		custom_performance_range.append((float(pair[0]),float(pair[1])))
	if o["filter3"] == []:
		filters["performance"] = False
	custom_activitiy_range = []
	for pair in o["filter4"]:
		custom_activitiy_range.append((float(pair[0]),float(pair[1])))
	if o["filter4"] == []:
		filters["activities"] = False
		#custom_activitiy_range = [(0,1)] #filter3
	custom_attribute_range = []
	for pair in o["filter5"]:
		custom_attribute_range.append((float(pair[0]),float(pair[1])))
	if o["filter5"] == [] or o["filter5attribute"] == "Empty":
		filters["attribute"] = False
	additional_attribute = o["filter5attribute"]

	selected_viz = o["visualization"]
	calc_lev = o["distance"]
	#input_file = os.path.join("webapp","static", req.session["id"] + "_l0.xes")
	input_file = os.path.join("webapp","static", "sepsis.xes")
	input_log = xes_importer.apply(input_file)
	not_filtered_logs = {}
	flatten = lambda l: [item for sublist in l for item in sublist]

	time_timestamp_started = datetime.now()
	if filters["time"]:
		#TODO check overlapping for filter
		custom_time_range = sorted(custom_time_range, reverse=False)
		for i in range(0,len(custom_time_range)-1):
			if(custom_time_range[i][1] > custom_time_range[i+1][0]):
				response = HttpResponse(json.dumps({'error': "Wrong intervals for time filter"}))
				response.status_code = 200
				return response
				#raise ValueError("Overlapping time ranges")

		logs = []
		for (x,y) in custom_time_range:
			logs.append(timestamp_filter.filter_traces_contained(input_log, x, y))

		#log = timestamp_filter.filter_traces_contained(input_log, custom_time_range[0][0], custom_time_range[0][1])
		log = pm4py.objects.log.log.EventLog()
		for timeslice in logs:
			for trace in timeslice:
				log.append(trace)
		print(len(input_log))
		print(len(log))
		#l2
		not_filtered_logs["timestamp_filter"] = pm4py.objects.log.log.EventLog()
		for trace in input_log:
			if trace not in log:
				not_filtered_logs["timestamp_filter"].append(trace)
		print(len(not_filtered_logs["timestamp_filter"]))
	else:
		log = input_log

	time_variants_started = datetime.now() # where should I start?

	if filters["variants"]:
		variants = variants_filter.get_variants(log)
		variants_count = case_statistics.get_variant_statistics(log)
		variants_count = sorted(variants_count, key=lambda x: x['count'], reverse=False)

		custom_path_range = sorted(custom_path_range, reverse=False)
		# check overlapping
		for i in range(0,len(custom_path_range)-1):
			if(custom_path_range[i][1] > custom_path_range[i+1][0]):
				response = HttpResponse(json.dumps({'error': "Wrong intervals for variants filter"}))
				response.status_code = 200
				return response
				#raise ValueError("Overlapping variants ranges")

		nr_variants = len(variants_count)
		idx = [(math.floor(x*nr_variants), math.ceil(y*nr_variants)) for (x,y) in custom_path_range]
		variants_subset = [variants_count[x:y+1] for (x,y) in idx]
		variants_subset = flatten(variants_subset)
		filtered_variants = {k:v for k,v in variants.items() if k in [x["variant"] for x in variants_subset]}
		#l2
		not_filtered_variants = {k:v for k,v in variants.items() if k not in [x["variant"] for x in variants_subset]}

		filtered_log = variants_filter.apply(log, filtered_variants)
		#l2
		not_filtered_logs["variant_filter"] = variants_filter.apply(log, not_filtered_variants)
	else:
		filtered_log = log

	time_variants_finished = datetime.now() # note: incl log2 generation

	if filters["performance"]:
		custom_performance_range = sorted(custom_performance_range, reverse=False)
		# check overlapping
		for i in range(0,len(custom_performance_range)-1):
			if(custom_performance_range[i][1] > custom_performance_range[i+1][0]):
				response = HttpResponse(json.dumps({'error': "Wrong intervals for performance filter"}))
				response.status_code = 200
				return response
				#raise ValueError("Overlapping performance ranges")

		#all_case_durations = case_statistics.get_all_casedurations(log, parameters={case_statistics.Parameters.TIMESTAMP_KEY: "time:timestamp"})
		#case_filter.filter_case_performance(log, 86400, 864000)
		performances = []
		for i in range(len(filtered_log)):
			filtered_log[i].attributes["throughput"] = (max([event["time:timestamp"]for event in filtered_log[i]])-min([event["time:timestamp"] for event in filtered_log[i]])).total_seconds()
			performances.append(filtered_log[i].attributes["throughput"])

		nr_cases = len(filtered_log)
		performances = sorted(performances, reverse=False)
		idx = [(math.floor(x*nr_cases), math.ceil(y*nr_cases)) for (x,y) in custom_performance_range]
		perf_subset = [performances[x:y+1] for (x,y) in idx]
		perf_subset = flatten(perf_subset)

		performance_log = pm4py.objects.log.log.EventLog([trace for trace in filtered_log if trace.attributes["throughput"] in perf_subset])
		#l2
		not_filtered_logs["performance_filter"] = pm4py.objects.log.log.EventLog([trace for trace in filtered_log if trace.attributes["throughput"] not in perf_subset])
		#print(str(len(not_filtered_logs["performance_filter"])))

	else:
		performance_log = filtered_log

	time_performance_finished = datetime.now()

	if filters["activities"]:
		variants = variants_filter.get_variants(performance_log)
		variants_count = case_statistics.get_variant_statistics(performance_log)
		variants_count = sorted(variants_count, key=lambda x: x['count'], reverse=False)

		activities = dict()
		for variant in variants_count:
			for activity in variant["variant"].split(","):
				if (activity not in activities.keys()):
					activities[activity] = variant["count"]
				else:
					activities[activity] += variant["count"]

		sorted_activities = {k: v for k, v in sorted(activities.items(), key=lambda item: item[1])}
		activities_sorted_list = list(sorted_activities)
		custom_activitiy_range = sorted(custom_activitiy_range, reverse=False)
		# check overlapping
		for i in range(0,len(custom_activitiy_range)-1):
			if(custom_activitiy_range[i][1] > custom_activitiy_range[i+1][0]):
				response = HttpResponse(json.dumps({'error': "Wrong intervals for activities filter"}))
				response.status_code = 200
				return response
				#raise ValueError("Overlapping activities ranges")
		nr_activities = len(activities_sorted_list)
		idx = [(math.floor(x*nr_activities), math.ceil(y*nr_activities)) for (x,y) in custom_activitiy_range]
		activities_to_keep = [activities_sorted_list[x:y+1] for (x,y) in idx]
		activities_to_keep = flatten(activities_to_keep)
		variants_idx = []
		for i in range(len(variants_count)):
			for activity in activities_to_keep:
				if (activity in variants_count[i]["variant"].split(",") and (i not in variants_idx)):
					variants_idx.append(i)
		variants_subset = [variants_count[i] for i in variants_idx]
		filtered_variants = {k:v for k,v in variants.items() if k in [x["variant"] for x in variants_subset]}
		#l2
		not_filtered_variants = {k:v for k,v in variants.items() if k not in [x["variant"] for x in variants_subset]}

		filtered_log = variants_filter.apply(performance_log, filtered_variants)

		#l2
		not_filtered_logs["activities_filter"] = variants_filter.apply(performance_log, not_filtered_variants)

		new_log = pm4py.objects.log.log.EventLog()
		#not_filtered_logs["activities_filter_traces"] = pm4py.objects.log.log.EventLog()
		for trace in filtered_log:
			new_trace = pm4py.objects.log.log.Trace()
			not_new_trace = pm4py.objects.log.log.Trace()
			for event in trace:
				if(event['concept:name'] in activities_to_keep):
					new_trace.append(event)
				else:
					not_new_trace.append(event)
			if(len(new_trace)>0):
				new_log.append(new_trace)
			if(len(not_new_trace)>0):
				not_filtered_logs["activities_filter"].append(not_new_trace)
	else:
		new_log = performance_log

	time_activities_finished = datetime.now()

	if filters["attribute"]:
		custom_attribute_range = sorted(custom_attribute_range, reverse=False)
		# check overlapping
		for i in range(0,len(custom_attribute_range)-1):
			if(custom_attribute_range[i][1] > custom_attribute_range[i+1][0]):
				response = HttpResponse(json.dumps({'error': "Wrong intervals for additional attribute filter"}))
				response.status_code = 200
				return response

		newest_log = pm4py.objects.log.log.EventLog()
		not_filtered_logs["additional_filter"] = pm4py.objects.log.log.EventLog()

		traces_with_attr = []
		not_traces_with_attr = []
		for trace in new_log:
			if additional_attribute in trace.attributes.keys():
				traces_with_attr.append(trace)
			else:
				not_traces_with_attr.append(trace)
		#check if trace attribute
		if len(traces_with_attr)>0:
			#check if numeric
			if type(traces_with_attr[0].attributes[additional_attribute]) in [int, float]:
				for trace in traces_with_attr:
					if any([trace.attributes[additional_attribute] >= x and trace.attributes[additional_attribute] <= y for (x,y) in custom_attribute_range]):
						newest_log.append(trace)
					else:
						not_filtered_logs["additional_filter"].append(trace)
				for trace in not_traces_with_attr:
					not_filtered_logs["additional_filter"].append(trace)
			else: #string
				attribute_frequencies = dict()
				for trace in traces_with_attr:
					if trace.attributes[additional_attribute] not in attribute_frequencies.keys():
						attribute_frequencies[trace.attributes[additional_attribute]] = 0
					attribute_frequencies[trace.attributes[additional_attribute]] += 1

				sorted_frequencies = {k: v for k, v in sorted(attribute_frequencies.items(), key=lambda item: item[1])}
				frequencies_sorted_list = list(sorted_frequencies)

				nr_values = len(frequencies_sorted_list)
				idx = [(math.floor(x*nr_values), math.ceil(y*nr_values)) for (x,y) in custom_attribute_range]
				values_to_keep = [frequencies_sorted_list[x:y+1] for (x,y) in idx]
				values_to_keep = flatten(values_to_keep)

				for trace in traces_with_attr:
					if trace.attributes[additional_attribute] in values_to_keep:
						newest_log.append(trace)
					else:
						not_filtered_logs["additional_filter"].append(trace)
				for trace in not_traces_with_attr:
					not_filtered_logs["additional_filter"].append(trace)

		else: #event attribute
			if [type(event[additional_attribute]) for trace in new_log for event in trace if additional_attribute in event.keys()][0] in [int, float]:
				for trace in new_log:
					new_trace = pm4py.objects.log.log.Trace()
					not_new_trace = pm4py.objects.log.log.Trace()
					for event in trace:
						if(additional_attribute in event.keys() and any([event[additional_attribute] >= x and event[additional_attribute] <= y for (x,y) in custom_attribute_range ])):
							new_trace.append(event)
						else:
							not_new_trace.append(event)
					if(len(new_trace)>0):
						newest_log.append(new_trace)
					if(len(not_new_trace)>0):
						not_filtered_logs["additional_filter"].append(not_new_trace)
			else: #string
				attribute_frequencies = dict()
				for trace in new_log:
					for event in trace:
						if additional_attribute in event.keys():
							if event[additional_attribute] not in attribute_frequencies.keys():
								attribute_frequencies[event[additional_attribute]] = 0
							attribute_frequencies[event[additional_attribute]] += 1

				sorted_frequencies = {k: v for k, v in sorted(attribute_frequencies.items(), key=lambda item: item[1])}
				frequencies_sorted_list = list(sorted_frequencies)

				nr_values = len(frequencies_sorted_list)
				idx = [(math.floor(x*nr_values), math.ceil(y*nr_values)) for (x,y) in custom_attribute_range]
				values_to_keep = [frequencies_sorted_list[x:y+1] for (x,y) in idx]
				values_to_keep = flatten(values_to_keep)

				for trace in new_log:
					new_trace = pm4py.objects.log.log.Trace()
					not_new_trace = pm4py.objects.log.log.Trace()
					for event in trace:
						if(additional_attribute in event.keys() and event[additional_attribute] in values_to_keep):
							new_trace.append(event)
						else:
							not_new_trace.append(event)
					if(len(new_trace)>0):
						newest_log.append(new_trace)
					if(len(not_new_trace)>0):
						not_filtered_logs["additional_filter"].append(not_new_trace)


	else:
		newest_log = new_log

	time_attribute_finished = datetime.now()

	if(selected_viz=="dfgf"):
		dfg = dfg_discovery.apply(newest_log)
		gviz = dfg_visualization.apply(dfg, log=newest_log, variant=dfg_visualization.Variants.FREQUENCY)
		dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l1.png"))
	elif(selected_viz=="dfgp"):
		dfg = dfg_discovery.apply(newest_log)
		gviz = dfg_visualization.apply(dfg, log=newest_log, variant=dfg_visualization.Variants.PERFORMANCE)
		dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l1.png"))
	else:
		heu_net = heuristics_miner.apply_heu(newest_log, parameters={"dependency_thresh": 0.99})
		gviz = hn_vis_factory.apply(heu_net)
		hn_vis_factory.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l1.png"))

	xes_exporter.apply(newest_log, os.path.join("webapp","static", req.session["id"] + "_l1.xes"))


	#l2
	not_filtered_log = pm4py.objects.log.log.EventLog()
	for part in not_filtered_logs.keys():
		for trace in not_filtered_logs[part]:
			not_filtered_log.append(trace)

	if(selected_viz=="dfgf"):
		dfg = dfg_discovery.apply(not_filtered_log)
		gviz = dfg_visualization.apply(dfg, log=not_filtered_log, variant=dfg_visualization.Variants.FREQUENCY)
		dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l2.png"))
	elif(selected_viz=="dfgp"):
		dfg = dfg_discovery.apply(not_filtered_log)
		gviz = dfg_visualization.apply(dfg, log=not_filtered_log, variant=dfg_visualization.Variants.PERFORMANCE)
		dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l2.png"))
	else:
		heu_net = heuristics_miner.apply_heu(not_filtered_log, parameters={"dependency_thresh": 0.99})
		gviz = hn_vis_factory.apply(heu_net)
		hn_vis_factory.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l2.png"))
	xes_exporter.apply(not_filtered_log, os.path.join("webapp","static", req.session["id"] + "_l2.xes"))

	if(calc_lev):
		lev_new = [0]*len(newest_log)
		for i in range(len(newest_log)):
			lev_new[i] = [hash(event['concept:name']) for event in newest_log[i]]

		lev_not = [0]*len(not_filtered_log)
		for i in range(len(not_filtered_log)):
			lev_not[i] = [hash(event['concept:name']) for event in not_filtered_log[i]]

		distances = []
		for i in range(len(lev_new)):
			for j in range(len(lev_not)):
				distances.append(lev_dist(lev_new[i], lev_not[j]))
		lev_d = sum(distances)/len(distances)
		print("Levenshtein's distance: "+str(lev_d))
	else:
		lev_d = "null"

	used_paths = 0
	for lower, higher in custom_path_range:
		used_paths += round((higher-lower)*100)
	print(f"Using {used_paths}% of paths. {100-used_paths}% of paths are discarded.")

	print("Timestamp filter: {} seconds. \nVariants filter: {} seconds. \nPerformance filter: {} seconds. \nActivities filter: {} seconds. \nAttribute filter: {} seconds.".format((time_variants_started - time_timestamp_started).total_seconds(), (time_variants_finished - time_variants_started).total_seconds(), (time_performance_finished - time_variants_finished).total_seconds(), (time_activities_finished - time_performance_finished).total_seconds(), (time_attribute_finished - time_activities_finished).total_seconds()))
	response = HttpResponse(json.dumps({'time':(time_variants_started - time_timestamp_started).total_seconds(), 'variants':(time_variants_finished - time_variants_started).total_seconds(),'performance':(time_performance_finished - time_variants_finished).total_seconds(), 'activities':(time_activities_finished - time_performance_finished).total_seconds(), 'attribute':(time_attribute_finished - time_activities_finished).total_seconds(), 'traces':[len(newest_log), len(not_filtered_log)], 'distance':lev_d}))
	response.status_code = 200
	return response
Example #13
def discover_process_models(log_path, log_name):
    custom_print('Importando log')

    log_complete = xes_importer.apply(log_path)
    log = variants_filter.filter_log_variants_percentage(log_complete, 0.9)

    #A_ACTIVATED, A_DECLINED, A_CANCELLED
    #log = attributes_filter.apply(log_complete, ["A_ACTIVATED"], parameters={attributes_filter.Parameters.ATTRIBUTE_KEY: "concept:name", attributes_filter.Parameters.POSITIVE: True})

    custom_print('Log importado')

    if (1 == 2):
        #Inductive Miner
        custom_print('Iniciando Inductive Miner')

        parameters = {
            inductive_miner.Variants.IM.value.Parameters.CASE_ID_KEY:
            'case:concept:name',
            inductive_miner.Variants.IM.value.Parameters.TIMESTAMP_KEY:
            'time:timestamp'
        }
        variant = inductive_miner.Variants.IM

        petrinet = inductive_miner.apply(log,
                                         parameters=parameters,
                                         variant=variant)
        print_statistics(petrinet[0], 'IM')

        custom_print('Inductive Miner finalizado\n')

    if (1 == 2):
        #Inductive Miner Infrequent 0.2
        custom_print('Iniciando Inductive Miner Infrequent 0.2')

        parameters = {
            inductive_miner.Variants.IMf.value.Parameters.NOISE_THRESHOLD:
            0.2,
            inductive_miner.Variants.IMf.value.Parameters.CASE_ID_KEY:
            'case:concept:name',
            inductive_miner.Variants.IMf.value.Parameters.TIMESTAMP_KEY:
            'time:timestamp'
        }
        variant = inductive_miner.Variants.IMf

        petrinet = inductive_miner.apply(log,
                                         parameters=parameters,
                                         variant=variant)
        print_statistics(petrinet[0], 'IMf0.2')

        custom_print('Inductive Miner Infrequent 0.2 finalizado\n')

    if (1 == 1):
        #Inductive Miner Infrequent 0.5
        custom_print('Iniciando Inductive Miner Infrequent 0.5')

        parameters = {
            inductive_miner.Variants.IMf.value.Parameters.NOISE_THRESHOLD:
            0.5,
            inductive_miner.Variants.IMf.value.Parameters.CASE_ID_KEY:
            'case:concept:name',
            inductive_miner.Variants.IMf.value.Parameters.TIMESTAMP_KEY:
            'time:timestamp'
        }
        variant = inductive_miner.Variants.IMf

        petrinet, initial_marking, final_marking = inductive_miner.apply(
            log, parameters=parameters, variant=variant)
        print_statistics(petrinet, 'IMf0.5')

        custom_print('Inductive Miner Infrequent 0.5 finalizado\n')

        ts = reachability_graph.construct_reachability_graph(
            petrinet, initial_marking)
        gviz = ts_visualizer.apply(
            ts,
            parameters={
                ts_visualizer.Variants.VIEW_BASED.value.Parameters.FORMAT:
                "png"
            })
        gviz.render('petrinets/simple-reach', cleanup=True)

        pnml_exporter.apply(petrinet, initial_marking,
                            "petrinets/simple-petri.pnml")

    if (1 == 2):
        #Inductive Miner Infrequent 0.8
        custom_print('Iniciando Inductive Miner Infrequent 0.8')

        parameters = {
            inductive_miner.Variants.IMf.value.Parameters.NOISE_THRESHOLD:
            0.8,
            inductive_miner.Variants.IMf.value.Parameters.CASE_ID_KEY:
            'case:concept:name',
            inductive_miner.Variants.IMf.value.Parameters.TIMESTAMP_KEY:
            'time:timestamp'
        }
        variant = inductive_miner.Variants.IMf

        petrinet = inductive_miner.apply(log,
                                         parameters=parameters,
                                         variant=variant)
        print_statistics(petrinet[0], 'IMf0.8')

        custom_print('Inductive Miner Infrequent 0.8 finalizado\n')

    if (1 == 2):
        #Inductive Miner Directly-Follows
        custom_print('Iniciando Inductive Miner Directly-Follows')

        parameters = {
            inductive_miner.Variants.IMd.value.Parameters.CASE_ID_KEY:
            'case:concept:name',
            inductive_miner.Variants.IMd.value.Parameters.TIMESTAMP_KEY:
            'time:timestamp'
        }
        variant = inductive_miner.Variants.IMd

        petrinet = inductive_miner.apply(log,
                                         parameters=parameters,
                                         variant=variant)
        print_statistics(petrinet[0], 'IMd')

        custom_print('Inductive Miner Directly-Follows finalizado\n')

    if (1 == 2):
        #Alpha Miner
        custom_print('Iniciando Alpha Miner')

        parameters = {}
        variant = alpha_miner.Variants.ALPHA_VERSION_CLASSIC

        petrinet = alpha_miner.apply(log,
                                     parameters=parameters,
                                     variant=variant)
        print_statistics(petrinet[0], 'Alpha')

        custom_print('Alpha Miner finalizado\n')

    if (1 == 2):
        #Heuristic Miner 0.5
        custom_print('Iniciando Heuristic Miner 0.5')

        parameters = {
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            0.5
        }

        petrinet = heuristics_miner.apply(log, parameters=parameters)
        print_statistics(petrinet[0], 'HM0.5')

        custom_print('Heuristic Miner 0.5 finalizado\n')

    if (1 == 2):
        #Heuristic Miner 0.99
        custom_print('Iniciando Heuristic Miner 0.99')

        parameters = {
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            0.99
        }

        petrinet = heuristics_miner.apply(log, parameters=parameters)
        print_statistics(petrinet[0], 'HM0.99')

        custom_print('Heuristic Miner 0.99 finalizado\n')

    if (1 == 2):
        #Heuristic Miner 0.1
        custom_print('Iniciando Heuristic Miner 0.1')

        parameters = {
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            0.1
        }

        petrinet = heuristics_miner.apply(log, parameters=parameters)
        print_statistics(petrinet[0], 'HM0.1')

        custom_print('Heuristic Miner 0.1 finalizado\n')

    if (1 == 2):
        #Heuristic Miner 1.0
        custom_print('Iniciando Heuristic Miner 1.0')

        parameters = {
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            1.0
        }

        petrinet = heuristics_miner.apply(log, parameters=parameters)
        print_statistics(petrinet[0], 'HM1.0')

        custom_print('Heuristic Miner 1.0 finalizado\n')

    if (1 == 2):
        #DFG
        custom_print('Iniciando DFG')

        dfg = dfg_discovery.apply(log)
        parameters = {
            dfg_visualization.Variants.FREQUENCY.value.Parameters.FORMAT: 'png'
        }
        gviz = dfg_visualization.apply(
            dfg,
            log=log,
            variant=dfg_visualization.Variants.FREQUENCY,
            parameters=parameters)
        dfg_visualization.save(gviz, 'petrinets/simple-DFG.png')

        custom_print('DFG finalizado\n')
Example #14
#with performance
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.visualization.dfg import visualizer as dfg_visualization

dfg = dfg_discovery.apply(log, variant=dfg_discovery.Variants.PERFORMANCE)
gviz = dfg_visualization.apply(dfg, log=log, variant=dfg_visualization.Variants.PERFORMANCE)
dfg_visualization.view(gviz)

#svg format
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.visualization.dfg import visualizer as dfg_visualization

dfg = dfg_discovery.apply(log, variant=dfg_discovery.Variants.PERFORMANCE)
parameters = {dfg_visualization.Variants.PERFORMANCE.value.Parameters.FORMAT: "svg"}
gviz = dfg_visualization.apply(dfg, log=log, variant=dfg_visualization.Variants.PERFORMANCE, parameters=parameters)
dfg_visualization.save(gviz, "dfg.svg")


# Convert a DFG to a workflow net
from pm4py.objects.log.importer.xes import importer as xes_importer

import os
filepath = os.path.join('E:/data/pm/running-example.xes')
log = xes_importer.apply(filepath)

from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
dfg = dfg_discovery.apply(log)

from pm4py.objects.conversion.dfg import converter as dfg_mining
net, im, fm = dfg_mining.apply(dfg)
net
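The converted workflow net can be visualized and saved as well; a sketch using the pm4py simplified interface (assuming a pm4py 2.x release that provides save_vis_petri_net):

import pm4py
# im and fm are the initial and final markings returned by dfg_mining.apply
pm4py.save_vis_petri_net(net, im, fm, "wf_net.png")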
def create_process_models(output_case_traces_cluster, path_data_sources,
                          dir_runtime_files, dir_dfg_cluster_files,
                          filename_dfg_cluster, rel_proportion_dfg_threshold,
                          logging_level):
    """
    Creates directly-follows graphs out of an event log.
    :param output_case_traces_cluster: traces that are visualised
    :param path_data_sources: path of sources and outputs
    :param dir_runtime_files: folder containing files read and written during runtime
    :param dir_dfg_cluster_files: folder containing dfg png files
    :param filename_dfg_cluster: filename of dfg file (per cluster)
    :param rel_proportion_dfg_threshold: threshold for filtering out sensors in dfg relative to max occurrences of a sensor
    :param logging_level: level of logging
    :return:
    """

    # keep only needed columns
    output_case_traces_cluster = output_case_traces_cluster.reindex(
        columns=['Case', 'LC_Activity', 'Timestamp', 'Cluster'])
    output_case_traces_cluster = output_case_traces_cluster.rename(
        columns={
            'Case': 'case:concept:name',
            'LC_Activity': 'concept:name',
            'Timestamp': 'time:timestamp'
        })

    # create directory for dfg pngs
    os.mkdir(path_data_sources + dir_runtime_files + dir_dfg_cluster_files)
    # create dfg for each cluster
    clusters = output_case_traces_cluster.Cluster.unique()
    for cluster in clusters:
        log = output_case_traces_cluster.loc[output_case_traces_cluster.Cluster
                                             == cluster]
        log = log.astype(str)

        # convert pandas data frame to pm4py event log for further processing
        log = log_converter.apply(log)

        # keep only activities with more than certain number of occurrences
        activities = attributes_get.get_attribute_values(log, 'concept:name')
        # determine that number relative to the max number of occurrences of a sensor in a cluster. (the result is
        # the threshold at which an activity/activity strand is kept)
        min_number_of_occurrences = round(
            (max(activities.values()) * rel_proportion_dfg_threshold), 0)
        activities = {
            x: y
            for x, y in activities.items() if y >= min_number_of_occurrences
        }
        log = attributes_filter.apply(log, activities)

        # create dfg out of event log
        dfg = dfg_discovery.apply(log)

        # determine start and end activities
        start_activities = sa_get.get_start_activities(log)
        end_activities = ea_get.get_end_activities(log)

        # create a png of the dfg (if the image appears empty, the sensors may not have triggered often enough)
        gviz = dfg_visualization.apply(
            dfg=dfg,
            log=log,
            variant=dfg_visualization.Variants.FREQUENCY,
            parameters={
                'start_activities': start_activities,
                'end_activities': end_activities
            })
        dfg_visualization.save(
            gviz,
            path_data_sources + dir_runtime_files + dir_dfg_cluster_files +
            (filename_dfg_cluster.format(cluster=str(cluster))))

    # logger
    logger = logging.getLogger(inspect.stack()[0][3])
    logger.setLevel(logging_level)
    logger.info("Saved directly follows graphs into '../%s'.",
                path_data_sources + dir_runtime_files + dir_dfg_cluster_files)
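create_process_models assumes several module-level imports that are not shown. A plausible set for pm4py 2.x (a sketch; adapt the paths to the installed version):

import os
import inspect
import logging
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.statistics.attributes.log import get as attributes_get
from pm4py.algo.filtering.log.attributes import attributes_filter
from pm4py.statistics.start_activities.log import get as sa_get
from pm4py.statistics.end_activities.log import get as ea_get
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.visualization.dfg import visualizer as dfg_visualization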