Example 1
def visualization(log, C, petrinet=True, heu_net=False):
    if petrinet:
        # net, im, fm = inductive_miner.apply(variants_filter.apply(log, C))
        net, im, fm = heuristics_miner.apply(variants_filter.apply(log, C))
        gviz = pn_visualizer.apply(net, im, fm)
        pn_visualizer.view(gviz)

    if heu_net:
        # apply_heu belongs to the heuristics miner, not the inductive miner
        hn = heuristics_miner.apply_heu(variants_filter.apply(log, C))
        gviz = hn_vis_factory.apply(hn)
        hn_vis_factory.view(gviz)
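For reference, a plausible set of imports for this snippet (module paths as in the pm4py 2.x layout; older releases exposed the same functions through factory modules):

from pm4py.algo.filtering.log.variants import variants_filter
from pm4py.algo.discovery.heuristics import algorithm as heuristics_miner
from pm4py.visualization.petrinet import visualizer as pn_visualizer
from pm4py.visualization.heuristics_net import visualizer as hn_vis_factory
# the commented-out inductive alternative would additionally need:
# from pm4py.algo.discovery.inductive import algorithm as inductive_miner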
Example 2
 def test_filtering_variants(self):
     # to avoid static-method warnings in tests, which by construction
     # of the unittest package have to be expressed this way
     self.dummy_variable = "dummy_value"
     input_log = os.path.join(INPUT_DATA_DIR, "running-example.xes")
     log = xes_importer.import_log(input_log)
     considered_variant = "register request,examine casually,check ticket,decide,reinitiate request"
     considered_variant = considered_variant + ",examine thoroughly,check ticket,decide,pay compensation"
     log1 = variants_module.apply(log, [considered_variant],
                                  parameters={"positive": False})
     log2 = variants_module.apply(log, [considered_variant],
                                  parameters={"positive": True})
     del log1
     del log2
Example 3
def W_creater(log, R, w, output=False):

    W = []
    log = variants_filter.apply(log, R)
    target_size = len(log) * w  # number of traces the selected variants must cover
    variant = case_statistics.get_variant_statistics(log)
    variant = sorted(variant, key=lambda x: x['count'], reverse=True)
    if output:
        print(
            "=" * 100,
            "\nW creater called with w : {} and target size {}\n".format(
                w, target_size))
    W_size = 0
    for v in variant:
        W_size += v['count']
        W.append(v['variant'])
        if output:
            print(
                "\t\t{}___added with size {} // {} out of {}  // total size : {}"
                .format(v['variant'][:60], v['count'], W_size, target_size,
                        len(log)))

        if W_size > target_size:
            break

    if output:
        print("W creater END with its size: {}".format(len(W)))
        print("=" * 100)
    return W
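A minimal usage sketch, assuming log is an imported event log and R a list of variant strings, as in the neighbouring examples:

# greedily pick the most frequent variants of R until they cover 80% of the filtered traces
W = W_creater(log, R, w=0.8, output=True)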
Example 4
def read_xes(filename, p=1, n_DPI=False):
    '''
    Read an event log in XES format.
        input   filename, percentage, optional variant cap
        output  log object, list of variant strings

    filename = path to the XES file
    p = fraction (0-1] of the log's traces to cover with the most frequent variants
    n_DPI = if set, keep only the n_DPI most frequent variants
    '''
    log = xes_importer.apply(filename)
    if p < 1:
        log = variants_filter.filter_log_variants_percentage(log, percentage=p)
    # variants = variants_filter.get_variants(log)
    variants = case_statistics.get_variant_statistics(log)
    # #
    VARIANT = []
    for v in variants:
        VARIANT.append(v['variant'])
    #
    # VARIANT = list(variants.keys())

    if n_DPI:
        VARIANT = VARIANT[:n_DPI]
        log = variants_filter.apply(log, VARIANT)
    print('=' * 100, '\n=READ THE XES FILE\n'
          'number of traces:', len(log), '\nnumber of events:',
          sum(len(trace) for trace in log),
          '\nnumber of variants: {}'.format(len(VARIANT)))
    return log, VARIANT
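A hedged call (the file name is illustrative):

# keep the most frequent variants covering 60% of the traces, capped at 50 variants
log, VARIANT = read_xes("running-example.xes", p=0.6, n_DPI=50)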
Example 5
 def test_27(self):
     from pm4py.algo.filtering.pandas.variants import variants_filter
     df = self.load_running_example_df()
     variants = ["register request,examine thoroughly,check ticket,decide,reject request"]
     filtered_df1 = variants_filter.apply(df, variants,
                                          parameters={variants_filter.Parameters.CASE_ID_KEY: "case:concept:name",
                                                      variants_filter.Parameters.ACTIVITY_KEY: "concept:name"})
Example 6
def fit_check(log: list, C: list) -> float:
    log = variants_filter.apply(log, list(C))  # keep only the traces whose variant is in C
    net, im, fm = heuristics_miner.apply(log)
    # net, im, fm = inductive_miner.apply(log)

    fit = replay_fitness_evaluator.apply(
        log,
        net,
        im,
        fm,
        variant=replay_fitness_evaluator.Variants.TOKEN_BASED)
    return fit['log_fitness']
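A hedged call, reusing a variant string from the test examples in this collection:

c = "register request,examine casually,check ticket,decide,reinitiate request"
print(fit_check(log, [c]))  # replay fitness of the heuristics-miner model over variant c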
Example 7
    def filter_variants(self, filter_level):
        variants_count = case_statistics.get_variant_statistics(self.log)
        variants_count = \
            sorted(variants_count,
                   key=lambda x: x['count'],
                   reverse=True)
        total_traces = len(self.log)
        total_variants = len(variants_count)
        filter_threshold = (1 / total_variants) * filter_level

        desired_variants = [v['variant'] for v in variants_count
                            if v['count'] / total_traces >= filter_threshold]
        self.log = variants_filter.apply(self.log, desired_variants)
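With filter_level=1 the threshold equals the mean variant share of traces (1/total_variants), so only variants at least as frequent as the average survive. A hedged call, assuming miner is an instance of the enclosing class:

miner.filter_variants(filter_level=1)  # keep only variants with at least average frequency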
Example 8
def look_ahead(log: list, C, R, output=False):
    if output:
        print("\n * Look_ahead()")
    C_log = variants_filter.apply(log, C)
    net, im, fm = heuristics_miner.apply(C_log)
    # net, im, fm = inductive_miner.apply(C_log)
    for i, r in enumerate(list(R)):  # iterate over a copy: R is mutated below
        if i % 10 == 0:
            print("\t = {} dpi(s) checked".format(i))
        r_log = [variants_filter.apply(log, [r])[0]]
        fit = replay_fitness_evaluator.apply(
            r_log,
            net,
            im,
            fm,
            variant=replay_fitness_evaluator.Variants.TOKEN_BASED)

        if fit['log_fitness'] == 1:
            print("fitness:", fit['log_fitness'])
            if output:
                print("\tFound a perfect fitness - {}".format(r))
            R.remove(r)
            C.append(r)
    return C, R
Example 9
def visualization_total(log, VARIANT, CS, freq_check=False):
    print("visualization of VARIANT")
    if freq_check:
        fitness = fit_check(log, VARIANT)
        print("#variants:{} / #traces:{} / fitness:{}".format(
            len(VARIANT), len(log), fitness))
    visualization(log, VARIANT, True, False)

    print("visualization of each cluster in CS")
    for cs in CS:
        if freq_check:
            cs_log = variants_filter.apply(log, cs)
            fitness = fit_check(cs_log, cs)
            print("#variants:{} / #traces:{} / fitness:{}".format(
                len(cs), len(cs_log), fitness))
        visualization(log, cs, True, False)
Example 10
    def get_case_statistics(self, parameters=None):
        """
        Gets the statistics on cases

        Parameters
        -------------
        parameters
            Possible parameters of the algorithm

        Returns
        -------------
        list_cases
            List of cases
        """
        if parameters is None:
            parameters = {}
        parameters[
            constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = self.activity_key
        parameters[
            constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = self.activity_key
        #parameters["max_ret_cases"] = ws_constants.MAX_NO_CASES_TO_RETURN
        parameters["sort_by_index"] = parameters.get("sort_by_index", 0)
        parameters["sort_ascending"] = parameters.get("sort_ascending", False)
        parameters["variants"] = self.variants
        if "variant" in parameters:
            var_to_filter = parameters["variant"]
            # TODO: TECHNICAL DEBT
            # quick turnaround for bug
            var_to_filter = var_to_filter.replace(" start", "+start")
            var_to_filter = var_to_filter.replace(" START", "+START")
            var_to_filter = var_to_filter.replace(" complete", "+complete")
            var_to_filter = var_to_filter.replace(" COMPLETE", "+COMPLETE")
            filtered_log = variants_filter.apply(self.log, [var_to_filter],
                                                 parameters=parameters)
            return [
                casestats.include_key_in_value_list(
                    case_statistics.get_cases_description(
                        filtered_log, parameters=parameters))
            ] + [self.get_log_summary_dictio()]
        else:
            return [
                casestats.include_key_in_value_list(
                    case_statistics.get_cases_description(
                        self.log, parameters=parameters))
            ] + [self.get_log_summary_dictio()]
Example 11
def apply(log, filter, parameters=None):
    """
    Apply a filter to the current log (variants filter)

    Parameters
    ------------
    log
        Log object
    filter
        Filter to apply
    parameters
        Parameters of the algorithm

    Returns
    ------------
    log
        Log object
    """
    if parameters is None:
        parameters = {}

    return variants_filter.apply(log, filter[1], parameters=parameters)
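Because the wrapper indexes filter[1], the filter argument is expected to be a pair whose second element is the list of variants to keep. A hedged call (the first tuple element, here just a label, is ignored):

filtered = apply(log, ("variants", ["register request,examine thoroughly,check ticket,decide,reject request"]))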
Example 12
def quality_measure(log, CS):

    # fitness, prec, gen, simp, weighted by # traces

    scores = []  # renamed from "eval" to avoid shadowing the built-in
    for cs in CS:
        cs_log = variants_filter.apply(log, cs)
        scores.append(evaluation_w_hm(cs_log))

    DATA = np.array(scores)
    # print(DATA)
    # column 0 holds each cluster's trace count; columns 1.. hold its quality
    # scores, so each metric below is a trace-count-weighted average over clusters
    metrics = []
    for i in range(1, DATA.shape[1]):
        metrics.append(sum(DATA[:, 0] * DATA[:, i]) / sum(DATA[:, 0]))
    # print(
    #     "fitness:{}, prec:{}, gen:{}, simp:{}, weighted by # traces".
    #     format(metrics[0], metrics[1], metrics[2], metrics[3])
    # )
    return metrics
Example 13
from pm4py.objects.log.importer.xes import factory as xes_import_factory
from pm4py.objects.log.exporter.xes import factory as xes_exporter
from pm4py.statistics.traces.log import case_statistics
from pm4py.algo.filtering.log.variants import variants_filter

K = [20]
for k in K:
    event_log = "Sepsis Cases - Event Log.xes"
    log = xes_import_factory.apply(event_log)
    var_with_count = case_statistics.get_variant_statistics(log)
    variants_count = sorted(var_with_count,
                            key=lambda x: x['count'],
                            reverse=True)
    to_filter = []
    count = 0
    for variant_dict in variants_count:  # renamed from "dict" to avoid shadowing the built-in
        if variant_dict["count"] < k:
            to_filter.append([variant_dict["variant"]])
        else:
            count += variant_dict["count"]
    for delete in to_filter:
        log = variants_filter.apply(log,
                                    delete,
                                    parameters={"positive": False})
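    # Sketch (hedged): the loop above can be collapsed into one negative filter
    # call that drops every variant rarer than k at once:
    # rare_variants = [d["variant"] for d in variants_count if d["count"] < k]
    # log = variants_filter.apply(log, rare_variants, parameters={"positive": False})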
    xes_exporter.export_log(
        log, "baseline" + "_" + str(k) + "-" + "Anonymity" + ".xes")
    print("baseline" + "_" + str(k) + "-" + "Anonymity" + ".xes" +
          " has been exported!")
Example 14
 def test_28(self):
     from pm4py.algo.filtering.log.variants import variants_filter
     log = self.load_running_example_xes()
     variants = ["register request,examine thoroughly,check ticket,decide,reject request"]
     filtered_log2 = variants_filter.apply(log, variants, parameters={variants_filter.Parameters.POSITIVE: False})
Example 15
print(end_activities_filter.get_end_activities(log_af_ea))

# trace variants
from pm4py.algo.filtering.log.variants import variants_filter
variants = variants_filter.get_variants(log)
variants

from pm4py.statistics.traces.log import case_statistics
variants_count = case_statistics.get_variant_statistics(log)
variants_count = sorted(variants_count, key=lambda x: x['count'], reverse=True)
print(variants_count)
print(len(variants_count))

# keep only the most common variant
filtered_log1 = variants_filter.apply(log, [
    "Confirmation of receipt,T02 Check confirmation of receipt,T04 Determine confirmation of receipt,T05 Print and send confirmation of receipt,T06 Determine necessity of stop advice,T10 Determine necessity to stop indication"
])
filtered_log1
variants_count_filtered_log1 = case_statistics.get_variant_statistics(
    filtered_log1)
print(variants_count_filtered_log1)

#---
from pm4py.algo.filtering.log.attributes import attributes_filter
activities = attributes_filter.get_attribute_values(log, "concept:name")
resources = attributes_filter.get_attribute_values(log, "org:resource")
activities
resources

#not containing any resource
from pm4py.util import constants
Example 16
 def test_26(self):
     from pm4py.algo.filtering.log.variants import variants_filter
     log = self.load_running_example_xes()
     variants = ["register request,examine thoroughly,check ticket,decide,reject request"]
     filtered_log1 = variants_filter.apply(log, variants)
Example 17
def apply_filter(req):
	sessions[req.session["id"]] = datetime.now()
	filters = {
		"time": True,
		"variants": True,
		"performance": True,
		"activities": True,
		"attribute": True
	}
	req.session.set_expiry(7200)
	#print(str(req.body))
	o = json.loads(req.body)
	print(str(o))
	custom_time_range = []
	for pair in o["filter1"]:
		#custom_time_range.append((dateutil.parser.parse(pair[0]),dateutil.parser.parse(pair[1])))
		custom_time_range.append((pair[0],pair[1]))
	if o["filter1"] == []:
		filters["time"] = False
	#print(o["filter1"][0])
	#print(custom_time_range[0][0])
	#print(custom_time_range)
	custom_path_range = []
	for pair in o["filter2"]:
		custom_path_range.append((float(pair[0]),float(pair[1])))
	if o["filter2"] == []:
		filters["variants"] = False
		#custom_path_range = [(0,1)] #filter2
	custom_performance_range = []
	for pair in o["filter3"]:
		custom_performance_range.append((float(pair[0]),float(pair[1])))
	if o["filter3"] == []:
		filters["performance"] = False
	custom_activitiy_range = []
	for pair in o["filter4"]:
		custom_activitiy_range.append((float(pair[0]),float(pair[1])))
	if o["filter4"] == []:
		filters["activities"] = False
		#custom_activitiy_range = [(0,1)] #filter3
	custom_attribute_range = []
	for pair in o["filter5"]:
		custom_attribute_range.append((float(pair[0]),float(pair[1])))
	if o["filter5"] == [] or o["filter5attribute"] == "Empty":
		filters["attribute"] = False
	additional_attribute = o["filter5attribute"]

	selected_viz = o["visualization"]
	calc_lev = o["distance"]
	#input_file = os.path.join("webapp","static", req.session["id"] + "_l0.xes")
	input_file = os.path.join("webapp","static", "sepsis.xes")
	input_log = xes_importer.apply(input_file)
	not_filtered_logs = {}
	flatten = lambda l: [item for sublist in l for item in sublist]

	time_timestamp_started = datetime.now()
	if filters["time"]:
		#TODO check overlapping for filter
		custom_time_range = sorted(custom_time_range, reverse=False)
		for i in range(0,len(custom_time_range)-1):
			if(custom_time_range[i][1] > custom_time_range[i+1][0]):
				response = HttpResponse(json.dumps({'error': "Wrong intervals for time filter"}))
				response.status_code = 200
				return response
				#raise ValueError("Overlapping time ranges")

		logs = []
		for (x,y) in custom_time_range:
			logs.append(timestamp_filter.filter_traces_contained(input_log, x, y))

		#log = timestamp_filter.filter_traces_contained(input_log, custom_time_range[0][0], custom_time_range[0][1])
		log = pm4py.objects.log.log.EventLog()
		for timeslice in logs:
			for trace in timeslice:
				log.append(trace)
		print(len(input_log))
		print(len(log))
		#l2
		not_filtered_logs["timestamp_filter"] = pm4py.objects.log.log.EventLog()
		for trace in input_log:
			if trace not in log:
				not_filtered_logs["timestamp_filter"].append(trace)
		print(len(not_filtered_logs["timestamp_filter"]))
	else:
		log = input_log

	time_variants_started = datetime.now() # where should I start?

	if filters["variants"]:
		variants = variants_filter.get_variants(log)
		variants_count = case_statistics.get_variant_statistics(log)
		variants_count = sorted(variants_count, key=lambda x: x['count'], reverse=False)

		custom_path_range = sorted(custom_path_range, reverse=False)
		# check overlapping
		for i in range(0,len(custom_path_range)-1):
			if(custom_path_range[i][1] > custom_path_range[i+1][0]):
				response = HttpResponse(json.dumps({'error': "Wrong intervals for variants filter"}))
				response.status_code = 200
				return response
				#raise ValueError("Overlapping variants ranges")

		nr_variants = len(variants_count)
		idx = [(math.floor(x*nr_variants), math.ceil(y*nr_variants)) for (x,y) in custom_path_range]
		variants_subset = [variants_count[x:y+1] for (x,y) in idx]
		variants_subset = flatten(variants_subset)
		filtered_variants = {k:v for k,v in variants.items() if k in [x["variant"] for x in variants_subset]}
		#l2
		not_filtered_variants = {k:v for k,v in variants.items() if k not in [x["variant"] for x in variants_subset]}

		filtered_log = variants_filter.apply(log, filtered_variants)
		#l2
		not_filtered_logs["variant_filter"] = variants_filter.apply(log, not_filtered_variants)
	else:
		filtered_log = log

	time_variants_finished = datetime.now() # note: incl log2 generation

	if filters["performance"]:
		custom_performance_range = sorted(custom_performance_range, reverse=False)
		# check overlapping
		for i in range(0,len(custom_performance_range)-1):
			if(custom_performance_range[i][1] > custom_performance_range[i+1][0]):
				response = HttpResponse(json.dumps({'error': "Wrong intervals for performance filter"}))
				response.status_code = 200
				return response
				#raise ValueError("Overlapping performance ranges")

		#all_case_durations = case_statistics.get_all_casedurations(log, parameters={case_statistics.Parameters.TIMESTAMP_KEY: "time:timestamp"})
		#case_filter.filter_case_performance(log, 86400, 864000)
		performances = []
		for i in range(len(filtered_log)):
			filtered_log[i].attributes["throughput"] = (max([event["time:timestamp"]for event in filtered_log[i]])-min([event["time:timestamp"] for event in filtered_log[i]])).total_seconds()
			performances.append(filtered_log[i].attributes["throughput"])

		nr_cases = len(filtered_log)
		performances = sorted(performances, reverse=False)
		idx = [(math.floor(x*nr_cases), math.ceil(y*nr_cases)) for (x,y) in custom_performance_range]
		perf_subset = [performances[x:y+1] for (x,y) in idx]
		perf_subset = flatten(perf_subset)

		performance_log = pm4py.objects.log.log.EventLog([trace for trace in filtered_log if trace.attributes["throughput"] in perf_subset])
		#l2
		not_filtered_logs["performance_filter"] = pm4py.objects.log.log.EventLog([trace for trace in filtered_log if trace.attributes["throughput"] not in perf_subset])
		#print(str(len(not_filtered_logs["performance_filter"])))

	else:
		performance_log = filtered_log

	time_performance_finished = datetime.now()

	if filters["activities"]:
		variants = variants_filter.get_variants(performance_log)
		variants_count = case_statistics.get_variant_statistics(performance_log)
		variants_count = sorted(variants_count, key=lambda x: x['count'], reverse=False)

		activities = dict()
		for variant in variants_count:
			for activity in variant["variant"].split(","):
				if (activity not in activities.keys()):
					activities[activity] = variant["count"]
				else:
					activities[activity] += variant["count"]

		sorted_activities = {k: v for k, v in sorted(activities.items(), key=lambda item: item[1])}
		activities_sorted_list = list(sorted_activities)
		custom_activitiy_range = sorted(custom_activitiy_range, reverse=False)
		# check overlapping
		for i in range(0,len(custom_activitiy_range)-1):
			if(custom_activitiy_range[i][1] > custom_activitiy_range[i+1][0]):
				response = HttpResponse(json.dumps({'error': "Wrong intervals for activities filter"}))
				response.status_code = 200
				return response
				#raise ValueError("Overlapping activities ranges")
		nr_activities = len(activities_sorted_list)
		idx = [(math.floor(x*nr_activities), math.ceil(y*nr_activities)) for (x,y) in custom_activitiy_range]
		activities_to_keep = [activities_sorted_list[x:y+1] for (x,y) in idx]
		activities_to_keep = flatten(activities_to_keep)
		variants_idx = []
		for i in range(len(variants_count)):
			for activity in activities_to_keep:
				if (activity in variants_count[i]["variant"].split(",") and (i not in variants_idx)):
					variants_idx.append(i)
		variants_subset = [variants_count[i] for i in variants_idx]
		filtered_variants = {k:v for k,v in variants.items() if k in [x["variant"] for x in variants_subset]}
		#l2
		not_filtered_variants = {k:v for k,v in variants.items() if k not in [x["variant"] for x in variants_subset]}

		filtered_log = variants_filter.apply(performance_log, filtered_variants)

		#l2
		not_filtered_logs["activities_filter"] = variants_filter.apply(performance_log, not_filtered_variants)

		new_log = pm4py.objects.log.log.EventLog()
		#not_filtered_logs["activities_filter_traces"] = pm4py.objects.log.log.EventLog()
		for trace in filtered_log:
			new_trace = pm4py.objects.log.log.Trace()
			not_new_trace = pm4py.objects.log.log.Trace()
			for event in trace:
				if(event['concept:name'] in activities_to_keep):
					new_trace.append(event)
				else:
					not_new_trace.append(event)
			if(len(new_trace)>0):
				new_log.append(new_trace)
			if(len(not_new_trace)>0):
				not_filtered_logs["activities_filter"].append(not_new_trace)
	else:
		new_log = performance_log

	time_activities_finished = datetime.now()

	if filters["attribute"]:
		custom_attribute_range = sorted(custom_attribute_range, reverse=False)
		# check overlapping
		for i in range(0,len(custom_attribute_range)-1):
			if(custom_attribute_range[i][1] > custom_attribute_range[i+1][0]):
				response = HttpResponse(json.dumps({'error': "Wrong intervals for additional attribute filter"}))
				response.status_code = 200
				return response

		newest_log = pm4py.objects.log.log.EventLog()
		not_filtered_logs["additional_filter"] = pm4py.objects.log.log.EventLog()

		traces_with_attr = []
		not_traces_with_attr = []
		for trace in new_log:
			if additional_attribute in trace.attributes.keys():
				traces_with_attr.append(trace)
			else:
				not_traces_with_attr.append(trace)
		#check if trace attribute
		if len(traces_with_attr)>0:
			#check if numeric
			if type(traces_with_attr[0].attributes[additional_attribute]) in [int, float]:
				for trace in traces_with_attr:
					if any([trace.attributes[additional_attribute] >= x and trace.attributes[additional_attribute] <= y for (x,y) in custom_attribute_range]):
						newest_log.append(trace)
					else:
						not_filtered_logs["additional_filter"].append(trace)
				for trace in not_traces_with_attr:
					not_filtered_logs["additional_filter"].append(trace)
			else: #string
				attribute_frequencies = dict()
				for trace in traces_with_attr:
					if trace.attributes[additional_attribute] not in attribute_frequencies.keys():
						attribute_frequencies[trace.attributes[additional_attribute]] = 0
					attribute_frequencies[trace.attributes[additional_attribute]] += 1

				sorted_frequencies = {k: v for k, v in sorted(attribute_frequencies.items(), key=lambda item: item[1])}
				frequencies_sorted_list = list(sorted_frequencies)

				nr_values = len(frequencies_sorted_list)
				idx = [(math.floor(x*nr_values), math.ceil(y*nr_values)) for (x,y) in custom_attribute_range]
				values_to_keep = [frequencies_sorted_list[x:y+1] for (x,y) in idx]
				values_to_keep = flatten(values_to_keep)

				for trace in traces_with_attr:
					if trace.attributes[additional_attribute] in values_to_keep:
						newest_log.append(trace)
					else:
						not_filtered_logs["additional_filter"].append(trace)
				for trace in not_traces_with_attr:
					not_filtered_logs["additional_filter"].append(trace)

		else: #event attribute
			if [type(event[additional_attribute]) for trace in new_log for event in trace if additional_attribute in event.keys()][0] in [int, float]:
				for trace in new_log:
					new_trace = pm4py.objects.log.log.Trace()
					not_new_trace = pm4py.objects.log.log.Trace()
					for event in trace:
						if(additional_attribute in event.keys() and any([event[additional_attribute] >= x and event[additional_attribute] <= y for (x,y) in custom_attribute_range ])):
							new_trace.append(event)
						else:
							not_new_trace.append(event)
					if(len(new_trace)>0):
						newest_log.append(new_trace)
					if(len(not_new_trace)>0):
						not_filtered_logs["additional_filter"].append(not_new_trace)
			else: #string
				attribute_frequencies = dict()
				for trace in new_log:
					for event in trace:
						if additional_attribute in event.keys():
							if event[additional_attribute] not in attribute_frequencies.keys():
								attribute_frequencies[event[additional_attribute]] = 0
							attribute_frequencies[event[additional_attribute]] += 1

				sorted_frequencies = {k: v for k, v in sorted(attribute_frequencies.items(), key=lambda item: item[1])}
				frequencies_sorted_list = list(sorted_frequencies)

				nr_values = len(frequencies_sorted_list)
				idx = [(math.floor(x*nr_values), math.ceil(y*nr_values)) for (x,y) in custom_attribute_range]
				values_to_keep = [frequencies_sorted_list[x:y+1] for (x,y) in idx]
				values_to_keep = flatten(values_to_keep)

				for trace in new_log:
					new_trace = pm4py.objects.log.log.Trace()
					not_new_trace = pm4py.objects.log.log.Trace()
					for event in trace:
						if(additional_attribute in event.keys() and event[additional_attribute] in values_to_keep):
							new_trace.append(event)
						else:
							not_new_trace.append(event)
					if(len(new_trace)>0):
						newest_log.append(new_trace)
					if(len(not_new_trace)>0):
						not_filtered_logs["additional_filter"].append(not_new_trace)


	else:
		newest_log = new_log

	time_attribute_finished = datetime.now()

	if(selected_viz=="dfgf"):
		dfg = dfg_discovery.apply(newest_log)
		gviz = dfg_visualization.apply(dfg, log=newest_log, variant=dfg_visualization.Variants.FREQUENCY)
		dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l1.png"))
	elif(selected_viz=="dfgp"):
		dfg = dfg_discovery.apply(newest_log)
		gviz = dfg_visualization.apply(dfg, log=newest_log, variant=dfg_visualization.Variants.PERFORMANCE)
		dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l1.png"))
	else:
		heu_net = heuristics_miner.apply_heu(newest_log, parameters={"dependency_thresh": 0.99})
		gviz = hn_vis_factory.apply(heu_net)
		hn_vis_factory.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l1.png"))

	xes_exporter.apply(newest_log, os.path.join("webapp","static", req.session["id"] + "_l1.xes"))


	#l2
	not_filtered_log = pm4py.objects.log.log.EventLog()
	for part in not_filtered_logs.keys():
		for trace in not_filtered_logs[part]:
			not_filtered_log.append(trace)

	if(selected_viz=="dfgf"):
		dfg = dfg_discovery.apply(not_filtered_log)
		gviz = dfg_visualization.apply(dfg, log=not_filtered_log, variant=dfg_visualization.Variants.FREQUENCY)
		dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l2.png"))
	elif(selected_viz=="dfgp"):
		dfg = dfg_discovery.apply(not_filtered_log)
		gviz = dfg_visualization.apply(dfg, log=not_filtered_log, variant=dfg_visualization.Variants.PERFORMANCE)
		dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l2.png"))
	else:
		heu_net = heuristics_miner.apply_heu(not_filtered_log, parameters={"dependency_thresh": 0.99})
		gviz = hn_vis_factory.apply(heu_net)
		hn_vis_factory.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l2.png"))
	xes_exporter.apply(not_filtered_log, os.path.join("webapp","static", req.session["id"] + "_l2.xes"))

	if(calc_lev):
		lev_new = [0]*len(newest_log)
		for i in range(len(newest_log)):
			lev_new[i] = [hash(event['concept:name']) for event in newest_log[i]]

		lev_not = [0]*len(not_filtered_log)
		for i in range(len(not_filtered_log)):
			lev_not[i] = [hash(event['concept:name']) for event in not_filtered_log[i]]

		distances = []
		for i in range(len(lev_new)):
			for j in range(len(lev_not)):
				distances.append(lev_dist(lev_new[i], lev_not[j]))
		lev_d = sum(distances)/len(distances)
		print("Levenshtein's distance: "+str(lev_d))
	else:
		lev_d = "null"

	used_paths = 0
	for lower, higher in custom_path_range:
		used_paths += round((higher-lower)*100)
	print(f"Using {used_paths}% of paths. {100-used_paths}% of paths are discarded.")

	print("Timestamp filter: {} seconds. \nVariants filter: {} seconds. \nPerformance filter: {} seconds. \nActivities filter: {} seconds. \nAttribute filter: {} seconds.".format((time_variants_started - time_timestamp_started).total_seconds(), (time_variants_finished - time_variants_started).total_seconds(), (time_performance_finished - time_variants_finished).total_seconds(), (time_activities_finished - time_performance_finished).total_seconds(), (time_attribute_finished - time_activities_finished).total_seconds()))
	response = HttpResponse(json.dumps({'time':(time_variants_started - time_timestamp_started).total_seconds(), 'variants':(time_variants_finished - time_variants_started).total_seconds(),'performance':(time_performance_finished - time_variants_finished).total_seconds(), 'activities':(time_activities_finished - time_performance_finished).total_seconds(), 'attribute':(time_attribute_finished - time_activities_finished).total_seconds(), 'traces':[len(newest_log), len(not_filtered_log)], 'distance':lev_d}))
	response.status_code = 200
	return response
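The same interval-overlap check is repeated for each of the five filters above; a small helper (hypothetical, not part of the original view) could factor it out:

def has_overlap(ranges):
    # True if any two of the sorted (low, high) intervals overlap
    ranges = sorted(ranges)
    return any(ranges[i][1] > ranges[i + 1][0] for i in range(len(ranges) - 1))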
Example 18
import pm4py
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.filtering.log.variants import variants_filter
from pm4py.algo.enhancement.sna import algorithm as sna
from pm4py.visualization.sna import visualizer as sna_visualizer

log = xes_importer.apply(
    r'C:\Users\HAXY8W\Desktop\Process Mining\Audit Process Mining\financial_log.xes.gz'
)
filtered_log1 = variants_filter.apply(
    log, {'A_SUBMITTED,A_PARTLYSUBMITTED,A_DECLINED'},
    parameters={variants_filter.Parameters.POSITIVE: False})

# Part of the analysis was to eliminate unwanted activities; the commented code below does just that.
# It is not needed to run the network. If you do want to filter those activities out, uncomment it and
# change the name in the network algorithm sections from filtered_log1 to filtered_log2.

# event_log1 = pm4py.convert_to_event_stream(filtered_log1)
# filtered_log2 = pm4py.filtering.filter_event_attribute_values(filtered_log1, 'concept:name',
#                                                               {"O_SELECTED", "O_CREATED", "O_ACCEPTED",
#                                                                "A_REGISTERED", "A_ACTIVATED",
#                                                                "O_CANCELLED", "O_DECLINED", 'A_PARTLYSUBMITTED'},
#                                                               level='event', retain=False)

# Network algorithm sections

# Handover of Work
hw_values = sna.apply(filtered_log1, variant=sna.Variants.HANDOVER_LOG)
gviz_hw_py = sna_visualizer.apply(hw_values,
                                  variant=sna_visualizer.Variants.PYVIS)
sna_visualizer.view(gviz_hw_py, variant=sna_visualizer.Variants.PYVIS)

def apply(log, parameters=None):
    """
    Returns a log from which a sound workflow net could be extracted taking into account
    a discovery algorithm returning models only with visible transitions

    Parameters
    ------------
    log
        Trace log
    parameters
        Possible parameters of the algorithm, including:
            discovery_algorithm -> Discovery algorithm to consider, possible choices: alphaclassic
            max_no_variants -> Maximum number of variants to consider to return a Petri net

    Returns
    ------------
    filtered_log
        Filtered log
    """
    from pm4py.evaluation.replay_fitness import factory as replay_fitness_factory

    if parameters is None:
        parameters = {}
    discovery_algorithm = parameters.get("discovery_algorithm", "alphaclassic")
    max_no_variants = parameters.get("max_no_variants", 20)
    all_variants_dictio = variants_filter.get_variants(log, parameters=parameters)
    all_variants_list = []
    for var in all_variants_dictio:
        all_variants_list.append([var, len(all_variants_dictio[var])])
    all_variants_list = sorted(all_variants_list, key=lambda x: (x[1], x[0]), reverse=True)
    considered_variants = []
    considered_traces = []

    i = 0
    while i < min(len(all_variants_list), max_no_variants):
        variant = all_variants_list[i][0]

        considered_variants.append(variant)
        considered_traces.append(all_variants_dictio[variant][0])
        filtered_log = EventLog(considered_traces)
        net = None
        initial_marking = None
        final_marking = None
        if discovery_algorithm == "alphaclassic" or discovery_algorithm == "alpha":
            net, initial_marking, final_marking = alpha_miner.apply(filtered_log, parameters=parameters)
        is_sound = check_soundness.check_petri_wfnet_and_soundness(net)
        if not is_sound:
            del considered_variants[-1]
            del considered_traces[-1]
        else:
            try:
                fitness = replay_fitness_factory.apply(filtered_log, net, initial_marking, final_marking,
                                                       parameters=parameters)
                if fitness["log_fitness"] < 0.99999:
                    del considered_variants[-1]
                    del considered_traces[-1]
            except TypeError:
                del considered_variants[-1]
                del considered_traces[-1]
        i = i + 1

    sound_log = EventLog()
    if considered_variants:
        sound_log = variants_filter.apply(log, considered_variants, parameters=parameters)

    return sound_log
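A hedged call, using the parameter names documented in the docstring:

sound_log = apply(log, parameters={"discovery_algorithm": "alphaclassic", "max_no_variants": 10})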