Example #1
# Assumed imports for this excerpt: `time` and `pandas` are standard; the
# remaining names (discover_inductive, succint_mdl_to_exploded_mdl, algorithm,
# attributes_filter, clean_frequency, clean_arc_frequency, variants_module,
# tr_factory, performance_map) come from the surrounding pm4py-based project.
import time

import pandas as pd


def apply(df, discovery_algorithm=discover_inductive, parameters=None):
    if parameters is None:
        parameters = {}

    allowed_activities = parameters.get("allowed_activities")
    debug = parameters.get("debug", True)

    try:
        # "succint" (sic) matches the spelling used by the library's identifiers
        if df.type == "succint":
            df = succint_mdl_to_exploded_mdl.apply(df)
            df.type = "exploded"
    except Exception:
        pass

    if len(df) == 0:
        df = pd.DataFrame({"event_id": [], "event_activity": []})

    min_node_freq = parameters.get("min_node_freq", 0)
    min_edge_freq = parameters.get("min_edge_freq", 0)

    df = clean_frequency.apply(df, min_node_freq)
    df = clean_arc_frequency.apply(df, min_edge_freq)

    if len(df) == 0:
        df = pd.DataFrame({"event_id": [], "event_activity": []})

    # the object-type ("perspective") columns are those not prefixed with "event_"
    persps = [x for x in df.columns if not x.startswith("event_")]

    ret = {
        "nets": {},
        "act_count": {},
        "replay": {},
        "group_size_hist": {},
        "act_count_replay": {},
        "group_size_hist_replay": {},
        "aligned_traces": {},
        "place_fitness_per_trace": {},
        "aggregated_statistics_frequency": {},
        "aggregated_statistics_performance_min": {},
        "aggregated_statistics_performance_max": {},
        "aggregated_statistics_performance_median": {},
        "aggregated_statistics_performance_mean": {},
    }

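    # wall-clock accumulators, reported at the end under "computation_statistics"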
    diff_log = 0
    diff_model = 0
    diff_token_replay = 0
    diff_performance_annotation = 0
    diff_basic_stats = 0

    for persp in persps:
        aa = time.time()
        if debug:
            print(persp, "getting log")
        log = algorithm.apply(df, persp, parameters=parameters)
        if debug:
            print(len(log))

        if allowed_activities is not None:
            if persp not in allowed_activities:
                continue
            filtered_log = attributes_filter.apply_events(
                log, allowed_activities[persp])
        else:
            filtered_log = log
        bb = time.time()

        diff_log += (bb - aa)

        # filtered_log = variants_filter.apply_auto_filter(deepcopy(filtered_log), parameters={"decreasingFactor": 0.5})

        if debug:
            print(len(log))
            print(persp, "got log")

        cc = time.time()
        #net, im, fm = inductive_miner.apply(filtered_log)
        net, im, fm = discovery_algorithm(filtered_log)
        """if persp == "items":
            trans_map = {t.label:t for t in net.transitions}
            source_place_it = list(trans_map["item out of stock"].in_arcs)[0].source
            target_place_re = list(trans_map["reorder item"].out_arcs)[0].target
            skip_trans_1 = PetriNet.Transition(str(uuid.uuid4()), None)
            net.transitions.add(skip_trans_1)
            add_arc_from_to(source_place_it, skip_trans_1, net)
            add_arc_from_to(skip_trans_1, target_place_re, net)"""

        #net = reduce_petri_net(net)
        dd = time.time()

        diff_model += (dd - cc)

        # net, im, fm = alpha_miner.apply(filtered_log)
        if debug:
            print(persp, "got model")

        xx1 = time.time()
        activ_count = algorithm.apply(df,
                                      persp,
                                      variant="activity_occurrence",
                                      parameters=parameters)
        if debug:
            print(persp, "got activ_count")
        xx2 = time.time()

        ee = time.time()
        variants_idx = variants_module.get_variants_from_log_trace_idx(log)
        # variants = variants_module.convert_variants_trace_idx_to_trace_obj(log, variants_idx)
        # parameters_tr = {PARAM_ACTIVITY_KEY: "concept:name", "variants": variants}

        if debug:
            print(persp, "got variants")

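        # token-based replay; per-place and per-transition fitness is enabled via the parameters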
        aligned_traces, place_fitness_per_trace, transition_fitness_per_trace, notexisting_activities_in_model = tr_factory.apply(
            log,
            net,
            im,
            fm,
            parameters={
                "enable_pltr_fitness": True,
                "disable_variants": True
            })

        if debug:
            print(persp, "done tbr")

        element_statistics = performance_map.single_element_statistics(
            log, net, im, aligned_traces, variants_idx)

        if debug:
            print(persp, "done element_statistics")
        ff = time.time()

        diff_token_replay += (ff - ee)

        aggregated_statistics = performance_map.aggregate_statistics(
            element_statistics)

        if debug:
            print(persp, "done aggregated_statistics")

        element_statistics_performance = performance_map.single_element_statistics(
            log, net, im, aligned_traces, variants_idx)

        if debug:
            print(persp, "done element_statistics_performance")

        gg = time.time()

        aggregated_statistics_performance_min = performance_map.aggregate_statistics(
            element_statistics_performance,
            measure="performance",
            aggregation_measure="min")
        aggregated_statistics_performance_max = performance_map.aggregate_statistics(
            element_statistics_performance,
            measure="performance",
            aggregation_measure="max")
        aggregated_statistics_performance_median = performance_map.aggregate_statistics(
            element_statistics_performance,
            measure="performance",
            aggregation_measure="median")
        aggregated_statistics_performance_mean = performance_map.aggregate_statistics(
            element_statistics_performance,
            measure="performance",
            aggregation_measure="mean")

        hh = time.time()

        diff_performance_annotation += (hh - ee)

        if debug:
            print(persp, "done aggregated_statistics_performance")

        group_size_hist = algorithm.apply(df,
                                          persp,
                                          variant="group_size_hist",
                                          parameters=parameters)

        if debug:
            print(persp, "done group_size_hist")

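        # for every transition, collect the (case id, event id) pairs of its fitting occurrences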
        occurrences = {}
        for trans in transition_fitness_per_trace:
            occurrences[trans.label] = set()
            for trace in transition_fitness_per_trace[trans]["fit_traces"]:
                if trace not in transition_fitness_per_trace[trans][
                        "underfed_traces"]:
                    case_id = trace.attributes["concept:name"]
                    for event in trace:
                        if event["concept:name"] == trans.label:
                            occurrences[trans.label].add(
                                (case_id, event["event_id"]))
            # print(transition_fitness_per_trace[trans])

        len_different_ids = {}
        for act in occurrences:
            len_different_ids[act] = len(set(x[1] for x in occurrences[act]))

        eid_acti_count = {}
        for act in occurrences:
            eid_acti_count[act] = {}
            for x in occurrences[act]:
                # membership must be checked in the per-activity dict, not in
                # the outer dict, otherwise every count is reset to 0
                if x[0] not in eid_acti_count[act]:
                    eid_acti_count[act][x[0]] = 0
                eid_acti_count[act][x[0]] += 1
            eid_acti_count[act] = sorted(eid_acti_count[act].values())

        ii = time.time()

        diff_basic_stats += (ii - hh) + (xx2 - xx1)

        ret["nets"][persp] = [net, im, fm]
        ret["act_count"][persp] = activ_count
        ret["aligned_traces"][persp] = aligned_traces
        ret["place_fitness_per_trace"][persp] = place_fitness_per_trace
        ret["aggregated_statistics_frequency"][persp] = aggregated_statistics
        ret["aggregated_statistics_performance_min"][
            persp] = aggregated_statistics_performance_min
        ret["aggregated_statistics_performance_max"][
            persp] = aggregated_statistics_performance_max
        ret["aggregated_statistics_performance_median"][
            persp] = aggregated_statistics_performance_median
        ret["aggregated_statistics_performance_mean"][
            persp] = aggregated_statistics_performance_mean

        ret["replay"][persp] = aggregated_statistics
        ret["group_size_hist"][persp] = group_size_hist
        ret["act_count_replay"][persp] = len_different_ids
        ret["group_size_hist_replay"][persp] = eid_acti_count

    ret["computation_statistics"] = {
        "diff_log": diff_log,
        "diff_model": diff_model,
        "diff_token_replay": diff_token_replay,
        "diff_performance_annotation": diff_performance_annotation,
        "diff_basic_stats": diff_basic_stats
    }

    return ret
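
A minimal usage sketch (hypothetical: `exploded_df` and the parameter values are illustrative and not part of the original example; the result layout follows the dictionary built above):

results = apply(exploded_df, parameters={"debug": False,
                                         "min_node_freq": 2,
                                         "min_edge_freq": 2})
for persp, (net, im, fm) in results["nets"].items():
    # each perspective maps to a discovered Petri net with initial/final markings
    print(persp, "->", len(net.transitions), "transitions")
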
Example #2
# Assumed imports for this excerpt: `Counter` is from the standard library; the
# remaining names (general, clean_frequency, clean_arc_frequency, EventStream,
# log_conv_factory, tsystem) come from the surrounding pm4py-based project.
from collections import Counter


def apply(df0, classifier_function=None, parameters=None):
    if parameters is None:
        parameters = {}

    if classifier_function is None:
        classifier_function = lambda x: x["event_activity"]

    min_acti_freq = parameters.get("min_acti_freq", 0)
    min_edge_freq = parameters.get("min_edge_freq", 0)

    df = df0.copy()
    df = general.preprocess(df, parameters=parameters)

    df = clean_frequency.apply(df, min_acti_freq=min_acti_freq)
    df = clean_arc_frequency.apply(df, min_freq=min_edge_freq)

    models = {}

    # the object-type columns are those not prefixed with "event_"
    obj_types = [x for x in df.columns if not x.startswith("event_")]
    activities = set()
    activities_repeated = Counter()
    edges = Counter()
    start_activities = dict()
    end_activities = dict()
    acti_spec = Counter()

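    # flatten the dataframe per object type into a classical event log,
    # using the object column as the case identifier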
    for ot in obj_types:
        start_activities[ot] = set()
        end_activities[ot] = set()

        new_df = df[["event_id", "event_activity", "event_timestamp",
                     ot]].dropna(subset=[ot])
        new_df = new_df.rename(columns={
            ot: "case:concept:name",
            "event_timestamp": "time:timestamp"
        })
        # "records" replaces the deprecated "r" orient shorthand
        log = new_df.to_dict("records")
        for ev in log:
            ev["event_objtype"] = ot
            ev["concept:name"] = classifier_function(ev)
            del ev["event_objtype"]
            del ev["event_activity"]
            activities.add((ev["event_id"], ev["concept:name"]))

        log = EventStream(log)
        this_activities = set(x["concept:name"] for x in log)
        for act in this_activities:
            activities_repeated[act] += 1
        log = log_conv_factory.apply(log,
                                     variant=log_conv_factory.TO_EVENT_LOG)

        for trace in log:
            if trace:
                start_activities[ot].add(trace[0]["concept:name"])
                end_activities[ot].add(trace[-1]["concept:name"])
                for i in range(len(trace) - 1):
                    ev0 = trace[i]
                    ev1 = trace[i + 1]
                    edges[(ot, ev0["concept:name"], ev1["concept:name"],
                           ev0["event_id"], ev1["event_id"],
                           trace.attributes["concept:name"],
                           ev0["time:timestamp"], ev1["time:timestamp"])] += 1
                    acti_spec[(ot, trace[i]["concept:name"],
                               trace[i]["event_id"],
                               trace.attributes["concept:name"],
                               trace[i]["time:timestamp"])] += 1
                # the trace is known to be non-empty here, so its last
                # event can be registered directly
                acti_spec[(ot, trace[-1]["concept:name"],
                           trace[-1]["event_id"],
                           trace.attributes["concept:name"],
                           trace[-1]["time:timestamp"])] += 1

        models[ot] = tsystem.apply(log)

    activities = dict(Counter(list(x[1] for x in activities)))
    activities_repeated = set(x for x in activities_repeated
                              if activities_repeated[x] > 1)

    return {
        "type": "trans_system",
        "models": models,
        "activities": activities,
        "activities_repeated": activities_repeated,
        "edges": edges,
        "acti_spec": acti_spec
    }
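
A minimal usage sketch (hypothetical: `mdl_df` is an illustrative dataframe with `event_id`, `event_activity`, `event_timestamp`, and one column per object type):

result = apply(mdl_df, parameters={"min_acti_freq": 1, "min_edge_freq": 1})
print(result["activities"])           # activity -> number of distinct events
print(result["activities_repeated"])  # activities shared by more than one object type
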
Example #3
    # excerpt: the snippet begins mid-function; this loop header is a
    # reconstruction, and lips_map, Shared, and frozendict are assumed
    # to be defined earlier in the original script
    for n in lips_map:
        eve = lips_map[n]
        corr = Shared.lips[n]
        for el in corr:
            Shared.events[eve].add(frozendict({"VERKBELEG": el}))

if __name__ == "__main__":
    read_ekpo()
    read_ekbe()
    read_vbfa()
    read_lips()
    read_activities()
    extract_cdhdr()
    extract_vbak()
    extract_eban()
    extract_ekko()
    #extract_likp()
    #extract_rbkp()
    #extract_bkpf()
    insert_ekpo_information()
    insert_ekbe_information()
    insert_vbfa_information()
    insert_lips_information()
    dataframe = get_final_dataframe()
    dataframe = clean_frequency.apply(dataframe, 4)
    model = discovery.apply(dataframe, model_type_variant="model2", node_freq_variant="type21",
                            edge_freq_variant="type211")
    gviz = vis_factory.apply(model, parameters={"format": "svg", "min_edge_freq": 0})
    vis_factory.view(gviz)