Example #1
 def test_importExportCSVtoCSV(self):
     # to avoid static method warnings in tests, which by construction
     # of the unittest package have to be written this way
     self.dummy_variable = "dummy_value"
     df = pd.read_csv(os.path.join(INPUT_DATA_DIR, "running-example.csv"))
     df = dataframe_utils.convert_timestamp_columns_in_df(df)
     event_log = log_conversion.apply(
         df, variant=log_conversion.TO_EVENT_STREAM)
     event_log = sorting.sort_timestamp(event_log)
     event_log = sampling.sample(event_log)
     event_log = index_attribute.insert_event_index_as_event_attribute(
         event_log)
     log = log_conversion.apply(event_log)
     log = sorting.sort_timestamp(log)
     log = sampling.sample(log)
     log = index_attribute.insert_trace_index_as_event_attribute(log)
     event_log_transformed = log_conversion.apply(
         log, variant=log_conversion.TO_EVENT_STREAM)
     df = log_conversion.apply(event_log_transformed,
                               variant=log_conversion.TO_DATA_FRAME)
     df.to_csv(os.path.join(OUTPUT_DATA_DIR,
                            "running-example-exported.csv"))
     df = pd.read_csv(
         os.path.join(OUTPUT_DATA_DIR, "running-example-exported.csv"))
     df = dataframe_utils.convert_timestamp_columns_in_df(df)
     event_log_imported_after_export = log_conversion.apply(
         df, variant=log_conversion.TO_EVENT_STREAM)
     log_imported_after_export = log_conversion.apply(
         event_log_imported_after_export)
     self.assertEqual(len(log), len(log_imported_after_export))
     os.remove(os.path.join(OUTPUT_DATA_DIR,
                            "running-example-exported.csv"))
Example #2
def apply(file_path, return_obj_dataframe=False, parameters=None):
    if parameters is None:
        parameters = {}

    db = sqlite3.connect(file_path)

    df = pd.read_sql_query("SELECT * FROM event_log", db)

    # rename columns: strip the "case_" prefix from case-level attributes
    # and prefix all remaining (event-level) columns with "event_"
    col = list(df.columns)
    rep_dict = {}
    for x in col:
        if x.startswith("case_"):
            rep_dict[x] = x.split("case_")[1]
        else:
            rep_dict[x] = "event_" + x
    df = df.rename(columns=rep_dict)

    df = df.dropna(subset=["event_id"])
    df = df.dropna(subset=["event_activity"])
    df = df.dropna(subset=["event_timestamp"])

    df = dataframe_utils.convert_timestamp_columns_in_df(df)
    """print(df)

    ot_columns = [x for x in df.columns if not x.startswith("event_")]
    for ot in ot_columns:
        df[ot] = df[ot].apply(lambda x: eval(x))

    print(df)"""

    if return_obj_dataframe:
        ot_df = pd.read_sql_query("SELECT * FROM object_types", db)
        ot_df = ot_df.dropna(subset=["NAME"])

        OT = list(ot_df["NAME"])

        obj_df_list = []

        for ot in OT:
            o_df = pd.read_sql_query("SELECT * FROM " + ot, db)

            col = list(o_df.columns)
            rep_dict = {}
            for x in col:
                rep_dict[x] = "object_" + x
            o_df = o_df.rename(columns=rep_dict)

            o_df = o_df.dropna(subset=["object_id"])
            o_df["object_type"] = ot

            o_df = dataframe_utils.convert_timestamp_columns_in_df(o_df)

            obj_df_list.append(o_df)

        obj_df = pd.concat(obj_df_list)

        db.close()
        return df, obj_df

    db.close()
    return df
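
A minimal usage sketch for the importer above; the database file name is a hypothetical placeholder, not part of the original example:

# hedged sketch: assumes "example_log.sqlite" contains the expected
# event_log and object_types tables
events_df = apply("example_log.sqlite")
print(events_df.head())
# also retrieve the concatenated object dataframe
events_df, objects_df = apply("example_log.sqlite", return_obj_dataframe=True)
print(objects_df["object_type"].unique())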
Example #3
def execute_script():
    df = pd.read_csv("../tests/input_data/interval_event_log.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(df)
    act_count = dict(df["concept:name"].value_counts())
    parameters = {}
    parameters[
        constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY] = "start_timestamp"
    parameters[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] = "time:timestamp"
    parameters["format"] = "svg"
    start_activities = sa_get.get_start_activities(df, parameters=parameters)
    end_activities = ea_get.get_end_activities(df, parameters=parameters)
    parameters["start_activities"] = start_activities
    parameters["end_activities"] = end_activities
    soj_time = soj_time_get.apply(df, parameters=parameters)
    dfg, performance_dfg = correlation_miner.apply(
        df, variant=correlation_miner.Variants.CLASSIC, parameters=parameters)
    gviz_freq = dfg_vis.apply(dfg,
                              activities_count=act_count,
                              soj_time=soj_time,
                              variant=dfg_vis.Variants.FREQUENCY,
                              parameters=parameters)
    dfg_vis.view(gviz_freq)
    gviz_perf = dfg_vis.apply(performance_dfg,
                              activities_count=act_count,
                              soj_time=soj_time,
                              variant=dfg_vis.Variants.PERFORMANCE,
                              parameters=parameters)
    dfg_vis.view(gviz_perf)
Example #4
 def test_passedtime_prepost_df(self):
     df = pd.read_csv(os.path.join("input_data", "running-example.csv"))
     df = dataframe_utils.convert_timestamp_columns_in_df(df)
     prepost = df_passed_time.apply(df,
                                    "decide",
                                    variant=df_passed_time.Variants.PREPOST)
     del prepost
Example #5
def apply(net,
          initial_marking=None,
          final_marking=None,
          log=None,
          aggregated_statistics=None,
          parameters=None,
          variant=Variants.WO_DECORATION):
    if parameters is None:
        parameters = {}
    if log is not None:
        if pkgutil.find_loader("pandas"):
            import pandas
            from pm4py.objects.log.util import dataframe_utils

            if isinstance(log, pandas.core.frame.DataFrame):
                log = dataframe_utils.convert_timestamp_columns_in_df(log)
        log = log_conversion.apply(log, parameters,
                                   log_conversion.TO_EVENT_LOG)
    return exec_utils.get_variant(variant).apply(
        net,
        initial_marking,
        final_marking,
        log=log,
        aggregated_statistics=aggregated_statistics,
        parameters=parameters)
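
A hedged usage sketch for the apply entry point above; the CSV path is an assumption, and the discovery step mirrors Example #28:

# illustrative sketch (assumed path): discover a net with the inductive
# miner, then pass the raw dataframe as log so that the timestamp
# conversion branch above is exercised
import os
import pandas as pd
from pm4py.objects.log.util import dataframe_utils
from pm4py.algo.discovery.inductive import algorithm as inductive_miner

df = pd.read_csv(os.path.join("input_data", "running-example.csv"))
df = dataframe_utils.convert_timestamp_columns_in_df(df)
net, im, fm = inductive_miner.apply(df)
gviz = apply(net, im, fm, log=df)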
Example #6
 def test_attrValueDifferentPersons_neg(self):
     df = pd.read_csv(os.path.join("input_data", "running-example.csv"))
     df = dataframe_utils.convert_timestamp_columns_in_df(df)
     attr_value_different_persons_neg = ltl_checker.attr_value_different_persons(
         df,
         "check ticket",
         parameters={ltl_checker.Parameters.POSITIVE: False})
Example #7
 def test_filtering_traces_attribute_in_timeframe(self):
     input_log = os.path.join(INPUT_DATA_DIR, "receipt.csv")
     df = pd.read_csv(input_log)
     df = dataframe_utils.convert_timestamp_columns_in_df(df)
     df1 = timestamp_filter.filter_traces_attribute_in_timeframe(
         df, "concept:name", "Confirmation of receipt",
         "2011-03-09 00:00:00", "2012-01-18 23:59:59")
Example #8
 def test_heuplusplus_perf_df(self):
     df = pd.read_csv(os.path.join(INPUT_DATA_DIR,
                                   "interval_event_log.csv"))
     df = dataframe_utils.convert_timestamp_columns_in_df(df)
     heu_net = heuristics_miner.Variants.PLUSPLUS.value.apply_heu_pandas(
         df, parameters={"heu_net_decoration": "performance"})
     gviz = hn_vis.apply(heu_net)
Example #9
 def test_performance_spectrum(self):
     log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
     from pm4py.algo.discovery.performance_spectrum import algorithm as pspectrum
     ps = pspectrum.apply(log, ["register request", "decide"])
     df = pd.read_csv(os.path.join("input_data", "running-example.csv"))
     df = dataframe_utils.convert_timestamp_columns_in_df(df)
     ps = pspectrum.apply(df, ["register request", "decide"])
Example #10
 def test_res_profiles_df(self):
     from pm4py.algo.organizational_mining.resource_profiles import algorithm
     log = pd.read_csv(
         os.path.join("..", "tests", "input_data", "running-example.csv"))
     log = dataframe_utils.convert_timestamp_columns_in_df(log)
     algorithm.distinct_activities(log, "2010-12-30 00:00:00",
                                   "2011-01-25 00:00:00", "Sara")
     algorithm.activity_frequency(log, "2010-12-30 00:00:00",
                                  "2011-01-25 00:00:00", "Sara", "decide")
     algorithm.activity_completions(log, "2010-12-30 00:00:00",
                                    "2011-01-25 00:00:00", "Sara")
     algorithm.case_completions(log, "2010-12-30 00:00:00",
                                "2011-01-25 00:00:00", "Pete")
     algorithm.fraction_case_completions(log, "2010-12-30 00:00:00",
                                         "2011-01-25 00:00:00", "Pete")
     algorithm.average_workload(log, "2010-12-30 00:00:00",
                                "2011-01-15 00:00:00", "Mike")
     algorithm.multitasking(log, "2010-12-30 00:00:00",
                            "2011-01-25 00:00:00", "Mike")
     algorithm.average_duration_activity(log, "2010-12-30 00:00:00",
                                         "2011-01-25 00:00:00", "Sue",
                                         "examine thoroughly")
     algorithm.average_case_duration(log, "2010-12-30 00:00:00",
                                     "2011-01-25 00:00:00", "Sue")
     algorithm.interaction_two_resources(log, "2010-12-30 00:00:00",
                                         "2011-01-25 00:00:00", "Mike",
                                         "Pete")
     algorithm.social_position(log, "2010-12-30 00:00:00",
                               "2011-01-25 00:00:00", "Sue")
Example #11
 def test_performance_spectrum_df(self):
     df = pd.read_csv(os.path.join("input_data", "receipt.csv"))
     df = dataframe_utils.convert_timestamp_columns_in_df(df)
     pspectr = df_pspectrum.apply(df, [
         "T02 Check confirmation of receipt",
         "T03 Adjust confirmation of receipt"
     ], 1000, {})
Example #12
 def test_AeventuallyBeventuallyC_neg(self):
     df = pd.read_csv(os.path.join("input_data", "running-example.csv"))
     df = dataframe_utils.convert_timestamp_columns_in_df(df)
     filt_A_ev_B_ev_C_neg = ltl_checker.A_eventually_B_eventually_C(df, "check ticket", "decide",
                                                                    "pay compensation",
                                                                    parameters={
                                                                        ltl_checker.Parameters.POSITIVE: False})
Example #13
def apply(net: PetriNet,
          initial_marking: Marking = None,
          final_marking: Marking = None,
          log: Union[EventLog, EventStream, pd.DataFrame] = None,
          aggregated_statistics=None,
          parameters: Optional[Dict[Any, Any]] = None,
          variant=Variants.WO_DECORATION) -> graphviz.Digraph:
    if parameters is None:
        parameters = {}
    if log is not None:
        if pkgutil.find_loader("pandas"):
            import pandas
            from pm4py.objects.log.util import dataframe_utils

            if isinstance(log, pandas.core.frame.DataFrame):
                log = dataframe_utils.convert_timestamp_columns_in_df(log)
        log = log_conversion.apply(log, parameters,
                                   log_conversion.TO_EVENT_LOG)
    return exec_utils.get_variant(variant).apply(
        net,
        initial_marking,
        final_marking,
        log=log,
        aggregated_statistics=aggregated_statistics,
        parameters=parameters)
Example #14
 def test_fourEeyesPrinciple_neg(self):
     df = pd.read_csv(os.path.join("input_data", "running-example.csv"))
     df = dataframe_utils.convert_timestamp_columns_in_df(df)
     filt_foureyes_neg = ltl_checker.four_eyes_principle(
         df,
         "check ticket",
         "pay compensation",
         parameters={ltl_checker.Parameters.POSITIVE: False})
Example #15
def execute_script():
    dataframe = pd.read_csv("../tests/input_data/receipt.csv")
    dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe)
    tf = temporal_profile_discovery.apply(dataframe)
    conformance = temporal_profile_conformance.apply(dataframe, tf, parameters={"zeta": 6.0})
    for index, dev in enumerate(conformance):
        if len(dev) > 0:
            print(index, dev)
Example #16
 def test_efg_pandas(self):
     import pm4py
     import pandas as pd
     dataframe = pd.read_csv(os.path.join("input_data", "interval_event_log.csv"))
     from pm4py.objects.log.util import dataframe_utils
     dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe)
     from pm4py.statistics.eventually_follows.pandas import get
     efg = get.apply(dataframe, parameters={get.Parameters.START_TIMESTAMP_KEY: "start_timestamp"})
Example #17
def format_dataframe(df: pd.DataFrame,
                     case_id: str = constants.CASE_CONCEPT_NAME,
                     activity_key: str = xes_constants.DEFAULT_NAME_KEY,
                     timestamp_key: str = xes_constants.DEFAULT_TIMESTAMP_KEY,
                     timest_format: Optional[str] = None) -> pd.DataFrame:
    """
    Gives the dataframe the appropriate format for process mining purposes

    Parameters
    --------------
    df
        Dataframe
    case_id
        Case identifier column
    activity_key
        Activity column
    timestamp_key
        Timestamp column
    timest_format
        Timestamp format that is provided to Pandas

    Returns
    --------------
    df
        Dataframe
    """
    from pm4py.objects.log.util import dataframe_utils
    if case_id not in df.columns:
        raise Exception(case_id + " column (case ID) is not in the dataframe!")
    if activity_key not in df.columns:
        raise Exception(activity_key +
                        " column (activity) is not in the dataframe!")
    if timestamp_key not in df.columns:
        raise Exception(timestamp_key +
                        " column (timestamp) is not in the dataframe!")
    df = df.rename(
        columns={
            case_id: constants.CASE_CONCEPT_NAME,
            activity_key: xes_constants.DEFAULT_NAME_KEY,
            timestamp_key: xes_constants.DEFAULT_TIMESTAMP_KEY
        })
    df[constants.CASE_CONCEPT_NAME] = df[constants.CASE_CONCEPT_NAME].astype(
        str)
    # makes sure that the timestamp column is of timestamp type
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=timest_format,
        timest_columns=[xes_constants.DEFAULT_TIMESTAMP_KEY])
    # set an index column
    df = pandas_utils.insert_index(df, INDEX_COLUMN)
    # sorts the dataframe
    df = df.sort_values([
        constants.CASE_CONCEPT_NAME, xes_constants.DEFAULT_TIMESTAMP_KEY,
        INDEX_COLUMN
    ])
    # logging.warning(
    #    "please convert the dataframe for advanced process mining applications. log = pm4py.convert_to_event_log(df)")
    return df
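
A short usage sketch for format_dataframe; the file name and column names below are assumptions chosen for illustration:

# hypothetical CSV whose columns do not follow the XES naming conventions
import pandas as pd

raw = pd.read_csv("orders.csv")
df = format_dataframe(raw,
                      case_id="order_id",
                      activity_key="status",
                      timestamp_key="changed_at",
                      timest_format="%Y-%m-%d %H:%M:%S")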
Example #18
 def test_AnextBnextC_pos(self):
     df = pd.read_csv(os.path.join("input_data", "running-example.csv"))
     df = dataframe_utils.convert_timestamp_columns_in_df(df)
     filt_A_next_B_next_C_pos = ltl_checker.A_next_B_next_C(
         df,
         "check ticket",
         "decide",
         "pay compensation",
         parameters={ltl_checker.Parameters.POSITIVE: True})
Example #19
 def test_petrinet_receipt_df(self):
     # to avoid static method warnings in tests, which by construction
     # of the unittest package have to be written this way
     self.dummy_variable = "dummy_value"
     df = pd.read_csv(os.path.join(INPUT_DATA_DIR, "receipt.csv"))
     df = dataframe_utils.convert_timestamp_columns_in_df(df)
     net, im, fm = heuristics_miner.apply(df)
     gviz = pn_vis.apply(net, im, fm)
     del gviz
Example #20
 def test_autofiltering_dataframe(self):
     # to avoid static method warnings in tests, which by construction
     # of the unittest package have to be written this way
     self.dummy_variable = "dummy_value"
     input_log = os.path.join(INPUT_DATA_DIR, "running-example.csv")
     dataframe = pd.read_csv(input_log)
     dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe)
     dataframe = auto_filter.apply_auto_filter(dataframe)
     del dataframe
Example #21
 def csv_loghandler(self):
     """
     Loads the CSV-format log from the configured path
     :return: The log
     """
     log_csv = pd.read_csv(self.pathlog, sep=',')
     log_csv = dataframe_utils.convert_timestamp_columns_in_df(log_csv)
     log_csv = log_csv.sort_values('<timestamp_column>')
     event_log = log_converter.apply(log_csv)
     return event_log
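
The '<timestamp_column>' above is a placeholder left in the original. A standalone sketch of the same flow, assuming the running-example CSV and the standard XES timestamp column:

# assumptions: file path and "time:timestamp" as the sorting column
import pandas as pd
from pm4py.objects.log.util import dataframe_utils
from pm4py.objects.conversion.log import converter as log_converter

log_csv = pd.read_csv("running-example.csv", sep=',')
log_csv = dataframe_utils.convert_timestamp_columns_in_df(log_csv)
log_csv = log_csv.sort_values("time:timestamp")
event_log = log_converter.apply(log_csv)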
Example #22
 def test_attr_value_repetition(self):
     from pm4py.algo.filtering.pandas.attr_value_repetition import filter
     df = pd.read_csv(os.path.join("input_data", "running-example.csv"))
     df = dataframe_utils.convert_timestamp_columns_in_df(df)
     filtered_df = filter.apply(
         df,
         "Sara",
         parameters={
             constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: "org:resource"
         })
Example #23
 def test_pdimp_xesexp(self):
     log0 = pd.read_csv(os.path.join("input_data", "running-example.csv"))
     log0 = dataframe_utils.convert_timestamp_columns_in_df(log0)
     log = log_conversion.apply(log0, variant=log_conversion.TO_EVENT_LOG)
     stream = log_conversion.apply(log0, variant=log_conversion.TO_EVENT_STREAM)
     df = log_conversion.apply(log0, variant=log_conversion.TO_DATA_FRAME)
     xes_exporter.apply(log, "ru.xes")
     xes_exporter.apply(stream, "ru.xes")
     xes_exporter.apply(df, "ru.xes")
     os.remove('ru.xes')
Example #24
    def test_dfCasedurationPlotSemilogx(self):
        # to avoid static method warnings in tests, which by construction
        # of the unittest package have to be written this way
        self.dummy_variable = "dummy_value"

        df = pd.read_csv(os.path.join("input_data", "receipt.csv"))
        df = dataframe_utils.convert_timestamp_columns_in_df(df)
        x, y = pd_case_statistics.get_kde_caseduration(df)
        json = pd_case_statistics.get_kde_caseduration_json(df)
        del json
Example #25
 def test_footprints_tree_df(self):
     df = pd.read_csv(os.path.join("input_data", "running-example.csv"))
     df = dataframe_utils.convert_timestamp_columns_in_df(df)
     from pm4py.algo.discovery.inductive import algorithm as inductive_miner
     log = converter.apply(df)
     tree = inductive_miner.apply_tree(log)
     from pm4py.algo.discovery.footprints import algorithm as footprints_discovery
     fp_df = footprints_discovery.apply(df)
     fp_tree = footprints_discovery.apply(tree)
     from pm4py.algo.conformance.footprints import algorithm as footprints_conformance
     conf = footprints_conformance.apply(fp_df, fp_tree)
Example #26
def execute_script():
    df = pd.read_csv("../tests/input_data/receipt.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(df)
    act_count = dict(df["concept:name"].value_counts())
    dfg, performance_dfg = correlation_miner.apply(df, variant=correlation_miner.Variants.CLASSIC)
    gviz_freq = dfg_vis.apply(dfg, activities_count=act_count, variant=dfg_vis.Variants.FREQUENCY,
                              parameters={"format": "svg"})
    dfg_vis.view(gviz_freq)
    gviz_perf = dfg_vis.apply(performance_dfg, activities_count=act_count, variant=dfg_vis.Variants.PERFORMANCE,
                              parameters={"format": "svg"})
    dfg_vis.view(gviz_perf)
Example #27
 def test_filtering_variants(self):
     # to avoid static method warnings in tests, which by construction
     # of the unittest package have to be written this way
     self.dummy_variable = "dummy_value"
     input_log = os.path.join(INPUT_DATA_DIR, "running-example.csv")
     dataframe = pd.read_csv(input_log)
     dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe)
     variants = case_statistics.get_variant_statistics(dataframe)
     chosen_variants = [variants[0]["variant"]]
     dataframe = variants_filter.apply(dataframe, chosen_variants)
     del dataframe
Example #28
 def test_inductiveminer_df(self):
     log = pd.read_csv(os.path.join("input_data", "running-example.csv"))
     log = dataframe_utils.convert_timestamp_columns_in_df(log)
     net, im, fm = inductive_miner.apply(log)
     aligned_traces_tr = tr_alg.apply(log, net, im, fm)
     aligned_traces_alignments = align_alg.apply(log, net, im, fm)
     evaluation = eval_alg.apply(log, net, im, fm)
     fitness = rp_fit.apply(log, net, im, fm)
     precision = precision_evaluator.apply(log, net, im, fm)
     gen = generalization.apply(log, net, im, fm)
     sim = simplicity.apply(net)
Example #29
 def test_inductiveminer_stream(self):
     df = pd.read_csv(os.path.join("input_data", "running-example.csv"))
     df = dataframe_utils.convert_timestamp_columns_in_df(df)
     stream = log_conversion.apply(df, variant=log_conversion.TO_EVENT_STREAM)
     net, im, fm = inductive_miner.apply(stream)
     aligned_traces_tr = tr_alg.apply(stream, net, im, fm)
     aligned_traces_alignments = align_alg.apply(stream, net, im, fm)
     evaluation = eval_alg.apply(stream, net, im, fm)
     fitness = rp_fit.apply(stream, net, im, fm)
     precision = precision_evaluator.apply(stream, net, im, fm)
     gen = generalization.apply(stream, net, im, fm)
     sim = simplicity.apply(net)
Example #30
    def test_dfDateAttribute(self):
        # to avoid static method warnings in tests, which by construction
        # of the unittest package have to be written this way
        self.dummy_variable = "dummy_value"

        df = pd.read_csv(os.path.join("input_data",
                                      "roadtraffic100traces.csv"))
        df = dataframe_utils.convert_timestamp_columns_in_df(df)

        x, y = pd_attributes_filter.get_kde_date_attribute(df)
        json = pd_attributes_filter.get_kde_date_attribute_json(df)
        del json