def build_from_csv(self, path, parameters=None):
    """
    Builds the handler from the specified path to a CSV file

    Parameters
    -------------
    path
        Path to the log file
    parameters
        Parameters of the algorithm
    """
    if parameters is None:
        parameters = {}
    activity_key = parameters.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY)
    timestamp_key = parameters.get(constants.PARAMETER_CONSTANT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(constants.PARAMETER_CONSTANT_CASEID_KEY)
    recognized_format = format_recognition.get_format_from_csv(path)
    sep = parameters.get("sep", recognized_format.delimiter)
    quotechar = parameters.get("quotechar", recognized_format.quotechar)
    if quotechar is not None:
        self.dataframe = csv_import_adapter.import_dataframe_from_path(path, sep=sep, quotechar=quotechar)
    else:
        self.dataframe = csv_import_adapter.import_dataframe_from_path(path, sep=sep)
    case_id_glue1, activity_key1, timestamp_key1 = assign_column_correspondence(self.dataframe)
    if case_id_glue is None:
        case_id_glue = case_id_glue1
    if activity_key is None:
        activity_key = activity_key1
    if timestamp_key is None:
        timestamp_key = timestamp_key1
    if activity_key != xes.DEFAULT_NAME_KEY:
        self.dataframe[xes.DEFAULT_NAME_KEY] = self.dataframe[activity_key]
    if timestamp_key != xes.DEFAULT_TIMESTAMP_KEY:
        self.dataframe[xes.DEFAULT_TIMESTAMP_KEY] = self.dataframe[timestamp_key]
    if case_id_glue != CASE_CONCEPT_NAME:
        self.dataframe[CASE_CONCEPT_NAME] = self.dataframe[case_id_glue]
    self.postloading_processing_dataframe()
    self.dataframe = self.dataframe.sort_values([DEFAULT_TIMESTAMP_KEY, ws_constants.DEFAULT_EVENT_INDEX_KEY])
    if str(self.dataframe[CASE_CONCEPT_NAME].dtype) != "object":
        self.dataframe[CASE_CONCEPT_NAME] = self.dataframe[CASE_CONCEPT_NAME].astype(str)
    if ws_constants.DEFAULT_CASE_INDEX_KEY not in self.dataframe:
        self.dataframe[ws_constants.DEFAULT_CASE_INDEX_KEY] = self.dataframe.groupby(CASE_CONCEPT_NAME).ngroup()
    if not self.is_lazy:
        self.sort_dataframe_by_case_id()
        self.build_reduced_dataframe()
        self.build_variants_df()
        self.build_grouped_dataframe()
        self.build_reduced_grouped_dataframe()
        self.calculate_variants_number()
        self.calculate_cases_number()
        self.calculate_events_number()
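# Usage sketch for build_from_csv. The handler class name below is hypothetical
# (the method clearly lives on a dataframe-backed log handler); only parameter
# keys actually read by the method are passed, and the CSV path is illustrative.
handler = DataframeHandler(is_lazy=False)  # hypothetical class, for illustration only
handler.build_from_csv(
    "running-example.csv",
    parameters={constants.PARAMETER_CONSTANT_CASEID_KEY: "case:concept:name",
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name",
                constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "time:timestamp"})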
def test_attrValueDifferentPersons_neg(self):
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    attr_value_different_persons_neg = ltl_checker.attr_value_different_persons(
        df, "check ticket",
        parameters={ltl_checker.Parameters.POSITIVE: False})
def test_alpha_miner_dataframe(self):
    from pm4py.objects.log.adapters.pandas import csv_import_adapter
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    from pm4py.algo.discovery.alpha import factory as alpha_miner
    net, im, fm = alpha_miner.apply(
        df, variant=alpha_miner.ALPHA_VERSION_CLASSIC)
def execute_script():
    # loads the dataframe from the CSV file
    csv_path = os.path.join("..", "tests", "input_data", "running-example.csv")
    df = csv_import_adapter.import_dataframe_from_path(csv_path)
    # calculates the Matrix Container object
    mco = sna_transformer.apply(df)
    # calculates the Handover of Work matrix
    hw_matrix = handover_of_work.apply(mco)
    # calculates the Similar Activities matrix
    sim_act_matrix = similar_activities.apply(mco)
    # shows the Handover of Work graph
    gviz = sna_vis_factory.apply(mco, hw_matrix, parameters={"format": "svg"})
    sna_vis_factory.view(gviz)
    # shows the Similar Activities graph
    gviz = sna_vis_factory.apply(mco, sim_act_matrix,
                                 parameters={"format": "svg", "threshold": 0.0})
    sna_vis_factory.view(gviz)
    # calculates and shows the Real Handover of Work matrix
    real_hw_matrix = real_handover_of_work.apply(mco, parameters={"format": "svg"})
    gviz = sna_vis_factory.apply(mco, real_hw_matrix)
    sna_vis_factory.view(gviz)
def test_fourEeyesPrinciple_neg(self):
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    filt_foureyes_neg = ltl_checker.four_eyes_principle(
        df, "check ticket", "pay compensation",
        parameters={ltl_checker.Parameters.POSITIVE: False})
def test_AeventuallyB_neg(self):
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    filt_A_ev_B_neg = ltl_checker.A_eventually_B(
        df, "check ticket", "pay compensation",
        parameters={ltl_checker.Parameters.POSITIVE: False})
def import_event_stream(path, parameters=None):
    """
    Imports a CSV file from the given path

    Parameters
    ----------
    path:
        Input CSV file path
    parameters
        Parameters of the algorithm, including
            sep -> column separator
            quotechar -> (if specified) character that starts/ends big strings in the CSV
            nrows -> (if specified) maximum number of rows to read from the CSV
            sort -> boolean value that tells if the CSV should be ordered
            sort_field -> if the sort option is enabled, the CSV is automatically sorted by the specified column

    Returns
    -------
    stream : :class:`pm4py.log.log.EventStream`
        An event stream
    """
    if parameters is None:
        parameters = {}
    sep = parameters.get("sep", ",")
    quotechar = parameters.get("quotechar", None)
    nrows = parameters.get("nrows", None)
    sort = parameters.get("sort", False)
    sort_field = parameters.get("sort_field", "time:timestamp")
    insert_event_indexes = parameters.get("insert_event_indexes", False)
    timest_format = parameters.get("timest_format", None)
    timest_columns = parameters.get("timest_columns", None)

    df = import_dataframe_from_path(path, sep=sep, quotechar=quotechar, nrows=nrows,
                                    sort=sort, sort_field=sort_field,
                                    timest_format=timest_format, timest_columns=timest_columns)
    event_log = log_conv_fact.apply(df, variant=log_conv_fact.TO_EVENT_STREAM)
    if insert_event_indexes:
        event_log.insert_event_index_as_event_attribute()
    return event_log
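# Usage sketch for import_event_stream above (string-keyed parameters, exactly
# the keys the function reads; the CSV path is illustrative):
stream = import_event_stream(
    os.path.join("input_data", "running-example.csv"),
    parameters={"sep": ",", "sort": True, "insert_event_indexes": True})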
def generate_dataframe(filename):
    try:
        # import the CSV into a pandas dataframe, specifying sep as the separator
        dataframe = csv_import_adapter.import_dataframe_from_path(filename, sep=",")
        return dataframe
    except FileNotFoundError:
        print("Invalid file name")
        exit()
def test_petrinet_receipt_df(self):
    # to avoid static-method warnings in tests, which by construction of the
    # unittest package have to be written this way
    self.dummy_variable = "dummy_value"
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join(INPUT_DATA_DIR, "running-example.csv"))
    net, im, fm = heuristics_miner.apply(df)
    gviz = pn_vis_factory.apply(net, im, fm)
    del gviz
def test_AnextBnextC_pos(self):
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    filt_A_next_B_next_C_pos = ltl_checker.A_next_B_next_C(
        df, "check ticket", "decide", "pay compensation",
        parameters={ltl_checker.Parameters.POSITIVE: True})
def test_performance_spectrum(self):
    log = xes_importer.apply(
        os.path.join("input_data", "running-example.xes"))
    from pm4py.statistics.performance_spectrum import factory as pspectrum
    ps = pspectrum.apply(log, ["register request", "decide"])
    from pm4py.objects.log.adapters.pandas import csv_import_adapter
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    ps = pspectrum.apply(df, ["register request", "decide"])
def export_results_to_process_log_xes(self):
    """
    Exports the results of the demo to an XES file with the same name as the
    previously exported CSV file. We load this CSV file and transform it into
    an XES file.

    :return:
    """
    # Create the filename for the current self.min_category_export_score
    self.results_xes_path = self.results_log_path_prefix + "_" + \
        str(int(round(self.min_category_export_score * 100))) + "_thresh.xes"

    # Read the previously generated CSV file and transform it into a log
    dataframe = csv_import_adapter.import_dataframe_from_path(
        self.results_log_csv_path, sep=",")
    log = conversion_factory.apply(
        dataframe,
        parameters={
            constants.PARAMETER_CONSTANT_CASEID_KEY: "case:concept:name",
            constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "category_name",
            constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "time:timestamp"
        })

    # Add relevant data for ProM import
    log._classifiers = {
        'Event Name': ['concept:name'],
        '(Event Name AND Lifecycle transition)': ['concept:name', 'lifecycle:transition']
    }
    log._extensions = {
        'Time': {'prefix': 'time', 'uri': 'http://www.xes-standard.org/time.xesext'},
        'Lifecycle': {'prefix': 'lifecycle', 'uri': 'http://www.xes-standard.org/lifecycle.xesext'},
        'Concept': {'prefix': 'concept', 'uri': 'http://www.xes-standard.org/concept.xesext'}
    }
    for trace in log._list:
        # set the trace concept:name to str instead of int, also for ProM import
        trace._attributes["concept:name"] = str(trace._attributes["concept:name"])
        # set org:resource to string as well
        for item in trace._list:
            item["org:resource"] = str(item["org:resource"])

    # Export the results to XES
    xes_exporter.export_log(log, self.results_xes_path)
    logger.info("Exported demo detections to: %s" % self.results_xes_path)
def test_filtering_paths(self):
    # to avoid static-method warnings in tests, which by construction of the
    # unittest package have to be written this way
    self.dummy_variable = "dummy_value"
    input_log = os.path.join(INPUT_DATA_DIR, "running-example.csv")
    dataframe = csv_import_adapter.import_dataframe_from_path(input_log, sep=',')
    df3 = paths_filter.apply(dataframe, [("examine casually", "check ticket")], {"positive": False})
    del df3
    df3 = paths_filter.apply(dataframe, [("examine casually", "check ticket")], {"positive": True})
    del df3
def test_dfCasedurationPlotSemilogx(self):
    # to avoid static-method warnings in tests, which by construction of the
    # unittest package have to be written this way
    self.dummy_variable = "dummy_value"
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "receipt.csv"))
    x, y = pd_case_statistics.get_kde_caseduration(df)
    json = pd_case_statistics.get_kde_caseduration_json(df)
    del json
def test_dfDateAttribute(self):
    # to avoid static-method warnings in tests, which by construction of the
    # unittest package have to be written this way
    self.dummy_variable = "dummy_value"
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "receipt.csv"))
    x, y = pd_attributes_filter.get_kde_date_attribute(df)
    json = pd_attributes_filter.get_kde_date_attribute_json(df)
    del json
def test_inductiveminer_df(self):
    log = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    net, im, fm = inductive_miner.apply(log)
    aligned_traces_tr = tr_factory.apply(log, net, im, fm)
    aligned_traces_alignments = align_factory.apply(log, net, im, fm)
    evaluation = eval_factory.apply(log, net, im, fm)
    fitness = rp_fit_factory.apply(log, net, im, fm)
    precision = precision_factory.apply(log, net, im, fm)
    generalization = generalization_factory.apply(log, net, im, fm)
    simplicity = simplicity_factory.apply(net)
def test_footprints_tree_df(self):
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    from pm4py.algo.discovery.inductive import algorithm as inductive_miner
    log = converter.apply(df)
    tree = inductive_miner.apply_tree(log)
    from pm4py.algo.discovery.footprints import algorithm as footprints_discovery
    fp_df = footprints_discovery.apply(df)
    fp_tree = footprints_discovery.apply(tree)
    from pm4py.algo.conformance.footprints import algorithm as footprints_conformance
    conf = footprints_conformance.apply(fp_df, fp_tree)
def test_pandas(self):
    # to avoid static-method warnings in tests, which by construction of the
    # unittest package have to be written this way
    self.dummy_variable = "dummy_value"
    log = csv_import_adapter.import_dataframe_from_path(
        os.path.join("..", "tests", "input_data", "running-example.csv"))
    hw_values = sna_factory.apply(log, variant="handover")
    wt_values = sna_factory.apply(log, variant="working_together")
    sub_values = sna_factory.apply(log, variant="subcontracting")
def test_dfNumericAttribute(self):
    # to avoid static-method warnings in tests, which by construction of the
    # unittest package have to be written this way
    self.dummy_variable = "dummy_value"
    df = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "roadtraffic100traces.csv"))
    x, y = pd_attributes_filter.get_kde_numeric_attribute(df, "amount")
    json = pd_attributes_filter.get_kde_numeric_attribute_json(df, "amount")
    del json
def test_filtering_timeframe(self):
    # to avoid static-method warnings in tests, which by construction of the
    # unittest package have to be written this way
    self.dummy_variable = "dummy_value"
    input_log = os.path.join(INPUT_DATA_DIR, "receipt.csv")
    df = csv_import_adapter.import_dataframe_from_path(input_log, sep=',')
    df1 = timestamp_filter.apply_events(df, "2011-03-09 00:00:00", "2012-01-18 23:59:59")
    df2 = timestamp_filter.filter_traces_intersecting(df, "2011-03-09 00:00:00", "2012-01-18 23:59:59")
    df3 = timestamp_filter.filter_traces_contained(df, "2011-03-09 00:00:00", "2012-01-18 23:59:59")
    del df1
    del df2
    del df3
def read_xes(data_dir, dataset, aggregate_type, mode="pruning"): prune_parameter_freq = 350 prune_parameter_time = -1 #keep all #read the xes file if dataset in "BPIC14": # log = csv_importer.import_event_stream(os.path.join(data_dir, dataset + ".csv")) data = csv_import_adapter.import_dataframe_from_path(os.path.join( data_dir, dataset + ".csv"), sep=";") data['case:concept:name'] = data['Incident ID'] data['time:timestamp'] = data['DateStamp'] data['concept:name'] = data['IncidentActivity_Type'] log = conversion_factory.apply(data) elif dataset == "Unrineweginfectie": data = csv_import_adapter.import_dataframe_from_path(os.path.join( data_dir, dataset + ".csv"), sep=",") data['case:concept:name'] = data['Patientnummer'] data['time:timestamp'] = data['Starttijd'] data['concept:name'] = data['Aciviteit'] log = conversion_factory.apply(data) else: log = xes_import_factory.apply(os.path.join(data_dir, dataset + ".xes")) data = get_dataframe_from_event_stream(log) # dataframe = log_converter.apply(log, variant=log_converter.Variants.TO_DATA_FRAME) # dfg_freq = dfg_factory.apply(log,variant="frequency") # dfg_time =get_dfg_time(data,aggregate_type,dataset) if aggregate_type == AggregateType.FREQ: dfg = dfg_factory.apply(log, variant="frequency") else: dfg = get_dfg_time(data, aggregate_type, dataset) """Getting Start and End activities""" # log = xes_importer.import_log(xes_file) log_start = start_activities_filter.get_start_activities(log) log_end = end_activities_filter.get_end_activities(log) # return dfg_freq,dfg_time return dfg
def test_pdimp_xesexp(self):
    log0 = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    log = log_conv_factory.apply(log0, variant=log_conv_factory.TO_EVENT_LOG)
    stream = log_conv_factory.apply(
        log0, variant=log_conv_factory.TO_EVENT_STREAM)
    df = log_conv_factory.apply(log0, variant=log_conv_factory.TO_DATAFRAME)
    xes_exporter_factory.apply(log, "ru.xes")
    xes_exporter_factory.apply(stream, "ru.xes")
    xes_exporter_factory.apply(df, "ru.xes")
    os.remove('ru.xes')
def calculate_footprint_matrix(filename):
    output = os.path.splitext(filename)[0].lower()
    df = csv_import_adapter.import_dataframe_from_path(filename, sep=";")
    pitches = constants.PITCHES + ["Pause"]
    pitches_range = np.arange(len(pitches))
    footprint_matrix = pd.DataFrame('#', index=pitches_range, columns=pitches_range)
    rename_dict = {i: pitch for i, pitch in enumerate(pitches)}
    footprint_matrix.rename(index=rename_dict, columns=rename_dict, inplace=True)

    # Consider transitions only within a case
    # d = {}
    # for index, row in df.iterrows():
    #     if row['Case_ID'] not in d.keys():
    #         d[row['Case_ID']] = []
    #     d[row['Case_ID']].append(row['Event'])

    # Consider transitions beyond a case
    d = {"0": []}
    for _, row in df.iterrows():
        d["0"].append(row["Event"])

    for _, value in d.items():
        for prev, curr, nxt in zip([None] + value[:-1], value, value[1:] + [None]):
            if curr is None:
                continue
            if curr is not None and curr[:-1] in constants.PITCHES:
                curr = curr[:-1]
            if prev is not None and prev[:-1] in constants.PITCHES:
                prev = prev[:-1]
            if nxt is not None and nxt[:-1] in constants.PITCHES:
                nxt = nxt[:-1]
            if prev is not None:
                footprint_matrix = calculate_footprint_symbol(
                    footprint_matrix, prev, curr, '<=', '=>')
            if nxt is not None:
                footprint_matrix = calculate_footprint_symbol(
                    footprint_matrix, nxt, curr, '=>', '<=')
    return footprint_matrix
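# Usage sketch for calculate_footprint_matrix. The path "melody.csv" is an
# assumption for illustration; the file must be ";"-separated with an "Event"
# column, as the function expects.
footprint = calculate_footprint_matrix("melody.csv")
print(footprint)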
def create_dataframe():
    """ create dataframe """
    dataframe = csv_import_adapter.import_dataframe_from_path(
        'concatenated_files.csv', sep=",")
    dataframe = dataframe.rename(
        columns={
            'correlationId': 'case:concept:name',
            'timestamp': 'time:timestamp',
            'label': 'concept:name',
            'approach': 'case:approach',
            'errortype': 'case:errortype',
            'status': 'case:status'
        })
    return dataframe
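# Usage sketch: after the rename, the dataframe uses pm4py's default column
# names, so it can be converted to a log directly. The conversion_factory
# import mirrors the other snippets here and is an assumption in this context.
df = create_dataframe()  # assumes concatenated_files.csv is in the working directory
log = conversion_factory.apply(df)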
def execute_script():
    df = csv_import_adapter.import_dataframe_from_path(
        "../tests/input_data/receipt.csv")
    act_count = dict(df["concept:name"].value_counts())
    dfg, performance_dfg = correlation_miner.apply(
        df, variant=correlation_miner.Variants.CLASSIC)
    gviz_freq = dfg_vis.apply(dfg, activities_count=act_count,
                              variant=dfg_vis.Variants.FREQUENCY,
                              parameters={"format": "svg"})
    dfg_vis.view(gviz_freq)
    gviz_perf = dfg_vis.apply(performance_dfg, activities_count=act_count,
                              variant=dfg_vis.Variants.PERFORMANCE,
                              parameters={"format": "svg"})
    dfg_vis.view(gviz_perf)
def execute_script():
    # import the CSV & create the log
    dataframe = csv_import_adapter.import_dataframe_from_path(
        datasourceMockdata(), sep=";")
    dataframe = dataframe.rename(columns={
        'coID': 'case:concept:name',
        'Activity': 'concept:name'
    })
    log = conversion_factory.apply(dataframe)

    # option 1: Directly-Follows Graph, representing frequency or performance
    parameters = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name"}
    variant = 'frequency'
    dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
    gviz1 = dfg_vis_factory.apply(dfg, log=log, variant=variant, parameters=parameters)
    dfg_vis_factory.view(gviz1)

    # option 2: Heuristics Miner, which acts on the Directly-Follows Graph and
    # finds common structures; output: Heuristic Net (.svg)
    heu_net = heuristics_miner.apply_heu(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.00
        })
    gviz2 = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "svg"})
    hn_vis.view(gviz2)

    # option 3: Petri Net based on the Heuristics Miner (.png)
    net, im, fm = heuristics_miner.apply(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.00
        })
    gviz3 = petri_vis.apply(
        net, im, fm,
        parameters={petri_vis.Variants.WO_DECORATION.value.Parameters.FORMAT: "png"})
    petri_vis.view(gviz3)
def read_csv(file_path, sep=",", quotechar=None, encoding=None, nrows=None, timest_format=None): """ Reads an event log in the CSV format (Pandas adapter) Parameters ---------------- file_path File path sep Separator; default: , quotechar Quote char; default: None encoding Encoding; default: default of Pandas nrows (If specified) number of rows timest_format Format of the timestamp columns Returns ---------------- dataframe Dataframe """ from pm4py.objects.log.adapters.pandas import csv_import_adapter df = csv_import_adapter.import_dataframe_from_path( file_path, sep=sep, quotechar=quotechar, encoding=encoding, nrows=nrows, timest_format=timest_format) if len(df.columns) < 2: logging.error( "Less than three columns were imported from the CSV file. Please check the specification of the separation and the quote character!" ) else: logging.warning( "Please specify the format of the dataframe: df = pm4py.format_dataframe(df, case_id='<name of the case ID column>', activity_key='<name of the activity column>', timestamp_key='<name of the timestamp column>')" ) return df
def export_to_xes(filename):
    output = os.path.splitext(filename)[0]
    df = csv_import_adapter.import_dataframe_from_path(filename, sep=";")
    df = df.rename(
        columns={
            "Case_ID": "case:concept:name",
            "Event": "concept:name",
            "Type": "org:type",
            "Order": "org:order",
            "Is_Chord": "org:is_chord"
        })
    # create an internal XES log from the pandas dataframe
    log = conversion_factory.apply(df)
    # save the XES log
    xes_exporter.export_log(log, f"{output}.xes")
def main(path0, datasetname):
    # make sure the path ends with a trailing slash
    if not path0.endswith('/'):
        path = path0 + "/"
    else:
        path = path0
    df = csv_importer.import_dataframe_from_path(
        path + datasetname + "_table2_on_file.csv", sep=";")
    patterns = create_patterns_list(path + datasetname + "_new_patterns_filtered.subs")
    rule = rules_log_manage(path + "rules_log.txt")
    data = prepare_data(patterns, df, rule)
    data.to_csv(path + datasetname + '_pattern_occurrence_matrix.csv', index=False)
    return "file created successfully!"
def import_event_stream(path, parameters=None):
    """
    Imports a CSV file from the given path

    Parameters
    ----------
    path:
        Input CSV file path
    parameters
        Parameters of the algorithm, including
            Parameters.SEP -> column separator
            Parameters.QUOTECHAR -> (if specified) character that starts/ends big strings in the CSV
            Parameters.NROWS -> (if specified) maximum number of rows to read from the CSV
            Parameters.SORT -> boolean value that tells if the CSV should be ordered
            Parameters.SORT_FIELD -> if the sort option is enabled, the CSV is automatically sorted by the specified column
            Parameters.INSERT_EVENT_INDEXES -> events get their index as an additional payload
            Parameters.TIMEST_FORMAT -> specifies the timestamp format; if not specified, auto-detection is applied
            Parameters.TIMEST_COLUMNS -> column names of data attributes that contain timestamps
            Parameters.ENCODING -> file encoding

    Returns
    -------
    stream : :class:`pm4py.log.log.EventStream`
        An event stream
    """
    parameters = {} if parameters is None else parameters
    insert_event_indexes = exec_utils.get_param_value(Parameters.INSERT_EVENT_INDEXES, parameters, False)
    df = import_dataframe_from_path(path,
                                    sep=exec_utils.get_param_value(Parameters.SEP, parameters, ","),
                                    quotechar=exec_utils.get_param_value(Parameters.QUOTECHAR, parameters, None),
                                    nrows=exec_utils.get_param_value(Parameters.NROWS, parameters, None),
                                    sort=exec_utils.get_param_value(Parameters.SORT, parameters, False),
                                    sort_field=exec_utils.get_param_value(Parameters.SORT_FIELD, parameters, 'time:timestamp'),
                                    timest_format=exec_utils.get_param_value(Parameters.TIMEST_FORMAT, parameters, None),
                                    timest_columns=exec_utils.get_param_value(Parameters.TIMEST_COLUMNS, parameters, None),
                                    encoding=exec_utils.get_param_value(Parameters.ENCODING, parameters, None))
    stream = log_conv_fact.apply(df, variant=log_conv_fact.TO_EVENT_STREAM)
    if insert_event_indexes:
        stream.insert_event_index_as_event_attribute()
    return stream
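# Usage sketch for the Parameters-based import_event_stream above (only
# members the function actually reads are passed; the path is illustrative):
stream = import_event_stream(
    os.path.join("input_data", "running-example.csv"),
    parameters={Parameters.SEP: ",", Parameters.INSERT_EVENT_INDEXES: True})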