Example #1
0
 def test_case_statistics(self):
     """Smoke-test the pandas case statistics helpers.

     Every helper is simply invoked on the dataframe returned by
     ``self.get_dataframe()``; none of the return values is inspected.
     """
     from pm4py.statistics.traces.pandas import case_statistics as cs

     frame = self.get_dataframe()
     cs.get_cases_description(frame)
     cs.get_variants_df(frame)
     cs.get_variant_statistics(frame)
     # get_variant_statistics_with_case_duration(frame) is currently disabled
     cs.get_events(frame, "N77802")
     cs.get_variants_df_with_case_duration(frame)
     cs.get_variants_df_and_list(frame)
     cs.get_kde_caseduration(frame)
Example #2
0
def get_case_duration_svg(dataframe, parameters=None):
    """
    Builds the case duration graph of a dataframe, rendered as SVG

    Parameters
    -------------
    dataframe
        Dataframe
    parameters
        Possible parameters of the algorithm (forwarded to the KDE
        computation of the case duration)

    Returns
    -------------
    graph
        Tuple of three elements: the result of get_base64_from_file applied
        to the rendered graph, the Base64 encoding of the string form of the
        rendered graph, and the list of (x, y) points of the curve
    """
    parameters = {} if parameters is None else parameters

    xs, ys = case_statistics.get_kde_caseduration(dataframe, parameters)
    rendered = graphs_factory.apply_plot(
        xs, ys, variant="cases", parameters={"format": "svg"})

    rendered_b64 = base64.b64encode(str(rendered).encode('utf-8'))

    # index-based pairing is kept on purpose: it is driven by the length
    # of the x values only, exactly as the explicit loop it replaces
    points = [(xs[pos], ys[pos]) for pos in range(len(xs))]

    return get_base64_from_file(rendered), rendered_b64, points
Example #3
0
    def test_dfCasedurationPlotSemilogx(self):
        """Compute the case duration KDE (plain and JSON form) of the receipt CSV.

        The log is loaded through ``csv_import_adapter``; the results are
        only computed, not checked.
        """
        # to avoid static method warnings in tests,
        # that by construction of the unittest package have to be expressed in such way
        self.dummy_variable = "dummy_value"

        csv_path = os.path.join("input_data", "receipt.csv")
        frame = csv_import_adapter.import_dataframe_from_path(csv_path)
        _x_points, _y_points = pd_case_statistics.get_kde_caseduration(frame)
        kde_json = pd_case_statistics.get_kde_caseduration_json(frame)
        del kde_json
Example #4
0
    def test_dfCasedurationPlotSemilogx(self):
        """Compute the case duration KDE (plain and JSON form) of the receipt CSV.

        The CSV is read with pandas and its timestamp columns are converted
        before the statistics are computed; the results are not checked.
        """
        # to avoid static method warnings in tests,
        # that by construction of the unittest package have to be expressed in such way
        self.dummy_variable = "dummy_value"

        csv_path = os.path.join("input_data", "receipt.csv")
        frame = dataframe_utils.convert_timestamp_columns_in_df(pd.read_csv(csv_path))
        _x_points, _y_points = pd_case_statistics.get_kde_caseduration(frame)
        kde_json = pd_case_statistics.get_kde_caseduration_json(frame)
        del kde_json
Example #5
0
def view_case_duration_graph(log: Union[EventLog, pd.DataFrame], format: str = "png"):
    """
    Shows the case duration graph of a log

    Parameters
    -----------------
    log
        Log object (event log or Pandas dataframe)
    format
        Format used to render the graph (png, svg, ...)
    """
    # select the statistics implementation matching the type of the log;
    # dataframes are additionally validated by check_dataframe_columns
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.statistics.traces.pandas import case_statistics as stats_backend
    else:
        from pm4py.statistics.traces.log import case_statistics as stats_backend
    kde = stats_backend.get_kde_caseduration(log)

    from pm4py.visualization.graphs import visualizer as graphs_visualizer
    figure = graphs_visualizer.apply(
        kde[0],
        kde[1],
        variant=graphs_visualizer.Variants.CASES,
        parameters={"format": format},
    )
    graphs_visualizer.view(figure)
Example #6
0
def save_vis_case_duration_graph(log: Union[EventLog, pd.DataFrame], file_path: str):
    """
    Saves the case duration graph in the specified path

    The output format is the lower-cased extension of the file name in
    file_path, i.e. the text after the last "." of the last path component.

    Parameters
    ----------------
    log
        Log object (event log or Pandas dataframe)
    file_path
        Destination path; its file name must carry an extension
        (e.g. "graph.png")

    Raises
    ----------------
    ValueError
        If the file name in file_path does not contain a "."
    """
    import os

    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.statistics.traces.pandas import case_statistics
        graph = case_statistics.get_kde_caseduration(log)
    else:
        from pm4py.statistics.traces.log import case_statistics
        graph = case_statistics.get_kde_caseduration(log)

    # Look only at the file name and split on its LAST dot: searching the
    # whole path for the first dot breaks on "./out/graph.png" or
    # "run.1/graph.svg", where the dot belongs to a directory component.
    _, dot, extension = os.path.basename(file_path).rpartition(".")
    if not dot:
        raise ValueError(
            "cannot infer the output format: no file extension in %r" % (file_path,))
    image_format = extension.lower()

    from pm4py.visualization.graphs import visualizer as graphs_visualizer
    graph_vis = graphs_visualizer.apply(graph[0], graph[1], variant=graphs_visualizer.Variants.CASES,
                                          parameters={"format": image_format})
    graphs_visualizer.save(graph_vis, file_path)
Example #7
0
    import os
    import pandas as pd
    from pm4py.objects.log.util import dataframe_utils

    log_path = os.path.join("..", "..", "tests", "input_data",
                            "running-example.csv")

    dataset = pd.read_csv(log_path)
    dataset = dataframe_utils.convert_timestamp_columns_in_df(dataset)

# Script: computes the case duration KDE of `dataset` and displays it as a graph.
# NOTE(review): `dataset` is not created by these top-level statements; it has to
# exist already when they run (the comment below suggests it is provided by
# PowerBI) - confirm against the surrounding file.
import pandas as pd

# this part is required because the dataframe provided by PowerBI has strings
dataset["time:timestamp"] = pd.to_datetime(dataset["time:timestamp"])
# order the rows by timestamp before computing the statistics
dataset = dataset.sort_values("time:timestamp")

from pm4py.statistics.traces.pandas import case_statistics
from pm4py.visualization.graphs import visualizer as graphs_visualizer

# the result is unpacked into the two sequences that are passed to the plot below
x_cases, y_cases = case_statistics.get_kde_caseduration(dataset)

# build the graph with the CASES variant of the visualizer, requesting PNG output
graph_cases = graphs_visualizer.apply(
    x_cases,
    y_cases,
    variant=graphs_visualizer.Variants.CASES,
    parameters={
        graphs_visualizer.Variants.CASES.value.Parameters.FORMAT: "png"
    })

# hand the graph to the visualizer's matplotlib-based viewer
graphs_visualizer.matplotlib_view(graph_cases)