Ejemplo n.º 1
0
def get_cycle_time(log: Union[EventLog, pd.DataFrame]) -> float:
    """
    Calculates the cycle time of the event log.

    The definition that has been followed is the one proposed in:
    https://www.presentationeze.com/presentations/lean-manufacturing-just-in-time/lean-manufacturing-just-in-time-full-details/process-cycle-time-analysis/calculate-cycle-time/#:~:text=Cycle%20time%20%3D%20Average%20time%20between,is%2024%20minutes%20on%20average.

    So:
    Cycle time  = Average time between completion of units.

    Example taken from the website:
    Consider a manufacturing facility, which is producing 100 units of product per 40 hour week.
    The average throughput rate is 1 unit per 0.4 hours, which is one unit every 24 minutes.
    Therefore the cycle time is 24 minutes on average.

    Parameters
    -----------------
    log
        Log object

    Returns
    -----------------
    cycle_time
        Cycle time (calculated with the aforementioned formula).
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.traces.cycle_time.pandas import get as cycle_time
        return cycle_time.apply(log, parameters=get_properties(log))
    else:
        from pm4py.statistics.traces.cycle_time.log import get as cycle_time
        return cycle_time.apply(log, parameters=get_properties(log))
Ejemplo n.º 2
0
def get_rework_cases_per_activity(log: Union[EventLog, pd.DataFrame]) -> Dict[str, int]:
    """
    Find out for which activities of the log the rework (more than one occurrence in the trace for the activity)
    occurs.
    The output is a dictionary associating to each of the aforementioned activities
    the number of cases for which the rework occurred.

    Parameters
    ------------------
    log
        Log object

    Returns
    ------------------
    rework_dictionary
        Dictionary associating to each of the aforementioned activities the number of cases for which the rework
        occurred.
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.rework.pandas import get as rework_get
        return rework_get.apply(log, parameters=get_properties(log))
    else:
        from pm4py.statistics.rework.log import get as rework_get
        return rework_get.apply(log, parameters=get_properties(log))
Ejemplo n.º 3
0
def get_variants_as_tuples(log: Union[EventLog, pd.DataFrame]) -> Dict[Tuple[str], List[Trace]]:
    """
    Gets the variants from the log
    (where the keys are tuples and not strings)

    Parameters
    --------------
    log
        Event log

    Returns
    --------------
    variants
        Dictionary of variants along with their count
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    import pm4py
    # the behavior of PM4Py is changed to allow this to work
    pm4py.util.variants_util.VARIANT_SPECIFICATION = pm4py.util.variants_util.VariantsSpecifications.LIST
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.variants.pandas import get
        return get.get_variants_count(log, parameters=get_properties(log))
    else:
        from pm4py.statistics.variants.log import get
        return get.get_variants(log, parameters=get_properties(log))
Ejemplo n.º 4
0
def get_variants(log: Union[EventLog, pd.DataFrame]) -> Dict[str, List[Trace]]:
    """
    Gets the variants from the log

    Parameters
    --------------
    log
        Event log

    Returns
    --------------
    variants
        Dictionary of variants along with their count
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    import pm4py
    if pm4py.util.variants_util.VARIANT_SPECIFICATION == pm4py.util.variants_util.VariantsSpecifications.STRING:
        import warnings
        warnings.warn('pm4py.get_variants is deprecated. Please use pm4py.get_variants_as_tuples instead.')
    if pm4py.util.variants_util.VARIANT_SPECIFICATION == pm4py.util.variants_util.VariantsSpecifications.LIST:
        raise Exception('Please use pm4py.get_variants_as_tuples')
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.variants.pandas import get
        return get.get_variants_count(log, parameters=get_properties(log))
    else:
        from pm4py.statistics.variants.log import get
        return get.get_variants(log, parameters=get_properties(log))
Ejemplo n.º 5
0
def get_all_case_durations(log: Union[EventLog, pd.DataFrame], business_hours: bool = False, worktiming: List[int] = [7, 17], weekends: List[int] = [6, 7]) -> List[float]:
    """
    Gets the durations of the cases in the event log

    Parameters
    ---------------
    log
        Event log
    business_hours
        Enables/disables the computation based on the business hours (default: False)
    worktiming
        (If the business hours are enabled) The hour range in which the resources of the log are working (default: 7 to 17)
    weekends
        (If the business hours are enabled) The weekends days (default: Saturday (6), Sunday (7))

    Returns
    ---------------
    durations
        Case durations (as list)
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    properties = copy(get_properties(log))
    properties["business_hours"] = business_hours
    properties["worktiming"] = worktiming
    properties["weekends"] = weekends
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.traces.generic.pandas import case_statistics
        cd = case_statistics.get_cases_description(log, parameters=properties)
        return sorted([x["caseDuration"] for x in cd.values()])
    else:
        from pm4py.statistics.traces.generic.log import case_statistics
        return case_statistics.get_all_case_durations(log, parameters=properties)
Ejemplo n.º 6
0
def insert_artificial_start_end(
        log: Union[EventLog, pd.DataFrame]) -> Union[EventLog, pd.DataFrame]:
    """
    Inserts the artificial start/end activities in an event log / Pandas dataframe

    Parameters
    ------------------
    log
        Event log / Pandas dataframe

    Returns
    ------------------
    log
        Event log / Pandas dataframe with artificial start / end activities
    """
    properties = get_properties(log)
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.objects.log.util import dataframe_utils
        return dataframe_utils.insert_artificial_start_end(
            log, parameters=properties)
    else:
        from pm4py.objects.log.util import artificial
        return artificial.insert_artificial_start_end(log,
                                                      parameters=properties)
Ejemplo n.º 7
0
def filter_variants_top_k(log: Union[EventLog, pd.DataFrame],
                          k: int) -> Union[EventLog, pd.DataFrame]:
    """
    Keeps the top-k variants of the log

    Parameters
    -------------
    log
        Event log
    k
        Number of variants that should be kept
    parameters
        Parameters

    Returns
    -------------
    filtered_log
        Filtered log
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    parameters = get_properties(log)
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.variants import variants_filter
        return variants_filter.filter_variants_top_k(log,
                                                     k,
                                                     parameters=parameters)
    else:
        from pm4py.algo.filtering.log.variants import variants_filter
        return variants_filter.filter_variants_top_k(log,
                                                     k,
                                                     parameters=parameters)
Ejemplo n.º 8
0
def get_event_attribute_values(log: Union[EventLog, pd.DataFrame], attribute: str, count_once_per_case=False) -> Dict[str, int]:
    """
    Returns the values for a specified attribute

    Parameters
    ---------------
    log
        Log object
    attribute
        Attribute
    count_once_per_case
        If True, consider only an occurrence of the given attribute value inside a case
        (if there are multiple events sharing the same attribute value, count only 1 occurrence)

    Returns
    ---------------
    attribute_values
        Dictionary of values along with their count
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    parameters = get_properties(log)
    parameters["keep_once_per_case"] = count_once_per_case
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.attributes.pandas import get
        return get.get_attribute_values(log, attribute, parameters=parameters)
    else:
        from pm4py.statistics.attributes.log import get
        return get.get_attribute_values(log, attribute, parameters=parameters)
Ejemplo n.º 9
0
def save_vis_events_per_time_graph(log: Union[EventLog, pd.DataFrame],
                                   file_path: str):
    """
    Saves the events per time graph in the specified path

    Parameters
    ----------------
    log
        Log object
    file_path
        Destination path
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.attributes.pandas import get as attributes_get
        graph = attributes_get.get_kde_date_attribute(
            log, parameters=get_properties(log))
    else:
        from pm4py.statistics.attributes.log import get as attributes_get
        graph = attributes_get.get_kde_date_attribute(
            log, parameters=get_properties(log))
    format = os.path.splitext(file_path)[1][1:]
    from pm4py.visualization.graphs import visualizer as graphs_visualizer
    graph_vis = graphs_visualizer.apply(
        graph[0],
        graph[1],
        variant=graphs_visualizer.Variants.DATES,
        parameters={"format": format})
    graphs_visualizer.save(graph_vis, file_path)
Ejemplo n.º 10
0
def get_trace_attribute_values(log: Union[EventLog, pd.DataFrame], attribute: str) -> Dict[str, int]:
    """
    Returns the values for a specified trace attribute

    Parameters
    ---------------
    log
        Log object
    attribute
        Attribute

    Returns
    ---------------
    attribute_values
        Dictionary of values along with their count
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.attributes.pandas import get
        return get.get_attribute_values(log, attribute)
    else:
        from pm4py.statistics.attributes.log import get
        return get.get_trace_attribute_values(log, attribute)
Ejemplo n.º 11
0
def view_events_per_time_graph(log: Union[EventLog, pd.DataFrame],
                               format: str = "png"):
    """
    Visualizes the events per time graph

    Parameters
    -----------------
    log
        Log object
    format
        Format of the visualization (png, svg, ...)
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.attributes.pandas import get as attributes_get
        graph = attributes_get.get_kde_date_attribute(
            log, parameters=get_properties(log))
    else:
        from pm4py.statistics.attributes.log import get as attributes_get
        graph = attributes_get.get_kde_date_attribute(
            log, parameters=get_properties(log))
    from pm4py.visualization.graphs import visualizer as graphs_visualizer
    graph_vis = graphs_visualizer.apply(
        graph[0],
        graph[1],
        variant=graphs_visualizer.Variants.DATES,
        parameters={"format": format})
    graphs_visualizer.view(graph_vis)
Ejemplo n.º 12
0
def discover_handover_of_work_network(log: Union[EventLog, pd.DataFrame], beta=0):
    """
    Calculates the handover of work network of the event log.
    The handover of work network is essentially the DFG of the event log, however, using the
    resource as a node of the graph, instead of the activity.
    As such, to use this, resource information should be present in the event log.

    Parameters
    ---------------
    log
        Event log or Pandas dataframe
    beta
        beta parameter for Handover metric

    Returns
    ---------------
    metric_values
        Values of the metric
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    from pm4py.algo.organizational_mining.sna import algorithm as sna
    parameters = get_properties(log)
    parameters["beta"] = beta
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        return sna.apply(log, variant=sna.Variants.HANDOVER_PANDAS, parameters=parameters)
    else:
        return sna.apply(log, variant=sna.Variants.HANDOVER_LOG, parameters=parameters)
Ejemplo n.º 13
0
def discover_organizational_roles(log: Union[EventLog, pd.DataFrame]):
    """
    Mines the organizational roles

    Parameters
    ---------------
    log
        Event log or Pandas dataframe

    Returns
    ---------------
    roles
        Organizational roles. List where each role is a sublist with two elements:
        - The first element of the sublist is the list of activities belonging to a role.
        Each activity belongs to a single role
        - The second element of the sublist is a dictionary containing the resources of the role
        and the number of times they executed activities belonging to the role.
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    from pm4py.algo.organizational_mining.roles import algorithm as roles
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        return roles.apply(log, variant=roles.Variants.PANDAS, parameters=get_properties(log))
    else:
        return roles.apply(log, variant=roles.Variants.LOG, parameters=get_properties(log))
Ejemplo n.º 14
0
def discover_subcontracting_network(log: Union[EventLog, pd.DataFrame], n=2):
    """
    Calculates the subcontracting network of the process.

    Parameters
    ---------------
    log
        Event log or Pandas dataframe
    n
        n parameter for Subcontracting metric

    Returns
    ---------------
    metric_values
        Values of the metric
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    from pm4py.algo.organizational_mining.sna import algorithm as sna
    parameters = get_properties(log)
    parameters["n"] = n
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        return sna.apply(log, variant=sna.Variants.SUBCONTRACTING_PANDAS, parameters=parameters)
    else:
        return sna.apply(log, variant=sna.Variants.SUBCONTRACTING_LOG, parameters=parameters)
Ejemplo n.º 15
0
def filter_activities_rework(
        log: Union[EventLog, pd.DataFrame],
        activity: str,
        min_occurrences: int = 2) -> Union[EventLog, pd.DataFrame]:
    """
    Filters the event log, keeping the cases where the specified activity occurs at least min_occurrences times.

    Parameters
    -----------------
    log
        Event log / Pandas dataframe
    activity
        Activity
    min_occurrences
        Minimum desidered number of occurrences

    Returns
    -----------------
    filtered_log
        Log with cases having at least min_occurrences occurrences of the given activity
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    parameters = get_properties(log)
    parameters["min_occurrences"] = min_occurrences
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.rework import rework_filter
        return rework_filter.apply(log, activity, parameters=parameters)
    else:
        from pm4py.algo.filtering.log.rework import rework_filter
        return rework_filter.apply(log, activity, parameters=parameters)
Ejemplo n.º 16
0
def filter_event_attribute_values(
        log: Union[EventLog, pd.DataFrame],
        attribute_key: str,
        values: Union[Set[str], List[str]],
        level: str = "case",
        retain: bool = True) -> Union[EventLog, pd.DataFrame]:
    """
    Filter a log object on the values of some event attribute

    Parameters
    --------------
    log
        Log object
    attribute_key
        Attribute to filter
    values
        Admitted (or forbidden) values
    level
        Specifies how the filter should be applied ('case' filters the cases where at least one occurrence happens,
        'event' filter the events eventually trimming the cases)
    retain
        Specified if the values should be kept or removed

    Returns
    --------------
    filtered_log
        Filtered log object
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    parameters = get_properties(log)
    parameters[constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = attribute_key
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.attributes import attributes_filter
        if level == "event":
            parameters[attributes_filter.Parameters.POSITIVE] = retain
            return attributes_filter.apply_events(log,
                                                  values,
                                                  parameters=parameters)
        elif level == "case":
            parameters[attributes_filter.Parameters.POSITIVE] = retain
            return attributes_filter.apply(log, values, parameters=parameters)
    else:
        from pm4py.algo.filtering.log.attributes import attributes_filter
        if level == "event":
            parameters[attributes_filter.Parameters.POSITIVE] = retain
            return attributes_filter.apply_events(log,
                                                  values,
                                                  parameters=parameters)
        elif level == "case":
            parameters[attributes_filter.Parameters.POSITIVE] = retain
            return attributes_filter.apply(log, values, parameters=parameters)
Ejemplo n.º 17
0
def project_on_event_attribute(log: Union[EventLog, pd.DataFrame], attribute_key=xes_constants.DEFAULT_NAME_KEY) -> \
List[List[str]]:
    """
    Project the event log on a specified event attribute. The result is a list, containing a list for each case:
    all the cases are transformed to list of values for the specified attribute.

    Parameters
    --------------------
    log
        Event log / Pandas dataframe
    attribute_key
        The attribute to be used

    Returns
    --------------------
    projected_cases
        Projection on the given attribute (a list containing, for each case, a list of its values for the
        specified attribute).

        Example:

        pm4py.project_on_event_attribute(log, "concept:name")

        [['register request', 'examine casually', 'check ticket', 'decide', 'reinitiate request', 'examine thoroughly', 'check ticket', 'decide', 'pay compensation'],
        ['register request', 'check ticket', 'examine casually', 'decide', 'pay compensation'],
        ['register request', 'examine thoroughly', 'check ticket', 'decide', 'reject request'],
        ['register request', 'examine casually', 'check ticket', 'decide', 'pay compensation'],
        ['register request', 'examine casually', 'check ticket', 'decide', 'reinitiate request', 'check ticket', 'examine casually', 'decide', 'reinitiate request', 'examine casually', 'check ticket', 'decide', 'reject request'],
        ['register request', 'check ticket', 'examine thoroughly', 'decide', 'reject request']]
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    output = []
    if pandas_utils.check_is_pandas_dataframe(log):
        pandas_utils.check_pandas_dataframe_columns(log)
        from pm4py.streaming.conversion import from_pandas
        it = from_pandas.apply(
            log,
            parameters={from_pandas.Parameters.ACTIVITY_KEY: attribute_key})
        for trace in it:
            output.append([
                x[xes_constants.DEFAULT_NAME_KEY]
                if xes_constants.DEFAULT_NAME_KEY is not None else None
                for x in trace
            ])
    else:
        for trace in log:
            output.append([
                x[attribute_key] if attribute_key is not None else None
                for x in trace
            ])
    return output
Ejemplo n.º 18
0
def discover_performance_dfg(log: Union[EventLog, pd.DataFrame], business_hours: bool = False, worktiming: List[int] = [7, 17], weekends: List[int] = [6, 7], workcalendar=constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR) -> Tuple[dict, dict, dict]:
    """
    Discovers a performance directly-follows graph from an event log

    Parameters
    ---------------
    log
        Event log
    business_hours
        Enables/disables the computation based on the business hours (default: False)
    worktiming
        (If the business hours are enabled) The hour range in which the resources of the log are working (default: 7 to 17)
    weekends
        (If the business hours are enabled) The weekends days (default: Saturday (6), Sunday (7))

    Returns
    ---------------
    performance_dfg
        Performance DFG
    start_activities
        Start activities
    end_activities
        End activities
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.util import constants
        properties = get_properties(log)
        from pm4py.algo.discovery.dfg.adapters.pandas.df_statistics import get_dfg_graph
        activity_key = properties[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in properties else xes_constants.DEFAULT_NAME_KEY
        timestamp_key = properties[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in properties else xes_constants.DEFAULT_TIMESTAMP_KEY
        case_id_key = properties[constants.PARAMETER_CONSTANT_CASEID_KEY] if constants.PARAMETER_CONSTANT_CASEID_KEY in properties else constants.CASE_CONCEPT_NAME
        dfg = get_dfg_graph(log, activity_key=activity_key, timestamp_key=timestamp_key, case_id_glue=case_id_key, measure="performance", perf_aggregation_key="all",
                            business_hours=business_hours, worktiming=worktiming, weekends=weekends, workcalendar=workcalendar)
        from pm4py.statistics.start_activities.pandas import get as start_activities_module
        from pm4py.statistics.end_activities.pandas import get as end_activities_module
        start_activities = start_activities_module.get_start_activities(log, parameters=properties)
        end_activities = end_activities_module.get_end_activities(log, parameters=properties)
    else:
        from pm4py.algo.discovery.dfg.variants import performance as dfg_discovery
        properties = get_properties(log)
        properties[dfg_discovery.Parameters.AGGREGATION_MEASURE] = "all"
        properties[dfg_discovery.Parameters.BUSINESS_HOURS] = business_hours
        properties[dfg_discovery.Parameters.WORKTIMING] = worktiming
        properties[dfg_discovery.Parameters.WEEKENDS] = weekends
        dfg = dfg_discovery.apply(log, parameters=properties)
        from pm4py.statistics.start_activities.log import get as start_activities_module
        from pm4py.statistics.end_activities.log import get as end_activities_module
        start_activities = start_activities_module.get_start_activities(log, parameters=properties)
        end_activities = end_activities_module.get_end_activities(log, parameters=properties)
    return dfg, start_activities, end_activities
Ejemplo n.º 19
0
def filter_between(log: Union[EventLog, pd.DataFrame], act1: str,
                   act2: str) -> Union[EventLog, pd.DataFrame]:
    """
    Finds all the sub-cases leading from an event with activity "act1" to an event with activity "act2" in the log,
    and returns a log containing only them.

    Example:

    Log
    A B C D E F
    A B E F C
    A B F C B C B E F C

    act1 = B
    act2 = C

    Returned sub-cases:
    B C (from the first case)
    B E F C (from the second case)
    B F C (from the third case)
    B C (from the third case)
    B E F C (from the third case)

    Parameters
    -----------------
    log
        Event log / Pandas dataframe
    act1
        Source activity
    act2
        Target activity

    Returns
    -----------------
    filtered_log
        Log containing all the subcases
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    parameters = get_properties(log)
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.between import between_filter
        return between_filter.apply(log, act1, act2, parameters=parameters)
    else:
        from pm4py.algo.filtering.log.between import between_filter
        return between_filter.apply(log, act1, act2, parameters=parameters)
Ejemplo n.º 20
0
def filter_suffixes(log: Union[EventLog, pd.DataFrame],
                    activity: str,
                    strict=True,
                    first_or_last="first"):
    """
    Filters the log, keeping the suffixes from a given activity. E.g., for a log with traces:

    A,B,C,D
    A,B,Z,A,B,C,D
    A,B,C,D,C,E,C,F

    The suffixes from "C" are respectively:

    D
    D
    D,C,E,C,F

    Parameters
    ------------------
    log
        Event log / Pandas dataframe
    activity
        Target activity of the filter
    strict
        Applies the filter strictly (cuts the occurrences of the selected activity).
    first_or_last
        Decides if the first or last occurrence of an activity should be selected as baseline for the filter.

    Returns
    ------------------
    filtered_log
        Filtered log / dataframe
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    parameters = get_properties(log)
    parameters["strict"] = strict
    parameters["first_or_last"] = first_or_last

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.suffixes import suffix_filter
        return suffix_filter.apply(log, activity, parameters=parameters)
    else:
        from pm4py.algo.filtering.log.suffixes import suffix_filter
        return suffix_filter.apply(log, activity, parameters=parameters)
Ejemplo n.º 21
0
def __builds_events_distribution_graph(log: Union[EventLog, pd.DataFrame],
                                       distr_type: str = "days_week"):
    """
    Internal method to build the events distribution graph
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    if distr_type == "days_month":
        title = "Distribution of the Events over the Days of a Month"
        x_axis = "Day of month"
        y_axis = "Number of Events"
    elif distr_type == "months":
        title = "Distribution of the Events over the Months"
        x_axis = "Month"
        y_axis = "Number of Events"
    elif distr_type == "years":
        title = "Distribution of the Events over the Years"
        x_axis = "Year"
        y_axis = "Number of Events"
    elif distr_type == "hours":
        title = "Distribution of the Events over the Hours"
        x_axis = "Hour (of day)"
        y_axis = "Number of Events"
    elif distr_type == "days_week":
        title = "Distribution of the Events over the Days of a Week"
        x_axis = "Day of the Week"
        y_axis = "Number of Events"
    elif distr_type == "weeks":
        title = "Distribution of the Events over the Weeks of a Year"
        x_axis = "Week of the Year"
        y_axis = "Number of Events"
    else:
        raise Exception("unsupported distribution specified.")

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.attributes.pandas import get as attributes_get
        x, y = attributes_get.get_events_distribution(
            log, distr_type=distr_type, parameters=get_properties(log))
    else:
        from pm4py.statistics.attributes.log import get as attributes_get
        x, y = attributes_get.get_events_distribution(
            log, distr_type=distr_type, parameters=get_properties(log))

    return title, x_axis, y_axis, x, y
Ejemplo n.º 22
0
def filter_log_relative_occurrence_event_attribute(
        log: Union[EventLog, pd.DataFrame],
        min_relative_stake: float,
        attribute_key: str = xes_constants.DEFAULT_NAME_KEY,
        level="cases") -> Union[EventLog, pd.DataFrame]:
    """
    Filters the event log keeping only the events having an attribute value which occurs:
    - in at least the specified (min_relative_stake) percentage of events, when level="events"
    - in at least the specified (min_relative_stake) percentage of cases, when level="cases"

    Parameters
    -------------------
    log
        Event log / Pandas dataframe
    min_relative_stake
        Minimum percentage of cases (expressed as a number between 0 and 1) in which the attribute should occur.
    attribute_key
        The attribute to filter
    level
        The level of the filter (if level="events", then events / if level="cases", then cases)

    Returns
    ------------------
    filtered_log
        Filtered event log
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    parameters = get_properties(log)
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.attributes import attributes_filter
        parameters[attributes_filter.Parameters.ATTRIBUTE_KEY] = attribute_key
        parameters[attributes_filter.Parameters.
                   KEEP_ONCE_PER_CASE] = True if level == "cases" else False
        return attributes_filter.filter_df_relative_occurrence_event_attribute(
            log, min_relative_stake, parameters=parameters)
    else:
        from pm4py.algo.filtering.log.attributes import attributes_filter
        parameters[attributes_filter.Parameters.ATTRIBUTE_KEY] = attribute_key
        parameters[attributes_filter.Parameters.
                   KEEP_ONCE_PER_CASE] = True if level == "cases" else False
        return attributes_filter.filter_log_relative_occurrence_event_attribute(
            log, min_relative_stake, parameters=parameters)
Ejemplo n.º 23
0
def filter_paths_performance(log: Union[EventLog, pd.DataFrame],
                             path: Tuple[str, str],
                             min_performance: float,
                             max_performance: float,
                             keep=True) -> Union[EventLog, pd.DataFrame]:
    """
    Filters the event log, either:
    - (keep=True) keeping the cases having the specified path (tuple of 2 activities) with a duration included between min_performance and max_performance
    - (keep=False) discarding the cases having the specified path with a duration included between min_performance and max_performance

    Parameters
    ----------------
    log
        Event log
    path
        Tuple of two activities (source_activity, target_activity)
    min_performance
        Minimum allowed performance (of the path)
    max_performance
        Maximum allowed performance (of the path)
    keep
        Keep/discard the cases having the specified path with a duration included between min_performance and max_performance

    Returns
    ----------------
    filtered_log
        Filtered log with the desidered behavior
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    parameters = get_properties(log)
    parameters["positive"] = keep
    parameters["min_performance"] = min_performance
    parameters["max_performance"] = max_performance
    path = tuple(path)
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.paths import paths_filter
        return paths_filter.apply_performance(log, path, parameters=parameters)
    else:
        from pm4py.algo.filtering.log.paths import paths_filter
        return paths_filter.apply_performance(log, path, parameters=parameters)
Ejemplo n.º 24
0
def discover_dfg(log: Union[EventLog, pd.DataFrame]) -> Tuple[dict, dict, dict]:
    """
    Discovers a DFG from a log

    Parameters
    --------------
    log
        Event log

    Returns
    --------------
    dfg
        DFG
    start_activities
        Start activities
    end_activities
        End activities
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.util import constants
        properties = get_properties(log)
        from pm4py.algo.discovery.dfg.adapters.pandas.df_statistics import get_dfg_graph
        activity_key = properties[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in properties else xes_constants.DEFAULT_NAME_KEY
        timestamp_key = properties[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in properties else xes_constants.DEFAULT_TIMESTAMP_KEY
        case_id_key = properties[constants.PARAMETER_CONSTANT_CASEID_KEY] if constants.PARAMETER_CONSTANT_CASEID_KEY in properties else constants.CASE_CONCEPT_NAME
        dfg = get_dfg_graph(log, activity_key=activity_key,
                            timestamp_key=timestamp_key,
                            case_id_glue=case_id_key)
        from pm4py.statistics.start_activities.pandas import get as start_activities_module
        from pm4py.statistics.end_activities.pandas import get as end_activities_module
        start_activities = start_activities_module.get_start_activities(log, parameters=properties)
        end_activities = end_activities_module.get_end_activities(log, parameters=properties)
    else:
        from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
        dfg = dfg_discovery.apply(log, parameters=get_properties(log))
        from pm4py.statistics.start_activities.log import get as start_activities_module
        from pm4py.statistics.end_activities.log import get as end_activities_module
        start_activities = start_activities_module.get_start_activities(log, parameters=get_properties(log))
        end_activities = end_activities_module.get_end_activities(log, parameters=get_properties(log))
    return dfg, start_activities, end_activities
Ejemplo n.º 25
0
def filter_trace_attribute_values(
        log: Union[EventLog, pd.DataFrame],
        attribute_key: str,
        values: Union[Set[str], List[str]],
        retain: bool = True) -> Union[EventLog, pd.DataFrame]:
    """
    Filter a log on the values of a trace attribute

    Parameters
    --------------
    log
        Event log
    attribute_key
        Attribute to filter
    values
        Values to filter (list of)
    retain
        Boolean value (keep/discard matching traces)

    Returns
    --------------
    filtered_log
        Filtered event log
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    parameters = get_properties(log)
    parameters[constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = attribute_key
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.attributes import attributes_filter
        parameters[attributes_filter.Parameters.POSITIVE] = retain
        return attributes_filter.apply(log, values, parameters=parameters)
    else:
        from pm4py.algo.filtering.log.attributes import attributes_filter
        parameters[attributes_filter.Parameters.POSITIVE] = retain
        return attributes_filter.apply_trace_attribute(log,
                                                       values,
                                                       parameters=parameters)
Ejemplo n.º 26
0
def filter_end_activities(
        log: Union[EventLog, pd.DataFrame],
        activities: Union[Set[str], List[str]],
        retain: bool = True) -> Union[EventLog, pd.DataFrame]:
    """
    Filter cases having an end activity in the provided list

    Parameters
    ---------------
    log
        Log object
    activities
        List of admitted end activities
    retain
        if True, we retain the traces containing the given activities, if false, we drop the traces


    Returns
    ---------------
    filtered_log
        Filtered log object
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    parameters = get_properties(log)
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.end_activities import end_activities_filter
        parameters[end_activities_filter.Parameters.POSITIVE] = retain
        return end_activities_filter.apply(log,
                                           activities,
                                           parameters=parameters)
    else:
        from pm4py.algo.filtering.log.end_activities import end_activities_filter
        parameters[end_activities_filter.Parameters.POSITIVE] = retain
        return end_activities_filter.apply(log,
                                           activities,
                                           parameters=parameters)
Ejemplo n.º 27
0
def get_event_attributes(log: Union[EventLog, pd.DataFrame]) -> List[str]:
    """
    Returns the attributes at the event level of the log

    Parameters
    ---------------
    log
        Log object

    Returns
    ---------------
    attributes_list
        List of attributes contained in the log
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        return list(log.columns)
    else:
        from pm4py.statistics.attributes.log import get
        return list(get.get_all_event_attributes_from_log(log))
Ejemplo n.º 28
0
def discover_activity_based_resource_similarity(log: Union[EventLog, pd.DataFrame]):
    """
    Calculates similarity between the resources in the event log, based on their activity profiles.

    Parameters
    ---------------
    log
        Event log or Pandas dataframe

    Returns
    ---------------
    metric_values
        Values of the metric
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    from pm4py.algo.organizational_mining.sna import algorithm as sna
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        return sna.apply(log, variant=sna.Variants.JOINTACTIVITIES_PANDAS, parameters=get_properties(log))
    else:
        return sna.apply(log, variant=sna.Variants.JOINTACTIVITIES_LOG, parameters=get_properties(log))
Ejemplo n.º 29
0
def filter_case_performance(
        log: Union[EventLog, pd.DataFrame], min_performance: float,
        max_performance: float) -> Union[EventLog, pd.DataFrame]:
    """
    Filters the event log, keeping the cases having a duration (the timestamp of the last event minus the timestamp
    of the first event) included between min_performance and max_performance

    Parameters
    ----------------
    log
        Event log / Pandas dataframe
    min_performance
        Minimum allowed case duration
    max_performance
        Maximum allowed case duration

    Returns
    ----------------
    filtered_log
        Log with cases having a duration in the specified range
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    parameters = get_properties(log)
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.cases import case_filter
        return case_filter.filter_case_performance(log,
                                                   min_performance,
                                                   max_performance,
                                                   parameters=parameters)
    else:
        from pm4py.algo.filtering.log.cases import case_filter
        return case_filter.filter_case_performance(log,
                                                   min_performance,
                                                   max_performance,
                                                   parameters=parameters)
Ejemplo n.º 30
0
def discover_eventually_follows_graph(log: Union[EventLog, pd.DataFrame]) -> Dict[Tuple[str, str], int]:
    """
    Gets the eventually follows graph from a log object

    Parameters
    ---------------
    log
        Log object

    Returns
    ---------------
    eventually_follows_graph
        Dictionary of tuples of activities that eventually follows each other; along with the number of occurrences
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.eventually_follows.pandas import get
        return get.apply(log, parameters=get_properties(log))
    else:
        from pm4py.statistics.eventually_follows.log import get
        return get.apply(log, parameters=get_properties(log))