Esempio n. 1
0
def save_vis_events_per_time_graph(log: Union[EventLog, pd.DataFrame],
                                   file_path: str):
    """
    Saves the events per time graph in the specified path

    Parameters
    ----------------
    log
        Log object
    file_path
        Destination path
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.attributes.pandas import get as attributes_get
        graph = attributes_get.get_kde_date_attribute(
            log, parameters=get_properties(log))
    else:
        from pm4py.statistics.attributes.log import get as attributes_get
        graph = attributes_get.get_kde_date_attribute(
            log, parameters=get_properties(log))
    format = os.path.splitext(file_path)[1][1:]
    from pm4py.visualization.graphs import visualizer as graphs_visualizer
    graph_vis = graphs_visualizer.apply(
        graph[0],
        graph[1],
        variant=graphs_visualizer.Variants.DATES,
        parameters={"format": format})
    graphs_visualizer.save(graph_vis, file_path)
Esempio n. 2
0
def view_events_per_time_graph(log: Union[EventLog, pd.DataFrame],
                               format: str = "png"):
    """
    Visualizes the events per time graph

    Parameters
    -----------------
    log
        Log object
    format
        Format of the visualization (png, svg, ...)
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.attributes.pandas import get as attributes_get
        graph = attributes_get.get_kde_date_attribute(
            log, parameters=get_properties(log))
    else:
        from pm4py.statistics.attributes.log import get as attributes_get
        graph = attributes_get.get_kde_date_attribute(
            log, parameters=get_properties(log))
    from pm4py.visualization.graphs import visualizer as graphs_visualizer
    graph_vis = graphs_visualizer.apply(
        graph[0],
        graph[1],
        variant=graphs_visualizer.Variants.DATES,
        parameters={"format": format})
    graphs_visualizer.view(graph_vis)
Esempio n. 3
0
def discover_organizational_roles(log: Union[EventLog, pd.DataFrame]):
    """
    Mines the organizational roles

    Parameters
    ---------------
    log
        Event log or Pandas dataframe

    Returns
    ---------------
    roles
        Organizational roles. List where each role is a sublist with two elements:
        - The first element of the sublist is the list of activities belonging to a role.
        Each activity belongs to a single role
        - The second element of the sublist is a dictionary containing the resources of the role
        and the number of times they executed activities belonging to the role.
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    from pm4py.algo.organizational_mining.roles import algorithm as roles
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        return roles.apply(log, variant=roles.Variants.PANDAS, parameters=get_properties(log))
    else:
        return roles.apply(log, variant=roles.Variants.LOG, parameters=get_properties(log))
Esempio n. 4
0
def get_cycle_time(log: Union[EventLog, pd.DataFrame]) -> float:
    """
    Calculates the cycle time of the event log.

    The definition that has been followed is the one proposed in:
    https://www.presentationeze.com/presentations/lean-manufacturing-just-in-time/lean-manufacturing-just-in-time-full-details/process-cycle-time-analysis/calculate-cycle-time/#:~:text=Cycle%20time%20%3D%20Average%20time%20between,is%2024%20minutes%20on%20average.

    So:
    Cycle time  = Average time between completion of units.

    Example taken from the website:
    Consider a manufacturing facility, which is producing 100 units of product per 40 hour week.
    The average throughput rate is 1 unit per 0.4 hours, which is one unit every 24 minutes.
    Therefore the cycle time is 24 minutes on average.

    Parameters
    -----------------
    log
        Log object

    Returns
    -----------------
    cycle_time
        Cycle time (calculated with the aforementioned formula).
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.traces.cycle_time.pandas import get as cycle_time
        return cycle_time.apply(log, parameters=get_properties(log))
    else:
        from pm4py.statistics.traces.cycle_time.log import get as cycle_time
        return cycle_time.apply(log, parameters=get_properties(log))
Esempio n. 5
0
def get_rework_cases_per_activity(log: Union[EventLog, pd.DataFrame]) -> Dict[str, int]:
    """
    Find out for which activities of the log the rework (more than one occurrence in the trace for the activity)
    occurs.
    The output is a dictionary associating to each of the aforementioned activities
    the number of cases for which the rework occurred.

    Parameters
    ------------------
    log
        Log object

    Returns
    ------------------
    rework_dictionary
        Dictionary associating to each of the aforementioned activities the number of cases for which the rework
        occurred.
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.rework.pandas import get as rework_get
        return rework_get.apply(log, parameters=get_properties(log))
    else:
        from pm4py.statistics.rework.log import get as rework_get
        return rework_get.apply(log, parameters=get_properties(log))
Esempio n. 6
0
def get_variants_as_tuples(log: Union[EventLog, pd.DataFrame]) -> Dict[Tuple[str], List[Trace]]:
    """
    Gets the variants from the log
    (where the keys are tuples and not strings)

    Parameters
    --------------
    log
        Event log

    Returns
    --------------
    variants
        Dictionary of variants along with their count
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    import pm4py
    # the behavior of PM4Py is changed to allow this to work
    pm4py.util.variants_util.VARIANT_SPECIFICATION = pm4py.util.variants_util.VariantsSpecifications.LIST
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.variants.pandas import get
        return get.get_variants_count(log, parameters=get_properties(log))
    else:
        from pm4py.statistics.variants.log import get
        return get.get_variants(log, parameters=get_properties(log))
Esempio n. 7
0
def get_variants(log: Union[EventLog, pd.DataFrame]) -> Dict[str, List[Trace]]:
    """
    Gets the variants from the log

    Parameters
    --------------
    log
        Event log

    Returns
    --------------
    variants
        Dictionary of variants along with their count
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    import pm4py
    if pm4py.util.variants_util.VARIANT_SPECIFICATION == pm4py.util.variants_util.VariantsSpecifications.STRING:
        import warnings
        warnings.warn('pm4py.get_variants is deprecated. Please use pm4py.get_variants_as_tuples instead.')
    if pm4py.util.variants_util.VARIANT_SPECIFICATION == pm4py.util.variants_util.VariantsSpecifications.LIST:
        raise Exception('Please use pm4py.get_variants_as_tuples')
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.variants.pandas import get
        return get.get_variants_count(log, parameters=get_properties(log))
    else:
        from pm4py.statistics.variants.log import get
        return get.get_variants(log, parameters=get_properties(log))
Esempio n. 8
0
def discover_performance_dfg(log: Union[EventLog, pd.DataFrame], business_hours: bool = False, worktiming: List[int] = [7, 17], weekends: List[int] = [6, 7], workcalendar=constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR) -> Tuple[dict, dict, dict]:
    """
    Discovers a performance directly-follows graph from an event log

    Parameters
    ---------------
    log
        Event log
    business_hours
        Enables/disables the computation based on the business hours (default: False)
    worktiming
        (If the business hours are enabled) The hour range in which the resources of the log are working (default: 7 to 17)
    weekends
        (If the business hours are enabled) The weekends days (default: Saturday (6), Sunday (7))

    Returns
    ---------------
    performance_dfg
        Performance DFG
    start_activities
        Start activities
    end_activities
        End activities
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.util import constants
        properties = get_properties(log)
        from pm4py.algo.discovery.dfg.adapters.pandas.df_statistics import get_dfg_graph
        activity_key = properties[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in properties else xes_constants.DEFAULT_NAME_KEY
        timestamp_key = properties[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in properties else xes_constants.DEFAULT_TIMESTAMP_KEY
        case_id_key = properties[constants.PARAMETER_CONSTANT_CASEID_KEY] if constants.PARAMETER_CONSTANT_CASEID_KEY in properties else constants.CASE_CONCEPT_NAME
        dfg = get_dfg_graph(log, activity_key=activity_key, timestamp_key=timestamp_key, case_id_glue=case_id_key, measure="performance", perf_aggregation_key="all",
                            business_hours=business_hours, worktiming=worktiming, weekends=weekends, workcalendar=workcalendar)
        from pm4py.statistics.start_activities.pandas import get as start_activities_module
        from pm4py.statistics.end_activities.pandas import get as end_activities_module
        start_activities = start_activities_module.get_start_activities(log, parameters=properties)
        end_activities = end_activities_module.get_end_activities(log, parameters=properties)
    else:
        from pm4py.algo.discovery.dfg.variants import performance as dfg_discovery
        properties = get_properties(log)
        properties[dfg_discovery.Parameters.AGGREGATION_MEASURE] = "all"
        properties[dfg_discovery.Parameters.BUSINESS_HOURS] = business_hours
        properties[dfg_discovery.Parameters.WORKTIMING] = worktiming
        properties[dfg_discovery.Parameters.WEEKENDS] = weekends
        dfg = dfg_discovery.apply(log, parameters=properties)
        from pm4py.statistics.start_activities.log import get as start_activities_module
        from pm4py.statistics.end_activities.log import get as end_activities_module
        start_activities = start_activities_module.get_start_activities(log, parameters=properties)
        end_activities = end_activities_module.get_end_activities(log, parameters=properties)
    return dfg, start_activities, end_activities
Esempio n. 9
0
def conformance_diagnostics_alignments(log: EventLog, *args, multi_processing: bool = False) -> List[Dict[str, Any]]:
    """
    Apply the alignments algorithm between a log and a process model.
    The methods return the full alignment diagnostics.

    Parameters
    -------------
    log
        Event log
    args
        Specification of the process model
    multi_processing
        Boolean value that enables the multiprocessing (default: False)

    Returns
    -------------
    aligned_traces
        A list of alignments for each trace of the log (in the same order as the traces in the event log)
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    if len(args) == 3:
        if type(args[0]) is PetriNet:
            # Petri net alignments
            from pm4py.algo.conformance.alignments.petri_net import algorithm as alignments
            if multi_processing:
                return alignments.apply_multiprocessing(log, args[0], args[1], args[2], parameters=get_properties(log))
            else:
                return alignments.apply(log, args[0], args[1], args[2], parameters=get_properties(log))
        elif type(args[0]) is dict or type(args[0]) is Counter:
            # DFG alignments
            from pm4py.algo.conformance.alignments.dfg import algorithm as dfg_alignment
            return dfg_alignment.apply(log, args[0], args[1], args[2], parameters=get_properties(log))
    elif len(args) == 1:
        if type(args[0]) is ProcessTree:
            # process tree alignments
            from pm4py.algo.conformance.alignments.process_tree.variants import search_graph_pt
            if multi_processing:
                return search_graph_pt.apply_multiprocessing(log, args[0], parameters=get_properties(log))
            else:
                return search_graph_pt.apply(log, args[0], parameters=get_properties(log))
    # try to convert to Petri net
    import pm4py
    from pm4py.algo.conformance.alignments.petri_net import algorithm as alignments
    net, im, fm = pm4py.convert_to_petri_net(*args)
    if multi_processing:
        return alignments.apply_multiprocessing(log, net, im, fm, parameters=get_properties(log))
    else:
        return alignments.apply(log, net, im, fm, parameters=get_properties(log))
Esempio n. 10
0
def filter_directly_follows_relation(log: Union[EventLog, pd.DataFrame], relations: List[str], retain: bool = True) -> \
        Union[EventLog, pd.DataFrame]:
    """
    Retain traces that contain any of the specified 'directly follows' relations.
    For example, if relations == [('a','b'),('a','c')] and log [<a,b,c>,<a,c,b>,<a,d,b>]
    the resulting log will contain traces describing [<a,b,c>,<a,c,b>].

    Parameters
    ---------------
    log
        Log object
    relations
        List of activity name pairs, which are allowed/forbidden paths
    retain
        Parameter that says whether the paths
        should be kept/removed

    Returns
    ----------------
    filtered_log
        Filtered log object
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    parameters = get_properties(log)
    if check_is_pandas_dataframe(log):
        from pm4py.algo.filtering.pandas.paths import paths_filter
        parameters[paths_filter.Parameters.POSITIVE] = retain
        return paths_filter.apply(log, relations, parameters=parameters)
    else:
        from pm4py.algo.filtering.log.paths import paths_filter
        parameters[paths_filter.Parameters.POSITIVE] = retain
        return paths_filter.apply(log, relations, parameters=parameters)
Esempio n. 11
0
def filter_variants_percentage(
        log: Union[EventLog, pd.DataFrame],
        threshold: float = 0.8) -> Union[EventLog, pd.DataFrame]:
    """
    Filter a log on the percentage of variants

    Parameters
    ---------------
    log
        Event log
    threshold
        Percentage (scale 0.1) of admitted variants

    Returns
    --------------
    filtered_log
        Filtered log object
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    if check_is_pandas_dataframe(log):
        raise Exception(
            "filtering variants percentage on Pandas dataframe is currently not available! please convert the dataframe to event log with the method: log =  pm4py.convert_to_event_log(df)"
        )
    else:
        from pm4py.algo.filtering.log.variants import variants_filter
        return variants_filter.filter_log_variants_percentage(
            log, percentage=threshold, parameters=get_properties(log))
Esempio n. 12
0
def discover_handover_of_work_network(log: Union[EventLog, pd.DataFrame], beta=0):
    """
    Calculates the handover of work network of the event log.
    The handover of work network is essentially the DFG of the event log, however, using the
    resource as a node of the graph, instead of the activity.
    As such, to use this, resource information should be present in the event log.

    Parameters
    ---------------
    log
        Event log or Pandas dataframe
    beta
        beta parameter for Handover metric

    Returns
    ---------------
    metric_values
        Values of the metric
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    from pm4py.algo.organizational_mining.sna import algorithm as sna
    parameters = get_properties(log)
    parameters["beta"] = beta
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        return sna.apply(log, variant=sna.Variants.HANDOVER_PANDAS, parameters=parameters)
    else:
        return sna.apply(log, variant=sna.Variants.HANDOVER_LOG, parameters=parameters)
Esempio n. 13
0
def view_dfg(dfg: dict,
             start_activities: dict,
             end_activities: dict,
             format: str = "png",
             log: Optional[EventLog] = None):
    """
    Views a (composite) DFG

    Parameters
    -------------
    dfg
        DFG object
    start_activities
        Start activities
    end_activities
        End activities
    format
        Format of the output picture (default: png)
    """
    from pm4py.visualization.dfg import visualizer as dfg_visualizer
    dfg_parameters = dfg_visualizer.Variants.FREQUENCY.value.Parameters
    parameters = get_properties(log)
    parameters[dfg_parameters.FORMAT] = format
    parameters[dfg_parameters.START_ACTIVITIES] = start_activities
    parameters[dfg_parameters.END_ACTIVITIES] = end_activities
    gviz = dfg_visualizer.apply(dfg,
                                log=log,
                                variant=dfg_visualizer.Variants.FREQUENCY,
                                parameters=parameters)
    dfg_visualizer.view(gviz)
Esempio n. 14
0
def insert_artificial_start_end(
        log: Union[EventLog, pd.DataFrame]) -> Union[EventLog, pd.DataFrame]:
    """
    Inserts the artificial start/end activities in an event log / Pandas dataframe

    Parameters
    ------------------
    log
        Event log / Pandas dataframe

    Returns
    ------------------
    log
        Event log / Pandas dataframe with artificial start / end activities
    """
    properties = get_properties(log)
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.objects.log.util import dataframe_utils
        return dataframe_utils.insert_artificial_start_end(
            log, parameters=properties)
    else:
        from pm4py.objects.log.util import artificial
        return artificial.insert_artificial_start_end(log,
                                                      parameters=properties)
Esempio n. 15
0
def get_all_case_durations(log: Union[EventLog, pd.DataFrame], business_hours: bool = False, worktiming: List[int] = [7, 17], weekends: List[int] = [6, 7]) -> List[float]:
    """
    Gets the durations of the cases in the event log

    Parameters
    ---------------
    log
        Event log
    business_hours
        Enables/disables the computation based on the business hours (default: False)
    worktiming
        (If the business hours are enabled) The hour range in which the resources of the log are working (default: 7 to 17)
    weekends
        (If the business hours are enabled) The weekends days (default: Saturday (6), Sunday (7))

    Returns
    ---------------
    durations
        Case durations (as list)
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    properties = copy(get_properties(log))
    properties["business_hours"] = business_hours
    properties["worktiming"] = worktiming
    properties["weekends"] = weekends
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.traces.generic.pandas import case_statistics
        cd = case_statistics.get_cases_description(log, parameters=properties)
        return sorted([x["caseDuration"] for x in cd.values()])
    else:
        from pm4py.statistics.traces.generic.log import case_statistics
        return case_statistics.get_all_case_durations(log, parameters=properties)
Esempio n. 16
0
def filter_variants_top_k(log: Union[EventLog, pd.DataFrame],
                          k: int) -> Union[EventLog, pd.DataFrame]:
    """
    Keeps the top-k variants of the log

    Parameters
    -------------
    log
        Event log
    k
        Number of variants that should be kept
    parameters
        Parameters

    Returns
    -------------
    filtered_log
        Filtered log
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    parameters = get_properties(log)
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.variants import variants_filter
        return variants_filter.filter_variants_top_k(log,
                                                     k,
                                                     parameters=parameters)
    else:
        from pm4py.algo.filtering.log.variants import variants_filter
        return variants_filter.filter_variants_top_k(log,
                                                     k,
                                                     parameters=parameters)
Esempio n. 17
0
def discover_subcontracting_network(log: Union[EventLog, pd.DataFrame], n=2):
    """
    Calculates the subcontracting network of the process.

    Parameters
    ---------------
    log
        Event log or Pandas dataframe
    n
        n parameter for Subcontracting metric

    Returns
    ---------------
    metric_values
        Values of the metric
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    from pm4py.algo.organizational_mining.sna import algorithm as sna
    parameters = get_properties(log)
    parameters["n"] = n
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        return sna.apply(log, variant=sna.Variants.SUBCONTRACTING_PANDAS, parameters=parameters)
    else:
        return sna.apply(log, variant=sna.Variants.SUBCONTRACTING_LOG, parameters=parameters)
Esempio n. 18
0
def filter_activities_rework(
        log: Union[EventLog, pd.DataFrame],
        activity: str,
        min_occurrences: int = 2) -> Union[EventLog, pd.DataFrame]:
    """
    Filters the event log, keeping the cases where the specified activity occurs at least min_occurrences times.

    Parameters
    -----------------
    log
        Event log / Pandas dataframe
    activity
        Activity
    min_occurrences
        Minimum desidered number of occurrences

    Returns
    -----------------
    filtered_log
        Log with cases having at least min_occurrences occurrences of the given activity
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    parameters = get_properties(log)
    parameters["min_occurrences"] = min_occurrences
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.rework import rework_filter
        return rework_filter.apply(log, activity, parameters=parameters)
    else:
        from pm4py.algo.filtering.log.rework import rework_filter
        return rework_filter.apply(log, activity, parameters=parameters)
Esempio n. 19
0
def get_event_attribute_values(log: Union[EventLog, pd.DataFrame], attribute: str, count_once_per_case=False) -> Dict[str, int]:
    """
    Returns the values for a specified attribute

    Parameters
    ---------------
    log
        Log object
    attribute
        Attribute
    count_once_per_case
        If True, consider only an occurrence of the given attribute value inside a case
        (if there are multiple events sharing the same attribute value, count only 1 occurrence)

    Returns
    ---------------
    attribute_values
        Dictionary of values along with their count
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    parameters = get_properties(log)
    parameters["keep_once_per_case"] = count_once_per_case
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.attributes.pandas import get
        return get.get_attribute_values(log, attribute, parameters=parameters)
    else:
        from pm4py.statistics.attributes.log import get
        return get.get_attribute_values(log, attribute, parameters=parameters)
Esempio n. 20
0
def fitness_alignments(log: EventLog, petri_net: PetriNet, initial_marking: Marking, final_marking: Marking, multi_processing: bool = False) -> \
        Dict[str, float]:
    """
    Calculates the fitness using alignments

    Parameters
    --------------
    log
        Event log
    petri_net
        Petri net object
    initial_marking
        Initial marking
    final_marking
        Final marking
    multi_processing
        Boolean value that enables the multiprocessing (default: False)

    Returns
    ---------------
    fitness_dictionary
        dictionary describing average fitness (key: average_trace_fitness) and the percentage of fitting traces (key: percentage_of_fitting_traces)
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    from pm4py.algo.evaluation.replay_fitness import algorithm as replay_fitness
    parameters = get_properties(log)
    parameters["multiprocessing"] = multi_processing
    return replay_fitness.apply(log, petri_net, initial_marking, final_marking,
                                variant=replay_fitness.Variants.ALIGNMENT_BASED, parameters=parameters)
Esempio n. 21
0
def precision_alignments(log: EventLog, petri_net: PetriNet, initial_marking: Marking,
                         final_marking: Marking, multi_processing: bool = False) -> float:
    """
    Calculates the precision of the model w.r.t. the event log using alignments

    Parameters
    --------------
    log
        Event log
    petri_net
        Petri net object
    initial_marking
        Initial marking
    final_marking
        Final marking
    multi_processing
        Boolean value that enables the multiprocessing (default: False)

    Returns
    --------------
    precision
        float representing the precision value
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    from pm4py.algo.evaluation.precision import algorithm as precision_evaluator
    parameters = get_properties(log)
    parameters["multiprocessing"] = multi_processing
    return precision_evaluator.apply(log, petri_net, initial_marking, final_marking,
                                     variant=precision_evaluator.Variants.ALIGN_ETCONFORMANCE,
                                     parameters=parameters)
Esempio n. 22
0
def discover_heuristics_net(log: Union[EventLog, pd.DataFrame], dependency_threshold: float = 0.5,
                            and_threshold: float = 0.65,
                            loop_two_threshold: float = 0.5) -> HeuristicsNet:
    """
    Discovers an heuristics net

    Parameters
    ---------------
    log
        Event log
    dependency_threshold
        Dependency threshold (default: 0.5)
    and_threshold
        AND threshold (default: 0.65)
    loop_two_threshold
        Loop two threshold (default: 0.5)

    Returns
    --------------
    heu_net
        Heuristics net
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    from pm4py.algo.discovery.heuristics import algorithm as heuristics_miner
    heu_parameters = heuristics_miner.Variants.CLASSIC.value.Parameters
    parameters = get_properties(log)
    parameters[heu_parameters.DEPENDENCY_THRESH] = dependency_threshold
    parameters[heu_parameters.AND_MEASURE_THRESH] = and_threshold
    parameters[heu_parameters.LOOP_LENGTH_TWO_THRESH] = loop_two_threshold
    return heuristics_miner.apply_heu(log, variant=heuristics_miner.Variants.CLASSIC, parameters=parameters)
Esempio n. 23
0
def save_vis_performance_spectrum(log: Union[EventLog, pd.DataFrame],
                                  activities: List[str], file_path: str):
    """
    Saves the visualization of the performance spectrum to a file

    Parameters
    ---------------
    log
        Event log
    activities
        List of activities (in order) that is used to build the performance spectrum
    file_path
        Destination path (including the extension)
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    from pm4py.algo.discovery.performance_spectrum import algorithm as performance_spectrum
    perf_spectrum = performance_spectrum.apply(log,
                                               activities,
                                               parameters=get_properties(log))
    from pm4py.visualization.performance_spectrum import visualizer as perf_spectrum_visualizer
    from pm4py.visualization.performance_spectrum.variants import neato
    format = os.path.splitext(file_path)[1][1:]
    gviz = perf_spectrum_visualizer.apply(
        perf_spectrum, parameters={neato.Parameters.FORMAT.value: format})
    perf_spectrum_visualizer.save(gviz, file_path)
Esempio n. 24
0
def view_performance_spectrum(log: Union[EventLog, pd.DataFrame],
                              activities: List[str],
                              format: str = "png"):
    """
    Displays the performance spectrum

    Parameters
    ----------------
    perf_spectrum
        Performance spectrum
    format
        Format of the visualization (png, svg ...)
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    from pm4py.algo.discovery.performance_spectrum import algorithm as performance_spectrum
    perf_spectrum = performance_spectrum.apply(log,
                                               activities,
                                               parameters=get_properties(log))
    from pm4py.visualization.performance_spectrum import visualizer as perf_spectrum_visualizer
    from pm4py.visualization.performance_spectrum.variants import neato
    gviz = perf_spectrum_visualizer.apply(
        perf_spectrum, parameters={neato.Parameters.FORMAT.value: format})
    perf_spectrum_visualizer.view(gviz)
Esempio n. 25
0
def __builds_events_distribution_graph(log: Union[EventLog, pd.DataFrame],
                                       distr_type: str = "days_week"):
    """
    Internal method to build the events distribution graph
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    if distr_type == "days_month":
        title = "Distribution of the Events over the Days of a Month"
        x_axis = "Day of month"
        y_axis = "Number of Events"
    elif distr_type == "months":
        title = "Distribution of the Events over the Months"
        x_axis = "Month"
        y_axis = "Number of Events"
    elif distr_type == "years":
        title = "Distribution of the Events over the Years"
        x_axis = "Year"
        y_axis = "Number of Events"
    elif distr_type == "hours":
        title = "Distribution of the Events over the Hours"
        x_axis = "Hour (of day)"
        y_axis = "Number of Events"
    elif distr_type == "days_week":
        title = "Distribution of the Events over the Days of a Week"
        x_axis = "Day of the Week"
        y_axis = "Number of Events"
    elif distr_type == "weeks":
        title = "Distribution of the Events over the Weeks of a Year"
        x_axis = "Week of the Year"
        y_axis = "Number of Events"
    else:
        raise Exception("unsupported distribution specified.")

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.attributes.pandas import get as attributes_get
        x, y = attributes_get.get_events_distribution(
            log, distr_type=distr_type, parameters=get_properties(log))
    else:
        from pm4py.statistics.attributes.log import get as attributes_get
        x, y = attributes_get.get_events_distribution(
            log, distr_type=distr_type, parameters=get_properties(log))

    return title, x_axis, y_axis, x, y
Esempio n. 26
0
def discover_dfg(log: Union[EventLog, pd.DataFrame]) -> Tuple[dict, dict, dict]:
    """
    Discovers a DFG from a log

    Parameters
    --------------
    log
        Event log

    Returns
    --------------
    dfg
        DFG
    start_activities
        Start activities
    end_activities
        End activities
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.util import constants
        properties = get_properties(log)
        from pm4py.algo.discovery.dfg.adapters.pandas.df_statistics import get_dfg_graph
        activity_key = properties[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in properties else xes_constants.DEFAULT_NAME_KEY
        timestamp_key = properties[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in properties else xes_constants.DEFAULT_TIMESTAMP_KEY
        case_id_key = properties[constants.PARAMETER_CONSTANT_CASEID_KEY] if constants.PARAMETER_CONSTANT_CASEID_KEY in properties else constants.CASE_CONCEPT_NAME
        dfg = get_dfg_graph(log, activity_key=activity_key,
                            timestamp_key=timestamp_key,
                            case_id_glue=case_id_key)
        from pm4py.statistics.start_activities.pandas import get as start_activities_module
        from pm4py.statistics.end_activities.pandas import get as end_activities_module
        start_activities = start_activities_module.get_start_activities(log, parameters=properties)
        end_activities = end_activities_module.get_end_activities(log, parameters=properties)
    else:
        from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
        dfg = dfg_discovery.apply(log, parameters=get_properties(log))
        from pm4py.statistics.start_activities.log import get as start_activities_module
        from pm4py.statistics.end_activities.log import get as end_activities_module
        start_activities = start_activities_module.get_start_activities(log, parameters=get_properties(log))
        end_activities = end_activities_module.get_end_activities(log, parameters=get_properties(log))
    return dfg, start_activities, end_activities
Esempio n. 27
0
def filter_event_attribute_values(
        log: Union[EventLog, pd.DataFrame],
        attribute_key: str,
        values: Union[Set[str], List[str]],
        level: str = "case",
        retain: bool = True) -> Union[EventLog, pd.DataFrame]:
    """
    Filter a log object on the values of some event attribute

    Parameters
    --------------
    log
        Log object
    attribute_key
        Attribute to filter
    values
        Admitted (or forbidden) values
    level
        Specifies how the filter should be applied ('case' filters the cases where at least one occurrence happens,
        'event' filter the events eventually trimming the cases)
    retain
        Specified if the values should be kept or removed

    Returns
    --------------
    filtered_log
        Filtered log object
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    parameters = get_properties(log)
    parameters[constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = attribute_key
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.algo.filtering.pandas.attributes import attributes_filter
        if level == "event":
            parameters[attributes_filter.Parameters.POSITIVE] = retain
            return attributes_filter.apply_events(log,
                                                  values,
                                                  parameters=parameters)
        elif level == "case":
            parameters[attributes_filter.Parameters.POSITIVE] = retain
            return attributes_filter.apply(log, values, parameters=parameters)
    else:
        from pm4py.algo.filtering.log.attributes import attributes_filter
        if level == "event":
            parameters[attributes_filter.Parameters.POSITIVE] = retain
            return attributes_filter.apply_events(log,
                                                  values,
                                                  parameters=parameters)
        elif level == "case":
            parameters[attributes_filter.Parameters.POSITIVE] = retain
            return attributes_filter.apply(log, values, parameters=parameters)
Esempio n. 28
0
def filter_time_range(log: Union[EventLog, pd.DataFrame],
                      dt1: str,
                      dt2: str,
                      mode="events") -> Union[EventLog, pd.DataFrame]:
    """
    Filter a log on a time interval

    Parameters
    ----------------
    log
        Log object
    dt1
        Left extreme of the interval
    dt2
        Right extreme of the interval
    mode
        Modality of filtering (events, traces_contained, traces_intersecting)
        events: any event that fits the time frame is retained
        traces_contained: any trace completely contained in the timeframe is retained
        traces_intersecting: any trace intersecting with the time-frame is retained.

    Returns
    ----------------
    filtered_log
        Filtered log
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    if check_is_pandas_dataframe(log):
        from pm4py.algo.filtering.pandas.timestamp import timestamp_filter
        if mode == "events":
            return timestamp_filter.apply_events(
                log, dt1, dt2, parameters=get_properties(log))
        elif mode == "traces_contained":
            return timestamp_filter.filter_traces_contained(
                log, dt1, dt2, parameters=get_properties(log))
        elif mode == "traces_intersecting":
            return timestamp_filter.filter_traces_intersecting(
                log, dt1, dt2, parameters=get_properties(log))
        else:
            warnings.warn('mode provided: ' + mode +
                          ' is not recognized; original log returned!')
            return log
    else:
        from pm4py.algo.filtering.log.timestamp import timestamp_filter
        if mode == "events":
            return timestamp_filter.apply_events(
                log, dt1, dt2, parameters=get_properties(log))
        elif mode == "traces_contained":
            return timestamp_filter.filter_traces_contained(
                log, dt1, dt2, parameters=get_properties(log))
        elif mode == "traces_intersecting":
            return timestamp_filter.filter_traces_intersecting(
                log, dt1, dt2, parameters=get_properties(log))
        else:
            warnings.warn('mode provided: ' + mode +
                          ' is not recognized; original log returned!')
            return log
Esempio n. 29
0
def get_end_activities(log: Union[EventLog, pd.DataFrame]) -> Dict[str, int]:
    """
    Returns the end activities of a log

    Parameters
    ---------------
    log
        Lob object

    Returns
    ---------------
    end_activities
        Dictionary of end activities along with their count
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.end_activities.pandas import get
        return get.get_end_activities(log, parameters=get_properties(log))
    else:
        from pm4py.statistics.end_activities.log import get
        return get.get_end_activities(log, parameters=get_properties(log))
Esempio n. 30
0
def discover_eventually_follows_graph(log: Union[EventLog, pd.DataFrame]) -> Dict[Tuple[str, str], int]:
    """
    Gets the eventually follows graph from a log object

    Parameters
    ---------------
    log
        Log object

    Returns
    ---------------
    eventually_follows_graph
        Dictionary of tuples of activities that eventually follows each other; along with the number of occurrences
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.eventually_follows.pandas import get
        return get.apply(log, parameters=get_properties(log))
    else:
        from pm4py.statistics.eventually_follows.log import get
        return get.apply(log, parameters=get_properties(log))