Example #1
0
def log_statistics(logpath):
    """
    Computes basic statistics of an event log given its path:
    the number of events, cases, activities and variants.

    Parameters:
        logpath (str): Path of event log

    Returns:
        events (int): Number of events
        cases (int): Number of traces
        activities (int): Number of activities
        variants (int): Number of variants
    """
    log = importer.apply(logpath)
    df = log_converter.apply(log, variant=log_converter.Variants.TO_DATA_FRAME)
    n_events = len(df)
    n_cases = df["case:concept:name"].nunique()
    n_activities = df["concept:name"].nunique()
    n_variants = len(variants_filter.get_variants(log))
    return n_events, n_cases, n_activities, n_variants
Example #2
0
def write_csv(log, file_path):
    """
    Exports an event log to a CSV file.

    Parameters
    ---------------
    log
        Event log_skeleton
    file_path
        Destination path

    Returns
    --------------
    void
    """
    from pm4py.objects.conversion.log import converter
    # convert to a dataframe first, then dump it without the index column
    df = converter.apply(log, variant=converter.Variants.TO_DATA_FRAME)
    df.to_csv(file_path, index=False)
Example #3
0
def __export_log_as_string(log, parameters=None):
    """
    Renders the given log as a CSV-like string.

    Parameters
    -----------
    log: :class:`pm4py.log.log.EventLog`
        Event log. Also, can take a log and convert it to event stream
    parameters
        Possible parameters of the algorithm (currently unused)

    Returns
    -----------
    string
        String representing the CSV log
    """
    dataframe = converter.apply(log, variant=converter.Variants.TO_DATA_FRAME)
    return dataframe.to_string()
Example #4
0
def get_stream_from_dataframe(df, parameters=None):
    """
    Converts the given MDL dataframe into an event stream, after sorting
    its events and exploding it when it is in the "succint" representation.

    Parameters
    --------------
    df
        Dataframe (must expose a ``type`` column/attribute and the
        ``event_timestamp`` / ``event_id`` columns)
    parameters
        Parameters of the algorithm (currently unused)

    Returns
    --------------
    stream
        Event stream
    """
    parameters = {} if parameters is None else parameters

    original_type = df.type
    df = df.sort_values(["event_timestamp", "event_id"])
    if original_type == "succint":
        df = succint_mdl_to_exploded_mdl.apply(df)

    # keep non-event columns plus the three essential event columns
    keep = []
    for col in df.columns:
        if not col.startswith("event") or col in ("event_activity", "event_id", "event_timestamp"):
            keep.append(col)
    df = df[keep]

    return converter.apply(df, variant=converter.Variants.TO_EVENT_STREAM)
Example #5
0
def apply(log, parameters=None, variant=CLASSIC):
    """
    Discovers a Petri net using Heuristics Miner

    Parameters
    ------------
    log
        Event log
    parameters
        Possible parameters of the algorithm,
        including:
            - Parameters.ACTIVITY_KEY
            - Parameters.TIMESTAMP_KEY
            - Parameters.CASE_ID_KEY
            - Parameters.DEPENDENCY_THRESH
            - Parameters.AND_MEASURE_THRESH
            - Parameters.MIN_ACT_COUNT
            - Parameters.MIN_DFG_OCCURRENCES
            - Parameters.DFG_PRE_CLEANING_NOISE_THRESH
            - Parameters.LOOP_LENGTH_TWO_THRESH
    variant
        Variant of the algorithm:
            - Variants.CLASSIC
            - Variants.PLUSPLUS

    Returns
    ------------
    net
        Petri net
    im
        Initial marking
    fm
        Final marking
    """
    miner = exec_utils.get_variant(variant)

    # dataframes are handled natively when pandas is available
    if pkgutil.find_loader("pandas"):
        import pandas

        if isinstance(log, pandas.core.frame.DataFrame):
            return miner.apply_pandas(log, parameters=parameters)

    converted = log_conversion.apply(log, parameters=parameters)
    return miner.apply(converted, parameters=parameters)
Example #6
0
def apply(obj: Union[EventLog, Trace], pt: ProcessTree, parameters: Optional[Dict[Any, Any]] = None) -> Union[typing.AlignmentResult, typing.ListAlignments]:
    """
    Returns alignments for a process tree

    Parameters
    --------------
    obj
        Event log or trace (a conversion is done if necessary)
    pt
        Process tree
    parameters
        Parameters of the algorithm

    Returns
    --------------
    alignments
        Alignments (single alignment when a Trace is provided; a list of
        alignments, one per trace, when an event log is provided)
    """
    if parameters is None:
        parameters = {}

    obj = log_converter.apply(obj, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

    # the leaves of the process tree are the model's executable activities
    leaves = frozenset(pt_util.get_leaves_as_tuples(pt))
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    if type(obj) is Trace:
        variant = tuple(x[activity_key] for x in obj)
        return align_variant(variant, leaves, pt)
    else:
        from pm4py.statistics.variants.log import get as variants_get
        variants = variants_get.get_variants(obj, parameters=parameters)
        # one progress-bar tick per distinct variant, not per trace
        progress = _construct_progress_bar(len(variants), parameters)
        ret = []
        # cost of aligning the empty trace, used as a reference (best-worst cost)
        bwc = align_variant([], leaves, pt)["cost"]
        align_dict = {}
        for trace in obj:
            variant = tuple(x[activity_key] for x in trace)
            # each distinct variant is aligned once; traces sharing it reuse the result
            if variant not in align_dict:
                align_dict[variant] = _apply_variant(variant, pt, leaves, bwc, parameters)
                if progress is not None:
                    progress.update()
            ret.append(align_dict[variant])
        _destroy_progress_bar(progress)
        return ret
Example #7
0
def apply_heu(log: EventLog, parameters: Optional[Dict[Any, Any]] = None) -> HeuristicsNet:
    """
    Discovers an heuristics net using the Heuristics Miner ++ algorithm

    Implements the approach described in
    Burattin, Andrea, and Alessandro Sperduti. "Heuristics Miner for Time Intervals." ESANN. 2010.

    https://andrea.burattin.net/public-files/publications/2010-esann-slides.pdf

    Parameters
    --------------
    log
        Event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY
        - Parameters.START_TIMESTAMP_KEY
        - Parameters.TIMESTAMP_KEY
        - Parameters.DEPENDENCY_THRESH
        - Parameters.AND_MEASURE_THRESH
        - Parameters.MIN_ACT_COUNT
        - Parameters.MIN_DFG_OCCURRENCES
        - Parameters.HEU_NET_DECORATION

    Returns
    --------------
    heu_net
        Heuristics net
    """
    parameters = {} if parameters is None else parameters

    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)
    log = interval_lifecycle.to_interval(log, parameters=parameters)

    start_ts_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters, None)
    if start_ts_key is None:
        # fall back to the standard XES start timestamp; copy the dict so the
        # caller's parameters are not mutated when propagating it downstream
        start_ts_key = xes.DEFAULT_START_TIMESTAMP_KEY
        parameters = copy(parameters)
        parameters[Parameters.START_TIMESTAMP_KEY] = start_ts_key

    (start_activities, end_activities, activities_occurrences, dfg,
     performance_dfg, sojourn_time, concurrent_activities) = discover_abstraction_log(
        log, parameters=parameters)

    return discover_heu_net_plus_plus(start_activities, end_activities,
                                      activities_occurrences, dfg,
                                      performance_dfg, sojourn_time,
                                      concurrent_activities,
                                      parameters=parameters)
Example #8
0
def apply(log_or_trace: Union[Trace, EventLog], parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> float:
    """
    Computes the cycle time starting from an event log or a trace object

    The definition that has been followed is the one proposed in:
    https://www.presentationeze.com/presentations/lean-manufacturing-just-in-time/lean-manufacturing-just-in-time-full-details/process-cycle-time-analysis/calculate-cycle-time/#:~:text=Cycle%20time%20%3D%20Average%20time%20between,is%2024%20minutes%20on%20average.

    So:
    Cycle time  = Average time between completion of units.

    Example taken from the website:
    Consider a manufacturing facility, which is producing 100 units of product per 40 hour week.
    The average throughput rate is 1 unit per 0.4 hours, which is one unit every 24 minutes.
    Therefore the cycle time is 24 minutes on average.

    Parameters
    ------------------
    log_or_trace
        Log or trace
    parameters
        Parameters of the algorithm, including:
        - Parameters.START_TIMESTAMP_KEY => the attribute acting as start timestamp
        - Parameters.TIMESTAMP_KEY => the attribute acting as timestamp

    Returns
    ------------------
    cycle_time
        Cycle time
    """
    parameters = {} if parameters is None else parameters

    start_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters,
                                           xes_constants.DEFAULT_TIMESTAMP_KEY)
    complete_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                              xes_constants.DEFAULT_TIMESTAMP_KEY)

    if type(log_or_trace) is Trace:
        # wrap a single trace into a one-trace log
        log = EventLog()
        log.append(log_or_trace)
    else:
        log = converter.apply(log_or_trace, variant=converter.Variants.TO_EVENT_LOG, parameters=parameters)

    # one (start, completion) epoch pair per event across the whole log
    events = []
    for trace in log:
        for ev in trace:
            events.append((ev[start_key].timestamp(), ev[complete_key].timestamp()))

    return compute.cycle_time(events, len(log))
Example #9
0
def apply(log, list_activities, parameters=None):
    """
    Finds the performance spectrum provided a log/dataframe
    and a list of activities

    Parameters
    -------------
    log
        Event log/Dataframe
    list_activities
        List of activities interesting for the performance spectrum (at least two)
    parameters
        Parameters of the algorithm, including:
            - Parameters.ACTIVITY_KEY
            - Parameters.TIMESTAMP_KEY

    Returns
    -------------
    ps
        Performance spectrum object (dictionary)

    Raises
    -------------
    Exception
        If fewer than two activities are provided
    """
    from pm4py.objects.conversion.log import converter as log_conversion

    if parameters is None:
        parameters = {}

    sample_size = exec_utils.get_param_value(Parameters.PARAMETER_SAMPLE_SIZE, parameters, 10000)

    if len(list_activities) < 2:
        raise Exception("performance spectrum can be applied providing at least two activities!")

    points = None

    if pkgutil.find_loader("pandas"):
        import pandas as pd
        if type(log) is pd.DataFrame:
            points = exec_utils.get_variant(Variants.DATAFRAME).apply(log, list_activities, sample_size, parameters)

    # BUG FIX: the log-based computation previously ran unconditionally and
    # overwrote the dataframe-based result; it is now only a fallback.
    if points is None:
        points = exec_utils.get_variant(Variants.LOG).apply(log_conversion.apply(log), list_activities, sample_size,
                                                            parameters)

    ps = {Outputs.LIST_ACTIVITIES.value: list_activities, Outputs.POINTS.value: points}

    return ps
Example #10
0
def apply(
    log: EventLog,
    parameters: Optional[Dict[Union[str, Parameters], Any]] = None
) -> Tuple[Any, List[str]]:
    """
    Extracts all the features for the traces of an event log (each trace becomes a vector of vectors, where each
    event has its own vector)

    Parameters
    -----------------
    log
        Event log
    parameters
        Parameters of the algorithm, including:
            - STR_EVENT_ATTRIBUTES => string event attributes to consider in the features extraction
            - NUM_EVENT_ATTRIBUTES => numeric event attributes to consider in the features extraction
            - FEATURE_NAMES => features to consider (in the given order)

    Returns
    -------------
    data
        Data to provide for decision tree learning
    feature_names
        Names of the features, in order
    """
    parameters = {} if parameters is None else parameters

    str_ev_attr = exec_utils.get_param_value(Parameters.STR_EVENT_ATTRIBUTES, parameters, None)
    num_ev_attr = exec_utils.get_param_value(Parameters.NUM_EVENT_ATTRIBUTES, parameters, None)
    feature_names = exec_utils.get_param_value(Parameters.FEATURE_NAMES, parameters, None)

    log = converter.apply(log, variant=converter.Variants.TO_EVENT_LOG, parameters=parameters)

    # when no explicit feature list is given, derive it from the log itself
    if feature_names is None:
        feature_names = extract_all_ev_features_names_from_log(
            log, str_ev_attr, num_ev_attr, parameters=parameters)

    return extract_features(log, feature_names, parameters=parameters)
Example #11
0
def get_decision_tree(log, net, initial_marking, final_marking, decision_point=None, attributes=None, parameters=None):
    """
    Gets a decision tree classifier on a specific point of the model

    Parameters
    --------------
    log
        Event log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    decision_point
        Point of the process in which a decision happens:
        - if not specified, the method crashes, but provides a list of possible decision points
        - if specified, the method goes on and produce the decision tree
    attributes
        Attributes of the log. If not specified, then an automatic attribute selection
        is performed.
    parameters
        Parameters of the algorithm

    Returns
    ---------------
    clf
        Decision tree
    feature_names
        The names of the features
    classes
        The classes
    """
    from sklearn import tree

    parameters = {} if parameters is None else parameters

    log = log_converter.apply(log, parameters=parameters)
    # build the feature matrix / target vector at the chosen decision point
    X, y, targets = apply(log, net, initial_marking, final_marking,
                          decision_point=decision_point, attributes=attributes,
                          parameters=parameters)
    classifier = tree.DecisionTreeClassifier().fit(X, y)
    return classifier, list(X.columns.values.tolist()), targets
Example #12
0
def apply(
    log: Union[EventLog, EventStream],
    parameters: Optional[Dict[Union[str, Parameters],
                              Any]] = None) -> List[int]:
    """
    Counts the intersections of each interval event with the other interval events of the log
    (all the events are considered, not looking at the activity)

    Parameters
    ----------------
    log
        Event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.START_TIMESTAMP_KEY => the attribute to consider as start timestamp
        - Parameters.TIMESTAMP_KEY => the attribute to consider as timestamp

    Returns
    -----------------
    overlap
        For each interval event, ordered by the order of appearance in the log, associates the number
        of intersecting events.
    """
    parameters = {} if parameters is None else parameters

    log = log_converter.apply(log,
                              variant=log_converter.Variants.TO_EVENT_LOG,
                              parameters=parameters)
    start_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    end_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)

    # one (start, end) epoch pair per event, in order of appearance
    points = [(ev[start_key].timestamp(), ev[end_key].timestamp())
              for trace in log for ev in trace]

    return compute.apply(points, parameters=parameters)
Example #13
0
def apply(log, net, marking, final_marking, parameters=None, variant=None):
    """
    Method to apply ET Conformance

    Parameters
    -----------
    log
        Trace log
    net
        Petri net
    marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters of the algorithm, including:
            pm4py.util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> Activity key
    variant
        Variant of the algorithm that should be applied:
            - Variants.ETCONFORMANCE_TOKEN
            - Variants.ALIGN_ETCONFORMANCE
    """
    warnings.warn("Use the pm4py.algo.evaluation.precision package")

    parameters = {} if parameters is None else parameters

    log = log_conversion.apply(log, parameters, log_conversion.TO_EVENT_LOG)

    # pick a default variant when none is specified by the user:
    # token-based replay unless the net is an easy-sound workflow net,
    # in which case the alignment-based approach is safer (handles duplicates)
    if variant is None:
        easy_sound = check_easy_soundness_net_in_fin_marking(net, marking, final_marking)
        variant = ALIGN_ETCONFORMANCE if easy_sound else ETCONFORMANCE_TOKEN

    return exec_utils.get_variant(variant).apply(log, net, marking,
                                                 final_marking,
                                                 parameters=parameters)
Example #14
0
def __export_log(log, output_file_path, variant=Variants.ETREE, parameters=None):
    """
    Method to export a XES from a log

    Parameters
    -----------
    log
        Trace log
    output_file_path
        Output file path
    variant
        Selected variant of the algorithm
    parameters
        Parameters of the algorithm:
            Parameters.COMPRESS -> Indicates that the XES file must be compressed
    """
    if parameters is None:
        parameters = dict()
    exporter = exec_utils.get_variant(variant)
    converted = log_conversion.apply(log, parameters=parameters)
    return exporter.apply(converted, output_file_path, parameters=parameters)
Example #15
0
def apply(log: EventLog, value: Any, parameters: Optional[Dict[str, Any]] = None) -> EventLog:
    """
    Filters the trace of the log where the given attribute value is repeated
    (in a range of repetitions that is specified by the user)

    Parameters
    ----------------
    log
        Event log
    value
        Value that is investigated
    parameters
        Parameters of the filter, including:
        - Parameters.ATTRIBUTE_KEY => the attribute key
        - Parameters.MIN_REP => minimum number of repetitions
        - Parameters.MAX_REP => maximum number of repetitions

    Returns
    ----------------
    filtered_log
        Filtered log
    """
    parameters = {} if parameters is None else parameters

    log = converter.apply(log, parameters=parameters)

    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    min_rep = exec_utils.get_param_value(Parameters.MIN_REP, parameters, 2)
    max_rep = exec_utils.get_param_value(Parameters.MAX_REP, parameters, sys.maxsize)

    # new log carrying over the original log-level metadata
    filtered_log = EventLog(list(), attributes=log.attributes, extensions=log.extensions,
                            classifiers=log.classifiers, omni_present=log.omni_present,
                            properties=log.properties)

    for trace in log:
        occurrences = sum(1 for ev in trace
                          if attribute_key in ev and ev[attribute_key] == value)
        if min_rep <= occurrences <= max_rep:
            filtered_log.append(trace)

    return filtered_log
Example #16
0
def export(log, output_file_path, parameters=None):
    """
    Exports the given log to CSV format

    Parameters
    ----------
    log: :class:`pm4py.log.log.EventLog`
        Event log. Also, can take a log and convert it to event stream
    output_file_path:
        Output file path
    parameters
        Possible parameters of the algorithm (accepted for API uniformity, unused)
    """
    # parameters are accepted but deliberately discarded
    if parameters is None:
        parameters = {}
    del parameters

    dataframe = converter.apply(log, variant=converter.Variants.TO_DATA_FRAME)
    dataframe.to_csv(output_file_path, index=False)
Example #17
0
def apply(con,
          ref_type="EKKO",
          gjahr="2014",
          min_extr_date="2014-01-01 00:00:00",
          mandt="800",
          bukrs="1000",
          extra_els_query=None):
    """
    Builds a timestamp-sorted event log from the P2P one-document dataframe
    extracted through the given connection.

    NOTE(review): the semantics of ref_type/gjahr/mandt/bukrs are defined by
    p2p_1d_dataframe.apply (presumably SAP selection filters) — confirm there.
    """
    dataframe = p2p_1d_dataframe.apply(con,
                                       gjahr=gjahr,
                                       ref_type=ref_type,
                                       min_extr_date=min_extr_date,
                                       mandt=mandt,
                                       bukrs=bukrs,
                                       extra_els_query=extra_els_query)
    log = log_converter.apply(dataframe,
                              parameters={"stream_postprocessing": True})
    print("converted dataframe")
    log = sorting.sort_timestamp(log, "time:timestamp")
    return log
def get_tangible_reachability_from_log_net_im_fm(log,
                                                 net,
                                                 im,
                                                 fm,
                                                 parameters=None):
    """
    Gets the tangible reachability graph from a log and an accepting Petri net

    Parameters
    ---------------
    log
        Event log
    net
        Petri net
    im
        Initial marking
    fm
        Final marking

    Returns
    ------------
    reachab_graph
        Reachability graph
    tangible_reach_graph
        Tangible reachability graph
    stochastic_info
        Stochastic information
    """
    parameters = {} if parameters is None else parameters

    from pm4py.algo.simulation.montecarlo.utils import replay

    # replay the (converted) log on the net to obtain the stochastic map
    converted_log = log_converter.apply(log, parameters=parameters)
    stochastic_info = replay.get_map_from_log_and_net(converted_log, net, im, fm,
                                                      parameters=parameters)

    reach_graph, tang_reach_graph = get_tangible_reachability_from_net_im_sinfo(
        net, im, stochastic_info, parameters=parameters)

    return reach_graph, tang_reach_graph, stochastic_info
Example #19
0
def get_slope(start_node, end_node, log):
    """
    Computes the slope of the daily frequency of a given transition
    (start_node directly followed by end_node) within the given (filtered) log.

    Parameters
    ------------
    start_node
        Source activity of the transition
    end_node
        Target activity of the transition
    log
        Event log (converted internally to a dataframe)

    Returns
    ------------
    slope
        Slope computed by slope_from_dateseries on the per-day counts
    """
    dataframe = log_converter.apply(
        log, variant=log_converter.Variants.TO_DATA_FRAME)
    unique_cases = dataframe['case:concept:name'].unique()
    time_series = []

    for case_id in unique_cases:
        case_df = dataframe[dataframe['case:concept:name'] == case_id]
        # BUG FIX: sort_values returns a new frame; the original discarded the
        # result, so events were never actually ordered by timestamp. The index
        # is reset so that index + 1 addresses the chronologically next event.
        case_df = case_df.sort_values(by=['time:timestamp']).reset_index(drop=True)
        last_index = len(case_df.index) - 1
        for index, row in case_df.iterrows():
            if index == last_index:
                # last event of the case has no successor
                break
            if row[ACTIVITY_NAMES] == start_node and case_df.loc[
                    index + 1, ACTIVITY_NAMES] == end_node:
                time_series.append(case_df.loc[index + 1, 'time:timestamp'])
        del case_df

    # NOTE(review): raises ValueError when the transition never occurs
    # (empty time_series), as the original did
    day_min = min(time_series).date()
    day_max = max(time_series).date()

    date_series = [x.date() for x in time_series]

    # one row per calendar day in the observed range, with the transition count
    rows = []
    for single_date in pd.date_range(start=day_min, end=day_max):
        rows.append({
            'day': single_date.strftime("%Y-%m-%d"),
            'count': date_series.count(single_date.date())
        })
    # BUG FIX: DataFrame.append was removed in pandas 2.0; build rows first
    df = pd.DataFrame(rows)

    return slope_from_dateseries(df)
Example #20
0
def apply(log: Union[EventLog, EventStream, pd.DataFrame], parameters: Optional[Dict[Union[str, Parameters], Any]] = None, variant=DEFAULT_VARIANT) -> Tuple[PetriNet, Marking, Marking]:
    """
    Apply the Alpha Miner on top of a log

    Parameters
    -----------
    log
        Log
    variant
        Variant of the algorithm to use:
            - Variants.ALPHA_VERSION_CLASSIC
            - Variants.ALPHA_VERSION_PLUS
    parameters
        Possible parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> Name of the attribute that contains the activity

    Returns
    -----------
    net
        Petri net
    marking
        Initial marking
    final_marking
        Final marking
    """
    if parameters is None:
        parameters = {}

    case_id_glue = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, pmutil.constants.CASE_CONCEPT_NAME)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_util.DEFAULT_NAME_KEY)
    start_ts_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters, None)
    ts_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, xes_util.DEFAULT_TIMESTAMP_KEY)

    # classic alpha on a dataframe: mine directly from the DFG, no log conversion
    if pkgutil.find_loader("pandas"):
        import pandas
        if isinstance(log, pandas.core.frame.DataFrame) and variant == ALPHA_VERSION_CLASSIC:
            dfg = df_statistics.get_dfg_graph(log, case_id_glue=case_id_glue,
                                              activity_key=activity_key,
                                              timestamp_key=ts_key,
                                              start_timestamp_key=start_ts_key)
            return exec_utils.get_variant(variant).apply_dfg(dfg, parameters=parameters)

    converted = log_conversion.apply(log, parameters, log_conversion.TO_EVENT_LOG)
    return exec_utils.get_variant(variant).apply(converted, parameters)
Example #21
0
def filter_log_on_max_no_activities(
    log: EventLog,
    max_no_activities: int = 25,
    parameters: Optional[Dict[Union[str, Parameters],
                              Any]] = None) -> EventLog:
    """
    Filter a log on a maximum number of activities

    Keeps only the events whose activity is among the max_no_activities
    most frequent activities of the log.

    Parameters
    -------------
    log
        Log
    max_no_activities
        Maximum number of activities
    parameters
        Parameters of the algorithm

    Returns
    -------------
    filtered_log
        Filtered version of the event log
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(log,
                              variant=log_converter.Variants.TO_EVENT_LOG,
                              parameters=parameters)

    activity_key = parameters[
        PARAMETER_CONSTANT_ACTIVITY_KEY] if PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else DEFAULT_NAME_KEY
    # BUG FIX: copy before setting the attribute key, so the caller's
    # parameters dict is not mutated as a side effect
    parameters = dict(parameters)
    parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key

    # activities sorted by descending frequency
    all_activities = sorted(
        [(x, y) for x, y in get_attribute_values(log, activity_key).items()],
        key=lambda x: x[1],
        reverse=True)
    activities = all_activities[:min(len(all_activities), max_no_activities)]
    activities = [x[0] for x in activities]

    # only filter when something would actually be removed
    if len(activities) < len(all_activities):
        log = apply_events(log, activities, parameters=parameters)
    return log
Example #22
0
def get_prefix_matrix(log, parameters=None):
    """
    Gets the prefix matrix from a log_skeleton object

    Parameters
    --------------
    log
        Log
    parameters
        Parameters of the algorithm: activity_key

    Returns
    --------------
    prefix_matrix
        Prefix matrix
    activities
        Sorted (by name) activities of the log_skeleton
    """
    if parameters is None:
        parameters = {}
    keep_unique = parameters[
        KEEP_UNIQUE] if KEEP_UNIQUE in parameters else False

    activity_key = parameters[
        constants.
        PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY
    # BUG FIX: copy before writing the activity/attribute keys, so the
    # caller's parameters dict is not mutated as a side effect
    parameters = dict(parameters)
    parameters[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = activity_key
    parameters[constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key

    if type(log) is EventStream:
        log = log_conversion.apply(log, parameters=parameters)
    variants_list = get_variants_list(log, parameters=parameters)
    activities = get_activities_list(log, parameters=parameters)

    # either one row per distinct variant, or one row per trace of the log
    if keep_unique:
        prefix_matrix, activities = get_prefix_matrix_from_variants_list(
            variants_list, activities, parameters=parameters)
    else:
        prefix_matrix, activities = get_prefix_matrix_from_event_log_not_unique(
            log, activities, parameters=parameters)

    return prefix_matrix, activities
Example #23
0
def attr_value_different_persons(log, A, parameters=None):
    """
    Checks whether an attribute value is assumed on events done by different resources

    Parameters
    ------------
    log
        Log
    A
        A attribute value
    parameters
        Parameters of the algorithm, including the attribute key and the positive parameter:
            - if True, then filters all the cases containing occurrences of A done by different resources
            - if False, then filters all the cases not containing occurrences of A done by different resources

    Returns
    -------------
    filtered_log
        Filtered log_skeleton
    """
    parameters = {} if parameters is None else parameters

    if not isinstance(log, EventLog):
        log = log_converter.apply(log, variant=log_converter.TO_EVENT_LOG, parameters=parameters)

    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY, parameters, DEFAULT_RESOURCE_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)

    filtered = EventLog()

    for trace in log:
        # distinct resources that executed an event carrying value A
        resources = {ev[resource_key] for ev in trace
                     if attribute_key in ev and resource_key in ev and ev[attribute_key] == A}
        multiple = len(resources) > 1
        keep = bool(positive) if multiple else not positive
        if keep:
            filtered.append(trace)

    return filtered
Example #24
0
def apply(log, list_activities, parameters=None):
    """
    Finds the performance spectrum provided a log/dataframe
    and a list of activities

    Parameters
    -------------
    log
        Event log/Dataframe
    list_activities
        List of activities interesting for the performance spectrum (at least two)
    parameters
        Parameters of the algorithm, including the activity key and the timestamp key

    Returns
    -------------
    ps
        Performance spectrum object (dictionary)
    """
    from pm4py.objects.conversion.log import converter as log_conversion

    parameters = {} if parameters is None else parameters

    sample_size = parameters.get(PARAMETER_SAMPLE_SIZE, DEFAULT_SAMPLE_SIZE)

    if len(list_activities) < 2:
        raise Exception(
            "performance spectrum can be applied providing at least two activities!"
        )

    # dataframes are handled by the dedicated variant; everything else is
    # converted to an event log first
    if type(log) is pd.DataFrame:
        points = VERSIONS[DATAFRAME](log, list_activities, sample_size,
                                     parameters)
    else:
        points = VERSIONS[LOG](log_conversion.apply(log), list_activities,
                               sample_size, parameters)

    return {"list_activities": list_activities, "points": points}
Example #25
0
def apply(log, parameters=None, variant=Variants.HANDOVER_LOG):
    """
    Calculates a SNA metric

    Parameters
    ------------
    log
        Log
    parameters
        Possible parameters of the algorithm
    variant
        Variant of the algorithm to apply. Possible values:
            - Variants.HANDOVER_LOG
            - Variants.WORKING_TOGETHER_LOG
            - Variants.SUBCONTRACTING_LOG
            - Variants.JOINTACTIVITIES_LOG
            - Variants.HANDOVER_PANDAS
            - Variants.WORKING_TOGETHER_PANDAS
            - Variants.SUBCONTRACTING_PANDAS
            - Variants.JOINTACTIVITIES_PANDAS

    Returns
    -----------
    tuple
        Tuple containing the metric matrix and the resources list
    """
    parameters = {} if parameters is None else parameters

    normalize = exec_utils.get_param_value(
        Parameters.METRIC_NORMALIZATION, parameters, False)

    # log-based variants require an event-log object; pandas variants take
    # the input as-is
    log_variants = [
        Variants.HANDOVER_LOG, Variants.WORKING_TOGETHER_LOG,
        Variants.JOINTACTIVITIES_LOG, Variants.SUBCONTRACTING_LOG
    ]
    if variant in log_variants:
        log = log_conversion.apply(log, parameters=parameters)

    sna = exec_utils.get_variant(variant).apply(log, parameters=parameters)

    # optionally rescale the metric matrix into [-1, 1]
    abs_max = np.max(np.abs(sna[0]))
    if normalize and abs_max > 0:
        sna[0] = sna[0] / abs_max
    return sna
Example #26
0
def filter_on_ncases(log: EventLog, max_no_cases: int = 1000) -> EventLog:
    """
    Keep only the first ``max_no_cases`` traces of a log.

    Parameters
    -----------
    log
        Log
    max_no_cases
        Desired number of traces to retain

    Returns
    -----------
    filtered_log
        Filtered log containing at most ``max_no_cases`` traces
    """
    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG)
    keep = min(len(log), max_no_cases)
    return EventLog(log[:keep])
Example #27
0
 def get_log_obj_type(self, objtype):
     """
     Build a flattened XES log for a single object type and export it.

     The exploded dataframe is restricted to the event columns plus the
     given object-type column, rows without that object are dropped, and
     the object identifier is promoted to the case identifier before
     conversion to an event log.

     Parameters
     ------------
     objtype
         Name of the object-type column to flatten on

     Returns
     ------------
     tuple
         (log name, "xes", base64-encoded XES string)
     """
     columns = [
         x
         for x in self.exploded_dataframe.columns if x.startswith("event_")
     ] + [objtype]
     dataframe = self.exploded_dataframe[columns].dropna(how="any",
                                                         subset=[objtype])
     dataframe = succint_mdl_to_exploded_mdl.apply(dataframe)
     dataframe = dataframe.rename(
         columns={
             "event_activity": "concept:name",
             "event_timestamp": "time:timestamp",
             objtype: "case:concept:name"
         })
     # Use the full orient name: the 'r' abbreviation was deprecated and
     # removed in modern pandas (raises ValueError in pandas >= 2.0).
     stream = EventStream(dataframe.to_dict('records'))
     log = log_conv_factory.apply(stream)
     log = sorting.sort_timestamp(log, "time:timestamp")
     exported_log = base64.b64encode(
         xes_exporter.export_log_as_string(log)).decode("utf-8")
     return self.name + "_" + objtype, "xes", exported_log
def import_xes():
    """
    Import the configured XES event log and return a dataframe view of it.

    Reads ``input_event_logs/<c.FILE_NAME>.xes``, stores the parsed log in
    the module-level ``event_log`` global, converts it to a dataframe, and
    returns only the columns listed in ``c.ATTRIBUTE_LIST`` that are
    actually present in the log.

    Returns
    ------------
    pandas.DataFrame
        Dataframe restricted to the configured attribute columns
    """
    pd.set_option('display.max_columns', None)
    pd.options.display.width = None
    global event_log
    parameters = {
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: c.ACTIVITY_ATTRIBUTE_NAME
    }
    event_log = xes_importer.apply('input_event_logs/{}.xes'.format(
        c.FILE_NAME),
                                   parameters=parameters)
    df = log_converter.apply(event_log,
                             variant=log_converter.Variants.TO_DATA_FRAME,
                             parameters=parameters)
    # Keep only the configured attributes that actually exist in this log.
    attribute_list_copy = [
        attribute for attribute in c.ATTRIBUTE_LIST if attribute in df.columns
    ]
    if c.ORG_RESOURCE_ATTRIBUTE_NAME in df:
        # Assign back instead of inplace=True on a column selection: chained
        # inplace fillna is deprecated and silently ineffective under pandas
        # copy-on-write semantics.
        df[c.ORG_RESOURCE_ATTRIBUTE_NAME] = df[
            c.ORG_RESOURCE_ATTRIBUTE_NAME].fillna('missing')
    return df[attribute_list_copy]
Example #29
0
def get_start_activities(
    log: EventLog,
    parameters: Optional[Dict[Union[str, Parameters], Any]] = None
) -> Dict[str, int]:
    """
    Count, per activity, how many traces of the log start with it.

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> Attribute key (must be specified if different from concept:name)

    Returns
    ----------
    start_activities
        Dictionary mapping each start activity to its trace count
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, DEFAULT_NAME_KEY)

    log = log_converter.apply(log,
                              variant=log_converter.Variants.TO_EVENT_LOG,
                              parameters=parameters)

    start_activities: Dict[str, int] = {}
    for trace in log:
        # Empty traces and traces whose first event lacks the attribute
        # contribute nothing to the count.
        if len(trace) == 0:
            continue
        first_event = trace[0]
        if activity_key in first_event:
            activity = first_event[activity_key]
            start_activities[activity] = start_activities.get(activity, 0) + 1

    return start_activities
Example #30
0
def __export_log_as_string(log, variant=Variants.ETREE, parameters=None):
    """
    Serialize a log to a XES string using the selected exporter variant.

    Parameters
    -----------
    log
        Trace log
    variant
        Selected variant of the algorithm
    parameters
        Parameters of the algorithm

    Returns
    -----------
    string
        String describing the XES
    """
    if parameters is None:
        parameters = {}
    # Anything that is not a known Variants member falls back to ETREE.
    if not isinstance(variant, Variants):
        variant = Variants.ETREE
    converted = log_conversion.apply(log, parameters=parameters)
    return variant.value.apply(converted, parameters=parameters)