Example #1
0
def filter_time_range(log: Union[EventLog, pd.DataFrame],
                      dt1: str,
                      dt2: str,
                      mode="events") -> Union[EventLog, pd.DataFrame]:
    """
    Restrict a log to a time interval

    Parameters
    ----------------
    log
        Log object (its exact type must be pd.DataFrame, EventLog or EventStream)
    dt1
        Left extreme of the interval
    dt2
        Right extreme of the interval
    mode
        Modality of filtering (events, traces_contained, traces_intersecting)
        events: any event that fits the time frame is retained
        traces_contained: any trace completely contained in the timeframe is retained
        traces_intersecting: any trace intersecting with the time-frame is retained.
        Any other value emits a warning and the log is returned as-is.

    Returns
    ----------------
    filtered_log
        Filtered log

    Raises
    ----------------
    Exception
        If the type of log is not one of the accepted ones
    """
    accepted_types = [pd.DataFrame, EventLog, EventStream]
    if type(log) not in accepted_types:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    # The pandas and the log implementations expose the same three functions,
    # so only the module to import depends on the container type.
    if check_is_pandas_dataframe(log):
        from pm4py.algo.filtering.pandas.timestamp import timestamp_filter
    else:
        from pm4py.algo.filtering.log.timestamp import timestamp_filter

    if mode == "events":
        selected_filter = timestamp_filter.apply_events
    elif mode == "traces_contained":
        selected_filter = timestamp_filter.filter_traces_contained
    elif mode == "traces_intersecting":
        selected_filter = timestamp_filter.filter_traces_intersecting
    else:
        warnings.warn('mode provided: ' + mode +
                      ' is not recognized; original log returned!')
        return log

    # Properties are looked up only once a valid modality has been selected.
    return selected_filter(log, dt1, dt2, parameters=get_properties(log))
Example #2
0
def filter_time_range(log, dt1, dt2, mode="events"):
    """
    Restrict a log to a time interval

    Parameters
    ----------------
    log
        Log object
    dt1
        Left extreme of the interval
    dt2
        Right extreme of the interval
    mode
        Modality of filtering (events, traces_contained, traces_intersecting)
        events: any event that fits the time frame is retained
        traces_contained: any trace completely contained in the timeframe is retained
        traces_intersecting: any trace intersecting with the time-frame is retained.
        Any other value emits a warning and the log is returned as-is.

    Returns
    ----------------
    filtered_log
        Filtered log
    """
    # Both implementations expose the same function names; only the module
    # to import depends on whether the log is a dataframe.
    if check_is_dataframe(log):
        from pm4py.algo.filtering.pandas.timestamp import timestamp_filter
    else:
        from pm4py.algo.filtering.log.timestamp import timestamp_filter

    if mode == "events":
        return timestamp_filter.apply_events(log, dt1, dt2)
    if mode == "traces_contained":
        return timestamp_filter.filter_traces_contained(log, dt1, dt2)
    if mode == "traces_intersecting":
        return timestamp_filter.filter_traces_intersecting(log, dt1, dt2)

    # Unknown modality: signal it and hand back the log untouched.
    warnings.warn('mode provided: ' + mode +
                  ' is not recognized; original log returned!')
    return log
Example #3
0
 def test_filtering_timeframe(self):
     # unittest requires test cases to be instance methods; touching self
     # keeps static-analysis tools from suggesting a static method
     self.dummy_variable = "dummy_value"
     csv_path = os.path.join(INPUT_DATA_DIR, "receipt.csv")
     frame = csv_import_adapter.import_dataframe_from_path(csv_path, sep=',')
     # smoke test: each filter only has to run on the interval without raising
     start, end = "2011-03-09 00:00:00", "2012-01-18 23:59:59"
     events_df = timestamp_filter.apply_events(frame, start, end)
     intersecting_df = timestamp_filter.filter_traces_intersecting(frame, start, end)
     contained_df = timestamp_filter.filter_traces_contained(frame, start, end)
     del events_df, intersecting_df, contained_df
Example #4
0
def average_case_duration(
        df: pd.DataFrame,
        t1: Union[datetime, str],
        t2: Union[datetime, str],
        r: str,
        parameters: Optional[Dict[Union[str, Parameters],
                                  Any]] = None) -> float:
    """
    The average duration of cases completed during a given time slot in which a given resource was involved.

    Metric RBI 4.4 in Pika, Anastasiia, et al.
    "Mining resource profiles from event logs." ACM Transactions on Management Information Systems (TMIS) 8.1 (2017): 1-30.

    The dataframe is first filtered on the resource attribute, then on the
    traces intersecting the interval; the mean of the "caseDuration" entries
    of the resulting case descriptions is returned.

    Parameters
    -----------------
    df
        Dataframe
    t1
        Left interval
    t2
        Right interval
    r
        Resource
    parameters
        Parameters of the algorithm, including:
        - Parameters.RESOURCE_KEY => attribute holding the resource
          (default: xes_constants.DEFAULT_RESOURCE_KEY)
        The same dictionary is forwarded to the timestamp filter and to the
        case statistics.

    Returns
    ----------------
    metric
        Value of the metric
    """
    parameters = {} if parameters is None else parameters

    resource_attribute = exec_utils.get_param_value(
        Parameters.RESOURCE_KEY, parameters,
        xes_constants.DEFAULT_RESOURCE_KEY)

    # Step 1: filter on the resource attribute with the single value r
    from pm4py.algo.filtering.pandas.attributes import attributes_filter
    df = attributes_filter.apply(
        df, [r],
        parameters={attributes_filter.Parameters.ATTRIBUTE_KEY: resource_attribute})

    # Step 2: keep the traces intersecting [t1, t2]
    from pm4py.algo.filtering.pandas.timestamp import timestamp_filter
    df = timestamp_filter.filter_traces_intersecting(
        df, t1, t2, parameters=parameters)

    # Step 3: average the duration over the remaining cases
    from pm4py.statistics.traces.generic.pandas import case_statistics
    case_descriptions = case_statistics.get_cases_description(
        df, parameters=parameters)
    return mean(description["caseDuration"]
                for description in case_descriptions.values())
 def test_filtering_timeframe(self):
     # unittest requires test cases to be instance methods; touching self
     # keeps static-analysis tools from suggesting a static method
     self.dummy_variable = "dummy_value"
     csv_path = os.path.join(INPUT_DATA_DIR, "receipt.csv")
     frame = pd.read_csv(csv_path)
     frame = dataframe_utils.convert_timestamp_columns_in_df(frame)
     # smoke test: each filter only has to run on the interval without raising
     start, end = "2011-03-09 00:00:00", "2012-01-18 23:59:59"
     events_df = timestamp_filter.apply_events(frame, start, end)
     intersecting_df = timestamp_filter.filter_traces_intersecting(frame, start, end)
     contained_df = timestamp_filter.filter_traces_contained(frame, start, end)
     del events_df, intersecting_df, contained_df
Example #6
0
def filter_timestamp(log, dt1, dt2, how="events"):
    """
    Filter a log on a time interval

    Parameters
    ----------------
    log
        Log object
    dt1
        Left extreme of the interval
    dt2
        Right extreme of the interval
    how
        Modality of filtering (events, traces_contained, traces_intersecting).
        Any other value emits a warning and the log is returned unchanged.

    Returns
    ----------------
    filtered_log
        Filtered log (the original log when the modality is not recognized)
    """
    # Both implementations expose the same function names; only the module
    # to import depends on whether the log is a dataframe.
    if check_is_dataframe(log):
        from pm4py.algo.filtering.pandas.timestamp import timestamp_filter
    else:
        from pm4py.algo.filtering.log.timestamp import timestamp_filter

    if how == "events":
        return timestamp_filter.apply_events(log, dt1, dt2)
    if how == "traces_contained":
        return timestamp_filter.filter_traces_contained(log, dt1, dt2)
    if how == "traces_intersecting":
        return timestamp_filter.filter_traces_intersecting(log, dt1, dt2)

    # An unknown modality previously fell through and returned None without
    # any notice; warn and return the log untouched instead.
    import warnings
    warnings.warn(
        f"how provided: {how!r} is not recognized; original log returned!")
    return log
def apply(dataframe, filter, parameters=None):
    """
    Apply a timestamp filter to the dataframe

    Parameters
    ------------
    dataframe
        Dataframe where the filter should be applied
    filter
        Filter: an indexable object whose element at position 1 is a string
        holding two integer POSIX timestamps separated by @@@
        (left and right extreme of the interval)
    parameters
        Parameters of the algorithm (forwarded to the timestamp filter)

    Returns
    ------------
    filtered_dataframe
        Result of timestamp_filter.filter_traces_intersecting on the interval
    """
    # Local import: only `datetime` is known to be in scope at module level.
    from datetime import timezone

    if parameters is None:
        parameters = {}

    bounds = filter[1].split("@@@")

    # datetime.utcfromtimestamp is deprecated since Python 3.12; converting
    # with an explicit UTC timezone and dropping tzinfo afterwards yields the
    # same naive UTC string representation.
    dt1 = str(datetime.fromtimestamp(int(bounds[0]), tz=timezone.utc).replace(tzinfo=None))
    dt2 = str(datetime.fromtimestamp(int(bounds[1]), tz=timezone.utc).replace(tzinfo=None))

    return timestamp_filter.filter_traces_intersecting(dataframe, dt1, dt2, parameters=parameters)