コード例 #1
0
def filter_time_range(log: Union[EventLog, pd.DataFrame],
                      dt1: str,
                      dt2: str,
                      mode="events") -> Union[EventLog, pd.DataFrame]:
    """
    Filter a log on a time interval

    Parameters
    ----------------
    log
        Log object (pandas dataframe, EventLog or EventStream, subclasses included)
    dt1
        Left extreme of the interval
    dt2
        Right extreme of the interval
    mode
        Modality of filtering (events, traces_contained, traces_intersecting)
        events: any event that fits the time frame is retained
        traces_contained: any trace completely contained in the timeframe is retained
        traces_intersecting: any trace intersecting with the time-frame is retained.

    Returns
    ----------------
    filtered_log
        Filtered log. If the mode is not recognized, a warning is emitted and
        the original log is returned unchanged.

    Raises
    ----------------
    Exception
        If the log is not a pandas dataframe, an EventLog or an EventStream
    """
    # isinstance instead of an exact type() match, so that subclasses of the
    # supported log types are accepted as well
    if not isinstance(log, (pd.DataFrame, EventLog, EventStream)):
        raise Exception(
            "the method can be applied only to a traditional event log!")

    # the pandas and the log timestamp_filter modules are called through the
    # same three function names, so the mode can be resolved once
    filter_names = {
        "events": "apply_events",
        "traces_contained": "filter_traces_contained",
        "traces_intersecting": "filter_traces_intersecting",
    }
    filter_name = filter_names.get(mode) if isinstance(mode, str) else None

    if filter_name is None:
        # str() keeps the warning itself from failing on a non-string mode
        warnings.warn('mode provided: ' + str(mode) +
                      ' is not recognized; original log returned!')
        return log

    if check_is_pandas_dataframe(log):
        from pm4py.algo.filtering.pandas.timestamp import timestamp_filter
    else:
        from pm4py.algo.filtering.log.timestamp import timestamp_filter

    return getattr(timestamp_filter, filter_name)(
        log, dt1, dt2, parameters=get_properties(log))
コード例 #2
0
def apply(dataframe, filter, parameters=None):
    """
    Restrict the dataframe to the events inside the time window described by the filter

    Parameters
    ------------
    dataframe
        Dataframe where the filter should be applied
    filter
        Filter description: filter[1][0] is used as the timestamp key, while
        filter[1][1] is a string holding two integer POSIX timestamps
        separated by @@@ (start of the window first, end second)
    parameters
        Parameters of the algorithm (copied, never modified in place)

    Returns
    ------------
    filtered
        Result of timestamp_filter.apply_events on the given window
    """
    if parameters is None:
        parameters = {}

    def _utc_string(epoch_text):
        # epoch seconds (as text) -> string form of the naive UTC datetime
        return str(datetime.utcfromtimestamp(int(epoch_text)))

    bounds = filter[1][1].split("@@@")
    window_start = _utc_string(bounds[0])
    window_end = _utc_string(bounds[1])

    # work on a copy so that the caller's parameters are left untouched
    filter_parameters = copy(parameters)
    filter_parameters[PARAMETER_CONSTANT_TIMESTAMP_KEY] = filter[1][0]

    return timestamp_filter.apply_events(
        dataframe, window_start, window_end, parameters=filter_parameters)
コード例 #3
0
ファイル: filtering.py プロジェクト: adamburkegh/pm4py-core
def filter_time_range(log, dt1, dt2, mode="events"):
    """
    Keep only the part of a log that falls inside a time interval

    Parameters
    ----------------
    log
        Log object (dataframe or event log)
    dt1
        Left extreme of the interval
    dt2
        Right extreme of the interval
    mode
        Modality of filtering:
        events -> any event that fits the time frame is retained
        traces_contained -> any trace completely contained in the timeframe is retained
        traces_intersecting -> any trace intersecting with the time-frame is retained

    Returns
    ----------------
    filtered_log
        Filtered log (the original log, after a warning, when the mode is unknown)
    """
    # choose the implementation that matches the log representation
    if check_is_dataframe(log):
        from pm4py.algo.filtering.pandas.timestamp import timestamp_filter as ts_filter
    else:
        from pm4py.algo.filtering.log.timestamp import timestamp_filter as ts_filter

    if mode == "events":
        selected = ts_filter.apply_events
    elif mode == "traces_contained":
        selected = ts_filter.filter_traces_contained
    elif mode == "traces_intersecting":
        selected = ts_filter.filter_traces_intersecting
    else:
        warnings.warn('mode provided: ' + mode +
                      ' is not recognized; original log returned!')
        return log

    return selected(log, dt1, dt2)
コード例 #4
0
 def test_filtering_timeframe(self):
     # Touching an instance attribute keeps static-method warnings away;
     # unittest requires test cases to be written as instance methods.
     self.dummy_variable = "dummy_value"
     window_start = "2011-03-09 00:00:00"
     window_end = "2012-01-18 23:59:59"
     receipt_path = os.path.join(INPUT_DATA_DIR, "receipt.csv")
     frame = csv_import_adapter.import_dataframe_from_path(receipt_path, sep=',')
     # Smoke test: the three filters only have to run without raising,
     # their results are discarded without further assertions.
     events_df = timestamp_filter.apply_events(frame, window_start, window_end)
     intersecting_df = timestamp_filter.filter_traces_intersecting(frame, window_start, window_end)
     contained_df = timestamp_filter.filter_traces_contained(frame, window_start, window_end)
     del events_df, intersecting_df, contained_df
コード例 #5
0
 def test_filtering_timeframe(self):
     # Touching an instance attribute keeps static-method warnings away;
     # unittest requires test cases to be written as instance methods.
     self.dummy_variable = "dummy_value"
     window = ("2011-03-09 00:00:00", "2012-01-18 23:59:59")
     receipt_path = os.path.join(INPUT_DATA_DIR, "receipt.csv")
     # Load the CSV and run it through the timestamp column conversion
     frame = dataframe_utils.convert_timestamp_columns_in_df(pd.read_csv(receipt_path))
     # Smoke test: the three filters only have to run without raising,
     # their results are discarded without further assertions.
     events_df = timestamp_filter.apply_events(frame, *window)
     intersecting_df = timestamp_filter.filter_traces_intersecting(frame, *window)
     contained_df = timestamp_filter.filter_traces_contained(frame, *window)
     del events_df, intersecting_df, contained_df
コード例 #6
0
def filter_timestamp(log, dt1, dt2, how="events"):
    """
    Filter a log on a time interval

    Parameters
    ----------------
    log
        Log object (dataframe or event log)
    dt1
        Left extreme of the interval
    dt2
        Right extreme of the interval
    how
        Modality of filtering (events, traces_contained, traces_intersecting)

    Returns
    ----------------
    filtered_log
        Filtered log. If the modality is not recognized, a warning is emitted
        and the original log is returned unchanged (instead of silently
        returning None).
    """
    # local import: only the fallback path needs it
    import warnings

    # the pandas and the log timestamp_filter modules are called through the
    # same three function names, so the modality can be resolved once
    filter_names = {
        "events": "apply_events",
        "traces_contained": "filter_traces_contained",
        "traces_intersecting": "filter_traces_intersecting",
    }
    filter_name = filter_names.get(how) if isinstance(how, str) else None

    if filter_name is None:
        # same fallback as filter_time_range: warn and hand back the input;
        # str() keeps the warning itself from failing on a non-string value
        warnings.warn('mode provided: ' + str(how) +
                      ' is not recognized; original log returned!')
        return log

    if check_is_dataframe(log):
        from pm4py.algo.filtering.pandas.timestamp import timestamp_filter
    else:
        from pm4py.algo.filtering.log.timestamp import timestamp_filter

    return getattr(timestamp_filter, filter_name)(log, dt1, dt2)
コード例 #7
0
def apply(dataframe, filter, parameters=None):
    """
    Restrict the dataframe to the events inside the time window described by the filter

    Parameters
    ------------
    dataframe
        Dataframe where the filter should be applied
    filter
        Filter description: filter[1] is a string holding two integer POSIX
        timestamps separated by @@@ (start of the window first, end second)
    parameters
        Parameters of the algorithm

    Returns
    ------------
    filtered
        Result of timestamp_filter.apply_events on the given window
    """
    if parameters is None:
        parameters = {}

    def _utc_string(epoch_text):
        # epoch seconds (as text) -> string form of the naive UTC datetime
        return str(datetime.utcfromtimestamp(int(epoch_text)))

    bounds = filter[1].split("@@@")
    window_start = _utc_string(bounds[0])
    window_end = _utc_string(bounds[1])

    return timestamp_filter.apply_events(
        dataframe, window_start, window_end, parameters=parameters)