예제 #1
0
def apply_events(log, dt1, dt2, parameters=None):
    """
    Build a log holding only the events whose timestamp lies strictly
    between the two bounds.

    Parameters
    -----------
    log
        Log
    dt1
        Lower bound to the interval (exclusive); passed through
        get_dt_from_string before use
    dt2
        Upper bound to the interval (exclusive); passed through
        get_dt_from_string before use
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> Attribute to use as timestamp
            (falls back to DEFAULT_TIMESTAMP_KEY)

    Returns
    ------------
    filtered_log
        Filtered log
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   DEFAULT_TIMESTAMP_KEY)
    lower_bound = get_dt_from_string(dt1)
    upper_bound = get_dt_from_string(dt2)

    kept_events = []
    for event in log_converter.apply(log, variant=log_converter.TO_EVENT_STREAM):
        # The tzinfo is stripped from the event timestamp before it is
        # compared with the bounds.
        naive_timestamp = event[timestamp_key].replace(tzinfo=None)
        if lower_bound < naive_timestamp < upper_bound:
            kept_events.append(event)

    return log_converter.apply(EventStream(kept_events))
def apply_events(df, dt1, dt2, parameters=None):
    """
    Keep only the dataframe rows whose timestamp lies strictly between the
    two bounds.

    Parameters
    ----------
    df
        Pandas dataframe
    dt1
        Lower bound to the interval (exclusive); passed through
        get_dt_from_string before use
    dt2
        Upper bound to the interval (exclusive); passed through
        get_dt_from_string before use
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> Attribute to use as timestamp
            (falls back to DEFAULT_TIMESTAMP_KEY)

    Returns
    ----------
    df
        Filtered dataframe
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   DEFAULT_TIMESTAMP_KEY)
    lower_bound = get_dt_from_string(dt1)
    upper_bound = get_dt_from_string(dt2)

    # Apply the two strict bounds one after the other.
    after_lower = df[df[timestamp_key] > lower_bound]
    return after_lower[after_lower[timestamp_key] < upper_bound]
예제 #3
0
def filter_traces_contained(log, dt1, dt2, parameters=None):
    """
    Keep the traces that is_contained accepts for the given interval

    Parameters
    -----------
    log
        Trace log
    dt1
        Lower bound to the interval; passed through get_dt_from_string
    dt2
        Upper bound to the interval; passed through get_dt_from_string
    parameters
        Possible parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> Attribute to use as timestamp
            (falls back to DEFAULT_TIMESTAMP_KEY)

    Returns
    ------------
    filtered_log
        New EventLog holding the accepted traces
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    lower_bound = get_dt_from_string(dt1)
    upper_bound = get_dt_from_string(dt2)

    contained_traces = []
    for trace in log:
        if is_contained(trace, lower_bound, upper_bound, timestamp_key):
            contained_traces.append(trace)
    return EventLog(contained_traces)
예제 #4
0
def filter_traces_intersecting(log, dt1, dt2, parameters=None):
    """
    Keep the traces that is_intersecting accepts for the given interval

    Parameters
    -----------
    log
        Trace log
    dt1
        Lower bound to the interval; passed through get_dt_from_string
    dt2
        Upper bound to the interval; passed through get_dt_from_string
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> Attribute to use as timestamp
            (falls back to DEFAULT_TIMESTAMP_KEY)

    Returns
    ------------
    filtered_log
        New EventLog holding the accepted traces
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   DEFAULT_TIMESTAMP_KEY)
    lower_bound = get_dt_from_string(dt1)
    upper_bound = get_dt_from_string(dt2)

    intersecting_traces = []
    for trace in log:
        if is_intersecting(trace, lower_bound, upper_bound, timestamp_key):
            intersecting_traces.append(trace)
    return EventLog(intersecting_traces)
def filter_traces_contained(df, dt1, dt2, parameters=None):
    """
    Keep the rows of the cases that lie strictly inside the given interval.

    Parameters
    ----------
    df
        Dataframe (the calls used here -- orderBy, withColumn, filter, drop,
        Window -- are the Spark dataframe API)
    dt1
        Lower bound to the interval (exclusive); passed through
        get_dt_from_string
    dt2
        Upper bound to the interval (exclusive); passed through
        get_dt_from_string
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> Attribute to use as timestamp
            (falls back to DEFAULT_TIMESTAMP_KEY)
            PARAMETER_CONSTANT_CASEID_KEY -> Column used to partition the
            rows into cases (falls back to CASE_CONCEPT_NAME)

    Returns
    ----------
    df
        Filtered dataframe, without the temporary helper columns
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   DEFAULT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(PARAMETER_CONSTANT_CASEID_KEY,
                                  CASE_CONCEPT_NAME)
    lower_bound = get_dt_from_string(dt1)
    upper_bound = get_dt_from_string(dt2)

    # Names of the temporary per-case columns added below.
    last_col = timestamp_key + "_last"
    first_col = timestamp_key + "_first"

    utc_df = importer.convert_timestamp_to_utc_in_df(
        df, timest_columns={timestamp_key})
    ordered = utc_df.orderBy(case_id_glue, timestamp_key)

    # Two windows over the same case partition: one in ascending and one in
    # descending timestamp order.
    ascending = Window().partitionBy(case_id_glue).orderBy(timestamp_key)
    descending = Window().partitionBy(case_id_glue).orderBy(
        F.desc(timestamp_key))

    with_last = ordered.withColumn(
        last_col, F.max(ordered[timestamp_key]).over(descending))
    with_bounds = with_last.withColumn(
        first_col, F.min(with_last[timestamp_key]).over(ascending))

    # A case is kept when it starts after dt1 and ends before dt2.
    contained = with_bounds.filter(with_bounds[first_col] > lower_bound)
    contained = contained.filter(contained[last_col] < upper_bound)

    return contained.drop(last_col, first_col)
예제 #6
0
def filter_traces_attribute_in_timeframe(
    log: EventLog,
    attribute: str,
    attribute_value: Any,
    dt1: Union[str, datetime.datetime],
    dt2: Union[str, datetime.datetime],
    parameters: Optional[Dict[Union[str, Parameters],
                              Any]] = None) -> EventLog:
    """
    Keep the traces that has_attribute_in_timeframe accepts for the given
    attribute, attribute value and interval

    Parameters
    -----------
    log
        Log (converted to an event log before filtering)
    attribute
        The attribute to filter on
    attribute_value
        The attribute value to filter on
    dt1
        Lower bound to the interval; passed through get_dt_from_string
    dt2
        Upper bound to the interval; passed through get_dt_from_string
    parameters
        Possible parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> Attribute to use as timestamp
            (falls back to DEFAULT_TIMESTAMP_KEY)

    Returns
    ------------
    filtered_log
        Filtered log, carrying over the attributes, extensions,
        omni_present and classifiers of the converted log
    """
    parameters = {} if parameters is None else parameters

    log = log_converter.apply(
        log,
        variant=log_converter.Variants.TO_EVENT_LOG,
        parameters=parameters,
    )

    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    lower_bound = get_dt_from_string(dt1)
    upper_bound = get_dt_from_string(dt2)

    matching_traces = []
    for trace in log:
        if has_attribute_in_timeframe(trace, attribute, attribute_value,
                                      lower_bound, upper_bound,
                                      timestamp_key):
            matching_traces.append(trace)

    return EventLog(
        matching_traces,
        attributes=log.attributes,
        extensions=log.extensions,
        omni_present=log.omni_present,
        classifiers=log.classifiers,
    )
예제 #7
0
def filter_traces_intersecting(df, dt1, dt2, parameters=None):
    """
    Keep the rows of the cases that intersect the given interval

    A case is kept when its first timestamp lies strictly inside the
    interval, or its last timestamp lies strictly inside the interval, or
    it starts before dt1 and ends after dt2.

    Parameters
    ----------
    df
        Pandas dataframe
    dt1
        Lower bound to the interval (possibly expressed as string, but automatically converted)
    dt2
        Upper bound to the interval (possibly expressed as string, but automatically converted)
    parameters
        Possible parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> Attribute to use as timestamp
            Parameters.CASE_ID_KEY -> Column used to group the rows into cases

    Returns
    ----------
    df
        Filtered dataframe
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    case_id_glue = exec_utils.get_param_value(
        Parameters.CASE_ID_KEY, parameters, CASE_CONCEPT_NAME)
    dt1 = get_dt_from_string(dt1)
    dt2 = get_dt_from_string(dt2)
    if check_pandas_ge_024():
        # Turn both bounds into UTC-aware pandas timestamps.
        dt1 = pd.to_datetime(dt1.replace(tzinfo=pytz.utc), utc=True)
        dt2 = pd.to_datetime(dt2.replace(tzinfo=pytz.utc), utc=True)

    # One row per case: first-row values plus last-row values (suffix "_2").
    # NOTE(review): these are the earliest/latest timestamps only if each
    # case's rows are in chronological order -- confirm with callers.
    per_case = df[[case_id_glue, timestamp_key]].groupby(df[case_id_glue])
    first_rows = per_case.first()
    last_rows = per_case.last()
    last_rows.columns = [str(name) + '_2' for name in last_rows.columns]
    bounds = pd.concat([first_rows, last_rows], axis=1)

    first_ts = bounds[timestamp_key]
    last_ts = bounds[timestamp_key + "_2"]
    starts_inside = bounds[(first_ts > dt1) & (first_ts < dt2)]
    ends_inside = bounds[(last_ts > dt1) & (last_ts < dt2)]
    spans_interval = bounds[(first_ts < dt1) & (last_ts > dt2)]
    intersecting = pd.concat([starts_inside, ends_inside, spans_interval],
                             axis=0)

    all_case_ids = df.set_index(case_id_glue).index
    kept_case_ids = intersecting.set_index(case_id_glue).index
    return df[all_case_ids.isin(kept_case_ids)]
예제 #8
0
def apply_events(
    log: EventLog,
    dt1: Union[str, datetime.datetime],
    dt2: Union[str, datetime.datetime],
    parameters: Optional[Dict[Union[str, Parameters],
                              Any]] = None) -> EventLog:
    """
    Build a log holding only the events whose timestamp lies in the given
    interval, both bounds included

    Parameters
    -----------
    log
        Log
    dt1
        Lower bound to the interval (inclusive); passed through
        get_dt_from_string
    dt2
        Upper bound to the interval (inclusive); passed through
        get_dt_from_string
    parameters
        Possible parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> Attribute to use as timestamp
            (falls back to DEFAULT_TIMESTAMP_KEY)

    Returns
    ------------
    filtered_log
        Filtered log
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    lower_bound = get_dt_from_string(dt1)
    upper_bound = get_dt_from_string(dt2)

    # The stream conversion is requested with "deepcopy" set to False.
    stream = log_converter.apply(
        log,
        variant=log_converter.TO_EVENT_STREAM,
        parameters={"deepcopy": False},
    )

    kept_events = []
    for event in stream:
        # The tzinfo is stripped before comparing with the bounds.
        naive_timestamp = event[timestamp_key].replace(tzinfo=None)
        if lower_bound <= naive_timestamp <= upper_bound:
            kept_events.append(event)

    filtered_stream = EventStream(
        kept_events,
        attributes=log.attributes,
        extensions=log.extensions,
        omni_present=log.omni_present,
        classifiers=log.classifiers,
        properties=log.properties,
    )
    return log_converter.apply(filtered_stream,
                               variant=log_converter.Variants.TO_EVENT_LOG)
예제 #9
0
def filter_on_trace_attribute(
    log: EventLog,
    dt1: Union[str, datetime.datetime],
    dt2: Union[str, datetime.datetime],
    parameters: Optional[Dict[Union[str, Parameters],
                              Any]] = None) -> EventLog:
    """
    Keep the traces that trace_attr_is_contained accepts for the chosen
    trace attribute and the provided range

    Parameters
    -----------------
    log
        Event log (converted to an event log before filtering)
    dt1
        Left extreme of the time interval; passed through get_dt_from_string
    dt2
        Right extreme of the time interval; passed through get_dt_from_string
    parameters
        Parameters of the filtering, including:
        - Parameters.TIMESTAMP_KEY => trace attribute to use for the filtering
          (falls back to DEFAULT_TIMESTAMP_KEY)

    Returns
    ------------------
    filtered_log
        Filtered event log, carrying over the attributes, extensions,
        omni_present, classifiers and properties of the converted log
    """
    parameters = {} if parameters is None else parameters

    log = log_converter.apply(
        log,
        variant=log_converter.Variants.TO_EVENT_LOG,
        parameters=parameters,
    )

    trace_attribute = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    lower_bound = get_dt_from_string(dt1)
    upper_bound = get_dt_from_string(dt2)

    kept_traces = []
    for trace in log:
        if trace_attr_is_contained(trace, lower_bound, upper_bound,
                                   trace_attribute):
            kept_traces.append(trace)

    return EventLog(
        kept_traces,
        attributes=log.attributes,
        extensions=log.extensions,
        omni_present=log.omni_present,
        classifiers=log.classifiers,
        properties=log.properties,
    )
예제 #10
0
def apply_events(df, dt1, dt2, parameters=None):
    """
    Keep only the rows whose timestamp lies strictly between the two bounds.

    Parameters
    ----------
    df
        Dataframe exposing a schema and a filter method (Spark dataframe API)
    dt1
        Lower bound to the interval (exclusive)
    dt2
        Upper bound to the interval (exclusive)
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> Attribute to use as timestamp
            (falls back to DEFAULT_TIMESTAMP_KEY)

    Returns
    ----------
    filtered_df
        Filtered dataframe
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   DEFAULT_TIMESTAMP_KEY)

    # The bounds go through get_dt_from_string only when the timestamp
    # column is not a string column; otherwise they are compared as given.
    if df.schema[timestamp_key].dataType != StringType():
        dt1 = get_dt_from_string(dt1)
        dt2 = get_dt_from_string(dt2)

    after_lower = df[timestamp_key] > dt1
    before_upper = df[timestamp_key] < dt2
    return df.filter(after_lower & before_upper)
예제 #11
0
def filter_traces_intersecting(
    log: EventLog,
    dt1: Union[str, datetime.datetime],
    dt2: Union[str, datetime.datetime],
    parameters: Optional[Dict[Union[str, Parameters],
                              Any]] = None) -> EventLog:
    """
    Keep the traces that is_intersecting accepts for the given interval

    Parameters
    -----------
    log
        Trace log (converted to an event log before filtering)
    dt1
        Lower bound to the interval; passed through get_dt_from_string
    dt2
        Upper bound to the interval; passed through get_dt_from_string
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> Attribute to use as timestamp
            (falls back to DEFAULT_TIMESTAMP_KEY)

    Returns
    ------------
    filtered_log
        Filtered log, carrying over the attributes, extensions,
        omni_present, classifiers and properties of the converted log
    """
    parameters = {} if parameters is None else parameters

    log = log_converter.apply(
        log,
        variant=log_converter.Variants.TO_EVENT_LOG,
        parameters=parameters,
    )

    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   DEFAULT_TIMESTAMP_KEY)
    lower_bound = get_dt_from_string(dt1)
    upper_bound = get_dt_from_string(dt2)

    intersecting_traces = []
    for trace in log:
        if is_intersecting(trace, lower_bound, upper_bound, timestamp_key):
            intersecting_traces.append(trace)

    return EventLog(
        intersecting_traces,
        attributes=log.attributes,
        extensions=log.extensions,
        omni_present=log.omni_present,
        classifiers=log.classifiers,
        properties=log.properties,
    )
예제 #12
0
def filter_traces_contained(df, dt1, dt2, parameters=None):
    """
    Keep the rows of the cases that lie strictly inside the given interval

    A case is kept when its first timestamp is after dt1 and its last
    timestamp is before dt2.

    Parameters
    ----------
    df
        Pandas dataframe
    dt1
        Lower bound to the interval (possibly expressed as string, but automatically converted)
    dt2
        Upper bound to the interval (possibly expressed as string, but automatically converted)
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> Attribute to use as timestamp
            PARAMETER_CONSTANT_CASEID_KEY -> Column used to group the rows into cases

    Returns
    ----------
    df
        Filtered dataframe
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   DEFAULT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(PARAMETER_CONSTANT_CASEID_KEY,
                                  CASE_CONCEPT_NAME)
    dt1 = get_dt_from_string(dt1)
    dt2 = get_dt_from_string(dt2)
    if check_pandas_ge_024():
        # Turn both bounds into UTC-aware pandas timestamps.
        dt1 = pd.to_datetime(dt1.replace(tzinfo=pytz.utc), utc=True)
        dt2 = pd.to_datetime(dt2.replace(tzinfo=pytz.utc), utc=True)

    # One row per case: first-row values plus last-row values (suffix "_2").
    # NOTE(review): these are the earliest/latest timestamps only if each
    # case's rows are in chronological order -- confirm with callers.
    per_case = df[[case_id_glue, timestamp_key]].groupby(df[case_id_glue])
    first_rows = per_case.first()
    last_rows = per_case.last()
    last_rows.columns = [str(name) + '_2' for name in last_rows.columns]
    bounds = pd.concat([first_rows, last_rows], axis=1)

    contained = bounds[bounds[timestamp_key] > dt1]
    contained = contained[contained[timestamp_key + "_2"] < dt2]

    all_case_ids = df.set_index(case_id_glue).index
    kept_case_ids = contained.set_index(case_id_glue).index
    return df[all_case_ids.isin(kept_case_ids)]
def filter_traces_intersecting(df, dt1, dt2, parameters=None):
    """
    Keep the rows of the cases that intersect the given interval

    A case is kept when its first timestamp lies strictly inside the
    interval, or its last timestamp lies strictly inside the interval, or
    it starts before dt1 and ends after dt2.

    Parameters
    ----------
    df
        Pandas dataframe
    dt1
        Lower bound to the interval (possibly expressed as string, but automatically converted)
    dt2
        Upper bound to the interval (possibly expressed as string, but automatically converted)
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> Attribute to use as timestamp
            PARAMETER_CONSTANT_CASEID_KEY -> Column used to group the rows into cases

    Returns
    ----------
    df
        Filtered dataframe
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   DEFAULT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(PARAMETER_CONSTANT_CASEID_KEY,
                                  CASE_CONCEPT_NAME)
    lower_bound = get_dt_from_string(dt1)
    upper_bound = get_dt_from_string(dt2)

    # One row per case: first-row values plus last-row values (suffix "_2").
    # NOTE(review): these are the earliest/latest timestamps only if each
    # case's rows are in chronological order -- confirm with callers.
    per_case = df[[case_id_glue, timestamp_key]].groupby(df[case_id_glue])
    first_rows = per_case.first()
    last_rows = per_case.last()
    last_rows.columns = [str(name) + '_2' for name in last_rows.columns]
    bounds = pd.concat([first_rows, last_rows], axis=1)

    first_ts = bounds[timestamp_key]
    last_ts = bounds[timestamp_key + "_2"]
    starts_inside = bounds[(first_ts > lower_bound) & (first_ts < upper_bound)]
    ends_inside = bounds[(last_ts > lower_bound) & (last_ts < upper_bound)]
    spans_interval = bounds[(first_ts < lower_bound) & (last_ts > upper_bound)]
    intersecting = pd.concat([starts_inside, ends_inside, spans_interval],
                             axis=0)

    all_case_ids = df.set_index(case_id_glue).index
    kept_case_ids = intersecting.set_index(case_id_glue).index
    return df[all_case_ids.isin(kept_case_ids)]
예제 #14
0
def filter_traces_contained(df: pd.DataFrame, dt1: Union[str, datetime.datetime], dt2: Union[str, datetime.datetime], parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> pd.DataFrame:
    """
    Keep the rows of the cases that lie inside the given interval, both
    bounds included

    A case is kept when its first timestamp is not before dt1 and its last
    timestamp is not after dt2.

    Parameters
    ----------
    df
        Pandas dataframe
    dt1
        Lower bound to the interval (possibly expressed as string, but automatically converted)
    dt2
        Upper bound to the interval (possibly expressed as string, but automatically converted)
    parameters
        Possible parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> Attribute to use as timestamp
            Parameters.CASE_ID_KEY -> Column used to group the rows into cases

    Returns
    ----------
    df
        Filtered dataframe (its attrs are a shallow copy of the input's)
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    case_id_glue = exec_utils.get_param_value(
        Parameters.CASE_ID_KEY, parameters, CASE_CONCEPT_NAME)

    lower_bound = get_dt_from_string(dt1)
    upper_bound = get_dt_from_string(dt2)
    # Turn both bounds into UTC-aware pandas timestamps.
    lower_bound = pd.to_datetime(lower_bound.replace(tzinfo=pytz.utc), utc=True)
    upper_bound = pd.to_datetime(upper_bound.replace(tzinfo=pytz.utc), utc=True)

    # One row per case: first-row values plus last-row values (suffix "_2").
    # NOTE(review): these are the earliest/latest timestamps only if each
    # case's rows are in chronological order -- confirm with callers.
    per_case = df[[case_id_glue, timestamp_key]].groupby(df[case_id_glue])
    first_rows = per_case.first()
    last_rows = per_case.last()
    last_rows.columns = [str(name) + '_2' for name in last_rows.columns]
    bounds = pd.concat([first_rows, last_rows], axis=1)

    contained = bounds[bounds[timestamp_key] >= lower_bound]
    contained = contained[contained[timestamp_key + "_2"] <= upper_bound]

    all_case_ids = df.set_index(case_id_glue).index
    kept_case_ids = contained.set_index(case_id_glue).index
    ret = df[all_case_ids.isin(kept_case_ids)]

    if hasattr(df, 'attrs'):
        ret.attrs = copy(df.attrs)
    else:
        ret.attrs = {}
    return ret
예제 #15
0
def filter_traces_attribute_in_timeframe(df: pd.DataFrame, attribute: str, attribute_value: str, dt1: Union[str, datetime.datetime], dt2: Union[str, datetime.datetime], parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> pd.DataFrame:
    """
    Keep all the rows of every case that has at least one event with the
    given attribute value inside the interval (both bounds included)

    Parameters
    -----------
    df
        Dataframe
    attribute
        The attribute to filter on
    attribute_value
        The attribute value to filter on
    dt1
        Lower bound to the interval; passed through get_dt_from_string
    dt2
        Upper bound to the interval; passed through get_dt_from_string
    parameters
        Possible parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> Attribute to use as timestamp
            Parameters.CASE_ID_KEY -> Column holding the case identifier

    Returns
    ------------
    df
        Filtered dataframe (its attrs are a shallow copy of the input's)
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    case_id_glue = exec_utils.get_param_value(
        Parameters.CASE_ID_KEY, parameters, CASE_CONCEPT_NAME)

    lower_bound = get_dt_from_string(dt1)
    upper_bound = get_dt_from_string(dt2)
    # Turn both bounds into UTC-aware pandas timestamps.
    lower_bound = pd.to_datetime(lower_bound.replace(tzinfo=pytz.utc), utc=True)
    upper_bound = pd.to_datetime(upper_bound.replace(tzinfo=pytz.utc), utc=True)

    # Events carrying the requested value whose timestamp is in range.
    matching_events = df[df[attribute] == attribute_value]
    matching_events = matching_events[
        matching_events[timestamp_key] >= lower_bound]
    matching_events = matching_events[
        matching_events[timestamp_key] <= upper_bound]

    # Go back to the full dataframe and keep whole cases.
    result = df[df[case_id_glue].isin(matching_events[case_id_glue])]

    if hasattr(df, 'attrs'):
        result.attrs = copy(df.attrs)
    else:
        result.attrs = {}
    return result
예제 #16
0
def apply_events(df, dt1, dt2, parameters=None):
    """
    Keep only the dataframe rows whose timestamp lies strictly between the
    two bounds

    Parameters
    ----------
    df
        Pandas dataframe
    dt1
        Lower bound to the interval (possibly expressed as string, but automatically converted)
    dt2
        Upper bound to the interval (possibly expressed as string, but automatically converted)
    parameters
        Possible parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> Attribute to use as timestamp

    Returns
    ----------
    df
        Filtered dataframe
    """
    parameters = {} if parameters is None else parameters

    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    lower_bound = get_dt_from_string(dt1)
    upper_bound = get_dt_from_string(dt2)
    if check_pandas_ge_024():
        # Turn both bounds into UTC-aware pandas timestamps.
        lower_bound = pd.to_datetime(lower_bound.replace(tzinfo=pytz.utc),
                                     utc=True)
        upper_bound = pd.to_datetime(upper_bound.replace(tzinfo=pytz.utc),
                                     utc=True)

    # Apply the two strict bounds one after the other.
    after_lower = df[df[timestamp_key] > lower_bound]
    return after_lower[after_lower[timestamp_key] < upper_bound]
예제 #17
0
def apply_timestamp(ocel: OCEL,
                    min_timest: Union[datetime.datetime, str],
                    max_timest: Union[datetime.datetime, str],
                    parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Restricts the events of the object-centric event log to the provided
    timestamp range (both extremes included), then propagates the filtering
    through filtering_utils.propagate_event_filtering

    Parameters
    -----------------
    ocel
        Object-centric event log (a shallow copy is filtered; the events
        attribute of the passed object is not reassigned)
    min_timest
        Left extreme of the allowed timestamp interval (provided in the format: YYYY-mm-dd HH:MM:SS)
    max_timest
        Right extreme of the allowed timestamp interval (provided in the format: YYYY-mm-dd HH:MM:SS)
    parameters
        Parameters of the algorithm, including:
        - Parameters.TIMESTAMP_KEY => the attribute to use as timestamp
          (falls back to ocel.event_timestamp)

    Returns
    -----------------
    filtered_ocel
        Filtered object-centric event log
    """
    parameters = {} if parameters is None else parameters

    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, ocel.event_timestamp)
    lower_bound = get_dt_from_string(min_timest)
    upper_bound = get_dt_from_string(max_timest)

    filtered_ocel = copy(ocel)
    filtered_ocel.events = filtered_ocel.events[
        filtered_ocel.events[timestamp_key] >= lower_bound]
    filtered_ocel.events = filtered_ocel.events[
        filtered_ocel.events[timestamp_key] <= upper_bound]

    return filtering_utils.propagate_event_filtering(filtered_ocel,
                                                     parameters=parameters)
예제 #18
0
def apply_events(df: pd.DataFrame, dt1: Union[str, datetime.datetime], dt2: Union[str, datetime.datetime], parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> pd.DataFrame:
    """
    Keep only the dataframe rows whose timestamp lies in the given
    interval, both bounds included

    Parameters
    ----------
    df
        Pandas dataframe
    dt1
        Lower bound to the interval (possibly expressed as string, but automatically converted)
    dt2
        Upper bound to the interval (possibly expressed as string, but automatically converted)
    parameters
        Possible parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> Attribute to use as timestamp

    Returns
    ----------
    df
        Filtered dataframe (its attrs are a shallow copy of the input's)
    """
    parameters = {} if parameters is None else parameters

    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    lower_bound = get_dt_from_string(dt1)
    upper_bound = get_dt_from_string(dt2)
    # Turn both bounds into UTC-aware pandas timestamps.
    lower_bound = pd.to_datetime(lower_bound.replace(tzinfo=pytz.utc), utc=True)
    upper_bound = pd.to_datetime(upper_bound.replace(tzinfo=pytz.utc), utc=True)

    in_range = df[df[timestamp_key] >= lower_bound]
    in_range = in_range[in_range[timestamp_key] <= upper_bound]

    if hasattr(df, 'attrs'):
        in_range.attrs = copy(df.attrs)
    else:
        in_range.attrs = {}
    return in_range
예제 #19
0
def apply_events(trace_log, dt1, dt2, parameters=None):
    """
    Build a trace log holding only the events whose timestamp lies strictly
    between the two bounds

    Parameters
    -----------
    trace_log
        Trace log
    dt1
        Lower bound to the interval (exclusive); passed through
        get_dt_from_string
    dt2
        Upper bound to the interval (exclusive); passed through
        get_dt_from_string
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> Attribute to use as timestamp
            (falls back to DEFAULT_TIMESTAMP_KEY)

    Returns
    ------------
    filtered_log
        Filtered trace log
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   DEFAULT_TIMESTAMP_KEY)
    lower_bound = get_dt_from_string(dt1)
    upper_bound = get_dt_from_string(dt2)

    # Flatten to an event log, filter the events, then rebuild the traces.
    kept_events = []
    for event in transform.transform_trace_log_to_event_log(trace_log):
        # The tzinfo is stripped before comparing with the bounds.
        naive_timestamp = event[timestamp_key].replace(tzinfo=None)
        if lower_bound < naive_timestamp < upper_bound:
            kept_events.append(event)

    return transform.transform_event_log_to_trace_log(EventLog(kept_events))
예제 #20
0
def modi_apply_events(log, dt1, dt2, parameters=None):
    """
    Get a new log containing the start/complete event pairs that overlap the
    given interval, clamping the timestamps that fall outside of it

    The log is flattened to an event stream and only events that belong to
    an adjacent 'start' -> 'complete' pair (by 'lifecycle:transition') are
    considered; every other event is dropped:

    - a 'start' event directly followed by a 'complete' event is kept with
      its timestamp moved to dt1 when it starts before dt1 and its
      'complete' happens after dt1, or kept as is when it lies strictly
      inside the interval;
    - a 'complete' event directly preceded by a 'start' event is kept with
      its timestamp moved to dt2 when it ends after dt2 and its 'start'
      happens before dt2, or kept as is when it lies strictly inside the
      interval.

    Kept events have their timestamp overwritten in place with a
    timezone-naive value (either the bound or the original timestamp with
    tzinfo removed).

    Parameters
    -----------
    log
        Log
    dt1
        Lower bound to the interval; passed through get_dt_from_string
    dt2
        Upper bound to the interval; passed through get_dt_from_string
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> Attribute to use as timestamp
            (falls back to DEFAULT_TIMESTAMP_KEY)

    Returns
    ------------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   DEFAULT_TIMESTAMP_KEY)
    dt1 = get_dt_from_string(dt1)
    dt2 = get_dt_from_string(dt2)

    stream = log_converter.apply(log, variant=log_converter.TO_EVENT_STREAM)
    last_index = len(stream) - 1
    valid_stream = []
    for i, x in enumerate(stream):
        transition = x['lifecycle:transition']

        # Only look at the next event when there is one: the previous guard
        # (i != len(stream)) was always true, so a trailing 'start' event
        # made stream[i + 1] raise IndexError.
        if (transition == 'start' and i < last_index
                and stream[i + 1]['lifecycle:transition'] == 'complete'):
            event_time = x[timestamp_key].replace(tzinfo=None)
            next_time = stream[i + 1][timestamp_key].replace(tzinfo=None)
            if event_time < dt1 and next_time > dt1:
                # Activity started before the interval: clamp to dt1.
                x[timestamp_key] = dt1
                valid_stream.append(x)
            elif dt1 < event_time < dt2:
                x[timestamp_key] = event_time
                valid_stream.append(x)

        elif (transition == 'complete' and i != 0
                and stream[i - 1]['lifecycle:transition'] == 'start'):
            event_time = x[timestamp_key].replace(tzinfo=None)
            # The previous 'start' may already have been clamped to dt1 by
            # the branch above on the preceding iteration.
            previous_time = stream[i - 1][timestamp_key].replace(tzinfo=None)
            if event_time > dt2 and previous_time < dt2:
                # Activity ended after the interval: clamp to dt2.
                x[timestamp_key] = dt2
                valid_stream.append(x)
            elif dt1 < event_time < dt2:
                x[timestamp_key] = event_time
                valid_stream.append(x)

    filtered_stream = EventStream(valid_stream)
    filtered_log = log_converter.apply(filtered_stream)

    return filtered_log