def apply_events(log, dt1, dt2, parameters=None):
    """
    Get a new log containing only the events whose timestamp falls
    strictly inside the interval (dt1, dt2).

    Parameters
    -----------
    log
        Log
    dt1
        Lower bound to the interval
    dt2
        Upper bound to the interval
    parameters
        Possible parameters of the algorithm, including:
            timestamp_key -> Attribute to use as timestamp

    Returns
    ------------
    filtered_log
        Filtered log
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY, DEFAULT_TIMESTAMP_KEY)
    lower = get_dt_from_string(dt1)
    upper = get_dt_from_string(dt2)
    stream = log_converter.apply(log, variant=log_converter.TO_EVENT_STREAM)
    kept_events = []
    for event in stream:
        # comparison is done on the naive (tz-stripped) timestamp
        naive_ts = event[timestamp_key].replace(tzinfo=None)
        if lower < naive_ts < upper:
            kept_events.append(event)
    return log_converter.apply(EventStream(kept_events))
def apply_events(df, dt1, dt2, parameters=None):
    """
    Get a new dataframe containing only the events whose timestamp falls
    strictly inside the interval (dt1, dt2).

    Parameters
    ----------
    df
        Pandas dataframe
    dt1
        Lower bound to the interval (possibly expressed as string, but automatically converted)
    dt2
        Upper bound to the interval (possibly expressed as string, but automatically converted)
    parameters
        Possible parameters of the algorithm, including:
            timestamp_key -> Attribute to use as timestamp

    Returns
    ----------
    df
        Filtered dataframe
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY, DEFAULT_TIMESTAMP_KEY)
    lower = get_dt_from_string(dt1)
    upper = get_dt_from_string(dt2)
    # strict bounds on both sides
    in_interval = (df[timestamp_key] > lower) & (df[timestamp_key] < upper)
    return df[in_interval]
def filter_traces_contained(log, dt1, dt2, parameters=None):
    """
    Get the traces that are entirely contained in the given interval.

    Parameters
    -----------
    log
        Trace log_skeleton
    dt1
        Lower bound to the interval
    dt2
        Upper bound to the interval
    parameters
        Possible parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> Attribute to use as timestamp

    Returns
    ------------
    filtered_log
        Filtered log_skeleton
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    lower = get_dt_from_string(dt1)
    upper = get_dt_from_string(dt2)
    kept_traces = []
    for trace in log:
        # containment check is delegated to is_contained
        if is_contained(trace, lower, upper, timestamp_key):
            kept_traces.append(trace)
    return EventLog(kept_traces)
def filter_traces_intersecting(log, dt1, dt2, parameters=None):
    """
    Keep the traces whose lifespan intersects the given interval.

    Parameters
    -----------
    log
        Trace log
    dt1
        Lower bound to the interval
    dt2
        Upper bound to the interval
    parameters
        Possible parameters of the algorithm, including:
            timestamp_key -> Attribute to use as timestamp

    Returns
    ------------
    filtered_log
        Filtered log
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY, DEFAULT_TIMESTAMP_KEY)
    lower = get_dt_from_string(dt1)
    upper = get_dt_from_string(dt2)
    kept_traces = []
    for trace in log:
        # intersection check is delegated to is_intersecting
        if is_intersecting(trace, lower, upper, timestamp_key):
            kept_traces.append(trace)
    return EventLog(kept_traces)
def filter_traces_contained(df, dt1, dt2, parameters=None):
    """
    Gets traces that are contained in the given interval (Spark DataFrame variant).

    Parameters
    ----------
    df
        Spark dataframe
    dt1
        Lower bound to the interval
    dt2
        Upper bound to the interval
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> attribute to use as timestamp
            PARAMETER_CONSTANT_CASEID_KEY -> attribute to use as case identifier

    Returns
    ----------
    stacked_dropped
        Filtered Spark dataframe (original columns only)
    """
    if parameters is None:
        parameters = {}
    timestamp_key = parameters[
        PARAMETER_CONSTANT_TIMESTAMP_KEY] if PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else DEFAULT_TIMESTAMP_KEY
    case_id_glue = parameters[
        PARAMETER_CONSTANT_CASEID_KEY] if PARAMETER_CONSTANT_CASEID_KEY in parameters else CASE_CONCEPT_NAME
    dt1 = get_dt_from_string(dt1)
    dt2 = get_dt_from_string(dt2)
    # normalize the timestamp column to UTC before comparing against the bounds
    df_converted = importer.convert_timestamp_to_utc_in_df(
        df, timest_columns={timestamp_key})
    df_ordered = df_converted.orderBy(case_id_glue, timestamp_key)
    # per-case windows: w is ordered ascending, w2 descending.
    # With the default (unbounded-preceding .. current-row) frame, F.min over w
    # yields the case's first timestamp and F.max over w2 the case's last one,
    # for every row of the case.
    w = Window().partitionBy(case_id_glue).orderBy(timestamp_key)
    w2 = Window().partitionBy(case_id_glue).orderBy(F.desc(timestamp_key))
    stacked = df_ordered.withColumn(timestamp_key + "_last",
                                    F.max(df_ordered[timestamp_key]).over(w2))
    stacked = stacked.withColumn(timestamp_key + "_first",
                                 F.min(stacked[timestamp_key]).over(w))
    # keep rows of cases fully contained in (dt1, dt2); bounds are strict
    stacked = stacked.filter(stacked[timestamp_key + "_first"] > dt1)
    stacked = stacked.filter(stacked[timestamp_key + "_last"] < dt2)
    # drop the helper columns before returning
    stacked_dropped = stacked.drop(timestamp_key + "_last",
                                   timestamp_key + "_first")
    return stacked_dropped
def filter_traces_attribute_in_timeframe(
        log: EventLog, attribute: str, attribute_value: Any,
        dt1: Union[str, datetime.datetime],
        dt2: Union[str, datetime.datetime],
        parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> EventLog:
    """
    Get a new log containing all the traces that have an event in the given
    interval with the specified attribute value.

    Parameters
    -----------
    log
        Log
    attribute
        The attribute to filter on
    attribute_value
        The attribute value to filter on
    dt1
        Lower bound to the interval
    dt2
        Upper bound to the interval
    parameters
        Possible parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> Attribute to use as timestamp

    Returns
    ------------
    filtered_log
        Filtered log
    """
    parameters = {} if parameters is None else parameters
    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    lower = get_dt_from_string(dt1)
    upper = get_dt_from_string(dt2)
    kept_traces = []
    for trace in log:
        # the per-trace check is delegated to has_attribute_in_timeframe
        if has_attribute_in_timeframe(trace, attribute, attribute_value, lower, upper, timestamp_key):
            kept_traces.append(trace)
    return EventLog(kept_traces,
                    attributes=log.attributes,
                    extensions=log.extensions,
                    omni_present=log.omni_present,
                    classifiers=log.classifiers)
def filter_traces_intersecting(df, dt1, dt2, parameters=None):
    """
    Filter traces intersecting the given interval (pandas dataframe variant).

    A case intersects the interval when its first event falls inside it, or
    its last event falls inside it, or the case starts before and ends after
    the interval (i.e. spans it completely).

    Parameters
    ----------
    df
        Pandas dataframe
    dt1
        Lower bound to the interval (possibly expressed as string, but automatically converted)
    dt2
        Upper bound to the interval (possibly expressed as string, but automatically converted)
    parameters
        Possible parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> Attribute to use as timestamp
            Parameters.CASE_ID_KEY -> Column that contains the case identifier

    Returns
    ----------
    df
        Filtered dataframe
    """
    if parameters is None:
        parameters = {}
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    case_id_glue = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, CASE_CONCEPT_NAME)
    dt1 = get_dt_from_string(dt1)
    dt2 = get_dt_from_string(dt2)
    # on pandas >= 0.24 the timestamp column is tz-aware, so the bounds must be
    # made tz-aware (UTC) too before they can be compared against it
    needs_conversion = check_pandas_ge_024()
    if needs_conversion:
        dt1 = dt1.replace(tzinfo=pytz.utc)
        dt2 = dt2.replace(tzinfo=pytz.utc)
        dt1 = pd.to_datetime(dt1, utc=True)
        dt2 = pd.to_datetime(dt2, utc=True)
    # first/last event timestamp of every case, side by side
    # (rows of 'stacked': index = case id, columns = first/last values)
    grouped_df = df[[case_id_glue, timestamp_key]].groupby(df[case_id_glue])
    first = grouped_df.first()
    last = grouped_df.last()
    last.columns = [str(col) + '_2' for col in last.columns]
    stacked = pd.concat([first, last], axis=1)
    # case starts inside the interval
    stacked1 = stacked[stacked[timestamp_key] > dt1]
    stacked1 = stacked1[stacked1[timestamp_key] < dt2]
    # case ends inside the interval
    stacked2 = stacked[stacked[timestamp_key + "_2"] > dt1]
    stacked2 = stacked2[stacked2[timestamp_key + "_2"] < dt2]
    # case spans the whole interval
    stacked3 = stacked[stacked[timestamp_key] < dt1]
    stacked3 = stacked3[stacked3[timestamp_key + "_2"] > dt2]
    # a case may satisfy several conditions and appear more than once here;
    # that is harmless because only index membership is used below
    stacked = pd.concat([stacked1, stacked2, stacked3], axis=0)
    i1 = df.set_index(case_id_glue).index
    i2 = stacked.set_index(case_id_glue).index
    return df[i1.isin(i2)]
def apply_events(
        log: EventLog, dt1: Union[str, datetime.datetime],
        dt2: Union[str, datetime.datetime],
        parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> EventLog:
    """
    Get a new log containing all the events whose timestamp falls inside
    the closed interval [dt1, dt2].

    Parameters
    -----------
    log
        Log
    dt1
        Lower bound to the interval
    dt2
        Upper bound to the interval
    parameters
        Possible parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> Attribute to use as timestamp

    Returns
    ------------
    filtered_log
        Filtered log
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    lower = get_dt_from_string(dt1)
    upper = get_dt_from_string(dt2)
    # deepcopy disabled: the stream shares the event objects with the log
    stream = log_converter.apply(log, variant=log_converter.TO_EVENT_STREAM,
                                 parameters={"deepcopy": False})
    kept_events = []
    for event in stream:
        naive_ts = event[timestamp_key].replace(tzinfo=None)
        if lower <= naive_ts <= upper:
            kept_events.append(event)
    filtered_stream = EventStream(kept_events,
                                  attributes=log.attributes,
                                  extensions=log.extensions,
                                  omni_present=log.omni_present,
                                  classifiers=log.classifiers,
                                  properties=log.properties)
    return log_converter.apply(filtered_stream,
                               variant=log_converter.Variants.TO_EVENT_LOG)
def filter_on_trace_attribute(
        log: EventLog, dt1: Union[str, datetime.datetime],
        dt2: Union[str, datetime.datetime],
        parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> EventLog:
    """
    Filters the traces of the event log that have a given trace attribute
    falling in the provided range.

    Parameters
    -----------------
    log
        Event log
    dt1
        Left extreme of the time interval
    dt2
        Right extreme of the time interval
    parameters
        Parameters of the filtering, including:
        - Parameters.TIMESTAMP_KEY => trace attribute to use for the filtering

    Returns
    ------------------
    filtered_log
        Filtered event log
    """
    parameters = {} if parameters is None else parameters
    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)
    # NOTE: the TIMESTAMP_KEY parameter here names a *trace* attribute
    trace_attribute = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    lower = get_dt_from_string(dt1)
    upper = get_dt_from_string(dt2)
    kept_traces = []
    for trace in log:
        if trace_attr_is_contained(trace, lower, upper, trace_attribute):
            kept_traces.append(trace)
    return EventLog(kept_traces,
                    attributes=log.attributes,
                    extensions=log.extensions,
                    omni_present=log.omni_present,
                    classifiers=log.classifiers,
                    properties=log.properties)
def apply_events(df, dt1, dt2, parameters=None):
    """
    Gets a new Spark DataFrame with all the events contained in the given interval.

    Parameters
    ----------
    df
        Spark dataframe
    dt1
        Lower bound to the interval
    dt2
        Upper bound to the interval
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> attribute to use as timestamp

    Returns
    ----------
    filtered_df
        Filtered Spark dataframe
    """
    if parameters is None:
        parameters = {}
    timestamp_key = parameters[
        PARAMETER_CONSTANT_TIMESTAMP_KEY] if PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else DEFAULT_TIMESTAMP_KEY
    # the bounds are parsed into datetimes only when the column does not hold
    # strings; for a string-typed column the raw bound values are used as-is
    # (presumably relying on string comparison — TODO confirm intended behavior)
    if df.schema[timestamp_key].dataType != StringType():
        dt1 = get_dt_from_string(dt1)
        dt2 = get_dt_from_string(dt2)
    # strict bounds on both sides
    filtered_df = df.filter((df[timestamp_key] > dt1) & (df[timestamp_key] < dt2))
    return filtered_df
def filter_traces_intersecting(
        log: EventLog, dt1: Union[str, datetime.datetime],
        dt2: Union[str, datetime.datetime],
        parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> EventLog:
    """
    Filter traces intersecting the given interval.

    Parameters
    -----------
    log
        Trace log
    dt1
        Lower bound to the interval
    dt2
        Upper bound to the interval
    parameters
        Possible parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> Attribute to use as timestamp

    Returns
    ------------
    filtered_log
        Filtered log
    """
    parameters = {} if parameters is None else parameters
    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)
    # legacy-style parameter lookup kept for backward compatibility
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY, DEFAULT_TIMESTAMP_KEY)
    lower = get_dt_from_string(dt1)
    upper = get_dt_from_string(dt2)
    kept_traces = []
    for trace in log:
        if is_intersecting(trace, lower, upper, timestamp_key):
            kept_traces.append(trace)
    return EventLog(kept_traces,
                    attributes=log.attributes,
                    extensions=log.extensions,
                    omni_present=log.omni_present,
                    classifiers=log.classifiers,
                    properties=log.properties)
def filter_traces_contained(df, dt1, dt2, parameters=None):
    """
    Get traces that are entirely contained in the given interval: the first
    event strictly after dt1 and the last event strictly before dt2.

    Parameters
    ----------
    df
        Pandas dataframe
    dt1
        Lower bound to the interval (possibly expressed as string, but automatically converted)
    dt2
        Upper bound to the interval (possibly expressed as string, but automatically converted)
    parameters
        Possible parameters of the algorithm, including:
            timestamp_key -> Attribute to use as timestamp
            case_id_glue -> Column that contains the case identifier

    Returns
    ----------
    df
        Filtered dataframe
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY, DEFAULT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(PARAMETER_CONSTANT_CASEID_KEY, CASE_CONCEPT_NAME)
    lower = get_dt_from_string(dt1)
    upper = get_dt_from_string(dt2)
    if check_pandas_ge_024():
        # recent pandas stores tz-aware timestamps; bounds must be tz-aware too
        lower = pd.to_datetime(lower.replace(tzinfo=pytz.utc), utc=True)
        upper = pd.to_datetime(upper.replace(tzinfo=pytz.utc), utc=True)
    # first and last event timestamp of every case, side by side
    grouped = df[[case_id_glue, timestamp_key]].groupby(df[case_id_glue])
    last = grouped.last()
    last.columns = [str(col) + '_2' for col in last.columns]
    endpoints = pd.concat([grouped.first(), last], axis=1)
    contained = (endpoints[timestamp_key] > lower) & (endpoints[timestamp_key + "_2"] < upper)
    kept = endpoints[contained]
    all_cases = df.set_index(case_id_glue).index
    kept_cases = kept.set_index(case_id_glue).index
    return df[all_cases.isin(kept_cases)]
def filter_traces_intersecting(df, dt1, dt2, parameters=None):
    """
    Filter traces intersecting the given interval: a case is kept when it
    starts inside the interval, ends inside it, or spans it completely.

    Parameters
    ----------
    df
        Pandas dataframe
    dt1
        Lower bound to the interval (possibly expressed as string, but automatically converted)
    dt2
        Upper bound to the interval (possibly expressed as string, but automatically converted)
    parameters
        Possible parameters of the algorithm, including:
            timestamp_key -> Attribute to use as timestamp
            case_id_glue -> Column that contains the case identifier

    Returns
    ----------
    df
        Filtered dataframe
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY, DEFAULT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(PARAMETER_CONSTANT_CASEID_KEY, CASE_CONCEPT_NAME)
    lower = get_dt_from_string(dt1)
    upper = get_dt_from_string(dt2)
    # first and last event timestamp of every case, side by side
    grouped = df[[case_id_glue, timestamp_key]].groupby(df[case_id_glue])
    last = grouped.last()
    last.columns = [str(col) + '_2' for col in last.columns]
    endpoints = pd.concat([grouped.first(), last], axis=1)
    starts_inside = (endpoints[timestamp_key] > lower) & (endpoints[timestamp_key] < upper)
    ends_inside = (endpoints[timestamp_key + "_2"] > lower) & (endpoints[timestamp_key + "_2"] < upper)
    spans = (endpoints[timestamp_key] < lower) & (endpoints[timestamp_key + "_2"] > upper)
    kept = endpoints[starts_inside | ends_inside | spans]
    all_cases = df.set_index(case_id_glue).index
    kept_cases = kept.set_index(case_id_glue).index
    return df[all_cases.isin(kept_cases)]
def filter_traces_contained(df: pd.DataFrame, dt1: Union[str, datetime.datetime], dt2: Union[str, datetime.datetime],
                            parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> pd.DataFrame:
    """
    Get traces that are contained in the given closed interval: the first
    event at or after dt1 and the last event at or before dt2.

    Parameters
    ----------
    df
        Pandas dataframe
    dt1
        Lower bound to the interval (possibly expressed as string, but automatically converted)
    dt2
        Upper bound to the interval (possibly expressed as string, but automatically converted)
    parameters
        Possible parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> Attribute to use as timestamp
            Parameters.CASE_ID_KEY -> Column that contains the case identifier

    Returns
    ----------
    df
        Filtered dataframe
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    case_id_glue = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, CASE_CONCEPT_NAME)
    # make the bounds tz-aware (UTC) so they compare against tz-aware columns
    lower = pd.to_datetime(get_dt_from_string(dt1).replace(tzinfo=pytz.utc), utc=True)
    upper = pd.to_datetime(get_dt_from_string(dt2).replace(tzinfo=pytz.utc), utc=True)
    # first and last event timestamp of every case, side by side
    grouped = df[[case_id_glue, timestamp_key]].groupby(df[case_id_glue])
    last = grouped.last()
    last.columns = [str(col) + '_2' for col in last.columns]
    endpoints = pd.concat([grouped.first(), last], axis=1)
    contained = (endpoints[timestamp_key] >= lower) & (endpoints[timestamp_key + "_2"] <= upper)
    kept = endpoints[contained]
    all_cases = df.set_index(case_id_glue).index
    kept_cases = kept.set_index(case_id_glue).index
    ret = df[all_cases.isin(kept_cases)]
    # preserve pm4py metadata stored in the dataframe's attrs
    ret.attrs = copy(df.attrs) if hasattr(df, 'attrs') else {}
    return ret
def filter_traces_attribute_in_timeframe(df: pd.DataFrame, attribute: str, attribute_value: str,
                                         dt1: Union[str, datetime.datetime], dt2: Union[str, datetime.datetime],
                                         parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> pd.DataFrame:
    """
    Get a new dataframe containing all the traces that have an event in the
    given interval with the specified attribute value.

    Parameters
    -----------
    df
        Dataframe
    attribute
        The attribute to filter on
    attribute_value
        The attribute value to filter on
    dt1
        Lower bound to the interval
    dt2
        Upper bound to the interval
    parameters
        Possible parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> Attribute to use as timestamp

    Returns
    ------------
    df
        Filtered dataframe
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    case_id_glue = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, CASE_CONCEPT_NAME)
    # make the bounds tz-aware (UTC) so they compare against tz-aware columns
    lower = pd.to_datetime(get_dt_from_string(dt1).replace(tzinfo=pytz.utc), utc=True)
    upper = pd.to_datetime(get_dt_from_string(dt2).replace(tzinfo=pytz.utc), utc=True)
    # events with the requested attribute value inside the closed interval
    matching = df[df[attribute] == attribute_value]
    matching = matching[matching[timestamp_key] >= lower]
    matching = matching[matching[timestamp_key] <= upper]
    # keep every event of the cases that contain at least one matching event
    filtered = df[df[case_id_glue].isin(matching[case_id_glue])]
    # preserve pm4py metadata stored in the dataframe's attrs
    filtered.attrs = copy(df.attrs) if hasattr(df, 'attrs') else {}
    return filtered
def apply_events(df, dt1, dt2, parameters=None):
    """
    Get a new dataframe containing only the events whose timestamp falls
    strictly inside the interval (dt1, dt2).

    Parameters
    ----------
    df
        Pandas dataframe
    dt1
        Lower bound to the interval (possibly expressed as string, but automatically converted)
    dt2
        Upper bound to the interval (possibly expressed as string, but automatically converted)
    parameters
        Possible parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> Attribute to use as timestamp

    Returns
    ----------
    df
        Filtered dataframe
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    lower = get_dt_from_string(dt1)
    upper = get_dt_from_string(dt2)
    if check_pandas_ge_024():
        # recent pandas stores tz-aware timestamps; bounds must be tz-aware too
        lower = pd.to_datetime(lower.replace(tzinfo=pytz.utc), utc=True)
        upper = pd.to_datetime(upper.replace(tzinfo=pytz.utc), utc=True)
    in_interval = (df[timestamp_key] > lower) & (df[timestamp_key] < upper)
    return df[in_interval]
def apply_timestamp(ocel: OCEL, min_timest: Union[datetime.datetime, str], max_timest: Union[datetime.datetime, str],
                    parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Filters the object-centric event log keeping events in the provided
    timestamp range (bounds included).

    Parameters
    -----------------
    ocel
        Object-centric event log
    min_timest
        Left extreme of the allowed timestamp interval (provided in the format: YYYY-mm-dd HH:MM:SS)
    max_timest
        Right extreme of the allowed timestamp interval (provided in the format: YYYY-mm-dd HH:MM:SS)
    parameters
        Parameters of the algorithm, including:
        - Parameters.TIMESTAMP_KEY => the attribute to use as timestamp

    Returns
    -----------------
    filtered_ocel
        Filtered object-centric event log
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, ocel.event_timestamp)
    lower = get_dt_from_string(min_timest)
    upper = get_dt_from_string(max_timest)
    # shallow-copy the OCEL so the input object is not mutated
    filtered_ocel = copy(ocel)
    events = filtered_ocel.events
    in_range = (events[timestamp_key] >= lower) & (events[timestamp_key] <= upper)
    filtered_ocel.events = events[in_range]
    # drop relations/objects that refer to removed events
    return filtering_utils.propagate_event_filtering(filtered_ocel, parameters=parameters)
def apply_events(df: pd.DataFrame, dt1: Union[str, datetime.datetime], dt2: Union[str, datetime.datetime],
                 parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> pd.DataFrame:
    """
    Get a new dataframe containing all the events whose timestamp falls
    inside the closed interval [dt1, dt2].

    Parameters
    ----------
    df
        Pandas dataframe
    dt1
        Lower bound to the interval (possibly expressed as string, but automatically converted)
    dt2
        Upper bound to the interval (possibly expressed as string, but automatically converted)
    parameters
        Possible parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> Attribute to use as timestamp

    Returns
    ----------
    df
        Filtered dataframe
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    # make the bounds tz-aware (UTC) so they compare against tz-aware columns
    lower = pd.to_datetime(get_dt_from_string(dt1).replace(tzinfo=pytz.utc), utc=True)
    upper = pd.to_datetime(get_dt_from_string(dt2).replace(tzinfo=pytz.utc), utc=True)
    in_interval = (df[timestamp_key] >= lower) & (df[timestamp_key] <= upper)
    ret = df[in_interval]
    # preserve pm4py metadata stored in the dataframe's attrs
    ret.attrs = copy(df.attrs) if hasattr(df, 'attrs') else {}
    return ret
def apply_events(trace_log, dt1, dt2, parameters=None):
    """
    Get a new trace log containing only the events whose timestamp falls
    strictly inside the interval (dt1, dt2).

    Parameters
    -----------
    trace_log
        Trace log
    dt1
        Lower bound to the interval
    dt2
        Upper bound to the interval
    parameters
        Possible parameters of the algorithm, including:
            timestamp_key -> Attribute to use as timestamp

    Returns
    ------------
    filtered_log
        Filtered trace log
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY, DEFAULT_TIMESTAMP_KEY)
    lower = get_dt_from_string(dt1)
    upper = get_dt_from_string(dt2)
    # flatten the trace log to events, filter, then rebuild the traces
    event_log = transform.transform_trace_log_to_event_log(trace_log)
    kept_events = []
    for event in event_log:
        naive_ts = event[timestamp_key].replace(tzinfo=None)
        if lower < naive_ts < upper:
            kept_events.append(event)
    filtered_event_log = EventLog(kept_events)
    return transform.transform_event_log_to_trace_log(filtered_event_log)
def modi_apply_events(log, dt1, dt2, parameters=None):
    """
    Get a new log containing the events in the given interval, clamping the
    timestamps of start/complete pairs that straddle an interval border to
    the border itself: a 'start' event before dt1 whose matching 'complete'
    is after dt1 is moved to dt1; a 'complete' event after dt2 whose matching
    'start' is before dt2 is moved to dt2.

    NOTE(review): events that are not part of an adjacent start/complete pair
    are dropped even when they fall inside the interval — presumably
    intentional for this "modi" variant; confirm with callers.
    Events kept in the output are mutated in place (their timestamp is
    rewritten as a naive datetime).

    Parameters
    -----------
    log
        Log
    dt1
        Lower bound to the interval
    dt2
        Upper bound to the interval
    parameters
        Possible parameters of the algorithm, including:
            timestamp_key -> Attribute to use as timestamp

    Returns
    ------------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    timestamp_key = parameters[
        PARAMETER_CONSTANT_TIMESTAMP_KEY] if PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else DEFAULT_TIMESTAMP_KEY
    dt1 = get_dt_from_string(dt1)
    dt2 = get_dt_from_string(dt2)
    stream = log_converter.apply(log, variant=log_converter.TO_EVENT_STREAM)
    valid_stream = list()
    for i, x in enumerate(stream):
        ts = x[timestamp_key].replace(tzinfo=None)
        # BUGFIX: was `i != len(stream)`, which is always true for enumerate
        # indices and made `stream[i + 1]` raise IndexError when the last
        # event of the stream was a 'start'
        if i + 1 < len(stream) and x['lifecycle:transition'] == 'start' \
                and stream[i + 1]['lifecycle:transition'] == 'complete':
            next_ts = stream[i + 1][timestamp_key].replace(tzinfo=None)
            if ts < dt1 and next_ts > dt1:
                # activity started before the window but is still running at
                # dt1: clamp its start to the left border
                x[timestamp_key] = dt1
                valid_stream.append(x)
            elif dt1 < ts < dt2:
                x[timestamp_key] = ts
                valid_stream.append(x)
        elif i != 0 and x['lifecycle:transition'] == 'complete' \
                and stream[i - 1]['lifecycle:transition'] == 'start':
            prev_ts = stream[i - 1][timestamp_key].replace(tzinfo=None)
            if ts > dt2 and prev_ts < dt2:
                # activity completes after the window but was running at dt2:
                # clamp its completion to the right border
                x[timestamp_key] = dt2
                valid_stream.append(x)
            elif dt1 < ts < dt2:
                x[timestamp_key] = ts
                valid_stream.append(x)
    filtered_stream = EventStream(valid_stream)
    filtered_log = log_converter.apply(filtered_stream)
    return filtered_log