def map_(func, log):
    """
    Apply *func* to every element of the given log object.

    The domain and target of *func* need to be of the same type (either
    trace or event); otherwise the map behaves unexpectedly.

    Parameters
    ----------
    func
        Callable applied to each trace (EventLog) or event (EventStream).
    log
        EventLog or EventStream instance.

    Returns
    -------
    A new log object of the same type with *func* applied to each element,
    or the original object (after a warning) when the type is unsupported.
    """
    # try the two supported container types in turn; metadata is carried over
    for log_type in (log_inst.EventLog, log_inst.EventStream):
        if isinstance(log, log_type):
            return log_type(list(map(func, log)),
                            attributes=log.attributes,
                            classifiers=log.classifiers,
                            omni_present=log.omni_present,
                            extensions=log.extensions)
    warnings.warn(
        'input log_skeleton object not of appropriate type, map() not applied'
    )
    return log
def filter_(func, log):
    """
    Filter the given log object with a predicate.

    Parameters
    ----------
    func
        Predicate applied to each trace (EventLog) or event (EventStream);
        elements for which it is falsy are dropped.
    log
        EventLog or EventStream instance.

    Returns
    -------
    A new log object of the same type containing only the elements that
    satisfy *func*, or the original object (after a warning) when the type
    is unsupported.
    """
    # the two supported container types share the same reconstruction recipe
    for log_type in (log_inst.EventLog, log_inst.EventStream):
        if isinstance(log, log_type):
            return log_type(list(filter(func, log)),
                            attributes=log.attributes,
                            classifiers=log.classifiers,
                            omni_present=log.omni_present,
                            extensions=log.extensions)
    warnings.warn(
        'input log_skeleton object not of appropriate type, filter() not applied'
    )
    return log
def __transform_event_log_to_event_stream(log, include_case_attributes=True,
                                          case_attribute_prefix=pmutil.CASE_ATTRIBUTE_PREFIX,
                                          enable_deepcopy=False):
    """
    Convert the event log to an event stream.

    Parameters
    ----------
    log: :class:`pm4py.log.log.EventLog`
        An Event log
    include_case_attributes
        Copy each trace attribute onto its events (default True).
    case_attribute_prefix
        Prefix prepended to copied trace attribute keys (default 'case:').
    enable_deepcopy
        Deep-copy each event before modifying it, avoiding references
        between input and output objects (default False; the input log's
        events are then modified in place).

    Returns
    -------
    log : :class:`pm4py.log.log.EventLog`
        An Event stream
    """
    events = []
    for trace in log:
        # fix 14/02/2019: since the XES standard does not force to specify a
        # case ID, when event log -> event stream conversion is done, the
        # possibility to get back the original event log is lost; a glue
        # value derived from the trace hash preserves the grouping
        trace_glue = str(hash(trace))
        for event in trace:
            stream_event = deepcopy(event) if enable_deepcopy else event
            if include_case_attributes:
                for attr_key, attr_value in trace.attributes.items():
                    stream_event[case_attribute_prefix + attr_key] = attr_value
            if pmutil.CASE_ATTRIBUTE_GLUE not in stream_event:
                stream_event[pmutil.CASE_ATTRIBUTE_GLUE] = trace_glue
            events.append(stream_event)
    return log_instance.EventStream(events, attributes=log.attributes,
                                    classifiers=log.classifiers,
                                    omni_present=log.omni_present,
                                    extensions=log.extensions)
def sort_log(log, key, reverse=False):
    """
    Sort the event log according to a given key.

    Parameters
    ----------
    log
        Event log object; either EventLog or EventStream.
    key
        Sorting key applied to each trace/event.
    reverse
        Indicates whether sorting should be reversed (default False).

    Returns
    -------
    Sorted event log if the object provided is of a supported type;
    the original log otherwise (after emitting a warning).
    """
    # both supported containers are rebuilt the same way from sorted elements
    for log_type in (log_inst.EventLog, log_inst.EventStream):
        if isinstance(log, log_type):
            return log_type(sorted(log, key=key, reverse=reverse),
                            attributes=log.attributes,
                            classifiers=log.classifiers,
                            omni_present=log.omni_present,
                            extensions=log.extensions)
    warnings.warn(
        'input log object not of appropriate type, sorted() not applied')
    return log
def apply(log, parameters=None):
    """
    Convert the given object (pandas DataFrame or EventLog) to an EventStream.

    Parameters
    ----------
    log
        pandas DataFrame, EventLog, or an object that is already a stream.
    parameters
        Optional dict; recognized keys:
        - STREAM_POSTPROCESSING: bool, post-process the record list (default False)
        - log_util.PARAMETER_KEY_CASE_ATTRIBUTE_PRFIX: prefix for case attributes
          (default log_util.CASE_ATTRIBUTE_PREFIX)
        - DEEPCOPY: bool, deep-copy events during conversion (default False)

    Returns
    -------
    EventStream when the input is a DataFrame or EventLog; otherwise the
    input object unchanged.
    """
    if parameters is None:
        parameters = {}
    if isinstance(log, pandas.core.frame.DataFrame):
        list_events = log.to_dict('records')
        # optional clean-up of the raw records before wrapping them
        if parameters.get(STREAM_POSTPROCESSING, False):
            list_events = postprocess_stream(list_events)
        log = log_instance.EventStream(list_events,
                                       attributes={'origin': 'csv'})
    if isinstance(log, pm4py.objects.log.log.EventLog):
        # fix: the original re-checked "parameters is not None" here, which is
        # dead code — the guard at the top already guarantees a dict
        case_pref = parameters.get(log_util.PARAMETER_KEY_CASE_ATTRIBUTE_PRFIX,
                                   log_util.CASE_ATTRIBUTE_PREFIX)
        enable_deepcopy = parameters.get(DEEPCOPY, False)
        return transform_event_log_to_event_stream(
            log, include_case_attributes=True,
            case_attribute_prefix=case_pref,
            enable_deepcopy=enable_deepcopy)
    return log
def import_event_stream(path, sep=None, quote=None, header=None,
                        inferSchema=True, timest_columns=None, sort=True,
                        sort_field="time:timestamp", ascending=True,
                        numPartition=DEFAULT_NUM_PARTITION):
    """
    Import an `EventStream` from the given path of a CSV format file.

    Parameters
    ----------
    path
        Path to the CSV file.
    sep, quote, header, inferSchema
        CSV parsing options forwarded to the Spark dataframe importer.
    timest_columns
        Columns to be treated as timestamps (forwarded to the importer).
    sort, sort_field, ascending
        Whether and how the imported dataframe is sorted.
    numPartition
        Number of Spark partitions to use.

    Returns
    -------
    EventStream with attribute origin='csv'.
    """
    spark_df = import_sparkdf_from_path(path, sep=sep, quote=quote,
                                        header=header, inferSchema=inferSchema,
                                        timest_columns=timest_columns,
                                        sort=sort, sort_field=sort_field,
                                        ascending=ascending,
                                        numPartition=numPartition)
    # materialize each Spark Row as a plain dict, then wrap in an EventStream
    # (removed a stale block of commented-out pair-RDD experimentation code)
    rdd = spark_df.rdd.map(lambda row: row.asDict())
    event_stream = rdd.collect()
    event_stream = log_instance.EventStream(event_stream,
                                            attributes={'origin': 'csv'})
    return event_stream
def filter_log(f, log):
    """
    Filter the log according to a given (lambda) function.

    Parameters
    ----------
    f
        Function specifying the filter criterion; may be a lambda.
    log
        Event log; either EventLog or EventStream object.

    Returns
    -------
    log
        Filtered event log if the object provided is of a supported type;
        the original log otherwise (after emitting a warning).
    """
    if isinstance(log, log_inst.EventLog):
        kept_traces = [trace for trace in log if f(trace)]
        return log_inst.EventLog(kept_traces,
                                 attributes=log.attributes,
                                 classifiers=log.classifiers,
                                 omni_present=log.omni_present,
                                 extensions=log.extensions)
    if isinstance(log, log_inst.EventStream):
        kept_events = [event for event in log if f(event)]
        return log_inst.EventStream(kept_events,
                                    attributes=log.attributes,
                                    classifiers=log.classifiers,
                                    omni_present=log.omni_present,
                                    extensions=log.extensions)
    warnings.warn(
        'input log object not of appropriate type, filter() not applied')
    return log
def transform_event_log_to_event_stream(log, include_case_attributes=True,
                                        case_attribute_prefix=log_util.CASE_ATTRIBUTE_PREFIX,
                                        enable_deepcopy=False):
    """
    Convert the log to an event stream.

    Parameters
    ----------
    log: :class:`pm4py.log.log.EventLog`
        A log
    include_case_attributes:
        Default is True; copies each trace attribute onto its events.
    case_attribute_prefix:
        Default is 'case:'
    enable_deepcopy
        Enables deepcopy of each event, avoiding references between input
        and output objects. Default False preserves the previous behavior,
        where the input log's events are modified in place.

    Returns
    -------
    log : :class:`pm4py.log.log.EventLog`
        An Event log
    """
    # local import keeps this backward-compatible generalization
    # self-contained (only needed when enable_deepcopy is requested)
    from copy import deepcopy
    events = []
    for trace in log:
        for event in trace:
            new_event = deepcopy(event) if enable_deepcopy else event
            if include_case_attributes:
                # NOTE: without enable_deepcopy these writes also land in the
                # events of the input log (shared references)
                for key, value in trace.attributes.items():
                    new_event[case_attribute_prefix + key] = value
            events.append(new_event)
    return log_instance.EventStream(events, attributes=log.attributes,
                                    classifiers=log.classifiers,
                                    omni_present=log.omni_present,
                                    extensions=log.extensions)
def apply(log, parameters=None):
    """
    Converts the event log to an event stream

    Parameters
    ----------
    log: :class:`pm4py.log.log.EventLog`
        An Event log
    include_case_attributes:
        Default is True
    case_attribute_prefix:
        Default is 'case:'
    enable_deepcopy
        Enables deepcopy (avoid references between input and output objects)

    Returns
    -------
    log : :class:`pm4py.log.log.EventLog`
        An Event stream
    """
    if parameters is None:
        parameters = {}
    # resolve all options up front via exec_utils; note DEEP_COPY defaults
    # to True here (other variants of apply() in the codebase default False)
    stream_post_processing = exec_utils.get_param_value(
        Parameters.STREAM_POST_PROCESSING, parameters, False)
    case_pref = exec_utils.get_param_value(Parameters.CASE_ATTRIBUTE_PREFIX,
                                           parameters, 'case:')
    enable_deepcopy = exec_utils.get_param_value(Parameters.DEEP_COPY,
                                                 parameters, True)
    include_case_attributes = exec_utils.get_param_value(
        Parameters.INCLUDE_CASE_ATTRIBUTES, parameters, True)
    compress = exec_utils.get_param_value(Parameters.COMPRESS, parameters,
                                          True)
    # pandas is an optional dependency: only attempt the DataFrame branch
    # when the package is importable
    if pkgutil.find_loader("pandas"):
        import pandas
        if isinstance(log, pandas.DataFrame):
            # extensions are detected from the dataframe before conversion so
            # they can be re-attached to the resulting stream below
            extensions = __detect_extensions(log)
            list_events = pandas_utils.to_dict_records(log)
            if stream_post_processing:
                list_events = __postprocess_stream(list_events)
            if compress:
                # __compress presumably deduplicates shared values to save
                # memory — semantics defined elsewhere in this module
                list_events = __compress(list_events)
            for i in range(len(list_events)):
                list_events[i] = Event(list_events[i])
            log = log_instance.EventStream(list_events,
                                           attributes={'origin': 'csv'})
            for ex in extensions:
                log.extensions[ex.name] = {
                    xes_constants.KEY_PREFIX: ex.prefix,
                    xes_constants.KEY_URI: ex.uri
                }
    # an EventLog (possibly not produced above) is flattened to a stream;
    # any other object (e.g. an EventStream already) is returned unchanged
    if isinstance(log, EventLog):
        return __transform_event_log_to_event_stream(
            log,
            include_case_attributes=include_case_attributes,
            case_attribute_prefix=case_pref,
            enable_deepcopy=enable_deepcopy)
    return log
def import_event_stream(path, sort=True, sort_field="time:timestamp",
                        ascending=True, numPartition=DEFAULT_NUM_PARTITION):
    """
    Import an `EventStream` from the given path of a PARQUET format file.

    Parameters
    ----------
    path
        Path to the PARQUET file.
    sort, sort_field, ascending
        Whether and how the imported dataframe is sorted.
    numPartition
        Number of Spark partitions to use.

    Returns
    -------
    EventStream with attribute origin='parquet'.
    """
    spark_df = import_sparkdf_from_path(path, sort=sort,
                                        sort_field=sort_field,
                                        ascending=ascending,
                                        numPartition=numPartition)
    # collect each Spark Row as a plain dict and wrap the list in a stream
    records = spark_df.rdd.map(lambda row: row.asDict()).collect()
    return log_instance.EventStream(records,
                                    attributes={'origin': 'parquet'})
def sort_(func, log, reverse=False):
    """
    Sort the log according to a given key function.

    Parameters
    ----------
    func
        Sorting key, applied to each trace (EventLog) or event (EventStream).
    log
        EventLog or EventStream instance.
    reverse
        Indicates whether sorting should be reversed (default False).

    Returns
    -------
    A sorted copy of the log if the object is of a supported type;
    otherwise the original object, after emitting a warning.
    """
    if isinstance(log, log_inst.EventLog):
        return log_inst.EventLog(sorted(log, key=func, reverse=reverse),
                                 attributes=log.attributes,
                                 classifiers=log.classifiers,
                                 omni_present=log.omni_present,
                                 extensions=log.extensions)
    elif isinstance(log, log_inst.EventStream):
        return log_inst.EventStream(sorted(log, key=func, reverse=reverse),
                                    attributes=log.attributes,
                                    classifiers=log.classifiers,
                                    omni_present=log.omni_present,
                                    extensions=log.extensions)
    else:
        # bug fix: the warning previously said "map() not applied"
        # (copy-pasted from map_); align the wording with sort_log
        warnings.warn(
            'input log object not of appropriate type, sorted() not applied')
        return log
def apply(log, parameters=None):
    """
    Converts the event log to an event stream

    Parameters
    ----------
    log: :class:`pm4py.log.log.EventLog`
        An Event log
    parameters
        Optional dict of options read through exec_utils:
        STREAM_POST_PROCESSING (default False), CASE_ATTRIBUTE_PREFIX
        (default 'case:'), DEEP_COPY (default False).

    Returns
    -------
    log : :class:`pm4py.log.log.EventLog`
        An Event stream
    """
    parameters = {} if parameters is None else parameters
    stream_post_processing = exec_utils.get_param_value(
        Parameters.STREAM_POST_PROCESSING, parameters, False)
    case_pref = exec_utils.get_param_value(Parameters.CASE_ATTRIBUTE_PREFIX,
                                           parameters, 'case:')
    enable_deepcopy = exec_utils.get_param_value(Parameters.DEEP_COPY,
                                                 parameters, False)
    if isinstance(log, pandas.core.frame.DataFrame):
        records = log.to_dict('records')
        if stream_post_processing:
            records = __postprocess_stream(records)
        # wrap every record as an Event before building the stream
        records = [Event(record) for record in records]
        log = log_instance.EventStream(records, attributes={'origin': 'csv'})
    if not isinstance(log, EventLog):
        # already a stream (or unsupported type): pass through unchanged
        return log
    return __transform_event_log_to_event_stream(
        log, include_case_attributes=True,
        case_attribute_prefix=case_pref,
        enable_deepcopy=enable_deepcopy)
def apply(log, parameters=None):
    """
    Converts the event log to an event stream

    Parameters
    ----------
    log: :class:`pm4py.log.log.EventLog`
        An Event log
    parameters
        Optional dict of options, normalized via __parse_parameters:
        STREAM_POST_PROCESSING, CASE_ATTRIBUTE_PREFIX, DEEP_COPY
        (defaults taken from the Parameters enum values).

    Returns
    -------
    log : :class:`pm4py.log.log.EventLog`
        An Event stream
    """
    if parameters is None:
        parameters = dict()
    else:
        parameters = __parse_parameters(parameters)
    if isinstance(log, pandas.core.frame.DataFrame):
        list_events = log.to_dict('records')
        do_postprocessing = (Parameters.STREAM_POST_PROCESSING in parameters
                             and parameters[Parameters.STREAM_POST_PROCESSING])
        if do_postprocessing:
            list_events = __postprocess_stream(list_events)
        log = log_instance.EventStream(list_events,
                                       attributes={'origin': 'csv'})
    if isinstance(log, EventLog):
        # fall back to the enum's declared default when a key is absent
        if Parameters.CASE_ATTRIBUTE_PREFIX in parameters:
            case_pref = parameters[Parameters.CASE_ATTRIBUTE_PREFIX]
        else:
            case_pref = Parameters.CASE_ATTRIBUTE_PREFIX.value
        if Parameters.DEEP_COPY in parameters:
            enable_deepcopy = parameters[Parameters.DEEP_COPY]
        else:
            enable_deepcopy = Parameters.DEEP_COPY.value
        return __transform_event_log_to_event_stream(
            log, include_case_attributes=True,
            case_attribute_prefix=case_pref,
            enable_deepcopy=enable_deepcopy)
    return log
def transform_event_log_to_event_stream(
        log, include_case_attributes=True,
        case_attribute_prefix=log_util.CASE_ATTRIBUTE_PREFIX,
        enable_deepcopy=False):
    """
    Converts the event log to an event stream

    Parameters
    ----------
    log: :class:`pm4py.log.log.EventLog`
        An Event log
    include_case_attributes:
        Default is True; copies each trace attribute onto its events.
    case_attribute_prefix:
        Default is 'case:'
    enable_deepcopy
        Enables deepcopy of the whole log up front (avoid references
        between input and output objects)

    Returns
    -------
    log : :class:`pm4py.log.log.EventLog`
        An Event stream
    """
    # copy the entire log once; afterwards events can be mutated freely
    if enable_deepcopy:
        log = deepcopy(log)
    events = []
    for trace in log:
        # hoist the (possibly empty) set of case attributes per trace
        case_items = trace.attributes.items() if include_case_attributes else ()
        for event in trace:
            for attr_key, attr_value in case_items:
                event[case_attribute_prefix + attr_key] = attr_value
            events.append(event)
    return log_instance.EventStream(events, attributes=log.attributes,
                                    classifiers=log.classifiers,
                                    omni_present=log.omni_present,
                                    extensions=log.extensions)