def import_log(path, parameters=None, variant=ITERPARSE):
    """
    Load a XES log from disk by delegating to one of the registered importers

    Parameters
    -----------
    path
        Location of the log file; when it ends with "gz" it is first passed
        through ``compression.decompress`` and the returned path is used
    parameters
        Options handed over unchanged to the selected importer. The importers
        visible in this file read the following keys:
            timestamp_sort -> Specify if we should sort log by timestamp
            timestamp_key -> If sort is enabled, then sort the log by using this key
            reverse_sort -> Specify in which direction the log should be sorted
            insert_trace_indexes -> Specify if trace indexes should be added as event attribute for each
            event
            max_no_traces_to_import -> Specify the maximum number of traces to import from the log
            (read in order in the XML file)
    variant
        Key into ``VERSIONS`` selecting the importer (documented values:
        iterparse, nonstandard)

    Returns
    -----------
    log
        Whatever the selected importer returns (a trace log object)
    """
    needs_decompression = path.endswith("gz")
    source_path = compression.decompress(path) if needs_decompression else path
    importer = VERSIONS[variant]
    return importer(source_path, parameters=parameters)
def __import_log(path, parameters=None, variant=Variants.ITERPARSE):
    """
    Load a XES log from disk through the importer variant chosen by the caller

    Parameters
    -----------
    path
        Location of the log file; when it ends with "gz" it is first passed
        through ``compression.decompress`` and the returned path is used
    parameters
        Options handed over unchanged to the selected variant; documented keys:
            Parameters.TIMESTAMP_SORT -> Specify if we should sort log by timestamp
            Parameters.TIMESTAMP_KEY -> If sort is enabled, then sort the log by using this key
            Parameters.REVERSE_SORT -> Specify in which direction the log should be sorted
            Parameters.INSERT_TRACE_INDICES -> Specify if trace indexes should be added as event attribute for each
            event
            Parameters.MAX_TRACES -> Specify the maximum number of traces to import from the log
            (read in order in the XML file)
    variant
        Importer to use:
            - Variants.ITERPARSE
            - Variants.LINE_BY_LINE
        The legacy strings 'iterparse' and 'nonstandard' are accepted as
        aliases for these two values.

    Returns
    -----------
    log
        Whatever the selected variant's ``apply`` returns (a trace log object)
    """
    # .xes.gz style inputs are unpacked before being parsed
    source_path = compression.decompress(path) if path.endswith("gz") else path

    # translate the old string identifiers into the corresponding enum members
    if variant == 'nonstandard':
        selected = Variants.LINE_BY_LINE
    elif variant == 'iterparse':
        selected = Variants.ITERPARSE
    else:
        selected = variant

    importer = exec_utils.get_variant(selected)
    return importer.apply(source_path, parameters=parameters)
def import_log(filename, parameters=None):
    """
    Imports an XES file into a log object using incremental (iterparse) XML parsing

    Parameters
    ----------
    filename:
        Absolute filename; when it ends with "gz" it is first passed through
        ``compression.decompress`` and the returned path is parsed
    parameters
        Parameters of the algorithm, including
            timestamp_sort -> Specify if we should sort log by timestamp (default False)
            timestamp_key -> If sort is enabled, then sort the log by using this key
            (default "time:timestamp")
            reverse_sort -> Specify in which direction the log should be sorted (default False)
            insert_trace_indexes -> Specify if trace indexes should be added as event attribute for each
            event (default False)
            max_no_traces_to_import -> Specify the maximum number of traces to import from the log
            (read in order in the XML file, default 1000000000)

    Returns
    -------
    log : :class:`pm4py.log.log.TraceLog`
        A trace log (None if the file contains no <log> element)

    Raises
    ------
    SyntaxError
        If <event> or <trace> elements are nested in an element of the same
        kind, if more than one <log> element is present, or if a trace,
        extension, global or classifier element appears before <log>
    """
    if parameters is None:
        parameters = {}

    timestamp_sort = parameters.get("timestamp_sort", False)
    timestamp_key = parameters.get("timestamp_key", "time:timestamp")
    reverse_sort = parameters.get("reverse_sort", False)
    insert_trace_indexes = parameters.get("insert_trace_indexes", False)
    max_no_traces_to_import = parameters.get("max_no_traces_to_import", 1000000000)

    if filename.endswith("gz"):
        filename = compression.decompress(filename)

    xes = log_lib.util.xes
    context = etree.iterparse(filename, events=['start', 'end'])

    log = None
    trace = None
    event = None

    # maps every currently open XML element to the container that receives
    # the attributes declared by its children
    tree = {}

    for tree_event, elem in context:
        if tree_event == EVENT_START:  # starting to read
            parent = tree[elem.getparent()] if elem.getparent() in tree else None

            if elem.tag.endswith(xes.TAG_STRING):
                if parent is not None:
                    tree = __parse_attribute(elem, parent, elem.get(xes.KEY_KEY),
                                             elem.get(xes.KEY_VALUE), tree)
                continue

            elif elem.tag.endswith(xes.TAG_DATE):
                try:
                    dt = ciso8601.parse_datetime(elem.get(xes.KEY_VALUE))
                    tree = __parse_attribute(elem, parent, elem.get(xes.KEY_KEY), dt, tree)
                except TypeError:
                    logging.info("failed to parse date: " + str(elem.get(xes.KEY_VALUE)))
                except ValueError:
                    logging.info("failed to parse date: " + str(elem.get(xes.KEY_VALUE)))
                continue

            elif elem.tag.endswith(xes.TAG_EVENT):
                if event is not None:
                    raise SyntaxError('file contains <event> in another <event> tag')
                event = log_lib.log.Event()
                tree[elem] = event
                continue

            elif elem.tag.endswith(xes.TAG_TRACE):
                # same diagnostics as for the other log-level elements instead
                # of an opaque TypeError from len(None)
                if log is None:
                    raise SyntaxError('trace found outside of <log> tag')
                if len(log) >= max_no_traces_to_import:
                    break
                if trace is not None:
                    raise SyntaxError('file contains <trace> in another <trace> tag')
                trace = log_lib.log.Trace()
                tree[elem] = trace.attributes
                continue

            elif elem.tag.endswith(xes.TAG_FLOAT):
                if parent is not None:
                    try:
                        val = float(elem.get(xes.KEY_VALUE))
                        tree = __parse_attribute(elem, parent, elem.get(xes.KEY_KEY), val, tree)
                    except ValueError:
                        logging.info("failed to parse float: " + str(elem.get(xes.KEY_VALUE)))
                continue

            elif elem.tag.endswith(xes.TAG_INT):
                if parent is not None:
                    try:
                        val = int(elem.get(xes.KEY_VALUE))
                        tree = __parse_attribute(elem, parent, elem.get(xes.KEY_KEY), val, tree)
                    except ValueError:
                        logging.info("failed to parse int: " + str(elem.get(xes.KEY_VALUE)))
                continue

            elif elem.tag.endswith(xes.TAG_BOOLEAN):
                if parent is not None:
                    try:
                        # bool("false") is True because any non-empty string is
                        # truthy, so the textual value is compared explicitly;
                        # "true" and "1" are the true literals of xs:boolean
                        raw = str(elem.get(xes.KEY_VALUE)).strip().lower()
                        val = raw in ("true", "1")
                        tree = __parse_attribute(elem, parent, elem.get(xes.KEY_KEY), val, tree)
                    except ValueError:
                        logging.info("failed to parse boolean: " + str(elem.get(xes.KEY_VALUE)))
                continue

            elif elem.tag.endswith(xes.TAG_LIST):
                if parent is not None:
                    # lists have no value, hence we put None as a value
                    tree = __parse_attribute(elem, parent, elem.get(xes.KEY_KEY), None, tree)
                continue

            elif elem.tag.endswith(xes.TAG_ID):
                if parent is not None:
                    tree = __parse_attribute(elem, parent, elem.get(xes.KEY_KEY),
                                             elem.get(xes.KEY_VALUE), tree)
                continue

            elif elem.tag.endswith(xes.TAG_EXTENSION):
                if log is None:
                    raise SyntaxError('extension found outside of <log> tag')
                ext_name = elem.get(xes.KEY_NAME)
                ext_prefix = elem.get(xes.KEY_PREFIX)
                ext_uri = elem.get(xes.KEY_URI)
                # incomplete extension declarations are silently skipped
                if ext_name is not None and ext_prefix is not None and ext_uri is not None:
                    log.extensions[ext_name] = {
                        xes.KEY_PREFIX: ext_prefix,
                        xes.KEY_URI: ext_uri
                    }
                continue

            elif elem.tag.endswith(xes.TAG_GLOBAL):
                if log is None:
                    raise SyntaxError('global found outside of <log> tag')
                scope = elem.get(xes.KEY_SCOPE)
                if scope is not None:
                    log.omni_present[scope] = {}
                    tree[elem] = log.omni_present[scope]
                continue

            elif elem.tag.endswith(xes.TAG_CLASSIFIER):
                if log is None:
                    raise SyntaxError('classifier found outside of <log> tag')
                if elem.get(xes.KEY_KEYS) is not None:
                    log.classifiers[elem.get(xes.KEY_NAME)] = elem.get(xes.KEY_KEYS).split()
                continue

            elif elem.tag.endswith(xes.TAG_LOG):
                if log is not None:
                    raise SyntaxError('file contains > 1 <log> tags')
                log = log_lib.log.TraceLog()
                tree[elem] = log.attributes
                continue

        elif tree_event == EVENT_END:
            if elem in tree:
                del tree[elem]
            # drop the parsed element (and an already processed sibling) so
            # the in-memory XML tree does not grow with the file size
            elem.clear()
            if elem.getprevious() is not None:
                try:
                    del elem.getparent()[0]
                except TypeError:
                    pass

            if elem.tag.endswith(xes.TAG_EVENT):
                if trace is not None:
                    trace.append(event)
                    event = None
                continue

            elif elem.tag.endswith(xes.TAG_TRACE):
                log.append(trace)
                trace = None
                continue

            elif elem.tag.endswith(xes.TAG_LOG):
                continue

    del context

    if timestamp_sort:
        log.sort(timestamp_key=timestamp_key, reverse_sort=reverse_sort)
    if insert_trace_indexes:
        log.insert_trace_index_as_event_attribute()

    return log
def _convert_xes_line_value(tag, raw_value):
    """Convert the raw text of a XES attribute according to the tag that declares it."""
    if tag.startswith("date"):
        return ciso8601.parse_datetime(raw_value)
    if tag.startswith("int"):
        return int(raw_value)
    if tag.startswith("float"):
        return float(raw_value)
    # strings and every other attribute type keep their textual value
    return raw_value


def import_log(filename, parameters=None):
    """
    Import a TraceLog object from a XML file containing the traces, the events and the simple attributes of them

    The file is read line by line instead of being parsed as XML: every line
    is split on double quotes, and a line made of exactly one key="..." and
    one value="..." pair (five pieces after the split) is treated as an
    attribute of the currently open event or trace. Lines that contain no
    "<" character are ignored.

    Parameters
    -----------
    filename
        XES file to parse; when it ends with "gz" it is first passed through
        ``compression.decompress`` and the returned path is read
    parameters
        Parameters of the algorithm, including
            timestamp_sort -> Specify if we should sort log by timestamp (default False)
            timestamp_key -> If sort is enabled, then sort the log by using this key
            (default "time:timestamp")
            reverse_sort -> Specify in which direction the log should be sorted (default False)
            insert_trace_indexes -> Specify if trace indexes should be added as event attribute for each
            event (default False)
            max_no_traces_to_import -> Specify the maximum number of traces to import from the log
            (read in order in the XML file, default 1000000000)

    Returns
    -----------
    log
        Trace log object holding the imported traces
    """
    if parameters is None:
        parameters = {}

    timestamp_sort = parameters.get("timestamp_sort", False)
    timestamp_key = parameters.get("timestamp_key", "time:timestamp")
    reverse_sort = parameters.get("reverse_sort", False)
    insert_trace_indexes = parameters.get("insert_trace_indexes", False)
    max_no_traces_to_import = parameters.get("max_no_traces_to_import", 1000000000)

    if filename.endswith("gz"):
        filename = compression.decompress(filename)

    log = log_lib.log.TraceLog()
    tracecount = 0
    trace = None
    event = None

    with open(filename, "r") as f:
        for line in f:
            content = line.split("\"")
            # blank lines or plain text lines carry no tag; skipping them
            # avoids an IndexError on the tag lookup
            _, opening, tag = content[0].partition("<")
            if not opening:
                continue

            if trace is not None:
                if event is not None:
                    if len(content) == 5:
                        event[content[1]] = _convert_xes_line_value(tag, content[3])
                    elif tag.startswith("/event"):
                        trace.append(event)
                        event = None
                elif tag.startswith("event"):
                    event = log_lib.log.Event()
                elif len(content) == 5:
                    trace.attributes[content[1]] = _convert_xes_line_value(tag, content[3])
                elif tag.startswith("/trace"):
                    log.append(trace)
                    tracecount += 1
                    trace = None
                    # stop as soon as the limit is reached; the previous
                    # "> limit" comparison imported one trace too many
                    if tracecount >= max_no_traces_to_import:
                        break
            elif tag.startswith("trace"):
                # guards limits <= 0, for which no trace must be imported
                if tracecount >= max_no_traces_to_import:
                    break
                trace = log_lib.log.Trace()

    if timestamp_sort:
        log.sort(timestamp_key=timestamp_key, reverse_sort=reverse_sort)
    if insert_trace_indexes:
        log.insert_trace_index_as_event_attribute()

    return log