예제 #1
0
def __export_log(log, output_file_path, parameters=None):
    """
    Write a PM4PY event log to disk as an XES (XML) file

    Parameters
    ----------
    log: :class:`pm4py.log.log.EventLog`
        PM4PY log to serialise
    output_file_path:
        Path of the file the XML tree is written to
    parameters
        Optional dictionary of parameters; Parameters.COMPRESS is looked up
        through param_util.fetch and, when truthy, the written file is passed
        to compression.compress

    """
    if parameters is None:
        parameters = {}

    # Build the XML tree for the log and serialise it as indented UTF-8 XML
    # that starts with an XML declaration
    xml_tree = __export_log_tree(log)
    xml_tree.write(
        output_file_path,
        encoding="utf-8",
        xml_declaration=True,
        pretty_print=True,
    )

    # Compression is requested only after the file has been written
    should_compress = param_util.fetch(Parameters.COMPRESS, parameters)
    if should_compress:
        compression.compress(output_file_path)
예제 #2
0
def _convert_line_attribute(tag, raw_value, date_parser):
    """
    Convert the textual value of an attribute line according to its tag type

    Parameters
    -----------
    tag
        Tag name found at the start of the line (e.g. "string key=")
    raw_value
        Value of the attribute, as text
    date_parser
        Object whose ``apply`` method turns a date string into a date object

    Returns
    -----------
    value
        Parsed date for "date" tags, ``int`` for "int" tags, ``float`` for
        "float" tags; the unchanged text for "string" and any other tag
    """
    if tag.startswith("date"):
        return date_parser.apply(raw_value)
    if tag.startswith("int"):
        return int(raw_value)
    if tag.startswith("float"):
        return float(raw_value)
    # "string" and every unrecognised tag type keep the raw text
    return raw_value


def import_log(filename, parameters=None):
    """
    Import a log object from a XML file
    containing the traces, the events and the simple attributes of them

    The file is scanned line by line rather than with an XML parser: a line
    is treated as an attribute when splitting it on double quotes yields
    exactly five pieces, the first quoted string being used as the key and
    the second one as the value.

    Parameters
    -----------
    filename
        XES file to parse
    parameters
        Parameters of the algorithm, including
            Parameters.TIMESTAMP_SORT -> Specify if we should sort log by timestamp
            Parameters.TIMESTAMP_KEY -> If sort is enabled, then sort the log by using this key
            Parameters.REVERSE_SORT -> Specify in which direction the log should be sorted
            Parameters.INSERT_TRACE_INDICES -> Specify if trace indexes should be added as event attribute for each event
            Parameters.MAX_TRACES -> Specify the maximum number of traces to import from the log (read in order in the XML file)
            Parameters.MAX_BYTES -> Maximum number of bytes to read; when the file is larger,
                only its last MAX_BYTES bytes are read
            Parameters.SKYP_BYTES -> Number of bytes to skip

    Returns
    -----------
    log
        Event log built from the traces and events found in the file
    """
    if parameters is None:
        parameters = {}

    date_parser = dt_parser.get()
    timestamp_sort = param_util.fetch(Parameters.TIMESTAMP_SORT, parameters)
    timestamp_key = param_util.fetch(Parameters.TIMESTAMP_KEY, parameters)
    reverse_sort = param_util.fetch(Parameters.REVERSE_SORT, parameters)
    insert_trace_indexes = param_util.fetch(Parameters.INSERT_TRACE_INDICES, parameters)
    max_no_traces_to_import = param_util.fetch(Parameters.MAX_TRACES, parameters)
    skip_bytes = param_util.fetch(Parameters.SKYP_BYTES, parameters)
    max_bytes_to_read = param_util.fetch(Parameters.MAX_BYTES, parameters)

    file_size = os.stat(filename).st_size

    # A file larger than the byte budget is read from its tail: the skip
    # offset is overridden so that only the last max_bytes_to_read bytes remain
    if file_size > max_bytes_to_read:
        skip_bytes = file_size - max_bytes_to_read

    log = EventLog()
    tracecount = 0
    trace = None
    event = None

    # The context manager guarantees the handle is released even when a
    # malformed value makes int()/float()/the date parser raise
    with open(filename, "r") as f:
        # NOTE(review): seeking a text-mode file to an arbitrary byte offset
        # may land inside a multi-byte character - confirm inputs are safe
        f.seek(skip_bytes)

        for line in f:
            content = line.split("\"")
            # Text between the last "<" and the first quote, e.g. "string key="
            tag = content[0].split("<")[-1]

            if trace is None:
                if tag.startswith("trace"):
                    # Enforce the limit before opening a new trace so that at
                    # most max_no_traces_to_import traces are imported
                    if tracecount >= max_no_traces_to_import:
                        break
                    trace = Trace()
            elif event is not None:
                # Inside an event: attribute lines populate the event
                if len(content) == 5:
                    event[content[1]] = _convert_line_attribute(
                        tag, content[3], date_parser)
                elif tag.startswith("/event"):
                    trace.append(event)
                    event = None
            elif tag.startswith("event"):
                event = Event()
            elif len(content) == 5:
                # Inside a trace but outside any event: trace-level attribute
                trace.attributes[content[1]] = _convert_line_attribute(
                    tag, content[3], date_parser)
            elif tag.startswith("/trace"):
                log.append(trace)
                tracecount += 1
                trace = None

    if timestamp_sort:
        log = sorting.sort_timestamp(log, timestamp_key=timestamp_key, reverse_sort=reverse_sort)
    if insert_trace_indexes:
        log.insert_trace_index_as_event_attribute()

    return log
예제 #3
0
def _parse_xes_stream(context, date_parser, max_no_traces_to_import,
                      progress=None):
    """
    Consume an iterparse event stream and build the event log from it

    Parameters
    ----------
    context
        Iterable of (event name, element) pairs covering start and end events
    date_parser
        Object whose ``apply`` method turns a date string into a date object
    max_no_traces_to_import
        Parsing stops when a new trace starts and the log already holds this
        many traces
    progress
        Optional progress bar; ``update()`` is called once per completed trace

    Returns
    -------
    log
        The parsed log, or None when no <log> element was encountered

    Raises
    ------
    SyntaxError
        On nested <event> or <trace> tags, on more than one <log> tag, and on
        extension/global/classifier elements found before the <log> tag
    """
    log = None
    trace = None
    event = None

    # Maps each currently open XML element to the container that receives the
    # attributes of its children (log/trace attributes, event, global scope)
    tree = {}
    for tree_event, elem in context:
        if tree_event == _EVENT_START:  # starting to read
            parent = tree[
                elem.getparent()] if elem.getparent() in tree else None

            if elem.tag.endswith(xes_constants.TAG_STRING):
                if parent is not None:
                    tree = __parse_attribute(elem, parent,
                                             elem.get(xes_constants.KEY_KEY),
                                             elem.get(xes_constants.KEY_VALUE),
                                             tree)
                continue

            elif elem.tag.endswith(xes_constants.TAG_DATE):
                # Guarded like every other typed attribute: a date without a
                # known parent container is skipped
                if parent is not None:
                    try:
                        dt = date_parser.apply(
                            elem.get(xes_constants.KEY_VALUE))
                        tree = __parse_attribute(
                            elem, parent, elem.get(xes_constants.KEY_KEY), dt,
                            tree)
                    except (TypeError, ValueError):
                        logging.info("failed to parse date: " +
                                     str(elem.get(xes_constants.KEY_VALUE)))
                continue

            elif elem.tag.endswith(xes_constants.TAG_EVENT):
                if event is not None:
                    raise SyntaxError(
                        'file contains <event> in another <event> tag')
                event = Event()
                tree[elem] = event
                continue

            elif elem.tag.endswith(xes_constants.TAG_TRACE):
                # Stop before opening a trace beyond the requested maximum
                if len(log) >= max_no_traces_to_import:
                    break
                if trace is not None:
                    raise SyntaxError(
                        'file contains <trace> in another <trace> tag')
                trace = Trace()
                tree[elem] = trace.attributes
                continue

            elif elem.tag.endswith(xes_constants.TAG_FLOAT):
                if parent is not None:
                    try:
                        val = float(elem.get(xes_constants.KEY_VALUE))
                        tree = __parse_attribute(
                            elem, parent, elem.get(xes_constants.KEY_KEY), val,
                            tree)
                    except ValueError:
                        logging.info("failed to parse float: " +
                                     str(elem.get(xes_constants.KEY_VALUE)))
                continue

            elif elem.tag.endswith(xes_constants.TAG_INT):
                if parent is not None:
                    try:
                        val = int(elem.get(xes_constants.KEY_VALUE))
                        tree = __parse_attribute(
                            elem, parent, elem.get(xes_constants.KEY_KEY), val,
                            tree)
                    except ValueError:
                        logging.info("failed to parse int: " +
                                     str(elem.get(xes_constants.KEY_VALUE)))
                continue

            elif elem.tag.endswith(xes_constants.TAG_BOOLEAN):
                if parent is not None:
                    try:
                        # Only the text "true" (any case) maps to True
                        val0 = elem.get(xes_constants.KEY_VALUE)
                        val = False
                        if str(val0).lower() == "true":
                            val = True
                        tree = __parse_attribute(
                            elem, parent, elem.get(xes_constants.KEY_KEY), val,
                            tree)
                    except ValueError:
                        logging.info("failed to parse boolean: " +
                                     str(elem.get(xes_constants.KEY_VALUE)))
                continue

            elif elem.tag.endswith(xes_constants.TAG_LIST):
                if parent is not None:
                    # lists have no value, hence we put None as a value
                    tree = __parse_attribute(elem, parent,
                                             elem.get(xes_constants.KEY_KEY),
                                             None, tree)
                continue

            elif elem.tag.endswith(xes_constants.TAG_ID):
                if parent is not None:
                    tree = __parse_attribute(elem, parent,
                                             elem.get(xes_constants.KEY_KEY),
                                             elem.get(xes_constants.KEY_VALUE),
                                             tree)
                continue

            elif elem.tag.endswith(xes_constants.TAG_EXTENSION):
                if log is None:
                    raise SyntaxError('extension found outside of <log> tag')
                # Registered only when name, prefix and URI are all present
                if elem.get(xes_constants.KEY_NAME) is not None and elem.get(
                        xes_constants.KEY_PREFIX) is not None and elem.get(
                            xes_constants.KEY_URI) is not None:
                    log.extensions[elem.get(xes_constants.KEY_NAME)] = {
                        xes_constants.KEY_PREFIX:
                        elem.get(xes_constants.KEY_PREFIX),
                        xes_constants.KEY_URI:
                        elem.get(xes_constants.KEY_URI)
                    }
                continue

            elif elem.tag.endswith(xes_constants.TAG_GLOBAL):
                if log is None:
                    raise SyntaxError('global found outside of <log> tag')
                if elem.get(xes_constants.KEY_SCOPE) is not None:
                    # Children of this element are stored under its scope
                    log.omni_present[elem.get(xes_constants.KEY_SCOPE)] = {}
                    tree[elem] = log.omni_present[elem.get(
                        xes_constants.KEY_SCOPE)]
                continue

            elif elem.tag.endswith(xes_constants.TAG_CLASSIFIER):
                if log is None:
                    raise SyntaxError('classifier found outside of <log> tag')
                if elem.get(xes_constants.KEY_KEYS) is not None:
                    classifier_value = elem.get(xes_constants.KEY_KEYS)
                    if "'" in classifier_value:
                        # Quoted keys: split on the quotes, drop blank pieces
                        log.classifiers[elem.get(xes_constants.KEY_NAME)] = [
                            x for x in classifier_value.split("'")
                            if x.strip()
                        ]
                    else:
                        log.classifiers[elem.get(xes_constants.KEY_NAME
                                                 )] = classifier_value.split()
                continue

            elif elem.tag.endswith(xes_constants.TAG_LOG):
                if log is not None:
                    raise SyntaxError('file contains > 1 <log> tags')
                log = EventLog()
                tree[elem] = log.attributes
                continue

        elif tree_event == _EVENT_END:
            # The element is complete: forget its container and release the
            # XML nodes that have already been processed
            if elem in tree:
                del tree[elem]
            elem.clear()
            if elem.getprevious() is not None:
                try:
                    del elem.getparent()[0]
                except TypeError:
                    pass

            if elem.tag.endswith(xes_constants.TAG_EVENT):
                if trace is not None:
                    trace.append(event)
                    event = None
                continue

            elif elem.tag.endswith(xes_constants.TAG_TRACE):
                log.append(trace)

                # update progress bar as we have a completed trace
                if progress is not None:
                    progress.update()

                trace = None
                continue

            elif elem.tag.endswith(xes_constants.TAG_LOG):
                continue

    return log


def import_log(filename, parameters=None):
    """
    Imports an XES file into a log object

    Parameters
    ----------
    filename:
        Absolute filename
    parameters
        Parameters of the algorithm, including
            Parameters.TIMESTAMP_SORT -> Specify if we should sort log by timestamp
                (only honoured when the key is explicitly present in parameters)
            Parameters.TIMESTAMP_KEY -> If sort is enabled, then sort the log by using this key
            Parameters.REVERSE_SORT -> Specify in which direction the log should be sorted
            Parameters.INSERT_TRACE_INDICES -> Specify if trace indexes should be added as event attribute for each event
            Parameters.MAX_TRACES -> Specify the maximum number of traces to import from the log (read in order in the XML file)

    Returns
    -------
    log : :class:`pm4py.log.log.EventLog`
        A log

    Raises
    ------
    SyntaxError
        When the structure of the file is invalid (nested <event>/<trace>
        tags, several <log> tags, log-level elements outside of <log>)
    """
    # Function-scope import: find_spec replaces the deprecated
    # pkgutil.find_loader for detecting the optional tqdm dependency
    import importlib.util

    parameters = dict() if parameters is None else parameters

    insert_trace_indexes = param_util.fetch(Parameters.INSERT_TRACE_INDICES,
                                            parameters)
    max_no_traces_to_import = param_util.fetch(Parameters.MAX_TRACES,
                                               parameters)

    date_parser = dt_parser.get()
    context = etree.iterparse(filename, events=[_EVENT_START, _EVENT_END])

    # make tqdm facultative
    progress = None
    if importlib.util.find_spec("tqdm") is not None:
        from tqdm.auto import tqdm

        # The trace count is only needed as the progress bar total, so the
        # extra pass over the file is made only when a bar is shown; the
        # {*} syntax matches a trace element in any namespace
        no_trace = sum(
            1 for _ in etree.iterparse(
                filename, events=[_EVENT_START], tag="{*}trace"))
        progress = tqdm(total=no_trace,
                        desc="parsing log, completed traces :: ")

    try:
        log = _parse_xes_stream(context, date_parser, max_no_traces_to_import,
                                progress)
    finally:
        # close the progress bar even when parsing raises
        if progress is not None:
            progress.close()
    del context, progress

    if Parameters.TIMESTAMP_SORT in parameters and parameters[
            Parameters.TIMESTAMP_SORT]:
        log = sorting.sort_timestamp(
            log,
            timestamp_key=param_util.fetch(Parameters.TIMESTAMP_KEY,
                                           parameters),
            reverse_sort=param_util.fetch(Parameters.REVERSE_SORT, parameters))
    if insert_trace_indexes:
        log = index_attribute.insert_event_index_as_event_attribute(log)

    return log