Esempio n. 1
0
def import_log(path, parameters=None, variant=ITERPARSE):
    """
    Load a XES log from disk and return it as an EventLog object

    Paths ending in "gz" are first passed through compression.decompress;
    the importer registered in VERSIONS under ``variant`` is then called
    on the resulting path.

    Parameters
    -----------
    path
        Location of the log (a path ending in "gz" is decompressed first)
    parameters
        Options forwarded untouched to the selected importer, including
            timestamp_sort -> Specify if we should sort log by timestamp
            timestamp_key -> If sort is enabled, then sort the log by using this key
            reverse_sort -> Specify in which direction the log should be sorted
            index_trace_indexes -> Specify if trace indexes should be added as event attribute for each event
            max_no_traces_to_import -> Specify the maximum number of traces to import from the log
            (read in order in the XML file)
    variant
        Key into VERSIONS selecting the importer to use, including:
            iterparse, nonstandard

    Returns
    -----------
    log
        Trace log object
    """
    # Decompression happens before the variant lookup, so an unknown variant
    # only fails after the archive has been handled.
    source = compression.decompress(path) if path.endswith("gz") else path

    importer = VERSIONS[variant]
    return importer(source, parameters=parameters)
Esempio n. 2
0
def __import_log(path, parameters=None, variant=Variants.ITERPARSE):
    """
    Load a XES log from disk and return it as an EventLog object

    Parameters
    -----------
    path
        Location of the log (a path ending in "gz" is decompressed first)
    parameters
        Options forwarded untouched to the selected variant, including
            Parameters.TIMESTAMP_SORT -> Specify if we should sort log by timestamp
            Parameters.TIMESTAMP_KEY -> If sort is enabled, then sort the log by using this key
            Parameters.REVERSE_SORT -> Specify in which direction the log should be sorted
            Parameters.INSERT_TRACE_INDICES -> Specify if trace indexes should be added as event attribute for each event
            Parameters.MAX_TRACES -> Specify the maximum number of traces to import from the log (read in order in the XML file)
    variant
        Variant of the algorithm to use, including:
            - Variants.ITERPARSE
            - Variants.LINE_BY_LINE
        The legacy strings 'iterparse' and 'nonstandard' are still accepted
        and mapped onto the corresponding Variants member.

    Returns
    -----------
    log
        Trace log object
    """

    # .xes.gz support: work on the decompressed file instead
    source = compression.decompress(path) if path.endswith("gz") else path

    # backward compatibility: translate the old string identifiers
    resolved_variant = (
        Variants.LINE_BY_LINE if variant == 'nonstandard'
        else Variants.ITERPARSE if variant == 'iterparse'
        else variant
    )

    importer = exec_utils.get_variant(resolved_variant)
    return importer.apply(source, parameters=parameters)
Esempio n. 3
0
def import_log(filename, parameters=None):
    """
    Imports an XES file into a log object

    The file is streamed with ``etree.iterparse`` ('start' and 'end' events).
    While an element that can hold attributes (log, trace, event, global) is
    open, it is registered in a local ``tree`` dictionary mapping the XML
    element to the container that receives its child attributes; the entry is
    removed again when the element is closed.

    Parameters
    ----------
    filename:
        Absolute filename (a name ending in "gz" is decompressed first)
    parameters
        Parameters of the algorithm, including
            timestamp_sort -> Specify if we should sort log by timestamp
            timestamp_key -> If sort is enabled, then sort the log by using this key
            reverse_sort -> Specify in which direction the log should be sorted
            insert_trace_indexes -> Specify if trace indexes should be added as event attribute for each event
            max_no_traces_to_import -> Specify the maximum number of traces to import from the log
            (read in order in the XML file)

    Returns
    -------
    log : :class:`pm4py.log.log.TraceLog`
        A trace log (None when the file contains no <log> element and no
        sorting / index insertion was requested)

    Raises
    ------
    SyntaxError
        If <event> or <trace> elements are nested in themselves, if more
        than one <log> element is found, or if an extension, global or
        classifier element appears before the <log> element was opened.
    """

    if parameters is None:
        parameters = {}

    timestamp_sort = parameters.get("timestamp_sort", False)
    timestamp_key = parameters.get("timestamp_key", "time:timestamp")
    reverse_sort = parameters.get("reverse_sort", False)
    insert_trace_indexes = parameters.get("insert_trace_indexes", False)
    max_no_traces_to_import = parameters.get("max_no_traces_to_import",
                                             1000000000)

    if filename.endswith("gz"):
        filename = compression.decompress(filename)

    # shorthand for the module holding the XES tag / key constants
    xes = log_lib.util.xes

    context = etree.iterparse(filename, events=['start', 'end'])

    log = None
    trace = None
    event = None

    # currently open XML elements -> container receiving their attributes
    tree = {}

    for tree_event, elem in context:
        if tree_event == EVENT_START:  # starting to read
            parent = tree.get(elem.getparent())
            tag = elem.tag

            # NOTE: the order of the checks below is significant because the
            # tags are matched with endswith(); do not reorder.
            if tag.endswith(xes.TAG_STRING):
                if parent is not None:
                    tree = __parse_attribute(
                        elem, parent, elem.get(xes.KEY_KEY),
                        elem.get(xes.KEY_VALUE), tree)

            elif tag.endswith(xes.TAG_DATE):
                try:
                    dt = ciso8601.parse_datetime(elem.get(xes.KEY_VALUE))
                    tree = __parse_attribute(
                        elem, parent, elem.get(xes.KEY_KEY), dt, tree)
                except (TypeError, ValueError):
                    logging.info("failed to parse date: " +
                                 str(elem.get(xes.KEY_VALUE)))

            elif tag.endswith(xes.TAG_EVENT):
                if event is not None:
                    raise SyntaxError(
                        'file contains <event> in another <event> tag')
                event = log_lib.log.Event()
                tree[elem] = event

            elif tag.endswith(xes.TAG_TRACE):
                # stop reading as soon as the requested number of traces
                # has been collected
                if len(log) >= max_no_traces_to_import:
                    break
                if trace is not None:
                    raise SyntaxError(
                        'file contains <trace> in another <trace> tag')
                trace = log_lib.log.Trace()
                tree[elem] = trace.attributes

            elif tag.endswith(xes.TAG_FLOAT):
                if parent is not None:
                    try:
                        val = float(elem.get(xes.KEY_VALUE))
                        tree = __parse_attribute(
                            elem, parent, elem.get(xes.KEY_KEY), val, tree)
                    except ValueError:
                        logging.info("failed to parse float: " +
                                     str(elem.get(xes.KEY_VALUE)))

            elif tag.endswith(xes.TAG_INT):
                if parent is not None:
                    try:
                        val = int(elem.get(xes.KEY_VALUE))
                        tree = __parse_attribute(
                            elem, parent, elem.get(xes.KEY_KEY), val, tree)
                    except ValueError:
                        logging.info("failed to parse int: " +
                                     str(elem.get(xes.KEY_VALUE)))

            elif tag.endswith(xes.TAG_BOOLEAN):
                if parent is not None:
                    try:
                        # bool("false") is True in Python, so the textual
                        # value has to be interpreted explicitly. Accept the
                        # xs:boolean literals; anything else is reported and
                        # the attribute is skipped.
                        text = str(elem.get(xes.KEY_VALUE)).strip().lower()
                        if text in ("true", "1"):
                            val = True
                        elif text in ("false", "0"):
                            val = False
                        else:
                            raise ValueError(
                                "not a boolean literal: " + text)
                        tree = __parse_attribute(
                            elem, parent, elem.get(xes.KEY_KEY), val, tree)
                    except ValueError:
                        logging.info("failed to parse boolean: " +
                                     str(elem.get(xes.KEY_VALUE)))

            elif tag.endswith(xes.TAG_LIST):
                if parent is not None:
                    # lists have no value, hence we put None as a value
                    tree = __parse_attribute(
                        elem, parent, elem.get(xes.KEY_KEY), None, tree)

            elif tag.endswith(xes.TAG_ID):
                if parent is not None:
                    tree = __parse_attribute(
                        elem, parent, elem.get(xes.KEY_KEY),
                        elem.get(xes.KEY_VALUE), tree)

            elif tag.endswith(xes.TAG_EXTENSION):
                if log is None:
                    raise SyntaxError('extension found outside of <log> tag')
                name = elem.get(xes.KEY_NAME)
                prefix = elem.get(xes.KEY_PREFIX)
                uri = elem.get(xes.KEY_URI)
                # only fully specified extensions are recorded
                if name is not None and prefix is not None \
                        and uri is not None:
                    log.extensions[name] = {
                        xes.KEY_PREFIX: prefix,
                        xes.KEY_URI: uri
                    }

            elif tag.endswith(xes.TAG_GLOBAL):
                if log is None:
                    raise SyntaxError('global found outside of <log> tag')
                scope = elem.get(xes.KEY_SCOPE)
                if scope is not None:
                    # attributes nested in this <global> are collected in a
                    # fresh dictionary stored under its scope
                    log.omni_present[scope] = {}
                    tree[elem] = log.omni_present[scope]

            elif tag.endswith(xes.TAG_CLASSIFIER):
                if log is None:
                    raise SyntaxError('classifier found outside of <log> tag')
                if elem.get(xes.KEY_KEYS) is not None:
                    log.classifiers[elem.get(xes.KEY_NAME)] = elem.get(
                        xes.KEY_KEYS).split()

            elif tag.endswith(xes.TAG_LOG):
                if log is not None:
                    raise SyntaxError('file contains > 1 <log> tags')
                log = log_lib.log.TraceLog()
                tree[elem] = log.attributes

        elif tree_event == EVENT_END:
            if elem in tree:
                del tree[elem]
            # release the memory held by the already processed XML nodes
            elem.clear()
            if elem.getprevious() is not None:
                try:
                    del elem.getparent()[0]
                except TypeError:
                    # best-effort cleanup only: a failure here must not
                    # abort the import
                    pass

            if elem.tag.endswith(xes.TAG_EVENT):
                if trace is not None:
                    trace.append(event)
                    event = None

            elif elem.tag.endswith(xes.TAG_TRACE):
                log.append(trace)
                trace = None

    del context

    if timestamp_sort:
        log.sort(timestamp_key=timestamp_key, reverse_sort=reverse_sort)
    if insert_trace_indexes:
        log.insert_trace_index_as_event_attribute()

    return log
Esempio n. 4
0
def _convert_simple_attribute(tag, raw_value):
    """
    Convert the textual value of a simple XES attribute according to its tag

    "date" tags are parsed with ciso8601, "int" and "float" tags are cast to
    the matching Python type; every other tag (including "string") keeps the
    raw text.
    """
    if tag.startswith("date"):
        return ciso8601.parse_datetime(raw_value)
    if tag.startswith("int"):
        return int(raw_value)
    if tag.startswith("float"):
        return float(raw_value)
    return raw_value


def import_log(filename, parameters=None):
    """
    Import a TraceLog object from a XML file
    containing the traces, the events and the simple attributes of them

    The file is read line by line without an XML parser: every line is
    expected to carry a single tag, and an attribute line is recognised by
    containing exactly two double-quoted strings (the key and the value).
    Lines that contain no tag at all (e.g. blank lines) are ignored.

    Parameters
    -----------
    filename
        XES file to parse (a name ending in "gz" is decompressed first)
    parameters
        Parameters of the algorithm, including
            timestamp_sort -> Specify if we should sort log by timestamp
            timestamp_key -> If sort is enabled, then sort the log by using this key
            reverse_sort -> Specify in which direction the log should be sorted
            insert_trace_indexes -> Specify if trace indexes should be added as event attribute for each event
            max_no_traces_to_import -> Specify the maximum number of traces to import from the log
            (read in order in the XML file)

    Returns
    -----------
    log
        Trace log object holding the imported traces
    """
    if parameters is None:
        parameters = {}

    timestamp_sort = parameters.get("timestamp_sort", False)
    timestamp_key = parameters.get("timestamp_key", "time:timestamp")
    reverse_sort = parameters.get("reverse_sort", False)
    insert_trace_indexes = parameters.get("insert_trace_indexes", False)
    max_no_traces_to_import = parameters.get("max_no_traces_to_import",
                                             1000000000)

    if filename.endswith("gz"):
        filename = compression.decompress(filename)

    log = log_lib.log.TraceLog()
    tracecount = 0
    trace = None
    event = None
    with open(filename, "r") as f:
        for line in f:
            # splitting on the double quote yields 5 parts for a line of the
            # form <type key="..." value="..."/>
            content = line.split("\"")
            tag_parts = content[0].split("<")
            if len(tag_parts) < 2:
                # no tag on this line (blank line, stray text): nothing to do
                continue
            tag = tag_parts[1]
            if trace is not None:
                if event is not None:
                    if len(content) == 5:
                        event[content[1]] = _convert_simple_attribute(
                            tag, content[3])
                    elif tag.startswith("/event"):
                        trace.append(event)
                        event = None
                elif tag.startswith("event"):
                    event = log_lib.log.Event()
                elif len(content) == 5:
                    trace.attributes[content[1]] = _convert_simple_attribute(
                        tag, content[3])
                elif tag.startswith("/trace"):
                    log.append(trace)
                    tracecount += 1
                    trace = None
            elif tag.startswith("trace"):
                # enforce the limit before opening a new trace, so that
                # exactly max_no_traces_to_import traces are imported at most
                if tracecount >= max_no_traces_to_import:
                    break
                trace = log_lib.log.Trace()

    if timestamp_sort:
        log.sort(timestamp_key=timestamp_key, reverse_sort=reverse_sort)
    if insert_trace_indexes:
        log.insert_trace_index_as_event_attribute()

    return log