Example #1
def log():
    log = XFactory.create_log()
    # add log classifier
    clf = XEventNameClassifier()
    log.get_classifiers().append(clf)
    # add global trace attributes
    glb_t_attr = XFactory.create_attribute_discrete('glb_t_attr', 0)
    log.get_global_trace_attributes().append(glb_t_attr)
    # add global event attributes
    glb_e_attr = XFactory.create_attribute_discrete('glb_e_attr', 0)
    log.get_global_event_attributes().append(glb_e_attr)
    # add log attributes
    str_attr = XFactory.create_attribute_literal('l_attr', 'UNKNOWN')
    log.get_attributes()['l_attr'] = str_attr
    # add extension
    meta_concept = XExtensionParser().parse(
        "http://www.xes-standard.org/meta_concept.xesext")
    log.get_extensions().add(meta_concept)
    # add a trace
    tracelen = 2
    trace0 = XFactory.create_trace()
    # add a trace attribute
    bool_attr = XFactory.create_attribute_boolean('t_attr', True)
    trace0.get_attributes()['t_attr'] = bool_attr
    for i in range(tracelen):
        event = XFactory.create_event()
        # add an event attribute (keyed by the same name it was created with)
        int_attr = XFactory.create_attribute_discrete('e_attr', 0)
        event.get_attributes()['e_attr'] = int_attr
        trace0.append(event)
    log.append(trace0)
    return log
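A minimal usage sketch for the function above, assuming the opyenxes module layout; the serializer call follows the same pattern as Example #4 below, and the output path is a placeholder:

from opyenxes.data_out.XesXmlSerializer import XesXmlSerializer

event_log = log()
with open("example.xes", "w") as f:  # placeholder path
    XesXmlSerializer().serialize(event_log, f)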
Example #2
def merge_and_label(normLogs, devLogs):

    assert (len(normLogs) > 0 and len(devLogs) > 0)

    merged_log = XFactory.create_log(normLogs[0].get_attributes().clone())

    for elem in normLogs[0].get_extensions():
        merged_log.get_extensions().add(elem)

    # Copy classifiers and global attributes via the public accessors;
    # assigning to name-mangled attributes such as __classifiers from
    # outside the class would create new fields instead of updating the log.
    merged_log.get_classifiers().extend(normLogs[0].get_classifiers())
    merged_log.get_global_trace_attributes().extend(
        normLogs[0].get_global_trace_attributes())
    merged_log.get_global_event_attributes().extend(
        normLogs[0].get_global_event_attributes())

    merged_log.get_global_trace_attributes().append(
        XAttributeLiteral("Label", "0"))

    for log in normLogs:
        for trace in log:
            trace.get_attributes()["Label"] = XAttributeLiteral("Label", "0")
            merged_log.append(trace)

    for log in devLogs:
        for trace in log:
            trace.get_attributes()["Label"] = XAttributeLiteral("Label", "1")
            merged_log.append(trace)

    return merged_log
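A sketch of how merge_and_label might be invoked, assuming hypothetical input paths; XUniversalParser (also used in Example #6) returns a list of logs per file:

from opyenxes.data_in.XUniversalParser import XUniversalParser

# hypothetical inputs: one file of normal traces, one of deviant traces
with open("normal.xes") as f:
    norm_logs = XUniversalParser().parse(f)
with open("deviant.xes") as f:
    dev_logs = XUniversalParser().parse(f)

labeled_log = merge_and_label(norm_logs, dev_logs)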
Example #3
def cohort_to_event_log(cohort, trace_type, verbose=False, remove_unlisted=True, remove_duplicates=True,
                        event_filter=None, trace_filter=None, cores=multiprocessing.cpu_count(), window_size=200,
                        abstraction_path=None, abstraction_exact_match=False, abstraction_delimiter=";"):
    """Converts a fiber cohort to an xes event log.
    Therefore it slices the cohort to smaller windows (because of memory restrictions) and calls the method
    `cohort_to_event_log_for_window` with the slices.

    Keyword arguments:
    cohort -- the fiber cohort
    trace_type -- the type of a trace (`mrn` or `visit`)
    verbose -- flag if the events should contain original non abstracted values (default False)
    remove_unlisted -- flag if a trace should only contain listed events (default True)
    remove_duplicates -- flag if duplicate events should be removed (default True)
    event_filter -- a custom filter to filter events (default None)
    trace_filter -- a custom filter to filter traces (default None)
    cores -- the number of cores which should be used to process the cohort (default amount of CPUs)
    window_size -- the number of patients per window (default 500)
    abstraction_path -- the path to the abstraction file (default None)
    abstraction_exact_match -- flag if the abstraction algorithm should only abstract exacted matches (default False)
    abstraction_delimiter -- the delimiter of the abstraction file (default ;)
    """
    manager = multiprocessing.Manager()
    traces = manager.list()

    mrns = list(cohort.mrns())
    window_amount = math.ceil(len(mrns)/window_size)

    # Spawn a new process for each window to free memory after each window completion
    for i in range(0, window_amount):
        print("Start window {current_window} / {max_window}".format(current_window=(i + 1), max_window=window_amount))
        window_start_time = time.perf_counter()
        mrns_in_window = mrns[i * window_size: (i + 1) * window_size]
        cohort_for_window = Cohort(condition.MRNs(mrns_in_window))

        p = multiprocessing.Process(target=cohort_to_event_log_for_window, args=(
            cohort_for_window,
            trace_type,
            verbose,
            remove_unlisted,
            remove_duplicates,
            event_filter,
            trace_filter,
            cores,
            abstraction_path,
            abstraction_exact_match,
            abstraction_delimiter,
            traces
        ))
        p.start()
        p.join()
        print("Finished window {current_window} / {max_window} in {window_time} s".format(
                current_window=(i + 1),
                max_window=window_amount,
                window_time=(time.perf_counter() - window_start_time)
            ))

    log = XFactory.create_log()
    for trace in traces:
        log.append(trace)
    return log
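A hedged usage sketch: it assumes a fiber `cohort` object is already built, and serializes the returned log the same way Example #4 does:

from opyenxes.data_out.XesXmlSerializer import XesXmlSerializer

# `cohort` is assumed to exist; `mrn` yields one trace per patient
event_log = cohort_to_event_log(cohort, trace_type="mrn")
with open("cohort_log.xes", "w") as f:  # placeholder path
    XesXmlSerializer().serialize(event_log, f)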
Example #4
def extract(indexer, manifestPath, xesFilePath):
    with open(manifestPath) as manifestFile:
        manifest = json.load(manifestFile)
    log = XFactory.create_log()
    switches = {}

    with open("./transactionsFromIndexer.txt", "w") as f:
        f.write("")

    for key in manifest:
        if key == "xesExtensions":
            setExtension(log, manifest[key])
        elif key == "xesClassifiers":
            setClassifiers(log, manifest[key])
        elif key == "xesGlobals":
            setGlobals(log, manifest[key])
        elif key == "switches":
            switches = manifest[key]

    try:
        mappings = manifest["mappings"]
    except KeyError:
        # without mappings there is nothing to extract, so bail out
        print("Missing mappings in the manifest!")
        return
    mapLog(log, mappings, indexer, switches)

    with open(xesFilePath, "w") as file:
        XesXmlSerializer().serialize(log, file)
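A call could look like the sketch below; the manifest keys in the comment are the ones the loop above actually reads, while `indexer` and both paths are assumptions:

# expected top-level manifest keys:
#   "xesExtensions", "xesClassifiers", "xesGlobals", "switches", "mappings"
extract(indexer, "manifest.json", "output.xes")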
Example #5
with open("xes_file/csv_file.csv") as file:
    first_line = file.readline().split(";")
    dictionary = {}
    for i in range(len(first_line)):
        if "yyyy" in first_line[i]:
            # Convert csv date format in xes date format
            first_line[i] = first_line[i].replace("dd", "%d").\
                replace("MM", "%m").replace("yyyy", "%Y").replace("HH", "%H").\
                replace("mm", "%M")

        dictionary[str(i)] = first_line[i].strip("\n")

    first_event = file.readline().split(";")
    actual_trace = first_event[0]

    log = XFactory.create_log()
    trace = XFactory.create_trace()
    trace.append(convert_line_in_event(dictionary, first_event))

    for line in file.readlines():
        line_list = line.split(";")
        event = convert_line_in_event(dictionary, line_list)
        if line_list[0] == actual_trace:  # same case id: extend the current trace
            trace.append(event)
        else:  # new case id: close the current trace and start the next one
            log.append(trace)
            trace = XFactory.create_trace()
            trace.append(event)
            actual_trace = line_list[0]
    # append the last trace, which the loop above never closes
    log.append(trace)

# Save log in xes format
with open("xes_file/csv_log_in_xes_format.xes", "w") as file:
    XesXmlSerializer().serialize(log, file)
Example #6
    factory = XFactory()

    print(os.listdir(datadir))

    for dir in os.listdir(datadir):
        if not os.path.isdir(os.path.join(datadir, dir)):
            continue
        outdir = os.path.join(datadir, dir, 'l1000')
        os.makedirs(outdir, exist_ok=True)

        for xlog_filepath in os.listdir(os.path.join(datadir, dir, 'l5000')):
            if not xlog_filepath.endswith('.xes.gz'):
                continue

            print('Processing {}'.format(xlog_filepath))

            # the listing above came from the 'l5000' subdirectory,
            # so the path must include it when opening the file
            with open(os.path.join(datadir, dir, 'l5000', xlog_filepath), 'r') as f:
                xlog = XUniversalParser().parse(f)[0]

            assert isinstance(xlog, XLog)

            new_xlog = factory.create_log(xlog.get_attributes())
            traces = np.random.choice(xlog, nb_traces, replace=False)
            new_xlog.get_classifiers().append(xlog.get_classifiers()[0])

            for t in traces:
                new_xlog.append(t)

            with open(outdir + os.sep + xlog_filepath, 'w') as f:
                XesXmlGZIPSerializer().serialize(new_xlog, f)
Example #7
    ['3', 1400.0]
]

TRACE_DF = pd.DataFrame(TRACES, columns=TRACE_DF_COLUMNS)

NAME_AND_LIFECYCLE_CLF = XEventAndClassifier([XEventNameClassifier(), XEventLifeTransClassifier()])

CLASSIFIERS = {
    XEventNameClassifier().name(): [const.CONCEPT_NAME],
    NAME_AND_LIFECYCLE_CLF.name(): [const.CONCEPT_NAME, const.LIFECYCLE_TRANS]
}

LOG_TABLE = LogTable(event_df=EVENT_DF, trace_df=TRACE_DF,
                     attributes=LOG_ATTRIBUTE_DICT, classifiers=CLASSIFIERS)

XLOG = XFactory.create_log()
XLOG_NAME = 'Test log'
CONCEPT_EXTENSION.assign_name(XLOG, XLOG_NAME)
TOTAL_TIME = 100
TOTAL_TIME_ATTRIBUTE = XFactory.create_attribute_continuous('total_time', TOTAL_TIME)
XLOG.get_attributes()['total_time'] = TOTAL_TIME_ATTRIBUTE

for caseid, cost_total in TRACES:
    xtrace = XFactory.create_trace()

    CONCEPT_EXTENSION.assign_name(xtrace, caseid)
    COST_EXTENSION.assign_total(xtrace, cost_total)

    trace_events = filter(lambda event: event[0] == caseid, EVENTS)

    for _, concept_name, cost_unit, lifecycle, org, timestamp in trace_events:
Example #8
    CONVERGENCE_TOLERANCE = 0.001
    NUM_THREADS = 8

    # note: the n_jobs parameter was removed from KMeans in scikit-learn 1.0;
    # drop it on newer versions
    kmeans = KMeans(n_clusters=NUM_CLUSTERS,
                    max_iter=MAX_ITERATIONS,
                    init=INITIALIZE_CLUSTERS,
                    tol=CONVERGENCE_TOLERANCE,
                    n_jobs=NUM_THREADS)

    # Fit the clustering on the per-trace feature vectors
    kmeans.fit(log_vector)

    # Create one new log per cluster, carrying over the original log's metadata
    new_logs = {}
    for i in range(len(kmeans.cluster_centers_)):
        new_log = XFactory.create_log(log.get_attributes().clone())
        for elem in log.get_extensions():
            new_log.get_extensions().add(elem)

        # Use the public accessors; assigning to name-mangled attributes
        # such as __classifiers from outside the class would not touch
        # the log's real internals.
        new_log.get_classifiers().extend(log.get_classifiers())
        new_log.get_global_trace_attributes().extend(
            log.get_global_trace_attributes())
        new_log.get_global_event_attributes().extend(
            log.get_global_event_attributes())

        new_logs[str(i)] = new_log

    # Distribute each trace to the log of its predicted cluster.
    for point, trace in zip(log_vector, log):
        cluster = kmeans.predict([point])[0]
        new_logs[str(cluster)].append(trace)
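A possible follow-up, mirroring the serializer from Example #4; the per-cluster file names are placeholders:

from opyenxes.data_out.XesXmlSerializer import XesXmlSerializer

# write one XES file per cluster (placeholder paths)
for cluster_id, sublog in new_logs.items():
    with open("cluster_{}.xes".format(cluster_id), "w") as f:
        XesXmlSerializer().serialize(sublog, f)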
Example #9
        split = l.strip().split("\t")
        acceptance = split[0].strip()
        acceptances.append(acceptance)

        trace = split[1].strip().split(";") if len(split) > 1 else []
        if trace != ['']:
            traces.append(trace)

        logging.info("trace {}, acc {}: {}".format(idx, acceptance, trace))

    logging.info("num traces: {}".format(len(traces)))
    logging.info("num acceptances: {}".format(len(acceptances)))
    logging.info("alphabet: {}".format(
        reduce(lambda x, y: x.union(y), map(set, traces))))

    positive_log = XFactory.create_log()
    negative_log = XFactory.create_log()

    assert len(acceptances) == len(traces)
    for acc, t in zip(acceptances, traces):
        trace = XFactory.create_trace()
        for e in t:
            event = XFactory.create_event()
            attribute = XFactory.create_attribute_literal("concept:name", e)
            # key the attribute by its own name, not by the type "string"
            event.get_attributes()["concept:name"] = attribute
            trace.append(event)
        if acc == "Y":
            positive_log.append(trace)
        else:
            negative_log.append(trace)
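The two labeled logs could then be written out as in Example #4; the file names here are placeholders:

from opyenxes.data_out.XesXmlSerializer import XesXmlSerializer

with open("positive.xes", "w") as f:  # placeholder paths
    XesXmlSerializer().serialize(positive_log, f)
with open("negative.xes", "w") as f:
    XesXmlSerializer().serialize(negative_log, f)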