예제 #1
0
def log():
    """Build and return a small sample XES log.

    The log carries one event-name classifier, one global trace attribute,
    one global event attribute, one literal log attribute, the
    ``meta_concept`` extension (parsed from its URL) and a single trace
    holding two events.

    :return: the populated log object created by ``XFactory.create_log``
    """
    xlog = XFactory.create_log()

    # Classifier
    xlog.get_classifiers().append(XEventNameClassifier())

    # Global trace / event attributes
    xlog.get_global_trace_attributes().append(
        XFactory.create_attribute_discrete('glb_t_attr', 0))
    xlog.get_global_event_attributes().append(
        XFactory.create_attribute_discrete('glb_e_attr', 0))

    # Log-level attribute
    xlog.get_attributes()['l_attr'] = XFactory.create_attribute_literal(
        'l_attr', 'UNKNOWN')

    # Extension, fetched and parsed from its definition URL
    extension_url = "http://www.xes-standard.org/meta_concept.xesext"
    xlog.get_extensions().add(XExtensionParser().parse(extension_url))

    # One trace with a boolean attribute and two events
    number_of_events = 2
    sample_trace = XFactory.create_trace()
    sample_trace.get_attributes()['t_attr'] = \
        XFactory.create_attribute_boolean('t_attr', True)
    for _ in range(number_of_events):
        sample_event = XFactory.create_event()
        # NOTE(review): the attribute's own key is 'e_attr' while it is
        # stored under the map key 'e_attr0' -- confirm this is intended.
        sample_event.get_attributes()['e_attr0'] = \
            XFactory.create_attribute_discrete('e_attr', 0)
        sample_trace.append(sample_event)

    xlog.append(sample_trace)
    return xlog
예제 #2
0
def merge_and_label(normLogs, devLogs):
    """Merge normal and deviant logs into one log and label every trace.

    The merged log takes its attributes (cloned), extensions, classifiers and
    global trace/event attributes from ``normLogs[0]``.  A literal ``"Label"``
    attribute is added to the global trace attributes and to every trace:
    ``"0"`` for traces coming from ``normLogs`` and ``"1"`` for traces coming
    from ``devLogs``.

    Note that the traces themselves are not copied: the ``"Label"`` attribute
    is written onto the trace objects of the input logs.

    :param normLogs: non-empty sequence of logs whose traces get label "0"
    :param devLogs: non-empty sequence of logs whose traces get label "1"
    :return: the merged log
    :raises AssertionError: if either sequence is empty
    """
    assert (len(normLogs) > 0 and len(devLogs) > 0)

    template_log = normLogs[0]
    merged_log = XFactory.create_log(template_log.get_attributes().clone())

    for elem in template_log.get_extensions():
        merged_log.get_extensions().add(elem)

    # Assigning ``merged_log.__classifiers`` from outside the class is not
    # name-mangled, so it only creates an unrelated attribute and never
    # reaches the lists returned by the getters.  Fill those lists instead.
    merged_log.get_classifiers().extend(template_log.get_classifiers())
    merged_log.get_global_trace_attributes().extend(
        template_log.get_global_trace_attributes())
    merged_log.get_global_event_attributes().extend(
        template_log.get_global_event_attributes())

    merged_log.get_global_trace_attributes().append(
        XAttributeLiteral("Label", "0"))

    for label, logs in (("0", normLogs), ("1", devLogs)):
        for log in logs:
            for trace in log:
                trace.get_attributes()["Label"] = XAttributeLiteral(
                    "Label", label)
                merged_log.append(trace)

    return merged_log
예제 #3
0
def cohort_to_event_log(cohort, trace_type, verbose=False, remove_unlisted=True, remove_duplicates=True,
                        event_filter=None, trace_filter=None, cores=multiprocessing.cpu_count(), window_size=200,
                        abstraction_path=None, abstraction_exact_match=False, abstraction_delimiter=";"):
    """Convert a fiber cohort into an XES event log, one window at a time.

    The MRNs of the cohort are cut into consecutive windows of ``window_size``
    entries.  For every window a sub-cohort is built and handed to
    `cohort_to_event_log_for_window` in a separate process; the processes run
    one after another and write into a shared, manager-backed list.  All
    collected traces are finally appended to a new log.

    Keyword arguments:
    cohort -- the fiber cohort
    trace_type -- the type of a trace (`mrn` or `visit`)
    verbose -- forwarded unchanged to the window worker (default False)
    remove_unlisted -- forwarded unchanged to the window worker (default True)
    remove_duplicates -- forwarded unchanged to the window worker (default True)
    event_filter -- a custom filter to filter events (default None)
    trace_filter -- a custom filter to filter traces (default None)
    cores -- forwarded to the window worker (default: CPU count, evaluated
             once when the function is defined)
    window_size -- the number of MRNs per window (default 200)
    abstraction_path -- the path to the abstraction file (default None)
    abstraction_exact_match -- forwarded unchanged to the window worker (default False)
    abstraction_delimiter -- the delimiter of the abstraction file (default ;)

    Returns the log holding every trace produced by the windows.
    """
    # Keep the manager referenced for as long as its list proxy is in use.
    manager = multiprocessing.Manager()
    traces = manager.list()

    all_mrns = list(cohort.mrns())
    window_amount = math.ceil(len(all_mrns) / window_size)

    # Everything the worker receives after the per-window cohort.
    shared_worker_args = (
        trace_type,
        verbose,
        remove_unlisted,
        remove_duplicates,
        event_filter,
        trace_filter,
        cores,
        abstraction_path,
        abstraction_exact_match,
        abstraction_delimiter,
        traces,
    )

    # A fresh process per window, joined before the next one starts, so the
    # memory used by a window is released once it is finished.
    for window_index in range(window_amount):
        window_number = window_index + 1
        print("Start window {current_window} / {max_window}".format(
            current_window=window_number, max_window=window_amount))
        started_at = time.perf_counter()

        first = window_index * window_size
        cohort_for_window = Cohort(condition.MRNs(all_mrns[first:first + window_size]))

        worker = multiprocessing.Process(
            target=cohort_to_event_log_for_window,
            args=(cohort_for_window,) + shared_worker_args,
        )
        worker.start()
        worker.join()

        elapsed = time.perf_counter() - started_at
        print("Finished window {current_window} / {max_window} in {window_time} s".format(
            current_window=window_number,
            max_window=window_amount,
            window_time=elapsed,
        ))

    log = XFactory.create_log()
    for collected_trace in traces:
        log.append(collected_trace)
    return log
예제 #4
0
def convert_line_in_event(type_for_attribute: dict, attribute_list: list):
    """Turn the fields of one line into an XES event.

    The first two fields are skipped.  Every remaining field is looked up by
    its position (as a string) in ``type_for_attribute`` and converted:

    * ``"Activity"`` / ``"Resource"`` -> literal attribute keyed by that type
    * a type containing ``"%Y"`` -> timestamp attribute keyed ``"time"``,
      parsed with the type string as ``strptime`` format
    * anything else -> discrete attribute keyed by the type, value ``int(...)``

    :param type_for_attribute: maps the column position (as string) to its type
    :param attribute_list: the field values of the line, as strings
    :return: an event created from the resulting attribute map
    """
    attribute_map = XFactory.create_attribute_map()

    for position, raw_value in enumerate(attribute_list[2:], start=2):
        column_type = type_for_attribute[str(position)]

        if column_type in ("Activity", "Resource"):
            attribute = XFactory.create_attribute_literal(column_type, raw_value)
        elif "%Y" in column_type:  # date column: the type is the format
            parsed_time = datetime.strptime(raw_value, column_type)
            attribute = XFactory.create_attribute_timestamp("time", parsed_time)
        else:  # numeric column such as a cost
            attribute = XFactory.create_attribute_discrete(column_type, int(raw_value))

        attribute_map[attribute.get_key()] = attribute

    return XFactory.create_event(attribute_map)
예제 #5
0
def attributeFactory(key, value, type):
    """Create an XES attribute of the requested kind.

    Supported ``type`` names and the conversion applied to ``value``:

    * ``"bool"`` / ``"boolean"`` -- case-insensitive ``"true"``/``"false"``;
      any other text yields a boolean attribute with value ``None``
    * ``"float"`` -- ``float(value)``
    * ``"int"`` -- ``int(value)``
    * ``"string"`` -- used as is
    * ``"date"`` -- ``value`` read as seconds since the epoch, converted with
      ``datetime.utcfromtimestamp``

    Any other type name prints a message and terminates the program with
    exit status 1.

    :param key: the attribute key
    :param value: the raw attribute value
    :param type: the name of the attribute type
    :return: the created attribute
    """
    if type in ["bool", "boolean"]:
        # Unknown spellings fall back to None.
        flag = {"true": True, "false": False}.get(value.lower())
        return XFactory.create_attribute_boolean(key, flag)

    if type == "float":
        return XFactory.create_attribute_continuous(key, float(value))

    if type == "int":
        return XFactory.create_attribute_discrete(key, int(value))

    if type == "string":
        return XFactory.create_attribute_literal(key, value)

    if type == "date":
        moment = datetime.utcfromtimestamp(float(value))
        return XFactory.create_attribute_timestamp(key, moment)

    # Not yet implemented: map, id, list, container
    print('Not supported attribute type: "' + type + '"')
    sys.exit(1)
예제 #6
0
def setEvent(txn, eventMapping, trace, switches):
    """Build an event from a transaction and insert it into ``trace``.

    For every key of ``eventMapping`` the attribute data is fetched with
    ``getAttributeData``; when data is available, an attribute is created from
    its first two items (value, type) via ``attributeFactory``.  If data is
    missing for a key whose mapping entry is not marked ``"nullable"``, the
    whole event is discarded.  An event that ends up without attributes is
    discarded as well.

    :param txn: the transaction the attribute data is read from
    :param eventMapping: maps attribute keys to their mapping entries
    :param trace: the trace receiving the event via ``insert_ordered``
    :param switches: passed through to ``getAttributeData``
    :return: None
    """
    event = XFactory.create_event()

    for attributeKey in eventMapping:
        attributeData = getAttributeData(txn, attributeKey, eventMapping,
                                         switches)
        if attributeData is not None:
            event.get_attributes()[attributeKey] = attributeFactory(
                attributeKey, attributeData[0], attributeData[1])
            continue

        mappingEntry = eventMapping[attributeKey]
        if "nullable" not in mappingEntry or not mappingEntry["nullable"]:
            # A mandatory attribute is missing: drop the event entirely.
            return

    if len(event.get_attributes()) != 0:
        trace.insert_ordered(event)
예제 #7
0
def setTrace(idAttrKey, idAttrValue, idAttrType, transaction, traceMap,
             switches):
    """Create a trace with its identifier and mapped attributes.

    The identifier attribute is always set.  Every other key of ``traceMap``
    (the key ``"identifier:id"`` is skipped) is resolved with
    ``getAttributeData``; available data is turned into an attribute via
    ``attributeFactory``.  If data is missing for a key whose mapping entry
    is not marked ``"nullable"``, no trace is produced.

    :param idAttrKey: key of the identifier attribute
    :param idAttrValue: raw value of the identifier attribute
    :param idAttrType: type name of the identifier attribute
    :param transaction: the transaction the attribute data is read from
    :param traceMap: maps attribute keys to their mapping entries
    :param switches: passed through to ``getAttributeData``
    :return: the new trace, or ``None`` if a mandatory attribute is missing
    """
    trace = XFactory.create_trace()
    trace.get_attributes()[idAttrKey] = attributeFactory(
        idAttrKey, idAttrValue, idAttrType)

    for attributeKey in traceMap:
        if attributeKey == "identifier:id":
            continue

        attributeData = getAttributeData(transaction, attributeKey,
                                         traceMap, switches)
        if attributeData is not None:
            attributeValue = attributeData[0]
            attributeType = attributeData[1]
            trace.get_attributes()[attributeKey] = attributeFactory(
                attributeKey, attributeValue, attributeType)
            continue

        mappingEntry = traceMap[attributeKey]
        if "nullable" not in mappingEntry or not mappingEntry["nullable"]:
            # A mandatory attribute is missing: no trace for this transaction.
            return None

    return trace
예제 #8
0
def extract(indexer, manifestPath, xesFilePath):
    """Build an XES log as described by a JSON manifest and write it to disk.

    The manifest is loaded from ``manifestPath``.  Its ``"mappings"`` entry is
    mandatory and is checked before anything else happens, so a broken
    manifest neither truncates ``./transactionsFromIndexer.txt`` nor creates
    the output file.  The optional entries ``"xesExtensions"``,
    ``"xesClassifiers"``, ``"xesGlobals"`` and ``"switches"`` are applied in
    the order in which they appear in the manifest.  The log is then filled
    by ``mapLog`` and serialized to ``xesFilePath``.

    :param indexer: passed through to ``mapLog``
    :param manifestPath: path of the JSON manifest
    :param xesFilePath: path of the XES file to write
    :return: None
    :raises KeyError: if the manifest has no ``"mappings"`` entry
    """
    # Close the manifest file deterministically instead of leaking the handle.
    with open(manifestPath) as manifestFile:
        manifest = json.load(manifestFile)

    try:
        mappings = manifest["mappings"]
    except KeyError:
        # Previously execution continued and failed later on the unbound
        # ``mappings`` name; fail here with the real cause instead.
        print("Missing mappings in the manifest!")
        raise

    log = XFactory.create_log()
    switches = {}

    # Start from an empty transactions file.
    with open("./transactionsFromIndexer.txt", "w") as f:
        f.write("")

    for key in manifest:
        if key == "xesExtensions":
            setExtension(log, manifest[key])
        elif key == "xesClassifiers":
            setClassifiers(log, manifest[key])
        elif key == "xesGlobals":
            setGlobals(log, manifest[key])
        elif key == "switches":
            switches = manifest[key]

    mapLog(log, mappings, indexer, switches)

    with open(xesFilePath, "w") as file:
        XesXmlSerializer().serialize(log, file)
예제 #9
0
from opyenxes.out.XesXmlSerializer import XesXmlSerializer
from sklearn.cluster import KMeans
from opyenxes.factory.XFactory import XFactory
import random

if __name__ == '__main__':
    path = "input_log.xes"

    with open(path) as log_file:
        logs = XUniversalParser().parse(log_file)

    classifier_doctype = XEventAttributeClassifier("doctype", ["doctype"])
    classifier_subprocess = XEventAttributeClassifier("subprocess",
                                                      ["subprocess"])

    new_log = XFactory.create_log()

    for log in logs:

        random_list_of_traces = random.sample(log, 5)

        for trace in random_list_of_traces:
            list_trace = []
            new_trace = XFactory.create_trace()
            for event in trace:

                doctype = classifier_doctype.get_class_identity(event)
                subprocess = classifier_subprocess.get_class_identity(event)

                if len(list_trace) != 0:
                    if list_trace[-1][0] == doctype and list_trace[-1][
예제 #10
0
"""
Create_random_log:
"""
from opyenxes.factory.XFactory import XFactory
from opyenxes.id.XIDFactory import XIDFactory
from opyenxes.out.XesXmlSerializer import XesXmlSerializer
import random

number_trace = 10
minimum_length_of_trace = 3
maximum_length_of_trace = 7
attributes_per_event = 4

log = XFactory.create_log()
for a in range(number_trace):
    trace = XFactory.create_trace()
    for e in range(
            random.randint(minimum_length_of_trace, maximum_length_of_trace)):
        event = XFactory.create_event()
        for _ in range(attributes_per_event):
            # Generate random attribute
            option = random.choice([
                "string", "date", "int", "float", "boolean", "id", "list",
                "container"
            ])
            if option == "string":
                attribute = XFactory.create_attribute_literal(
                    option, "UNKNOWN")
            elif option == "date":
                attribute = XFactory.create_attribute_timestamp(option, 0)
            elif option == "int":
예제 #11
0
from opyenxes.out.XesXmlGZIPSerializer import XesXmlGZIPSerializer
from opyenxes.classification.XEventNameClassifier import XEventNameClassifier
from opyenxes.factory.XFactory import XFactory
from opyenxes.model.XLog import XLog

__author__ = "Wai Lam Jonathan Lee"
__email__ = "*****@*****.**"


if __name__ == '__main__':
    datadir = os.path.join(
        '..', '..', 'data', 'synthetic', '2018-05-01_small'
    )

    nb_traces = 1000
    factory = XFactory()

    print(os.listdir(datadir))

    for dir in os.listdir(datadir):
        if not os.path.isdir(os.path.join(datadir, dir)):
            continue
        outdir = os.path.join(datadir, dir, 'l1000')
        os.makedirs(outdir)

        for xlog_filepath in os.listdir(os.path.join(datadir, dir, 'l5000')):
            if '.xes.gz' not in xlog_filepath:
                continue

            print('Processing {}'.format(xlog_filepath))
예제 #12
0
    ['3', 1400.0]
]

# Trace-level table built from the TRACES rows.
TRACE_DF = pd.DataFrame(TRACES, columns=TRACE_DF_COLUMNS)

# Classifier combining the event-name and lifecycle-transition classifiers.
NAME_AND_LIFECYCLE_CLF = XEventAndClassifier([XEventNameClassifier(), XEventLifeTransClassifier()])

# Maps each classifier's name to the list of attribute keys it is built from.
CLASSIFIERS = {
    XEventNameClassifier().name(): [const.CONCEPT_NAME],
    NAME_AND_LIFECYCLE_CLF.name(): [const.CONCEPT_NAME, const.LIFECYCLE_TRANS]
}

# Table representation assembled from the event/trace frames, the log
# attributes and the classifiers above.
LOG_TABLE = LogTable(event_df=EVENT_DF, trace_df=TRACE_DF,
                     attributes=LOG_ATTRIBUTE_DICT, classifiers=CLASSIFIERS)

# Log object built alongside LOG_TABLE: it gets a name and a continuous
# 'total_time' attribute here.
XLOG = XFactory.create_log()
XLOG_NAME = 'Test log'
CONCEPT_EXTENSION.assign_name(XLOG, XLOG_NAME)
TOTAL_TIME = 100
TOTAL_TIME_ATTRIBUTE = XFactory.create_attribute_continuous('total_time', TOTAL_TIME)
XLOG.get_attributes()['total_time'] = TOTAL_TIME_ATTRIBUTE

for caseid, cost_total in TRACES:
    xtrace = XFactory.create_trace()

    CONCEPT_EXTENSION.assign_name(xtrace, caseid)
    COST_EXTENSION.assign_total(xtrace, cost_total)

    trace_events = filter(lambda event: event[0] == caseid, EVENTS)

    for _, concept_name, cost_unit, lifecyle, org, timestamp in trace_events:
예제 #13
0
 def __init__(self):
     """Initialise the parent class, then pass a new ``XFactory`` instance to ``set_current_default``."""
     super().__init__()
     self.set_current_default(XFactory())
예제 #14
0
        split = l.strip().split("\t")
        acceptance = split[0].strip()
        acceptances.append(acceptance)

        trace = split[1].strip().split(";") if len(split) > 1 else []
        if trace != ['']:
            traces.append(trace)

        logging.info("trace {}, acc {}: {}".format(idx, acceptance, trace))

    logging.info("num traces: {}".format(len(traces)))
    logging.info("num acceptances: {}".format(len(acceptances)))
    logging.info("alphabet: {}".format(
        reduce(lambda x, y: x.union(y), map(set, traces))))

    positive_log = XFactory.create_log()
    negative_log = XFactory.create_log()

    assert len(acceptances) == len(traces)
    for acc, t in zip(acceptances, traces):
        trace = XFactory.create_trace()
        for e in t:
            event = XFactory.create_event()
            attribute = XFactory.create_attribute_literal("concept:name", e)
            event.get_attributes()["string"] = attribute
            trace.append(event)
        if acc == "Y":
            positive_log.append(trace)
        else:
            negative_log.append(trace)
예제 #15
0
# Read the CSV file and group its events into traces by case id (column 0).
with open("xes_file/csv_file.csv") as file:
    # The header row names the type of every column.
    first_line = file.readline().split(";")
    dictionary = {}
    for i, column in enumerate(first_line):
        if "yyyy" in column:
            # Convert csv date format in xes date format
            column = column.replace("dd", "%d").\
                replace("MM", "%m").replace("yyyy", "%Y").replace("HH", "%H").\
                replace("mm", "%M")
            first_line[i] = column

        dictionary[str(i)] = column.strip("\n")

    # Drop the line terminator so the last column holds a clean value.
    first_event = file.readline().rstrip("\n").split(";")
    actual_trace = first_event[0]

    log = XFactory.create_log()
    trace = XFactory.create_trace()
    trace.append(convert_line_in_event(dictionary, first_event))

    for line in file:
        if not line.strip():
            # Blank lines (e.g. a trailing newline) carry no event.
            continue
        line_list = line.rstrip("\n").split(";")
        event = convert_line_in_event(dictionary, line_list)
        if line_list[0] != actual_trace:  # View the Case Id
            # A new case starts: store the finished trace and remember the
            # new case id so its following events are grouped together.
            log.append(trace)
            trace = XFactory.create_trace()
            actual_trace = line_list[0]
        trace.append(event)

    # The trace being built when the file ends belongs to the log as well.
    log.append(trace)

# Save log in xes format
with open("xes_file/csv_log_in_xes_format.xes", "w") as file:
예제 #16
0
    CONVERGENCE_TOLERANCE = 0.001
    NUM_THREADS = 8

    kmeans = KMeans(n_clusters=NUM_CLUSTERS,
                    max_iter=MAX_ITERATIONS,
                    init=INITIALIZE_CLUSTERS,
                    tol=CONVERGENCE_TOLERANCE,
                    n_jobs=NUM_THREADS)

    # Create the cluster with the log vector
    kmeans.fit(log_vector)

    # Create new log with the attribute for the original log
    new_logs = {}
    for i in range(len(kmeans.cluster_centers_)):
        new_log = XFactory.create_log(log.get_attributes().clone())
        for elem in log.get_extensions():
            new_log.get_extensions().add(elem)

        new_log.__classifiers = log.get_classifiers().copy()
        new_log.__globalTraceAttributes = log.get_global_trace_attributes(
        ).copy()
        new_log.__globalEventAttributes = log.get_global_event_attributes(
        ).copy()

        new_logs[str(i)] = new_log

    # Distribute the trace depending the cluster.
    for point, trace in zip(log_vector, log):
        cluster = kmeans.predict([point])[0]
        new_logs[str(cluster)].append(trace)
예제 #17
0
def create_xes_trace(trace_events, event_filter, abstraction_path,
                     abstraction_exact_match, abstraction_delimiter, verbose,
                     remove_unlisted, remove_duplicates):
    """Build an opyenxes trace from the events that belong to one trace.

    The trace receives a random UUID as ``id`` plus the patient attributes of
    the first event.  Each event accepted by ``event_filter`` is translated
    with ``translate_procedure_diagnosis_material_to_event``; events whose
    translation yields no descriptor are dropped.  Optionally, events sharing
    both timestamp and name are reduced to their first occurrence.

    Keyword arguments:
    trace_events -- list of events belonging to a trace
    event_filter -- a custom filter to filter events (None keeps all events)
    abstraction_path -- forwarded to the translation function
    abstraction_exact_match -- forwarded to the translation function
    abstraction_delimiter -- forwarded to the translation function
    verbose -- forwarded to the translation function
    remove_unlisted -- forwarded to the translation function
    remove_duplicates -- flag for remove duplicate events in a trace

    Returns the trace.  It is empty (no attributes) when ``trace_events`` is
    empty, and carries only the trace attributes when no event is relevant.
    """
    trace = XFactory.create_trace()

    if len(trace_events) == 0:
        return trace

    trace.get_attributes()["id"] = XFactory.create_attribute_id(
        "id", str(uuid.uuid4()))

    # (attribute key, field of the first event) for the patient attributes
    patient_fields = (
        ("patient:mrn", "medical_record_number"),
        ("patient:date_of_birth", "date_of_birth"),
        ("patient:address_zip", "address_zip"),
        ("patient:gender", "gender"),
        ("patient:language", "language"),
        ("patient:patient_ethnic_group", "patient_ethnic_group"),
        ("patient:race", "race"),
        ("patient:religion", "religion"),
        ("patient:citizenship", "citizenship"),
        ("patient:marital_status_code", "marital_status_code"),
    )
    first_event = trace_events[0]
    for attribute_key, field_name in patient_fields:
        trace.get_attributes()[attribute_key] = \
            XFactory.create_attribute_literal(
                attribute_key, getattr(first_event, field_name))

    # Keep only events that pass the filter and have a translation.
    relevant_events = []
    for source_event in trace_events:
        if (event_filter is not None
                and not event_filter.is_relevant_event(source_event)):
            continue

        descriptor, name, context, code = \
            translate_procedure_diagnosis_material_to_event(
                abstraction_path=abstraction_path,
                abstraction_exact_match=abstraction_exact_match,
                abstraction_delimiter=abstraction_delimiter,
                event=source_event,
                verbose=verbose,
                remove_unlisted=remove_unlisted
            )
        if descriptor is None:
            continue

        relevant_events.append({
            "timestamp": source_event.timestamp,
            "name": descriptor,
            "description": name,
            "context": context,
            "code": code,
            "caregiver_group_key": source_event.caregiver_group_key,
            "facility_key": source_event.facility_key
        })

    if len(relevant_events) == 0:
        return trace

    if remove_duplicates:
        # The first event of every (timestamp, name) pair wins.
        seen_pairs = set()
        first_occurrences = []
        for record in relevant_events:
            pair = (record["timestamp"], record["name"])
            if pair in seen_pairs:
                continue
            seen_pairs.add(pair)
            first_occurrences.append(record)
        relevant_events = first_occurrences

    # (map key, attribute key, record key) for the literal event attributes
    # NOTE(review): "Activity" (and "timestamp" below) are map keys that
    # differ from the attributes' own keys -- confirm this is intended.
    literal_fields = (
        ("Activity", "concept:name", "name"),
        ("event:description", "event:description", "description"),
        ("event:context", "event:context", "context"),
        ("event:code", "event:code", "code"),
        ("event:caregiver_group", "event:caregiver_group",
         "caregiver_group_key"),
        ("event:facility", "event:facility", "facility_key"),
    )
    for record in relevant_events:
        log_event = XFactory.create_event()

        log_event.get_attributes()["timestamp"] = \
            XFactory.create_attribute_timestamp(
                "time:timestamp", record["timestamp"])

        for map_key, attribute_key, record_key in literal_fields:
            log_event.get_attributes()[map_key] = \
                XFactory.create_attribute_literal(
                    attribute_key, record[record_key])

        trace.append(log_event)

    return trace