def log():
    """Build a small demo XES log exercising the main opyenxes features.

    The log gets a name classifier, one global trace attribute, one global
    event attribute, a log-level literal attribute, the meta_concept
    extension, and a single trace containing two events.

    Returns:
        The assembled opyenxes log object.
    """
    log = XFactory.create_log()

    # Add a log classifier based on event names.
    clf = XEventNameClassifier()
    log.get_classifiers().append(clf)

    # Add global trace attributes.
    glb_t_attr = XFactory.create_attribute_discrete('glb_t_attr', 0)
    log.get_global_trace_attributes().append(glb_t_attr)

    # Add global event attributes.
    glb_e_attr = XFactory.create_attribute_discrete('glb_e_attr', 0)
    log.get_global_event_attributes().append(glb_e_attr)

    # Add log attributes.
    str_attr = XFactory.create_attribute_literal('l_attr', 'UNKNOWN')
    log.get_attributes()['l_attr'] = str_attr

    # Add the meta_concept extension (parsed from the XES standard site).
    meta_concept = XExtensionParser().parse(
        "http://www.xes-standard.org/meta_concept.xesext")
    log.get_extensions().add(meta_concept)

    # Add a trace with a boolean trace attribute and a couple of events.
    tracelen = 2
    trace0 = XFactory.create_trace()
    bool_attr = XFactory.create_attribute_boolean('t_attr', True)
    trace0.get_attributes()['t_attr'] = bool_attr
    for _ in range(tracelen):
        event = XFactory.create_event()
        int_attr = XFactory.create_attribute_discrete('e_attr', 0)
        # FIX: store the attribute under its own key ('e_attr'); the
        # original used the mismatched map key 'e_attr0'.
        event.get_attributes()['e_attr'] = int_attr
        trace0.append(event)
    log.append(trace0)
    return log
def merge_and_label(normLogs, devLogs):
    """Merge normal and deviant logs into one labelled log.

    Every trace from ``normLogs`` gets a literal "Label" attribute of "0";
    every trace from ``devLogs`` gets "1".  The merged log inherits the
    attributes, extensions, classifiers and global attributes of the first
    normal log, plus a "Label" global trace attribute.

    Keyword arguments:
    normLogs -- non-empty list of logs considered normal
    devLogs -- non-empty list of logs considered deviant

    Raises:
        ValueError: if either input list is empty.
    """
    # FIX: use a real exception instead of `assert`, which is stripped
    # under `python -O`.
    if not (normLogs and devLogs):
        raise ValueError("normLogs and devLogs must both be non-empty")
    template = normLogs[0]
    merged_log = XFactory.create_log(template.get_attributes().clone())
    for elem in template.get_extensions():
        merged_log.get_extensions().add(elem)
    # FIX: the original assigned to merged_log.__classifiers (and the two
    # __global*Attributes names) from module scope.  Name mangling only
    # happens inside a class body, so those assignments created brand-new
    # attributes and never reached XLog's private fields.  Copy the
    # metadata through the public accessors instead.
    merged_log.get_classifiers().extend(template.get_classifiers())
    merged_log.get_global_trace_attributes().extend(
        template.get_global_trace_attributes())
    merged_log.get_global_event_attributes().extend(
        template.get_global_event_attributes())
    merged_log.get_global_trace_attributes().append(
        XAttributeLiteral("Label", "0"))
    # Label and collect the traces from both groups.
    for label, logs in (("0", normLogs), ("1", devLogs)):
        for log in logs:
            for trace in log:
                trace.get_attributes()["Label"] = XAttributeLiteral(
                    "Label", label)
                merged_log.append(trace)
    return merged_log
def cohort_to_event_log(cohort, trace_type, verbose=False, remove_unlisted=True,
                        remove_duplicates=True, event_filter=None,
                        trace_filter=None, cores=multiprocessing.cpu_count(),
                        window_size=200, abstraction_path=None,
                        abstraction_exact_match=False,
                        abstraction_delimiter=";"):
    """Converts a fiber cohort to an xes event log.

    Therefore it slices the cohort to smaller windows (because of memory
    restrictions) and calls the method `cohort_to_event_log_for_window`
    with the slices.

    Keyword arguments:
    cohort -- the fiber cohort
    trace_type -- the type of a trace (`mrn` or `visit`)
    verbose -- flag if the events should contain original non abstracted values (default False)
    remove_unlisted -- flag if a trace should only contain listed events (default True)
    remove_duplicates -- flag if duplicate events should be removed (default True)
    event_filter -- a custom filter to filter events (default None)
    trace_filter -- a custom filter to filter traces (default None)
    cores -- the number of cores which should be used to process the cohort (default amount of CPUs)
    window_size -- the number of patients per window (default 200)
    abstraction_path -- the path to the abstraction file (default None)
    abstraction_exact_match -- flag if the abstraction algorithm should only abstract exacted matches (default False)
    abstraction_delimiter -- the delimiter of the abstraction file (default ;)
    """
    # NOTE: the docstring previously claimed a window_size default of 500;
    # the actual default in the signature is 200.
    manager = multiprocessing.Manager()
    traces = manager.list()  # shared list filled by the worker processes
    mrns = list(cohort.mrns())
    window_amount = math.ceil(len(mrns) / window_size)
    # Spawn a new process for each window so its memory is released when
    # the process exits.
    for i in range(window_amount):
        print("Start window {current_window} / {max_window}".format(
            current_window=(i + 1), max_window=window_amount))
        window_start_time = time.perf_counter()
        mrns_in_window = mrns[i * window_size: (i + 1) * window_size]
        cohort_for_window = Cohort(condition.MRNs(mrns_in_window))
        p = multiprocessing.Process(
            target=cohort_to_event_log_for_window,
            args=(
                cohort_for_window,
                trace_type,
                verbose,
                remove_unlisted,
                remove_duplicates,
                event_filter,
                trace_filter,
                cores,
                abstraction_path,
                abstraction_exact_match,
                abstraction_delimiter,
                traces
            ))
        p.start()
        p.join()
        print("Finished window {current_window} / {max_window} in {window_time} s".format(
            current_window=(i + 1),
            max_window=window_amount,
            window_time=(time.perf_counter() - window_start_time)
        ))
    # Collect all traces produced by the workers into a single log.
    log = XFactory.create_log()
    for trace in traces:
        log.append(trace)
    return log
def convert_line_in_event(type_for_attribute: dict, attribute_list: list):
    """Read one line and convert in a Xes Event object

    :param type_for_attribute: dictionary with the type of all attribute.
    :param attribute_list: List with the attribute in string format
    :return: An XEvent with the respective attribute
    """
    attribute_map = XFactory.create_attribute_map()
    # The first two columns (index 0 and 1) are not event attributes.
    for index, attribute_string in enumerate(attribute_list[2:], start=2):
        attribute_type = type_for_attribute[str(index)]
        if attribute_type in ("Activity", "Resource"):
            attribute = XFactory.create_attribute_literal(
                attribute_type, attribute_string)
        elif "%Y" in attribute_type:
            # The "type" is a strptime format string, so this is a date.
            attribute = XFactory.create_attribute_timestamp(
                "time", datetime.strptime(attribute_string, attribute_type))
        else:
            # Remaining columns (e.g. cost) are treated as integers.
            attribute = XFactory.create_attribute_discrete(
                attribute_type, int(attribute_string))
        attribute_map[attribute.get_key()] = attribute
    return XFactory.create_event(attribute_map)
def attributeFactory(key, value, type):
    """Create an opyenxes attribute of the requested type.

    Keyword arguments:
    key -- the attribute key
    value -- the raw string value to convert
    type -- one of "bool"/"boolean", "float", "int", "string", "date"

    Exits the process with status 1 for unsupported types
    (map, id, list, container are not yet implemented).
    """
    if type in ("bool", "boolean"):
        # FIX: the original tested value.lower() against both "True" and
        # "true" (and "False"/"false"); the capitalised variants can never
        # match a lower-cased string, so a single comparison suffices.
        lowered = value.lower()
        if lowered == "true":
            value = True
        elif lowered == "false":
            value = False
        else:
            # NOTE(review): unparsable booleans fall through as None, as in
            # the original — confirm XFactory accepts a None value here.
            value = None
        return XFactory.create_attribute_boolean(key, value)
    elif type == "float":
        return XFactory.create_attribute_continuous(key, float(value))
    elif type == "int":
        return XFactory.create_attribute_discrete(key, int(value))
    elif type == "string":
        return XFactory.create_attribute_literal(key, value)
    elif type == "date":
        # The raw value is a Unix timestamp in seconds.
        attributeValue = datetime.utcfromtimestamp(float(value))
        return XFactory.create_attribute_timestamp(key, attributeValue)
    else:
        print('Not supported attribute type: "' + type + '"')
        sys.exit(1)  # Not yet implemented: map, id, list, container
def setEvent(txn, eventMapping, trace, switches):
    """Build an event from the mapping and insert it into the trace in order.

    The event is discarded entirely when a non-nullable attribute of the
    mapping yields no data, and when it ends up with no attributes at all.
    """
    event = XFactory.create_event()
    for attributeKey in eventMapping:
        attributeData = getAttributeData(txn, attributeKey, eventMapping,
                                         switches)
        # FIX: compare against None with `is not`, not `!=`.
        if attributeData is not None:
            event.get_attributes()[attributeKey] = attributeFactory(
                attributeKey, attributeData[0], attributeData[1])
        elif ("nullable" not in eventMapping[attributeKey]
              or not eventMapping[attributeKey]["nullable"]):
            # A required attribute is missing: drop the whole event.
            return
    if event.get_attributes():
        trace.insert_ordered(event)
def setTrace(idAttrKey, idAttrValue, idAttrType, transaction, traceMap,
             switches):
    """Create a trace carrying its identifier plus the mapped attributes.

    Returns the populated trace, or None when a required (non-nullable)
    attribute yields no data.
    """
    trace = XFactory.create_trace()
    trace.get_attributes()[idAttrKey] = attributeFactory(
        idAttrKey, idAttrValue, idAttrType)
    for attributeKey in traceMap:
        if attributeKey == "identifier:id":
            continue  # identifier was already set above
        attributeData = getAttributeData(transaction, attributeKey, traceMap,
                                         switches)
        # FIX: compare against None with `is not`, not `!=`.
        if attributeData is not None:
            attributeValue, attributeType = attributeData[0], attributeData[1]
            trace.get_attributes()[attributeKey] = attributeFactory(
                attributeKey, attributeValue, attributeType)
        elif ("nullable" not in traceMap[attributeKey]
              or not traceMap[attributeKey]["nullable"]):
            return None
    return trace
def extract(indexer, manifestPath, xesFilePath):
    """Drive the extraction: read the manifest, build the log, write XES.

    Keyword arguments:
    indexer -- the transaction indexer handed through to mapLog
    manifestPath -- path to the JSON manifest describing the mapping
    xesFilePath -- output path for the serialized XES file
    """
    # FIX: close the manifest file deterministically (the original used a
    # bare open() whose handle was never closed).
    with open(manifestPath) as manifestFile:
        manifest = json.load(manifestFile)
    log = XFactory.create_log()
    switches = {}
    # Truncate the transaction debug dump.
    with open("./transactionsFromIndexer.txt", "w") as f:
        f.write("")
    for key in manifest:
        if key == "xesExtensions":
            setExtension(log, manifest[key])
        elif key == "xesClassifiers":
            setClassifiers(log, manifest[key])
        elif key == "xesGlobals":
            setGlobals(log, manifest[key])
        elif key == "switches":
            switches = manifest[key]
    try:
        mappings = manifest["mappings"]
    except KeyError:
        # FIX: the original's bare `except:` swallowed the error and then
        # crashed later with a NameError on `mappings`; fail fast instead.
        print("Missing mappings in the manifest!")
        sys.exit(1)
    mapLog(log, mappings, indexer, switches)
    with open(xesFilePath, "w") as file:
        XesXmlSerializer().serialize(log, file)
from opyenxes.out.XesXmlSerializer import XesXmlSerializer from sklearn.cluster import KMeans from opyenxes.factory.XFactory import XFactory import random if __name__ == '__main__': path = "input_log.xes" with open(path) as log_file: logs = XUniversalParser().parse(log_file) classifier_doctype = XEventAttributeClassifier("doctype", ["doctype"]) classifier_subprocess = XEventAttributeClassifier("subprocess", ["subprocess"]) new_log = XFactory.create_log() for log in logs: random_list_of_traces = random.sample(log, 5) for trace in random_list_of_traces: list_trace = [] new_trace = XFactory.create_trace() for event in trace: doctype = classifier_doctype.get_class_identity(event) subprocess = classifier_subprocess.get_class_identity(event) if len(list_trace) != 0: if list_trace[-1][0] == doctype and list_trace[-1][
""" Create_random_log: """ from opyenxes.factory.XFactory import XFactory from opyenxes.id.XIDFactory import XIDFactory from opyenxes.out.XesXmlSerializer import XesXmlSerializer import random number_trace = 10 minimum_length_of_trace = 3 maximum_length_of_trace = 7 attributes_per_event = 4 log = XFactory.create_log() for a in range(number_trace): trace = XFactory.create_trace() for e in range( random.randint(minimum_length_of_trace, maximum_length_of_trace)): event = XFactory.create_event() for _ in range(attributes_per_event): # Generate random attribute option = random.choice([ "string", "date", "int", "float", "boolean", "id", "list", "container" ]) if option == "string": attribute = XFactory.create_attribute_literal( option, "UNKNOWN") elif option == "date": attribute = XFactory.create_attribute_timestamp(option, 0) elif option == "int":
from opyenxes.out.XesXmlGZIPSerializer import XesXmlGZIPSerializer from opyenxes.classification.XEventNameClassifier import XEventNameClassifier from opyenxes.factory.XFactory import XFactory from opyenxes.model.XLog import XLog __author__ = "Wai Lam Jonathan Lee" __email__ = "*****@*****.**" if __name__ == '__main__': datadir = os.path.join( '..', '..', 'data', 'synthetic', '2018-05-01_small' ) nb_traces = 1000 factory = XFactory() print(os.listdir(datadir)) for dir in os.listdir(datadir): if not os.path.isdir(os.path.join(datadir, dir)): continue outdir = os.path.join(datadir, dir, 'l1000') os.makedirs(outdir) for xlog_filepath in os.listdir(os.path.join(datadir, dir, 'l5000')): if '.xes.gz' not in xlog_filepath: continue print('Processing {}'.format(xlog_filepath))
['3', 1400.0] ] TRACE_DF = pd.DataFrame(TRACES, columns=TRACE_DF_COLUMNS) NAME_AND_LIFECYCLE_CLF = XEventAndClassifier([XEventNameClassifier(), XEventLifeTransClassifier()]) CLASSIFIERS = { XEventNameClassifier().name(): [const.CONCEPT_NAME], NAME_AND_LIFECYCLE_CLF.name(): [const.CONCEPT_NAME, const.LIFECYCLE_TRANS] } LOG_TABLE = LogTable(event_df=EVENT_DF, trace_df=TRACE_DF, attributes=LOG_ATTRIBUTE_DICT, classifiers=CLASSIFIERS) XLOG = XFactory.create_log() XLOG_NAME = 'Test log' CONCEPT_EXTENSION.assign_name(XLOG, XLOG_NAME) TOTAL_TIME = 100 TOTAL_TIME_ATTRIBUTE = XFactory.create_attribute_continuous('total_time', TOTAL_TIME) XLOG.get_attributes()['total_time'] = TOTAL_TIME_ATTRIBUTE for caseid, cost_total in TRACES: xtrace = XFactory.create_trace() CONCEPT_EXTENSION.assign_name(xtrace, caseid) COST_EXTENSION.assign_total(xtrace, cost_total) trace_events = filter(lambda event: event[0] == caseid, EVENTS) for _, concept_name, cost_unit, lifecyle, org, timestamp in trace_events:
def __init__(self): super().__init__() self.set_current_default(XFactory())
split = l.strip().split("\t") acceptance = split[0].strip() acceptances.append(acceptance) trace = split[1].strip().split(";") if len(split) > 1 else [] if trace != ['']: traces.append(trace) logging.info("trace {}, acc {}: {}".format(idx, acceptance, trace)) logging.info("num traces: {}".format(len(traces))) logging.info("num acceptances: {}".format(len(acceptances))) logging.info("alphabet: {}".format( reduce(lambda x, y: x.union(y), map(set, traces)))) positive_log = XFactory.create_log() negative_log = XFactory.create_log() assert len(acceptances) == len(traces) for acc, t in zip(acceptances, traces): trace = XFactory.create_trace() for e in t: event = XFactory.create_event() attribute = XFactory.create_attribute_literal("concept:name", e) event.get_attributes()["string"] = attribute trace.append(event) if acc == "Y": positive_log.append(trace) else: negative_log.append(trace)
with open("xes_file/csv_file.csv") as file: first_line = file.readline().split(";") dictionary = {} for i in range(len(first_line)): if "yyyy" in first_line[i]: # Convert csv date format in xes date format first_line[i] = first_line[i].replace("dd", "%d").\ replace("MM", "%m").replace("yyyy", "%Y").replace("HH", "%H").\ replace("mm", "%M") dictionary[str(i)] = first_line[i].strip("\n") first_event = file.readline().split(";") actual_trace = first_event[0] log = XFactory.create_log() trace = XFactory.create_trace() trace.append(convert_line_in_event(dictionary, first_event)) for line in file.readlines(): line_list = line.split(";") event = convert_line_in_event(dictionary, line_list) if line_list[0] == actual_trace: # View the Case Id trace.append(event) else: log.append(trace) trace = XFactory.create_trace() trace.append(event) # Save log in xes format with open("xes_file/csv_log_in_xes_format.xes", "w") as file:
CONVERGENCE_TOLERANCE = 0.001
NUM_THREADS = 8

kmeans = KMeans(n_clusters=NUM_CLUSTERS,
                max_iter=MAX_ITERATIONS,
                init=INITIALIZE_CLUSTERS,
                tol=CONVERGENCE_TOLERANCE,
                n_jobs=NUM_THREADS)

# Create the cluster with the log vector
kmeans.fit(log_vector)

# Create one new log per cluster centre, carrying over the original
# log's metadata (attributes, extensions, classifiers, globals).
new_logs = {}
for i in range(len(kmeans.cluster_centers_)):
    new_log = XFactory.create_log(log.get_attributes().clone())
    for elem in log.get_extensions():
        new_log.get_extensions().add(elem)
    # FIX: assigning to new_log.__classifiers (and the __global*
    # attributes) from module scope bypasses name mangling — it sets a
    # brand-new attribute instead of XLog's private field.  Copy the
    # metadata through the public accessors instead.
    new_log.get_classifiers().extend(log.get_classifiers())
    new_log.get_global_trace_attributes().extend(
        log.get_global_trace_attributes())
    new_log.get_global_event_attributes().extend(
        log.get_global_event_attributes())
    new_logs[str(i)] = new_log

# Distribute the trace depending the cluster.
for point, trace in zip(log_vector, log):
    cluster = kmeans.predict([point])[0]
    new_logs[str(cluster)].append(trace)
def create_xes_trace(trace_events, event_filter, abstraction_path,
                     abstraction_exact_match, abstraction_delimiter,
                     verbose, remove_unlisted, remove_duplicates):
    """Collect events that belong to a trace in an opyenxes trace.

    Keyword arguments:
    trace_events -- list of events belonging to a trace
    abstraction_path -- path to the abstraction table stored as a .csv-file
    abstraction_delimiter -- column delimiter used in abstraction table
    abstraction_exact_match -- match only keywords that are identical to the given event name
    verbose -- flag to enable detailed console output
    remove_unlisted -- remove all events that are not included in the abstraction table
    event_filter -- a custom filter to filter events
    remove_duplicates -- flag for remove duplicate events in a trace
    """
    trace = XFactory.create_trace()
    if not trace_events:
        return trace
    _add_patient_attributes(trace, trace_events[0])
    relevant_events = _collect_relevant_events(
        trace_events, event_filter, abstraction_path,
        abstraction_exact_match, abstraction_delimiter,
        verbose, remove_unlisted)
    if not relevant_events:
        return trace
    if remove_duplicates:
        relevant_events = _deduplicate_events(relevant_events)
    for event in relevant_events:
        trace.append(_create_xes_event(event))
    return trace


def _add_patient_attributes(trace, first_event):
    """Attach a random trace id and the patient demographics (taken from
    the trace's first event) as literal trace attributes."""
    attributes = trace.get_attributes()
    attributes["id"] = XFactory.create_attribute_id("id", str(uuid.uuid4()))
    for key, value in (
        ("patient:mrn", first_event.medical_record_number),
        ("patient:date_of_birth", first_event.date_of_birth),
        ("patient:address_zip", first_event.address_zip),
        ("patient:gender", first_event.gender),
        ("patient:language", first_event.language),
        ("patient:patient_ethnic_group", first_event.patient_ethnic_group),
        ("patient:race", first_event.race),
        ("patient:religion", first_event.religion),
        ("patient:citizenship", first_event.citizenship),
        ("patient:marital_status_code", first_event.marital_status_code),
    ):
        attributes[key] = XFactory.create_attribute_literal(key, value)


def _collect_relevant_events(trace_events, event_filter, abstraction_path,
                             abstraction_exact_match, abstraction_delimiter,
                             verbose, remove_unlisted):
    """Filter and translate the raw events into plain event dicts.

    Events rejected by `event_filter`, or whose translation yields no
    event descriptor, are dropped.
    """
    relevant_events = list()
    for event in trace_events:
        if event_filter is not None and \
                not event_filter.is_relevant_event(event):
            continue
        event_descriptor, event_name, event_context, event_code = \
            translate_procedure_diagnosis_material_to_event(
                abstraction_path=abstraction_path,
                abstraction_exact_match=abstraction_exact_match,
                abstraction_delimiter=abstraction_delimiter,
                event=event,
                verbose=verbose,
                remove_unlisted=remove_unlisted
            )
        if event_descriptor is not None:
            relevant_events.append({
                "timestamp": event.timestamp,
                "name": event_descriptor,
                "description": event_name,
                "context": event_context,
                "code": event_code,
                "caregiver_group_key": event.caregiver_group_key,
                "facility_key": event.facility_key
            })
    return relevant_events


def _deduplicate_events(events):
    """Remove events sharing the same (timestamp, name) pair, keeping the
    first occurrence and the original order."""
    unique_values = set()
    deduplicated_events = list()
    for event in events:
        key = (event["timestamp"], event["name"])
        if key not in unique_values:
            unique_values.add(key)
            deduplicated_events.append(event)
    return deduplicated_events


def _create_xes_event(event):
    """Convert one event dict into an opyenxes event."""
    log_event = XFactory.create_event()
    attributes = log_event.get_attributes()
    # FIX: the original stored these two attributes under the map keys
    # "timestamp" and "Activity" while their attribute keys were
    # "time:timestamp" and "concept:name"; use matching keys, consistent
    # with every other attribute in this function.
    attributes["time:timestamp"] = XFactory.create_attribute_timestamp(
        "time:timestamp", event["timestamp"])
    attributes["concept:name"] = XFactory.create_attribute_literal(
        "concept:name", event["name"])
    attributes["event:description"] = XFactory.create_attribute_literal(
        "event:description", event["description"])
    attributes["event:context"] = XFactory.create_attribute_literal(
        "event:context", event["context"])
    attributes["event:code"] = XFactory.create_attribute_literal(
        "event:code", event["code"])
    attributes["event:caregiver_group"] = XFactory.create_attribute_literal(
        "event:caregiver_group", event["caregiver_group_key"])
    attributes["event:facility"] = XFactory.create_attribute_literal(
        "event:facility", event["facility_key"])
    return log_event