from opyenxes.classification.XEventNameClassifier import XEventNameClassifier
from opyenxes.extension.XExtensionParser import XExtensionParser
from opyenxes.factory.XFactory import XFactory


def log():
    log = XFactory.create_log()

    # add a log classifier
    clf = XEventNameClassifier()
    log.get_classifiers().append(clf)

    # add global trace attributes
    glb_t_attr = XFactory.create_attribute_discrete('glb_t_attr', 0)
    log.get_global_trace_attributes().append(glb_t_attr)

    # add global event attributes
    glb_e_attr = XFactory.create_attribute_discrete('glb_e_attr', 0)
    log.get_global_event_attributes().append(glb_e_attr)

    # add log attributes
    str_attr = XFactory.create_attribute_literal('l_attr', 'UNKNOWN')
    log.get_attributes()['l_attr'] = str_attr

    # add an extension
    meta_concept = XExtensionParser().parse(
        "http://www.xes-standard.org/meta_concept.xesext")
    log.get_extensions().add(meta_concept)

    # add a trace with a boolean trace attribute
    tracelen = 2
    trace0 = XFactory.create_trace()
    bool_attr = XFactory.create_attribute_boolean('t_attr', True)
    trace0.get_attributes()['t_attr'] = bool_attr

    # add `tracelen` events, each carrying a discrete attribute
    for i in range(tracelen):
        event = XFactory.create_event()
        int_attr = XFactory.create_attribute_discrete('e_attr', 0)
        event.get_attributes()['e_attr'] = int_attr  # map key matches the attribute key
        trace0.append(event)

    log.append(trace0)
    return log
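# Usage sketch (not part of the original snippet): build the log above and
# write it to disk with opyenxes' XML serializer; 'sample.xes' is a
# hypothetical output path.
from opyenxes.data_out.XesXmlSerializer import XesXmlSerializer

sample_log = log()
with open('sample.xes', 'w') as out:
    XesXmlSerializer().serialize(sample_log, out)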
from opyenxes.factory.XFactory import XFactory
from opyenxes.model.XAttributeLiteral import XAttributeLiteral


def merge_and_label(normLogs, devLogs):
    assert len(normLogs) > 0 and len(devLogs) > 0
    merged_log = XFactory.create_log(normLogs[0].get_attributes().clone())
    for elem in normLogs[0].get_extensions():
        merged_log.get_extensions().add(elem)
    # Copy classifiers and global attributes through the public API: assigning
    # to merged_log.__classifiers from outside the class would create a new,
    # unrelated attribute instead of updating XLog's internal state.
    merged_log.get_classifiers().extend(normLogs[0].get_classifiers())
    merged_log.get_global_trace_attributes().extend(
        normLogs[0].get_global_trace_attributes())
    merged_log.get_global_event_attributes().extend(
        normLogs[0].get_global_event_attributes())
    merged_log.get_global_trace_attributes().append(
        XAttributeLiteral("Label", "0"))
    # label normal traces with "0" and deviant traces with "1"
    for log in normLogs:
        for trace in log:
            trace.get_attributes()["Label"] = XAttributeLiteral("Label", "0")
            merged_log.append(trace)
    for log in devLogs:
        for trace in log:
            trace.get_attributes()["Label"] = XAttributeLiteral("Label", "1")
            merged_log.append(trace)
    return merged_log
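# Usage sketch (assumed): parse one normal and one deviant log with opyenxes
# and merge them into a single labeled log; the file names are hypothetical.
from opyenxes.data_in.XUniversalParser import XUniversalParser

with open('normal.xes') as norm_f, open('deviant.xes') as dev_f:
    norm_logs = XUniversalParser().parse(norm_f)
    dev_logs = XUniversalParser().parse(dev_f)
labeled_log = merge_and_label(norm_logs, dev_logs)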
import math
import multiprocessing
import time


def cohort_to_event_log(cohort, trace_type, verbose=False, remove_unlisted=True,
                        remove_duplicates=True, event_filter=None,
                        trace_filter=None, cores=multiprocessing.cpu_count(),
                        window_size=200, abstraction_path=None,
                        abstraction_exact_match=False, abstraction_delimiter=";"):
    """Converts a fiber cohort to an XES event log.

    To respect memory restrictions, the cohort is sliced into smaller windows
    and `cohort_to_event_log_for_window` is called once per slice.

    Keyword arguments:
    cohort -- the fiber cohort
    trace_type -- the type of a trace (`mrn` or `visit`)
    verbose -- flag if the events should contain original, non-abstracted values (default False)
    remove_unlisted -- flag if a trace should only contain listed events (default True)
    remove_duplicates -- flag if duplicate events should be removed (default True)
    event_filter -- a custom filter to filter events (default None)
    trace_filter -- a custom filter to filter traces (default None)
    cores -- the number of cores used to process the cohort (default: number of CPUs)
    window_size -- the number of patients per window (default 200)
    abstraction_path -- the path to the abstraction file (default None)
    abstraction_exact_match -- flag if the abstraction algorithm should only abstract exact matches (default False)
    abstraction_delimiter -- the delimiter of the abstraction file (default ";")
    """
    manager = multiprocessing.Manager()
    traces = manager.list()
    mrns = list(cohort.mrns())
    window_amount = math.ceil(len(mrns) / window_size)
    # Spawn a new process for each window to free memory after each window completes
    for i in range(0, window_amount):
        print("Start window {current_window} / {max_window}".format(
            current_window=(i + 1), max_window=window_amount))
        window_start_time = time.perf_counter()
        mrns_in_window = mrns[i * window_size: (i + 1) * window_size]
        cohort_for_window = Cohort(condition.MRNs(mrns_in_window))
        p = multiprocessing.Process(target=cohort_to_event_log_for_window, args=(
            cohort_for_window,
            trace_type,
            verbose,
            remove_unlisted,
            remove_duplicates,
            event_filter,
            trace_filter,
            cores,
            abstraction_path,
            abstraction_exact_match,
            abstraction_delimiter,
            traces,
        ))
        p.start()
        p.join()
        print("Finished window {current_window} / {max_window} in {window_time} s".format(
            current_window=(i + 1),
            max_window=window_amount,
            window_time=(time.perf_counter() - window_start_time),
        ))
    log = XFactory.create_log()
    for trace in traces:
        log.append(trace)
    return log
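# Usage sketch (assumed): build a fiber cohort and convert it to an event log
# keyed by medical record number. The Diagnosis condition and its arguments
# are hypothetical and depend on the local fiber setup.
from fiber import Cohort
from fiber.condition import Diagnosis

kidney_cohort = Cohort(Diagnosis('584.9', 'ICD-9'))  # hypothetical condition
event_log = cohort_to_event_log(kidney_cohort, trace_type='mrn')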
import json

from opyenxes.data_out.XesXmlSerializer import XesXmlSerializer
from opyenxes.factory.XFactory import XFactory


def extract(indexer, manifestPath, xesFilePath):
    with open(manifestPath) as manifest_file:
        manifest = json.load(manifest_file)
    log = XFactory.create_log()
    switches = {}
    # truncate the transaction dump left over from a previous run
    with open("./transactionsFromIndexer.txt", "w") as f:
        f.write("")
    for key in manifest:
        if key == "xesExtensions":
            setExtension(log, manifest[key])
        elif key == "xesClassifiers":
            setClassifiers(log, manifest[key])
        elif key == "xesGlobals":
            setGlobals(log, manifest[key])
        elif key == "switches":
            switches = manifest[key]
    try:
        mappings = manifest["mappings"]
    except KeyError:
        print("Missing mappings in the manifest!")
        return  # without mappings there is nothing to map
    mapLog(log, mappings, indexer, switches)
    with open(xesFilePath, "w") as file:
        XesXmlSerializer().serialize(log, file)
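# Sketch of one helper used above (assumed shape, not the original
# implementation): register each classifier from the manifest on the log,
# assuming the manifest entry maps classifier names to lists of attribute keys.
from opyenxes.classification.XEventAttributeClassifier import XEventAttributeClassifier

def setClassifiers(log, classifiers):
    for name, keys in classifiers.items():
        log.get_classifiers().append(XEventAttributeClassifier(name, keys))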
with open("xes_file/csv_file.csv") as file: first_line = file.readline().split(";") dictionary = {} for i in range(len(first_line)): if "yyyy" in first_line[i]: # Convert csv date format in xes date format first_line[i] = first_line[i].replace("dd", "%d").\ replace("MM", "%m").replace("yyyy", "%Y").replace("HH", "%H").\ replace("mm", "%M") dictionary[str(i)] = first_line[i].strip("\n") first_event = file.readline().split(";") actual_trace = first_event[0] log = XFactory.create_log() trace = XFactory.create_trace() trace.append(convert_line_in_event(dictionary, first_event)) for line in file.readlines(): line_list = line.split(";") event = convert_line_in_event(dictionary, line_list) if line_list[0] == actual_trace: # View the Case Id trace.append(event) else: log.append(trace) trace = XFactory.create_trace() trace.append(event) # Save log in xes format with open("xes_file/csv_log_in_xes_format.xes", "w") as file:
import os

import numpy as np
from opyenxes.data_in.XUniversalParser import XUniversalParser
from opyenxes.data_out.XesXmlGZIPSerializer import XesXmlGZIPSerializer
from opyenxes.factory.XFactory import XFactory
from opyenxes.model.XLog import XLog

factory = XFactory()
print(os.listdir(datadir))
for dir in os.listdir(datadir):
    if not os.path.isdir(os.path.join(datadir, dir)):
        continue
    outdir = os.path.join(datadir, dir, 'l1000')
    os.makedirs(outdir, exist_ok=True)
    for xlog_filepath in os.listdir(os.path.join(datadir, dir, 'l5000')):
        if '.xes.gz' not in xlog_filepath:
            continue
        print('Processing {}'.format(xlog_filepath))
        # the listed files live in the 'l5000' subdirectory
        with open(os.path.join(datadir, dir, 'l5000', xlog_filepath), 'r') as f:
            xlog = XUniversalParser().parse(f)[0]
        assert isinstance(xlog, XLog)
        # sample nb_traces traces into a fresh log with the same attributes
        new_xlog = factory.create_log(xlog.get_attributes())
        traces = np.random.choice(xlog, nb_traces, replace=False)
        new_xlog.get_classifiers().append(xlog.get_classifiers()[0])
        for t in traces:
            new_xlog.append(t)
        with open(outdir + os.sep + xlog_filepath, 'w') as f:
            XesXmlGZIPSerializer().serialize(new_xlog, f)
    ['3', 1400.0],
]
TRACE_DF = pd.DataFrame(TRACES, columns=TRACE_DF_COLUMNS)

NAME_AND_LIFECYCLE_CLF = XEventAndClassifier([XEventNameClassifier(),
                                              XEventLifeTransClassifier()])
CLASSIFIERS = {
    XEventNameClassifier().name(): [const.CONCEPT_NAME],
    NAME_AND_LIFECYCLE_CLF.name(): [const.CONCEPT_NAME, const.LIFECYCLE_TRANS],
}
LOG_TABLE = LogTable(event_df=EVENT_DF, trace_df=TRACE_DF,
                     attributes=LOG_ATTRIBUTE_DICT, classifiers=CLASSIFIERS)

XLOG = XFactory.create_log()
XLOG_NAME = 'Test log'
CONCEPT_EXTENSION.assign_name(XLOG, XLOG_NAME)
TOTAL_TIME = 100
TOTAL_TIME_ATTRIBUTE = XFactory.create_attribute_continuous('total_time', TOTAL_TIME)
XLOG.get_attributes()['total_time'] = TOTAL_TIME_ATTRIBUTE

for caseid, cost_total in TRACES:
    xtrace = XFactory.create_trace()
    CONCEPT_EXTENSION.assign_name(xtrace, caseid)
    COST_EXTENSION.assign_total(xtrace, cost_total)
    trace_events = filter(lambda event: event[0] == caseid, EVENTS)
    for _, concept_name, cost_unit, lifecycle, org, timestamp in trace_events:
from sklearn.cluster import KMeans

CONVERGENCE_TOLERANCE = 0.001
NUM_THREADS = 8

# n_jobs was deprecated in scikit-learn 0.23 and removed in 1.0; drop it there
kmeans = KMeans(n_clusters=NUM_CLUSTERS, max_iter=MAX_ITERATIONS,
                init=INITIALIZE_CLUSTERS, tol=CONVERGENCE_TOLERANCE,
                n_jobs=NUM_THREADS)

# Cluster the vectorized traces
kmeans.fit(log_vector)

# Create one new log per cluster, carrying over the original log's metadata
new_logs = {}
for i in range(len(kmeans.cluster_centers_)):
    new_log = XFactory.create_log(log.get_attributes().clone())
    for elem in log.get_extensions():
        new_log.get_extensions().add(elem)
    # Copy classifiers and global attributes through the public API: assigning
    # to new_log.__classifiers from outside the class would not touch XLog's
    # internal, name-mangled attribute.
    new_log.get_classifiers().extend(log.get_classifiers())
    new_log.get_global_trace_attributes().extend(
        log.get_global_trace_attributes())
    new_log.get_global_event_attributes().extend(
        log.get_global_event_attributes())
    new_logs[str(i)] = new_log

# Distribute each trace to the log of its predicted cluster
for point, trace in zip(log_vector, log):
    cluster = kmeans.predict([point])[0]
    new_logs[str(cluster)].append(trace)
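# Usage sketch (assumed): write each clustered sub-log to its own XES file;
# the output path pattern is hypothetical.
from opyenxes.data_out.XesXmlSerializer import XesXmlSerializer

for cluster_id, sub_log in new_logs.items():
    with open('cluster_{}.xes'.format(cluster_id), 'w') as out:
        XesXmlSerializer().serialize(sub_log, out)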
split = l.strip().split("\t") acceptance = split[0].strip() acceptances.append(acceptance) trace = split[1].strip().split(";") if len(split) > 1 else [] if trace != ['']: traces.append(trace) logging.info("trace {}, acc {}: {}".format(idx, acceptance, trace)) logging.info("num traces: {}".format(len(traces))) logging.info("num acceptances: {}".format(len(acceptances))) logging.info("alphabet: {}".format( reduce(lambda x, y: x.union(y), map(set, traces)))) positive_log = XFactory.create_log() negative_log = XFactory.create_log() assert len(acceptances) == len(traces) for acc, t in zip(acceptances, traces): trace = XFactory.create_trace() for e in t: event = XFactory.create_event() attribute = XFactory.create_attribute_literal("concept:name", e) event.get_attributes()["string"] = attribute trace.append(event) if acc == "Y": positive_log.append(trace) else: negative_log.append(trace)