def load_xes(self):
    """Load the XES log at ``self.input_path`` into ``self._event_log``.

    The first log returned by ``XUniversalParser`` is used.  Its first
    classifier (by string form) is wrapped in an
    ``XEventAttributeClassifier`` named ``"activity"``; each trace is mapped
    to the tuple of class identities of its events, and the set of those
    tuples is stored in ``self._event_log`` (so duplicate traces collapse).

    Raises:
        IOError: if the file cannot be opened or parsed, or if the log has
            no classifier.  The underlying exception is chained as
            ``__cause__`` so the root problem stays visible.
    """
    try:
        with open(self.input_path) as log_file:
            log = XUniversalParser().parse(log_file)[0]

            # get classifiers
            classifiers = [str(cl) for cl in log.get_classifiers()]
            classifier = XEventAttributeClassifier(
                "activity", [classifiers[0]])

            # One tuple of event class identities per trace; the set keeps
            # only distinct trace variants.
            self._event_log = {
                tuple(classifier.get_class_identity(event) for event in trace)
                for trace in log
            }
    except Exception as err:
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate; callers still receive an IOError.
        raise IOError('[ERROR]: Unable to import xes file') from err
# For every sub-directory of ``datadir``, read each ``*.xes.gz`` log found in
# its ``l5000`` folder, draw ``nb_traces`` traces at random (without
# replacement) and write the reduced log under the same file name into a
# sibling ``l1000`` folder.
factory = XFactory()
print(os.listdir(datadir))
for dirname in os.listdir(datadir):
    model_dir = os.path.join(datadir, dirname)
    if not os.path.isdir(model_dir):
        continue

    indir = os.path.join(model_dir, 'l5000')
    outdir = os.path.join(model_dir, 'l1000')
    # exist_ok so that re-running the script does not crash on the
    # directory created by a previous run.
    os.makedirs(outdir, exist_ok=True)

    for xlog_filepath in os.listdir(indir):
        if '.xes.gz' not in xlog_filepath:
            continue

        print('Processing {}'.format(xlog_filepath))

        # The file names come from the 'l5000' folder, so the log must be
        # opened from that same folder.
        # NOTE(review): the handle is opened in text mode although the file
        # is gzip-compressed; presumably the parser re-opens it by name --
        # confirm against the OpyenXes parser.
        with open(os.path.join(indir, xlog_filepath), 'r') as f:
            xlog = XUniversalParser().parse(f)[0]

        assert isinstance(xlog, XLog)

        new_xlog = factory.create_log(xlog.get_attributes())
        new_xlog.get_classifiers().append(xlog.get_classifiers()[0])

        # Sample trace *indices* rather than the traces themselves: passing
        # the log to np.random.choice would coerce it into an ndarray, which
        # fails for non 1-D input and may not return the original trace
        # objects.  Raises ValueError if nb_traces exceeds the log size.
        chosen = np.random.choice(len(xlog), nb_traces, replace=False)
        for idx in chosen:
            new_xlog.append(xlog[int(idx)])

        with open(os.path.join(outdir, xlog_filepath), 'w') as f:
            XesXmlGZIPSerializer().serialize(new_xlog, f)
max_iter=MAX_ITERATIONS, init=INITIALIZE_CLUSTERS, tol=CONVERGENCE_TOLERANCE, n_jobs=NUM_THREADS) # Create the cluster with the log vector kmeans.fit(log_vector) # Create new log with the attribute for the original log new_logs = {} for i in range(len(kmeans.cluster_centers_)): new_log = XFactory.create_log(log.get_attributes().clone()) for elem in log.get_extensions(): new_log.get_extensions().add(elem) new_log.__classifiers = log.get_classifiers().copy() new_log.__globalTraceAttributes = log.get_global_trace_attributes( ).copy() new_log.__globalEventAttributes = log.get_global_event_attributes( ).copy() new_logs[str(i)] = new_log # Distribute the trace depending the cluster. for point, trace in zip(log_vector, log): cluster = kmeans.predict([point])[0] new_logs[str(cluster)].append(trace) # Write the new logs log_id = 0 for log in new_logs.values():