Beispiel #1
0
    def load_xes(self):
        """Load the XES log at ``self.input_path`` into ``self._event_log``.

        The file is parsed with ``XUniversalParser`` and the first element of
        the parse result is used as the log. Every event of every trace is
        mapped to its class identity by an ``XEventAttributeClassifier`` built
        from the string form of the first classifier declared in the log.
        The traces are stored as a set of tuples, so identical event
        sequences collapse into a single entry.

        Raises:
            IOError: if anything goes wrong while opening, parsing or
                classifying the log (including a log that declares no
                classifier). The original exception is attached as
                ``__cause__``.
        """
        try:
            with open(self.input_path) as log_file:
                log = XUniversalParser().parse(log_file)[0]

            # Collect the string form of every classifier declared in the log;
            # only the first one is used to classify events.
            classifier_names = [str(cl) for cl in log.get_classifiers()]
            classifier = XEventAttributeClassifier(
                "activity", [classifier_names[0]])

            self._event_log = {
                tuple(classifier.get_class_identity(event) for event in trace)
                for trace in log
            }
        except Exception as err:
            # Catch Exception rather than using a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate, and chain the
            # cause so the real failure is visible in the traceback.
            raise IOError('[ERROR]: Unable to import xes file') from err
Beispiel #2
0
    factory = XFactory()

    print(os.listdir(datadir))

    for dir in os.listdir(datadir):
        if not os.path.isdir(os.path.join(datadir, dir)):
            continue
        outdir = os.path.join(datadir, dir, 'l1000')
        os.makedirs(outdir)

        for xlog_filepath in os.listdir(os.path.join(datadir, dir, 'l5000')):
            if '.xes.gz' not in xlog_filepath:
                continue

            print('Processing {}'.format(xlog_filepath))

            with open(os.path.join(datadir, dir, xlog_filepath), 'r') as f:
                xlog = XUniversalParser().parse(f)[0]

            assert isinstance(xlog, XLog)

            new_xlog = factory.create_log(xlog.get_attributes())
            traces = np.random.choice(xlog, nb_traces, replace=False)
            new_xlog.get_classifiers().append(xlog.get_classifiers()[0])

            for t in traces:
                new_xlog.append(t)

            with open(outdir + os.sep + xlog_filepath, 'w') as f:
                XesXmlGZIPSerializer().serialize(new_xlog, f)
Beispiel #3
0
                    max_iter=MAX_ITERATIONS,
                    init=INITIALIZE_CLUSTERS,
                    tol=CONVERGENCE_TOLERANCE,
                    n_jobs=NUM_THREADS)

    # Create the cluster with the log vector
    kmeans.fit(log_vector)

    # Create new log with the attribute for the original log
    new_logs = {}
    for i in range(len(kmeans.cluster_centers_)):
        new_log = XFactory.create_log(log.get_attributes().clone())
        for elem in log.get_extensions():
            new_log.get_extensions().add(elem)

        new_log.__classifiers = log.get_classifiers().copy()
        new_log.__globalTraceAttributes = log.get_global_trace_attributes(
        ).copy()
        new_log.__globalEventAttributes = log.get_global_event_attributes(
        ).copy()

        new_logs[str(i)] = new_log

    # Distribute the trace depending the cluster.
    for point, trace in zip(log_vector, log):
        cluster = kmeans.predict([point])[0]
        new_logs[str(cluster)].append(trace)

    # Write the new logs
    log_id = 0
    for log in new_logs.values():