Ejemplo n.º 1
0
    # Down-sample every '.xes.gz' event log found in <datadir>/<subdir>/l5000
    # to `nb_traces` randomly chosen traces and write the result, under the
    # same file name, to <datadir>/<subdir>/l1000.
    factory = XFactory()

    print(os.listdir(datadir))

    for subdir in os.listdir(datadir):
        if not os.path.isdir(os.path.join(datadir, subdir)):
            continue
        indir = os.path.join(datadir, subdir, 'l5000')
        outdir = os.path.join(datadir, subdir, 'l1000')
        # exist_ok: re-running over an already processed tree must not fail
        # just because the output directory is there from a previous run.
        os.makedirs(outdir, exist_ok=True)

        for xlog_filepath in os.listdir(indir):
            if '.xes.gz' not in xlog_filepath:
                continue

            print('Processing {}'.format(xlog_filepath))

            # Read from the directory that was actually listed; the file
            # names returned by os.listdir() are relative to `indir`.
            with open(os.path.join(indir, xlog_filepath), 'r') as f:
                xlog = XUniversalParser().parse(f)[0]

            assert isinstance(xlog, XLog)

            new_xlog = factory.create_log(xlog.get_attributes())
            new_xlog.get_classifiers().append(xlog.get_classifiers()[0])

            # Sample trace *indices* rather than the traces themselves:
            # np.random.choice needs a 1-D array-like, and a log of traces
            # (each itself a sequence of events) is converted by NumPy to a
            # 2-D or ragged array instead.
            # Assumes XLog supports len() and integer indexing -- TODO confirm.
            # Raises ValueError if nb_traces exceeds the number of traces.
            chosen = np.random.choice(len(xlog), nb_traces, replace=False)
            for idx in chosen:
                new_xlog.append(xlog[idx])

            with open(os.path.join(outdir, xlog_filepath), 'w') as f:
                XesXmlGZIPSerializer().serialize(new_xlog, f)
Ejemplo n.º 2
0
    CONVERGENCE_TOLERANCE = 0.001
    NUM_THREADS = 8

    # NOTE(review): assuming KMeans is scikit-learn's estimator, `n_jobs` was
    # deprecated in 0.23 and removed in 1.0 -- confirm the pinned version
    # before upgrading, or drop the argument.
    kmeans = KMeans(n_clusters=NUM_CLUSTERS,
                    max_iter=MAX_ITERATIONS,
                    init=INITIALIZE_CLUSTERS,
                    tol=CONVERGENCE_TOLERANCE,
                    n_jobs=NUM_THREADS)

    # Fit the clusters on the vectorised log.
    kmeans.fit(log_vector)

    # Create one empty log per cluster, carrying over the attributes,
    # extensions, classifiers and global attributes of the original log.
    new_logs = {}
    for cluster_id in range(len(kmeans.cluster_centers_)):
        new_log = XFactory.create_log(log.get_attributes().clone())
        for elem in log.get_extensions():
            new_log.get_extensions().add(elem)

        # Fill the new log through its public getters.  Assigning
        # `new_log.__classifiers` (etc.) from outside the class only creates
        # a new, unrelated attribute and leaves the log's own state empty.
        # Assumes the getters return the log's internal mutable lists, as
        # OpyenXes' XLog does -- TODO confirm.
        new_log.get_classifiers().extend(log.get_classifiers())
        new_log.get_global_trace_attributes().extend(
            log.get_global_trace_attributes())
        new_log.get_global_event_attributes().extend(
            log.get_global_event_attributes())

        new_logs[str(cluster_id)] = new_log

    # Distribute the traces over the per-cluster logs.  A single batch
    # predict() call replaces one call per trace.
    for cluster, trace in zip(kmeans.predict(log_vector), log):
        new_logs[str(cluster)].append(trace)