def endCluster(folderName, assigned_clusters, vectorsize, clusterType, corpus):
    """Write the cluster label of every trace to a CSV file and report completion.

    The trace names are read from ``<folderName>.xes`` through
    ``loadXES.get_trace_names`` and paired by position with
    ``assigned_clusters``. One ``<trace name>,<cluster label>`` line is written
    per entry of ``corpus`` to
    ``output/<folderName>T2VVS<vectorsize><clusterType>.csv``.

    Args:
        folderName: Event log name without the ``.xes`` extension; also used
            as the prefix of the output file name.
        assigned_clusters: Indexable collection holding one cluster label per
            trace, in the same order as ``corpus``.
        vectorsize: Vector size, embedded in the output file name.
        clusterType: Name of the clustering method, embedded in the output
            file name and in the completion message.
        corpus: Sized collection of traces; only its length is used here.

    Raises:
        IndexError: If the trace name list or ``assigned_clusters`` has fewer
            entries than ``corpus``.
        OSError: If the output file cannot be opened for writing (the
            ``output/`` directory is not created here).
    """
    trace_list = loadXES.get_trace_names(folderName + ".xes")

    # Build every row before opening the file so that a length mismatch raises
    # before the output file is created or truncated.
    # NOTE(review): concatenation assumes the trace names are strings - confirm
    # against loadXES.get_trace_names.
    rows = [
        trace_list[doc_id] + ',' + str(assigned_clusters[doc_id]) + "\n"
        for doc_id in range(len(corpus))
    ]

    resultPath = ('output/' + folderName + 'T2VVS' + str(vectorsize)
                  + clusterType + '.csv')
    # The context manager closes the file even if writing fails part-way.
    with open(resultPath, 'w') as resultFile:
        resultFile.writelines(rows)

    print("done with ", clusterType, " on event log ", folderName)
def cluster(folderName, vectorsize, clusterType, numClusters=5):
    """Cluster the traces of an event log using a stored Doc2Vec model.

    The tagged documents are loaded from ``<folderName>.xes``, a vector is
    inferred for each of them with the model stored at
    ``output/<folderName>T2VVS<vectorsize>.model``, the vectors are clustered
    with the requested method, and the resulting labels are written to CSV by
    ``endCluster``.

    Args:
        folderName: Event log name without the ``.xes`` extension.
        vectorsize: Vector size; used to locate the model file and to name the
            result file.
        clusterType: One of ``"KMeans"`` (NLTK k-means with cosine distance,
            25 repeats), ``"HierWard"`` (agglomerative clustering with Ward
            linkage) or ``"OCSVM"`` (one-class SVM with default settings).
        numClusters: Number of clusters requested from ``"KMeans"`` and
            ``"HierWard"``; not used by ``"OCSVM"``. Defaults to 5, the value
            that used to be hard-coded.

    Returns:
        None. For an unknown ``clusterType`` a message is printed and no
        result file is written.
    """
    corpus = loadXES.get_doc_XES_tagged(folderName + '.xes')
    print('Data Loading finished, ', str(len(corpus)), ' traces found.')

    model = gensim.models.Doc2Vec.load(
        'output/' + folderName + 'T2VVS' + str(vectorsize) + '.model')

    print("inferring vectors")
    vectors = [model.infer_vector(doc.words) for doc in corpus]
    print("done")

    if clusterType == "KMeans":
        kclusterer = KMeansClusterer(
            numClusters,
            distance=nltk.cluster.util.cosine_distance,
            repeats=25)
        assigned_clusters = kclusterer.cluster(vectors, assign_clusters=True)
    elif clusterType == "HierWard":
        ward = AgglomerativeClustering(
            n_clusters=numClusters, linkage='ward').fit(vectors)
        assigned_clusters = ward.labels_
    elif clusterType == "OCSVM":
        ocsvm = OneClassSVM()
        assigned_clusters = ocsvm.fit_predict(vectors)
    else:
        # The message lists every supported type, including 'OCSVM'.
        print(
            clusterType,
            " is not a predefined cluster type. Please use 'KMeans', "
            "'HierWard' or 'OCSVM', or create a definition for ",
            clusterType)
        return

    # Writing the CSV and printing the completion message is shared with
    # endCluster instead of being duplicated here.
    endCluster(folderName, assigned_clusters, vectorsize, clusterType, corpus)