Beispiel #1
0
import os
import tempfile
import traceback

import weka.core.converters as converters
import weka.core.jvm as jvm
import weka.core.packages as packages
import weka.core.serialization as serialization
from weka.clusterers import Clusterer

import edeweka.helper as helper
from dataformatter import DataFormatter

# Cluster the 'System' data set with Weka's SimpleKMeans.
#
# NOTE: both directories are resolved against the PARENT of the current
# working directory (os.path.abspath('') is the cwd), not against the location
# of this file, so the script must be launched from a directory that sits
# next to data/ and models/.
baseDir = os.path.dirname(os.path.abspath(''))
dataDir = os.path.join(baseDir, 'data')
modelDir = os.path.join(baseDir, 'models')

# Single definition of the ARFF path, shared by the conversion step and the
# Weka loader so the two can never drift apart.
arffFile = os.path.join(dataDir, 'System.arff')

dformat = DataFormatter(dataDir)

# Presumably converts the CSV export into ARFF (judging by the method name and
# its arguments) -- confirm against DataFormatter.dict2arff.
dformat.dict2arff(os.path.join(dataDir, 'System.csv'), arffFile)

jvm.start(packages=True)

try:
    data = converters.load_any_file(arffFile)
    # SimpleKMeans options: -N = number of clusters, -S = random seed.
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans",
                          options=["-N", "10", "-S", "10"])
    clusterer.build_clusterer(data)
except BaseException:
    # Do not leave the JVM running behind a failed load/build; shut it down
    # and let the original error propagate unchanged.
    jvm.stop()
    raise

# The JVM is intentionally left running on success so that `data` and
# `clusterer` stay usable by whatever follows; call jvm.stop() once all Weka
# work is finished.
#
# Example usage of the built clusterer:
# print clusterer
# cluster the data
# for inst in data:
#     cl = clusterer.cluster_instance(inst)  # 0-based cluster index
#     dist = clusterer.distribution_for_instance(inst)   # cluster membership distribution