Exemple #1
0
    def score(self, testExamples, labels):
        """Evaluate self.classifier on the given examples and labels.

        Writes the test set out as an ARFF file, loads it back through
        Weka, runs the previously built classifier over it and returns
        the percentage of correctly classified instances.

        testExamples -- sequence of feature vectors (floats)
        labels       -- sequence of labels; 1 maps to TRUE, anything else to FALSE
        """
        # Use a context manager so the file is closed even if a write
        # raises (the original leaked the handle on error).
        with open("testingweka.arff", "w") as f:
            f.write("@relation randomset\n")
            for j in range(len(testExamples[0])):
                f.write("@attribute feature%d real\n" % j)
            f.write("@attribute class {TRUE, FALSE}\n")
            f.write("@data\n")
            for example, label in zip(testExamples, labels):
                for feature in example:
                    f.write("%f," % feature)
                f.write("TRUE\n" if label == 1 else "FALSE\n")

        loader = Loader(classname="weka.core.converters.ArffLoader")
        self.testingData = loader.load_file("testingweka.arff")
        self.testingData.set_class_index(self.testingData.num_attributes() - 1)

        # Priors come from the training data; the model itself was built earlier.
        evaluation = Evaluation(self.trainingData)
        evaluation.test_model(self.classifier, self.testingData)
        return evaluation.percent_correct()
    def execute(self, featureInclusion, kFold, classIndex):
        """Drop de-selected attributes, then cross-validate a
        CVParameterSelection classifier and return its percent correct.

        featureInclusion -- one boolean per attribute; False means delete
        kFold            -- number of cross-validation folds
        classIndex       -- class attribute index (after deletions)
        """
        # Deleting an attribute shifts all later indices down by one,
        # so keep a running offset of how many were already removed.
        deletedFeatures = 0
        for i in range(len(featureInclusion)):
            if not featureInclusion[i]:
                self.instances.deleteAttributeAt(i - deletedFeatures)
                deletedFeatures += 1

        self.instances.setClassIndex(classIndex)

        cvParameterSelection = javabridge.make_instance(
            "Lweka/classifiers/meta/CVParameterSelection", "()V")
        javabridge.call(cvParameterSelection, "setNumFolds", "(I)V", kFold)
        # BUG FIX: javabridge.call takes the method name and the JNI
        # signature as separate arguments, and the signature needs the
        # trailing ';' after the class name. The original fused them:
        # "buildClassifier(Lweka/core/Instances)V".
        javabridge.call(cvParameterSelection, "buildClassifier",
                        "(Lweka/core/Instances;)V", self.instances)

        # Named 'evaluation' to avoid shadowing the builtin eval().
        evaluation = Evaluation(self.instances)
        evaluation.crossvalidate_model(cvParameterSelection, self.instances, kFold, Random(1))

        return evaluation.percent_correct()
Exemple #3
0
# Matplotlib is optional; only import pyplot when it is available.
if plot.matplotlib_available:
    import matplotlib.pyplot as plt

jvm.start()

# load glass
fname = data_dir + os.sep + "glass.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)

# compute baseline
# ZeroR always predicts the majority class, giving a lower bound on accuracy.
evl = Evaluation(data)
evl.crossvalidate_model(Classifier("weka.classifiers.rules.ZeroR"), data, 10, Random(1))
baseline = evl.percent_correct()

# generate learning curves
# curves[repetition][percentage] will accumulate percent-correct values.
percentages = [1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
repetitions = [1, 10, 100]
curves = {}
for repetition in repetitions:
    # progress info
    sys.stdout.write("Repetitions=" + str(repetition))
    # initialize curve
    curve = {}
    for percentage in percentages:
        curve[percentage] = 0
    curves[repetition] = curve
    # run and add up percentage correct from repetition
    # NOTE(review): snippet is truncated here -- the body of this loop is
    # missing from this chunk, leaving the statement dangling.
    for seed in xrange(repetition):
Exemple #4
0
# load diabetes
loader = Loader(classname="weka.core.converters.ArffLoader")
fname = data_dir + os.sep + "diabetes.arff"
print("\nLoading dataset: " + fname + "\n")
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)

# Evaluate each classifier on a 90% train / 10% test split, then show
# the model built on the full dataset.
for cname in ["weka.classifiers.bayes.NaiveBayes", "weka.classifiers.rules.ZeroR", "weka.classifiers.trees.J48"]:
    cls = Classifier(classname=cname)
    evl = Evaluation(data)
    evl.evaluate_train_test_split(cls, data, 90.0, Random(1))
    print("\n" + cname + " train/test split (90%):\n" + evl.to_summary())
    cls.build_classifier(data)
    print(cname + " model:\n\n" + str(cls))

# Mean/stdev of accuracy across ten 10-fold cross-validations, each
# with a different random seed.
candidates = [
    "weka.classifiers.meta.ClassificationViaRegression", "weka.classifiers.bayes.NaiveBayes",
        "weka.classifiers.rules.ZeroR", "weka.classifiers.trees.J48", "weka.classifiers.functions.Logistic"]
for cname in candidates:
    accuracy = []
    for seed in xrange(1, 11):
        evl = Evaluation(data)
        evl.crossvalidate_model(Classifier(classname=cname), data, 10, Random(seed))
        accuracy.append(evl.percent_correct())
    nacc = numpy.array(accuracy)
    print("%s: %0.2f +/-%0.2f" % (cname, numpy.mean(nacc), numpy.std(nacc)))

jvm.stop()
Exemple #5
0
preds.sort(preds.get_attribute_by_name("distribution-good").get_index())
print(evl.to_summary())
print(evl.to_matrix())
print(preds)

# cross-validate CostSensitiveClassifier with J48 (minimize cost)
classifier = "weka.classifiers.meta.CostSensitiveClassifier"
base = "weka.classifiers.trees.J48"
print("\n--> " + classifier + "/" + base + "\n")
cost = array([[0, 1], [5, 0]])
matrx = CostMatrix(matrx=cost)
cls = Classifier(classname=classifier,
                 options=["-M", "-W", base, "-cost-matrix", matrx.to_matlab()])
evl = Evaluation(data)
evl.crossvalidate_model(cls, data, 10, Random(1))
print("Accuracy: %0.1f" % evl.percent_correct())
print(evl.to_matrix())

# cross-validate Bagging with J48
classifier = "weka.classifiers.meta.Bagging"
base = "weka.classifiers.trees.J48"
print("\n--> " + classifier + "/" + base + "\n")
cls = Classifier(classname=classifier,
                 options=["-W", base])
evl = Evaluation(data)
evl.crossvalidate_model(cls, data, 10, Random(1))
print("Accuracy: %0.1f" % evl.percent_correct())
print(evl.to_matrix())

# cross-validate CostSensitiveClassifier with NaiveBayes
classifier = "weka.classifiers.meta.CostSensitiveClassifier"
Exemple #6
0
    print("Please restart")
    jvm.stop()
    exit()

# load diabetes
fname = data_dir + os.sep + "diabetes.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)

# Plain J48 as the reference point.
evl = Evaluation(data)
evl.crossvalidate_model(Classifier(classname="weka.classifiers.trees.J48"), data, 10, Random(1))
print("J48: %0.1f%%" % evl.percent_correct())

# CVParameterSelection tuning J48's confidenceFactor (C in [0.1, 0.9], 9 steps).
tuned = Classifier(classname="weka.classifiers.meta.CVParameterSelection",
                   options=["-W", "weka.classifiers.trees.J48", "-P", "C 0.1 0.9 9"])
evl = Evaluation(data)
evl.crossvalidate_model(tuned, data, 10, Random(1))
print("CVParameterSelection (confidenceFactor): %0.1f%%" % evl.percent_correct())

# Same, additionally tuning minNumObj (M in [1, 10], 10 steps).
tuned = Classifier(classname="weka.classifiers.meta.CVParameterSelection",
                   options=["-W", "weka.classifiers.trees.J48", "-P", "C 0.1 0.9 9", "-P", "M 1 10 10"])
evl = Evaluation(data)
evl.crossvalidate_model(tuned, data, 10, Random(1))
print("CVParameterSelection (confidenceFactor+minNumObj): %0.1f%%" % evl.percent_correct())
Exemple #7
0
fname = data_dir + os.sep + "ReutersGrain-test.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
test = loader.load_file(fname)
test.set_class_index(test.num_attributes() - 1)

# (classname, StringToWordVector filter options) pairs to evaluate.
setups = (
    ("weka.classifiers.trees.J48", []),
    ("weka.classifiers.bayes.NaiveBayes", []),
    ("weka.classifiers.bayes.NaiveBayesMultinomial", []),
    ("weka.classifiers.bayes.NaiveBayesMultinomial", ["-C"]),
    ("weka.classifiers.bayes.NaiveBayesMultinomial", ["-C", "-L", "-S"])
)

# Train each filtered classifier on 'data' (loaded in earlier, unseen code)
# and evaluate it on the held-out test set.
for cname, fopts in setups:
    print("\n--> %s (filter options: %s)\n" % (cname, " ".join(fopts)))
    fc = FilteredClassifier()
    fc.set_classifier(Classifier(classname=cname))
    fc.set_filter(Filter(classname="weka.filters.unsupervised.attribute.StringToWordVector", options=fopts))
    fc.build_classifier(data)
    evl = Evaluation(test)
    evl.test_model(fc, test)
    print("Accuracy: %0.0f%%" % evl.percent_correct())
    tcdata = plc.generate_thresholdcurve_data(evl, 0)
    print("AUC: %0.3f" % plc.get_auc(tcdata))
    print(evl.to_matrix("Matrix:"))

jvm.stop()
Exemple #8
0
jvm.start()

# load diabetes
loader = Loader(classname="weka.core.converters.ArffLoader")
fname = data_dir + os.sep + "diabetes.arff"
print("\nLoading dataset: " + fname + "\n")
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)

# ZeroR (majority class) evaluated on the training data itself gives the
# accuracy baseline.
zeror = Classifier(classname="weka.classifiers.rules.ZeroR")
zeror.build_classifier(data)
evl = Evaluation(data)
evl.test_model(zeror, data)
print("Baseline accuracy (ZeroR): %0.1f%%" % evl.percent_correct())

print("\nHoldout 10%...")
# Ten random 90/10 splits, one per seed 1..10.
perc = []
for seed in xrange(1, 11):
    evl = Evaluation(data)
    evl.evaluate_train_test_split(
        Classifier(classname="weka.classifiers.trees.J48"), data, 90.0, Random(seed))
    perc.append(round(evl.percent_correct(), 1))
    print("Accuracy with seed %i: %0.1f%%" % (seed, evl.percent_correct()))

# Aggregate the per-seed accuracies.
nperc = numpy.array(perc)
print("mean=%0.2f stdev=%0.2f" % (numpy.mean(nperc), numpy.std(nperc)))
Exemple #9
0
# load ionosphere
fname = data_dir + os.sep + "ionosphere.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)

# 1. cheating with default filter
# "Cheating": the supervised Discretize filter is applied to the full
# dataset before cross-validation, so test folds leak into the filter.
# NOTE(review): get_nodes is defined elsewhere in this file -- presumably it
# extracts the tree-size line from J48's model dump; confirm.
fltr = Filter(classname="weka.filters.supervised.attribute.Discretize", options=[])
fltr.set_inputformat(data)
filtered = fltr.filter(data)
cls = Classifier(classname="weka.classifiers.trees.J48")
evl = Evaluation(filtered)
evl.crossvalidate_model(cls, filtered, 10, Random(1))
cls.build_classifier(filtered)
print("cheating (default): accuracy=%0.1f nodes=%s" % (evl.percent_correct(), get_nodes(str(cls))))

# 2. using FilteredClassifier with default filter
# The filter is now applied inside each CV fold, avoiding the leak above.
cls = FilteredClassifier()
cls.set_classifier(Classifier(classname="weka.classifiers.trees.J48"))
cls.set_filter(Filter(classname="weka.filters.supervised.attribute.Discretize", options=[]))
evl = Evaluation(data)
evl.crossvalidate_model(cls, data, 10, Random(1))
cls.build_classifier(data)
print("FilteredClassifier (default): accuracy=%0.1f nodes=%s" % (evl.percent_correct(), get_nodes(str(cls))))

# 3. using FilteredClassifier (make binary)
# "-D" presumably makes the discretized attributes binary -- confirm against
# the Discretize filter documentation.
cls = FilteredClassifier()
cls.set_classifier(Classifier(classname="weka.classifiers.trees.J48"))
cls.set_filter(Filter(classname="weka.filters.supervised.attribute.Discretize", options=["-D"]))
# NOTE(review): snippet truncated here -- evaluation of setup 3 is missing.
evl = Evaluation(data)
Exemple #10
0
# load weather.nominal
loader = Loader(classname="weka.core.converters.ArffLoader")
fname = data_dir + os.sep + "weather.nominal.arff"
print("\nLoading dataset: " + fname + "\n")
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)

# define classifiers
classifiers = ["weka.classifiers.rules.OneR", "weka.classifiers.trees.J48"]

# cross-validate original dataset
for classifier in classifiers:
    cls = Classifier(classname=classifier)
    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 10, Random(1))
    print("%s (original): %0.0f%%" % (classifier, evl.percent_correct()))

# replace 'outlook' in first 4 'no' instances with 'missing'
# Work on a copy so the original dataset stays intact.
modified = Instances.copy_instances(data)
count    = 0
for i in xrange(modified.num_instances()):
    # Match instances whose class value (string form) is "no".
    if modified.get_instance(i).get_string_value(modified.get_class_index()) == "no":
        count += 1
        # Attribute index 0 is presumably 'outlook' in this dataset -- confirm.
        modified.get_instance(i).set_missing(0)
        if count == 4:
            break

# cross-validate modified dataset
# NOTE(review): snippet truncated here -- this loop body is incomplete
# (the cross-validation call and print are missing).
for classifier in classifiers:
    cls = Classifier(classname=classifier)
    evl = Evaluation(modified)
Exemple #11
0
# NOTE(review): 'loader' is created in earlier, unseen code.
fname = data_dir + os.sep + "segment-challenge.arff"
print("\nLoading dataset: " + fname + "\n")
train = loader.load_file(fname)
train.set_class_index(train.num_attributes() - 1)

fname = data_dir + os.sep + "segment-test.arff"
print("\nLoading dataset: " + fname + "\n")
test = loader.load_file(fname)
# BUG FIX: the class index of the test set must come from the test set's
# own attribute count (the original used train.num_attributes()).
test.set_class_index(test.num_attributes() - 1)

# build J48
cls = Classifier(classname="weka.classifiers.trees.J48")
cls.build_classifier(train)

# evaluate on the held-out test set
evl = Evaluation(train)
evl.test_model(cls, test)
print("Test set accuracy: %0.0f%%" % evl.percent_correct())

# evaluate on the training set (optimistically biased estimate)
evl = Evaluation(train)
evl.test_model(cls, train)
print("Train set accuracy: %0.0f%%" % evl.percent_correct())

# evaluate on a random 66/34 split of the training data
evl = Evaluation(train)
evl.evaluate_train_test_split(cls, train, 66.0, Random(1))
print("Random split accuracy: %0.0f%%" % evl.percent_correct())

jvm.stop()
Exemple #12
0
        # NOTE(review): fragment starts mid-call -- this 'options=' line is the
        # tail of a Filter("...StratifiedRemoveFolds", ...) call whose opening
        # line is outside this chunk; "-V" inverts the selection so the
        # remaining folds become the training set -- confirm against the
        # StratifiedRemoveFolds documentation.
    options=["-N", "10", "-F", str(i), "-S", "1", "-V"])
    remove.set_inputformat(data)
    train = remove.filter(data)

    # create test set
    # Same fold parameters without "-V": keep only fold i as the test set.
    remove = Filter(
        classname="weka.filters.supervised.instance.StratifiedRemoveFolds",
        options=["-N", "10", "-F", str(i), "-S", "1"])
    remove.set_inputformat(data)
    test = remove.filter(data)

    # Train on the 9 kept folds, accumulate results in the shared 'evl'.
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.build_classifier(train)
    evl.test_model(cls, test)

print("Simulated CV accuracy: %0.1f%%" % (evl.percent_correct()))

# perform actual cross-validation
evl = Evaluation(data)
cls = Classifier(classname="weka.classifiers.trees.J48")
evl.crossvalidate_model(cls, data, 10, Random(1))

print("Actual CV accuracy: %0.1f%%" % (evl.percent_correct()))

# deploy
# The final model is built on the full dataset.
print("Build model on full dataset:\n")
cls = Classifier(classname="weka.classifiers.trees.J48")
cls.build_classifier(data)
print(cls)

jvm.stop()
Exemple #13
0
# NOTE(review): fragment starts mid-script -- 'evl', 'cls' and 'data' refer
# to objects created in earlier, unseen code (weather data without the
# 'outlook' attribute, per the print below).
evl.crossvalidate_model(cls, data, 10, Random(1))
print("10-fold cross-validation (without 'outlook'):\n" + evl.to_summary())
cls.build_classifier(data)
print("Model:\n\n" + str(cls))

# load diabetes
loader = Loader(classname="weka.core.converters.ArffLoader")
fname = data_dir + os.sep + "diabetes.arff"
print("\nLoading dataset: " + fname + "\n")
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)

# ZeroR baseline.
cls = Classifier(classname="weka.classifiers.rules.ZeroR")
evl = Evaluation(data)
evl.crossvalidate_model(cls, data, 10, Random(1))
print("Accuracy 10-fold cross-validation (ZeroR): %0.1f%%" % evl.percent_correct())

# OneR with default options.
cls = Classifier(classname="weka.classifiers.rules.OneR")
evl = Evaluation(data)
evl.crossvalidate_model(cls, data, 10, Random(1))
print("Accuracy 10-fold cross-validation (OneR): %0.1f%%" % evl.percent_correct())
cls.build_classifier(data)
print(cls)

# OneR with "-B 1" (presumably the minimum bucket size -- confirm against
# the OneR documentation).
cls = Classifier(classname="weka.classifiers.rules.OneR", options=["-B", "1"])
evl = Evaluation(data)
evl.crossvalidate_model(cls, data, 10, Random(1))
print("Accuracy 10-fold cross-validation (OneR -B 1): %0.1f%%" % evl.percent_correct())

# NOTE(review): snippet truncated after this build.
cls = Classifier(classname="weka.classifiers.rules.OneR", options=["-B", "1"])
cls.build_classifier(data)
Exemple #14
0
# Ensure the stackingC package is present; installing requires a JVM restart.
if not packages.is_installed("stackingC"):
    print("Installing stackingC...")
    packages.install_package("stackingC")
    jvm.stop()
    print("Please restart")
    exit()

# load glass
loader = Loader(classname="weka.core.converters.ArffLoader")
fname = data_dir + os.sep + "glass.arff"
print("\nLoading dataset: " + fname + "\n")
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)

# (classname, options) pairs: plain J48 plus several ensemble methods.
setups = [
    ("weka.classifiers.trees.J48", []),
    ("weka.classifiers.meta.Bagging", []),
    ("weka.classifiers.trees.RandomForest", []),
    ("weka.classifiers.meta.AdaBoostM1", []),
    ("weka.classifiers.meta.Stacking", []),
    ("weka.classifiers.meta.StackingC",
     ["-B", "weka.classifiers.lazy.IBk", "-B", "weka.classifiers.rules.PART", "-B", "weka.classifiers.trees.J48"]),
]

for cname, copts in setups:
    # cross-validate classifier
    cls = Classifier(classname=cname, options=copts)
    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 10, Random(1))
    print(cname + " cross-validated accuracy: %0.2f" % evl.percent_correct())

jvm.stop()
Exemple #15
0
# load a dataset
iris_file = "HairEyeColor.csv"
print("Loading dataset: " + iris_file)
loader = Loader(classname="weka.core.converters.CSVLoader")
iris_data = loader.load_file(iris_file)
# BUG FIX: num_attributes is a method; without the call this printed the
# bound-method object instead of the attribute count.
print(iris_data.num_attributes())
iris_data.set_class_index(iris_data.num_attributes() - 1)

# build a classifier and output model
print("Training J48 classifier on iris")
# NOTE(review): 'weka.test.Regression' looks like a leftover test class --
# the commented-out J48 line below is probably the intended classifier;
# confirm before relying on this example.
classifier = Classifier(classname="weka.test.Regression")
#classifier = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.5"])
# Instead of using 'options=["-C", "0.3"]' in the constructor, we can also set the "confidenceFactor"
# property of the J48 classifier itself. However, being of type float rather than double, we need
# to convert it to the correct type first using the double_to_float function:
#classifier.set_property("confidenceFactor", types.double_to_float(0.3))
classifier.build_classifier(iris_data)
print(classifier)
print(classifier.graph())
#plot_graph.plot_dot_graph(classifier.graph())

evaluation = Evaluation(iris_data)                     # initialize with priors
evaluation.crossvalidate_model(classifier, iris_data, 10, Random(42))  # 10-fold CV
print(evaluation.to_summary())

print("pctCorrect: " + str(evaluation.percent_correct()))
print("incorrect: " + str(evaluation.incorrect()))
jvm.stop()
Exemple #16
0
# NOTE(review): 'fname' is defined in earlier, unseen code.
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)

classifiers = [
    "weka.classifiers.trees.J48",
    "weka.classifiers.lazy.IBk"
]

# cross-validate classifiers
for classifier in classifiers:
    cls = Classifier(classname=classifier)
    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 10, Random(1))
    print("%s: %0.0f%%" % (classifier, evl.percent_correct()))

# wrapper
# Wrapper-based attribute selection: WrapperSubsetEval scores attribute
# subsets using the classifier itself ("-B" = base classifier), searched
# with BestFirst; the dataset is then reduced to the selected attributes.
for classifier in classifiers:
    aseval = ASEvaluation(classname="weka.attributeSelection.WrapperSubsetEval",
                          options=["-B", classifier])
    assearch = ASSearch(classname="weka.attributeSelection.BestFirst",
                        options=[])
    attsel = AttributeSelection()
    attsel.set_evaluator(aseval)
    attsel.set_search(assearch)
    attsel.select_attributes(data)
    reduced = attsel.reduce_dimensionality(data)

    cls = Classifier(classname=classifier)
    # NOTE(review): snippet truncated -- evaluation on 'reduced' is incomplete.
    evl = Evaluation(reduced)
Exemple #17
0
# NOTE(review): 'loader' and 'fname' come from earlier, unseen code.
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)

classifiers = [
    "weka.classifiers.bayes.NaiveBayes",
    "weka.classifiers.lazy.IBk",
    "weka.classifiers.trees.J48"
]

# cross-validate classifiers
for classifier in classifiers:
    # classifier itself
    cls = Classifier(classname=classifier)
    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 10, Random(1))
    print("%s: %0.0f%%" % (classifier, evl.percent_correct()))
    # meta with cfssubseteval
    # AttributeSelectedClassifier: select attributes with CfsSubsetEval +
    # BestFirst, then train the base classifier ("-W") on the reduced data.
    meta = SingleClassifierEnhancer(classname="weka.classifiers.meta.AttributeSelectedClassifier")
    meta.set_options(
        ["-E", "weka.attributeSelection.CfsSubsetEval",
         "-S", "weka.attributeSelection.BestFirst",
         "-W", classifier])
    evl = Evaluation(data)
    evl.crossvalidate_model(meta, data, 10, Random(1))
    print("%s (cfs): %0.0f%%" % (classifier, evl.percent_correct()))
    # meta with wrapper
    # The wrapper evaluator scores subsets with the classifier itself.
    # NOTE(review): snippet truncated -- the evaluation of this setup is
    # missing from this chunk.
    meta = SingleClassifierEnhancer(classname="weka.classifiers.meta.AttributeSelectedClassifier")
    meta.set_options(
        ["-E", "weka.attributeSelection.WrapperSubsetEval -B " + classifier,
         "-S", "weka.attributeSelection.BestFirst",
         "-W", classifier])
Exemple #18
0
# NOTE(review): 'loader' and 'fname' come from earlier, unseen code.
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)

# cross-validate classifiers
classifiers = [
    "weka.classifiers.functions.MultilayerPerceptron",
    "weka.classifiers.trees.J48",
    "weka.classifiers.bayes.NaiveBayes",
    "weka.classifiers.functions.SMO",
    "weka.classifiers.lazy.IBk"
]
for classifier in classifiers:
    cls = Classifier(classname=classifier)
    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 10, Random(1))
    print("%s: %0.0f%%" % (classifier, evl.percent_correct()))

# configure experiment
print("This will take some time, so grab a cuppa... And a muffin... And read the paper...")
datasets = [
    data_dir + os.sep + "iris.arff",
    data_dir + os.sep + "breast-cancer.arff",
    data_dir + os.sep + "credit-g.arff",
    data_dir + os.sep + "diabetes.arff",
    data_dir + os.sep + "glass.arff",
    data_dir + os.sep + "ionosphere.arff"
]
# NOTE(review): snippet truncated here -- this list literal is never closed,
# leaving the fragment syntactically incomplete.
classifiers = [
    Classifier(classname="weka.classifiers.functions.MultilayerPerceptron"),
    Classifier(classname="weka.classifiers.rules.ZeroR"),
    Classifier(classname="weka.classifiers.rules.OneR"),
Exemple #19
0
# NOTE(review): 'fname' is defined in earlier, unseen code.
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)

# Compare J48 on the raw data (bins=0) and on discretized versions with
# varying bin counts, for equal-width ("") and equal-frequency ("-F") binning.
for equal in ["", "-F"]:
    print("\nEqual frequency binning? " + str(equal == "-F") + "\n")
    for bins in [0, 40, 10, 5, 2]:
        if bins > 0:
            fltr = Filter(classname="weka.filters.unsupervised.attribute.Discretize", options=["-B", str(bins), equal])
            fltr.set_inputformat(data)
            filtered = fltr.filter(data)
        else:
            filtered = data
        cls = Classifier(classname="weka.classifiers.trees.J48")
        # cross-validate
        evl = Evaluation(filtered)
        evl.crossvalidate_model(cls, filtered, 10, Random(1))
        # build classifier on full dataset
        cls.build_classifier(filtered)
        # Pull the tree size out of J48's textual model dump.
        nodes = "N/A"
        for line in str(cls).split("\n"):
            if "Size of the tree :" in line:
                nodes = line.replace("Size of the tree :", "").strip()
        # output stats
        print("bins=%i accuracy=%0.1f nodes=%s" % (bins, evl.percent_correct(), nodes))

jvm.stop()
Exemple #20
0
from weka.classifiers import Classifier, Evaluation, CostMatrix, PredictionOutput

jvm.start()

# Dataset files to evaluate, all relative to data_dir.
datasets = [
    "ionosphere.arff",
    "credit-g.arff",
    "breast-cancer.arff",
    "diabetes.arff"
]
# Classifiers to cross-validate on each dataset.
classifiers = [
    "weka.classifiers.functions.VotedPerceptron",
    "weka.classifiers.functions.SMO",
]

for dataset in datasets:
    # load dataset
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_dir + os.sep + dataset)
    data.set_class_index(data.num_attributes() - 1)

    # 10-fold cross-validation of every classifier on this dataset
    for cname in classifiers:
        evl = Evaluation(data)
        evl.crossvalidate_model(Classifier(classname=cname), data, 10, Random(1))
        print("%s / %s: %0.1f%%" % (dataset, cname, evl.percent_correct()))

jvm.stop()
Exemple #21
0
import numpy
import weka.core.jvm as jvm
from weka.core.converters import Loader
from weka.core.classes import Random
from weka.classifiers import Classifier, Evaluation

jvm.start()

# load segment-challenge
loader = Loader(classname="weka.core.converters.ArffLoader")

fname = data_dir + os.sep + "segment-challenge.arff"
print("\nLoading dataset: " + fname + "\n")
train = loader.load_file(fname)
train.set_class_index(train.num_attributes() - 1)

# J48 accuracy on ten random 90/10 splits, one per seed 1..10.
perc = []
for seed in xrange(1, 11):
    evl = Evaluation(train)
    evl.evaluate_train_test_split(
        Classifier(classname="weka.classifiers.trees.J48"), train, 90.0, Random(seed))
    perc.append(round(evl.percent_correct(), 1))
    print("Accuracy with seed %i: %0.1f%%" % (seed, evl.percent_correct()))

# Mean and standard deviation across the ten runs.
nperc = numpy.array(perc)
print("mean=%0.2f stdev=%0.2f" % (numpy.mean(nperc), numpy.std(nperc)))

jvm.stop()