Exemple #1
0
def main(args):
    """
    Train J48 on a train/test split of a dataset and record its predictions twice.

    The dataset is loaded from an ARFF file, split with ``train_test_split(66.0,
    Random(1))``, and a J48 tree is built on the training part. Predictions on the
    test part are then produced in two ways:

    1. in memory, through ``Evaluation.test_model`` with a CSV prediction output
    2. straight to a CSV file in the temp directory (separate prediction run,
       nothing is kept in the output buffer because of "-suppress")

    :param args: the commandline arguments (optional, can be dataset filename)
    :type args: list
    """

    # an explicit filename on the command line wins over the bundled "vote.arff"
    if len(args) > 1:
        data_file = args[1]
    else:
        data_file = helper.get_data_dir() + os.sep + "vote.arff"
    helper.print_info("Loading dataset: " + data_file)
    arff_loader = Loader(classname="weka.core.converters.ArffLoader")
    dataset = arff_loader.load_file(data_file)
    dataset.class_is_last()

    # split the data and train the tree on the training part
    train, test = dataset.train_test_split(66.0, Random(1))
    tree = Classifier(classname="weka.classifiers.trees.J48")
    tree.build_classifier(train)
    print(tree)

    csv_output_class = "weka.classifiers.evaluation.output.prediction.CSV"

    # first pass: evaluate on the test part, predictions collected in the buffer
    helper.print_title("recording predictions in-memory")
    buffered_output = PredictionOutput(
        classname=csv_output_class,
        options=["-distribution"])
    evaluation = Evaluation(train)
    evaluation.test_model(tree, test, output=buffered_output)
    print(evaluation.summary())
    helper.print_info("Predictions:")
    print(buffered_output.buffer_content())

    # second pass: predictions are written to a file instead of the buffer
    helper.print_title("recording/outputting predictions separately")
    outputfile = helper.get_tmp_dir() + "/j48_vote.csv"
    file_output = PredictionOutput(
        classname=csv_output_class,
        options=["-distribution", "-suppress", "-file", outputfile])
    file_output.header = test
    file_output.print_all(tree, test)
    helper.print_info("Predictions stored in:" + outputfile)
    # "-suppress" keeps the output out of memory, so this prints nothing useful
    print(file_output.buffer_content())
def Boost_J48(data, rnm):
    """
    Cross-validate an AdaBoostM1-boosted J48 tree and write its reports to disk.

    The base learner is a J48 tree wrapped in a FilteredClassifier whose Remove
    filter is configured with ``-R first``. The boosted classifier is built on all
    of ``data`` (that model is what the tree report contains) and evaluated with
    10-fold cross-validation using ``Random(1)``.

    Files written, each prefixed with ``rnm``: ``_Boost_J48_Tree.txt``,
    ``_Boost_J48_Prediction.txt``, ``_Boost_j48_Evaluation.txt`` and the ROC plot
    ``_Boost_J48_ROC.png``.

    :param data: the dataset; its last attribute is set as the class attribute
    :param rnm: prefix of the output file names, also written into the reports
    :type rnm: str
    :return: the percentage of correctly classified instances, as a string
    :rtype: str
    """
    data.class_is_last()
    fc1 = FilteredClassifier()
    fc1.classifier = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.25", "-M", "2"])
    fc1.filter = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "first"])
    fc2 = SingleClassifierEnhancer(classname="weka.classifiers.meta.AdaBoostM1", options=["-P", "100", "-S", "1", "-I", "10"])
    fc2.classifier = fc1
    pred_output = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.CSV", options=["-p", "1"])
    folds = 10
    fc2.build_classifier(data)
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(fc2, data, folds, Random(1), pred_output)

    # The reports used to be written with the Python 2 only "print >> file"
    # statement and manual close() calls. write() works on Python 2 and 3, and
    # "with" closes the file even if building one of the report strings fails.
    # The literals reproduce what "print >>" emitted: a single space between
    # the items and one trailing newline per statement.
    with open(rnm + '_Boost_J48_Tree.txt', 'w') as f0:
        f0.write("Filename:  " + rnm + "\n")
        f0.write('\n\n' + "\n")
        f0.write(str(fc2) + "\n")
    with open(rnm + '_Boost_J48_Prediction.txt', 'w') as f1:
        f1.write('Filename: ' + rnm + "\n")
        f1.write('Prediction Summary: ' + pred_output.buffer_content() + "\n")
    with open(rnm + '_Boost_j48_Evaluation.txt', 'w') as f2:
        f2.write('Filename: ' + rnm + "\n")
        f2.write('Evaluation Summary: ' + evaluation.summary() + "\n")
        f2.write('\n\n\n' + "\n")
        f2.write(evaluation.class_details() + "\n")

    plot_roc(evaluation, class_index=[0,1], title=rnm, key_loc='best', outfile=rnm + '_Boost_J48_ROC.png', wait=False)
    value_Boost_J48 = str(evaluation.percent_correct)
    return value_Boost_J48
def experiment_file_random(path_features, path_folder_save_results, options,
                           classifier, fold, random, name):
    """
    Cross-validate one classifier on one feature file and save the outcome.

    Two files are written below ``path_folder_save_results``: ``<name>.csv`` with
    the percentages of correct/incorrect instances and the confusion matrix, and
    ``prediction/<name>.csv`` with the CSV prediction output of the run.

    :param path_features: file to load with ``converters.load_any_file``
    :param path_folder_save_results: folder receiving the result files
    :param options: classifier options as one string (split with ``split_options``)
    :param classifier: Weka class name of the classifier
    :param fold: number of cross-validation folds
    :param random: seed handed to ``Random``
    :param name: base name of the result files (converted with ``str``)
    """
    print("start weka")
    learner = Classifier(classname=classifier,
                         options=weka.core.classes.split_options(options))

    dataset = converters.load_any_file(path_features)
    dataset.class_is_last()

    predictions = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.CSV")
    evaluation = Evaluation(dataset)
    evaluation.crossvalidate_model(learner, dataset, fold, Random(random),
                                   predictions)

    # one row of summary statistics for this run
    summary = pd.DataFrame(data={
        'percent_correct': [evaluation.percent_correct],
        'percent_incorrect': [evaluation.percent_incorrect],
        'confusion_matrix': [evaluation.matrix()],
    })
    summary_path = path_folder_save_results + '/' + str(name) + '.csv'
    summary.to_csv(summary_path, index=False)

    # raw per-instance predictions collected during cross-validation
    prediction_text = predictions.buffer_content()
    prediction_path = (path_folder_save_results + '/' + 'prediction/' +
                       str(name) + '.csv')
    with open(prediction_path, 'w') as handle:
        handle.write(prediction_text)
def RandomTree(data, rnm):
    """
    Cross-validate a RandomTree classifier and write its reports to disk.

    The RandomTree is wrapped in a FilteredClassifier whose Remove filter is
    configured with ``-R first``. It is evaluated with 10-fold cross-validation
    using ``Random(1)`` and afterwards built on all of ``data``; that final model
    is what the tree report contains.

    Files written, each prefixed with ``rnm``: ``_RT_Tree.txt``,
    ``_RT_Prediction.txt``, ``_RT_Evaluation.txt`` and the ROC plot
    ``_RT_ROC.png``.

    :param data: the dataset; its last attribute is set as the class attribute
    :param rnm: prefix of the output file names, also written into the reports
    :type rnm: str
    :return: the percentage of correctly classified instances, as a string
    :rtype: str
    """
    data.class_is_last()
    fc = FilteredClassifier()
    fc.classifier = Classifier(classname="weka.classifiers.trees.RandomTree", options=["-K", "0", "-M", "1.0", "-V", "0.001", "-S", "1"])
    fc.filter = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "first"])
    pred_output = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.CSV", options=["-p", "1"])
    folds = 10
    evl = Evaluation(data)
    evl.crossvalidate_model(fc, data, folds, Random(1), pred_output)
    fc.build_classifier(data)

    # The reports used to be written with the Python 2 only "print >> file"
    # statement and manual close() calls. write() works on Python 2 and 3, and
    # "with" closes the file even if building one of the report strings fails.
    # The literals reproduce what "print >>" emitted: a single space between
    # the items and one trailing newline per statement.
    with open(rnm + '_RT_Tree.txt', 'w') as f0:
        f0.write("Filename:  " + rnm + "\n")
        f0.write('\n\n' + "\n")
        f0.write(str(fc) + "\n")
    with open(rnm + '_RT_Prediction.txt', 'w') as f1:
        f1.write('Filename: ' + rnm + "\n")
        f1.write('Prediction Summary: ' + pred_output.buffer_content() + "\n")
    with open(rnm + '_RT_Evaluation.txt', 'w') as f2:
        f2.write('Filename: ' + rnm + "\n")
        f2.write('Evaluation Summary: ' + evl.summary() + "\n")
        f2.write('\n\n\n' + "\n")
        f2.write(evl.class_details() + "\n")

    plot_roc(evl, class_index=[0,1], title=rnm, key_loc='best', outfile=rnm+'_RT_ROC.png', wait=False)
    value_RT = str(evl.percent_correct)
    return value_RT
def experiment_more_file(path_files, path_folder_save_results, fold, options,
                         classifier, random, name):
    """
    Cross-validate one classifier on every CSV file of a folder.

    For each file whose name contains ".csv" the data is loaded, the last
    attribute is used as class and a ``fold``-fold cross-validation is run. The
    CSV prediction output of each run is written to
    ``<path_folder_save_results>/prediction/<name><file minus last 4 chars>pred_data.csv``
    and the summary of all runs to ``<path_folder_save_results>/<name>.csv``.

    :param path_files: folder containing the input files
    :param path_folder_save_results: folder receiving the result files
    :param fold: number of cross-validation folds
    :param options: classifier options as one string (split with ``split_options``)
    :param classifier: Weka class name of the classifier
    :param random: seed handed to ``Random``
    :param name: prefix of the result files (converted with ``str``)
    """
    cls = Classifier(classname=classifier,
                     options=weka.core.classes.split_options(options))

    # Build a new list instead of calling remove() on the list being iterated:
    # removing during iteration skips the entry that follows each removed one,
    # so non-CSV names could slip through the filter.
    file_list = [
        entry for entry in os.listdir(path_files) if ".csv" in entry
    ]

    d_results = {
        'name_file': [],
        'percent_correct': [],
        'percent_incorrect': [],
        'confusion_matrix': []
    }

    print(file_list)

    for file_name in file_list:
        print(str(file_name))
        data = converters.load_any_file(path_files + "/" + file_name)

        data.class_is_last()

        # fresh output per file, so every prediction file holds one run only
        pout = PredictionOutput(
            classname="weka.classifiers.evaluation.output.prediction.CSV")

        evl = Evaluation(data)

        evl.crossvalidate_model(cls, data, fold, Random(random), pout)

        d_results['name_file'].append(str(file_name))
        d_results['percent_correct'].append(evl.percent_correct)
        d_results['percent_incorrect'].append(evl.percent_incorrect)
        d_results['confusion_matrix'].append(
            evl.matrix())  # Generates the confusion matrix.

        save = pout.buffer_content()

        with open(
                path_folder_save_results + '/' + 'prediction/' + str(name) +
                str(file_name)[:-4] + 'pred_data.csv', 'w') as f:
            f.write(save)

    d_results = pd.DataFrame(data=d_results)

    d_results.to_csv(path_folder_save_results + '/' + str(name) + ".csv",
                     index=False)
def index():
    """
    Serve the bot page on GET; on POST classify the submitted form data.

    On POST the form fields are turned into one comma-separated instance line
    (class value "?") that is appended to "instances.arff". The classifier stored
    in "J48.model" is then run over the whole file and the predicted label of the
    last row is returned as JSON: ``{"status": "200", "prediction": <label>}``.

    For any other request method the function falls through and returns None.

    :return: the rendered template (GET) or a JSON ``Response`` (POST)
    """
    if request.method == "GET":
        return render_template('bot.html')
    if request.method == "POST":
        # jvm.stop()
        jvm.start()
        args = request.form.to_dict()

        # Parse and convert everything BEFORE touching the file, so a missing
        # or malformed field can no longer leave an open file handle behind.
        weight_lb = float(args['weight']) * 2.20462
        # presumably height is given in inches (703 * lb / in^2) - TODO confirm
        bmi = (weight_lb / pow(float(args['height']), 2)) * 703
        hypertensive_status = '1' if args['hypertensive_status'] == "Yes" else '0'
        heart_disease_status = '1' if args['heart_disease_status'] == "Yes" else '0'

        # NOTE(review): form values are written into the ARFF row unescaped; a
        # value containing a comma, quote or newline would corrupt the file.
        st = "\n"+args['gender']+","+args['age']+","+hypertensive_status+","+heart_disease_status+","+args['marrital_status'] + \
            ","+args['work_type']+","+args['residence']+"," + \
            args['hypertension']+","+str(bmi)+",'"+args['smoking_status'].lower()+"',?"
        print(st)
        # "with" guarantees the file is flushed and closed before it is re-read
        with open("instances.arff", "a") as f:
            f.write(st)

        objects = serialization.read_all("J48.model")
        loader = Loader(classname="weka.core.converters.ArffLoader")
        csr = Classifier(jobject=objects[0])
        output_results = PredictionOutput(
            classname="weka.classifiers.evaluation.output.prediction.CSV")
        data1 = loader.load_file("instances.arff")
        data1.class_is_last()
        ev2 = Evaluation(data1)
        ev2.test_model(csr, data1, output_results)

        # prepend column names so pandas can address the "Predicted" column
        TESTDATA = StringIO("Instance,Actual,Predicted," +
                            output_results.buffer_content())
        df = pd.read_csv(TESTDATA)
        # last row is the instance appended above; keep the text after the ":"
        prediction = list(df.Predicted).pop().split(":")[1]
        print(prediction)
        # jvm.stop()
        response = {"status": "200", "prediction": prediction}
        return Response(json.dumps(response, indent=2),
                        mimetype="application/json")
Exemple #7
0
def SimpleLogistic():
    """
    Cross-validate SimpleLogistic on "First_trial_classification.arff" and save the model.

    Prints the evaluation summary and the plain-text predictions of a 10-fold
    cross-validation (``Random(486)``), then builds the classifier on the
    complete dataset and serializes it to "SimpleLogistic2.model".
    """
    # load a dataset
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file("First_trial_classification.arff")
    data.class_is_last()  # set class attribute

    cls = Classifier(classname="weka.classifiers.functions.SimpleLogistic")
    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")
    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 10, Random(486), pout)

    print(evl.summary())
    print(pout.buffer_content())

    # Weka's cross-validation trains a copy of the classifier for every fold
    # and leaves the object passed in untouched, so it has to be built
    # explicitly; otherwise an untrained model would be serialized below.
    cls.build_classifier(data)

    # save model
    serialization.write_all("SimpleLogistic2.model", cls)
Exemple #8
0
def SMOreg():
    """
    Cross-validate SMOreg with an RBF kernel on "First_trial_regression.arff" and save the model.

    Prints the evaluation summary and the plain-text predictions of a 10-fold
    cross-validation (``Random(486)``), then builds the classifier on the
    complete dataset and serializes it to "SMOreg.model2".
    """
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file("First_trial_regression.arff")
    data.class_is_last()

    cls = KernelClassifier(classname="weka.classifiers.functions.SMOreg",
                           options=["-N", "0"])
    kernel = Kernel(
        classname="weka.classifiers.functions.supportVector.RBFKernel",
        options=["-G", "0.2"])
    cls.kernel = kernel
    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")
    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 10, Random(486), pout)

    print(evl.summary())
    print(pout.buffer_content())

    # Weka's cross-validation trains a copy of the classifier for every fold
    # and leaves the object passed in untouched, so it has to be built
    # explicitly; otherwise an untrained model would be serialized below.
    cls.build_classifier(data)

    # save model
    serialization.write_all("SMOreg.model2", cls)
Exemple #9
0
def handle_message(message):
    """
    Buffer one sensor message and classify the gesture once enough data arrived.

    Accelerometer and gyroscope readings are appended to the module-level
    sample lists; a "stop" message calls ``stop()``. As soon as both the
    gyroscope and the accelerometer x-lists hold at least 25 samples, the
    samples are written out via ``processDataToArff``, an LMT classifier is
    trained on "train.arff" and applied to "classify.arff". The gesture found in
    the LAST line of the prediction output selects the sound to play, and all
    six sample lists are reset.

    :param message: mapping with 'sensorName' and, for sensor readings, 'x', 'y', 'z'
    """
    global accel_x, accel_y, accel_z, gyro_x, gyro_y, gyro_z

    sensor = message['sensorName']
    if sensor == 'accelerometer':
        for samples, axis in ((accel_x, 'x'), (accel_y, 'y'), (accel_z, 'z')):
            samples.append(float(message[axis]))
    elif sensor == 'gyroscope':
        for samples, axis in ((gyro_x, 'x'), (gyro_y, 'y'), (gyro_z, 'z')):
            samples.append(float(message[axis]))
    elif sensor == "stop":
        # stop signal
        stop()

    # only classify when both gyroscope and accelerometer hold 25+ samples
    if len(gyro_x) < 25 or len(accel_x) < 25:
        return

    processDataToArff(accel_x, accel_y, accel_z, gyro_x, gyro_y, gyro_z)
    jvm.start()
    loader = Loader(classname="weka.core.converters.ArffLoader")

    # train the classifier on the training data
    train = loader.load_file("train.arff")
    train.class_is_last()
    cls = Classifier(classname="weka.classifiers.trees.LMT")
    cls.build_classifier(train)

    # classify the freshly written samples
    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")
    evl = Evaluation(train)
    test = loader.load_file("classify.arff")
    test.class_is_last()
    evl.test_model(cls, test, pout)

    # every line overwrites the result, so the last output line decides;
    # the markers are checked in this order, first match wins
    gesture_codes = (("upDown", 1), ("leftRight", 2), ("inOut", 3),
                     ("rotation", 4))
    result = pout.buffer_content()
    for line in result.splitlines():
        result = next(
            (code for marker, code in gesture_codes if marker in line),
            "error")

    if result in (1, 2, 3, 4):
        stop()
        if result == 1:
            playD()
        elif result == 2:
            playBm()
        elif result == 3:
            playA()
        else:
            playG()

    # clear the arrays for new data
    gyro_x, gyro_y, gyro_z = [], [], []
    accel_x, accel_y, accel_z = [], [], []
Exemple #10
0
            print(classifier.to_commandline())
            classifier.build_classifier(dataA)
            print("\n--> classifier:\n")
            print(classifier)
            print("\n--> graph:\n")
            print(classifier.graph)

            outputfile = helper.get_tmp_dir() + "/result.csv"
            output = PredictionOutput(
                classname='weka.classifiers.evaluation.output.prediction.CSV',
                options=["-distribution", "-suppress", "-file", outputfile])
            print("\n--> Output:\n")
            output.header = dataA
            output.print_all(classifier, dataA)
            helper.print_info("Predictions stored in:" + outputfile)
            print(output.buffer_content())
            Eval = Evaluation(dataA)
            Eval.test_model(classifier, dataA, output=output)
            print(Eval.summary())
            ListEval = []
            Corr = []
            Corrf = []
            ListEval = Eval.summary().split('Mean absolute error')
            print("ListEval :")
            print(ListEval)
            Corr = ListEval[0].split('\n')
            Corrf = Corr[1].split('Correlation coefficient                  ')
            print("Corrf :")
            print(Corrf[1])

            ListEvalRAE = []
Exemple #11
0
def experiment_sequential_file(path_indices, path_features,
                               path_folder_save_results, options, classifier,
                               name, indicator_col, images):
    """
    Evaluate a classifier segment by segment: test on one row block, train on the rest.

    The row boundaries come from the index archive at ``path_indices`` (the last
    array in the archive is used, shifted by +1) with the number of rows of the
    data appended as final boundary. For each block the classifier is built on
    all rows outside the block and tested on the block. All predictions are
    collected in one shared CSV output, saved to
    ``prediction/<name>pred_data.csv``, re-read and handed to
    ``create_prediction``. Per-block statistics go to ``<name>results.csv``.

    :param path_indices: archive readable by ``load`` that exposes ``.files``
    :param path_features: file to load with ``converters.load_any_file``
    :param path_folder_save_results: folder receiving the result files
    :param options: classifier options as one string (split with ``split_options``)
    :param classifier: Weka class name of the classifier
    :param name: prefix of the result files
    :param indicator_col: passed through to ``create_prediction``
    :param images: passed through to ``create_prediction``
    """
    index_archive = load(path_indices)

    # every array of the archive is read, only the last one is kept
    for key in index_archive.files:
        ind = index_archive[key] + 1

    cls = Classifier(classname=classifier,
                     options=weka.core.classes.split_options(options))

    data = converters.load_any_file(path_features)

    # the number of rows closes the final block
    ind = np.append(ind, len(data))

    data.class_is_last()

    # shared by all blocks, so the buffer accumulates every prediction
    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.CSV")

    d_results = {
        'index': [],
        'percent_correct': [],
        'percent_incorrect': [],
        'precision': [],
        'recall': [],
        'f-score': [],
        'confusion_matrix': []
    }

    block_count = len(ind) - 1
    final_row = ind[-1]

    for j in range(block_count):
        is_final_block = j == block_count - 1
        first = ind[j]
        # the final block runs up to and including the last row
        last = ind[j + 1] if is_final_block else ind[j + 1] - 1

        d_test = data.subset(row_range=str(first) + '-' + str(last))

        if j == 0:  # first block: train on everything after it
            train_rows = str(last + 1) + '-' + str(final_row)
            d_train = data.subset(row_range=train_rows)
            print(train_rows)
        elif is_final_block:  # final block: train on everything before it
            train_rows = '1-' + str(first - 1)
            d_train = data.subset(row_range=train_rows)
            print(train_rows)
        else:  # central block: train on both sides
            train_rows = ('1-' + str(first - 1) + ',' + str(last + 1) + '-' +
                          str(final_row))
            print(train_rows)
            d_train = data.subset(row_range=train_rows)

        cls.build_classifier(d_train)

        evl = Evaluation(data)
        evl.test_model(cls, d_test, pout)

        d_results['index'].append(str(ind[j]))
        d_results['percent_correct'].append(evl.percent_correct)
        d_results['percent_incorrect'].append(evl.percent_incorrect)
        d_results['precision'].append(evl.precision(1))
        d_results['recall'].append(evl.recall(1))
        d_results['f-score'].append(evl.f_measure(1))
        d_results['confusion_matrix'].append(
            evl.matrix())  # Generates the confusion matrix.

    save = pout.buffer_content()

    check_folder_or_create(path_folder_save_results + '/' + 'prediction')

    prediction_dir = path_folder_save_results + '/' + 'prediction/'
    prediction_file = prediction_dir + name + 'pred_data.csv'

    with open(prediction_file, 'w') as f:
        f.write(save)

    # read the predictions back without a header row; columns 1-3 are used
    buffer_save = pd.read_csv(prediction_file,
                              index_col=False,
                              header=None)

    col_label = buffer_save[1]
    col_prediction = buffer_save[2]
    col_different = buffer_save[3]

    create_prediction(col_label, col_prediction, col_different, indicator_col,
                      images, name, prediction_dir)

    results_table = pd.DataFrame(data=d_results)

    results_table.to_csv(path_folder_save_results + '/' + name + 'results.csv',
                         index=False)
Exemple #12
0
def experiment_more_file(path_files,
                         path_folder_save_results,
                         fold,
                         options,
                         classifier,
                         random,
                         name,
                         voting=False):
    """
    Cross-validate one classifier on every CSV file of a folder and post-process the predictions.

    For each file whose name contains ".csv" the matching indicator table
    ``indicator/<first character of the file name>_indicator.csv`` is read, the
    data is cross-validated, the CSV prediction output is written to
    ``<path_folder_save_results>/<name>/prediction/pred_data.csv`` (overwritten
    for every file), re-read and handed to ``create_prediction``. The summary
    of all runs is written to ``<path_folder_save_results>/<name>.csv``.

    :param path_files: folder containing the input files
    :param path_folder_save_results: folder receiving the result files
    :param fold: number of cross-validation folds
    :param options: classifier options as one string (split with ``split_options``)
    :param classifier: Weka class name of the classifier
    :param random: seed handed to ``Random``
    :param name: name of the result sub-folder and of the summary file
    :param voting: not used by this function
    """
    cls = Classifier(classname=classifier,
                     options=weka.core.classes.split_options(options))

    # Build a new list instead of calling remove() on the list being iterated:
    # removing during iteration skips the entry that follows each removed one,
    # so a non-CSV entry (e.g. the "indicator" sub-folder) could slip through.
    file_list = [
        entry for entry in os.listdir(path_files) if ".csv" in entry
    ]

    d_results = {
        'name_file': [],
        'percent_correct': [],
        'percent_incorrect': [],
        'precision': [],
        'recall': [],
        'f-score': [],
        'confusion_matrix': []
    }

    for file_name in file_list:
        indicator_table = pd.read_csv(path_files + '/indicator/' +
                                      file_name[0] + '_indicator.csv')
        indicator = list(indicator_table['indicator'])
        images = list(indicator_table['image'])

        data = converters.load_any_file(path_files + "/" + file_name)

        data.class_is_last()

        # fresh output per file, so pred_data.csv holds one run only
        pout = PredictionOutput(
            classname="weka.classifiers.evaluation.output.prediction.CSV")

        evl = Evaluation(data)

        evl.crossvalidate_model(cls, data, fold, Random(random), pout)

        d_results['name_file'].append(str(file_name))
        d_results['percent_correct'].append(evl.percent_correct)
        d_results['percent_incorrect'].append(evl.percent_incorrect)
        d_results['precision'].append(evl.precision(1))
        d_results['recall'].append(evl.recall(1))
        d_results['f-score'].append(evl.f_measure(1))
        d_results['confusion_matrix'].append(
            evl.matrix())  # Generates the confusion matrix.

        save = pout.buffer_content()

        check_folder_or_create(path_folder_save_results + '/' + name + '/' +
                               'prediction')

        with open(
                path_folder_save_results + '/' + name + '/' +
                'prediction/pred_data.csv', 'w') as f:
            f.write(save)

        buffer_save = pd.read_csv(path_folder_save_results + '/' + name + '/' +
                                  'prediction/pred_data.csv',
                                  index_col=False)

        col_label = buffer_save['actual']
        col_prediction = buffer_save['predicted']
        col_different = buffer_save['error']

        create_prediction(
            col_label, col_prediction, col_different, indicator, images,
            file_name[:-4],
            path_folder_save_results + '/' + name + '/prediction/')

    d_results = pd.DataFrame(data=d_results)

    d_results.to_csv(path_folder_save_results + '/' + str(name) + ".csv")
def experiment_sequential_file(path_indices, path_features,
                               path_folder_save_results, options, classifier,
                               name):
    """
    Evaluate a classifier segment by segment: test on one row block, train on the rest.

    The row boundaries come from the index archive at ``path_indices`` (the last
    array in the archive is used, shifted by +1) with the number of rows of the
    data appended as final boundary. For each block the classifier is built on
    the rows outside the block and tested on the block; the content of the
    prediction buffer is written to ``prediction/<name><j>pred_data.csv`` after
    every block and the per-block statistics to ``<name>results.csv``.

    :param path_indices: archive readable by ``load`` that exposes ``.files``
    :param path_features: file to load with ``converters.load_any_file``
    :param path_folder_save_results: folder receiving the result files
    :param options: classifier options as one string (split with ``split_options``)
    :param classifier: Weka class name of the classifier
    :param name: prefix of the result files
    """
    index_archive = load(path_indices)

    # every array of the archive is read, only the last one is kept
    for key in index_archive.files:
        ind = index_archive[key] + 1

    cls = Classifier(classname=classifier,
                     options=weka.core.classes.split_options(options))

    data = converters.load_any_file(path_features)

    # the number of rows closes the final block
    ind = np.append(ind, len(data))

    data.class_is_last()

    # NOTE(review): one output object is shared by all blocks, so each file
    # written below presumably also contains the predictions of the earlier
    # blocks - confirm that this is intended.
    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.CSV")

    d_results = {
        'index': [],
        'percent_correct': [],
        'percent_incorrect': [],
        'confusion_matrix': []
    }

    final_block = len(ind) - 2
    end_row = str(ind[-1])

    for j in range(len(ind) - 1):
        print(j)

        lower, upper = ind[j], ind[j + 1]

        # NOTE(review): the test range includes row ind[j + 1], which is also
        # the first test row of the next block - looks like consecutive test
        # blocks overlap by one row; confirm against the index file format.
        test_rows = str(lower) + '-' + str(upper)
        print(test_rows)

        d_test = data.subset(row_range=test_rows)

        if j == 0:  # first block: train on everything after it
            train_rows = str(upper + 1) + '-' + end_row
        elif j == final_block:  # final block: train on everything before it
            train_rows = '1-' + str(lower - 1)
        else:  # central block: train on both sides
            train_rows = ('1-' + str(lower - 1) + ',' + str(upper + 1) + '-' +
                          end_row)
        d_train = data.subset(row_range=train_rows)

        cls.build_classifier(d_train)

        evl = Evaluation(data)
        evl.test_model(cls, d_test, pout)

        save = pout.buffer_content()

        prediction_file = (path_folder_save_results + '/' + '/prediction/' +
                           name + str(j) + 'pred_data.csv')
        with open(prediction_file, 'w') as f:
            f.write(save)

        d_results['index'].append(str(ind[j]))
        d_results['percent_correct'].append(evl.percent_correct)
        d_results['percent_incorrect'].append(evl.percent_incorrect)
        d_results['confusion_matrix'].append(
            evl.matrix())  # Generates the confusion matrix.

    results_table = pd.DataFrame(data=d_results)

    results_table.to_csv(path_folder_save_results + '/' + name + 'results.csv',
                         index=False)
# In[4]:


# Show the current content of the instance file; "with" closes the handle
# even if reading or printing fails.
with open("instances.arff", "r") as f:
    print(f.read())


# In[10]:


from io import StringIO
# Run the loaded classifier (csr) over instances.arff and collect the
# predictions as CSV text.
output_results = PredictionOutput(
    classname="weka.classifiers.evaluation.output.prediction.CSV")
data1 = loader.load_file("instances.arff")
data1.class_is_last()
ev2 = Evaluation(data1)
ev2.test_model(csr, data1, output_results)

# fetch the buffer once and reuse it for all the outputs below
predictions_csv = output_results.buffer_content()
print("Class prediction: ", predictions_csv[-13:-10])
print("\n\n     Instance", "     Actual", "    Predicted")
print(predictions_csv)

# prepend column names so pandas can address the "Predicted" column
TESTDATA = StringIO("Instance,Actual,Predicted," + predictions_csv)
# jvm.stop()
x = pd.read_csv(TESTDATA)


# In[14]:


# label of the last prediction: the text after the ":" of the final row
list(x.Predicted)[-1].split(":")[1]

Exemple #15
0
train.class_is_last()
test = loader.load_file("test.arff")
test.class_is_last()
# print(train)

# LMT is the algorithm used; the model is trained on the training data
cls = Classifier(classname="weka.classifiers.trees.LMT")
cls.build_classifier(train)

# collect the plain-text predictions for the test data
pout = PredictionOutput(
    classname="weka.classifiers.evaluation.output.prediction.PlainText")
evl = Evaluation(train)
evl.test_model(cls, test, pout)

result = pout.buffer_content()
#print(result)

# report, per output line, the first gesture name it contains (checked in
# this order); lines without any gesture name are reported as "error"
resultLines = result.splitlines()
for i, line in enumerate(resultLines):
    for gesture in ("upDown", "leftRight", "inOut", "rotation"):
        if gesture in line:
            print("%d %s" % (i + 1, gesture))
            break
    else:
        print("error")