def main(args):
    """
    Demonstrates two ways of recording J48 predictions on a train/test split.

    The dataset is loaded, its last attribute becomes the class, and the data is
    split into a train and a test set (66.0 as split percentage, randomized with
    ``Random(1)``). A J48 tree is built on the training set and evaluated on the
    test set. The predictions are recorded twice:

    1. in memory, by passing a CSV prediction output to ``test_model``
    2. directly to a file (more memory efficient), which needs a separate
       prediction run via ``print_all``

    :param args: the commandline arguments; ``args[1]``, if present, is the dataset filename
    :type args: list
    """
    csv_output_class = "weka.classifiers.evaluation.output.prediction.CSV"

    # explicit filename if one was given, otherwise vote.arff from the helper's data directory
    dataset_path = args[1] if len(args) > 1 else helper.get_data_dir() + os.sep + "vote.arff"
    helper.print_info("Loading dataset: " + dataset_path)
    dataset = Loader(classname="weka.core.converters.ArffLoader").load_file(dataset_path)
    dataset.class_is_last()

    # randomized train/test split
    train_set, test_set = dataset.train_test_split(66.0, Random(1))

    # train the tree on the training portion only
    tree = Classifier(classname="weka.classifiers.trees.J48")
    tree.build_classifier(train_set)
    print(tree)

    # variant 1: predictions are collected in the output's in-memory buffer
    helper.print_title("recording predictions in-memory")
    memory_output = PredictionOutput(
        classname=csv_output_class,
        options=["-distribution"])
    evaluation = Evaluation(train_set)
    evaluation.test_model(tree, test_set, output=memory_output)
    print(evaluation.summary())
    helper.print_info("Predictions:")
    print(memory_output.buffer_content())

    # variant 2: predictions are written to a file in a separate pass
    helper.print_title("recording/outputting predictions separately")
    csv_path = helper.get_tmp_dir() + "/j48_vote.csv"
    file_output = PredictionOutput(
        classname=csv_output_class,
        options=["-distribution", "-suppress", "-file", csv_path])
    file_output.header = test_set
    file_output.print_all(tree, test_set)
    helper.print_info("Predictions stored in:" + csv_path)
    # "-suppress" keeps the output out of memory, so this prints nothing
    print(file_output.buffer_content())
def Boost_J48(data, rnm):
    """
    Cross-validate an AdaBoostM1-boosted J48 tree and write its reports to disk.

    The base learner is a FilteredClassifier that applies the ``Remove`` filter
    (``-R first``) before J48 (``-C 0.25 -M 2``); it is wrapped in AdaBoostM1
    (``-P 100 -S 1 -I 10``). The boosted model is built on ``data`` and a
    10-fold cross-validation seeded with ``Random(1)`` is run on the same data.

    Files written, all prefixed with ``rnm``:

    * ``_Boost_J48_Tree.txt``       - string form of the built model
    * ``_Boost_J48_Prediction.txt`` - buffered CSV predictions
    * ``_Boost_j48_Evaluation.txt`` - evaluation summary and class details
    * ``_Boost_J48_ROC.png``        - ROC plot for class indices 0 and 1

    The reports are written with ``file.write`` inside ``with`` blocks instead
    of the Python 2 only ``print >> f`` statement, so the function also runs on
    Python 3 and the files are closed even when a write fails. The bytes
    written match what the print statements produced.

    :param data: the dataset to evaluate; its last attribute is set as the class
    :param rnm: prefix for all output file names, also used as the plot title
    :type rnm: str
    :return: the cross-validated percentage of correct predictions, as a string
    :rtype: str
    """
    data.class_is_last()

    fc1 = FilteredClassifier()
    fc1.classifier = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.25", "-M", "2"])
    fc1.filter = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "first"])

    fc2 = SingleClassifierEnhancer(classname="weka.classifiers.meta.AdaBoostM1", options=["-P", "100", "-S", "1", "-I", "10"])
    fc2.classifier = fc1

    pred_output = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.CSV", options=["-p", "1"])
    folds = 10
    fc2.build_classifier(data)
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(fc2, data, folds, Random(1), pred_output)

    with open(rnm + '_Boost_J48_Tree.txt', 'w') as tree_file:
        # two spaces: the original label ended in a space and print added its separator
        tree_file.write("Filename:  {0}\n".format(rnm))
        tree_file.write('\n\n\n')
        tree_file.write("{0}\n".format(str(fc2)))

    with open(rnm + '_Boost_J48_Prediction.txt', 'w') as prediction_file:
        prediction_file.write('Filename: {0}\n'.format(rnm))
        prediction_file.write('Prediction Summary: {0}\n'.format(pred_output.buffer_content()))

    with open(rnm + '_Boost_j48_Evaluation.txt', 'w') as evaluation_file:
        evaluation_file.write('Filename: {0}\n'.format(rnm))
        evaluation_file.write('Evaluation Summary: {0}\n'.format(evaluation.summary()))
        evaluation_file.write('\n\n\n\n')
        evaluation_file.write('{0}\n'.format(evaluation.class_details()))

    plot_roc(evaluation, class_index=[0, 1], title=rnm, key_loc='best', outfile=rnm + '_Boost_J48_ROC.png', wait=False)
    value_Boost_J48 = str(evaluation.percent_correct)

    return value_Boost_J48
def experiment_file_random(path_features, path_folder_save_results, options,
                           classifier, fold, random, name):
    """
    Cross-validate one classifier on one feature file and store the outcome.

    Writes ``<path_folder_save_results>/<name>.csv`` with the percentage of
    correct/incorrect predictions and the confusion matrix, and
    ``<path_folder_save_results>/prediction/<name>.csv`` with the buffered CSV
    predictions.

    :param path_features: file with the features; the last attribute is used as class
    :param path_folder_save_results: folder receiving the result files
    :param options: classifier options as a single string (split with ``split_options``)
    :param classifier: Weka class name of the classifier
    :param fold: number of cross-validation folds
    :param random: seed passed to ``Random``
    :param name: base name of the output files
    """
    print("start weka")
    model = Classifier(classname=classifier,
                       options=weka.core.classes.split_options(options))

    dataset = converters.load_any_file(path_features)
    dataset.class_is_last()

    predictions = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.CSV")
    evaluation = Evaluation(dataset)
    evaluation.crossvalidate_model(model, dataset, fold, Random(random), predictions)

    # single-row table holding the aggregate figures of this run
    summary = pd.DataFrame(data={
        'percent_correct': [evaluation.percent_correct],
        'percent_incorrect': [evaluation.percent_incorrect],
        'confusion_matrix': [evaluation.matrix()],
    })
    summary.to_csv(path_folder_save_results + '/' + str(name) + '.csv',
                   index=False)

    prediction_text = predictions.buffer_content()
    prediction_path = (path_folder_save_results + '/' + 'prediction/'
                       + str(name) + '.csv')
    with open(prediction_path, 'w') as handle:
        handle.write(prediction_text)
def RandomTree(data, rnm):
    """
    Cross-validate a filtered RandomTree and write its reports to disk.

    The classifier is a FilteredClassifier that applies the ``Remove`` filter
    (``-R first``) before RandomTree (``-K 0 -M 1.0 -V 0.001 -S 1``). A 10-fold
    cross-validation seeded with ``Random(1)`` is run on ``data``; afterwards
    the classifier is built on the full data so its string form can be saved.

    Files written, all prefixed with ``rnm``:

    * ``_RT_Tree.txt``       - string form of the built model
    * ``_RT_Prediction.txt`` - buffered CSV predictions
    * ``_RT_Evaluation.txt`` - evaluation summary and class details
    * ``_RT_ROC.png``        - ROC plot for class indices 0 and 1

    The reports are written with ``file.write`` inside ``with`` blocks instead
    of the Python 2 only ``print >> f`` statement, so the function also runs on
    Python 3 and the files are closed even when a write fails. The bytes
    written match what the print statements produced.

    :param data: the dataset to evaluate; its last attribute is set as the class
    :param rnm: prefix for all output file names, also used as the plot title
    :type rnm: str
    :return: the cross-validated percentage of correct predictions, as a string
    :rtype: str
    """
    data.class_is_last()

    fc = FilteredClassifier()
    fc.classifier = Classifier(classname="weka.classifiers.trees.RandomTree", options=["-K", "0", "-M", "1.0", "-V", "0.001", "-S", "1"])
    fc.filter = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "first"])

    pred_output = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.CSV", options=["-p", "1"])
    folds = 10
    evl = Evaluation(data)
    evl.crossvalidate_model(fc, data, folds, Random(1), pred_output)
    fc.build_classifier(data)

    with open(rnm + '_RT_Tree.txt', 'w') as tree_file:
        # two spaces: the original label ended in a space and print added its separator
        tree_file.write("Filename:  {0}\n".format(rnm))
        tree_file.write('\n\n\n')
        tree_file.write("{0}\n".format(str(fc)))

    with open(rnm + '_RT_Prediction.txt', 'w') as prediction_file:
        prediction_file.write('Filename: {0}\n'.format(rnm))
        prediction_file.write('Prediction Summary: {0}\n'.format(pred_output.buffer_content()))

    with open(rnm + '_RT_Evaluation.txt', 'w') as evaluation_file:
        evaluation_file.write('Filename: {0}\n'.format(rnm))
        evaluation_file.write('Evaluation Summary: {0}\n'.format(evl.summary()))
        evaluation_file.write('\n\n\n\n')
        evaluation_file.write('{0}\n'.format(evl.class_details()))

    plot_roc(evl, class_index=[0, 1], title=rnm, key_loc='best', outfile=rnm + '_RT_ROC.png', wait=False)
    value_RT = str(evl.percent_correct)

    return value_RT
def experiment_more_file(path_files, path_folder_save_results, fold, options,
                         classifier, random, name):
    """
    Cross-validate one classifier on every CSV file found in a folder.

    For each directory entry whose name contains ``.csv`` the file is loaded,
    its last attribute is set as class, and a cross-validation is run. The
    buffered predictions of each file go to
    ``<path_folder_save_results>/prediction/<name><file stem>pred_data.csv``;
    the aggregate figures of all files are collected into
    ``<path_folder_save_results>/<name>.csv``.

    :param path_files: folder containing the CSV feature files
    :param path_folder_save_results: folder receiving the result files
    :param fold: number of cross-validation folds
    :param options: classifier options as a single string (split with ``split_options``)
    :param classifier: Weka class name of the classifier
    :param random: seed passed to ``Random``
    :param name: base name of the output files
    """
    cls = Classifier(classname=classifier,
                     options=weka.core.classes.split_options(options))

    # Build a filtered list rather than calling remove() on the list being
    # iterated: removing during iteration skips the entry that follows each
    # removed one, so adjacent non-CSV entries could slip through.
    file_list = [entry for entry in os.listdir(path_files) if ".csv" in entry]

    d_results = {
        'name_file': [],
        'percent_correct': [],
        'percent_incorrect': [],
        'confusion_matrix': []
    }

    print(file_list)

    for file_name in file_list:
        print(str(file_name))
        data = converters.load_any_file(path_files + "/" + file_name)
        data.class_is_last()

        # fresh output per file so each prediction file holds only that file's rows
        pout = PredictionOutput(
            classname="weka.classifiers.evaluation.output.prediction.CSV")
        evl = Evaluation(data)
        evl.crossvalidate_model(cls, data, fold, Random(random), pout)

        d_results['name_file'].append(str(file_name))
        d_results['percent_correct'].append(evl.percent_correct)
        d_results['percent_incorrect'].append(evl.percent_incorrect)
        d_results['confusion_matrix'].append(evl.matrix())

        save = pout.buffer_content()
        # [:-4] strips the trailing four characters of the name (the ".csv" suffix)
        with open(
                path_folder_save_results + '/' + 'prediction/' + str(name) +
                str(file_name)[:-4] + 'pred_data.csv', 'w') as f:
            f.write(save)

    d_results = pd.DataFrame(data=d_results)
    d_results.to_csv(path_folder_save_results + '/' + str(name) + ".csv",
                     index=False)
def index():
    """
    Serve the bot page (GET) or classify one submitted record (POST).

    On POST the form fields are turned into one comma-separated row with an
    unknown class value (``?``), appended to ``instances.arff``, and the whole
    file is then evaluated with the classifier stored in ``J48.model``. The
    label predicted for the last row is returned as JSON.

    The row is assembled before the file is opened and written inside a
    ``with`` block, so a missing or non-numeric form field can no longer leave
    an open handle behind.

    :return: the rendered ``bot.html`` template for GET, a JSON ``Response``
        with ``status`` and ``prediction`` for POST; other methods fall through
        and return ``None``
    """
    if request.method == "GET":
        return render_template('bot.html')

    if request.method == "POST":
        jvm.start()
        args = request.form.to_dict()

        # presumably weight arrives in kg and height in inches (kg -> lb, then
        # the 703-based BMI formula) - TODO confirm against the form
        weight_lb = float(args['weight']) * 2.20462
        bmi = (weight_lb / pow(float(args['height']), 2)) * 703

        hypertensive_status = '1' if args['hypertensive_status'] == "Yes" else '0'
        heart_disease_status = '1' if args['heart_disease_status'] == "Yes" else '0'

        # NOTE(review): form values are written into the ARFF file unescaped;
        # a value containing a comma, quote or newline would corrupt the file.
        st = "\n" + ",".join([
            args['gender'],
            args['age'],
            hypertensive_status,
            heart_disease_status,
            args['marrital_status'],
            args['work_type'],
            args['residence'],
            args['hypertension'],
            str(bmi),
            "'" + args['smoking_status'].lower() + "'",
            "?",
        ])
        print(st)
        with open("instances.arff", "a") as f:
            f.write(st)

        objects = serialization.read_all("J48.model")
        loader = Loader(classname="weka.core.converters.ArffLoader")
        csr = Classifier(jobject=objects[0])
        output_results = PredictionOutput(
            classname="weka.classifiers.evaluation.output.prediction.CSV")
        data1 = loader.load_file("instances.arff")
        data1.class_is_last()
        ev2 = Evaluation(data1)
        ev2.test_model(csr, data1, output_results)

        # prefix extra column names so the third CSV column can be read as "Predicted"
        TESTDATA = StringIO("Instance,Actual,Predicted," +
                            output_results.buffer_content())
        df = pd.read_csv(TESTDATA)
        # last row is the record appended above; keep the part after the ':'
        prediction = list(df.Predicted).pop().split(":")[1]
        print(prediction)

        response = {"status": "200", "prediction": prediction}
        return Response(json.dumps(response, indent=2),
                        mimetype="application/json")
def SimpleLogistic():
    """
    Cross-validate SimpleLogistic on ``First_trial_classification.arff`` and save a trained model.

    Runs a 10-fold cross-validation seeded with ``Random(486)``, prints the
    evaluation summary and the plain-text predictions, then trains the
    classifier on the complete dataset and serializes it to
    ``SimpleLogistic2.model``.
    """
    # load a dataset
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file("First_trial_classification.arff")
    data.class_is_last()  # set class attribute

    cls = Classifier(classname="weka.classifiers.functions.SimpleLogistic")
    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")
    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 10, Random(486), pout)

    print(evl.summary())
    print(pout.buffer_content())

    # Cross-validation trains copies of the classifier, one per fold, and
    # leaves `cls` itself unbuilt; train it on all data so the saved model
    # can actually make predictions.
    cls.build_classifier(data)

    # save model; write_all expects a list of objects
    serialization.write_all("SimpleLogistic2.model", [cls])
def SMOreg():
    """
    Cross-validate SMOreg with an RBF kernel on ``First_trial_regression.arff`` and save a trained model.

    SMOreg is configured with ``-N 0`` and an RBFKernel with ``-G 0.2``. A
    10-fold cross-validation seeded with ``Random(486)`` is run, the evaluation
    summary and the plain-text predictions are printed, then the classifier is
    trained on the complete dataset and serialized to ``SMOreg.model2``.
    """
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file("First_trial_regression.arff")
    data.class_is_last()

    cls = KernelClassifier(classname="weka.classifiers.functions.SMOreg",
                           options=["-N", "0"])
    kernel = Kernel(
        classname="weka.classifiers.functions.supportVector.RBFKernel",
        options=["-G", "0.2"])
    cls.kernel = kernel

    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")
    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 10, Random(486), pout)

    print(evl.summary())
    print(pout.buffer_content())

    # Cross-validation trains copies of the classifier, one per fold, and
    # leaves `cls` itself unbuilt; train it on all data so the saved model
    # can actually make predictions.
    cls.build_classifier(data)

    # save model; write_all expects a list of objects
    serialization.write_all("SMOreg.model2", [cls])
def handle_message(message):
    """
    Buffer one sensor message and classify the gesture once enough data exists.

    Accelerometer and gyroscope readings are appended to the module-level
    sample lists; a ``"stop"`` message calls ``stop()``. As soon as both the
    gyroscope and the accelerometer buffers hold at least 25 samples, the
    samples are converted to ARFF, an LMT classifier is trained on
    ``train.arff`` and applied to ``classify.arff``. The gesture found in the
    last line of the prediction output selects the sound to play, and the
    buffers are reset.

    :param message: mapping with a ``sensorName`` key and, for sensor readings,
        ``x``/``y``/``z`` values convertible to float
    """
    global accel_x, accel_y, accel_z
    global gyro_x, gyro_y, gyro_z

    sensor = message['sensorName']
    if sensor == 'accelerometer':
        axis_buffers = (accel_x, accel_y, accel_z)
    elif sensor == 'gyroscope':
        axis_buffers = (gyro_x, gyro_y, gyro_z)
    else:
        axis_buffers = ()
        if sensor == "stop":  # stop signal
            stop()
    for axis, samples in zip(('x', 'y', 'z'), axis_buffers):
        samples.append(float(message[axis]))

    # only classify when both gyroscope and accelerometer hold at least 25 samples
    if len(gyro_x) < 25 or len(accel_x) < 25:
        return

    processDataToArff(accel_x, accel_y, accel_z, gyro_x, gyro_y, gyro_z)
    jvm.start()

    loader = Loader(classname="weka.core.converters.ArffLoader")
    training_data = loader.load_file("train.arff")
    training_data.class_is_last()

    model = Classifier(classname="weka.classifiers.trees.LMT")
    model.build_classifier(training_data)

    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")
    evl = Evaluation(training_data)

    # the freshly recorded samples to classify
    unseen_data = loader.load_file("classify.arff")
    unseen_data.class_is_last()
    evl.test_model(model, unseen_data, pout)

    # Every line overwrites the outcome, so the last line of the output wins;
    # with no lines at all the outcome stays the (empty) buffer text.
    gesture_codes = (("upDown", 1), ("leftRight", 2), ("inOut", 3), ("rotation", 4))
    outcome = pout.buffer_content()
    for line in outcome.splitlines():
        for label, code in gesture_codes:
            if label in line:
                outcome = code
                break
        else:
            outcome = "error"

    if outcome in (1, 2, 3, 4):
        stop()
        if outcome == 1:
            playD()
        elif outcome == 2:
            playBm()
        elif outcome == 3:
            playA()
        else:
            playG()

    # clear the arrays for new data
    gyro_x, gyro_y, gyro_z = [], [], []
    accel_x, accel_y, accel_z = [], [], []
# Build the classifier on dataA and show its command line, model and graph.
print(classifier.to_commandline())
classifier.build_classifier(dataA)
print("\n--> classifier:\n")
print(classifier)
print("\n--> graph:\n")
print(classifier.graph)

# Write the predictions for dataA to a CSV file in the helper's temp directory.
outputfile = helper.get_tmp_dir() + "/result.csv"
output = PredictionOutput(
    classname='weka.classifiers.evaluation.output.prediction.CSV',
    options=["-distribution", "-suppress", "-file", outputfile])
print("\n--> Output:\n")
output.header = dataA
output.print_all(classifier, dataA)
helper.print_info("Predictions stored in:" + outputfile)
# presumably prints nothing because of "-suppress" - TODO confirm
print(output.buffer_content())

# Evaluate on the same data the classifier was built on, reusing the output object.
Eval = Evaluation(dataA)
Eval.test_model(classifier, dataA, output=output)
print(Eval.summary())

# Placeholders; each one is reassigned below before it is read.
ListEval = []
Corr = []
Corrf = []

# Pull the text following 'Correlation coefficient ' out of the summary:
# take everything before 'Mean absolute error', then the second line of that part.
ListEval = Eval.summary().split('Mean absolute error')
print("ListEval :")
print(ListEval)
Corr = ListEval[0].split('\n')
Corrf = Corr[1].split('Correlation coefficient ')
print("Corrf :")
print(Corrf[1])
ListEvalRAE = []
def experiment_sequential_file(path_indices, path_features,
                               path_folder_save_results, options, classifier,
                               name, indicator_col, images):
    """
    Evaluate a classifier on consecutive row blocks, training on the remaining rows.

    The block start rows come from the index archive at ``path_indices`` (each
    stored value is incremented by one and the number of rows in the data is
    appended as final boundary). For every block the classifier is built on all
    rows outside the block and tested on the block. The predictions of all
    blocks share one CSV output, which is written to
    ``<path_folder_save_results>/prediction/<name>pred_data.csv``, read back and
    passed to ``create_prediction``. Per-block metrics are written to
    ``<path_folder_save_results>/<name>results.csv``.

    :param path_indices: archive holding the block start indices
    :param path_features: file with the features; the last attribute is used as class
    :param path_folder_save_results: folder receiving the result files
    :param options: classifier options as a single string (split with ``split_options``)
    :param classifier: Weka class name of the classifier
    :param name: prefix of the output file names
    :param indicator_col: forwarded unchanged to ``create_prediction``
    :param images: forwarded unchanged to ``create_prediction``
    """
    archive = load(path_indices)
    # every entry is read; the last one provides the boundaries
    for key in archive.files:
        boundaries = archive[key] + 1

    model = Classifier(classname=classifier,
                       options=weka.core.classes.split_options(options))
    dataset = converters.load_any_file(path_features)
    boundaries = np.append(boundaries, len(dataset))
    dataset.class_is_last()

    predictions = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.CSV")
    metrics = {
        'index': [],
        'percent_correct': [],
        'percent_incorrect': [],
        'precision': [],
        'recall': [],
        'f-score': [],
        'confusion_matrix': []
    }

    block_count = len(boundaries) - 1
    for j in range(block_count):
        is_final_block = j == block_count - 1
        first = boundaries[j]
        # the final block runs up to and including the last row
        last = boundaries[j + 1] if is_final_block else boundaries[j + 1] - 1

        test_part = dataset.subset(row_range=str(first) + '-' + str(last))

        if j == 0:
            # first block: train on everything after it
            train_range = str(last + 1) + '-' + str(boundaries[-1])
            train_part = dataset.subset(row_range=train_range)
            print(train_range)
        elif is_final_block:
            # last block: train on everything before it
            train_range = '1-' + str(first - 1)
            train_part = dataset.subset(row_range=train_range)
            print(train_range)
        else:
            # central block: train on the rows on both sides
            train_range = ('1-' + str(first - 1) + ',' + str(last + 1) + '-'
                           + str(boundaries[-1]))
            print(train_range)
            train_part = dataset.subset(row_range=train_range)

        model.build_classifier(train_part)

        evaluation = Evaluation(dataset)
        evaluation.test_model(model, test_part, predictions)

        metrics['index'].append(str(boundaries[j]))
        metrics['percent_correct'].append(evaluation.percent_correct)
        metrics['percent_incorrect'].append(evaluation.percent_incorrect)
        metrics['precision'].append(evaluation.precision(1))
        metrics['recall'].append(evaluation.recall(1))
        metrics['f-score'].append(evaluation.f_measure(1))
        metrics['confusion_matrix'].append(evaluation.matrix())

    prediction_text = predictions.buffer_content()
    check_folder_or_create(path_folder_save_results + '/' + 'prediction')
    prediction_path = (path_folder_save_results + '/' + 'prediction/' + name
                       + 'pred_data.csv')
    with open(prediction_path, 'w') as handle:
        handle.write(prediction_text)

    # no header row is assumed, so the columns are addressed by position
    prediction_table = pd.read_csv(prediction_path,
                                   index_col=False,
                                   header=None)
    create_prediction(prediction_table[1], prediction_table[2],
                      prediction_table[3], indicator_col, images, name,
                      path_folder_save_results + '/prediction/')

    metrics = pd.DataFrame(data=metrics)
    metrics.to_csv(path_folder_save_results + '/' + name + 'results.csv',
                   index=False)
def experiment_more_file(path_files, path_folder_save_results, fold, options,
                         classifier, random, name, voting=False):
    """
    Cross-validate one classifier on every CSV file of a folder and post-process the predictions.

    For each directory entry whose name contains ``.csv``:

    * the indicator table ``<path_files>/indicator/<first character of the file
      name>_indicator.csv`` is read (columns ``indicator`` and ``image``),
    * the file is loaded, its last attribute set as class, and cross-validated,
    * the buffered predictions are written to
      ``<path_folder_save_results>/<name>/prediction/pred_data.csv``, read back,
      and its ``actual``/``predicted``/``error`` columns are handed to
      ``create_prediction``.

    The metrics of all files are written to
    ``<path_folder_save_results>/<name>.csv``.

    :param path_files: folder containing the CSV feature files and the ``indicator`` subfolder
    :param path_folder_save_results: folder receiving the result files
    :param fold: number of cross-validation folds
    :param options: classifier options as a single string (split with ``split_options``)
    :param classifier: Weka class name of the classifier
    :param random: seed passed to ``Random``
    :param name: name of the result subfolder and of the summary file
    :param voting: accepted but not used by this function
    """
    cls = Classifier(classname=classifier,
                     options=weka.core.classes.split_options(options))

    # Build a filtered list rather than calling remove() on the list being
    # iterated: removing during iteration skips the entry that follows each
    # removed one, so adjacent non-CSV entries (such as the "indicator"
    # subfolder next to another non-CSV entry) could slip through.
    file_list = [entry for entry in os.listdir(path_files) if ".csv" in entry]

    d_results = {
        'name_file': [],
        'percent_correct': [],
        'percent_incorrect': [],
        'precision': [],
        'recall': [],
        'f-score': [],
        'confusion_matrix': []
    }

    prediction_dir = path_folder_save_results + '/' + name + '/' + 'prediction'
    prediction_path = (path_folder_save_results + '/' + name + '/' +
                       'prediction/pred_data.csv')

    for file_name in file_list:
        indicator_table = pd.read_csv(path_files + '/indicator/' +
                                      file_name[0] + '_indicator.csv')
        indicator = list(indicator_table['indicator'])
        images = list(indicator_table['image'])

        data = converters.load_any_file(path_files + "/" + file_name)
        data.class_is_last()

        pout = PredictionOutput(
            classname="weka.classifiers.evaluation.output.prediction.CSV")
        evl = Evaluation(data)
        evl.crossvalidate_model(cls, data, fold, Random(random), pout)

        d_results['name_file'].append(str(file_name))
        d_results['percent_correct'].append(evl.percent_correct)
        d_results['percent_incorrect'].append(evl.percent_incorrect)
        d_results['precision'].append(evl.precision(1))
        d_results['recall'].append(evl.recall(1))
        d_results['f-score'].append(evl.f_measure(1))
        d_results['confusion_matrix'].append(evl.matrix())

        # pred_data.csv is a scratch file that is overwritten for every input file
        save = pout.buffer_content()
        check_folder_or_create(prediction_dir)
        with open(prediction_path, 'w') as f:
            f.write(save)

        buffer_save = pd.read_csv(prediction_path, index_col=False)
        col_label = buffer_save['actual']
        col_prediction = buffer_save['predicted']
        col_different = buffer_save['error']

        # file_name[:-4] strips the trailing four characters (the ".csv" suffix)
        create_prediction(
            col_label, col_prediction, col_different, indicator, images,
            file_name[:-4],
            path_folder_save_results + '/' + name + '/prediction/')

    d_results = pd.DataFrame(data=d_results)
    d_results.to_csv(path_folder_save_results + '/' + str(name) + ".csv")
def experiment_sequential_file(path_indices, path_features,
                               path_folder_save_results, options, classifier,
                               name):
    """
    Evaluate a classifier on consecutive row blocks, training on the remaining rows.

    The block boundaries come from the index archive at ``path_indices`` (each
    stored value is incremented by one and the number of rows in the data is
    appended as final boundary). Block ``j`` is tested on rows
    ``boundary[j]``..``boundary[j + 1]`` and trained on the rows before and
    after that range. After each block the content of the shared prediction
    output is written to
    ``<path_folder_save_results>//prediction/<name><j>pred_data.csv``; the
    per-block metrics go to ``<path_folder_save_results>/<name>results.csv``.

    :param path_indices: archive holding the block start indices
    :param path_features: file with the features; the last attribute is used as class
    :param path_folder_save_results: folder receiving the result files
    :param options: classifier options as a single string (split with ``split_options``)
    :param classifier: Weka class name of the classifier
    :param name: prefix of the output file names
    """
    archive = load(path_indices)
    # every entry is read; the last one provides the boundaries
    for key in archive.files:
        boundaries = archive[key] + 1

    model = Classifier(classname=classifier,
                       options=weka.core.classes.split_options(options))
    dataset = converters.load_any_file(path_features)
    boundaries = np.append(boundaries, len(dataset))
    dataset.class_is_last()

    predictions = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.CSV")
    metrics = {
        'index': [],
        'percent_correct': [],
        'percent_incorrect': [],
        'confusion_matrix': []
    }

    block_count = len(boundaries) - 1
    for j in range(block_count):
        lower = boundaries[j]
        upper = boundaries[j + 1]
        test_range = str(lower) + '-' + str(upper)

        print(j)
        print(test_range)
        test_part = dataset.subset(row_range=test_range)

        if j == 0:
            # first block: train on everything after it
            train_range = str(upper + 1) + '-' + str(boundaries[-1])
        elif j == block_count - 1:
            # last block: train on everything before it
            train_range = '1-' + str(lower - 1)
        else:
            # central block: train on the rows on both sides
            train_range = ('1-' + str(lower - 1) + ',' + str(upper + 1) + '-'
                           + str(boundaries[-1]))
        train_part = dataset.subset(row_range=train_range)

        model.build_classifier(train_part)

        evaluation = Evaluation(dataset)
        evaluation.test_model(model, test_part, predictions)

        prediction_text = predictions.buffer_content()
        prediction_path = (path_folder_save_results + '/' + '/prediction/'
                           + name + str(j) + 'pred_data.csv')
        with open(prediction_path, 'w') as handle:
            handle.write(prediction_text)

        metrics['index'].append(str(boundaries[j]))
        metrics['percent_correct'].append(evaluation.percent_correct)
        metrics['percent_incorrect'].append(evaluation.percent_incorrect)
        metrics['confusion_matrix'].append(evaluation.matrix())

    metrics = pd.DataFrame(data=metrics)
    metrics.to_csv(path_folder_save_results + '/' + name + 'results.csv',
                   index=False)
# In[4]: f= open("instances.arff","r") print(f.read()) f.close() # In[10]: from io import StringIO output_results = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.CSV") data1 = loader.load_file("instances.arff") data1.class_is_last() ev2 = Evaluation(data1) ev2.test_model(csr,data1,output_results) print("Class prediction: ",output_results.buffer_content()[-13:-10]) print("\n\n Instance"," Actual"," Predicted") print(output_results.buffer_content()) TESTDATA = StringIO("Instance,Actual,Predicted,"+output_results.buffer_content()) # jvm.stop() x = pd.read_csv(TESTDATA) # In[14]: list(x.Predicted).pop().split(":")[1]
train.class_is_last()
test = loader.load_file("test.arff")
test.class_is_last()

# LMT is the algorithm used; it is trained on the training data
cls = Classifier(classname="weka.classifiers.trees.LMT")
cls.build_classifier(train)

# collect the predictions for the test data as plain text
pout = PredictionOutput(
    classname="weka.classifiers.evaluation.output.prediction.PlainText")
evl = Evaluation(train)
evl.test_model(cls, test, pout)
result = pout.buffer_content()

# report, per output line, the first gesture name found in it (or "error")
resultLines = result.splitlines()
gesture_formats = (
    ("upDown", "%d upDown"),
    ("leftRight", "%d leftRight"),
    ("inOut", "%d inOut"),
    ("rotation", "%d rotation"),
)
for i, line in enumerate(resultLines):
    for gesture, line_format in gesture_formats:
        if gesture in line:
            print(line_format % (i + 1))
            break
    else:
        print("error")