def testNB(training_data, testing_data):
    train_data = Instances.copy_instances(training_data)
    test_data = Instances.copy_instances(testing_data)
    evaluation = Evaluation(train_data)
    classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
    classifier.build_classifier(train_data)  # build classifier on the training data
    evaluation.test_model(classifier, test_data)  # test and evaluate model on the test set
    print("")
    print("")
    print(evaluation.summary("--------------Naive Bayes Evaluation--------------"))
    print("Accuracy: " + str(evaluation.percent_correct))
    print("")
    print("Label\tPrecision\t\tRecall\t\t\tF-Measure")
    print("<=50K\t" + str(evaluation.precision(0)) + "\t" + str(evaluation.recall(0)) +
          "\t" + str(evaluation.f_measure(0)))
    print(">50K\t" + str(evaluation.precision(1)) + "\t" + str(evaluation.recall(1)) +
          "\t" + str(evaluation.f_measure(1)))
    print("Mean\t" + str((evaluation.precision(1) + evaluation.precision(0)) / 2) +
          "\t" + str((evaluation.recall(1) + evaluation.recall(0)) / 2) +
          "\t" + str((evaluation.f_measure(1) + evaluation.f_measure(0)) / 2))
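# --- A minimal, hypothetical driver for testNB() above (python-weka-wrapper3 sketch). ---
# The ARFF file names are placeholders; the sketch assumes the class attribute is the
# last column in both files and provides the imports the function body relies on.
import weka.core.jvm as jvm
from weka.core.converters import Loader
from weka.core.dataset import Instances
from weka.classifiers import Classifier, Evaluation

jvm.start()
loader = Loader(classname="weka.core.converters.ArffLoader")
train = loader.load_file("adult_train.arff")   # placeholder path
train.class_is_last()
test = loader.load_file("adult_test.arff")     # placeholder path
test.class_is_last()
testNB(train, test)
jvm.stop()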
def readCross(num, type, select_feature, numtrees):
    filename = resultFile + '_' + type + '_' + num + '_' + select_feature + '_all.csv'
    loader = CSVLoader()
    loader.setSource(File(filename))
    data = loader.getDataSet()
    #print data.numAttributes()
    data.setClassIndex(data.numAttributes() - 1)
    rf = RF()
    rf.setNumTrees(numtrees)
    #pred_output = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.PlainText", options=["-distribution"])
    buffer = StringBuffer()  # buffer for the predictions
    output = PlainText()
    output.setHeader(data)
    output.setBuffer(buffer)
    output.setOutputDistribution(True)
    attRange = Range()  # attributes to output
    outputDistributions = Boolean(True)
    evaluator = Evaluation(data)
    evaluator.crossValidateModel(rf, data, 10, Random(1), [output, attRange, outputDistributions])
    print evaluator.toSummaryString()
    print evaluator.toClassDetailsString()
    print evaluator.toMatrixString()
    return [evaluator.precision(1), evaluator.recall(1), evaluator.fMeasure(1),
            evaluator.matthewsCorrelationCoefficient(1), evaluator.numTruePositives(1),
            evaluator.numFalsePositives(1), evaluator.numTrueNegatives(1),
            evaluator.numFalseNegatives(1), evaluator.areaUnderROC(1)]
def naiveBayes(data):
    classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayes", options=["-D"])
    nfolds = 13
    rnd = Random(0)
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(classifier, data, nfolds, rnd)
    print(" Naive Bayes Cross-validation information")
    print(evaluation.summary())
    print("precision: " + str(evaluation.precision(1)))
    print("recall: " + str(evaluation.recall(1)))
    print("F-measure: " + str(evaluation.f_measure(1)))
    print("==confusion matrix==")
    print(" a b")
    print(evaluation.confusion_matrix)
    print()

    # write to file
    f = open("naiveeval.txt", "w")
    f.write(evaluation.summary())
    f.write("\n")
    f.write("==confusion matrix==\n")
    f.write(" a b\n")
    for item in evaluation.confusion_matrix:
        f.write("%s\n" % item)
    f.close()

    # plot roc graph
    plcls.plot_roc(evaluation, title="Naive Bayes ROC", outfile="NBROC", wait=True)
    return evaluation.percent_correct
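# --- Imports assumed by naiveBayes() above (a sketch, not part of the original snippet). ---
# python-weka-wrapper3 with matplotlib is required for the ROC plot; "plcls" is the alias
# the function uses for the plotting module.
import weka.plot.classifiers as plcls
from weka.classifiers import Classifier, Evaluation
from weka.core.classes import Random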
def readFeature(num_features, type, select_feature, numtrees):
    #filename1=resultFileTest
    #filename2=resultFileTest2
    filename1 = resultFile + '_' + type + '_' + num_features + '_' + select_feature + '_train.csv'
    filename2 = resultFile + '_' + type + '_' + num_features + '_' + select_feature + '_test.csv'
    #print filename1
    loader = CSVLoader()
    loader.setSource(File(filename1))
    data = loader.getDataSet()
    #print data.numAttributes()
    data.setClassIndex(data.numAttributes() - 1)
    rf = RF()
    rf.setNumTrees(numtrees)
    rf.buildClassifier(data)
    #print rf
    loader.setSource(File(filename2))
    test_data = Instances(loader.getDataSet())
    test_data.setClassIndex(test_data.numAttributes() - 1)
    '''
    num = test_data.numInstances()
    print num
    for i in xrange(num):
        r1 = rf.distributionForInstance(test_data.instance(i))
        r2 = rf.classifyInstance(test_data.instance(i))
        print r1
        print r2
    '''
    buffer = StringBuffer()  # buffer for the predictions
    output = PlainText()
    output.setHeader(test_data)
    output.setBuffer(buffer)
    attRange = Range()  # attributes to output
    outputDistribution = Boolean(True)
    evaluator = Evaluation(data)
    evaluator.evaluateModel(rf, test_data, [output, attRange, outputDistribution])
    #print evaluator.evaluateModel(RF(), ['-t', filename1, '-T', filename2, '-I', str(numtrees)])
    #evaluator1=Evaluation(test_data)
    print evaluator.toSummaryString()
    print evaluator.toClassDetailsString()
    print evaluator.toMatrixString()
    return [evaluator.precision(1), evaluator.recall(1), evaluator.fMeasure(1),
            evaluator.matthewsCorrelationCoefficient(1), evaluator.numTruePositives(1),
            evaluator.numFalsePositives(1), evaluator.numTrueNegatives(1),
            evaluator.numFalseNegatives(1), evaluator.areaUnderROC(1)]
def ClassifyWithDT(f3, test, tree, fileOut):
    eval = Evaluation(f3)
    tree.build_classifier(f3)
    eval.test_model(tree, test)
    fileOut.write("\n\nSelf-Training data========" + str((1 - eval.error_rate) * 100) +
                  " number of instances==" + str(f3.num_instances) + "\n")
    fileOut.write("\n Error Rate==" + str(eval.error_rate) + "\n")
    fileOut.write("\n precision recall areaUnderROC \n\n")
    for i in range(test.get_instance(0).num_classes):
        fileOut.write(str(eval.precision(i)) + " " + str(eval.recall(i)) + " " +
                      str(eval.area_under_roc(i)) + "\n")
    return eval
def experiment_file_random(path_features, path_folder_save_results, options,
                           classifier, fold, random, name):
    print(name + " Start: " + str(datetime.datetime.now()))
    time = datetime.datetime.now()
    cls = Classifier(classname=classifier,
                     options=weka.core.classes.split_options(options))
    d_results = {
        'percent_correct': [],
        'percent_incorrect': [],
        'precision': [],
        'recall': [],
        'f-score': [],
        'confusion_matrix': []
    }
    data = converters.load_any_file(path_features)
    data.class_is_last()
    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.CSV")
    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, fold, Random(random), pout)
    d_results['percent_correct'].append(evl.percent_correct)
    d_results['percent_incorrect'].append(evl.percent_incorrect)
    d_results['precision'].append(evl.precision(1))
    d_results['recall'].append(evl.recall(1))
    d_results['f-score'].append(evl.f_measure(1))
    d_results['confusion_matrix'].append(evl.matrix())  # generates the confusion matrix
    d_results = pd.DataFrame(data=d_results)
    d_results.to_csv(path_folder_save_results + '/' + str(name) + '.csv', index=False)
    save = pout.buffer_content()
    check_folder_or_create(path_folder_save_results + '/' + 'prediction')
    with open(path_folder_save_results + '/' + 'prediction/' + str(name) + '.csv', 'w') as f:
        f.write(save)
    print(name + " End: " + str(datetime.datetime.now() - time))
def do_temporal_cv(t_selector, instances, num_folds):
    num_instances = instances.numInstances()
    results = []
    # Split folds
    for f in xrange(2, num_folds + 1):
        print "fold:%d" % f
        for pair in split_temporal_train_test(f, num_instances):
            # train_start = pair.train_start
            # train_end = pair.train_end
            train_set = Instances(instances, int(pair.train_start),
                                  int(pair.train_end - pair.train_start + 1))
            test_set = Instances(instances, int(pair.test_start),
                                 int(pair.test_end - pair.test_start + 1))
            t_selector.buildClassifier(train_set)
            e = Evaluation(train_set)
            e.evaluateModel(t_selector, test_set)
            if e.recall(0) > 0 and e.precision(0) > 0:
                results.append(Result(instances.numAttributes(), e))
            # print "precision: %.2f" % evalTest.precision(0)
            # print "recall: %.2f" % evalTest.recall(0)
            # print evalTest.toSummaryString()
    sum_precision = 0
    sum_recall = 0
    for r in results:
        # print "precision:"
        # print r.precision
        # print "recall:"
        # print r.recall
        sum_precision += r.precision
        sum_recall += r.recall
    precision = sum_precision * 1.0 / len(results)
    recall = sum_recall * 1.0 / len(results)
    avg_fmeasure = harmonic_mean([precision, recall])
    print "f_measure:%.2f" % avg_fmeasure
def experiment_more_file(path_files, path_folder_save_results, fold, options,
                         classifier, random, name, voting=False):
    cls = Classifier(classname=classifier,
                     options=weka.core.classes.split_options(options))
    # keep only the CSV files (removing items from a list while iterating over it skips elements)
    file_list = [f for f in os.listdir(path_files) if ".csv" in f]
    d_results = {
        'name_file': [],
        'percent_correct': [],
        'percent_incorrect': [],
        'precision': [],
        'recall': [],
        'f-score': [],
        'confusion_matrix': []
    }
    for file in file_list:
        indicator_table = pd.read_csv(path_files + '/indicator/' + file[0] + '_indicator.csv')
        indicator = list(indicator_table['indicator'])
        images = list(indicator_table['image'])
        data = converters.load_any_file(path_files + "/" + file)
        data.class_is_last()
        pout = PredictionOutput(
            classname="weka.classifiers.evaluation.output.prediction.CSV")
        evl = Evaluation(data)
        evl.crossvalidate_model(cls, data, fold, Random(random), pout)
        d_results['name_file'].append(str(file))
        d_results['percent_correct'].append(evl.percent_correct)
        d_results['percent_incorrect'].append(evl.percent_incorrect)
        d_results['precision'].append(evl.precision(1))
        d_results['recall'].append(evl.recall(1))
        d_results['f-score'].append(evl.f_measure(1))
        d_results['confusion_matrix'].append(evl.matrix())  # generates the confusion matrix
        save = pout.buffer_content()
        check_folder_or_create(path_folder_save_results + '/' + name + '/' + 'prediction')
        with open(path_folder_save_results + '/' + name + '/' + 'prediction/pred_data.csv', 'w') as f:
            f.write(save)
        buffer_save = pd.read_csv(path_folder_save_results + '/' + name + '/' + 'prediction/pred_data.csv',
                                  index_col=False)
        col_label = buffer_save['actual']
        col_prediction = buffer_save['predicted']
        col_different = buffer_save['error']
        create_prediction(col_label, col_prediction, col_different, indicator, images,
                          file[:-4],
                          path_folder_save_results + '/' + name + '/prediction/')
    d_results = pd.DataFrame(data=d_results)
    d_results.to_csv(path_folder_save_results + '/' + str(name) + ".csv")
classname= "weka.classifiers.misc.InputMappedClassifier", options=[ "-M", "-W", "weka.classifiers.bayes.NaiveBayes" ]) Class = 'NaiveBayes' mapper.build_classifier(dataTrain) evaluation = Evaluation(dataTrain) evaluation.test_model(mapper, dataTest) NB_AUC[seed - 1, fold - 1, 0] = (evaluation.area_under_roc(1) * 100) NB_Recall[seed - 1, fold - 1, 0] = (evaluation.recall(yIndex) * 100) NB_Precision[seed - 1, fold - 1, 0] = ( evaluation.precision(yIndex) * 100) mapper.build_classifier(dataTrainFS) evaluation = Evaluation(dataTrainFS) evaluation.test_model(mapper, dataTestFS) NB_AUC[seed - 1, fold - 1, 1] = (evaluation.area_under_roc(1) * 100) NB_Recall[seed - 1, fold - 1, 1] = (evaluation.recall(yIndex) * 100) NB_Precision[seed - 1, fold - 1, 1] = (
dataLastTrain.class_is_last()
dataLastTest.class_is_last()

from weka.classifiers import Evaluation
from weka.core.classes import Random
from weka.classifiers import Classifier

if classifier == 0:
    for kernel in range(0, 2):
        if kernel == 0:
            mapper = Classifier(classname="weka.classifiers.misc.InputMappedClassifier",
                                options=["-M", "-W", "weka.classifiers.bayes.NaiveBayes"])
            Class = 'NaiveBayes'
            mapper.build_classifier(dataTrain)
            evaluation = Evaluation(dataTrain)
            evaluation.test_model(mapper, dataTest)
            Scores.write(str(evaluation.area_under_roc(1) * 100) + ',')
            recall_NB.append(evaluation.recall(1) * 100)
            precision_NB.append(evaluation.precision(1) * 100)

            mapper.build_classifier(dataLastTrain)
            evaluation = Evaluation(dataLastTrain)
            evaluation.test_model(mapper, dataLastTest)
            ScoresLast.write(str(evaluation.area_under_roc(1) * 100) + ',')
        else:
            mapper = Classifier(classname="weka.classifiers.misc.InputMappedClassifier",
                                options=["-M", "-W", "weka.classifiers.bayes.NaiveBayes", "--", "-K"])
            Class = 'NaiveBayes'
            mapper.build_classifier(dataTrain)
pred_output = PredictionOutput(
    classname="weka.classifiers.evaluation.output.prediction.PlainText",
    options=["-distribution"])
evaluation = Evaluation(data)
evaluation.crossvalidate_model(classifier, data, 10, Random(42), output=pred_output)
plot_cls.plot_roc(evaluation, title="ROC bugs",
                  class_index=range(0, data.class_attribute.num_values), wait=False)
plot_cls.plot_prc(evaluation, title="PRC bugs - NaiveBayes",
                  class_index=range(0, data.class_attribute.num_values), wait=False)

"""Performance Metrics - Naive Bayes Classifier"""
print(evaluation.summary())
print(evaluation.class_details())
print(evaluation.matrix())
print("confusionMatrix: " + str(evaluation.confusion_matrix))
print("fMeasure: " + str(evaluation.f_measure(1)))
print("precision: " + str(evaluation.precision(1)))
print("recall: " + str(evaluation.recall(1)))

"""Random Forest Classifier"""
classifier2 = Classifier(classname="weka.classifiers.trees.RandomForest")
evaluation2 = Evaluation(data)
evaluation2.crossvalidate_model(classifier2, data, 10, Random(42))
plot_cls.plot_roc(evaluation2, title="ROC bugs",
                  class_index=range(0, data.class_attribute.num_values), wait=False)
plot_cls.plot_prc(evaluation2, title="PRC bugs - RandomForest",
                  class_index=range(0, data.class_attribute.num_values), wait=False)

"""Performance Evaluation Metrics - Random Forest"""
print(evaluation2.summary())
print(evaluation2.class_details())
print(evaluation2.matrix())
if kernel == 0:
    mapper = Classifier(
        classname="weka.classifiers.misc.InputMappedClassifier",
        options=['-M', "-W", "weka.classifiers.bayes.NaiveBayes"])
    Class = 'NaiveBayes'
    mapper.build_classifier(dataTrain)
    evaluation = Evaluation(dataTrain)
    evaluation.test_model(mapper, dataTest)
    roc_aux_NB.append(evaluation.area_under_roc(1) * 100)
    recall_aux_NB.append(evaluation.recall(1) * 100)
    precision_aux_NB.append(evaluation.precision(1) * 100)
elif classifier == 1:
    for degree in range(3, 4):
        mapper = Classifier(
            classname="weka.classifiers.misc.InputMappedClassifier",
            options=['-M', "-W", "weka.classifiers.functions.SMO", "--", "-K",
                     "weka.classifiers.functions.supportVector.PolyKernel -E " + str(degree)])
classname= "weka.classifiers.misc.InputMappedClassifier", options=[ "-M", "-W", "weka.classifiers.bayes.NaiveBayes" ]) Class = 'NaiveBayes' mapper.build_classifier(dataTrainSlow) evaluation = Evaluation(dataTrainSlow) evaluation.test_model(mapper, dataTestSlow) NB_AUC[seed - 1, fold - 1, 0] = (evaluation.area_under_roc(1) * 100) NB_Recall[seed - 1, fold - 1, 0] = (evaluation.recall(yIndexSlow) * 100) NB_Precision[seed - 1, fold - 1, 0] = ( evaluation.precision(yIndexSlow) * 100) if window == 365: mapper = Classifier( classname= "weka.classifiers.misc.InputMappedClassifier", options=[ "-M", "-W", "weka.classifiers.bayes.NaiveBayes", '--', '-K' ]) else: mapper = Classifier(
def experiment_sequential_file(path_indices, path_features, path_folder_save_results,
                               options, classifier, name, indicator_col, images):
    ind_f = load(path_indices)
    lst = ind_f.files
    for item in lst:
        ind = ind_f[item] + 1

    cls = Classifier(classname=classifier,
                     options=weka.core.classes.split_options(options))
    data = converters.load_any_file(path_features)
    ind = np.append(ind, len(data))
    data.class_is_last()
    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.CSV")
    d_results = {
        'index': [],
        'percent_correct': [],
        'percent_incorrect': [],
        'precision': [],
        'recall': [],
        'f-score': [],
        'confusion_matrix': []
    }
    for j in range(len(ind) - 1):
        first = ind[j]
        if j == len(ind) - 2:
            last = ind[j + 1]
        else:
            last = ind[j + 1] - 1
        d_test = data.subset(row_range=str(first) + '-' + str(last))
        if j == 0:  # first
            d_train = data.subset(row_range=str(last + 1) + '-' + str(ind[-1]))  # last element
            print(str(last + 1) + '-' + str(ind[-1]))
        elif j == len(ind) - 2:  # last
            d_train = data.subset(row_range='1-' + str(first - 1))  # last element
            print('1-' + str(first - 1))
        else:  # central
            s = '1-' + str(first - 1) + ',' + str(last + 1) + '-' + str(ind[-1])
            print(s)
            d_train = data.subset(row_range=s)
        cls.build_classifier(d_train)
        evl = Evaluation(data)
        evl.test_model(cls, d_test, pout)
        # print(type(d_train))
        # print(type(d_test))
        d_results['index'].append(str(ind[j]))
        d_results['percent_correct'].append(evl.percent_correct)
        d_results['percent_incorrect'].append(evl.percent_incorrect)
        d_results['precision'].append(evl.precision(1))
        d_results['recall'].append(evl.recall(1))
        d_results['f-score'].append(evl.f_measure(1))
        d_results['confusion_matrix'].append(evl.matrix())  # generates the confusion matrix
    save = pout.buffer_content()
    check_folder_or_create(path_folder_save_results + '/' + 'prediction')
    with open(path_folder_save_results + '/' + 'prediction/' + name + 'pred_data.csv', 'w') as f:
        f.write(save)
    buffer_save = pd.read_csv(path_folder_save_results + '/' + 'prediction/' + name + 'pred_data.csv',
                              index_col=False, header=None)
    col_label = buffer_save[1]
    col_prediction = buffer_save[2]
    col_different = buffer_save[3]
    create_prediction(col_label, col_prediction, col_different, indicator_col, images,
                      name, path_folder_save_results + '/prediction/')
    d_results = pd.DataFrame(data=d_results)
    d_results.to_csv(path_folder_save_results + '/' + name + 'results.csv', index=False)
if classifier == 0:
    SMOTE = Filter(classname="weka.filters.supervised.instance.SMOTE",
                   options=['-P', str(smote)])
    SMOTE.inputformat(dataTrain)
    dataTrain = SMOTE.filter(dataTrain)
    SMOTE.inputformat(dataLastTrain)
    dataLastTrain = SMOTE.filter(dataLastTrain)
    for kernel in range(0, 1):
        if kernel == 0:
            mapper = Classifier(classname="weka.classifiers.misc.InputMappedClassifier",
                                options=["-M", "-W", "weka.classifiers.bayes.NaiveBayes"])
            Class = 'NaiveBayes'
            mapper.build_classifier(dataTrain)
            evaluation = Evaluation(dataTrain)
            evaluation.test_model(mapper, dataTest)
            roc_NB.append(evaluation.area_under_roc(1) * 100)
            recall_NB.append(evaluation.recall(yIndex) * 100)
            precision_NB.append(evaluation.precision(yIndex) * 100)

            mapper.build_classifier(dataLastTrain)
            evaluation = Evaluation(dataLastTrain)
            evaluation.test_model(mapper, dataLastTest)
            roc_NB_Last.append(evaluation.area_under_roc(1) * 100)
            recall_NB_Last.append(evaluation.recall(yIndex) * 100)
            precision_NB_Last.append(evaluation.precision(yIndex) * 100)
elif classifier == 1:
    for degree in [2]:
        mapper = Classifier(classname="weka.classifiers.misc.InputMappedClassifier",
                            options=["-M", "-W", "weka.classifiers.functions.SMO", "--", "-K",
                                     "weka.classifiers.functions.supportVector.PolyKernel -E " + str(degree)])
        Class = 'SVM'
        if ((window == 90) and (ntp == 3 or ntp == 4)):
if classifier == 0:
    SMOTE = Filter(classname="weka.filters.supervised.instance.SMOTE",
                   options=['-P', str(smote)])
    SMOTE.inputformat(dataSlowTrain)
    dataSlowTrain = SMOTE.filter(dataSlowTrain)
    SMOTE.inputformat(dataFastTrain)
    dataFastTrain = SMOTE.filter(dataFastTrain)
    for kernel in range(0, 1):
        if kernel == 0:
            mapper = Classifier(classname="weka.classifiers.misc.InputMappedClassifier",
                                options=["-M", "-W", "weka.classifiers.bayes.NaiveBayes"])
            Class = 'NaiveBayes'
            mapper.build_classifier(dataSlowTrain)
            evaluation = Evaluation(dataSlowTrain)
            evaluation.test_model(mapper, dataSlowTest)
            roc_NB.append(evaluation.area_under_roc(1) * 100)
            recall_NB.append(evaluation.recall(yIndexSlow) * 100)
            precision_NB.append(evaluation.precision(yIndexSlow) * 100)

            mapper.build_classifier(dataFastTrain)
            evaluation = Evaluation(dataFastTrain)
            evaluation.test_model(mapper, dataFastTest)
            roc_NB_Last.append(evaluation.area_under_roc(1) * 100)
            recall_NB_Last.append(evaluation.recall(yIndexFast) * 100)
            precision_NB_Last.append(evaluation.precision(yIndexFast) * 100)

            mapper.build_classifier(dataNeutralTrain)
            evaluation = Evaluation(dataNeutralTrain)
            evaluation.test_model(mapper, dataNeutralTest)
            roc_NB_Neutral.append(evaluation.area_under_roc(1) * 100)
        '\\\\\n')
    Precision.write(
        '\multirow{8}{*}{' + str(window) + 'd}' + ' & ' + '\multirow{2}{*}{' + str(ntp) + '}' +
        ' & ' + dataset + ' & ' +
        str(np.round(evaluationNB.precision(yIndex) * 100, 2)) + ' & ' +
        str(np.round(evaluationRF.precision(yIndex) * 100, 2)) + '\\\\\n')
    Recall.write(
        '\multirow{8}{*}{' + str(window) + 'd}' + ' & ' + '\multirow{2}{*}{' + str(ntp) + '}' +
        ' & ' + dataset + ' & ' +
        str(np.round(evaluationNB.recall(yIndex) * 100, 2)) + ' & ' +
        str(np.round(evaluationRF.recall(yIndex) * 100, 2)) + '\\\\\n')
else:
    Perf.write(
        ' & ' + ' & ' + dataset + ' & ' +
        str(np.round(evaluationNB.area_under_roc(1) * 100, 2)) + ' & ' +
        str(np.round(evaluationRF.area_under_roc(1) * 100, 2)) + '\\\\\n')
    Precision.write(
        ' & ' + ' & ' + dataset + ' & ' +
        str(np.round(evaluationNB.precision(yIndex) * 100, 2)) +
def classify_and_save(classifier, name, outfile):
    random.seed("ML349")
    csv_header = [
        "Game Name", "SteamID", "Algorithm", "Number Players",
        "%Players of Training Set", "Accuracy",
        "Precision (0)", "Recall (0)", "F1 (0)",
        "Precision (1)", "Recall (1)", "F1 (1)"
    ]
    game_results = []

    with open("data/games_by_username_all.csv", "r") as f:
        game_list = f.next().rstrip().split(",")

    loader = Loader(classname="weka.core.converters.ArffLoader")
    train = loader.load_file("data/final_train.arff")
    test = loader.load_file("data/final_test.arff")

    count = 0
    for i in itertools.chain(xrange(0, 50),
                             random.sample(xrange(50, len(game_list)), 450)):
        train.class_index = i
        test.class_index = i
        count += 1

        classifier.build_classifier(train)
        evaluation = Evaluation(train)
        evaluation.test_model(classifier, test)

        confusion = evaluation.confusion_matrix
        num_players = sum(confusion[1])
        steam_id = repr(train.class_attribute).split(" ")[1]
        result = [
            game_list[i], steam_id, name, int(num_players), num_players / 1955,
            evaluation.percent_correct,
            evaluation.precision(0), evaluation.recall(0), evaluation.f_measure(0),
            evaluation.precision(1), evaluation.recall(1), evaluation.f_measure(1)
        ]
        game_results.append(result)

        print "\nResult #{2}/500 for {0} (SteamID {1}):".format(game_list[i], steam_id, count),
        print evaluation.summary()

    with open(outfile, "wb") as f:
        csv_writer = csv.writer(f, delimiter=",")
        csv_writer.writerow(csv_header)
        for r in game_results:
            csv_writer.writerow(r)
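# --- Hypothetical invocation of classify_and_save() above. ---
# The snippet is Python 2 / python-weka-wrapper (print statements, xrange, f.next());
# the classifier choice and output file name below are placeholders.
import weka.core.jvm as jvm
from weka.classifiers import Classifier

jvm.start()
nb = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
classify_and_save(nb, "NaiveBayes", "results_naive_bayes.csv")
jvm.stop()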
eval = Evaluation(labledDataSet)
eval.test_model(tree, test)
fileOut.write("Labeled data======== " + str((1.0 - eval.error_rate) * 100) +
              " number of instances== " + str(labledDataSet.num_instances) + "\n")

Newtrainpool = LabeledUnlabeldata(labledDataSet, UnlabledDataSet, tree, y)
# Newtrainpool = LabeledUnlabeldata(labledDataSet, UnlabledDataSet, tree, y, cal_method=Method)

fileOut.write("\n\nLabeled data======== " + str((1.0 - eval.error_rate) * 100) +
              " number of instances== " + str(labledDataSet.num_instances) + "\n")
fileOut.write(" Decision Tree \n")
fileOut.write("\n precision recall areaUnderROC \n\n")
for i in range(test.get_instance(0).num_classes):
    fileOut.write(str(eval.precision(i)) + " " + str(eval.recall(i)) + " " +
                  str(eval.area_under_roc(i)) + "\n")

ClassifyWithDT(Newtrainpool, test, tree, fileOut)
fileOut.write("\n")
fileOut.write("########################################################\n")
fileOut.write("\n")
except Exception as e:
    raise e
fileOut.write("\n")
fileOut.write("\n")
fileOut.write("########################################################\n")
def main(): """ Just runs some example code. """ # load a dataset iris_file = helper.get_data_dir() + os.sep + "iris.arff" helper.print_info("Loading dataset: " + iris_file) loader = Loader("weka.core.converters.ArffLoader") iris_data = loader.load_file(iris_file) iris_data.class_is_last() # classifier help helper.print_title("Creating help string") classifier = Classifier(classname="weka.classifiers.trees.J48") print(classifier.to_help()) # partial classname helper.print_title("Creating classifier from partial classname") clsname = ".J48" classifier = Classifier(classname=clsname) print(clsname + " --> " + classifier.classname) # classifier from commandline helper.print_title("Creating SMO from command-line string") cmdline = 'weka.classifiers.functions.SMO -K "weka.classifiers.functions.supportVector.NormalizedPolyKernel -E 3.0"' classifier = from_commandline(cmdline, classname="weka.classifiers.Classifier") classifier.build_classifier(iris_data) print("input: " + cmdline) print("output: " + classifier.to_commandline()) print("model:\n" + str(classifier)) # kernel classifier helper.print_title("Creating SMO as KernelClassifier") kernel = Kernel(classname="weka.classifiers.functions.supportVector.RBFKernel", options=["-G", "0.001"]) classifier = KernelClassifier(classname="weka.classifiers.functions.SMO", options=["-M"]) classifier.kernel = kernel classifier.build_classifier(iris_data) print("classifier: " + classifier.to_commandline()) print("model:\n" + str(classifier)) # build a classifier and output model helper.print_title("Training J48 classifier on iris") classifier = Classifier(classname="weka.classifiers.trees.J48") # Instead of using 'options=["-C", "0.3"]' in the constructor, we can also set the "confidenceFactor" # property of the J48 classifier itself. 
However, being of type float rather than double, we need # to convert it to the correct type first using the double_to_float function: classifier.set_property("confidenceFactor", typeconv.double_to_float(0.3)) classifier.build_classifier(iris_data) print(classifier) print(classifier.graph) print(classifier.to_source("MyJ48")) plot_graph.plot_dot_graph(classifier.graph) # evaluate model on test set helper.print_title("Evaluating J48 classifier on iris") evaluation = Evaluation(iris_data) evl = evaluation.test_model(classifier, iris_data) print(evl) print(evaluation.summary()) # evaluate model on train/test split helper.print_title("Evaluating J48 classifier on iris (random split 66%)") classifier = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"]) evaluation = Evaluation(iris_data) evaluation.evaluate_train_test_split(classifier, iris_data, 66.0, Random(1)) print(evaluation.summary()) # load a dataset incrementally and build classifier incrementally helper.print_title("Build classifier incrementally on iris") helper.print_info("Loading dataset: " + iris_file) loader = Loader("weka.core.converters.ArffLoader") iris_inc = loader.load_file(iris_file, incremental=True) iris_inc.class_is_last() classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable") classifier.build_classifier(iris_inc) for inst in loader: classifier.update_classifier(inst) print(classifier) # construct meta-classifiers helper.print_title("Meta classifiers") # generic FilteredClassifier instantiation print("generic FilteredClassifier instantiation") meta = SingleClassifierEnhancer(classname="weka.classifiers.meta.FilteredClassifier") meta.classifier = Classifier(classname="weka.classifiers.functions.LinearRegression") flter = Filter("weka.filters.unsupervised.attribute.Remove") flter.options = ["-R", "first"] meta.set_property("filter", flter.jobject) print(meta.to_commandline()) # direct FilteredClassifier instantiation print("direct FilteredClassifier instantiation") meta = FilteredClassifier() meta.classifier = Classifier(classname="weka.classifiers.functions.LinearRegression") flter = Filter("weka.filters.unsupervised.attribute.Remove") flter.options = ["-R", "first"] meta.filter = flter print(meta.to_commandline()) # generic Vote print("generic Vote instantiation") meta = MultipleClassifiersCombiner(classname="weka.classifiers.meta.Vote") classifiers = [ Classifier(classname="weka.classifiers.functions.SMO"), Classifier(classname="weka.classifiers.trees.J48") ] meta.classifiers = classifiers print(meta.to_commandline()) # cross-validate nominal classifier helper.print_title("Cross-validating NaiveBayes on diabetes") diabetes_file = helper.get_data_dir() + os.sep + "diabetes.arff" helper.print_info("Loading dataset: " + diabetes_file) loader = Loader("weka.core.converters.ArffLoader") diabetes_data = loader.load_file(diabetes_file) diabetes_data.class_is_last() classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayes") pred_output = PredictionOutput( classname="weka.classifiers.evaluation.output.prediction.PlainText", options=["-distribution"]) evaluation = Evaluation(diabetes_data) evaluation.crossvalidate_model(classifier, diabetes_data, 10, Random(42), output=pred_output) print(evaluation.summary()) print(evaluation.class_details()) print(evaluation.matrix()) print("areaUnderPRC/0: " + str(evaluation.area_under_prc(0))) print("weightedAreaUnderPRC: " + str(evaluation.weighted_area_under_prc)) print("areaUnderROC/1: " + str(evaluation.area_under_roc(1))) 
print("weightedAreaUnderROC: " + str(evaluation.weighted_area_under_roc)) print("avgCost: " + str(evaluation.avg_cost)) print("totalCost: " + str(evaluation.total_cost)) print("confusionMatrix: " + str(evaluation.confusion_matrix)) print("correct: " + str(evaluation.correct)) print("pctCorrect: " + str(evaluation.percent_correct)) print("incorrect: " + str(evaluation.incorrect)) print("pctIncorrect: " + str(evaluation.percent_incorrect)) print("unclassified: " + str(evaluation.unclassified)) print("pctUnclassified: " + str(evaluation.percent_unclassified)) print("coverageOfTestCasesByPredictedRegions: " + str(evaluation.coverage_of_test_cases_by_predicted_regions)) print("sizeOfPredictedRegions: " + str(evaluation.size_of_predicted_regions)) print("falseNegativeRate: " + str(evaluation.false_negative_rate(1))) print("weightedFalseNegativeRate: " + str(evaluation.weighted_false_negative_rate)) print("numFalseNegatives: " + str(evaluation.num_false_negatives(1))) print("trueNegativeRate: " + str(evaluation.true_negative_rate(1))) print("weightedTrueNegativeRate: " + str(evaluation.weighted_true_negative_rate)) print("numTrueNegatives: " + str(evaluation.num_true_negatives(1))) print("falsePositiveRate: " + str(evaluation.false_positive_rate(1))) print("weightedFalsePositiveRate: " + str(evaluation.weighted_false_positive_rate)) print("numFalsePositives: " + str(evaluation.num_false_positives(1))) print("truePositiveRate: " + str(evaluation.true_positive_rate(1))) print("weightedTruePositiveRate: " + str(evaluation.weighted_true_positive_rate)) print("numTruePositives: " + str(evaluation.num_true_positives(1))) print("fMeasure: " + str(evaluation.f_measure(1))) print("weightedFMeasure: " + str(evaluation.weighted_f_measure)) print("unweightedMacroFmeasure: " + str(evaluation.unweighted_macro_f_measure)) print("unweightedMicroFmeasure: " + str(evaluation.unweighted_micro_f_measure)) print("precision: " + str(evaluation.precision(1))) print("weightedPrecision: " + str(evaluation.weighted_precision)) print("recall: " + str(evaluation.recall(1))) print("weightedRecall: " + str(evaluation.weighted_recall)) print("kappa: " + str(evaluation.kappa)) print("KBInformation: " + str(evaluation.kb_information)) print("KBMeanInformation: " + str(evaluation.kb_mean_information)) print("KBRelativeInformation: " + str(evaluation.kb_relative_information)) print("SFEntropyGain: " + str(evaluation.sf_entropy_gain)) print("SFMeanEntropyGain: " + str(evaluation.sf_mean_entropy_gain)) print("SFMeanPriorEntropy: " + str(evaluation.sf_mean_prior_entropy)) print("SFMeanSchemeEntropy: " + str(evaluation.sf_mean_scheme_entropy)) print("matthewsCorrelationCoefficient: " + str(evaluation.matthews_correlation_coefficient(1))) print("weightedMatthewsCorrelation: " + str(evaluation.weighted_matthews_correlation)) print("class priors: " + str(evaluation.class_priors)) print("numInstances: " + str(evaluation.num_instances)) print("meanAbsoluteError: " + str(evaluation.mean_absolute_error)) print("meanPriorAbsoluteError: " + str(evaluation.mean_prior_absolute_error)) print("relativeAbsoluteError: " + str(evaluation.relative_absolute_error)) print("rootMeanSquaredError: " + str(evaluation.root_mean_squared_error)) print("rootMeanPriorSquaredError: " + str(evaluation.root_mean_prior_squared_error)) print("rootRelativeSquaredError: " + str(evaluation.root_relative_squared_error)) print("prediction output:\n" + str(pred_output)) plot_cls.plot_roc( evaluation, title="ROC diabetes", class_index=range(0, 
diabetes_data.class_attribute.num_values), wait=False) plot_cls.plot_prc( evaluation, title="PRC diabetes", class_index=range(0, diabetes_data.class_attribute.num_values), wait=False) # load a numeric dataset bolts_file = helper.get_data_dir() + os.sep + "bolts.arff" helper.print_info("Loading dataset: " + bolts_file) loader = Loader("weka.core.converters.ArffLoader") bolts_data = loader.load_file(bolts_file) bolts_data.class_is_last() # build a classifier and output model helper.print_title("Training LinearRegression on bolts") classifier = Classifier(classname="weka.classifiers.functions.LinearRegression", options=["-S", "1", "-C"]) classifier.build_classifier(bolts_data) print(classifier) # cross-validate numeric classifier helper.print_title("Cross-validating LinearRegression on bolts") classifier = Classifier(classname="weka.classifiers.functions.LinearRegression", options=["-S", "1", "-C"]) evaluation = Evaluation(bolts_data) evaluation.crossvalidate_model(classifier, bolts_data, 10, Random(42)) print(evaluation.summary()) print("correlationCoefficient: " + str(evaluation.correlation_coefficient)) print("errorRate: " + str(evaluation.error_rate)) helper.print_title("Header - bolts") print(str(evaluation.header)) helper.print_title("Predictions on bolts") for index, pred in enumerate(evaluation.predictions): print(str(index+1) + ": " + str(pred) + " -> error=" + str(pred.error)) plot_cls.plot_classifier_errors(evaluation.predictions, wait=False) # learning curve cls = [ Classifier(classname="weka.classifiers.trees.J48"), Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable")] plot_cls.plot_learning_curve( cls, diabetes_data, increments=0.05, label_template="[#] !", metric="percent_correct", wait=True) # access classifier's Java API labor_file = helper.get_data_dir() + os.sep + "labor.arff" helper.print_info("Loading dataset: " + labor_file) loader = Loader("weka.core.converters.ArffLoader") labor_data = loader.load_file(labor_file) labor_data.class_is_last() helper.print_title("Using JRip's Java API to access rules") jrip = Classifier(classname="weka.classifiers.rules.JRip") jrip.build_classifier(labor_data) rset = jrip.jwrapper.getRuleset() for i in range(rset.size()): r = rset.get(i) print(str(r.toString(labor_data.class_attribute.jobject)))
for kernel in range(0, 2):
    if kernel == 0:
        mapper = Classifier(
            classname="weka.classifiers.misc.InputMappedClassifier",
            options=["-M", "-W", "weka.classifiers.bayes.NaiveBayes"])
        Class = 'NaiveBayes'
        mapper.build_classifier(dataTrain)
        evaluation = Evaluation(dataTrain)
        evaluation.test_model(mapper, dataTest)
        roc_NB.append(evaluation.area_under_roc(1) * 100)
        recall_NB.append(evaluation.recall(1) * 100)
        precision_NB.append(evaluation.precision(1) * 100)

        mapper.build_classifier(dataLastTrain)
        evaluation = Evaluation(dataLastTrain)
        evaluation.test_model(mapper, dataLastTest)
        roc_NB_Last.append(evaluation.area_under_roc(1) * 100)
        recall_NB_Last.append(evaluation.recall(1) * 100)
        precision_NB_Last.append(evaluation.precision(1) * 100)
    else:
        mapper = Classifier(
# evaluationNB = Evaluation(dataTrain)
# evaluationNB.test_model(NB, dataTest)

# RF = Classifier(classname="weka.classifiers.misc.InputMappedClassifier",
#                 options=["-M", "-W", "weka.classifiers.trees.RandomForest", "--", "-I",
#                          '20'])
Class = 'NaiveBayes'
RF.build_classifier(dataTrain)
evaluationRF = Evaluation(dataTrain)
evaluationRF.test_model(RF, dataTest)
print(evaluationRF.area_under_roc(1))

if ntp == 2 and dataset == 'Slow':
    Perf.write(
        '\multirow{6}{*}{' + str(window) + 'd}' + ' & ' + '\multirow{3}{*}{' + str(ntp) + '}' +
        ' & ' + dataset + ' & ' +
        str(np.round(evaluationRF.area_under_roc(1) * 100, 2)) + ' & ' +
        str(np.round(evaluationRF.precision(yIndex) * 100, 2)) + ' & ' +
        str(np.round(evaluationRF.recall(yIndex) * 100, 2)) + '\\\\\n')
    # Precision.write(
    #     '\multirow{8}{*}{' + str(window) + 'd}' + ' & ' + '\multirow{2}{*}{' + str(
    #         ntp) + '}' + ' & ' + dataset + ' & ' + str(
    #         np.round(evaluationNB.precision(yIndex) * 100, 2)) + ' & ' + str(
    #         np.round(evaluationRF.precision(yIndex) * 100, 2)) + '\\\\\n')
    #
    # Recall.write('\multirow{8}{*}{' + str(window) + 'd}' + ' & ' + '\multirow{2}{*}{' + str(
    #     ntp) + '}' + ' & ' + dataset + ' & ' + str(np.round(evaluationNB.recall(yIndex) * 100, 2)) + ' & ' + str(
    #     np.round(evaluationRF.recall(yIndex) * 100, 2)) + '\\\\\n')
else:
    Perf.write(
        ' & ' + ' & ' + dataset + ' & ' +
        str(np.round(evaluationRF.area_under_roc(1) * 100, 2)) + ' & ' +
        str(np.round(evaluationRF.precision(yIndex) * 100, 2)) + ' & ' +
        str(np.round(evaluationRF.precision(yIndex) * 100, 2)) +  # note: precision is written twice here; recall may have been intended
        '\\\\\n')
#
NB = Classifier(
    classname="weka.classifiers.misc.InputMappedClassifier",
    options=["-M", "-W", "weka.classifiers.bayes.NaiveBayes"])
Class = 'NaiveBayes'
NB.build_classifier(dataTrain)
evaluationNB = Evaluation(dataTrain)
evaluationNB.test_model(NB, dataTest)

RF = Classifier(
    classname="weka.classifiers.misc.InputMappedClassifier",
    options=["-M", "-W", "weka.classifiers.trees.RandomForest", "--", "-I", '20'])
Class = 'NaiveBayes'
RF.build_classifier(dataTrain)
evaluationRF = Evaluation(dataTrain)

Perf.write(
    str(window) + '&' + dataset + '&' +
    str(np.round(evaluationNB.area_under_roc(1) * 100, 2)) + '&' +
    str(np.round(evaluationNB.precision(yIndex) * 100, 2)) + '&' +
    str(np.round(evaluationNB.recall(yIndex) * 100, 2)) + '\n')
Scores.write(
    str(window) + ',' + dataset + ',' +
    str(np.round(evaluationNB.area_under_roc(1) * 100, 2)) + '\n')
#Precision.write(str(window) + '&' + dataset + '&' + str(np.round(evaluationNB.precision(1) * 100, 2)) + '&' + str(np.round(evaluationRF.precision(1) * 100, 2)) + '\n')
#Recall.write(str(window) + '&' + dataset + '&' + str(np.round(evaluationNB.recall(1) * 100, 2)) + '&' + str(np.round(evaluationRF.recall(1) * 100, 2)) + '\n')

jvm.stop()
def main(): """ Just runs some example code. """ # load a dataset iris_file = helper.get_data_dir() + os.sep + "iris.arff" helper.print_info("Loading dataset: " + iris_file) loader = Loader("weka.core.converters.ArffLoader") iris_data = loader.load_file(iris_file) iris_data.class_is_last() # classifier help helper.print_title("Creating help string") classifier = Classifier(classname="weka.classifiers.trees.J48") print(classifier.to_help()) # partial classname helper.print_title("Creating classifier from partial classname") clsname = ".J48" classifier = Classifier(classname=clsname) print(clsname + " --> " + classifier.classname) # classifier from commandline helper.print_title("Creating SMO from command-line string") cmdline = 'weka.classifiers.functions.SMO -K "weka.classifiers.functions.supportVector.NormalizedPolyKernel -E 3.0"' classifier = from_commandline(cmdline, classname="weka.classifiers.Classifier") classifier.build_classifier(iris_data) print("input: " + cmdline) print("output: " + classifier.to_commandline()) print("model:\n" + str(classifier)) # kernel classifier helper.print_title("Creating SMO as KernelClassifier") kernel = Kernel( classname="weka.classifiers.functions.supportVector.RBFKernel", options=["-G", "0.001"]) classifier = KernelClassifier(classname="weka.classifiers.functions.SMO", options=["-M"]) classifier.kernel = kernel classifier.build_classifier(iris_data) print("classifier: " + classifier.to_commandline()) print("model:\n" + str(classifier)) # build a classifier and output model helper.print_title("Training J48 classifier on iris") classifier = Classifier(classname="weka.classifiers.trees.J48") # Instead of using 'options=["-C", "0.3"]' in the constructor, we can also set the "confidenceFactor" # property of the J48 classifier itself. 
However, being of type float rather than double, we need # to convert it to the correct type first using the double_to_float function: classifier.set_property("confidenceFactor", types.double_to_float(0.3)) classifier.build_classifier(iris_data) print(classifier) print(classifier.graph) print(classifier.to_source("MyJ48")) plot_graph.plot_dot_graph(classifier.graph) # evaluate model on test set helper.print_title("Evaluating J48 classifier on iris") evaluation = Evaluation(iris_data) evl = evaluation.test_model(classifier, iris_data) print(evl) print(evaluation.summary()) # evaluate model on train/test split helper.print_title("Evaluating J48 classifier on iris (random split 66%)") classifier = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"]) evaluation = Evaluation(iris_data) evaluation.evaluate_train_test_split(classifier, iris_data, 66.0, Random(1)) print(evaluation.summary()) # load a dataset incrementally and build classifier incrementally helper.print_title("Build classifier incrementally on iris") helper.print_info("Loading dataset: " + iris_file) loader = Loader("weka.core.converters.ArffLoader") iris_inc = loader.load_file(iris_file, incremental=True) iris_inc.class_is_last() classifier = Classifier( classname="weka.classifiers.bayes.NaiveBayesUpdateable") classifier.build_classifier(iris_inc) for inst in loader: classifier.update_classifier(inst) print(classifier) # construct meta-classifiers helper.print_title("Meta classifiers") # generic FilteredClassifier instantiation print("generic FilteredClassifier instantiation") meta = SingleClassifierEnhancer( classname="weka.classifiers.meta.FilteredClassifier") meta.classifier = Classifier( classname="weka.classifiers.functions.LinearRegression") flter = Filter("weka.filters.unsupervised.attribute.Remove") flter.options = ["-R", "first"] meta.set_property("filter", flter.jobject) print(meta.to_commandline()) # direct FilteredClassifier instantiation print("direct FilteredClassifier instantiation") meta = FilteredClassifier() meta.classifier = Classifier( classname="weka.classifiers.functions.LinearRegression") flter = Filter("weka.filters.unsupervised.attribute.Remove") flter.options = ["-R", "first"] meta.filter = flter print(meta.to_commandline()) # generic Vote print("generic Vote instantiation") meta = MultipleClassifiersCombiner(classname="weka.classifiers.meta.Vote") classifiers = [ Classifier(classname="weka.classifiers.functions.SMO"), Classifier(classname="weka.classifiers.trees.J48") ] meta.classifiers = classifiers print(meta.to_commandline()) # cross-validate nominal classifier helper.print_title("Cross-validating NaiveBayes on diabetes") diabetes_file = helper.get_data_dir() + os.sep + "diabetes.arff" helper.print_info("Loading dataset: " + diabetes_file) loader = Loader("weka.core.converters.ArffLoader") diabetes_data = loader.load_file(diabetes_file) diabetes_data.class_is_last() classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayes") pred_output = PredictionOutput( classname="weka.classifiers.evaluation.output.prediction.PlainText", options=["-distribution"]) evaluation = Evaluation(diabetes_data) evaluation.crossvalidate_model(classifier, diabetes_data, 10, Random(42), output=pred_output) print(evaluation.summary()) print(evaluation.class_details()) print(evaluation.matrix()) print("areaUnderPRC/0: " + str(evaluation.area_under_prc(0))) print("weightedAreaUnderPRC: " + str(evaluation.weighted_area_under_prc)) print("areaUnderROC/1: " + str(evaluation.area_under_roc(1))) 
print("weightedAreaUnderROC: " + str(evaluation.weighted_area_under_roc)) print("avgCost: " + str(evaluation.avg_cost)) print("totalCost: " + str(evaluation.total_cost)) print("confusionMatrix: " + str(evaluation.confusion_matrix)) print("correct: " + str(evaluation.correct)) print("pctCorrect: " + str(evaluation.percent_correct)) print("incorrect: " + str(evaluation.incorrect)) print("pctIncorrect: " + str(evaluation.percent_incorrect)) print("unclassified: " + str(evaluation.unclassified)) print("pctUnclassified: " + str(evaluation.percent_unclassified)) print("coverageOfTestCasesByPredictedRegions: " + str(evaluation.coverage_of_test_cases_by_predicted_regions)) print("sizeOfPredictedRegions: " + str(evaluation.size_of_predicted_regions)) print("falseNegativeRate: " + str(evaluation.false_negative_rate(1))) print("weightedFalseNegativeRate: " + str(evaluation.weighted_false_negative_rate)) print("numFalseNegatives: " + str(evaluation.num_false_negatives(1))) print("trueNegativeRate: " + str(evaluation.true_negative_rate(1))) print("weightedTrueNegativeRate: " + str(evaluation.weighted_true_negative_rate)) print("numTrueNegatives: " + str(evaluation.num_true_negatives(1))) print("falsePositiveRate: " + str(evaluation.false_positive_rate(1))) print("weightedFalsePositiveRate: " + str(evaluation.weighted_false_positive_rate)) print("numFalsePositives: " + str(evaluation.num_false_positives(1))) print("truePositiveRate: " + str(evaluation.true_positive_rate(1))) print("weightedTruePositiveRate: " + str(evaluation.weighted_true_positive_rate)) print("numTruePositives: " + str(evaluation.num_true_positives(1))) print("fMeasure: " + str(evaluation.f_measure(1))) print("weightedFMeasure: " + str(evaluation.weighted_f_measure)) print("unweightedMacroFmeasure: " + str(evaluation.unweighted_macro_f_measure)) print("unweightedMicroFmeasure: " + str(evaluation.unweighted_micro_f_measure)) print("precision: " + str(evaluation.precision(1))) print("weightedPrecision: " + str(evaluation.weighted_precision)) print("recall: " + str(evaluation.recall(1))) print("weightedRecall: " + str(evaluation.weighted_recall)) print("kappa: " + str(evaluation.kappa)) print("KBInformation: " + str(evaluation.kb_information)) print("KBMeanInformation: " + str(evaluation.kb_mean_information)) print("KBRelativeInformation: " + str(evaluation.kb_relative_information)) print("SFEntropyGain: " + str(evaluation.sf_entropy_gain)) print("SFMeanEntropyGain: " + str(evaluation.sf_mean_entropy_gain)) print("SFMeanPriorEntropy: " + str(evaluation.sf_mean_prior_entropy)) print("SFMeanSchemeEntropy: " + str(evaluation.sf_mean_scheme_entropy)) print("matthewsCorrelationCoefficient: " + str(evaluation.matthews_correlation_coefficient(1))) print("weightedMatthewsCorrelation: " + str(evaluation.weighted_matthews_correlation)) print("class priors: " + str(evaluation.class_priors)) print("numInstances: " + str(evaluation.num_instances)) print("meanAbsoluteError: " + str(evaluation.mean_absolute_error)) print("meanPriorAbsoluteError: " + str(evaluation.mean_prior_absolute_error)) print("relativeAbsoluteError: " + str(evaluation.relative_absolute_error)) print("rootMeanSquaredError: " + str(evaluation.root_mean_squared_error)) print("rootMeanPriorSquaredError: " + str(evaluation.root_mean_prior_squared_error)) print("rootRelativeSquaredError: " + str(evaluation.root_relative_squared_error)) print("prediction output:\n" + str(pred_output)) plot_cls.plot_roc(evaluation, title="ROC diabetes", class_index=range( 0, 
diabetes_data.class_attribute.num_values), wait=False) plot_cls.plot_prc(evaluation, title="PRC diabetes", class_index=range( 0, diabetes_data.class_attribute.num_values), wait=False) # train 2nd classifier on diabetes dataset classifier2 = Classifier(classname="weka.classifiers.trees.RandomForest") evaluation2 = Evaluation(diabetes_data) evaluation2.crossvalidate_model(classifier2, diabetes_data, 10, Random(42)) plot_cls.plot_rocs({ "NB": evaluation, "RF": evaluation2 }, title="ROC diabetes", class_index=0, wait=False) plot_cls.plot_prcs({ "NB": evaluation, "RF": evaluation2 }, title="PRC diabetes", class_index=0, wait=False) # load a numeric dataset bolts_file = helper.get_data_dir() + os.sep + "bolts.arff" helper.print_info("Loading dataset: " + bolts_file) loader = Loader("weka.core.converters.ArffLoader") bolts_data = loader.load_file(bolts_file) bolts_data.class_is_last() # build a classifier and output model helper.print_title("Training LinearRegression on bolts") classifier = Classifier( classname="weka.classifiers.functions.LinearRegression", options=["-S", "1", "-C"]) classifier.build_classifier(bolts_data) print(classifier) # cross-validate numeric classifier helper.print_title("Cross-validating LinearRegression on bolts") classifier = Classifier( classname="weka.classifiers.functions.LinearRegression", options=["-S", "1", "-C"]) evaluation = Evaluation(bolts_data) evaluation.crossvalidate_model(classifier, bolts_data, 10, Random(42)) print(evaluation.summary()) print("correlationCoefficient: " + str(evaluation.correlation_coefficient)) print("errorRate: " + str(evaluation.error_rate)) helper.print_title("Header - bolts") print(str(evaluation.header)) helper.print_title("Predictions on bolts") for index, pred in enumerate(evaluation.predictions): print( str(index + 1) + ": " + str(pred) + " -> error=" + str(pred.error)) plot_cls.plot_classifier_errors(evaluation.predictions, wait=False) # train 2nd classifier and show errors in same plot classifier2 = Classifier(classname="weka.classifiers.functions.SMOreg") evaluation2 = Evaluation(bolts_data) evaluation2.crossvalidate_model(classifier2, bolts_data, 10, Random(42)) plot_cls.plot_classifier_errors( { "LR": evaluation.predictions, "SMOreg": evaluation2.predictions }, wait=False) # learning curve cls = [ Classifier(classname="weka.classifiers.trees.J48"), Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable") ] plot_cls.plot_learning_curve(cls, diabetes_data, increments=0.05, label_template="[#] !", metric="percent_correct", wait=True) # access classifier's Java API labor_file = helper.get_data_dir() + os.sep + "labor.arff" helper.print_info("Loading dataset: " + labor_file) loader = Loader("weka.core.converters.ArffLoader") labor_data = loader.load_file(labor_file) labor_data.class_is_last() helper.print_title("Using JRip's Java API to access rules") jrip = Classifier(classname="weka.classifiers.rules.JRip") jrip.build_classifier(labor_data) rset = jrip.jwrapper.getRuleset() for i in xrange(rset.size()): r = rset.get(i) print(str(r.toString(labor_data.class_attribute.jobject)))
Newtrainpool = LabeledUnlabeldata(labledDataSet, UnlabledDataSet,
                                  tree, y, cal_method=Method)
print("\n\nLabeled data======== " + str((1.0 - eval.error_rate) * 100) +
      " number of instances== " + str(labledDataSet.num_instances) + "\n")
print(" Decision Tree \n")
print("\n precision recall areaUnderROC \n\n")
for i in range(test.get_instance(0).num_classes):
    print(str(eval.precision(i)) + " " + str(eval.recall(i)) + " " +
          str(eval.area_under_roc(i)) + "\n")

ClassifyWithDT(Newtrainpool, test, tree, fileOut)
print("\n")
print("########################################################\n")
print("\n")
except Exception as e:
    raise e
print("\n")
print("\n")
print("########################################################\n")
print("########################################################\n")