def test_weka_classifier(clf, train, test):
    """Fit *clf* on the training set, evaluate it on the test set, and
    print/return the headline metrics.

    :param clf: a Weka classifier wrapper
    :param train: training Instances
    :param test: test Instances
    :return: dict with 'loss' (scheme entropy), 'accuracy', 'auc', 'err'
    """
    clf.build_classifier(train)
    evaluator = Evaluation(train)
    evaluator.test_model(clf, test)
    metrics = {
        'loss': evaluator.sf_mean_scheme_entropy,
        'accuracy': evaluator.percent_correct,
        'auc': evaluator.weighted_area_under_roc,
        'err': evaluator.error_rate,
    }
    print(
        "# testing | loss: {:.2}, accuracy: {:.4}, AUC: {:.2}, error: {:.2}".
        format(metrics['loss'], metrics['accuracy'], metrics['auc'],
               metrics['err']))
    return metrics
def run_bayes_hill_split(self, output_directory, parents=1):
    """Train a BayesNet (HillClimber search, *parents* max parents) on the
    training split, evaluate on the test split, and save report + graph."""
    print("\nBuilding Bayes Classifier on training data. Parents = " +
          str(parents) + "\n")
    started = time.time()
    bayes = Classifier(
        classname="weka.classifiers.bayes.BayesNet",
        options=[
            "-D", "-Q",
            "weka.classifiers.bayes.net.search.local.HillClimber", "--",
            "-P", "" + str(parents), "-S", "BAYES",
            "-E", "weka.classifiers.bayes.net.estimate.SimpleEstimator",
            "--", "-A", "0.5"
        ])
    bayes.build_classifier(self.training_data)
    report = self.print_both(str(bayes), "")
    report = self.print_both(
        "Bayes Split Classifier Built in " + str(time.time() - started) +
        " secs.\n", report)

    # Evaluate on the held-out test data.
    report = self.print_both("\nEvaluating on test data.", report)
    started = time.time()
    evaluator = Evaluation(self.training_data)
    evaluator.test_model(bayes, self.testing_data)
    report = self.print_both(str(evaluator.summary()), report)
    report = self.print_both(str(evaluator.class_details()), report)
    report = self.print_both(str(evaluator.confusion_matrix), report)
    report = self.print_both(
        "\nBayes Split Classifier Evaluated in " + str(time.time() - started) +
        " secs.\n", report)

    # Persist the textual report and the learned network graph.
    self.save_results("Bayes_Hill_P" + str(parents) + "_", report,
                      output_directory)
    self.save_results("Bayes_Hill_P" + str(parents) + "_Graph", bayes.graph,
                      output_directory, True)
def predict(self, X):
    """Classify every row of *X* with the trained classifier.

    :param X: a DataFrame-like object; temporarily gains a 'class' column
    :return: whatever Evaluation.test_model yields for the test data
    """
    evaluator = Evaluation(self.train_data)
    # Add a placeholder class column in place (X is large, so we avoid
    # copying it) and drop the column again after the ARFF dump.
    X['class'] = None
    arff_path = self.to_arff(X, True)
    del X['class']
    # Reload the dump so Weka sees a proper Instances object.
    arff_loader = Loader("weka.core.converters.ArffLoader")
    instances = arff_loader.load_file(arff_path)
    instances.class_is_last()
    return evaluator.test_model(self.classifier, instances)
def run_crossval(self, output_directory, classifier_name,
                 classifier_weka_spec, options_list):
    """Build the given Weka classifier, 10-fold cross-validate it on the
    training data, and save a text report named after the options."""
    print("\nBuilding " + classifier_name + " Classifier on training data.")
    started = time.time()
    model = Classifier(classname=classifier_weka_spec, options=options_list)
    model.build_classifier(self.training_data)
    report = self.print_both(str(model), "")
    report = self.print_both(
        classifier_name + " Cross Eval Classifier Built in " +
        str(time.time() - started) + " secs.\n", report)

    # Cross-validation (10 folds, fixed seed) on the training data.
    report = self.print_both("\nCross Evaluating on test data.", report)
    started = time.time()
    evaluator = Evaluation(self.training_data)
    evaluator.crossvalidate_model(model, self.training_data, 10, Random(1))
    report = self.print_both(str(evaluator.summary()), report)
    report += "\n"
    report = self.print_both(str(evaluator.class_details()), report)
    report += "\n"
    report = self.print_both(str(evaluator.confusion_matrix), report)
    report = self.print_both(
        "\n\n" + classifier_name + " Cross Eval Classifier Evaluated in " +
        str(time.time() - started) + " secs.\n", report)

    # Build a filesystem-safe suffix from the options ('.' and '-' both end
    # up as '_', matching the original naming scheme).
    options_string = "".join(str(option) for option in options_list)
    options_string = options_string.replace(".", "-").replace("-", "_")

    self.save_results(classifier_name + options_string + "_Crossval", report,
                      output_directory)
def ClassifyWithDT(f3, test, tree, fileOut):
    """Build decision tree *tree* on *f3*, evaluate on *test*, and print
    accuracy plus per-class precision/recall/AUC.

    :param f3: training Instances
    :param test: test Instances
    :param tree: a Weka classifier wrapper (decision tree)
    :param fileOut: accepted for interface compatibility; not used here
    :return: the Evaluation object
    """
    # Renamed from `eval` — that name shadowed the `eval` builtin.
    evl = Evaluation(f3)
    tree.build_classifier(f3)
    evl.test_model(tree, test)
    print("\n\nSelf-Training data========" + str((1 - evl.error_rate) * 100) +
          " number of instances==" + str(f3.num_instances) + "\n")
    print("\n Error Rate==" + str(evl.error_rate) + "\n")
    print("\n precision recall areaUnderROC \n\n")
    for i in range(test.get_instance(0).num_classes):
        print(
            str(evl.precision(i)) + " " + str(evl.recall(i)) + " " +
            str(evl.area_under_roc(i)) + "\n")
    return evl
def run():
    """Convert the bank CSV to ARFF, train J48, dump the tree text, and
    return the 10-fold cross-validated percent-correct as a string."""
    jvm.start()

    # CSV -> ARFF conversion.
    csv_loader = Loader("weka.core.converters.CSVLoader")
    bank_csv = csv_loader.load_file(
        "/Users/imeiliasantoso/web_graduate_project5/register_page/bank-full_input.csv"
    )
    saver = Saver("weka.core.converters.ArffSaver")
    saver.save_file(
        bank_csv,
        "/Users/imeiliasantoso/web_graduate_project5/register_page/bank-full_input.arff"
    )

    arff_loader = Loader("weka.core.converters.ArffLoader")
    bank = arff_loader.load_file(
        "/Users/imeiliasantoso/web_graduate_project5/register_page/bank-full_input.arff"
    )
    bank.class_is_last()

    # Train a J48 tree and run it over every instance.
    tree = Classifier(classname="weka.classifiers.trees.J48",
                      options=["-C", "0.5"])
    tree.build_classifier(bank)
    for index, inst in enumerate(bank):
        pred = tree.classify_instance(inst)
        dist = tree.distribution_for_instance(inst)

    # Save the pruned tree's textual representation.
    with open(
            "/Users/imeiliasantoso/web_graduate_project5/register_page/bank-full_input.txt",
            "w") as tree_file:
        tree_file.write(str(tree))
    # print(cls)

    global j48
    j48_model = Classifier(classname="weka.classifiers.trees.J48",
                           options=["-C", "0.25", "-M", "2"])
    j48_model.build_classifier(bank)
    evaluationj48 = Evaluation(bank)
    evaluationj48.crossvalidate_model(j48_model, bank, 10, Random(100))
    j48 = str(evaluationj48.percent_correct)

    jvm.stop()
    return j48
def use_classifier(data):
    """
    Uses the meta-classifier AttributeSelectedClassifier for attribute selection.

    :param data: the dataset to use
    :type data: Instances
    """
    print("\n1. Meta-classifier")
    meta = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    search = ASSearch(classname="weka.attributeSelection.GreedyStepwise",
                      options=["-B"])
    base = Classifier(classname="weka.classifiers.trees.J48")
    # Setting nested options by hand means escaping lots of double quotes;
    # assigning the underlying Java objects through the bean properties is
    # simpler and less error prone.
    meta.set_property("classifier", base.jobject)
    meta.set_property("evaluator", evaluator.jobject)
    meta.set_property("search", search.jobject)
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(meta, data, 10, Random(1))
    print(evaluation.summary())
def experiment_file_random(path_features, path_folder_save_results, options,
                           classifier, fold, random, name):
    """Cross-validate *classifier* on a feature file and write summary
    metrics and per-instance predictions to CSV files under
    *path_folder_save_results*."""
    print(name + " Start: " + str(datetime.datetime.now()))
    time = datetime.datetime.now()

    cls = Classifier(classname=classifier,
                     options=weka.core.classes.split_options(options))

    dataset = converters.load_any_file(path_features)
    dataset.class_is_last()

    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.CSV")
    evl = Evaluation(dataset)
    evl.crossvalidate_model(cls, dataset, fold, Random(random), pout)

    # One row of summary metrics; class index 1 is treated as positive.
    d_results = {
        'percent_correct': [evl.percent_correct],
        'percent_incorrect': [evl.percent_incorrect],
        'precision': [evl.precision(1)],
        'recall': [evl.recall(1)],
        'f-score': [evl.f_measure(1)],
        'confusion_matrix': [evl.matrix()],  # Generates the confusion matrix.
    }
    pd.DataFrame(data=d_results).to_csv(
        path_folder_save_results + '/' + str(name) + '.csv', index=False)

    # Persist the per-instance predictions captured during CV.
    check_folder_or_create(path_folder_save_results + '/' + 'prediction')
    with open(
            path_folder_save_results + '/' + 'prediction/' + str(name) +
            '.csv', 'w') as f:
        f.write(pout.buffer_content())

    print(name + " End: " + str(datetime.datetime.now() - time))
def executeKFoldClassifier(self, featureInclusion, kFold):
    """Run k-fold cross-validation with Weka's CVParameterSelection on the
    subset of features flagged for inclusion.

    :param featureInclusion: boolean mask over attributes; attributes whose
        flag is False are removed before training
    :param kFold: number of cross-validation folds
    :return: percentage of correctly classified instances
    """
    # Bug fix: the original deleted the *included* features.  Delete every
    # attribute NOT flagged for inclusion; indices shift left after each
    # deletion, so subtract the number of deletions performed so far.
    deleted = 0
    for i in range(len(featureInclusion)):
        if not featureInclusion[i]:
            self.instances.deleteAttributeAt(i - deleted)
            deleted += 1
    # NOTE(review): assumes self.instances exposes the Java-style API where
    # numAttributes is a method — confirm against the wrapper in use.
    self.instances.setClassIndex(self.instances.numAttributes() - 1)

    cvParameterSelection = javabridge.make_instance(
        "weka/classifiers/meta/CVParameterSelection", "()V")
    javabridge.call(cvParameterSelection, "setNumFolds", "(I)V", kFold)
    # Bug fix: method name and JNI signature must be separate arguments.
    javabridge.call(cvParameterSelection, "buildClassifier",
                    "(Lweka/core/Instances;)V", self.instances)

    evl = Evaluation(self.instances)
    # Bug fix: crossvalidate_model needs a Weka Random, not random.random().
    evl.crossvalidate_model(cvParameterSelection, self.instances, kFold,
                            Random(1))
    # percent_correct is a property in python-weka-wrapper, not a method.
    return evl.percent_correct
def runBayes(file, bound):
    """Cross-validate NaiveBayes on a CSV file after removing the attribute
    range *bound*; prints accuracy and returns the class-details report."""
    csv_loader = Loader(classname="weka.core.converters.CSVLoader")
    dataset = csv_loader.load_file(file)
    dataset.class_is_first()

    # Drop the attribute range given by *bound* before training.
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                    options=["-R", bound])
    remove.inputformat(dataset)
    filtered = remove.filter(dataset)

    bayes = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
    evl = Evaluation(filtered)
    evl.crossvalidate_model(bayes, filtered, 10, Random(1))
    print(evl.percent_correct)
    #print(evl.summary())
    result = evl.class_details()
    print(result)
    return result
def HOV(dataset, algo, num_datasets):
    """Hold-out validation: 70/30 split, fixed seed; prints the evaluation
    summary and returns the AUC for class index 1."""
    #Executing HOV \_*-*_/
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(dataset)
    data.class_is_last()

    # 70% train / 30% test with a fixed seed for reproducibility.
    train, test = data.train_test_split(70.0, Random(10))

    model = Classifier(classname=algo)
    model.build_classifier(train)
    evl = Evaluation(train)
    evl.test_model(model, test)

    print(evl.summary("=== " + str(algo) + " on" + str(dataset) + " ===",
                      False))
    print(evl.matrix("=== on click prediction(confusion matrix) ==="))
    print("For Algo" + str(algo) + "areaUnderROC/1: for HOV " +
          str(evl.area_under_roc(1)))
    return evl.area_under_roc(1)
def crossValidate(self, arrfFile=None,
                  classname="weka.classifiers.trees.J48", options=None):
    """10-fold cross-validate a classifier on self.data and print the
    accuracy, summary and class details.

    :param arrfFile: optional ARFF path; when given, (re)initialises self.data
    :param classname: Weka classifier class name
    :param options: classifier options; defaults to ["-C", "0.3"]
    """
    # Bug fix: avoid a mutable default argument; effective default unchanged.
    if options is None:
        options = ["-C", "0.3"]
    if arrfFile is not None:
        self.initData(arrfFile)
    if self.data is None:
        return
    # Bug fix: Python-2-only print statement converted to a single-argument
    # call, which behaves identically on Python 2 and 3.
    print('Classificador ' + str(classname) + ' ' + ' '.join(options))
    cls = Classifier(classname=classname, options=options)
    evl = Evaluation(self.data)
    evl.crossvalidate_model(cls, self.data, 10, Random(1))
    print(evl.percent_correct)
    print(evl.summary())
    print(evl.class_details())
def CV10(dataset, algo):
    """Cross-validate *algo* on *dataset* and print the AUC for class 1.

    NOTE(review): despite the name, only 2 folds are used below — confirm
    whether 10 was intended.
    """
    # Bug fix: Python-2-only print statements converted to single-argument
    # calls, which behave identically on Python 2 and 3.
    print("inside 10cv")
    print("dataset ----" + dataset)
    print("algorithm ----" + algo)
    #Executing 10FCV
    # jvm.start(packages=True)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(dataset)
    data.class_is_last()
    #print(data)
    cls = Classifier(classname=algo)
    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 2, Random(5))
    print("areaUnderROC/1: " + str(evl.area_under_roc(1)))
def train_and_eval_weka_classifier(clf, train, valid, n_instances):
    """Train *clf* on a subset of *train* limited to *n_instances* rows and
    evaluate it on *valid*.

    :param clf: a Weka classifier wrapper
    :param train: training Instances
    :param valid: validation Instances
    :param n_instances: number of training rows to use
    :return: dict with 'loss' (scheme entropy), 'accuracy', 'auc', 'err'
    """
    total_train_inst = train.num_instances
    percentage = (n_instances * 100) / total_train_inst
    # Use the whole training set when the requested size covers it all,
    # otherwise split off the requested percentage with a fixed seed.
    if percentage == 100:
        opt = train
    else:
        opt, extra = train.train_test_split(percentage, Random(1))
    print('total_train_inst: ', total_train_inst, '| percentage: ',
          percentage, '| used_inst: ', opt.num_instances)

    # Removed dead code: an unused `import signal`, `AlarmException` class
    # and `alarmHandler` function were defined here but never armed or used.

    clf.build_classifier(opt)
    evl = Evaluation(opt)
    evl.test_model(clf, valid)
    acc = evl.percent_correct
    auc = evl.weighted_area_under_roc
    err = evl.error_rate
    log = evl.sf_mean_scheme_entropy
    print(
        "# validating | loss: {:.2}, accuracy: {:.4}, AUC: {:.2}, error: {:.2}"
        .format(log, acc, auc, err))
    return {'loss': log, 'accuracy': acc, 'auc': auc, 'err': err}
def TrainingModel(arff, modelOutput, clsfier):
    """Train a Weka classifier on *arff*, report 10-fold CV statistics
    (including TPR/TNR/PPV/NPV derived from the confusion matrix), and
    serialize the model to *modelOutput*.

    :param arff: path to the training ARFF file (class attribute is first)
    :param modelOutput: path where the serialized model is written
    :param clsfier: classifier name relative to "weka.classifiers."
    """
    # Start the Java virtual machine.
    jvm.start()
    # Load the training set; the class attribute is the first column.
    loader = Loader(classname="weka.core.converters.ArffLoader")
    train = loader.load_file(arff)
    train.class_is_first()
    # RandomForest was preferred originally: in the Weka GUI it gave higher
    # TPR and TNR than the other algorithms tried.
    cls_name = "weka.classifiers." + clsfier
    clsf = Classifier(classname=cls_name)
    clsf.build_classifier(train)
    print(clsf)
    # Build the model: cross-validate through a FilteredClassifier wrapper.
    fc = FilteredClassifier()
    fc.classifier = clsf
    evl = Evaluation(train)
    evl.crossvalidate_model(fc, train, 10, Random(1))
    print(evl.percent_correct)
    print(evl.summary())
    print(evl.class_details())
    print(evl.matrix())
    # Result statistics — assumes a binary class with row/col 0 = negative
    # and row/col 1 = positive (TODO confirm the class ordering).
    matrixResults = evl.confusion_matrix
    TN = float(matrixResults[0][0])
    FP = float(matrixResults[0][1])
    FN = float(matrixResults[1][0])
    TP = float(matrixResults[1][1])
    TPR = TP / (TP + FN)
    TNR = TN / (FP + TN)
    PPV = TP / (TP + FP)
    NPV = TN / (TN + FN)
    print("算法: " + clsfier)
    print("敏感度 TPR: " + str(TPR))
    print("特异度 TNR: " + str(TNR))
    print("PPV: " + str(PPV))
    print("NPV: " + str(NPV))
    # Save the model together with the dataset header for later reuse.
    clsf.serialize(modelOutput, header=train)
    # Shut the JVM down.
    jvm.stop()
    print("分析模型建立完成")
def HOV(dataset, algo):
    """Hold-out validation (70/30 split, fixed seed); returns the AUC for
    class index 1 as a string."""
    # Bug fix: Python-2-only print statements converted to single-argument
    # calls, which behave identically on Python 2 and 3.
    print("inside hov")
    print("dataset ----" + dataset)
    print("algorithm ----" + algo)
    #Executing HOV \_*-*_/
    # jvm.start(packages=True)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(dataset)
    data.class_is_last()
    train, test = data.train_test_split(70.0, Random(10))
    cls = Classifier(classname=algo)
    cls.build_classifier(train)
    evl = Evaluation(train)
    evl.test_model(cls, test)
    return (str(evl.area_under_roc(1)))
def SMOreg():
    """10-fold cross-validate an RBF-kernel SMOreg regressor on the trial
    dataset, print results, and serialize the (untrained-state) model."""
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file("First_trial_regression.arff")
    data.class_is_last()

    # SMOreg with an RBF kernel (gamma = 0.2).
    regressor = KernelClassifier(classname="weka.classifiers.functions.SMOreg",
                                 options=["-N", "0"])
    regressor.kernel = Kernel(
        classname="weka.classifiers.functions.supportVector.RBFKernel",
        options=["-G", "0.2"])

    predictions = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")
    evl = Evaluation(data)
    evl.crossvalidate_model(regressor, data, 10, Random(486), predictions)
    print(evl.summary())
    print(predictions.buffer_content())

    # save model
    serialization.write_all("SMOreg.model2", regressor)
def weka_bayesnet(filearffpath='data/datatobayes.arff'):
    """Simple calling of the bayesian network from python.

    :param filearffpath: path to the ARFF data file to evaluate
    :return: area under the ROC curve for class index 0 (10-fold CV)
    """
    # Preparing the data.
    loader = Loader(classname="weka.core.converters.ArffLoader")
    # Bug fix: the parameter was previously ignored in favour of a
    # hard-coded path; load the file the caller asked for.
    data = loader.load_file(filearffpath)
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                    options=["-R", "first"])
    remove.inputformat(data)
    # NOTE(review): the Remove filter is configured but intentionally not
    # applied (kept from the original) — confirm whether the first
    # attribute should actually be dropped.
    filtered = data  #remove.filter(data)

    #Classifier test
    from weka.classifiers import Classifier, Evaluation
    from weka.core.classes import Random
    filtered.class_is_last()
    classifier = Classifier(classname="weka.classifiers.bayes.BayesNet",
                            options=['-D'])  #
    evaluation = Evaluation(filtered)
    evaluation.crossvalidate_model(classifier, filtered, 10, Random(42))
    return evaluation.area_under_roc(class_index=0)  #ROC, no std of kfold
def DecisionTree(rnd_data, folds, seed, data):
    """Manual k-fold cross-validation of a J48 tree on *rnd_data*.

    The data is partitioned sequentially into *folds* chunks; the last fold
    absorbs any remainder.  All folds are accumulated into one Evaluation
    whose summary is printed at the end.

    :param rnd_data: (pre-shuffled) Instances to cross-validate on
    :param folds: number of folds
    :param seed: shuffle seed, reported in the output only
    :param data: original Instances, used only for the relation name
    """
    data_size = rnd_data.num_instances
    fold_size = math.floor(data_size / folds)

    # cross-validation
    evaluation = Evaluation(rnd_data)
    for i in range(folds):
        this_fold = fold_size
        test_start = i * fold_size
        test_end = (test_start + fold_size)
        # If the remainder after this fold is smaller than a full fold,
        # extend the current (last) fold to cover the rest of the data.
        if ((data_size - test_end) / fold_size < 1):
            this_fold = data_size - test_start
        test = Instances.copy_instances(rnd_data, test_start,
                                        this_fold)  # generate validation fold
        # Training data is everything outside [test_start, test_end).
        if i == 0:
            train = Instances.copy_instances(rnd_data, test_end,
                                             data_size - test_end)
        else:
            train_1 = Instances.copy_instances(rnd_data, 0, test_start)
            train_2 = Instances.copy_instances(rnd_data, test_end,
                                               data_size - test_end)
            train = Instances.append_instances(
                train_1, train_2)  # generate training fold

        # build and evaluate classifier
        cls = Classifier(classname="weka.classifiers.trees.J48")
        cls.build_classifier(train)  # build classifier on training set
        evaluation.test_model(cls,
                              test)  # test classifier on validation/test set

    print("")
    print("=== Decision Tree ===")
    print("Classifier: " + cls.to_commandline())
    print("Dataset: " + data.relationname)
    print("Folds: " + str(folds))
    print("Seed: " + str(seed))
    print("")
    print(
        evaluation.summary("=== " + str(folds) +
                           "-fold Cross-Validation ==="))
def run_ibk(file):
    """Run IBk (k=3) with 10-fold cross-validation on an ARFF file and
    write the evaluation and prediction results next to the input."""
    # Get filename from the Pathlib object.
    filename = file.parts[-1]
    directory = file.parents[0]
    print("Running IBk on %s" % filename)
    if not filename.endswith(".arff"):
        print("%s not ARFF file." % filename)
        return
    # Strip the '.arff' extension for the output file names.
    filename_base = filename[:-5]

    # Load data with the class as the first attribute.
    data = load_Arff_file(file)
    data.class_is_first()

    # IBk with k = 3 neighbours.
    cls = Classifier(classname="weka.classifiers.lazy.IBk",
                     options=["-K", "3"])
    # print(cls.options)

    # Predictions are collected in pout during cross-validation.
    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(cls, data, 10, Random(1), output=pout)

    # Save summary, class details and confusion matrix.
    output_eval(evaluation, directory / (filename_base + "_eval_results.txt"))
    # Save the predicted results.
    output_pred(pout, directory / (filename_base + "_pred_results.txt"))
    print("IBk complete")
def evaluate(classifier, data):
    """
    Private function that makes evaluation of classifier on given data.
    With command line arguments we can chose which evaluation to use.
    :param classifier: Classifier
    :param data: weka arff data
    :return: Evaluation
    """
    args = evaluate_parser()
    evaluation = Evaluation(data)
    mode = args['evaluation']
    if mode == 'train_test':
        evaluation.evaluate_train_test_split(classifier, data,
                                             int(args['train_size']),
                                             Random(1))
    elif mode == 'cross_validate':
        evaluation.crossvalidate_model(classifier, data, int(args['folds']),
                                       Random(42))
    else:
        # Fallback: evaluate on the training data itself.
        evaluation.test_model(classifier, data)
    return evaluation
def training(self):
    """Train one classifier per missing-data pattern and add each to the
    ensemble, weighted by its estimated accuracy (1 - mean absolute error
    from 15-fold cross-validation)."""
    # Data preparation: impute missing values.
    self.imp = Imputation(self.data)
    # Select the features on the imputed data.
    self.features = FeatureSelection(self.imp.imputed_data)
    data_selected = self.features.data_selected
    self.selected_features = self.features.selected_features
    # Find the missing-value patterns present in the data.
    self.missing_patterns = MissingPatterns(self.data,
                                            self.selected_features).missing_patterns
    # Train the classifiers, one per missing pattern.
    #print('test train')
    for mpi in self.missing_patterns:
        # Usable features for this pattern: selected minus missing.
        cpi = set(self.selected_features) - set(mpi)
        data_temp = Instances.copy_instances(data_selected,
                                             from_row=0,
                                             num_rows=data_selected.num_instances)
        data_temp.class_is_last()
        # Restrict the training data to the usable features.
        data_temp = self.reduceData(data_temp, cpi, self.data)
        # Train the classifier on the imputed, reduced data.
        classifier = Classifier(classname=self.learn_class,
                                options=self.options)
        classifier.build_classifier(data_temp)
        #print(classifier.distribution_for_instance(data_selected.get_instance(30)))
        # Estimate the classifier's weight (its classification accuracy)
        # via 15-fold cross-validation.
        evl = Evaluation(data_temp)
        evl.crossvalidate_model(classifier, data_temp, 15, Random(1))
        # Add the trained classifier to the ensemble; the weight is
        # 1 - mean absolute error.
        my_classifier = MyClassifier(classifier, cpi,
                                     1 - evl.mean_absolute_error)
        self.classifiers.add(my_classifier)
def crossEvaluate(self):
    """
    Evaluate classifier using cross-validation using K folds
    :return: True when evaluation succeeded, False otherwise
    """
    if self.classifierInstance is not None:
        # Bug fix: Python-2-only print statement converted to a call,
        # which behaves identically on Python 2 and 3.
        print('[Cross-validate data]')
        try:
            # Cross validation evaluation
            evaluatorInstance = Evaluation(self.classificationData)
            evaluatorInstance.crossvalidate_model(self.classifierInstance,
                                                  self.classificationData,
                                                  self.evaluationNumFolds,
                                                  Random(1))
            # Store evaluation results
            self.setEvaluationResults(evaluatorInstance)
            return True
        except Exception:
            # Bug fix: narrowed from a bare `except:` so KeyboardInterrupt
            # and SystemExit are no longer swallowed; still best-effort.
            return False
    return False
def run_ibk_crossval(self, output_directory):
    """Build IBk (k=3, LinearNNSearch with Euclidean distance), 10-fold
    cross-validate it on the training data, and save a text report."""
    print("\nBuilding Classifier on training data.")
    started = time.time()
    knn = Classifier(
        classname="weka.classifiers.lazy.IBk",
        options=[
            "-K", "3", "-W", "0", "-A",
            "weka.core.neighboursearch.LinearNNSearch -A \"weka.core.EuclideanDistance -R first-last\""
        ])
    knn.build_classifier(self.training_data)
    report = self.print_both(str(knn), "")
    report = self.print_both(
        "IBK Cross Eval Classifier Built in " + str(time.time() - started) +
        " secs.\n", report)

    # Cross-validate (10 folds, fixed seed) on the training data.
    report = self.print_both("\nCross Evaluating on test data.", report)
    started = time.time()
    evaluator = Evaluation(self.training_data)
    evaluator.crossvalidate_model(knn, self.training_data, 10, Random(1))
    report = self.print_both(str(evaluator.summary()), report)
    report = self.print_both(str(evaluator.class_details()), report)
    report = self.print_both(str(evaluator.confusion_matrix), report)
    report = self.print_both(
        "\nIBK Cross Eval Classifier Evaluated in " +
        str(time.time() - started) + " secs.\n", report)

    # Persist the report.
    self.save_results("IBK_Crossval", report, output_directory)
def train(option, sym, num):
    """Train the chosen regressor on the symbol's history file and return
    the last *num* predictions made on the training data itself.

    :param option: one of 'DecisionTable', 'SMOreg', 'LinearRegression',
        'GaussianProcesses'
    :param sym: symbol used to locate HistSet/histSet_<sym>.arff
    :param num: number of trailing predictions to print and return
    """
    # Load the dataset for the given symbol.
    path = os.path.join('HistSet', 'histSet_%s.arff' % sym)
    loader = Loader("weka.core.converters.ArffLoader")
    dataset = loader.load_file(path)
    dataset.class_is_last()  # set the last attribute as class attribute

    # load testset
    # testset = loader.load_file(os.path.join('HistSet', 'testSet_LTC.arff'))
    # testset.class_is_last()

    # Command lines for the supported regressors.
    cmd = {
        'DecisionTable':
        'weka.classifiers.rules.DecisionTable -X 1 -S "weka.attributeSelection.BestFirst -D 1 -N 5"',
        'SMOreg':
        'weka.classifiers.functions.SMOreg -C 1.0 -N 0 -I "weka.classifiers.functions.supportVector.RegSMOImproved -L 0.001 -W 1 -P 1.0E-12 -T 0.001 -V" -K "weka.classifiers.functions.supportVector.PolyKernel -C 250007 -E 1.0"',
        'LinearRegression':
        'weka.classifiers.functions.LinearRegression -S 0 -R 1.0E-8',
        'GaussianProcesses':
        'weka.classifiers.functions.GaussianProcesses -L 1.0 -N 0 -K "weka.classifiers.functions.supportVector.RBFKernel -C 250007 -G 1.0"',
    }
    cls = from_commandline(cmd[option],
                           classname='weka.classifiers.Classifier')
    cls.build_classifier(dataset)

    # Evaluate on the training data itself (no separate test set).
    evaluation = Evaluation(dataset)
    # evaluation.evaluate_train_test_split(cls, dataset, 90, Random(1))  # evaluate by splitting train/test set
    evl = evaluation.test_model(cls, dataset)  # evaluate on test set

    print('predictions (' + str(len(evl)) + '): ')
    for i in range(num):
        print(evl[i - num], end=' ')
    # print(evaluation.summary())
    return evl[-num:]
def train_trees(data, attributes):
    """Fit one unpruned J48 tree per attribute (treating it as the class)
    and 5-fold cross-validate each one.

    Attributes with fewer than 5 real (non-NaN) values are skipped and get
    None placeholders so the returned lists stay index-aligned.

    :return: (classifiers, evaluations, per-instance distributions,
        indices of skipped attributes)
    """
    clfs, evls, dt_y_hat, unused_attributes = [], [], [], []
    for i, att in enumerate(attributes):
        data.class_index = i
        count_non_nans = np.count_nonzero(~np.isnan(data.values(i)))
        if count_non_nans < 5:
            # Too few labelled values to learn anything useful.
            unused_attributes.append(i)
            print('Not using attribute {}, only {} real values\n\n'.format(
                att, count_non_nans))
            clfs.append(None)
            evls.append(None)
            dt_y_hat.append(None)
            continue
        tree = Classifier(classname='weka.classifiers.trees.J48',
                          options=['-U', '-B', '-M', '2'])
        tree.build_classifier(data)
        evaluation = Evaluation(data)
        evaluation.crossvalidate_model(tree, data, 5, Random(1))
        dt_y_hat.append(tree.distributions_for_instances(data))
        clfs.append(tree)
        evls.append(evaluation)
    return clfs, evls, dt_y_hat, unused_attributes
def obtainBayesNet(file):
    """Cross-validate a BayesNet on <folderPathOfArffFiles>/<file>.arff and
    return the ROC area parsed out of Weka's class-details report.

    Relies on the module-level globals `folderPathOfArffFiles` and `kFold`.
    """
    #The path of the arff extension file must be put.
    data = converters.load_any_file(folderPathOfArffFiles + file + ".arff")
    #In the case of this specific data set, the first two attributes were removed since they
    # represent the name and ranking which are unique values that would affect the classification.
    # Depending on the data set, certain attributes must be removed.
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                    options=["-R", "1-2"])
    remove.inputformat(data)
    data = remove.filter(data)
    #It is specified that the class value is the last attribute.
    data.class_is_last()
    #Define the classifier to be used.
    classifier = Classifier(classname="weka.classifiers.bayes.BayesNet")
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(classifier, data, kFold, Random(42))
    #The ROC-AUC is extracted from the string that is received from Weka.
    # NOTE(review): the hard-coded slice [406:411] is brittle — it assumes a
    # fixed report layout; Evaluation.area_under_roc(class_index) would be a
    # robust replacement. Confirm which class index is intended.
    info = evaluation.class_details()
    roc_area = float(info[406:411])
    return roc_area
# Attribute selection with EvolutionarySearch + CfsSubsetEval over every CSV
# in the lab data directory, appending each result report next to its input.
data_dir = "\\\\egr-1l11qd2\\CLS_lab\\Junya Zhao\\Data driven model _paper [June 25_2018\\FeatureSelection\\EvlSearch\\"
globbed_files = glob.glob(data_dir + "*.csv")
for csv in globbed_files:
    data = converters.load_any_file(csv)
    data.class_is_last()
    search = ASSearch(classname="weka.attributeSelection.EvolutionarySearch",
                      options=[
                          "-population-size", "200", "-generations", "500",
                          "-crossover-probability", "0.6"
                      ])
    # Bug fix: the "-E" option was previously written as "E" (missing dash),
    # so Weka could not parse it as an option flag.
    evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval",
                             options=["-P", "1", "-E", "1"])
    attsel = AttributeSelection()
    attsel.folds(10)
    attsel.crossvalidation(True)
    attsel.seed(1)
    attsel.search(search)
    attsel.evaluator(evaluator)
    attsel.select_attributes(data)
    # NOTE(review): this Evaluation is never run on anything, so print(evl)
    # below reports an empty evaluation — confirm whether it can be removed.
    evl = Evaluation(data)
    print("# attributes: " + str(attsel.number_attributes_selected))
    print("attributes: " + str(attsel.selected_attributes))
    print("result string:\n" + attsel.results_string)
    print(evl)
    # write the report for each file
    with open(f"{csv}._report.csv", "a") as outfile:
        outfile.write(attsel.results_string)
    #with open(f"{csv}._label.txt","a") as output:
    #output.write(str(attsel.selected_attributes))
jvm.stop()
# Train MLP networks on the prepared training sets: cross-validate, then
# build and serialize each model.
dataSet20x20 = loader.load_file("trainingSet/dataSet20x20.arff")
dataSet20x20.class_is_last()
dataSet20x50 = loader.load_file("trainingSet/dataSet20x50.arff")
dataSet20x50.class_is_last()
dataSet50x20 = loader.load_file("trainingSet/dataSet50x20.arff")
dataSet50x20.class_is_last()

classifier1 = Classifier(
    classname="weka.classifiers.functions.MultilayerPerceptron",
    options=["-L", "0.3", "-M", "0.2", "-N", "500", "-V", "0", "-S", "0",
             "-E", "20", "-H", "9"])
classifier2 = Classifier(
    classname="weka.classifiers.functions.MultilayerPerceptron",
    options=["-L", "0.3", "-M", "0.2", "-N", "500", "-V", "0", "-S", "0",
             "-E", "20", "-H", "11"])
classifier3 = Classifier(
    classname="weka.classifiers.functions.MultilayerPerceptron",
    options=["-L", "0.3", "-M", "0.2", "-N", "500", "-V", "0", "-S", "0",
             "-E", "20", "-H", "9"])

# Bug fix: Python-2-only print statements converted to single-argument
# calls, which behave identically on Python 2 and 3.
print("\n\nTraining neural network 1")
evaluation1 = Evaluation(dataSet20x20)
evaluation1.crossvalidate_model(classifier1, dataSet20x20, 10, Random(42))
classifier1.build_classifier(dataSet20x20)
serialization.write("trainingSet/nn1.model", classifier1)
print("\n\n====================================================== NUERAL NETWORK 1 ======================================================")
print(evaluation1.summary())
print(evaluation1.class_details())

print("Training neural network 2")
evaluation2 = Evaluation(dataSet20x50)
evaluation2.crossvalidate_model(classifier2, dataSet20x50, 10, Random(42))
classifier2.build_classifier(dataSet20x50)
serialization.write("trainingSet/nn2.model", classifier2)
print("\n\n====================================================== NUERAL NETWORK 2 ======================================================")
print(evaluation2.summary())
print(evaluation2.class_details())
# Build a filtered linear-regression model on the group's calibration set
# and evaluate it against the matching test set.
print(group)
train = data_dir + os.sep + group + "_Cal.arff"
test = data_dir + os.sep + group + "_Test.arff"
pred = data_dir + os.sep + group + "_Val.arff"

loader = Loader(classname="weka.core.converters.ArffLoader")

# Each file's class attribute is its "reference value" column.
print(train)
train_data = loader.load_file(train)
train_data.class_index = train_data.attribute_by_name("reference value").index
print(test)
test_data = loader.load_file(test)
test_data.class_index = test_data.attribute_by_name("reference value").index
print(pred)
pred_data = loader.load_file(pred)
pred_data.class_index = pred_data.attribute_by_name("reference value").index

# Linear regression wrapped so the first attribute is removed before training.
cls = FilteredClassifier()
cls.classifier = Classifier(
    classname="weka.classifiers.functions.LinearRegression",
    options=["-S", "1", "-C"])
cls.filter = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                    options=["-R", "first"])
cls.build_classifier(train_data)

evl = Evaluation(train_data)
evl.test_model(cls, test_data)
print(evl.summary())
jvm.stop()