Exemple #1
0
def test_weka_classifier(clf, train, test):
    """Fit *clf* on *train*, evaluate it on *test*, and report key metrics.

    :param clf: a weka Classifier wrapper
    :param train: training Instances
    :param test: test Instances
    :return: dict with loss (scheme entropy), accuracy, AUC and error rate
    """
    clf.build_classifier(train)

    evaluation = Evaluation(train)
    evaluation.test_model(clf, test)

    metrics = {
        'loss': evaluation.sf_mean_scheme_entropy,
        'accuracy': evaluation.percent_correct,
        'auc': evaluation.weighted_area_under_roc,
        'err': evaluation.error_rate,
    }

    print(
        "# testing  | loss: {:.2}, accuracy: {:.4}, AUC: {:.2}, error: {:.2}".
        format(metrics['loss'], metrics['accuracy'], metrics['auc'],
               metrics['err']))

    return metrics
Exemple #2
0
    def run_bayes_hill_split(self, output_directory, parents=1):
        """Build a BayesNet classifier (Hill-Climbing structure search with
        up to *parents* parents per node), evaluate it on the held-out test
        data, and save the report plus the network graph.

        :param output_directory: directory the result files are written to
        :param parents: maximum number of parents per node (-P option)
        """
        # build classifier
        print("\nBuilding Bayes Classifier on training data. Parents = " +
              str(parents) + "\n")
        buildTimeStart = time.time()
        # Idiom fix: '"" + str(parents)' simplified to 'str(parents)'.
        cls = Classifier(
            classname="weka.classifiers.bayes.BayesNet",
            options=[
                "-D", "-Q",
                "weka.classifiers.bayes.net.search.local.HillClimber", "--",
                "-P", str(parents), "-S", "BAYES", "-E",
                "weka.classifiers.bayes.net.estimate.SimpleEstimator", "--",
                "-A", "0.5"
            ])
        cls.build_classifier(self.training_data)

        resultsString = ""
        resultsString = self.print_both(str(cls), resultsString)

        buildTimeString = "Bayes Split Classifier Built in " + str(
            time.time() - buildTimeStart) + " secs.\n"
        resultsString = self.print_both(buildTimeString, resultsString)

        # Evaluate classifier on the held-out test split
        resultsString = self.print_both("\nEvaluating on test data.",
                                        resultsString)

        buildTimeStart = time.time()
        evl = Evaluation(self.training_data)
        evl.test_model(cls, self.testing_data)

        resultsString = self.print_both(str(evl.summary()), resultsString)
        resultsString = self.print_both(str(evl.class_details()),
                                        resultsString)
        resultsString = self.print_both(str(evl.confusion_matrix),
                                        resultsString)
        buildTimeString = "\nBayes Split Classifier Evaluated in " + str(
            time.time() - buildTimeStart) + " secs.\n"
        resultsString = self.print_both(buildTimeString, resultsString)

        # Save results (text report and network graph) and clean up
        self.save_results("Bayes_Hill_P" + str(parents) + "_", resultsString,
                          output_directory)
        self.save_results("Bayes_Hill_P" + str(parents) + "_Graph", cls.graph,
                          output_directory, True)
Exemple #3
0
    def predict(self, X):
        """Predict class values for *X* with the trained classifier.

        A temporary 'class' column is added to X in place (instead of
        copying the potentially large frame) so the ARFF export has a
        class attribute; the column is removed again afterwards.

        :param X: feature table (mutated temporarily, restored on exit)
        :return: the predictions returned by Evaluation.test_model
        """
        evaluation = Evaluation(self.train_data)

        # Add class column (we can't copy X, because this is a large object,
        # so we add the column and remove it later).
        X['class'] = None
        try:
            filename = self.to_arff(X, True)
        finally:
            # Robustness fix: remove the temporary column even if the
            # ARFF export raises, so X is never left mutated.
            del X['class']

        loader = Loader("weka.core.converters.ArffLoader")
        test_data = loader.load_file(filename)
        test_data.class_is_last()

        preds = evaluation.test_model(self.classifier, test_data)

        return preds
Exemple #4
0
    def run_crossval(self, output_directory, classifier_name,
                     classifier_weka_spec, options_list):
        """Build the given classifier, run 10-fold cross-validation on the
        training data, and save the collected output.

        :param output_directory: directory the result file is saved into
        :param classifier_name: human-readable name used in messages/files
        :param classifier_weka_spec: weka classname of the classifier
        :param options_list: list of weka option strings
        """
        # build classifier
        print("\nBuilding " + classifier_name +
              " Classifier on training data.")
        buildTimeStart = time.time()
        cls = Classifier(classname=classifier_weka_spec, options=options_list)
        cls.build_classifier(self.training_data)

        resultsString = ""
        resultsString = self.print_both(str(cls), resultsString)

        buildTimeString = classifier_name + " Cross Eval Classifier Built in " + str(
            time.time() - buildTimeStart) + " secs.\n"
        resultsString = self.print_both(buildTimeString, resultsString)

        # Evaluate classifier
        resultsString = self.print_both("\nCross Evaluating on test data.",
                                        resultsString)

        buildTimeStart = time.time()
        evl = Evaluation(self.training_data)
        evl.crossvalidate_model(cls, self.training_data, 10, Random(1))

        resultsString = self.print_both(str(evl.summary()), resultsString)
        resultsString += "\n"
        resultsString = self.print_both(str(evl.class_details()),
                                        resultsString)
        resultsString += "\n"
        resultsString = self.print_both(str(evl.confusion_matrix),
                                        resultsString)
        buildTimeString = "\n\n" + classifier_name + " Cross Eval Classifier Evaluated in " + str(
            time.time() - buildTimeStart) + " secs.\n"
        resultsString = self.print_both(buildTimeString, resultsString)

        # Idiom fix: join options instead of a quadratic += loop; the
        # original '.'->'-' then '-'->'_' chain turns both characters into
        # '_', which the two replaces below do directly.
        options_string = "".join(str(option) for option in options_list)
        options_string = options_string.replace(".", "_").replace("-", "_")

        # Save results and cleanup
        self.save_results(classifier_name + options_string + "_Crossval",
                          resultsString, output_directory)
def ClassifyWithDT(f3, test, tree, fileOut):
    """Train decision tree *tree* on *f3*, evaluate it on *test*, and print
    accuracy, error rate, and per-class precision/recall/AUC.

    :param f3: training Instances
    :param test: test Instances
    :param tree: weka tree classifier to build
    :param fileOut: unused here; kept for interface compatibility
    :return: the Evaluation object
    """
    # Bug fix: renamed 'eval' -> 'evl' so the Python builtin eval() is not
    # shadowed.
    evl = Evaluation(f3)
    tree.build_classifier(f3)

    evl.test_model(tree, test)

    print("\n\nSelf-Training   data========" +
          str((1 - evl.error_rate) * 100) + " number of instances==" +
          str(f3.num_instances) + "\n")
    print("\n Error Rate==" + str(evl.error_rate) + "\n")

    print("\n     precision   recall     areaUnderROC            \n\n")
    for i in range(test.get_instance(0).num_classes):
        print(
            str(evl.precision(i)) + "  " + str(evl.recall(i)) + "  " +
            str(evl.area_under_roc(i)) + "\n")

    return evl
Exemple #6
0
def run():
    """Convert the bank CSV to ARFF, train a J48 tree and save its text
    dump, then report 10-fold CV accuracy of a second J48 model.

    :return: percent-correct of the cross-validated J48, as a string
    """
    jvm.start()
    load_csv = Loader("weka.core.converters.CSVLoader")
    data_csv = load_csv.load_file(
        "/Users/imeiliasantoso/web_graduate_project5/register_page/bank-full_input.csv"
    )

    # Re-save as ARFF so the standard ARFF loader can read it back.
    saver = Saver("weka.core.converters.ArffSaver")
    saver.save_file(
        data_csv,
        "/Users/imeiliasantoso/web_graduate_project5/register_page/bank-full_input.arff"
    )

    load_arff = Loader("weka.core.converters.ArffLoader")
    data_arff = load_arff.load_file(
        "/Users/imeiliasantoso/web_graduate_project5/register_page/bank-full_input.arff"
    )
    data_arff.class_is_last()

    cls = Classifier(classname="weka.classifiers.trees.J48",
                     options=["-C", "0.5"])
    cls.build_classifier(data_arff)
    # (Removed a per-instance classify/distribution loop whose results were
    # never used anywhere.)

    # Save the tree text; 'with' guarantees the handle is closed even on
    # error (the original left the file object unmanaged).
    with open(
            "/Users/imeiliasantoso/web_graduate_project5/register_page/bank-full_input.txt",
            "w") as saveFile:
        saveFile.write(str(cls))

    global j48
    J48_class = Classifier(classname="weka.classifiers.trees.J48",
                           options=["-C", "0.25", "-M", "2"])
    J48_class.build_classifier(data_arff)
    evaluationj48 = Evaluation(data_arff)
    evaluationj48.crossvalidate_model(J48_class, data_arff, 10, Random(100))
    j48 = str(evaluationj48.percent_correct)
    jvm.stop()
    return j48
def use_classifier(data):
    """
    Runs the AttributeSelectedClassifier meta-classifier (CFS subset
    evaluation plus backward GreedyStepwise search around a J48 base
    learner) and prints the 10-fold cross-validation summary.

    :param data: the dataset to use
    :type data: Instances
    """
    print("\n1. Meta-classifier")
    meta = Classifier(classname="weka.classifiers.meta.AttributeSelectedClassifier")
    # Nested weka options need carefully escaped quotes; assigning the Java
    # objects through the bean properties is simpler and less error prone.
    meta.set_property(
        "classifier",
        Classifier(classname="weka.classifiers.trees.J48").jobject)
    meta.set_property(
        "evaluator",
        ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval").jobject)
    meta.set_property(
        "search",
        ASSearch(classname="weka.attributeSelection.GreedyStepwise",
                 options=["-B"]).jobject)
    evl = Evaluation(data)
    evl.crossvalidate_model(meta, data, 10, Random(1))
    print(evl.summary())
Exemple #8
0
def experiment_file_random(path_features, path_folder_save_results, options,
                           classifier, fold, random, name):
    """Cross-validate *classifier* (weka classname + option string) on the
    feature file, saving the metrics table and the per-instance CSV
    predictions under *path_folder_save_results*."""
    print(name + "  Start: " + str(datetime.datetime.now()))
    started = datetime.datetime.now()

    cls = Classifier(classname=classifier,
                     options=weka.core.classes.split_options(options))

    dataset = converters.load_any_file(path_features)
    dataset.class_is_last()

    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.CSV")
    evl = Evaluation(dataset)
    evl.crossvalidate_model(cls, dataset, fold, Random(random), pout)

    # Single-row result table for this run (class index 1 is "positive").
    d_results = {
        'percent_correct': [evl.percent_correct],
        'percent_incorrect': [evl.percent_incorrect],
        'precision': [evl.precision(1)],
        'recall': [evl.recall(1)],
        'f-score': [evl.f_measure(1)],
        'confusion_matrix': [evl.matrix()],  # confusion matrix as text
    }
    pd.DataFrame(data=d_results).to_csv(
        path_folder_save_results + '/' + str(name) + '.csv', index=False)

    predictions = pout.buffer_content()

    check_folder_or_create(path_folder_save_results + '/' + 'prediction')

    with open(
            path_folder_save_results + '/' + 'prediction/' + str(name) +
            '.csv', 'w') as f:
        f.write(predictions)
    print(name + "  End: " + str(datetime.datetime.now() - started))
Exemple #9
0
    def executeKFoldClassifier(self, featureInclusion, kFold):
        """Drop the attributes flagged in *featureInclusion*, run
        CVParameterSelection with *kFold*-fold cross-validation, and return
        the percentage of correctly classified instances.

        :param featureInclusion: list of booleans, one per attribute
        :param kFold: number of cross-validation folds
        :return: percent of correctly classified instances (float)
        """
        # NOTE(review): attributes are deleted when their flag is True —
        # confirm this is not inverted relative to the caller's intent.
        # Indices shift left after each delete, hence the running offset.
        deleteFeatures = 0
        for i in range(0, len(featureInclusion)):
            if featureInclusion[i]:
                self.instances.deleteAttributeAt(i - deleteFeatures)
                deleteFeatures += 1
        self.instances.setClassIndex(self.instances.numAttributes - 1)

        cvParameterSelection = javabridge.make_instance(
            "weka/classifiers/meta/CVParameterSelection", "()V")
        javabridge.call(cvParameterSelection, "setNumFolds", "(I)V", kFold)
        javabridge.call(cvParameterSelection,
                        "buildClassifier(weka/core/Instances)V",
                        self.instances)

        # Renamed 'eval' -> 'evl' so the builtin eval() is not shadowed.
        evl = Evaluation(self.instances)
        # Bug fix: crossvalidate_model expects a weka Random object; the
        # original passed the stdlib random() float, which cannot work.
        evl.crossvalidate_model(cvParameterSelection, self.instances, kFold,
                                Random(1))

        # Bug fix: percent_correct is a property, not a callable.
        return evl.percent_correct
Exemple #10
0
def runBayes(file, bound):
    """Cross-validate Naive Bayes on a CSV file (class attribute first)
    after removing the attribute range *bound*; print and return the
    per-class details string."""
    csv_loader = Loader(classname="weka.core.converters.CSVLoader")
    dataset = csv_loader.load_file(file)
    dataset.class_is_first()

    remover = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                     options=["-R", bound])
    remover.inputformat(dataset)
    reduced = remover.filter(dataset)

    bayes = Classifier(classname="weka.classifiers.bayes.NaiveBayes")

    evl = Evaluation(reduced)
    evl.crossvalidate_model(bayes, reduced, 10, Random(1))

    print(evl.percent_correct)
    result = evl.class_details()
    print(result)
    return result
Exemple #11
0
def HOV(dataset, algo, num_datasets):
    """Hold-out validation: 70/30 split, train *algo*, report AUC(class 1).

    Bug fix: the original body mixed tabs and spaces (the matrix print line
    was space-indented inside a tab-indented block), which raises TabError
    under Python 3; the whole body now uses consistent indentation.

    :param dataset: path to the ARFF file
    :param algo: weka classifier classname
    :param num_datasets: unused; kept for interface compatibility
    :return: area under the ROC curve for class index 1
    """
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(dataset)
    data.class_is_last()

    train, test = data.train_test_split(70.0, Random(10))

    cls = Classifier(classname=algo)
    cls.build_classifier(train)

    evl = Evaluation(train)
    evl.test_model(cls, test)

    print(evl.summary("=== " + str(algo) + " on" + str(dataset) + " ===", False))
    print(evl.matrix("=== on click prediction(confusion matrix) ==="))
    print("For Algo" + str(algo) + "areaUnderROC/1: for HOV " + str(evl.area_under_roc(1)))

    return evl.area_under_roc(1)
Exemple #12
0
    def crossValidate(self,
                      arrfFile=None,
                      classname="weka.classifiers.trees.J48",
                      options=None):
        """Run 10-fold cross-validation of the given classifier on self.data.

        :param arrfFile: optional ARFF file to (re)load into self.data first
        :param classname: weka classifier classname
        :param options: classifier options; defaults to ["-C", "0.3"]
        """
        # Bug fix: the original used a mutable list as default argument,
        # which is shared across calls; build the default per call instead.
        if options is None:
            options = ["-C", "0.3"]

        if arrfFile is not None:
            self.initData(arrfFile)

        if self.data is None:
            return

        # Bug fix: Python 2 print statement converted to a print() call
        # (prints identically under Python 2 and 3 for a single argument).
        print('Classificador ' + str(classname) + ' ' + ' '.join(options))
        cls = Classifier(classname=classname, options=options)

        evl = Evaluation(self.data)
        evl.crossvalidate_model(cls, self.data, 10, Random(1))

        print(evl.percent_correct)
        print(evl.summary())
        print(evl.class_details())
Exemple #13
0
def CV10(dataset, algo):
    """Cross-validate *algo* on *dataset* and print the AUC for class 1.

    NOTE(review): despite the name (10-fold CV), only 2 folds are used
    below — confirm which is intended.

    :param dataset: path to the ARFF file
    :param algo: weka classifier classname
    """
    # Bug fix: Python 2 print statements converted to print() calls.
    print("inside 10cv")
    print("dataset ----" + dataset)
    print("algorithm ----" + algo)

    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(dataset)
    data.class_is_last()

    cls = Classifier(classname=algo)

    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 2, Random(5))

    print("areaUnderROC/1: " + str(evl.area_under_roc(1)))
Exemple #14
0
def train_and_eval_weka_classifier(clf, train, valid, n_instances):
    """Train *clf* on a subset of *train* holding roughly *n_instances*
    instances, evaluate on *valid*, and report key metrics.

    :param clf: weka Classifier wrapper
    :param train: full training Instances
    :param valid: validation Instances
    :param n_instances: number of training instances to actually use
    :return: dict with loss (scheme entropy), accuracy, AUC and error rate
    """
    total_train_inst = train.num_instances

    # Fraction of the training data to use, in percent (true division, so
    # this is a float unless n_instances covers the whole set exactly).
    percentage = (n_instances * 100) / total_train_inst

    if percentage == 100:
        opt = train
    else:
        # train_test_split returns (used_part, remainder).
        opt, extra = train.train_test_split(percentage, Random(1))

    print('total_train_inst:    ', total_train_inst, '| percentage:    ',
          percentage, '| used_inst:     ', opt.num_instances)

    # (Removed an unused signal/AlarmException/alarmHandler scaffold that
    # was defined but never armed.)

    clf.build_classifier(opt)

    evl = Evaluation(opt)
    evl.test_model(clf, valid)

    acc = evl.percent_correct
    auc = evl.weighted_area_under_roc
    err = evl.error_rate
    log = evl.sf_mean_scheme_entropy

    print(
        "# validating  | loss: {:.2}, accuracy: {:.4}, AUC: {:.2}, error: {:.2}"
        .format(log, acc, auc, err))

    return {'loss': log, 'accuracy': acc, 'auc': auc, 'err': err}
def TrainingModel(arff, modelOutput, clsfier):
    """Train a weka classifier on an ARFF training set, cross-validate it,
    print binary-classification statistics, and serialize the model.

    :param arff: path to the training ARFF file (class attribute first)
    :param modelOutput: path the serialized model is written to
    :param clsfier: classifier name relative to "weka.classifiers."
    """
    # Start the Java virtual machine.
    jvm.start()
    # Load the training set; the class attribute is the first column.
    arff_loader = Loader(classname="weka.core.converters.ArffLoader")
    train_data = arff_loader.load_file(arff)
    train_data.class_is_first()
    # Build the requested classifier.
    model = Classifier(classname="weka.classifiers." + clsfier)
    model.build_classifier(train_data)
    print(model)
    # Wrap in a FilteredClassifier and run 10-fold cross-validation.
    wrapped = FilteredClassifier()
    wrapped.classifier = model
    evl = Evaluation(train_data)
    evl.crossvalidate_model(wrapped, train_data, 10, Random(1))
    print(evl.percent_correct)
    print(evl.summary())
    print(evl.class_details())
    print(evl.matrix())
    # Binary confusion-matrix statistics (assumes a 2x2 matrix).
    cm = evl.confusion_matrix
    TN, FP = float(cm[0][0]), float(cm[0][1])
    FN, TP = float(cm[1][0]), float(cm[1][1])
    TPR = TP / (TP + FN)  # sensitivity
    TNR = TN / (FP + TN)  # specificity
    PPV = TP / (TP + FP)
    NPV = TN / (TN + FN)
    print("算法: " + clsfier)
    print("敏感度 TPR: " + str(TPR))
    print("特异度 TNR: " + str(TNR))
    print("PPV: " + str(PPV))
    print("NPV: " + str(NPV))
    # Persist the trained model together with the dataset header.
    model.serialize(modelOutput, header=train_data)
    # Shut the JVM down.
    jvm.stop()
    print("分析模型建立完成")
Exemple #16
0
def HOV(dataset, algo):
    """Hold-out validation (70/30 split) of *algo* on *dataset*.

    NOTE(review): another function named HOV is defined earlier in this
    file; this later definition shadows it at import time — confirm that
    is intended.

    :param dataset: path to the ARFF file
    :param algo: weka classifier classname
    :return: AUC for class index 1, as a string
    """
    # Bug fix: Python 2 print statements converted to print() calls.
    print("inside hov")
    print("dataset ----" + dataset)
    print("algorithm ----" + algo)

    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(dataset)
    data.class_is_last()

    train, test = data.train_test_split(70.0, Random(10))

    cls = Classifier(classname=algo)
    cls.build_classifier(train)

    evl = Evaluation(train)
    evl.test_model(cls, test)

    return (str(evl.area_under_roc(1)))
Exemple #17
0
def SMOreg():
    """Cross-validate an SMOreg regressor with an RBF kernel on the trial
    dataset, print the summary plus predictions, and save the model."""
    data = Loader(classname="weka.core.converters.ArffLoader").load_file(
        "First_trial_regression.arff")
    data.class_is_last()

    regressor = KernelClassifier(
        classname="weka.classifiers.functions.SMOreg",
        options=["-N", "0"])
    regressor.kernel = Kernel(
        classname="weka.classifiers.functions.supportVector.RBFKernel",
        options=["-G", "0.2"])

    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")
    evl = Evaluation(data)
    evl.crossvalidate_model(regressor, data, 10, Random(486), pout)

    print(evl.summary())
    print(pout.buffer_content())

    # save model
    serialization.write_all("SMOreg.model2", regressor)
Exemple #18
0
def weka_bayesnet(filearffpath='data/datatobayes.arff'):
    """Score a BayesNet classifier on an ARFF file via 10-fold CV.

    :param filearffpath: path to the ARFF data file
    :return: area under the ROC curve for the first class
    """
    # Preparing the data.
    # Bug fix: the path parameter was previously ignored and a hard-coded
    # file was loaded instead; the default value preserves old behavior.
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(filearffpath)
    # The Remove filter is prepared but deliberately not applied
    # (kept as in the original).
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                    options=["-R", "first"])
    remove.inputformat(data)
    filtered = data  #remove.filter(data)

    # Classifier test
    from weka.classifiers import Classifier, Evaluation
    from weka.core.classes import Random
    filtered.class_is_last()
    classifier = Classifier(classname="weka.classifiers.bayes.BayesNet",
                            options=['-D'])
    evaluation = Evaluation(filtered)
    evaluation.crossvalidate_model(classifier, filtered, 10, Random(42))
    return evaluation.area_under_roc(class_index=0)  #ROC, no std of kfold
def DecisionTree(rnd_data, folds, seed, data):
    """Manual k-fold cross-validation of a J48 decision tree.

    Cuts *rnd_data* into *folds* contiguous slices, trains a fresh J48 on
    the complement of each slice, and accumulates all fold results into a
    single Evaluation, then prints a summary.

    :param rnd_data: the (already randomized) dataset the folds are cut from
    :param folds: number of cross-validation folds
    :param seed: randomization seed (used for reporting only here)
    :param data: original dataset, used only for its relation name
    """
    data_size = rnd_data.num_instances
    fold_size = math.floor(data_size / folds)

    # cross-validation: one Evaluation accumulates results over all folds
    evaluation = Evaluation(rnd_data)
    for i in range(folds):
        this_fold = fold_size
        test_start = i * fold_size
        test_end = (test_start + fold_size)
        # When folds don't divide evenly, the last fold absorbs the
        # remaining instances.
        if ((data_size - test_end) / fold_size < 1):
            this_fold = data_size - test_start
        test = Instances.copy_instances(rnd_data, test_start,
                                        this_fold)  # generate validation fold
        if i == 0:
            # First fold: training data is everything after the test slice.
            train = Instances.copy_instances(rnd_data, test_end,
                                             data_size - test_end)
        else:
            # Otherwise: concatenate the parts before and after the slice.
            train_1 = Instances.copy_instances(rnd_data, 0, test_start)
            train_2 = Instances.copy_instances(rnd_data, test_end,
                                               data_size - test_end)
            train = Instances.append_instances(
                train_1, train_2)  # generate training fold

        # build and evaluate classifier
        cls = Classifier(classname="weka.classifiers.trees.J48")
        cls.build_classifier(train)  # build classifier on training set
        evaluation.test_model(cls,
                              test)  # test classifier on validation/test set

    print("")
    print("=== Decision Tree ===")
    print("Classifier: " + cls.to_commandline())
    print("Dataset: " + data.relationname)
    print("Folds: " + str(folds))
    print("Seed: " + str(seed))
    print("")
    print(
        evaluation.summary("=== " + str(folds) + "-fold Cross-Validation ==="))
Exemple #20
0
def run_ibk(file):
    """Run IBk (k=3) with 10-fold cross-validation on an ARFF file and
    write the evaluation summary and predictions next to the input file.

    :param file: pathlib.Path to the .arff file
    """
    # Get filename from Pathlib object
    filename = file.parts[-1]
    # Idiom fix: renamed 'dir' -> 'out_dir' so the builtin dir() is not
    # shadowed.
    out_dir = file.parents[0]

    print("Running IBk on %s" % filename)

    if not filename.endswith(".arff"):
        print("%s not ARFF file." % filename)
        return

    # Removes '.arff' from filename
    filename_base = filename[:-5]

    # Load data with class as first attr
    data = load_Arff_file(file)
    data.class_is_first()

    # Use IBk and set options
    cls = Classifier(classname="weka.classifiers.lazy.IBk",
                     options=["-K", "3"])

    # Predictions stored in pout
    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")

    # Evaluate data
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(cls, data, 10, Random(1), output=pout)

    # Save summary, class details and confusion matrix to file
    result_output = filename_base + "_eval_results.txt"
    output_eval(evaluation, out_dir / result_output)

    # Save the predicted results to file
    prediction_output = filename_base + "_pred_results.txt"
    output_pred(pout, out_dir / prediction_output)

    print("IBk complete")
Exemple #21
0
def evaluate(classifier, data):
    """
    Evaluate *classifier* on *data*; the evaluation mode (train/test
    split, cross-validation, or test on the training data itself) is
    chosen via command-line arguments.

    :param classifier: Classifier
    :param data: weka arff data
    :return: Evaluation
    """
    args = evaluate_parser()
    evl = Evaluation(data)
    mode = args['evaluation']
    if mode == 'train_test':
        evl.evaluate_train_test_split(classifier, data,
                                      int(args['train_size']), Random(1))
    elif mode == 'cross_validate':
        evl.crossvalidate_model(classifier, data, int(args['folds']),
                                Random(42))
    else:
        # Fallback: evaluate directly on the supplied data.
        evl.test_model(classifier, data)
    return evl
	def training(self):
		"""Train one classifier per missing-data pattern and register each,
		weighted by its cross-validated accuracy, in the classifier pool."""
		# Data preparation: impute missing values.
		self.imp = Imputation(self.data)

		# Feature selection on the imputed data.
		self.features = FeatureSelection(self.imp.imputed_data)
		data_selected = self.features.data_selected
		self.selected_features = self.features.selected_features

		# Find the missing-data patterns present in the raw data.
		self.missing_patterns = MissingPatterns(self.data, self.selected_features).missing_patterns

		# Train one classifier per missing pattern.
		for mpi in self.missing_patterns:

			# Features available under this pattern (selected minus missing).
			cpi = set(self.selected_features) - set(mpi)
			data_temp = Instances.copy_instances(data_selected, from_row=0, num_rows=data_selected.num_instances)
			data_temp.class_is_last()

			# Restrict the training data to the available features.
			data_temp = self.reduceData(data_temp, cpi, self.data)

			
			# Train the classifier on the imputed, reduced data.
			classifier = Classifier(classname=self.learn_class, options=self.options)
			classifier.build_classifier(data_temp)
			
			# Weight the classifier by its accuracy estimate
			# (1 - mean absolute error over 15-fold cross-validation).
			evl = Evaluation(data_temp)
			evl.crossvalidate_model(classifier, data_temp, 15, Random(1))

			# Add the trained classifier to the ensemble.
			my_classifier = MyClassifier(classifier, cpi, 1 - evl.mean_absolute_error)
			self.classifiers.add(my_classifier)
    def crossEvaluate(self):
        """
        Evaluate the classifier using cross-validation with K folds.

        :return: True when evaluation succeeded and results were stored,
                 False otherwise
        """
        if self.classifierInstance is not None:
            # Bug fix: Python 2 print statement converted to print() call.
            print('[Cross-validate data]')

            try:
                # Cross validation evaluation
                evaluatorInstance = Evaluation(self.classificationData)
                evaluatorInstance.crossvalidate_model(self.classifierInstance,
                                                      self.classificationData,
                                                      self.evaluationNumFolds,
                                                      Random(1))

                # Store evaluation results
                self.setEvaluationResults(evaluatorInstance)
                return True
            except Exception:
                # Narrowed from a bare 'except:' so SystemExit and
                # KeyboardInterrupt are no longer swallowed.
                return False
        return False
Exemple #24
0
    def run_ibk_crossval(self, output_directory):
        """Build an IBk (k=3, LinearNNSearch with Euclidean distance)
        classifier, run 10-fold cross-validation on the training data, and
        save the collected output to *output_directory*."""
        # Build the classifier and time the build.
        print("\nBuilding Classifier on training data.")
        started = time.time()
        learner = Classifier(
            classname="weka.classifiers.lazy.IBk",
            options=[
                "-K", "3", "-W", "0", "-A",
                "weka.core.neighboursearch.LinearNNSearch -A \"weka.core.EuclideanDistance -R first-last\""
            ])
        learner.build_classifier(self.training_data)

        report = self.print_both(str(learner), "")
        report = self.print_both(
            "IBK Cross Eval Classifier Built in " +
            str(time.time() - started) + " secs.\n", report)

        # Cross-validate and time the evaluation.
        report = self.print_both("\nCross Evaluating on test data.", report)

        started = time.time()
        evl = Evaluation(self.training_data)
        evl.crossvalidate_model(learner, self.training_data, 10, Random(1))

        for section in (evl.summary(), evl.class_details(),
                        evl.confusion_matrix):
            report = self.print_both(str(section), report)
        report = self.print_both(
            "\nIBK Cross Eval Classifier Evaluated in " +
            str(time.time() - started) + " secs.\n", report)

        # Save results and clean up.
        self.save_results("IBK_Crossval", report, output_directory)
Exemple #25
0
def train(option, sym, num):
    """Train the regressor selected by *option* on the history set for
    *sym* and return its predictions for the last *num* instances.

    :param option: one of the keys of the command-line table below
    :param sym: symbol whose histSet_<sym>.arff file is loaded
    :param num: number of trailing predictions to print and return
    """
    # load dataset given the symbol
    arff_path = os.path.join('HistSet', 'histSet_%s.arff' % sym)
    dataset = Loader("weka.core.converters.ArffLoader").load_file(arff_path)
    dataset.class_is_last()  # set the last attribute as class attribute

    # full weka command lines for the supported regressors
    cmd = {
        'DecisionTable':
        'weka.classifiers.rules.DecisionTable -X 1 -S "weka.attributeSelection.BestFirst -D 1 -N 5"',
        'SMOreg':
        'weka.classifiers.functions.SMOreg -C 1.0 -N 0 -I "weka.classifiers.functions.supportVector.RegSMOImproved -L 0.001 -W 1 -P 1.0E-12 -T 0.001 -V" -K "weka.classifiers.functions.supportVector.PolyKernel -C 250007 -E 1.0"',
        'LinearRegression':
        'weka.classifiers.functions.LinearRegression -S 0 -R 1.0E-8',
        'GaussianProcesses':
        'weka.classifiers.functions.GaussianProcesses -L 1.0 -N 0 -K "weka.classifiers.functions.supportVector.RBFKernel -C 250007 -G 1.0"',
    }

    cls = from_commandline(cmd[option],
                           classname='weka.classifiers.Classifier')
    cls.build_classifier(dataset)

    # Evaluate on the training data itself; test_model returns the
    # per-instance predictions.
    evaluation = Evaluation(dataset)
    evl = evaluation.test_model(cls, dataset)
    print('predictions (' + str(len(evl)) + '): ')
    for offset in range(num):
        print(evl[offset - num], end=' ')
    return evl[-num:]
def train_trees(data, attributes):
    """Fit one unpruned J48 tree per attribute (treating that attribute as
    the class) and cross-validate each; attributes with fewer than five
    real (non-NaN) values are skipped.

    :return: (classifiers, evaluations, per-instance distributions,
              indices of skipped attributes) — skipped slots hold None
    """
    clfs = []
    evls = []
    dt_y_hat = []
    unused_attributes = []

    for idx, att in enumerate(attributes):
        data.class_index = idx

        # Number of non-NaN values available for this attribute.
        real_values = np.count_nonzero(~np.isnan(data.values(idx)))

        if real_values < 5:
            unused_attributes.append(idx)
            print('Not using attribute {}, only {} real values\n\n'.format(
                att, real_values))
            clfs.append(None)
            evls.append(None)
            dt_y_hat.append(None)
            continue

        tree = Classifier(classname='weka.classifiers.trees.J48',
                          options=['-U', '-B', '-M', '2'])
        tree.build_classifier(data)

        crossval = Evaluation(data)
        crossval.crossvalidate_model(tree, data, 5, Random(1))

        dt_y_hat.append(tree.distributions_for_instances(data))
        clfs.append(tree)
        evls.append(crossval)

    return clfs, evls, dt_y_hat, unused_attributes
Exemple #27
0
def obtainBayesNet(file):
    """Cross-validate a BayesNet on <folderPathOfArffFiles>/<file>.arff and
    return the ROC area parsed out of Weka's per-class details report."""
    #The path of the arff extension file must be put.
    data = converters.load_any_file(folderPathOfArffFiles + file + ".arff")

    #In the case of this specific data set, the first two attributes were removed since they
    #   represent the name and ranking which are unique values that would affect the classification.
    #   Depending on the data set, certain attributes must be removed.
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                    options=["-R", "1-2"])
    remove.inputformat(data)
    data = remove.filter(data)
    #It is specified that the class value is the last attribute.
    data.class_is_last()

    #Define the classifier to be used.
    classifier = Classifier(classname="weka.classifiers.bayes.BayesNet")
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(classifier, data, kFold, Random(42))

    #The ROC-AUC is extracted from the string that is received from Weka.
    # NOTE(review): the fixed slice [406:411] depends on the exact layout of
    # the class_details() text and silently breaks if column widths or class
    # counts change — consider extracting the value with a regex instead.
    info = evaluation.class_details()
    roc_area = float(info[406:411])

    return roc_area
# Run EvolutionarySearch-based attribute selection over every CSV in the
# lab data directory and append each report to a per-file CSV.
data_dir = "\\\\egr-1l11qd2\\CLS_lab\\Junya Zhao\\Data driven model _paper [June 25_2018\\FeatureSelection\\EvlSearch\\"
globbed_files = glob.glob(data_dir + "*.csv")
for csv in globbed_files:
    data = converters.load_any_file(csv)
    data.class_is_last()
    search = ASSearch(classname="weka.attributeSelection.EvolutionarySearch",
                      options=[
                          "-population-size", "200", "-generations", "500",
                          "-crossover-probability", "0.6"
                      ])
    # Bug fix: "E" was missing its leading dash, so it was not parsed as
    # the -E option by Weka's option handling.
    evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval",
                             options=["-P", "1", "-E", "1"])
    attsel = AttributeSelection()
    attsel.folds(10)
    attsel.crossvalidation(True)
    attsel.seed(1)
    attsel.search(search)
    attsel.evaluator(evaluator)
    attsel.select_attributes(data)
    # NOTE(review): this Evaluation is never run against a model; printing
    # it below only shows an empty evaluation object — confirm intent.
    evl = Evaluation(data)
    print("# attributes: " + str(attsel.number_attributes_selected))
    print("attributes: " + str(attsel.selected_attributes))
    print("result string:\n" + attsel.results_string)
    print(evl)
    # write the report for each file
    with open(f"{csv}._report.csv", "a") as outfile:
        outfile.write(attsel.results_string)

jvm.stop()
# Load the three training sets; the class attribute is the last column.
dataSet20x20 = loader.load_file("trainingSet/dataSet20x20.arff")
dataSet20x20.class_is_last()

dataSet20x50 = loader.load_file("trainingSet/dataSet20x50.arff")
dataSet20x50.class_is_last()

dataSet50x20 = loader.load_file("trainingSet/dataSet50x20.arff")
dataSet50x20.class_is_last()

# Multilayer perceptrons differing only in hidden-layer size (-H).
classifier1 = Classifier(classname="weka.classifiers.functions.MultilayerPerceptron", options=["-L", "0.3", "-M", "0.2", "-N", "500", "-V", "0", "-S", "0", "-E", "20", "-H", "9"])
classifier2 = Classifier(classname="weka.classifiers.functions.MultilayerPerceptron", options=["-L", "0.3", "-M", "0.2", "-N", "500", "-V", "0", "-S", "0", "-E", "20", "-H", "11"])
classifier3 = Classifier(classname="weka.classifiers.functions.MultilayerPerceptron", options=["-L", "0.3", "-M", "0.2", "-N", "500", "-V", "0", "-S", "0", "-E", "20", "-H", "9"])

# Bug fix: Python 2 print statements converted to print() calls so the
# script also runs under Python 3 (output is unchanged).
print("\n\nTraining neural network 1")
evaluation1 = Evaluation(dataSet20x20)
evaluation1.crossvalidate_model(classifier1, dataSet20x20, 10, Random(42))
classifier1.build_classifier(dataSet20x20)
serialization.write("trainingSet/nn1.model", classifier1)
print("\n\n====================================================== NUERAL NETWORK 1 ======================================================")
print(evaluation1.summary())
print(evaluation1.class_details())

print("Training neural network 2")
evaluation2 = Evaluation(dataSet20x50)
evaluation2.crossvalidate_model(classifier2, dataSet20x50, 10, Random(42))
classifier2.build_classifier(dataSet20x50)
serialization.write("trainingSet/nn2.model", classifier2)
print("\n\n====================================================== NUERAL NETWORK 2 ======================================================")
print(evaluation2.summary())
print(evaluation2.class_details())
Exemple #30
0
    # NOTE(review): these lines are indented — presumably the body of a loop
    # over `group` values whose header is outside this view; confirm.
    print(group)
    # Calibration / test / validation ARFF paths for this group.
    train = data_dir + os.sep + group + "_Cal.arff"
    test = data_dir + os.sep + group + "_Test.arff"
    pred = data_dir + os.sep + group + "_Val.arff"

    loader = Loader(classname="weka.core.converters.ArffLoader")
    print(train)
    train_data = loader.load_file(train)
    # The class attribute is the column named "reference value".
    train_data.class_index = train_data.attribute_by_name(
        "reference value").index
    print(test)
    test_data = loader.load_file(test)
    test_data.class_index = test_data.attribute_by_name(
        "reference value").index
    print(pred)
    pred_data = loader.load_file(pred)
    pred_data.class_index = pred_data.attribute_by_name(
        "reference value").index

    # Linear regression behind a Remove filter that drops the first column.
    cls = FilteredClassifier()
    cls.classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression",
        options=["-S", "1", "-C"])
    cls.filter = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                        options=["-R", "first"])
    cls.build_classifier(train_data)
    evl = Evaluation(train_data)
    evl.test_model(cls, test_data)
    print(evl.summary())

jvm.stop()