def load_training_set(self, filename, encoding='UTF-8'):
    """\
    Read the training data from an ARFF file into a fresh DataSet and
    optionally shorten it according to the train_part setting.

    When self.train_part is lower than 1, the loaded set is replaced by
    subset(0, n, copy=False), where n is train_part times the number of
    loaded instances, passed through round() and int(). Otherwise the
    whole set is returned untouched.

    @param filename: the ARFF file to read (str() is applied for the log message)
    @param encoding: encoding handed over to DataSet.load_from_arff
    @return: the loaded (and possibly shortened) DataSet
    """
    message = 'Loading training data set from ' + str(filename) + '...'
    log_info(message)

    data = DataSet()
    data.load_from_arff(filename, encoding)

    # Guard clause: nothing to strip unless train_part is below 1.
    if not (self.train_part < 1):
        return data

    # presumably subset(0, n) keeps the first n instances -- verify against DataSet
    keep_count = int(round(self.train_part * len(data)))
    return data.subset(0, keep_count, copy=False)
def evaluate(self, test_file, encoding='UTF-8', classif_file=None):
    """\
    Classify the instances of the given ARFF test file and score the
    predictions against the classes stored in that file.

    If classif_file is given, the predictions are added to the test data
    as an extra attribute named self.PREDICTED and the merged data set is
    written to classif_file (using the same encoding).

    @param test_file: the ARFF file with test instances
    @param encoding: encoding used for reading the test file and for \
        writing the classification results
    @param classif_file: optional output ARFF file for the results
    @return: the result of zero_one_score on the golden and predicted \
        values (accuracy)
    """
    test_data = DataSet()
    test_data.load_from_arff(test_file, encoding)

    # Predictions are obtained before the golden classes, as in the original flow.
    predicted = self.classify(test_data)
    gold = self.get_classes(test_data, dtype=None)

    if classif_file is not None:
        # Build a one-attribute data set from the predictions, reusing the
        # definition of the class attribute, then rename it so that it does
        # not share the name of the golden class attribute when merged.
        class_attrib = test_data.get_attrib(self.class_attr)
        predictions = DataSet()
        predictions.load_from_vect(class_attrib, predicted)
        predictions.rename_attrib(self.class_attr, self.PREDICTED)
        test_data.merge(predictions)
        test_data.save_to_arff(classif_file, encoding)

    return zero_one_score(gold, predicted)