def run_with_classifier(self, cls, data_for_folds):
    """
    Perform cross-validation with one classifier.

    @param cls: Classifier to use in the experiment.
    @param data_for_folds: Contains one list of True/False values for each of
           the folds to be run. Each list states for every item of the dataset,
           whether the item is in the current fold part of the training dataset
           or the test dataset.
    @return: Measurements for quality and runtime metrics.
    """
    quality = []
    runtimes = []
    for train, test in data_for_folds:
        # select the training and testing portions of the dataset for this fold
        data_train = self.dataset.data[train]
        data_test = self.dataset.data[test]
        target_train = self.dataset.target[train]
        target_test = self.dataset.target[test]

        # train the classifier and measure how long training takes
        started = datetime.now()
        cls = cls.fit(data_train, target_train)
        train_time = delta_in_ms(datetime.now() - started)

        # run the trained classifier on the test data, again timed
        started = datetime.now()
        recommendations = cls.predict(data_test)
        test_time = delta_in_ms(datetime.now() - started)

        # collect runtime and quality measurements for this fold
        runtimes.append({"Training time": train_time,
                         "Overall testing time": test_time,
                         "Individual testing time": test_time / float(len(data_test))})
        quality.append(QualityMetricsCalculator(target_test,
                                                recommendations).calculate())

    # aggregate statistics over all folds
    return (self.calculate_quality_stats(cls.name, quality),
            self.calculate_runtime_stats(cls.name, runtimes))
from evaluation import plot from evaluation.metrics import QualityMetricsCalculator import config #configuration data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config") classifiers = [ NaiveBayesClassifier(data.features, data.target_names), TemporalEvidencesClassifier(data.features, data.target_names) ] #run the experiment using full dataset as training and as test data results = [] for cls in classifiers: cls = cls.fit(data.data, data.target) r = cls.predict(data.data) r = QualityMetricsCalculator(data.target, r) results.append(r.true_positives_for_all()) #want for each classifier result only the measurements for cutoff=1 results = [r.loc[1] for r in results] results = pandas.concat(results, axis=1) results.columns = [cls.name for cls in classifiers] plot_conf = plot.plot_config(config.plot_directory, sub_dirs=[data.name], prefix="histogram_classifiers", img_type=config.img_type) plot.comparison_histogram(results, plot_conf) print "Results can be found in the \"%s\" directory" % config.plot_directory
sys.path.append("..") import pandas from recsys.classifiers.temporal import TemporalEvidencesClassifier from recsys.dataset import load_dataset from evaluation.metrics import QualityMetricsCalculator # configuration data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config") # data = load_dataset("../datasets/houseB.csv", "../datasets/houseB.config") # run the classifier on the whole dataset and calculate confusion matrix cls = TemporalEvidencesClassifier(data.features, data.target_names) cls = cls.fit(data.data, data.target) results = cls.predict(data.data) matrix = QualityMetricsCalculator(data.target, results).confusion_matrix() # format confusion matrix for pretty printing letters = list(map(chr, list(range(97, 123)))) + list(map(chr, list(range(65, 91)))) action_to_letter = {action: letter for action, letter in zip(matrix.index, letters)} matrix.columns = [action_to_letter[action] for action in matrix.columns] matrix.index = ["(%s) %s" % (action_to_letter[action], action) for action in matrix.index] matrix.index.name = "Actual action" pandas.set_option("expand_frame_repr", False) pandas.set_option("max_columns", 40) print matrix
import pandas from recsys.classifiers.temporal import TemporalEvidencesClassifier from recsys.classifiers.bayes import NaiveBayesClassifier from recsys.dataset import load_dataset from evaluation import plot from evaluation.metrics import QualityMetricsCalculator import config #configuration data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config") to_compare = [1, 2, 3, 4] #run classifier and count true positives cls = TemporalEvidencesClassifier(data.features, data.target_names) cls = cls.fit(data.data, data.target) results = cls.predict(data.data) results = QualityMetricsCalculator(data.target, results).true_positives_for_all() #only use the interesting cutoffs results = results.transpose()[to_compare] results.columns = ["cutoff=%s" % c for c in results.columns] conf = plot.plot_config(config.plot_directory, sub_dirs=[data.name], prefix="histogram_cutoffs", img_type=config.img_type) plot.comparison_histogram(results, conf) print "Results can be found in the \"%s\" directory" % config.plot_directory
from recsys.classifiers.temporal import TemporalEvidencesClassifier from recsys.classifiers.bayes import NaiveBayesClassifier from recsys.dataset import load_dataset from evaluation import plot from evaluation.metrics import QualityMetricsCalculator import config #configuration data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config") classifiers = [NaiveBayesClassifier(data.features, data.target_names), TemporalEvidencesClassifier(data.features, data.target_names)] #run the experiment using full dataset as training and as test data results = [] for cls in classifiers: cls = cls.fit(data.data, data.target) r = cls.predict(data.data) r = QualityMetricsCalculator(data.target, r) results.append(r.true_positives_for_all()) #want for each classifier result only the measurements for cutoff=1 results = [r.loc[1] for r in results] results = pandas.concat(results, axis=1) results.columns = [cls.name for cls in classifiers] plot_conf = plot.plot_config(config.plot_directory, sub_dirs=[data.name], prefix="histogram_classifiers", img_type=config.img_type) plot.comparison_histogram(results, plot_conf) print "Results can be found in the \"%s\" directory" % config.plot_directory
import pandas from recsys.classifiers.temporal import TemporalEvidencesClassifier from recsys.dataset import load_dataset from evaluation.metrics import QualityMetricsCalculator #configuration data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config") #data = load_dataset("../datasets/houseB.csv", "../datasets/houseB.config") #run the classifier on the whole dataset and calculate confusion matrix cls = TemporalEvidencesClassifier(data.features, data.target_names) cls = cls.fit(data.data, data.target) results = cls.predict(data.data) matrix = QualityMetricsCalculator(data.target, results).confusion_matrix() #format confusion matrix for pretty printing letters = list(map(chr, list(range(97, 123)))) + list( map(chr, list(range(65, 91)))) action_to_letter = { action: letter for action, letter in zip(matrix.index, letters) } matrix.columns = [action_to_letter[action] for action in matrix.columns] matrix.index = [ "(%s) %s" % (action_to_letter[action], action) for action in matrix.index ] matrix.index.name = "Actual action" pandas.set_option('expand_frame_repr', False)
import sys sys.path.append("..") import pandas from recsys.classifiers.temporal import TemporalEvidencesClassifier from recsys.classifiers.bayes import NaiveBayesClassifier from recsys.dataset import load_dataset from evaluation import plot from evaluation.metrics import QualityMetricsCalculator import config #configuration data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config") to_compare = [1, 2, 3, 4] #run classifier and count true positives cls = TemporalEvidencesClassifier(data.features, data.target_names) cls = cls.fit(data.data, data.target) results = cls.predict(data.data) results = QualityMetricsCalculator(data.target, results).true_positives_for_all() #only use the interesting cutoffs results = results.transpose()[to_compare] results.columns = ["cutoff=%s" % c for c in results.columns] conf = plot.plot_config(config.plot_directory, sub_dirs=[data.name], prefix="histogram_cutoffs", img_type=config.img_type) plot.comparison_histogram(results, conf) print "Results can be found in the \"%s\" directory" % config.plot_directory