Code Example #1
File: experiment.py  Project: krasch/smart-assistants
    def run_with_classifier(self, cls, data_for_folds):
        """
        Perform cross-validation with one classifier.
        @param cls: Classifier to use in the experiment.
        @param data_for_folds: Contains one pair of True/False lists (train, test) for each fold to be run. Each list
        states, for every item in the dataset, whether that item belongs to the training data or the test data of the
        current fold.
        @return: Measurements for quality and runtime metrics.
        """
        runtimes = []
        quality = []
        for train, test in data_for_folds:

            #get the training and testing data for this fold
            data_train, data_test = self.dataset.data[train], self.dataset.data[test]
            target_train, target_test = self.dataset.target[train], self.dataset.target[test]

            #perform training
            train_time = datetime.now()
            cls = cls.fit(data_train, target_train)
            train_time = delta_in_ms(datetime.now() - train_time)

            #apply the classifier on the test data
            test_time = datetime.now()
            recommendations = cls.predict(data_test)
            test_time = delta_in_ms(datetime.now() - test_time)

            #add measurements for this replication to result collection
            runtimes.append({
                "Training time": train_time,
                "Overall testing time": test_time,
                "Individual testing time": test_time / float(len(data_test))
            })
            quality.append(QualityMetricsCalculator(target_test, recommendations).calculate())

        #calculate statistics over all replications
        return (self.calculate_quality_stats(cls.name, quality),
                self.calculate_runtime_stats(cls.name, runtimes))
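
The docstring above leaves open how data_for_folds is built. Below is a minimal sketch, assuming the True/False membership lists are plain NumPy boolean masks; the make_boolean_folds helper and the commented call site (experiment, cls) are hypothetical and only illustrate how run_with_classifier might be invoked:

import numpy

def make_boolean_folds(n_items, n_folds=10):
    # assign each item to one of n_folds parts (round-robin), then build one
    # (train_mask, test_mask) pair of boolean arrays per fold
    fold_of_item = numpy.arange(n_items) % n_folds
    folds = []
    for fold in range(n_folds):
        test_mask = fold_of_item == fold
        folds.append((~test_mask, test_mask))
    return folds

# hypothetical call site:
# data_for_folds = make_boolean_folds(len(experiment.dataset.data))
# quality_stats, runtime_stats = experiment.run_with_classifier(cls, data_for_folds)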
Code Example #2
import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.classifiers.bayes import NaiveBayesClassifier
from recsys.dataset import load_dataset
from evaluation import plot
from evaluation.metrics import QualityMetricsCalculator
import config

#configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
classifiers = [
    NaiveBayesClassifier(data.features, data.target_names),
    TemporalEvidencesClassifier(data.features, data.target_names)
]

#run the experiment using full dataset as training and as test data
results = []
for cls in classifiers:
    cls = cls.fit(data.data, data.target)
    r = cls.predict(data.data)
    r = QualityMetricsCalculator(data.target, r)
    results.append(r.true_positives_for_all())

#for each classifier, keep only the measurements at cutoff=1
results = [r.loc[1] for r in results]
results = pandas.concat(results, axis=1)
results.columns = [cls.name for cls in classifiers]

plot_conf = plot.plot_config(config.plot_directory,
                             sub_dirs=[data.name],
                             prefix="histogram_classifiers",
                             img_type=config.img_type)
plot.comparison_histogram(results, plot_conf)
print "Results can be found in the \"%s\" directory" % config.plot_directory
Code Example #3
import sys
sys.path.append("..")

import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.dataset import load_dataset
from evaluation.metrics import QualityMetricsCalculator


# configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
# data = load_dataset("../datasets/houseB.csv", "../datasets/houseB.config")

# run the classifier on the whole dataset and calculate confusion matrix
cls = TemporalEvidencesClassifier(data.features, data.target_names)
cls = cls.fit(data.data, data.target)
results = cls.predict(data.data)
matrix = QualityMetricsCalculator(data.target, results).confusion_matrix()

# format confusion matrix for pretty printing
letters = list(map(chr, list(range(97, 123)))) + list(map(chr, list(range(65, 91))))
action_to_letter = {action: letter for action, letter in zip(matrix.index, letters)}
matrix.columns = [action_to_letter[action] for action in matrix.columns]
matrix.index = ["(%s) %s" % (action_to_letter[action], action) for action in matrix.index]
matrix.index.name = "Actual action"

pandas.set_option("expand_frame_repr", False)
pandas.set_option("max_columns", 40)
print matrix
Code Example #4
import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.classifiers.bayes import NaiveBayesClassifier
from recsys.dataset import load_dataset
from evaluation import plot
from evaluation.metrics import QualityMetricsCalculator
import config

#configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
to_compare = [1, 2, 3, 4]

#run classifier and count true positives
cls = TemporalEvidencesClassifier(data.features, data.target_names)
cls = cls.fit(data.data, data.target)
results = cls.predict(data.data)
results = QualityMetricsCalculator(data.target, results).true_positives_for_all()

#only use the interesting cutoffs
results = results.transpose()[to_compare]
results.columns = ["cutoff=%s" % c for c in results.columns]

conf = plot.plot_config(config.plot_directory,
                        sub_dirs=[data.name],
                        prefix="histogram_cutoffs",
                        img_type=config.img_type)
plot.comparison_histogram(results, conf)
print "Results can be found in the \"%s\" directory" % config.plot_directory
Code Example #5
import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.classifiers.bayes import NaiveBayesClassifier
from recsys.dataset import load_dataset
from evaluation import plot
from evaluation.metrics import QualityMetricsCalculator
import config


#configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
classifiers = [NaiveBayesClassifier(data.features, data.target_names),
               TemporalEvidencesClassifier(data.features, data.target_names)]

#run the experiment using full dataset as training and as test data
results = []
for cls in classifiers:
    cls = cls.fit(data.data, data.target)
    r = cls.predict(data.data)
    r = QualityMetricsCalculator(data.target, r)
    results.append(r.true_positives_for_all())

#for each classifier, keep only the measurements at cutoff=1
results = [r.loc[1] for r in results]
results = pandas.concat(results, axis=1)
results.columns = [cls.name for cls in classifiers]

plot_conf = plot.plot_config(config.plot_directory,
                             sub_dirs=[data.name],
                             prefix="histogram_classifiers",
                             img_type=config.img_type)
plot.comparison_histogram(results, plot_conf)
print "Results can be found in the \"%s\" directory" % config.plot_directory
Code Example #6
import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.dataset import load_dataset
from evaluation.metrics import QualityMetricsCalculator

#configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
#data = load_dataset("../datasets/houseB.csv", "../datasets/houseB.config")

#run the classifier on the whole dataset and calculate confusion matrix
cls = TemporalEvidencesClassifier(data.features, data.target_names)
cls = cls.fit(data.data, data.target)
results = cls.predict(data.data)
matrix = QualityMetricsCalculator(data.target, results).confusion_matrix()

#format confusion matrix for pretty printing
letters = list(map(chr, list(range(97, 123)))) + list(map(chr, list(range(65, 91))))
action_to_letter = {action: letter for action, letter in zip(matrix.index, letters)}
matrix.columns = [action_to_letter[action] for action in matrix.columns]
matrix.index = ["(%s) %s" % (action_to_letter[action], action) for action in matrix.index]
matrix.index.name = "Actual action"

pandas.set_option('expand_frame_repr', False)
pandas.set_option('max_columns', 40)
print matrix
Code Example #7
import sys
sys.path.append("..") 

import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.classifiers.bayes import NaiveBayesClassifier
from recsys.dataset import load_dataset
from evaluation import plot
from evaluation.metrics import QualityMetricsCalculator
import config


#configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
to_compare = [1, 2, 3, 4]

#run classifier and count true positives
cls = TemporalEvidencesClassifier(data.features, data.target_names)
cls = cls.fit(data.data, data.target)
results = cls.predict(data.data)
results = QualityMetricsCalculator(data.target, results).true_positives_for_all()

#only use the interesting cutoffs
results = results.transpose()[to_compare]
results.columns = ["cutoff=%s" % c for c in results.columns]

conf = plot.plot_config(config.plot_directory,
                        sub_dirs=[data.name],
                        prefix="histogram_cutoffs",
                        img_type=config.img_type)
plot.comparison_histogram(results, conf)
print "Results can be found in the \"%s\" directory" % config.plot_directory