Example #1
    def generate_classifier(sensors):
        #create a source for the setting sensor=value, fill it with random observations
        def create_source(sensor, value, num_bins):
            #note: 'targets' is defined later in the enclosing scope; the lambda
            #resolves it lazily, so create_source must only be called after it exists
            random_observations = lambda: pandas.Series(
                numpy.random.randint(0, 100, len(targets)), index=targets)
            temporal = pandas.concat(
                [random_observations() for b in range(num_bins)], axis=1)
            total = random_observations()
            return Source(sensor, value, total, temporal)

        #initialize the classifier
        all_settings = [(sensor, value) for sensor in sensors.keys()
                        for value in sensors[sensor]]
        features = sorted(all_settings) + [
            "%s_timedelta" % sensor for sensor in sorted(sensors.keys())
        ]
        targets = [
            "%s=%s" % (sensor, value) for sensor, value in sorted(all_settings)
        ]
        cls = TemporalEvidencesClassifier(features, targets)

        #create a random source for each possible setting
        cls.sources = {(sensor, value): create_source(sensor, value,
                                                      len(cls.bins))
                       for sensor, value in all_settings}
        cls.max_total = max(source.total_counts.sum()
                            for source in cls.sources.values())
        cls.max_temporal = max(source.max_temporal()
                               for source in cls.sources.values())

        return cls
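
A minimal usage sketch for the generator above, assuming generate_classifier is reachable at module level; the sensors dictionary here is hypothetical:

#hypothetical sensor layout: each sensor maps to its possible values
sensors = {"frontdoor": ["Open", "Closed"],
           "cups_cupboard": ["Open", "Closed"]}

#build a classifier filled with random observations
cls = generate_classifier(sensors)
print(sorted(cls.sources.keys()))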
Example #2
def initialize_experiment():
    experiment = Experiment(data)
    experiment.add_classifier(TemporalEvidencesClassifier(
        data.features, data.target_names),
                              name="Our method")
    experiment.add_classifier(NaiveBayesClassifier(data.features,
                                                   data.target_names),
                              name="Naive Bayes")
    return experiment
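
The returned experiment can then be run with 10-fold cross-validation; the cutoff and metrics below mirror Example #5:

experiment = initialize_experiment()
results = experiment.run(folds=10)
results.print_quality_comparison_at_cutoff(
    cutoff=1, metrics=["Recall", "Precision", "F1"])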
Example #3
def test_train():
    """
    Test that the classifier correctly extracts all observations from the test dataset.
    """
    #train the classifier
    data = load_dataset(data_file)
    cls = TemporalEvidencesClassifier(data.features, data.target_names)
    cls = cls.fit(data.data, data.target)

    #load expected sources and their observations from json file
    expected_sources = sources_from_json(sources_file)

    #compare expected with actual sources
    assert_array_equal(
        sorted(cls.sources.keys()),
        sorted(expected_sources.keys()),
    )
    for name in expected_sources.keys():
        assert_source_equal(cls.sources[name], expected_sources[name])
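
The sources_from_json helper is not shown; a minimal sketch of what it might look like, assuming a JSON layout with one "sensor=value" entry holding "total" and "temporal" observation counts, and assuming Source is importable as in Example #1 (the real layout may differ):

import json
import pandas

def sources_from_json(path):
    #hypothetical reader mirroring the Source constructor from Example #1
    with open(path, 'r') as infile:
        raw = json.load(infile)
    sources = {}
    for name, entry in raw.items():
        sensor, value = name.split("=")
        total = pandas.Series(entry["total"])
        temporal = pandas.DataFrame(entry["temporal"])
        sources[(sensor, value)] = Source(sensor, value, total, temporal)
    return sources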
Example #4
def test_recommend():
    """
    Test that the classifier generates the correct recommendations for the test dataset.
    """

    #train the classifier and calculate recommendations
    data = load_dataset(data_file)
    cls = TemporalEvidencesClassifier(data.features, data.target_names)
    cls = cls.fit(data.data, data.target)
    actual_recommendations = cls.predict(data.data,
                                         include_conflict_theta=True)

    #load expected results from json file
    with open(recommendations_file, 'r') as infile:
        expected_recommendations = json.load(infile)

    #compare expected with actual results
    for actual, expected in zip(actual_recommendations,
                                expected_recommendations):
        assert_recommendations_equal(actual, expected)
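
assert_recommendations_equal is likewise not shown; a hypothetical sketch, assuming that with include_conflict_theta=True each result carries a ranked recommendation list plus conflict and theta values (the real result layout may differ):

def assert_recommendations_equal(actual, expected):
    #hypothetical comparison: exact match on the ranking,
    #approximate match on the conflict and theta scalars
    actual_recs, actual_conflict, actual_theta = actual
    expected_recs, expected_conflict, expected_theta = expected
    assert list(actual_recs) == list(expected_recs)
    assert abs(actual_conflict - expected_conflict) < 1e-6
    assert abs(actual_theta - expected_theta) < 1e-6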
Example #5

def houseB():
    """
    This dataset is partially dominated by one of the sensors, which makes the evaluation results less statistically
    sound; for example, it leads to large confidence intervals when running 10-fold cross-validation.
    """
    data = load_dataset("../datasets/houseB.csv", "../datasets/houseB.config")
    cutoff_results_at = 15
    return data, cutoff_results_at
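
The houseA function called below is not part of this snippet; a minimal sketch mirroring houseB above (the dataset paths match Examples #6-#8, but the cutoff value is an assumption):

def houseA():
    #hypothetical counterpart to houseB
    data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
    cutoff_results_at = 15  #assumed; the real value may differ
    return data, cutoff_results_at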


#configuration
data, cutoff_results_at = houseA()

#run several classifiers on the same dataset, use 10-fold cross-validation
experiment = Experiment(data)
experiment.add_classifier(TemporalEvidencesClassifier(data.features,
                                                      data.target_names),
                          name="Our method")
experiment.add_classifier(NaiveBayesClassifier(data.features,
                                               data.target_names),
                          name="Naive Bayes")
experiment.add_classifier(RandomClassifier(data.features, data.target_names),
                          name="Random")
results = experiment.run(folds=10)

#print and plot results
results.print_quality_comparison_at_cutoff(
    cutoff=1, metrics=["Recall", "Precision", "F1"])
results.print_runtime_comparison()
plot_conf = plot.plot_config(config.plot_directory,
                             sub_dirs=[data.name],
                             img_type=config.img_type)
Example #6
import sys
sys.path.append("..")

import pandas

from evaluation.experiment import Experiment
from evaluation.metrics import quality_metrics
from recsys.classifiers.temporal import TemporalEvidencesClassifier, configure_dynamic_cutoff
from recsys.dataset import load_dataset


#configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
methods_to_test = [("Fixed cutoff", None),
                   ("dynamic cutoff=4", configure_dynamic_cutoff(1.0, 0.4, 4)),
                   ("dynamic cutoff=2", configure_dynamic_cutoff(1.0, 0.4, 2))]

#run all configured cutoffs with 10-fold cross-validation
experiment = Experiment(data)
for name, method in methods_to_test:
    experiment.add_classifier(TemporalEvidencesClassifier(data.features, data.target_names,
                              postprocess=method), name=name)
results = experiment.run(folds=10)

#print results
pandas.set_option('expand_frame_repr', False)
pandas.set_option('max_columns', 4)
print "Maximum 5 recommendations"
results.print_quality_comparison_at_cutoff(cutoff=5, metrics=quality_metrics)
print "Maximum 10 recommendations"
results.print_quality_comparison_at_cutoff(cutoff=10, metrics=quality_metrics)
Example #7
import sys
sys.path.append("..")

import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.classifiers.binning import initialize_bins
from recsys.dataset import load_dataset
from evaluation import plot
import config

#configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")

#fit classifier to dataset
cls = TemporalEvidencesClassifier(data.features,
                                  data.target_names,
                                  bins=initialize_bins(0, 300, 10))
cls = cls.fit(data.data, data.target)

#create visualizations of habits around each user action
plot_conf = plot.plot_config(config.plot_directory,
                             sub_dirs=[data.name, "habits"],
                             img_type=config.img_type)
for source in cls.sources.values():
    observations = pandas.DataFrame(source.temporal_counts)
    observations.columns = data.target_names
    observations.index = cls.bins
    plot.plot_observations(source.name(), observations, plot_conf)

print "Results can be found in the \"%s\" directory" % config.plot_directory
Example #8
import sys
sys.path.append("..")

import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.classifiers.bayes import NaiveBayesClassifier
from recsys.dataset import load_dataset
from evaluation import plot
from evaluation.metrics import QualityMetricsCalculator
import config

#configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
classifiers = [
    NaiveBayesClassifier(data.features, data.target_names),
    TemporalEvidencesClassifier(data.features, data.target_names)
]

#run the experiment using full dataset as training and as test data
results = []
for cls in classifiers:
    cls = cls.fit(data.data, data.target)
    r = cls.predict(data.data)
    r = QualityMetricsCalculator(data.target, r)
    results.append(r.true_positives_for_all())

#for each classifier, keep only the measurements at cutoff=1
results = [r.loc[1] for r in results]
results = pandas.concat(results, axis=1)
results.columns = [cls.name for cls in classifiers]
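
The snippet builds the comparison table but never displays it; a minimal follow-up, assuming cls.name holds a printable classifier name as used above:

#one column of true positives per classifier, measured at cutoff=1
pandas.set_option('expand_frame_repr', False)
print(results)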