results = cls.predict(data.data, include_conflict_theta=True)

#extract conflict and uncertainty and convert recommendations to pandas representation
recommendations, conflict, uncertainty = zip(*results)
results = results_as_dataframe(data.target, list(recommendations))

#for each row, mark correct recommendations with "1", false recommendations with "0"
find_matches_in_row = lambda row: [1 if col == row.name else 0 for col in row]
results = results.apply(find_matches_in_row, axis=1)

#set uncertainty and conflict as multi-index
results.index = pandas.MultiIndex.from_tuples(zip(conflict, uncertainty),
                                              names=["Conflict", "Uncertainty"])

#found_within: the correct service was found within X recommendations
#-> apply cumulative sum on each row so that the "1" marker is set for all columns after it first appears
found_within = results.cumsum(axis=1)
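
#Aside with made-up data (not from the original script): why the cumulative sum gives a
#"found within the first k recommendations" indicator. In the marked frame a single "1"
#sits at the rank where the correct service appeared; cumsum carries it to all later ranks.
toy_marked = pandas.DataFrame([[0, 1, 0],
                               [0, 0, 1]], columns=[1, 2, 3])
toy_found_within = toy_marked.cumsum(axis=1)
#first row becomes [0, 1, 1]: found when 2 or 3 recommendations are shown
#second row becomes [0, 0, 1]: found only when 3 recommendations are shown
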
#create one plot for each cutoff
conf = plot.plot_config(config.plot_directory, sub_dirs=[data.name, "conflict-uncertainty"],
                        prefix="found_within_", img_type=config.img_type)
plot.conflict_uncertainty_scatter(found_within, conf)

#not_found_within: the correct service was not found within X recommendations, i.e. the complement of found_within
not_found_within = found_within.apply(lambda col: 1-col)
#create one plot for each cutoff
conf = plot.plot_config(config.plot_directory, sub_dirs=[data.name, "conflict-uncertainty"],
                        prefix="not_found_within_", img_type=config.img_type)
plot.conflict_uncertainty_scatter(not_found_within, conf)

print "Results can be found in the \"%s\" directory" % config.plot_directory
Example No. 2
import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.classifiers.bayes import NaiveBayesClassifier
from recsys.dataset import load_dataset
from evaluation import plot
from evaluation.metrics import QualityMetricsCalculator
import config

#configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
classifiers = [
    NaiveBayesClassifier(data.features, data.target_names),
    TemporalEvidencesClassifier(data.features, data.target_names)
]

#run the experiment using full dataset as training and as test data
results = []
for cls in classifiers:
    cls = cls.fit(data.data, data.target)
    r = cls.predict(data.data)
    r = QualityMetricsCalculator(data.target, r)
    results.append(r.true_positives_for_all())

#for each classifier, keep only the measurements at cutoff=1
results = [r.loc[1] for r in results]
results = pandas.concat(results, axis=1)
results.columns = [cls.name for cls in classifiers]
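
#Aside with made-up numbers (not part of the original script): concat with axis=1 places
#the per-classifier measurements side by side, one column per classifier, one row per action.
toy_nb = pandas.Series([10, 4], index=["turn on lamp", "turn on tv"])
toy_te = pandas.Series([12, 5], index=["turn on lamp", "turn on tv"])
toy_comparison = pandas.concat([toy_nb, toy_te], axis=1)
toy_comparison.columns = ["Naive Bayes", "Our method"]
#toy_comparison now has the same layout as the "results" frame built above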

plot_conf = plot.plot_config(config.plot_directory,
                             sub_dirs=[data.name],
                             prefix="histogram_classifiers",
                             img_type=config.img_type)
plot.comparison_histogram(results, plot_conf)
print "Results can be found in the \"%s\" directory" % config.plot_directory
Example No. 3
    data = load_dataset("../datasets/houseB.csv", "../datasets/houseB.config")
    cutoff_results_at = 15
    return data, cutoff_results_at


#configuration
data, cutoff_results_at = houseA()

#run several classifiers on the same dataset, use 10-fold cross-validation
experiment = Experiment(data)
experiment.add_classifier(TemporalEvidencesClassifier(data.features,
                                                      data.target_names),
                          name="Our method")
experiment.add_classifier(NaiveBayesClassifier(data.features,
                                               data.target_names),
                          name="Naive Bayes")
experiment.add_classifier(RandomClassifier(data.features, data.target_names),
                          name="Random")
results = experiment.run(folds=10)
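
#Aside (generic sketch only, not the Experiment API used above): in 10-fold cross-validation
#the classifier is trained on nine folds and evaluated on the held-out tenth, so every
#observation is tested exactly once and per-fold scores are averaged afterwards.
#scikit-learn and the toy data below are assumptions of this sketch, not part of the script.
import numpy
from sklearn.model_selection import KFold
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import f1_score
toy_X = numpy.random.rand(100, 4)
toy_y = numpy.random.randint(0, 2, size=100)
toy_scores = []
for train, test in KFold(n_splits=10, shuffle=True).split(toy_X):
    toy_model = GaussianNB().fit(toy_X[train], toy_y[train])
    toy_scores.append(f1_score(toy_y[test], toy_model.predict(toy_X[test])))
#the reported quality would be the mean of toy_scores over the 10 folds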

#print and plot results
results.print_quality_comparison_at_cutoff(
    cutoff=1, metrics=["Recall", "Precision", "F1"])
results.print_runtime_comparison()
plot_conf = plot.plot_config(config.plot_directory,
                             sub_dirs=[data.name],
                             img_type=config.img_type)
results.plot_quality_comparison(metrics=["Recall", "Precision", "F1"],
                                plot_config=plot_conf,
                                cutoff_results_at=cutoff_results_at)
Example No. 4
    stats = [experiment.run_with_classifier(cls, [(train_data, test_data)])
             for cls in experiment.classifiers]
    #combine results of all classifiers for this training dataset, keep only results for cutoff=1
    quality_stats = pandas.concat([quality for quality, runtime in stats], axis=1).loc[1]

    results.append(quality_stats)


#make one big matrix with all results and add multi-index of training sizes and training times
results = pandas.concat(results, axis=1).transpose()
results.index = pandas.MultiIndex.from_tuples(zip(train_sizes, train_times),
                                             names=["Size of dataset", "Elapsed time (days)"])

#print confidence intervals for interesting metrics
interesting_columns = lambda metric: [(cls.name,metric, "Confidence interval") for cls in experiment.classifiers]
for metric in ["Precision", "Recall", "F1"]:
    r = results[interesting_columns(metric)]
    r.columns = [cls.name for cls in experiment.classifiers]
    r.name = metric
    print metric
    print r
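
#Aside with made-up numbers (not part of the original script): the columns of "results" are
#keyed by (classifier, metric, statistic) tuples, so a list of full column tuples, as built
#by interesting_columns above, selects exactly those columns.
toy_columns = pandas.MultiIndex.from_tuples([("Our method", "Recall", "Mean"),
                                             ("Our method", "Recall", "Confidence interval"),
                                             ("Naive Bayes", "Recall", "Mean"),
                                             ("Naive Bayes", "Recall", "Confidence interval")])
toy_results = pandas.DataFrame([[0.80, 0.05, 0.70, 0.06]], columns=toy_columns)
toy_intervals = toy_results[[("Our method", "Recall", "Confidence interval"),
                             ("Naive Bayes", "Recall", "Confidence interval")]]
#toy_intervals keeps only the two "Confidence interval" columns, in the order given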

#plot means for interesting metrics
plot_conf = plot.plot_config(config.plot_directory, sub_dirs=[data.name], prefix="trainsize_",
                             img_type=config.img_type)
interesting_columns = lambda metric: [(cls.name, metric, "Mean") for cls in experiment.classifiers]
for metric in ["Precision", "Recall", "F1"]:
    r = results[interesting_columns(metric)]
    r.columns = [cls.name for cls in experiment.classifiers]
    plot.plot_train_size(r, metric, plot_conf)
Example No. 5
of the figure still stands: the user has some observable habits after closing the frontdoor.
"""

import sys
sys.path.append("..") 

import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.classifiers.binning import initialize_bins
from recsys.dataset import load_dataset
from evaluation import plot
import config

#configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")

#fit classifier to dataset
cls = TemporalEvidencesClassifier(data.features, data.target_names, bins=initialize_bins(0, 300, 10))
cls = cls.fit(data.data, data.target)

#create visualizations of habits around each user action
plot_conf = plot.plot_config(config.plot_directory, sub_dirs=[data.name, "habits"], img_type=config.img_type)
for source in cls.sources.values():
    observations = pandas.DataFrame(source.temporal_counts)
    observations.columns = data.target_names
    observations.index = cls.bins
    plot.plot_observations(source.name(), observations, plot_conf)
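
#Aside with made-up counts (not part of the original script): the shape of one source's
#observation frame. Rows are the time bins since the sensor event, columns are the possible
#user actions, and the values are the observation counts per (bin, action) pair.
toy_observations = pandas.DataFrame([[5, 0],
                                     [2, 1],
                                     [0, 3]],
                                    columns=["toy action A", "toy action B"],
                                    index=[0, 10, 20])
#e.g. "toy action A" was observed 5 times within the first bin after the sensor event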
    
print "Results can be found in the \"%s\" directory" % config.plot_directory
Example No. 6
results = results_as_dataframe(data.target, list(recommendations))

#for each row, mark correct recommendations with "1", false recommendations with "0"
find_matches_in_row = lambda row: [1 if col == row.name else 0 for col in row]
results = results.apply(find_matches_in_row, axis=1)

#set uncertainty and conflict as multi-index
results.index = pandas.MultiIndex.from_tuples(
    zip(conflict, uncertainty), names=["Conflict", "Uncertainty"])

#found_within: the correct service was found within X recommendations
#-> apply cumulative sum on each row so that the "1" marker is set for all columns after it first appears
found_within = results.cumsum(axis=1)
#create one plot for each cutoff
conf = plot.plot_config(config.plot_directory,
                        sub_dirs=[data.name, "conflict-uncertainty"],
                        prefix="found_within_",
                        img_type=config.img_type)
plot.conflict_uncertainty_scatter(found_within, conf)

#not_found_within: the correct service was not found within X recommendations, i.e. the complement of found_within
not_found_within = found_within.apply(lambda col: 1 - col)
#create one plot for each cutoff
conf = plot.plot_config(config.plot_directory,
                        sub_dirs=[data.name, "conflict-uncertainty"],
                        prefix="not_found_within_",
                        img_type=config.img_type)
plot.conflict_uncertainty_scatter(not_found_within, conf)

print "Results can be found in the \"%s\" directory" % config.plot_directory
Example No. 7

import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.classifiers.bayes import NaiveBayesClassifier
from recsys.dataset import load_dataset
from evaluation import plot
from evaluation.metrics import QualityMetricsCalculator
import config


#configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
classifiers = [NaiveBayesClassifier(data.features, data.target_names),
              TemporalEvidencesClassifier(data.features, data.target_names)]

#run the experiment using full dataset as training and as test data
results = []
for cls in classifiers:
    cls = cls.fit(data.data, data.target)
    r = cls.predict(data.data)
    r = QualityMetricsCalculator(data.target, r)
    results.append(r.true_positives_for_all())

#for each classifier, keep only the measurements at cutoff=1
results = [r.loc[1] for r in results]
results = pandas.concat(results, axis=1)
results.columns = [cls.name for cls in classifiers]

plot_conf = plot.plot_config(config.plot_directory, sub_dirs=[data.name], prefix="histogram_classifiers", img_type=config.img_type)
plot.comparison_histogram(results, plot_conf)
print "Results can be found in the \"%s\" directory" % config.plot_directory
Example No. 8
    results.append(quality_stats)

#make one big matrix with all results and add multi-index of training sizes and training times
results = pandas.concat(results, axis=1).transpose()
results.index = pandas.MultiIndex.from_tuples(
    zip(train_sizes, train_times),
    names=["Size of dataset", "Elapsed time (days)"])

#print confidence intervals for interesting metrics
interesting_columns = lambda metric: [(cls.name, metric, "Confidence interval")
                                      for cls in experiment.classifiers]
for metric in ["Precision", "Recall", "F1"]:
    r = results[interesting_columns(metric)]
    r.columns = [cls.name for cls in experiment.classifiers]
    r.name = metric
    print metric
    print r

#plot means for interesting metrics
plot_conf = plot.plot_config(config.plot_directory,
                             sub_dirs=[data.name],
                             prefix="trainsize_",
                             img_type=config.img_type)
interesting_columns = lambda metric: [(cls.name, metric, "Mean")
                                      for cls in experiment.classifiers]
for metric in ["Precision", "Recall", "F1"]:
    r = results[interesting_columns(metric)]
    r.columns = [cls.name for cls in experiment.classifiers]
    plot.plot_train_size(r, metric, plot_conf)