Esempio n. 1
0
def test_recommend():
    """
    Test that the classifier generates the correct recommendations for the test dataset.

    Fits a TemporalEvidencesClassifier on the dataset loaded from `data_file`, predicts on
    the same data with `include_conflict_theta=True` and compares every prediction with the
    expected results stored as JSON in `recommendations_file`.
    """

    #train the classifier and calculate recommendations
    data = load_dataset(data_file)
    cls = TemporalEvidencesClassifier(data.features, data.target_names)
    cls = cls.fit(data.data, data.target)
    #materialize the predictions so that they can be counted before the comparison
    actual_recommendations = list(cls.predict(data.data, include_conflict_theta=True))

    #load expected results from json file
    with open(recommendations_file, 'r') as infile:
        expected_recommendations = json.load(infile)

    #zip() silently stops at the shorter input, so missing or surplus recommendations
    #would go unnoticed without an explicit length check
    actual_count = len(actual_recommendations)
    expected_count = len(expected_recommendations)
    assert actual_count == expected_count, \
        "expected %d recommendations, got %d" % (expected_count, actual_count)

    #compare expected with actual results
    for actual, expected in zip(actual_recommendations, expected_recommendations):
        assert_recommendations_equal(actual, expected)
Esempio n. 2
0
def test_recommend():
    """
    Test that the classifier generates the correct recommendations for the test dataset.

    Fits a TemporalEvidencesClassifier on the dataset loaded from `data_file`, predicts on
    the same data with `include_conflict_theta=True` and compares every prediction with the
    expected results stored as JSON in `recommendations_file`.
    """

    #train the classifier and calculate recommendations
    data = load_dataset(data_file)
    cls = TemporalEvidencesClassifier(data.features, data.target_names)
    cls = cls.fit(data.data, data.target)
    #materialize the predictions so that they can be counted before the comparison
    actual_recommendations = list(cls.predict(data.data,
                                              include_conflict_theta=True))

    #load expected results from json file
    with open(recommendations_file, 'r') as infile:
        expected_recommendations = json.load(infile)

    #zip() silently stops at the shorter input, so missing or surplus
    #recommendations would go unnoticed without an explicit length check
    actual_count = len(actual_recommendations)
    expected_count = len(expected_recommendations)
    assert actual_count == expected_count, \
        "expected %d recommendations, got %d" % (expected_count, actual_count)

    #compare expected with actual results
    for actual, expected in zip(actual_recommendations,
                                expected_recommendations):
        assert_recommendations_equal(actual, expected)
import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.dataset import load_dataset
from evaluation.metrics import results_as_dataframe
from evaluation import plot
import config


#configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")

#run the classifier on the whole dataset
cls = TemporalEvidencesClassifier(data.features, data.target_names)
cls = cls.fit(data.data, data.target)
results = cls.predict(data.data, include_conflict_theta=True)

#extract conflict and uncertainty and convert recommendations to pandas representation
#(each result is unpacked as a (recommendations, conflict, uncertainty) triple)
recommendations, conflict, uncertainty = zip(*results)
results = results_as_dataframe(data.target, list(recommendations))


def find_matches_in_row(row):
    """
    For one row, mark correct recommendations with "1", false recommendations with "0".

    A recommendation counts as correct if it equals the row's index label (`row.name`).
    """
    #return a Series indexed like the row instead of a plain list: newer pandas versions
    #no longer expand list results of apply(axis=1) into columns, a Series keeps the
    #result a DataFrame with the original columns on old and new versions alike
    return pandas.Series([1 if col == row.name else 0 for col in row], index=row.index)


results = results.apply(find_matches_in_row, axis=1)

#set uncertainty and conflict as multi-index
#(list(...) because zip() is a one-shot iterator under Python 3)
results.index = pandas.MultiIndex.from_tuples(list(zip(conflict, uncertainty)),
                                              names=["Conflict", "Uncertainty"])

#found_within: the correct service was found within X recommendations
#-> apply cumulative sum on each row so that the "1" marker is set for all columns after it first appears
sys.path.append("..")

import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.dataset import load_dataset
from evaluation.metrics import QualityMetricsCalculator


# configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
# data = load_dataset("../datasets/houseB.csv", "../datasets/houseB.config")

# run the classifier on the whole dataset and calculate confusion matrix
cls = TemporalEvidencesClassifier(data.features, data.target_names)
cls = cls.fit(data.data, data.target)
results = cls.predict(data.data)
matrix = QualityMetricsCalculator(data.target, results).confusion_matrix()

# format confusion matrix for pretty printing: every action in the matrix index gets a
# one-letter label (a-z followed by A-Z), the columns show only the letter and the index
# shows "(letter) action"
letters = [chr(code) for code in range(97, 123)] + [chr(code) for code in range(65, 91)]
action_to_letter = dict(zip(matrix.index, letters))
matrix.columns = [action_to_letter[action] for action in matrix.columns]
matrix.index = ["(%s) %s" % (action_to_letter[action], action) for action in matrix.index]
matrix.index.name = "Actual action"

# use the full option names: abbreviated keys such as "max_columns" are matched as
# patterns and are rejected as ambiguous once more than one option matches
pandas.set_option("display.expand_frame_repr", False)
pandas.set_option("display.max_columns", 40)
# parenthesized form prints the same under Python 2 and is valid Python 3
print(matrix)
Esempio n. 5
0
import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.classifiers.bayes import NaiveBayesClassifier
from recsys.dataset import load_dataset
from evaluation import plot
from evaluation.metrics import QualityMetricsCalculator
import config

#configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
#cutoffs to show in the histogram
to_compare = [1, 2, 3, 4]

#run classifier and count true positives
cls = TemporalEvidencesClassifier(data.features, data.target_names)
cls = cls.fit(data.data, data.target)
results = cls.predict(data.data)
results = QualityMetricsCalculator(data.target,
                                   results).true_positives_for_all()

#only use the interesting cutoffs (selected as columns after transposing)
results = results.transpose()[to_compare]
results.columns = ["cutoff=%s" % c for c in results.columns]

#plot the selected cutoffs as a comparison histogram
conf = plot.plot_config(config.plot_directory,
                        sub_dirs=[data.name],
                        prefix="histogram_cutoffs",
                        img_type=config.img_type)
plot.comparison_histogram(results, conf)
#parenthesized form prints the same under Python 2 and is valid Python 3
print("Results can be found in the \"%s\" directory" % config.plot_directory)
Esempio n. 6
0
import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.dataset import load_dataset
from evaluation.metrics import results_as_dataframe
from evaluation import plot
import config

#configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")

#run the classifier on the whole dataset
cls = TemporalEvidencesClassifier(data.features, data.target_names)
cls = cls.fit(data.data, data.target)
results = cls.predict(data.data, include_conflict_theta=True)

#extract conflict and uncertainty and convert recommendations to pandas representation
#(each result is unpacked as a (recommendations, conflict, uncertainty) triple)
recommendations, conflict, uncertainty = zip(*results)
results = results_as_dataframe(data.target, list(recommendations))


def find_matches_in_row(row):
    """
    For one row, mark correct recommendations with "1", false recommendations with "0".

    A recommendation counts as correct if it equals the row's index label (`row.name`).
    """
    #return a Series indexed like the row instead of a plain list: newer pandas versions
    #no longer expand list results of apply(axis=1) into columns, a Series keeps the
    #result a DataFrame with the original columns on old and new versions alike
    return pandas.Series(
        [1 if col == row.name else 0 for col in row], index=row.index)


results = results.apply(find_matches_in_row, axis=1)

#set uncertainty and conflict as multi-index
#(list(...) because zip() is a one-shot iterator under Python 3)
results.index = pandas.MultiIndex.from_tuples(
    list(zip(conflict, uncertainty)), names=["Conflict", "Uncertainty"])

#found_within: the correct service was found within X recommendations
#-> apply cumulative sum on each row so that the "1" marker is set for all columns after it first appears