def test_recommend():
    """
    Test that the classifier generates the correct recommendations for the test dataset.

    Trains a TemporalEvidencesClassifier on the dataset loaded from `data_file`,
    predicts on the same data with `include_conflict_theta=True` and compares
    every result with the expected results stored as JSON in `recommendations_file`.
    """
    # train the classifier and calculate recommendations
    data = load_dataset(data_file)
    cls = TemporalEvidencesClassifier(data.features, data.target_names)
    cls = cls.fit(data.data, data.target)
    # materialize the results so that their number can be checked below
    actual_recommendations = list(cls.predict(data.data, include_conflict_theta=True))

    # load expected results from json file
    with open(recommendations_file, 'r') as infile:
        expected_recommendations = json.load(infile)

    # zip() stops at the shorter input, so without this check missing or
    # surplus recommendations would silently go unnoticed
    assert len(actual_recommendations) == len(expected_recommendations), \
        "expected %d recommendations, got %d" % (len(expected_recommendations),
                                                 len(actual_recommendations))

    # compare expected with actual results
    for actual, expected in zip(actual_recommendations, expected_recommendations):
        assert_recommendations_equal(actual, expected)
def test_recommend():
    """
    Test that the classifier generates the correct recommendations for the test dataset.

    Trains a TemporalEvidencesClassifier on the dataset loaded from `data_file`,
    predicts on the same data with `include_conflict_theta=True` and compares
    every result with the expected results stored as JSON in `recommendations_file`.
    """
    # train the classifier and calculate recommendations
    data = load_dataset(data_file)
    cls = TemporalEvidencesClassifier(data.features, data.target_names)
    cls = cls.fit(data.data, data.target)
    # materialize the results so that their number can be checked below
    actual_recommendations = list(cls.predict(data.data, include_conflict_theta=True))

    # load expected results from json file
    with open(recommendations_file, 'r') as infile:
        expected_recommendations = json.load(infile)

    # zip() stops at the shorter input, so without this check missing or
    # surplus recommendations would silently go unnoticed
    assert len(actual_recommendations) == len(expected_recommendations), \
        "expected %d recommendations, got %d" % (len(expected_recommendations),
                                                 len(actual_recommendations))

    # compare expected with actual results
    for actual, expected in zip(actual_recommendations, expected_recommendations):
        assert_recommendations_equal(actual, expected)
import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.dataset import load_dataset
from evaluation.metrics import results_as_dataframe
from evaluation import plot
import config

# configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")

# run the classifier on the whole dataset
cls = TemporalEvidencesClassifier(data.features, data.target_names)
cls = cls.fit(data.data, data.target)
results = cls.predict(data.data, include_conflict_theta=True)

# every result is unpacked as (recommendations, conflict, uncertainty);
# extract conflict and uncertainty and convert recommendations to pandas representation
recommendations, conflict, uncertainty = zip(*results)
results = results_as_dataframe(data.target, list(recommendations))


def find_matches_in_row(row):
    """
    Mark correct recommendations in one row with "1", false recommendations with "0".

    A cell counts as correct when it equals the row's index label (`row.name`);
    presumably that label is the actual target set by `results_as_dataframe`.
    A Series indexed like the row is returned (instead of a plain list) so that
    `DataFrame.apply` expands the result back into columns on every pandas version.
    """
    return pandas.Series([1 if col == row.name else 0 for col in row], index=row.index)


# for each row, mark correct recommendations with "1", false recommendations with "0"
results = results.apply(find_matches_in_row, axis=1)

# set uncertainty and conflict as multi-index;
# list(...) because zip() is a one-shot iterator on Python 3
results.index = pandas.MultiIndex.from_tuples(list(zip(conflict, uncertainty)),
                                              names=["Conflict", "Uncertainty"])

# found_within: the correct service was found within X recommendations
# -> apply cumulative sum on each row so that the "1" marker is set for all columns after it first appears
# Print the confusion matrix of the TemporalEvidencesClassifier for one dataset.
import string
import sys  # needed for the sys.path tweak below; harmless if already imported

sys.path.append("..")

import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.dataset import load_dataset
from evaluation.metrics import QualityMetricsCalculator

# configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
# data = load_dataset("../datasets/houseB.csv", "../datasets/houseB.config")

# run the classifier on the whole dataset and calculate confusion matrix
cls = TemporalEvidencesClassifier(data.features, data.target_names)
cls = cls.fit(data.data, data.target)
results = cls.predict(data.data)
matrix = QualityMetricsCalculator(data.target, results).confusion_matrix()

# format confusion matrix for pretty printing: each action gets a one-letter
# alias (a-z, then A-Z) that is used as column header, the rows show
# "(alias) action" as a legend
letters = string.ascii_lowercase + string.ascii_uppercase
action_to_letter = dict(zip(matrix.index, letters))
matrix.columns = [action_to_letter[action] for action in matrix.columns]
matrix.index = ["(%s) %s" % (action_to_letter[action], action) for action in matrix.index]
matrix.index.name = "Actual action"

# use the full option names; abbreviated names are resolved by pattern
# matching and fail as soon as more than one option matches
pandas.set_option("display.expand_frame_repr", False)
pandas.set_option("display.max_columns", 40)

# call form of print works on Python 2 and Python 3
print(matrix)
# Plot a histogram comparing the true positives of the
# TemporalEvidencesClassifier at several cutoffs.
import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.classifiers.bayes import NaiveBayesClassifier
from recsys.dataset import load_dataset
from evaluation import plot
from evaluation.metrics import QualityMetricsCalculator
import config

# configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
to_compare = [1, 2, 3, 4]

# run classifier and count true positives
cls = TemporalEvidencesClassifier(data.features, data.target_names)
cls = cls.fit(data.data, data.target)
results = cls.predict(data.data)
results = QualityMetricsCalculator(data.target, results).true_positives_for_all()

# only use the interesting cutoffs
results = results.transpose()[to_compare]
results.columns = ["cutoff=%s" % c for c in results.columns]

conf = plot.plot_config(config.plot_directory, sub_dirs=[data.name],
                        prefix="histogram_cutoffs", img_type=config.img_type)
plot.comparison_histogram(results, conf)

# call form of print works on Python 2 and Python 3
print("Results can be found in the \"%s\" directory" % config.plot_directory)
import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.dataset import load_dataset
from evaluation.metrics import results_as_dataframe
from evaluation import plot
import config

# configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")

# run the classifier on the whole dataset
cls = TemporalEvidencesClassifier(data.features, data.target_names)
cls = cls.fit(data.data, data.target)
results = cls.predict(data.data, include_conflict_theta=True)

# every result is unpacked as (recommendations, conflict, uncertainty);
# extract conflict and uncertainty and convert recommendations to pandas representation
recommendations, conflict, uncertainty = zip(*results)
results = results_as_dataframe(data.target, list(recommendations))


def find_matches_in_row(row):
    """
    Mark correct recommendations in one row with "1", false recommendations with "0".

    A cell counts as correct when it equals the row's index label (`row.name`);
    presumably that label is the actual target set by `results_as_dataframe`.
    A Series indexed like the row is returned (instead of a plain list) so that
    `DataFrame.apply` expands the result back into columns on every pandas version.
    """
    return pandas.Series([1 if col == row.name else 0 for col in row], index=row.index)


# for each row, mark correct recommendations with "1", false recommendations with "0"
results = results.apply(find_matches_in_row, axis=1)

# set uncertainty and conflict as multi-index;
# list(...) because zip() is a one-shot iterator on Python 3
results.index = pandas.MultiIndex.from_tuples(
    list(zip(conflict, uncertainty)), names=["Conflict", "Uncertainty"])

# found_within: the correct service was found within X recommendations
# -> apply cumulative sum on each row so that the "1" marker is set for all columns after it first appears