def houseB():
    """
    Load the houseB dataset together with its recommendation cutoff.

    This dataset is partially dominated by one of the sensors, which makes
    the evaluation results less statistically sound, e.g. it leads to large
    confidence intervals when running 10-fold cross-validation.
    """
    # cut printed/plotted recommendation results at 15 services
    return load_dataset("../datasets/houseB.csv", "../datasets/houseB.config"), 15
def houseB():
    """
    Return the houseB dataset and the cutoff for recommendation results.

    This dataset is partially dominated by one of the sensors, which makes
    the evaluation results less statistically sound, e.g. it leads to large
    confidence intervals when running 10-fold cross-validation.
    """
    dataset = load_dataset("../datasets/houseB.csv", "../datasets/houseB.config")
    # limit printed/plotted recommendation results to 15 services
    max_recommendations = 15
    return dataset, max_recommendations
def houseA():
    """
    Load the houseA dataset together with its recommendation cutoff.

    This dataset has 14 binary sensors, i.e. at most 14 services are
    typically available concurrently. The only anomaly is right at the
    beginning of the dataset, where the current status of the sensors is not
    known; there more than 14 services can be recommended. However, few
    instances are affected and recommendation results would be statistically
    insignificant for these values. For this reason, when printing or
    plotting results, cut the recommendation results at 14 services.
    """
    # cut printed/plotted recommendation results at 14 services
    return load_dataset("../datasets/houseA.csv", "../datasets/houseA.config"), 14
def houseA():
    """
    Return the houseA dataset and the cutoff for recommendation results.

    This dataset has 14 binary sensors, i.e. at most 14 services are
    typically available concurrently. The only anomaly is right at the
    beginning of the dataset, where the current status of the sensors is not
    known; there more than 14 services can be recommended. However, few
    instances are affected and recommendation results would be statistically
    insignificant for these values. For this reason, when printing or
    plotting results, cut the recommendation results at 14 services.
    """
    dataset = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
    # limit printed/plotted recommendation results to 14 services
    max_recommendations = 14
    return dataset, max_recommendations
def test_houseA():
    """
    Check if recommendation results of the Naive Bayes classifier are as
    expected for the houseA dataset.
    """
    # expected per-cutoff metric values, indexed by metric name
    expected_metrics = {
        "Precision": [
            0.36389964907400657, 0.35097921410756322, 0.33726522147404076,
            0.32783328310438886, 0.32350609998195629, 0.31650421215426117,
            0.31231855510715401, 0.30519493826462696, 0.29979871087041599,
            0.29551593405461341, 0.29150527293056733, 0.28660658398334166,
            0.28251775629715375, 0.27922900853036048, 0.27801361790520279,
            0.27550796383161991, 0.27466099306467773, 0.27342926821355851,
            0.27273588164873097, 0.27221800916720873, 0.2721689921161986,
            0.27166460270134835, 0.2715840524866433, 0.27152878300894606,
            0.27150502624152628, 0.27150496777883093],
        "Recall": [
            0.50280293229840445, 0.62440707201379908, 0.72229409228115571,
            0.78654592496765852, 0.84260457093574814, 0.87365243639499779,
            0.90426908150064678, 0.93014230271668819, 0.95170332039672267,
            0.96593359206554552, 0.97714532125916342, 0.98835705045278133,
            0.99396291504959033, 0.99525657611039242, 0.99568779646399308,
            0.99568779646399308, 0.99611901681759374, 0.99655023717119451,
            0.99655023717119451, 0.99655023717119451, 0.99655023717119451,
            0.99655023717119451, 0.99655023717119451, 0.99655023717119451,
            0.99655023717119451, 0.99655023717119451],
        "F1": [
            0.40969896551479879, 0.40872515566619289, 0.42494674577211028,
            0.41795013553355859, 0.41402807045377227, 0.40755506280680126,
            0.40304176773376232, 0.39634842720545965, 0.39155915552074971,
            0.38766523517736751, 0.38402205223880148, 0.37854826039224554,
            0.37434429038914058, 0.37108071816768462, 0.36996547420680476,
            0.36801458947276205, 0.36727323996045197, 0.36622750241189023,
            0.36561611066990368, 0.36516946998665389, 0.36510160971503941,
            0.36461007637070081, 0.36450029640432352, 0.36442385338288585,
            0.3643920355519879, 0.36439191983486152],
    }

    # perform classification using NaiveBayes on houseA
    dataset = load_dataset(houseA_csv, houseA_config)
    classifier = NaiveBayesClassifier(dataset.features, dataset.target_names)
    classifier = classifier.fit(dataset.data, dataset.target)
    predictions = classifier.predict(dataset.data)
    metrics = QualityMetricsCalculator(dataset.target, predictions).calculate()

    # compare calculated metrics with the expected fixtures
    for metric_name in ("Precision", "Recall", "F1"):
        assert_almost_equal(metrics[metric_name].values,
                            expected_metrics[metric_name], decimal=3)
def test_train():
    """
    Test that the classifier correctly extracts all observations from the
    test dataset.
    """
    # train the classifier on the test dataset
    data = load_dataset(data_file)
    classifier = TemporalEvidencesClassifier(data.features, data.target_names)
    classifier = classifier.fit(data.data, data.target)

    # load expected sources and their observations from json file
    expected_sources = sources_from_json(sources_file)

    # the trained classifier must know exactly the expected sources ...
    assert_array_equal(sorted(classifier.sources.keys()),
                       sorted(expected_sources.keys()))
    # ... and each source must match its expected counterpart
    for name, expected in expected_sources.items():
        assert_source_equal(classifier.sources[name], expected)
def test_recommend():
    """
    Test that the classifier generates the correct recommendations for the
    test dataset.
    """
    # train the classifier and calculate recommendations
    data = load_dataset(data_file)
    classifier = TemporalEvidencesClassifier(data.features, data.target_names)
    classifier = classifier.fit(data.data, data.target)
    actual = classifier.predict(data.data, include_conflict_theta=True)

    # load expected results from json file
    with open(recommendations_file, 'r') as infile:
        expected = json.load(infile)

    # compare each actual recommendation with its expected counterpart
    for got, want in zip(actual, expected):
        assert_recommendations_equal(got, want)
def test_train():
    """
    Test that the classifier correctly extracts all observations from the
    test dataset.
    """
    # fit the classifier on the test dataset
    dataset = load_dataset(data_file)
    model = TemporalEvidencesClassifier(dataset.features, dataset.target_names)
    model = model.fit(dataset.data, dataset.target)

    # expected sources and observations come from a json fixture
    expected_sources = sources_from_json(sources_file)

    # same set of source names ...
    assert_array_equal(
        sorted(model.sources.keys()),
        sorted(expected_sources.keys()),
    )
    # ... and identical contents per source
    for source_name in expected_sources.keys():
        assert_source_equal(model.sources[source_name],
                            expected_sources[source_name])
def test_recommend():
    """
    Test that the classifier generates the correct recommendations for the
    test dataset.
    """
    # fit the classifier and compute recommendations for every instance
    dataset = load_dataset(data_file)
    model = TemporalEvidencesClassifier(dataset.features, dataset.target_names)
    model = model.fit(dataset.data, dataset.target)
    computed = model.predict(dataset.data, include_conflict_theta=True)

    # expected recommendations come from a json fixture
    with open(recommendations_file, 'r') as fixture:
        expected = json.load(fixture)

    # pairwise comparison of computed vs. expected recommendations
    for computed_item, expected_item in zip(computed, expected):
        assert_recommendations_equal(computed_item, expected_item)
def test_houseA():
    """
    Check if recommendation results of the Naive Bayes classifier are as
    expected for the houseA dataset.
    """
    expected_precision = [
        0.36389964907400657, 0.35097921410756322, 0.33726522147404076,
        0.32783328310438886, 0.32350609998195629, 0.31650421215426117,
        0.31231855510715401, 0.30519493826462696, 0.29979871087041599,
        0.29551593405461341, 0.29150527293056733, 0.28660658398334166,
        0.28251775629715375, 0.27922900853036048, 0.27801361790520279,
        0.27550796383161991, 0.27466099306467773, 0.27342926821355851,
        0.27273588164873097, 0.27221800916720873, 0.2721689921161986,
        0.27166460270134835, 0.2715840524866433, 0.27152878300894606,
        0.27150502624152628, 0.27150496777883093,
    ]
    expected_recall = [
        0.50280293229840445, 0.62440707201379908, 0.72229409228115571,
        0.78654592496765852, 0.84260457093574814, 0.87365243639499779,
        0.90426908150064678, 0.93014230271668819, 0.95170332039672267,
        0.96593359206554552, 0.97714532125916342, 0.98835705045278133,
        0.99396291504959033, 0.99525657611039242, 0.99568779646399308,
        0.99568779646399308, 0.99611901681759374, 0.99655023717119451,
        0.99655023717119451, 0.99655023717119451, 0.99655023717119451,
        0.99655023717119451, 0.99655023717119451, 0.99655023717119451,
        0.99655023717119451, 0.99655023717119451,
    ]
    expected_f1 = [
        0.40969896551479879, 0.40872515566619289, 0.42494674577211028,
        0.41795013553355859, 0.41402807045377227, 0.40755506280680126,
        0.40304176773376232, 0.39634842720545965, 0.39155915552074971,
        0.38766523517736751, 0.38402205223880148, 0.37854826039224554,
        0.37434429038914058, 0.37108071816768462, 0.36996547420680476,
        0.36801458947276205, 0.36727323996045197, 0.36622750241189023,
        0.36561611066990368, 0.36516946998665389, 0.36510160971503941,
        0.36461007637070081, 0.36450029640432352, 0.36442385338288585,
        0.3643920355519879, 0.36439191983486152,
    ]

    # perform classification using NaiveBayes on houseA
    data = load_dataset(houseA_csv, houseA_config)
    model = NaiveBayesClassifier(data.features, data.target_names)
    model = model.fit(data.data, data.target)
    predicted = model.predict(data.data)
    computed = QualityMetricsCalculator(data.target, predicted).calculate()

    # computed metrics must match the fixtures to 3 decimal places
    assert_almost_equal(computed["Precision"].values, expected_precision, decimal=3)
    assert_almost_equal(computed["Recall"].values, expected_recall, decimal=3)
    assert_almost_equal(computed["F1"].values, expected_f1, decimal=3)
""" import sys sys.path.append("..") import pandas from recsys.classifiers.temporal import TemporalEvidencesClassifier from recsys.dataset import load_dataset from evaluation.metrics import results_as_dataframe from evaluation import plot import config #configuration data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config") #run the classifier on the whole dataset cls = TemporalEvidencesClassifier(data.features, data.target_names) cls = cls.fit(data.data, data.target) results = cls.predict(data.data, include_conflict_theta=True) #extract conflict and uncertainty and convert recommendations to pandas representation recommendations, conflict, uncertainty = zip(*results) results = results_as_dataframe(data.target, list(recommendations)) #for each row, mark correct recommendations with "1", false recommendations with "0" find_matches_in_row = lambda row: [1 if col == row.name else 0 for col in row] results = results.apply(find_matches_in_row, axis=1) #set uncertainty and conflict as multi-index
Further details for this experiment can be found in the paper in Section 6.6 and the dissertation in Section 5.5.7 """ import sys sys.path.append("..") import pandas from evaluation.experiment import Experiment from evaluation.metrics import quality_metrics from recsys.classifiers.temporal import TemporalEvidencesClassifier, configure_dynamic_cutoff from recsys.dataset import load_dataset #configuration data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config") methods_to_test = [("Fixed cutoff", None), ("dynamic cutoff=4", configure_dynamic_cutoff(1.0, 0.4, 4)), ("dynamic cutoff=2", configure_dynamic_cutoff(1.0, 0.4, 2))] #run all configured cutoffs with 10-fold cross-validation experiment = Experiment(data) for name, method in methods_to_test: experiment.add_classifier(TemporalEvidencesClassifier(data.features, data.target_names, postprocess=method), name=name) results = experiment.run(folds=10) #print results pandas.set_option('expand_frame_repr', False) pandas.set_option('max_columns', 4) print "Maximum 5 recommendations"