コード例 #1
0
    def test_num_iterations(self):
        """
        Non-iterative algorithms should report ``iterations`` as None;
        iterative ones should report the number of iterations performed.
        """
        dataset = Dataset([("source 1", "x", 7), ("source 2", "x", 8)])

        # Majority voting runs in a single pass: no iteration count
        assert MajorityVoting().run(dataset).iterations is None

        # Sums with a fixed iterator should run exactly that many times
        assert Sums(iterator=FixedIterator(13)).run(dataset).iterations == 13
コード例 #2
0
 def test_basic(self, data):
     """
     Majority voting should give every source full trust, and belief for
     each value proportional to its vote count relative to the most-voted
     value for that variable.
     """
     results = MajorityVoting().run(data)

     expected_belief = {
         "x": {"one": 1},
         "y": {"nine": 0.5, "eight": 0.5},
         "z": {"seven": 1},
     }
     assert results.trust == {"s1": 1, "s2": 1, "s3": 1}
     assert results.belief == expected_belief
コード例 #3
0
    def test_basic(self, example_cls, file_contents, tmpdir):
        """
        Load an example dataset from a temporary file, then check the
        source-claim matrix and the results of majority voting on it.
        """
        input_path = tmpdir.join("test_input.dataset")
        input_path.write(file_contents)
        dataset = example_cls(str(input_path))

        # Expected claims, by column index:
        #   0: xyz = 85,  1: xyz = 7,  2: XYZ = 15,  3: XYZ = 13
        expected_sc = np.array([
            [1, 0, 1, 0],
            [0, 1, 0, 1],
            [0, 0, 1, 0],
        ])
        assert np.array_equal(dataset.sc.toarray(), expected_sc)

        # Run the voting algorithm and verify trust and belief scores
        res = MajorityVoting().run(dataset)
        assert res.trust == {"source abc": 1, "source def": 1, "source ghi": 1}
        # XYZ=15 has two votes (the maximum) so gets belief 1; XYZ=13 has
        # one vote so gets 0.5
        assert res.belief == {
            "var xyz": {85: 0.5, 7: 0.5},
            "var XYZ": {15: 1, 13: 0.5},
        }
コード例 #4
0
 def test_get_parameter_names(self):
     """
     Each algorithm class should advertise exactly the parameter names its
     constructor accepts.
     """
     # set() is the idiomatic empty set; set([]) builds a throwaway list
     assert MajorityVoting.get_parameter_names() == set()
     assert PooledInvestment.get_parameter_names() == {
         "priors", "iterator", "g"
     }
     assert TruthFinder.get_parameter_names() == {
         "priors", "iterator", "influence_param", "dampening_factor",
         "initial_trust"
     }
コード例 #5
0
 def test_time_taken(self):
     """
     The run time recorded in Result objects should be set for both
     iterative and non-iterative algorithms.
     """
     dataset = Dataset([("source 1", "x", 7), ("source 2", "x", 8)])
     # Same expectation for a non-iterative and an iterative algorithm
     for algorithm in (MajorityVoting(), Sums()):
         assert algorithm.run(dataset).time_taken == 5
コード例 #6
0
    def test_empty_dataset(self):
        """
        Running any algorithm on an empty dataset should raise
        EmptyDatasetError with a helpful message.
        """
        data = Dataset([])
        err_msg = "Cannot run algorithm on empty dataset"
        non_it = MajorityVoting()
        it = Sums()
        for alg in [non_it, it]:
            with pytest.raises(EmptyDatasetError) as excinfo:
                alg.run(data)
            # Check the message inside the loop so that *every* algorithm's
            # exception is inspected, not just the last one
            assert str(excinfo.value) == err_msg

        # run_iter should raise too, before yielding any results
        with pytest.raises(EmptyDatasetError) as excinfo2:
            _l = list(it.run_iter(data))
        assert str(excinfo2.value) == err_msg
コード例 #7
0
synthetic datasets, and graphing results
"""
from collections import OrderedDict
import itertools
import json
import sys

import numpy as np
import matplotlib.pyplot as plt

from truthdiscovery.input import SyntheticData
from truthdiscovery.algorithm import (AverageLog, Investment, MajorityVoting,
                                      PooledInvestment, Sums, TruthFinder)

# Algorithms to compare, keyed by display label. Constructed from a list of
# pairs (not a dict literal) so the ordering is guaranteed on every Python
# version: a plain dict passed to OrderedDict only preserves insertion order
# on CPython 3.6+ / Python 3.7+.
ALGORITHMS = OrderedDict([
    ("Voting", MajorityVoting()),
    ("Sums", Sums()),
    ("Average.Log", AverageLog()),
    ("Investment", Investment()),
    ("Pooled Investment", PooledInvestment()),
    ("TruthFinder", TruthFinder()),
])


class Experiment:
    """
    Base class for an experiment that varies one independent variable and
    runs a set of algorithms for each value.

    NOTE(review): only class attributes are visible in this chunk;
    presumably subclasses override ``labels`` and ``algorithms`` — confirm
    against the full file.
    """
    # labels for values of independent variable
    labels = None
    # dict mapping algorithm labels to objects
    algorithms = None
    # number of trials to perform for each value
    reps = 10
コード例 #8
0
 def test_voting(self, data):
     """Check MajorityVoting output against the stored reference results."""
     algorithm = MajorityVoting()
     self.check_results(algorithm, data, "voting_results.json")
コード例 #9
0
def main():
    """
    Entry point: obtain a stock dataset plus known true values (either by
    unpickling a previously saved object, or by loading data/truth files
    given on the command line), print summary statistics, then run each
    truth-discovery algorithm and report its run time and accuracy.
    """
    # Show usage
    if len(sys.argv) > 1 and sys.argv[1] in ("-h", "--help"):
        usage()
        return

    dataset = None
    sup = None

    # Unpickle dataset from a file if only one argument given
    if len(sys.argv) == 2:
        print("unpickling data...")
        start = time.time()
        # SECURITY NOTE(review): pickle.load executes arbitrary code from
        # the input file — only run this on trusted pickle files
        with open(sys.argv[1], "rb") as pickle_file:
            sup = pickle.load(pickle_file)
        end = time.time()
        print("  unpickled in {:.3f} seconds".format(end - start))
        dataset = sup.data

    # Two arguments: paths to the raw data and the true values
    elif len(sys.argv) == 3:
        data_path, truth_path = sys.argv[1:]
        print("loading data...")
        start = time.time()
        dataset = StockDataset(data_path)
        end = time.time()
        print("  loaded in {:.3f} seconds".format(end - start))

        print("loading true values...")
        start = time.time()
        sup = SupervisedStockData(dataset, truth_path)
        end = time.time()
        print("  loaded in {:.3f} seconds".format(end - start))

        # Cache the loaded data so later runs can unpickle it instead of
        # re-parsing the input files
        pickle_path = "/tmp/stock_data.pickle"
        with open(pickle_path, "wb") as pickle_file:
            pickle.dump(sup, pickle_file)
        print("pickled to {}".format(pickle_path))

    # Any other argument count is an error: show usage and exit non-zero
    else:
        usage(sys.stderr)
        sys.exit(1)

    print("")
    print("dataset has {} sources, {} claims, {} variables".format(
        dataset.num_sources, dataset.num_claims, dataset.num_variables
    ))

    start = time.time()
    print("calculating connected components...")
    components = dataset.num_connected_components()
    end = time.time()
    print("  calculated in {:.3f} seconds: {} components".format(
        end - start, components
    ))

    algorithms = [
        MajorityVoting(), Sums(), AverageLog(), Investment(),
        PooledInvestment(), TruthFinder()
    ]

    # Run each algorithm and report run time and accuracy against the
    # known true values
    for alg in algorithms:
        print("running {}...".format(alg.__class__.__name__))
        res = alg.run(sup.data)
        acc = sup.get_accuracy(res)
        print("  {:.3f} seconds, {:.3f} accuracy".format(res.time_taken, acc))