def test_num_iterations(self):
    """
    Check the ``iterations`` attribute of results: it is ``None`` for
    majority voting, and equals the limit of the fixed iterator given to
    Sums
    """
    dataset = Dataset([("source 1", "x", 7), ("source 2", "x", 8)])

    # Voting reports no iteration count at all
    assert MajorityVoting().run(dataset).iterations is None

    # Sums driven by a fixed iterator reports exactly that many iterations
    limit = 13
    sums = Sums(iterator=FixedIterator(limit))
    assert sums.run(dataset).iterations == limit
def test_basic(self, data):
    """
    Run majority voting on the ``data`` fixture and compare the trust and
    belief scores against the expected values
    """
    expected_trust = {"s1": 1, "s2": 1, "s3": 1}
    expected_belief = {
        "x": {"one": 1},
        "y": {"nine": 0.5, "eight": 0.5},
        "z": {"seven": 1},
    }

    outcome = MajorityVoting().run(data)

    assert outcome.trust == expected_trust
    assert outcome.belief == expected_belief
def test_basic(self, example_cls, file_contents, tmpdir):
    """
    Write ``file_contents`` to a temporary file, load it with
    ``example_cls``, then check the source-claims matrix and the results of
    majority voting on the loaded dataset
    """
    path = tmpdir.join("test_input.dataset")
    path.write(file_contents)
    loaded = example_cls(str(path))

    # Claims should be:
    # 0: xyz = 85
    # 1: xyz = 7
    # 2: XYZ = 15
    # 3: XYZ = 13
    expected_sc = np.array([
        [1, 0, 1, 0],
        [0, 1, 0, 1],
        [0, 0, 1, 0]
    ])
    assert np.array_equal(loaded.sc.toarray(), expected_sc)

    # Use voting algorithm to get results, and check they are as expected
    expected_trust = {"source abc": 1, "source def": 1, "source ghi": 1}
    expected_belief = {
        "var xyz": {85: 0.5, 7: 0.5},
        "var XYZ": {15: 1, 13: 0.5},
    }
    outcome = MajorityVoting().run(loaded)
    assert outcome.trust == expected_trust
    assert outcome.belief == expected_belief
def test_get_parameter_names(self):
    """
    Check the set of parameter names reported by ``get_parameter_names()``
    for several algorithm classes
    """
    expected_names = (
        # Voting reports no parameters
        (MajorityVoting, set()),
        (PooledInvestment, {"priors", "iterator", "g"}),
        (TruthFinder, {
            "priors", "iterator", "influence_param", "dampening_factor",
            "initial_trust"
        }),
    )
    for alg_cls, names in expected_names:
        assert alg_cls.get_parameter_names() == names
def test_time_taken(self):
    """
    Check the ``time_taken`` attribute of results, for an iterative and a
    non-iterative algorithm
    """
    dataset = Dataset([("source 1", "x", 7), ("source 2", "x", 8)])

    # NOTE(review): an exact value of 5 presumably relies on the clock being
    # patched outside this method -- confirm against the surrounding file
    for alg_cls in (MajorityVoting, Sums):
        outcome = alg_cls().run(dataset)
        assert outcome.time_taken == 5
def test_empty_dataset(self):
    """
    Check that running an algorithm on an empty dataset raises
    ``EmptyDatasetError`` with the expected message, both through ``run()``
    (iterative and non-iterative algorithms) and through ``run_iter()``
    """
    data = Dataset([])
    err_msg = "Cannot run algorithm on empty dataset"
    non_it = MajorityVoting()
    it = Sums()

    for alg in (non_it, it):
        with pytest.raises(EmptyDatasetError) as excinfo:
            alg.run(data)
        # The message is checked once the ``with`` block has exited (code
        # after the raising call inside the block would never run), and
        # inside the loop so that every algorithm is checked, not just the
        # last one
        assert str(excinfo.value) == err_msg

    # Test with run_iter also. list() forces the returned iterable to be
    # consumed, in case the error is only raised lazily
    with pytest.raises(EmptyDatasetError) as excinfo_iter:
        list(it.run_iter(data))
    assert str(excinfo_iter.value) == err_msg
synthetic datasets, and graphing results """ from collections import OrderedDict import itertools import json import sys import numpy as np import matplotlib.pyplot as plt from truthdiscovery.input import SyntheticData from truthdiscovery.algorithm import (AverageLog, Investment, MajorityVoting, PooledInvestment, Sums, TruthFinder) ALGORITHMS = OrderedDict({ "Voting": MajorityVoting(), "Sums": Sums(), "Average.Log": AverageLog(), "Investment": Investment(), "Pooled Investment": PooledInvestment(), "TruthFinder": TruthFinder() }) class Experiment: # labels for values of independent variable labels = None # dict mapping algorithm labels to objects algorithms = None # number of trials to perform for each value reps = 10
def test_voting(self, data):
    """
    Check majority voting results on ``data`` via ``check_results``, using
    ``voting_results.json``
    """
    self.check_results(MajorityVoting(), data, "voting_results.json")
def _timed(func, *args):
    """
    Call ``func(*args)`` and return a ``(result, seconds)`` pair, where
    ``seconds`` is the elapsed time measured with ``time.time()``
    """
    start = time.time()
    result = func(*args)
    return result, time.time() - start


def _unpickle(path):
    """
    Return the object stored in the pickle file at ``path``
    """
    # SECURITY: unpickling can execute arbitrary code, and ``path`` comes
    # straight from the command line. Only use this on pickle files this
    # script created itself, never on files from an untrusted source.
    with open(path, "rb") as pickle_file:
        return pickle.load(pickle_file)


def main():
    """
    Command-line entry point: load a stock dataset, print some statistics
    about it, then run each truth-discovery algorithm and print its run time
    and accuracy.

    Accepted arguments (after the program name):

    * ``-h`` / ``--help`` as first argument: show usage and return
    * one argument: path to a pickle file, as written by a previous run
    * two arguments: path to the data file, then path to the true values;
      the loaded supervised data is also pickled to
      ``/tmp/stock_data.pickle``

    Any other number of arguments prints usage to stderr and exits with
    status 1.
    """
    args = sys.argv[1:]

    # Show usage
    if args and args[0] in ("-h", "--help"):
        usage()
        return

    dataset = None
    sup = None

    if len(args) == 1:
        # Unpickle dataset from a file if only one argument given
        print("unpickling data...")
        sup, elapsed = _timed(_unpickle, args[0])
        print(" unpickled in {:.3f} seconds".format(elapsed))
        dataset = sup.data

    elif len(args) == 2:
        data_path, truth_path = args

        print("loading data...")
        dataset, elapsed = _timed(StockDataset, data_path)
        print(" loaded in {:.3f} seconds".format(elapsed))

        print("loading true values...")
        sup, elapsed = _timed(SupervisedStockData, dataset, truth_path)
        print(" loaded in {:.3f} seconds".format(elapsed))

        # Save the loaded data so that later runs can take the faster
        # one-argument path
        pickle_path = "/tmp/stock_data.pickle"
        with open(pickle_path, "wb") as pickle_file:
            pickle.dump(sup, pickle_file)
        print("pickled to {}".format(pickle_path))

    else:
        usage(sys.stderr)
        sys.exit(1)

    print("")
    print("dataset has {} sources, {} claims, {} variables".format(
        dataset.num_sources, dataset.num_claims, dataset.num_variables
    ))

    print("calculating connected components...")
    components, elapsed = _timed(dataset.num_connected_components)
    print(" calculated in {:.3f} seconds: {} components".format(
        elapsed, components
    ))

    algorithms = [
        MajorityVoting(), Sums(), AverageLog(), Investment(),
        PooledInvestment(), TruthFinder()
    ]
    for alg in algorithms:
        print("running {}...".format(alg.__class__.__name__))
        res = alg.run(sup.data)
        acc = sup.get_accuracy(res)
        print(" {:.3f} seconds, {:.3f} accuracy".format(res.time_taken, acc))