Beispiel #1
0
    def test_basic(self, triples):
        """
        Run TruthFinder with an asymmetric implication function and check the
        resulting trust and belief scores against a reference computation
        carried out step by step with numpy.
        """
        numeric = {"eight": 8, "nine": 9}

        def imp_func(var, val1, val2):
            # Deliberately asymmetric: swapping val1 and val2 flips the sign
            # of the gap and therefore selects the other return value
            gap = numeric[val1] - numeric[val2]
            if gap <= 0:
                return 0.4
            return np.exp(-0.5 * gap * gap)

        imp_data = Dataset(triples, implication_function=imp_func)
        num_iterations = 50
        initial_trust = 0.4
        dampening = 0.5
        influence = 0.25

        exp_trust = [initial_trust] * 3
        exp_belief = [0] * 4

        # NOTE(review): the index layout below presumably mirrors the
        # `triples` fixture (s1: x=one, y=nine, z=seven; s2: x=one, y=eight;
        # s3: z=seven) -- confirm against the fixture definition
        for _ in range(num_iterations):
            tau = [-np.log(1 - score) for score in exp_trust]
            sigma = [tau[0] + tau[1], tau[0], tau[1], tau[0] + tau[2]]
            # Only the two claims for "y" influence one another: 0.4 and
            # exp(-0.5) are the two possible outputs of imp_func for them
            adjusted = [
                sigma[0],
                sigma[1] + influence * sigma[2] * 0.4,
                sigma[2] + influence * sigma[1] * np.exp(-0.5),
                sigma[3],
            ]
            exp_belief = [
                1 / (1 + np.exp(-dampening * score)) for score in adjusted
            ]
            exp_trust = [
                (exp_belief[0] + exp_belief[1] + exp_belief[3]) / 3,
                (exp_belief[0] + exp_belief[2]) / 2,
                exp_belief[3],
            ]

        truthfinder = TruthFinder(
            dampening_factor=dampening,
            influence_param=influence,
            initial_trust=initial_trust,
            iterator=FixedIterator(num_iterations),
        )
        results = truthfinder.run(imp_data)

        for source, expected in zip(("s1", "s2", "s3"), exp_trust):
            assert np.isclose(results.trust[source], expected)

        expected_values = (
            ("x", {"one"}),
            ("y", {"eight", "nine"}),
            ("z", {"seven"}),
        )
        for var, values in expected_values:
            assert set(results.belief[var].keys()) == values

        # Claims listed in the same order as the entries of exp_belief
        claims = (("x", "one"), ("y", "nine"), ("y", "eight"), ("z", "seven"))
        for (var, val), expected in zip(claims, exp_belief):
            assert np.isclose(results.belief[var][val], expected)
Beispiel #2
0
    def test_trust_invalid(self):
        """
        Trust scores should never reach exactly 1 in theory, but in practice
        they can get close enough to be rounded to 1, which is a problem when
        log(1 - trust) is evaluated.

        Check that iteration is cut short when this happens.
        """
        # Five identical rows of [1, 2, 3]
        rows = [[1, 2, 3]] * 5
        dataset = MatrixDataset(np.array(rows))
        iterator = FixedIterator(100)
        results = TruthFinder(iterator=iterator).run(dataset)
        # Only 7 of the 100 permitted iterations should have been performed
        assert iterator.it_count == 7
        assert results.iterations == 7
Beispiel #3
0
    def test_no_implications(self, data):
        """
        Run TruthFinder on a dataset that has no implication function and
        compare against a reference computation that leaves implications out
        entirely. This checks that implications between claims are ignored
        when no implication function is given.
        """
        num_iterations = 50
        initial_trust = 0.4
        dampening = 0.5

        exp_trust = [initial_trust] * 3
        exp_belief = [0] * 4

        # NOTE(review): the index layout below presumably mirrors the `data`
        # fixture (s1: x=one, y=nine, z=seven; s2: x=one, y=eight;
        # s3: z=seven) -- confirm against the fixture definition
        for _ in range(num_iterations):
            tau = [-np.log(1 - score) for score in exp_trust]
            sigma = [tau[0] + tau[1], tau[0], tau[1], tau[0] + tau[2]]
            exp_belief = [
                1 / (1 + np.exp(-dampening * score)) for score in sigma
            ]
            exp_trust = [
                (exp_belief[0] + exp_belief[1] + exp_belief[3]) / 3,
                (exp_belief[0] + exp_belief[2]) / 2,
                exp_belief[3],
            ]

        truthfinder = TruthFinder(
            dampening_factor=dampening,
            initial_trust=initial_trust,
            iterator=FixedIterator(num_iterations),
        )
        results = truthfinder.run(data)

        for source, expected in zip(("s1", "s2", "s3"), exp_trust):
            assert np.isclose(results.trust[source], expected)

        expected_values = (
            ("x", {"one"}),
            ("y", {"eight", "nine"}),
            ("z", {"seven"}),
        )
        for var, values in expected_values:
            assert set(results.belief[var].keys()) == values

        # Claims listed in the same order as the entries of exp_belief
        claims = (("x", "one"), ("y", "nine"), ("y", "eight"), ("z", "seven"))
        for (var, val), expected in zip(claims, exp_belief):
            assert np.isclose(results.belief[var][val], expected)
Beispiel #4
0
 def test_get_parameter_names(self):
     """
     Check the set of parameter names reported by get_parameter_names() for
     MajorityVoting, PooledInvestment and TruthFinder.
     """
     expected = (
         (MajorityVoting, set()),
         (PooledInvestment, {"priors", "iterator", "g"}),
         (
             TruthFinder,
             {
                 "priors", "iterator", "influence_param",
                 "dampening_factor", "initial_trust",
             },
         ),
     )
     for algorithm_cls, names in expected:
         assert algorithm_cls.get_parameter_names() == names
Beispiel #5
0
    def test_truthfinder(self, data):
        """
        Run TruthFinder until cosine-distance convergence (threshold 0.001)
        on a matrix dataset with an exponentially decaying implication
        function, and hand the run to check_results() together with
        "truthfinder_results.json".
        """
        def gaussian_implication(var, val1, val2):
            # Equals 1 when both values agree and decays with the squared gap
            return np.exp(-0.5 * (val1 - val2) ** 2)

        convergence = ConvergenceIterator(DistanceMeasures.COSINE, 0.001)
        algorithm = TruthFinder(iterator=convergence)
        matrix_data = MatrixDataset(
            data.sv, implication_function=gaussian_implication
        )
        self.check_results(algorithm, matrix_data, "truthfinder_results.json")
import sys
from collections import OrderedDict

import numpy as np
import matplotlib.pyplot as plt

from truthdiscovery.input import SyntheticData
from truthdiscovery.algorithm import (AverageLog, Investment, MajorityVoting,
                                      PooledInvestment, Sums, TruthFinder)

# Algorithm objects keyed by label. The mapping is built from a sequence of
# (label, object) pairs rather than a dict literal: a dict literal only
# guarantees its insertion order from Python 3.7 onwards, so wrapping one in
# OrderedDict would not reliably preserve the order written here.
ALGORITHMS = OrderedDict([
    ("Voting", MajorityVoting()),
    ("Sums", Sums()),
    ("Average.Log", AverageLog()),
    ("Investment", Investment()),
    ("Pooled Investment", PooledInvestment()),
    ("TruthFinder", TruthFinder()),
])


class Experiment:
    # labels for values of independent variable
    labels = None
    # dict mapping algorithm labels to objects
    algorithms = None
    # number of trials to perform for each value
    reps = 10
    # parameters to pass to synthetic data generation. Value for independent
    # variable should be an iterable of values of callables
    synth_params = None

    def __init__(self):
def main():
    """
    Script entry point: obtain a supervised dataset, print summary figures
    for it, then run each algorithm on it and print the time taken and the
    accuracy.

    Behaviour depends on the command-line arguments in ``sys.argv``:

    * first argument ``-h`` or ``--help``: call ``usage()`` and return
    * one argument: unpickle the supervised dataset from that path
    * two arguments: build the dataset from a data path and a true-values
      path, then pickle the result to ``/tmp/stock_data.pickle``
    * anything else: call ``usage(sys.stderr)`` and exit with status 1
    """
    args = sys.argv[1:]

    # Show usage
    if args and args[0] in ("-h", "--help"):
        usage()
        return

    supervised = None
    dataset = None

    if len(args) == 1:
        # A single argument is the path of a pickled supervised dataset.
        # NOTE(review): pickle.load can execute arbitrary code while loading,
        # so this path must only ever point at a trusted file
        print("unpickling data...")
        began = time.time()
        with open(args[0], "rb") as pickle_file:
            supervised = pickle.load(pickle_file)
        finished = time.time()
        print("  unpickled in {:.3f} seconds".format(finished - began))
        dataset = supervised.data

    elif len(args) == 2:
        data_path, truth_path = args

        print("loading data...")
        began = time.time()
        dataset = StockDataset(data_path)
        finished = time.time()
        print("  loaded in {:.3f} seconds".format(finished - began))

        print("loading true values...")
        began = time.time()
        supervised = SupervisedStockData(dataset, truth_path)
        finished = time.time()
        print("  loaded in {:.3f} seconds".format(finished - began))

        # Save the loaded object so a later run can unpickle it instead
        pickle_path = "/tmp/stock_data.pickle"
        with open(pickle_path, "wb") as pickle_file:
            pickle.dump(supervised, pickle_file)
        print("pickled to {}".format(pickle_path))

    else:
        usage(sys.stderr)
        sys.exit(1)

    print("")
    summary = "dataset has {} sources, {} claims, {} variables"
    print(summary.format(
        dataset.num_sources, dataset.num_claims, dataset.num_variables
    ))

    began = time.time()
    print("calculating connected components...")
    num_components = dataset.num_connected_components()
    finished = time.time()
    print("  calculated in {:.3f} seconds: {} components".format(
        finished - began, num_components
    ))

    # Every algorithm is instantiated up front, before any of them is run
    algorithms = [
        MajorityVoting(),
        Sums(),
        AverageLog(),
        Investment(),
        PooledInvestment(),
        TruthFinder(),
    ]

    for algorithm in algorithms:
        print("running {}...".format(algorithm.__class__.__name__))
        results = algorithm.run(supervised.data)
        accuracy = supervised.get_accuracy(results)
        print("  {:.3f} seconds, {:.3f} accuracy".format(
            results.time_taken, accuracy
        ))