  def testResolveQuestion(self):
    # First check that we get nothing back when we test with an empty model:
    cm = confusion_matrices.ConfusionMatrices()
    cm.priors = {}
    cm.confusion_matrices = {}
    resolution_map = cm.ResolveQuestion(test_util.IPEIROTIS_RESPONSES[0])
    test_util.AssertMapsAlmostEqual(self, {}, resolution_map)

    # Use data from the solution to the Ipeirotis example:
    cm = confusion_matrices.ConfusionMatrices()
    cm.priors = IPEIROTIS_MLE_PRIORS
    cm.confusion_matrices = IPEIROTIS_MLE_CM
    for i in range(len(test_util.IPEIROTIS_DATA)):
      resolution_map = cm.ResolveQuestion(test_util.IPEIROTIS_RESPONSES[i])
      test_util.AssertMapsAlmostEqual(self,
                                      test_util.IPEIROTIS_ALL_ANSWERS[i],
                                      resolution_map,
                                      label='question ' + str(i) + ', answer')
    # And again for the Dawid & Skene example:
    cm.priors = DS_MLE_PRIORS
    cm.confusion_matrices = DS_MLE_CM
    for i in range(len(test_util.DS_DATA)):
      resolution_map = cm.ResolveQuestion(test_util.DS_RESPONSES[i])
      test_util.AssertMapsAlmostEqual(self,
                                      test_util.DS_EM_CM_RESOLUTIONS[i],
                                      resolution_map,
                                      label='question ' + str(i) + ', answer')
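
The resolution these assertions exercise is Bayes' rule: the posterior over answers is the prior times each responding contributor's confusion-matrix likelihood, renormalized. Below is a minimal sketch under that reading, reusing the dict shapes of cm.priors and cm.confusion_matrices; BayesResolveSketch and the toy model are illustrative, not the module's API. Note it also reproduces the empty-model behavior above, returning {} when the priors are empty.

def BayesResolveSketch(priors, confusion_matrices_map, responses):
  """Posterior over answers given (contributor, judgment, metadata) triples."""
  posterior = dict(priors)  # start from the prior; empty priors yield {}
  for contributor, judgment, _ in responses:
    matrix = confusion_matrices_map.get(contributor, {})
    for answer in posterior:
      # P(judgment | true answer) for this contributor; 0.0 if never seen.
      posterior[answer] *= matrix.get(answer, {}).get(judgment, 0.0)
  total = sum(posterior.values())
  return {a: p / total for a, p in posterior.items()} if total else {}

# Toy two-answer model with one reliable and one noisy contributor:
priors = {'A': 0.5, 'B': 0.5}
matrices = {'good': {'A': {'A': 0.9, 'B': 0.1}, 'B': {'A': 0.1, 'B': 0.9}},
            'noisy': {'A': {'A': 0.6, 'B': 0.4}, 'B': {'A': 0.4, 'B': 0.6}}}
print(BayesResolveSketch(priors, matrices,
                         [('good', 'A', {}), ('noisy', 'B', {})]))
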
  def testMLEResolutionPriors(self):
    # First check that we get an empty prior map when we have no resolutions:
    test_util.AssertMapsAlmostEqual(
        self,
        {},
        confusion_matrices.MLEResolutionPriors(test_util.DS_DATA),
        label='answer')
    # Now check that we get an agnostic prior when we have one resolution of
    # each of two answers:
    test_util.AssertMapsAlmostEqual(
        self,
        {'notporn': 0.5, 'porn': 0.5},
        confusion_matrices.MLEResolutionPriors(test_util.IPEIROTIS_DATA),
        label='answer')
    # Now check that we get the correct prior when we do have resolutions:
    test_util.AssertMapsAlmostEqual(
        self,
        IPEIROTIS_MLE_PRIORS,
        confusion_matrices.MLEResolutionPriors(test_util.IPEIROTIS_DATA_FINAL),
        label='answer')
    # And again for the Dawid & Skene example:
    test_util.AssertMapsAlmostEqual(
        self,
        DS_MLE_PRIORS,
        confusion_matrices.MLEResolutionPriors(test_util.DS_DATA_FINAL),
        label='answer')
    # Check that the weighted test data gives the same results as the original:
    test_util.AssertMapsAlmostEqual(
        self,
        DS_MLE_PRIORS,
        confusion_matrices.MLEResolutionPriors(
            test_util.DS_DATA_EXTRA,
            question_weights=test_util.DS_EXTRA_WEIGHTS),
        label='answer')
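
At maximum likelihood the resolution prior is just the normalized, optionally weighted, mass of the resolutions. Here is a sketch under the assumption that data maps question -> (responses, resolution_map), which is the shape the calls above imply; MLEPriorsSketch is a hypothetical stand-in, not the module's function. Unresolved questions contribute nothing, matching the empty-map case above.

def MLEPriorsSketch(data, question_weights=None):
  """Normalized weighted resolution mass per answer."""
  question_weights = question_weights or {}
  counts = {}
  for question, (_, resolution_map) in data.items():
    weight = question_weights.get(question, 1.0)
    for answer, probability in resolution_map.items():
      counts[answer] = counts.get(answer, 0.0) + weight * probability
  total = sum(counts.values())
  return {answer: c / total for answer, c in counts.items()} if total else {}

data = {'q1': ([], {'A': 1.0}), 'q2': ([], {'A': 1.0}), 'q3': ([], {'B': 1.0})}
print(MLEPriorsSketch(data))  # {'A': 0.666..., 'B': 0.333...}
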
  def testMutualInformation(self):
    # Use data from the solution to the Ipeirotis example:
    cm = confusion_matrices.ConfusionMatrices()
    cm.priors = IPEIROTIS_MLE_PRIORS
    cm.confusion_matrices = IPEIROTIS_MLE_CM

    # First we'll test for a first judgment:
    expected = {'worker1': 0.0,
                'worker2': 0.419973,
                'worker3': 0.970951,
                'worker4': 0.970951,
                'worker5': 0.970951}
    result = {}
    for contributor in expected:
      result[contributor] = cm.MutualInformation(contributor)
    test_util.AssertMapsAlmostEqual(self, expected, result, label='contributor')

    # Now we'll test for a second judgment:
    previous_responses = [('worker2', 'porn', {})]
    expected = {'worker1': 0.0,
                'worker2': 0.4581059,
                'worker3': 0.9182958,
                'worker4': 0.9182958,
                'worker5': 0.9182958}
    result = {}
    for contributor in expected:
      result[contributor] = cm.MutualInformation(
          contributor, previous_responses=previous_responses)
    test_util.AssertMapsAlmostEqual(self, expected, result, label='contributor')

    # However, if the first judgment was given by a perfect contributor (for
    # example, worker3), then no second judgment can give any more information:
    previous_responses = [('worker3', 'notporn', {})]
    self.assertAlmostEqual(0.0, cm.MutualInformation(
        'worker2', previous_responses=previous_responses))
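
The quantity under test is the mutual information, in bits, between the true answer and a single judgment from the contributor. The perfect contributors (worker3 through worker5) score exactly the entropy of the priors, the ceiling for any one judgment, which is consistent with priors of 0.6/0.4 here. A self-contained sketch (MutualInformationSketch is illustrative, not the class method):

import math

def MutualInformationSketch(priors, matrix):
  """I(answer; judgment) in bits for one contributor's confusion matrix."""
  # Marginal probability of each judgment under the answer priors:
  marginal = {}
  for answer, prior in priors.items():
    for judgment, p in matrix.get(answer, {}).items():
      marginal[judgment] = marginal.get(judgment, 0.0) + prior * p
  info = 0.0
  for answer, prior in priors.items():
    for judgment, p in matrix.get(answer, {}).items():
      if p > 0.0:
        info += prior * p * math.log(p / marginal[judgment], 2)
  return info

# A perfect contributor attains the prior entropy; for priors of 0.6/0.4
# that is about 0.970951 bits, matching workers 3-5 above:
priors = {'notporn': 0.6, 'porn': 0.4}
perfect = {'notporn': {'notporn': 1.0}, 'porn': {'porn': 1.0}}
print(MutualInformationSketch(priors, perfect))
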
    def testResolveQuestion(self):
        gc = gaussian_contributors.GaussianContributors()
        gc.bias = {'worker1': -0.5, 'worker2': 0.8}
        gc.precision = {'worker1': 1.0 / 0.3, 'worker2': 1.0 / 0.2}
        expected = {
            gaussian_contributors.MEAN: 2.32,
            gaussian_contributors.VARIANCE: 0.12
        }
        resolution_map = gc.ResolveQuestion([('worker1', 2.0, {}),
                                             ('worker2', 3.0, {})])
        test_util.AssertMapsAlmostEqual(self,
                                        expected,
                                        resolution_map,
                                        label='variable')
        # Now try with infinite-precision contributors:
        gc.bias = {'worker1': 1.0, 'worker2': 2.0, 'worker3': 3.0}
        gc.precision = {
            'worker1': 1.0,
            'worker2': gaussian_contributors.INFINITY,
            'worker3': gaussian_contributors.INFINITY
        }
        expected = {
            gaussian_contributors.MEAN: -2.5,
            gaussian_contributors.VARIANCE: 0.25
        }
        resolution_map = gc.ResolveQuestion([('worker1', 0.0, {}),
                                             ('worker2', 0.0, {}),
                                             ('worker3', 0.0, {})])
        test_util.AssertMapsAlmostEqual(self,
                                        expected,
                                        resolution_map,
                                        label='variable')
        # Check that non-numeric judgments cause a TypeError:
        self.assertRaises(TypeError, gc.ResolveQuestion,
                          [('worker1', 'WTF', {})])
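
The first expectation above follows from a precision-weighted average: the posterior mean weights each bias-corrected judgment by its contributor's precision, and the posterior variance is the reciprocal of the total precision. A sketch of the finite-precision case only (the infinite-precision branch above clearly takes a separate path, which this sketch does not attempt); the literal keys 'mean' and 'variance' stand in for the gaussian_contributors.MEAN and VARIANCE constants:

def GaussianResolveSketch(bias, precision, responses):
  """Precision-weighted mean and variance of bias-corrected judgments."""
  total_precision = 0.0
  weighted_sum = 0.0
  for contributor, judgment, _ in responses:
    debiased = judgment - bias[contributor]
    total_precision += precision[contributor]
    weighted_sum += precision[contributor] * debiased
  return {'mean': weighted_sum / total_precision,
          'variance': 1.0 / total_precision}

bias = {'worker1': -0.5, 'worker2': 0.8}
precision = {'worker1': 1.0 / 0.3, 'worker2': 1.0 / 0.2}
# Reproduces the first expectation above: mean 2.32, variance 0.12.
print(GaussianResolveSketch(bias, precision,
                            [('worker1', 2.0, {}), ('worker2', 3.0, {})]))
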
    def testResolveQuestion(self):
        decision_tree_model = decision_tree.DecisionTree()
        decision_tree_model.SetMLEParameters(TEST_DATA)

        # Using the MLE model parameters we know from above, we worked out the
        # following resolutions by hand:
        responses = [('c1', ('ABO', 'A'), {})]
        expected = {('ABO', ): 0.8, ('Rh', ): 0.2}
        expected[('ABO', 'A')] = expected[('ABO', )] * 0.4
        expected[('ABO', 'B')] = expected[('ABO', )] * 0.4
        expected[('ABO', 'O')] = expected[('ABO', )] * 0.2
        expected[('Rh', '+')] = expected[('Rh', )] * 0.6 / 1.4
        expected[('Rh', '-')] = expected[('Rh', )] * 0.8 / 1.4
        result = decision_tree_model.ResolveQuestion(responses)
        test_util.AssertMapsAlmostEqual(self, expected, result)

        responses = [('c2', ('Rh', '-'), {})]
        expected = {('ABO', ): 0.5, ('Rh', ): 0.5}
        expected[('ABO', 'A')] = expected[('ABO', )] * 0.4
        expected[('ABO', 'B')] = expected[('ABO', )] * 0.4
        expected[('ABO', 'O')] = expected[('ABO', )] * 0.2
        expected[('Rh', '+')] = expected[('Rh', )] * 1.0 / 3.0
        expected[('Rh', '-')] = expected[('Rh', )] * 2.0 / 3.0
        result = decision_tree_model.ResolveQuestion(responses)
        test_util.AssertMapsAlmostEqual(self, expected, result)
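
The hand-worked expectations encode the chain rule on the tree: a full path such as ('ABO', 'A') gets the branch probability times the conditional probability of the leaf within that branch. A sketch of just that composition step, with the branch and within-branch distributions assumed already computed (ChainRuleSketch is illustrative, not the module's API):

def ChainRuleSketch(branch_probs, conditional_probs):
  """Combine P(branch) with P(leaf | branch) into full-path probabilities."""
  resolution = dict(branch_probs)
  for branch, leaves in conditional_probs.items():
    for leaf, conditional in leaves.items():
      resolution[branch + (leaf,)] = branch_probs[branch] * conditional
  return resolution

branch_probs = {('ABO',): 0.8, ('Rh',): 0.2}
conditional_probs = {('ABO',): {'A': 0.4, 'B': 0.4, 'O': 0.2},
                     ('Rh',): {'+': 0.6 / 1.4, '-': 0.8 / 1.4}}
# Reproduces the first expected map in the test above.
print(ChainRuleSketch(branch_probs, conditional_probs))
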
    def testMLEGaussianParameters(self):
        result_mle_bias, result_mle_precision = (
            gaussian_contributors.MLEGaussianParameters(TEST_STATISTICS))
        test_util.AssertMapsAlmostEqual(self,
                                        TEST_MLE_BIAS,
                                        result_mle_bias,
                                        label='MLE bias, contributor')
        test_util.AssertMapsAlmostEqual(self,
                                        TEST_MLE_PRECISION,
                                        result_mle_precision,
                                        label='MLE precision, contributor')
    def testVariationalGaussianParameters(self):
        result_mle_bias, result_variational_precision = (
            gaussian_contributors.VariationalGaussianParameters(
                TEST_STATISTICS))
        test_util.AssertMapsAlmostEqual(self,
                                        TEST_MLE_BIAS,
                                        result_mle_bias,
                                        label='MLE bias, contributor')
        test_util.AssertMapsAlmostEqual(
            self,
            TEST_VARIATIONAL_PRECISION,
            result_variational_precision,
            label='variational precision, contributor')
  def testVariationalResolutionPriors(self):
    # Test with the Ipeirotis data:
    test_util.AssertMapsAlmostEqual(
        self,
        IPEIROTIS_VARIATIONAL_PRIORS,
        confusion_matrices.VariationalResolutionPriors(
            IPEIROTIS_PRIORS_DIRICHLET),
        label='answer')
    # And again with the Dawid & Skene data:
    test_util.AssertMapsAlmostEqual(
        self,
        DS_VARIATIONAL_PRIORS,
        confusion_matrices.VariationalResolutionPriors(DS_PRIORS_DIRICHLET),
        label='answer')
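
A common variational treatment of a Dirichlet posterior defines the prior estimate as the exponentiated expected log-probability, exp(psi(alpha_k) - psi(alpha_0)), with alpha_0 the sum of the parameters and psi the digamma function. Whether this module normalizes the result afterwards is not visible from the test, so treat the sketch below as one standard reading rather than the module's exact formula:

import math

from scipy.special import digamma

def VariationalPriorsSketch(alpha):
  """exp(E[log theta_k]) under Dirichlet(alpha); one standard VB update."""
  alpha_total = sum(alpha.values())
  return {answer: math.exp(digamma(a) - digamma(alpha_total))
          for answer, a in alpha.items()}

print(VariationalPriorsSketch({'notporn': 4.0, 'porn': 2.0}))
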
  def testResolutionPriorsDirichletParameters(self):
    # Check the Dirichlet alpha vector for the Ipeirotis data:
    result = confusion_matrices.ResolutionPriorsDirichletParameters(
        test_util.IPEIROTIS_DATA_FINAL)
    test_util.AssertMapsAlmostEqual(
        self, IPEIROTIS_PRIORS_DIRICHLET, result, label='answer')
    # And for the Dawid & Skene data:
    result = confusion_matrices.ResolutionPriorsDirichletParameters(
        test_util.DS_DATA_FINAL)
    test_util.AssertMapsAlmostEqual(
        self, DS_PRIORS_DIRICHLET, result, label='answer')
    # Check that the weighted test data gives the same results as the original:
    result = confusion_matrices.ResolutionPriorsDirichletParameters(
        test_util.DS_DATA_EXTRA, question_weights=test_util.DS_EXTRA_WEIGHTS)
    test_util.AssertMapsAlmostEqual(self, DS_PRIORS_DIRICHLET, result,
                                    label='answer')
  def testSetMLEParameters(self):
    # Use data from the solution to the Ipeirotis example:
    cm = confusion_matrices.ConfusionMatrices()
    cm.SetMLEParameters(test_util.IPEIROTIS_DATA_FINAL)
    # Check that cm.priors was set correctly:
    test_util.AssertMapsAlmostEqual(self, IPEIROTIS_MLE_PRIORS, cm.priors,
                                    label='answer')
    # Check that cm.confusion_matrices was set correctly:
    test_util.AssertConfusionMatricesAlmostEqual(self,
                                                 IPEIROTIS_MLE_CM,
                                                 cm.confusion_matrices)
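
The confusion-matrix half of SetMLEParameters is, at maximum likelihood, row-normalized co-occurrence of resolved answers and judgments per contributor, weighted by resolution probability. A sketch over the same hypothetical question -> (responses, resolution_map) data shape as before (MLEConfusionMatricesSketch is not the module's API):

def MLEConfusionMatricesSketch(data):
  """Row-normalized (answer, judgment) co-occurrence per contributor."""
  counts = {}  # contributor -> answer -> judgment -> weighted count
  for responses, resolution_map in data.values():
    for contributor, judgment, _ in responses:
      for answer, probability in resolution_map.items():
        row = counts.setdefault(contributor, {}).setdefault(answer, {})
        row[judgment] = row.get(judgment, 0.0) + probability
  for matrix in counts.values():
    for row in matrix.values():
      norm = sum(row.values())
      for judgment in row:
        row[judgment] /= norm
  return counts

data = {'q1': ([('c1', 'A', {})], {'A': 1.0}),
        'q2': ([('c1', 'B', {})], {'A': 0.5, 'B': 0.5})}
print(MLEConfusionMatricesSketch(data))
# {'c1': {'A': {'A': 0.667, 'B': 0.333}, 'B': {'B': 1.0}}} (approximately)
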
  def testSampleResolutionPriors(self):
    # Seed the random number generator to produce deterministic test results:
    numpy.random.seed(0)

    # We also need the Dirichlet parameter vectors to have fixed order:
    ipeirotis_priors_dirichlet_ordered = collections.OrderedDict(
        sorted(IPEIROTIS_PRIORS_DIRICHLET.items()))
    ds_priors_dirichlet_ordered = collections.OrderedDict(
        sorted(DS_PRIORS_DIRICHLET.items()))

    # Check that a set of randomly-sampled priors sums to unity:
    self.assertAlmostEqual(
        1.0,
        sum(confusion_matrices.SampleResolutionPriors(
            ipeirotis_priors_dirichlet_ordered).values()))
    self.assertAlmostEqual(
        1.0,
        sum(confusion_matrices.SampleResolutionPriors(
            ds_priors_dirichlet_ordered).values()))

    # Check that the mean of 50000 samples is close to the actual mean of the
    # Dirichlet distribution, which is the normalized alpha vector:
    alpha = DS_PRIORS_DIRICHLET
    norm = sum(alpha.values())
    expected_mean = {answer: alpha[answer] / norm for answer in alpha}
    samples = [confusion_matrices.SampleResolutionPriors(alpha)
               for _ in range(50000)]
    mean = {}
    for answer in alpha:
      mean[answer] = numpy.mean([sample[answer] for sample in samples])
    test_util.AssertMapsAlmostEqual(self, expected_mean, mean, label='answer')
    # Also check the variance, which is given by the formula below:
    expected_variance = {}
    variance = {}
    for answer in alpha:
      expected_variance[answer] = (
          alpha[answer] * (norm - alpha[answer]) / (norm * norm * (norm + 1.0)))
      variance[answer] = numpy.var([sample[answer] for sample in samples])
    test_util.AssertMapsAlmostEqual(self, expected_variance, variance,
                                    label='answer', places=4)
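
The moment targets in this test are the standard Dirichlet facts: the mean of component k is alpha_k / alpha_0, and its variance is alpha_k * (alpha_0 - alpha_k) / (alpha_0**2 * (alpha_0 + 1)), with alpha_0 the sum of the parameters. A plausible sketch of the sampling step itself, built on numpy's Dirichlet sampler (not necessarily the module's implementation):

import numpy

def SampleResolutionPriorsSketch(alpha):
  """One draw from Dirichlet(alpha) as an answer -> probability map."""
  answers = sorted(alpha)  # fix the iteration order for reproducibility
  sample = numpy.random.dirichlet([alpha[answer] for answer in answers])
  return dict(zip(answers, sample))

numpy.random.seed(0)
draw = SampleResolutionPriorsSketch({'A': 2.0, 'B': 1.0, 'C': 1.0})
print(draw, sum(draw.values()))  # components of one draw always sum to 1.0
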
    def testGaussianStatistics(self):
        result_judgment_count, result_mle_bias, result_sum_squared_deviation = (
            gaussian_contributors.GaussianStatistics(TEST_DATA))
        test_util.AssertMapsAlmostEqual(self,
                                        TEST_JUDGMENT_COUNT,
                                        result_judgment_count,
                                        label='judgment count, contributor')
        test_util.AssertMapsAlmostEqual(self,
                                        TEST_MLE_BIAS,
                                        result_mle_bias,
                                        label='MLE bias, contributor')
        test_util.AssertMapsAlmostEqual(
            self,
            TEST_SUM_SQUARED_DEVIATION,
            result_sum_squared_deviation,
            label='sum of squared deviations, contributor')

        # Next, check that question weights are correctly interpreted:
        weighted_data = copy.deepcopy(TEST_DATA)
        # Duplicate question 4:
        weighted_data['q5'] = weighted_data['q4']
        # Check we get the same statistics when we use the modified data with
        # weights on q4 and q5 summing to 1.0:
        result_judgment_count, result_mle_bias, result_sum_squared_deviation = (
            gaussian_contributors.GaussianStatistics(weighted_data,
                                                     question_weights={
                                                         'q4': 0.75,
                                                         'q5': 0.25
                                                     }))
        test_util.AssertMapsAlmostEqual(self,
                                        TEST_JUDGMENT_COUNT,
                                        result_judgment_count,
                                        label='judgment count, contributor')
        test_util.AssertMapsAlmostEqual(self,
                                        TEST_MLE_BIAS,
                                        result_mle_bias,
                                        label='MLE bias, contributor')
        test_util.AssertMapsAlmostEqual(
            self,
            TEST_SUM_SQUARED_DEVIATION,
            result_sum_squared_deviation,
            label='sum of squared deviations, contributor')

        # Check that data without resolutions (or with the
        # wrong resolution format) cause a KeyError:
        self.assertRaises(KeyError, gaussian_contributors.GaussianStatistics,
                          test_util.DS_DATA)
        self.assertRaises(KeyError, gaussian_contributors.GaussianStatistics,
                          test_util.IPEIROTIS_DATA)
        # Check that non-numeric judgments cause a TypeError:
        self.assertRaises(
            TypeError, gaussian_contributors.GaussianStatistics, {
                'q1': ([('c1', 'WTF', {})], {
                    gaussian_contributors.MEAN: 5.0,
                    gaussian_contributors.VARIANCE: 0.0
                })
            })
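
The three maps checked here are per-contributor sufficient statistics: the weighted judgment count, the MLE bias (the weighted mean deviation of judgments from the resolved means), and a sum of squared deviations about that bias. The sketch below assumes that reading, with the literal keys 'mean' and 'variance' standing in for the module's MEAN and VARIANCE constants; its subtraction and key lookup also reproduce the TypeError and KeyError assertions above:

def GaussianStatisticsSketch(data, question_weights=None):
  """Per-contributor judgment count, bias, and sum of squared deviations."""
  question_weights = question_weights or {}
  counts = {}
  deviations = {}  # contributor -> list of (weight, judgment - resolved mean)
  for question, (responses, resolution) in data.items():
    weight = question_weights.get(question, 1.0)
    for contributor, judgment, _ in responses:
      # Non-numeric judgments raise TypeError here; a resolution map without
      # a mean raises KeyError, mirroring the assertions above.
      deviation = judgment - resolution['mean']
      counts[contributor] = counts.get(contributor, 0.0) + weight
      deviations.setdefault(contributor, []).append((weight, deviation))
  bias = {c: sum(w * d for w, d in pairs) / counts[c]
          for c, pairs in deviations.items()}
  ssd = {c: sum(w * (d - bias[c]) ** 2 for w, d in pairs)
         for c, pairs in deviations.items()}
  return counts, bias, ssd

data = {'q1': ([('c1', 5.5, {})], {'mean': 5.0, 'variance': 0.0}),
        'q2': ([('c1', 6.5, {})], {'mean': 6.0, 'variance': 0.0})}
print(GaussianStatisticsSketch(data))  # ({'c1': 2.0}, {'c1': 0.5}, {'c1': 0.0})
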
    def test_ExpandResolution(self):  # pylint: disable=g-bad-name
        resolution_map = {
            ('apidae', ): 0.5,  # this one should get overwritten!
            ('apidae', 'apis', 'mellifera'): 0.9,
            ('apidae', 'apis', 'cerana'): 0.05,
            ('apidae', 'bombus'): 0.05
        }
        expected = {
            ('apidae', ): 1.0,
            ('apidae', 'apis'): 0.95,
            ('apidae', 'apis', 'mellifera'): 0.9,
            ('apidae', 'apis', 'cerana'): 0.05,
            ('apidae', 'bombus'): 0.05
        }
        decision_tree.DecisionTree._ExpandResolution(resolution_map)
        test_util.AssertMapsAlmostEqual(self,
                                        expected,
                                        resolution_map,
                                        label='answer')
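
The expansion under test pushes probability mass upward: every path that prefixes another entry is recomputed as the total mass at or below it, which is why the stale 0.5 at ('apidae',) becomes 1.0. A standalone sketch of that pass (the real method is private to DecisionTree):

def ExpandResolutionSketch(resolution_map):
  """Recompute every prefixing path as the total mass at or below it."""
  paths = set(resolution_map)
  # Paths that prefix another entry hold stale values; zero them first.
  maximal = {p for p in paths
             if not any(q != p and q[:len(p)] == p for q in paths)}
  for path in paths - maximal:
    resolution_map[path] = 0.0
  # Push each maximal path's mass into all of its proper prefixes.
  for path in maximal:
    for depth in range(1, len(path)):
      prefix = path[:depth]
      resolution_map[prefix] = (resolution_map.get(prefix, 0.0)
                                + resolution_map[path])
  return resolution_map

resolution_map = {('apidae',): 0.5,  # stale; gets overwritten
                  ('apidae', 'apis', 'mellifera'): 0.9,
                  ('apidae', 'apis', 'cerana'): 0.05,
                  ('apidae', 'bombus'): 0.05}
print(ExpandResolutionSketch(resolution_map))  # matches `expected` above
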
    def testSetMLEParameters(self):
        # The previous test checked that we call submodels using the correct paths;
        # this test is more end-to-end, checking that the correct submodels are
        # created and that we set the correct parameters for one of them.
        decision_tree_model = decision_tree.DecisionTree()
        decision_tree_model.SetMLEParameters(TEST_DATA)

        # Test that the correct submodels were created:
        self.assertEqual(set(((), ('ABO', ), ('Rh', ))),
                         set(decision_tree_model.model_tree.keys()))

        # Test the root confusion matrix parameters:
        expected_priors = {
            'ABO': 1.0 / 3.0,
            'Rh': 1.4 / 3.0,
            'Other': 0.6 / 3.0
        }
        test_util.AssertMapsAlmostEqual(
            self, expected_priors, decision_tree_model.model_tree[()].priors)
        expected_cm = {
            'c1': {
                'ABO': {
                    'ABO': 0.8 / 1.0,
                    'Rh': 0.2 / 1.0
                },
                'Rh': {
                    'ABO': 0.2 / 1.4,
                    'Rh': 1.2 / 1.4
                },
                'Other': {
                    'Rh': 1.0
                }
            },
            'c2': {
                'ABO': {
                    'Rh': 1.0
                },
                'Rh': {
                    'Rh': 1.0 / 1.4,
                    'Other': 0.4 / 1.4
                },
                'Other': {
                    'Other': 1.0
                }
            },
            'c3': {
                'ABO': {
                    'ABO': 1.0
                },
                'Rh': {
                    'ABO': 1.0 / 1.4,
                    'Rh': 0.4 / 1.4
                },
                'Other': {
                    'Rh': 1.0
                }
            }
        }
        test_util.AssertConfusionMatricesAlmostEqual(
            self, expected_cm,
            decision_tree_model.model_tree[()].confusion_matrices)

        # Test the ('ABO',) confusion matrix parameters:
        expected_priors = {'A': 0.4, 'B': 0.4, 'O': 0.2}
        test_util.AssertMapsAlmostEqual(
            self, expected_priors,
            decision_tree_model.model_tree[('ABO', )].priors)
        expected = {
            'c1': {
                'A': {
                    'A': 1.0
                },
                'B': {
                    'A': 1.0
                }
            },
            # c2 never said 'ABO', so it has no entry here
            'c3': {
                'A': {
                    'B': 1.0
                },
                'B': {
                    'B': 1.0
                },
                'O': {
                    'O': 1.0
                }
            }
        }
        test_util.AssertConfusionMatricesAlmostEqual(
            self, expected,
            decision_tree_model.model_tree[('ABO', )].confusion_matrices)

        # Test the ('Rh',) confusion matrix parameters:
        expected_priors = {'+': 0.6 / 1.4, '-': 0.8 / 1.4}
        test_util.AssertMapsAlmostEqual(
            self, expected_priors,
            decision_tree_model.model_tree[('Rh', )].priors)
        expected = {
            'c1': {
                '+': {
                    '+': 1.0
                },
                '-': {
                    '+': 0.5,
                    '-': 0.5
                }
            },
            'c2': {
                '+': {
                    '+': 0.2 / 0.6,
                    '-': 0.4 / 0.6
                },
                '-': {
                    '-': 1.0
                }
            },
            'c3': {
                '-': {
                    '-': 1.0
                }
            }
        }
        test_util.AssertConfusionMatricesAlmostEqual(
            self, expected,
            decision_tree_model.model_tree[('Rh', )].confusion_matrices)
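
The tree layout this test pins down is one confusion-matrix submodel per internal node, each trained on the data projected to that node: at the root the labels are first path components, and under ('ABO',) they are the second components of paths passing through it. A sketch of the projection step alone, with ProjectToNode a hypothetical helper and the per-node fitting left to whatever estimator the flat model uses:

def ProjectToNode(data, node):
  """Project tree-valued data onto one node's local label alphabet."""
  depth = len(node)
  projected = {}
  for question, (responses, resolution_map) in data.items():
    # Keep only judgments whose path passes through `node`, relabelled by
    # the next path component:
    local_responses = [(contributor, path[depth], metadata)
                       for contributor, path, metadata in responses
                       if len(path) > depth and path[:depth] == node]
    local_resolution = {}
    for path, probability in resolution_map.items():
      if len(path) > depth and path[:depth] == node:
        label = path[depth]
        local_resolution[label] = local_resolution.get(label, 0.0) + probability
    if local_responses or local_resolution:
      projected[question] = (local_responses, local_resolution)
  return projected

data = {'q1': ([('c1', ('ABO', 'A'), {})], {('ABO', 'A'): 1.0}),
        'q2': ([('c1', ('Rh', '+'), {})], {('Rh', '+'): 1.0})}
print(ProjectToNode(data, ('ABO',)))
# {'q1': ([('c1', 'A', {})], {'A': 1.0})}
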