def LogLikelihoodGradient(self):
  """The gradient (aka Jacobian) of ``LogLikelihood``."""
  expected_feature_vector = vsum([
      self.Expectation(x, self.FeaturesAsNumPyArray(x))
      for x, _ in self._training_data
  ])
  return self._observed_feature_vector - expected_feature_vector
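# For reference, under the log-linear parameterization this class assumes
# (P(y | x) proportional to exp(w . f(x, y)); stated here as an assumption for
# clarity rather than quoted from the method itself), the gradient of the
# training log-likelihood with respect to the weights w is
#
#   d/dw LogLikelihood(w) = sum_{(x, y) in D} f(x, y)
#                           - sum_{(x, _) in D} E_{y' ~ P(. | x)}[f(x, y')]
#
# i.e. the observed feature total minus the expected feature total, which is
# exactly ``self._observed_feature_vector - expected_feature_vector`` above.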
def testVsumEmptyWithoutShape(self):
  """Ensure ``vsum`` returns ``None`` when expected.

  The empty summation should return the zero vector. However, since we don't
  know the shape of the vectors in the list, we don't know what shape of zero
  vector to return. Thus, we return ``None`` as the only sensible result.
  This test ensures that this actually does happen.
  """
  self.assertIsNone(vsum([]))
def testVsumWithNonFloatVector(self):
  """Tests that ``vsum`` works for a list of float-like objects."""

  class MimicFloat(object):

    def __init__(self, value):
      self.value = float(value)

    def __add__(self, number):
      return math.fsum([self.value, number])

    __radd__ = __add__

  lists = [[2.3, 0.4], [0.2, 0.3]]
  array_lists = [np.array(l) for l in lists]
  mimic_float_lists = [[MimicFloat(number) for number in l] for l in lists]
  array_mimic_float_lists = [np.array(l) for l in mimic_float_lists]
  self.assertListEqual(vsum(array_lists).tolist(),
                       vsum(array_mimic_float_lists).tolist())
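# MimicFloat only needs ``__add__``/``__radd__`` because Python falls back to
# the right-hand operand's ``__radd__`` whenever the left-hand operand cannot
# handle the addition itself. A tiny standalone illustration (the ``Mimic``
# class below is a hypothetical stand-in, not the test's own class):

class Mimic(object):

  def __init__(self, value):
    self.value = float(value)

  def __add__(self, number):
    return self.value + number

  __radd__ = __add__


# ``sum`` starts from 0, so the first step is ``0 + Mimic(1.5)`` (dispatching
# to ``__radd__``), and each later step is ``float + Mimic(...)``, which also
# lands in ``__radd__`` after float returns NotImplemented.
print(sum([Mimic(1.5), Mimic(2.5)]))  # -> 4.0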
def testVsumEmptyWithShape(self):
  """Ensure ``vsum`` returns the zero vector when expected.

  The empty summation should return the zero vector. If we know the shape of
  the vectors in the list, then we can in fact return the zero vector of the
  correct shape. This test ensures that this actually does happen.
  """
  expected_shape = (3,)
  total = vsum([], shape=expected_shape)
  self.assertIsNotNone(total)
  self.assertTupleEqual(expected_shape, total.shape)
  self.assertListEqual(np.zeros(expected_shape).tolist(), total.tolist())
def testVsumKeepsPrecision(self):
  """Ensure that ``vsum`` retains precision where ``sum`` would lose it.

  Because ``BIG`` is big and ``LITTLE`` is little, performing the summation
  naively will cause the ``LITTLE`` to be lost to rounding errors. This is
  the same test case as ``testFsumKeepsPrecision``. We make the arrays have
  more than one element to make sure ``vsum`` actually does work on vectors,
  as intended. We use variations on ``x`` in the different components just so
  we don't do the same thing over and over; there's nothing special about
  negation or doubling.
  """
  vs = [np.array([x, -x, 2 * x]) for x in self._xs]
  total = vsum(vs)
  self.assertIsNotNone(total)
  self.assertListEqual([LITTLE, -LITTLE, 2 * LITTLE], total.tolist())
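# Taken together, these tests pin down the behaviour ``vsum`` must provide:
# ``None`` for an empty list with no shape, a zero vector when a shape is
# given, fsum-level precision per component, and tolerance of float-like
# elements. A minimal sketch consistent with those tests follows; it is purely
# illustrative and the real ``vsum`` may be implemented differently.

import math

import numpy as np


def vsum_sketch(vectors, shape=None):
  """Hypothetical precision-preserving vector sum mirroring the tests above."""
  vectors = list(vectors)
  if not vectors:
    # With no vectors we cannot infer a shape, so return None unless the
    # caller supplies the shape of the zero vector to produce.
    return None if shape is None else np.zeros(shape)

  def SumComponent(column):
    try:
      # math.fsum avoids the rounding error a naive running sum accumulates.
      return math.fsum(column)
    except TypeError:
      # Fall back to the elements' own ``+`` for float-like objects.
      return sum(column)

  return np.array([SumComponent(column) for column in zip(*vectors)])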
def __init__(self, Y_given_X, training_data, meta_feature, meta_weight,
             epsilon=None):
  """
  Args:
    Y_given_X: a function from ``X`` to an iterable object giving the subset
      of ``Y`` which has non-zero probability given the ``x``. When in doubt
      about whether some ``y`` has zero probability or not, it is always
      safe/correct to return a larger subset of ``Y`` (it will just take more
      computation time). This is needed for computing the partition function
      and expectation. N.B., we do not actually need to know/enumerate *all*
      of ``Y``, only the subsets for each ``x``.
    training_data (iterable): a collection of ``(x, y)`` pairs where ``y`` is
      the known-correct label for ``x``.
    meta_feature: a function from ``X`` to ``Y`` to a list of ``float``.
      N.B., the length of the list must be the same for all ``x`` and ``y``,
      and must be the same as the length of ``meta_weight``.
    meta_weight (dict from str to (Vector)Weight): the pre-training
      coefficients for how much we believe each component of the feature
      vector. This provides the seed for training; this starting value
      shouldn't affect the final meta_weight obtained by training (thanks to
      convexity), but will affect how long it takes for training to converge.
      N.B., the dict should not be sparse (i.e., should not contain only the
      non-zero meta_weight), because we only train those features whose names
      are keys in this dict.
    epsilon (float): the absolute-error threshold for considering a weight to
      be "equal to zero". N.B., this should be a positive number, as we will
      compare it against the absolute value of each weight.
  """
  super(TrainableLogLinearModel, self).__init__(Y_given_X, meta_feature,
                                                meta_weight, epsilon)
  self._training_data = training_data
  self._serializer = GetSerializer(meta_feature)
  # Use self._meta_weight instead of the initial meta_weight argument, since
  # self._meta_weight already has the zero meta_weight filtered out by the
  # superclass's __init__.
  self._np_weight = self._MetaToNumPyArray(self.meta_weight)
  self._observed_feature_vector = vsum(
      [self.FeaturesAsNumPyArray(x)(y) for x, y in self._training_data])
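# Note that the observed feature vector cached at the end of __init__,
#
#   observed = sum_{(x, y) in training_data} f(x, y),
#
# does not depend on the weights at all, which is why it can be computed once
# here and then reused on every call to ``LogLikelihoodGradient``.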
def Expectation(self, x, f):
  """Compute the expectation of a function with respect to ``Probability(x)``.

  Args:
    x (X): the value of the independent variable.
    f: a function of type ``Y -> np.ndarray(float)``.

  Returns:
    An ``np.ndarray`` of ``float`` called the "expected value". N.B., the
    particular array returned may not actually be one that the function
    itself ever returns; rather, it is a sort of average of all the results
    returned. For more information, see
    <https://en.wikipedia.org/wiki/Expected_value>.
  """
  prob_given_x = self.Probability(x)
  # N.B., the ``*`` below is vector scaling! If we want to make this method
  # polymorphic in the return type of ``f``, then we'll need an API that
  # provides both scaling and ``vsum``.
  return vsum([prob_given_x(y) * f(y) for y in self._Y(x)])
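# In symbols, the return value is sum over y in Y(x) of P(y | x) * f(y). A
# tiny hand-check with made-up numbers (both the probabilities and the feature
# vectors below are illustrative only, not drawn from the model):

import numpy as np

probability = {'a': 0.25, 'b': 0.75}
feature = {'a': np.array([1.0, 0.0]), 'b': np.array([0.0, 2.0])}
expectation = sum(probability[y] * feature[y] for y in probability)
print(expectation)  # the probability-weighted vector sum: [0.25, 1.5]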