Beispiel #1
0
    def setUpClass(cls):
        """Build the shared fixtures for this test class.

        Runs the subclass hook, constructs a "generating" HMM and a "naive"
        HMM from the arrays stored in ``cls.arrays``, samples
        ``cls.num_examples`` training sequences of length ``cls.example_len``
        from the generating model, and retrains the naive model with
        :func:`train_baum_welch`. The training output is stored in
        ``cls.training_results``.
        """
        cls.do_subclass_setup()
        print("Setting up tests for %s" % cls.test_name)

        arrays = cls.arrays

        # model whose parameters are used to sample the training data
        generating_priors = ArrayFactor(arrays["state_priors"])
        generating_transitions = MatrixFactor(arrays["transition_probs"])
        cls.generating_hmm = FirstOrderHMM(
            state_priors=generating_priors,
            trans_probs=generating_transitions,
            emission_probs=cls.get_emission_probs(),
        )

        # model used as the starting point for retraining
        naive_priors = ArrayFactor(arrays["naive_state_priors"])
        naive_transitions = MatrixFactor(arrays["naive_transition_probs"])
        cls.naive_hmm = FirstOrderHMM(
            state_priors=naive_priors,
            trans_probs=naive_transitions,
            emission_probs=cls.get_naive_emission_probs(),
        )

        # sample cls.num_examples sequences; only element [1] of each
        # generate() result is kept (presumably the observations -- confirm
        # against FirstOrderHMM.generate)
        sampled = []
        for _ in range(cls.num_examples):
            sampled.append(cls.generating_hmm.generate(cls.example_len)[1])
        cls.training_examples = sampled

        # retrain the naive model on the sampled sequences
        training_options = {
            "state_prior_estimator": cls.state_prior_estimator,
            "transition_estimator": cls.transition_estimator,
            "emission_estimator": cls.emission_estimator,
            "miniter": 80,
            "maxiter": 1000,
            "processes": 1,
            "logfunc": DefaultLoggerFactory(ScreenWriter(), cls.naive_hmm, maxcols=5),
        }
        cls.training_results = train_baum_welch(
            cls.naive_hmm,
            cls.training_examples,
            **training_options
        )
Beispiel #2
0
    def test_from_dict_with_hmm(self):
        """Yield checks that ``ModelReducer._from_dict`` restores an attached HMM.

        For every fixture model whose starting order is below 5, a random
        first-order HMM is built, packed into a dictionary together with the
        model's ``starting_order`` and ``num_states``, and passed to
        ``ModelReducer._from_dict``. The result is compared against a freshly
        constructed ``ModelReducer`` carrying the same HMM.

        Yields
        ------
        tuple
            ``(check_function, found, expected)`` triples
        """
        for key, model in sorted(self.models.items()):
            starting_order, num_states = key
            if starting_order >= 5:
                continue

            n_low = model.low_order_states

            # random state priors, normalized to sum to one
            raw_priors = numpy.random.random(n_low)
            state_priors = raw_priors / raw_priors.sum()

            # random transition table, each row normalized to sum to one
            raw_trans = numpy.random.random((n_low, n_low))
            row_totals = raw_trans.sum(1)
            trans_probs = (raw_trans.T / row_totals).T

            my_hmm = FirstOrderHMM(
                state_priors=ArrayFactor(state_priors),
                trans_probs=MatrixFactor(trans_probs),
                emission_probs=[None] * n_low,
            )

            serialized = {
                "starting_order": starting_order,
                "num_states": num_states,
                "hmm": my_hmm,
            }
            found = ModelReducer._from_dict(serialized)

            expected = ModelReducer(starting_order, num_states)
            expected.hmm = my_hmm

            # scalar attributes of the reducer itself
            for attr in ("starting_order", "high_order_states"):
                yield (check_equal, getattr(found, attr), getattr(expected, attr))

            # array-valued parameters of the attached HMM
            for attr in ("trans_probs", "state_priors"):
                found_data = getattr(found.hmm, attr).data
                expected_data = getattr(my_hmm, attr).data
                yield (check_array_equal, found_data, expected_data)
Beispiel #3
0
    def test_to_dict_with_hmm(self):
        """Yield checks that ``_to_dict`` exports a model with an attached HMM.

        For every fixture model whose starting order is below 5, a random
        first-order HMM is attached to the model, ``model._to_dict()`` is
        called with warnings silenced, and each expected key is compared
        against the exported dictionary.

        Yields
        ------
        tuple
            ``(check_equal, found_value, expected_value)`` triples
        """
        for (starting_order, num_states), model in sorted(self.models.items()):
            if starting_order >= 5:
                continue

            # random state priors, normalized to sum to one
            state_priors = numpy.random.random(model.low_order_states)
            state_priors /= state_priors.sum()

            # random transition table, each row normalized to sum to one
            trans_probs = numpy.random.random(
                (model.low_order_states, model.low_order_states))
            trans_probs = (trans_probs.T / trans_probs.sum(1)).T

            my_hmm = FirstOrderHMM(state_priors=ArrayFactor(state_priors),
                                   trans_probs=MatrixFactor(trans_probs),
                                   emission_probs=[None] *
                                   model.low_order_states)

            # `model` comes from the shared `self.models` fixture, so detach
            # the HMM again even if `_to_dict()` raises; otherwise it would
            # stay attached for whatever uses the fixture next.
            model.hmm = my_hmm
            try:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    found = model._to_dict()
            finally:
                model.hmm = None

            expected = {
                "starting_order": starting_order,
                "num_states": num_states,
                "hmm": my_hmm,
            }

            # Every key is compared with `check_equal`. The previous special
            # case for a "first_order_hmm" key could never run, because
            # `expected` has no such key.
            for k in expected:
                yield check_equal, found[k], expected[k]
Beispiel #4
0
    def construct_factors(self,
                          model,
                          reduced_data,
                          noise_weight=0,
                          pseudocount_weight=1e-10):
        """Construct discrete transition factor for an HMM using reduced data
        from observation sequences

        Parameters
        ----------
        model : :class:`~minihmm.hmm.FirstOrderHMM` or subclass
            Not read by this implementation.

        reduced_data : numpy.ndarray
            sufficient statistics for observations, from
            :meth:`DiscreteTransitionEstimator.reduce_data`. The elements are
            summed to obtain a single table of transition counts.

        noise_weight : float, optional
            weight of noise to add, relative to number of observations (e.g.
            transition counts, state prior counts, emission counts, etc.) in
            data set. (Default: 0)

        pseudocount_weight : float, optional
            weight of pseudocounts to add, relative to number of
            observations (transition counts, state prior counts, emission
            counts, etc.) in data set. The resulting pseudocount mass is
            spread evenly over all cells of the table. (Default: 1e-10)

        Returns
        -------
        :class:`~minihmm.factors.MatrixFactor`
            Transition probability factor, with each row normalized to sum
            to one
        """
        # Work on a float copy of the summed counts. The in-place additions
        # below add float noise and pseudocounts, which NumPy refuses to cast
        # back into an integer-typed array.
        A = sum(reduced_data).astype(float)

        # total is taken before noise/pseudocounts so the pseudocount mass is
        # relative to the observed counts only
        A_sum = A.sum()
        A += get_model_noise(A, noise_weight)

        # same pseudocount in every cell; mass is pseudocount_weight * A_sum
        A += (pseudocount_weight * A_sum / len(A.ravel()))

        # row-normalize
        A_normed = (A.T / A.sum(1)).T
        return MatrixFactor(A_normed)
Beispiel #5
0
    def do_subclass_setup(cls):
        """Install the fixtures for the two-state Gaussian-emission example.

        Sets ``cls.name``, ``cls.min_frac_equal``, ``cls.generating_hmm`` and
        ``cls.hmm_dict``. ``cls.hmm_dict`` holds the same state priors and
        transition probabilities as ``cls.generating_hmm`` as plain
        ``shape`` / ``row`` / ``col`` / ``data`` entries, with an empty
        ``emission_probs`` list.
        """
        cls.name = "Two gaussian example"
        cls.min_frac_equal = 0.7

        transition_table = numpy.matrix([[0.9, 0.1], [0.25, 0.75]])
        transition_factor = MatrixFactor(transition_table)
        prior_factor = ArrayFactor([0.8, 0.2])

        # one normal distribution per state
        emission_factors = [
            ScipyDistributionFactor(scipy.stats.norm, loc=0, scale=0.5),
            ScipyDistributionFactor(scipy.stats.norm, loc=5, scale=10),
        ]

        cls.generating_hmm = FirstOrderHMM(
            trans_probs=transition_factor,
            state_priors=prior_factor,
            emission_probs=emission_factors,
        )

        # NOTE(review): looks like a sparse coordinate-style encoding of the
        # tables above -- confirm against the code that consumes hmm_dict
        prior_entry = {
            "shape": (1, 2),
            "row": [0, 0],
            "col": [0, 1],
            "data": [0.8, 0.2],
        }
        transition_entry = {
            "shape": (2, 2),
            "row": [0, 0, 1, 1],
            "col": [0, 1, 0, 1],
            "data": [0.9, 0.1, 0.25, 0.75],
        }
        cls.hmm_dict = {
            "emission_probs": [],
            "state_priors": prior_entry,
            "trans_probs": transition_entry,
        }
Beispiel #6
0
    def do_subclass_setup(cls):
        """Install the fixtures for the two-state coin example.

        Sets ``cls.name``, ``cls.min_frac_equal``, ``cls.generating_hmm`` and
        ``cls.hmm_dict``. ``cls.hmm_dict`` holds the same state priors and
        transition probabilities as ``cls.generating_hmm`` as plain
        ``shape`` / ``row`` / ``col`` / ``data`` entries, with an empty
        ``emission_probs`` list.
        """
        cls.name = "Coin example"
        cls.min_frac_equal = 0.69

        prior_factor = ArrayFactor([0.005, 0.995])
        transition_factor = MatrixFactor(numpy.array([[0.8, 0.2], [0.3, 0.7]]))

        # one discrete emission table per state
        emission_factors = [
            ArrayFactor([0.6, 0.4]),
            ArrayFactor([0.15, 0.85]),
        ]

        cls.generating_hmm = FirstOrderHMM(
            state_priors=prior_factor,
            trans_probs=transition_factor,
            emission_probs=emission_factors,
        )

        # NOTE(review): looks like a sparse coordinate-style encoding of the
        # tables above -- confirm against the code that consumes hmm_dict
        prior_entry = {
            "shape": (1, 2),
            "row": [0, 0],
            "col": [0, 1],
            "data": [0.005, 0.995],
        }
        transition_entry = {
            "shape": (2, 2),
            "row": [0, 0, 1, 1],
            "col": [0, 1, 0, 1],
            "data": [0.8, 0.2, 0.3, 0.7],
        }
        cls.hmm_dict = {
            "emission_probs": [],
            "state_priors": prior_entry,
            "trans_probs": transition_entry,
        }
Beispiel #7
0
    def do_subclass_setup(cls):
        """Append matrix-factor fixtures and matching example pairs.

        For each table size, a random square table is row-normalized and
        wrapped twice in a ``MatrixFactor``: once with
        ``row_conditional=True`` and once with ``row_conditional=False``.
        Each factor appended to ``cls.factors`` is paired with a list of 50
        random ``(X, Y)`` index pairs appended to ``cls.examples``.
        """
        # NOTE(review): range(10, 100, 200) produces the single value 10,
        # because the step exceeds the span. Confirm whether more table sizes
        # were intended.
        for dim in range(10, 100, 200):
            weights = numpy.random.random((dim, dim))
            row_totals = weights.sum(1)
            table = (weights.T / row_totals).T

            for row_conditional in (True, False):
                cls.factors.append(
                    MatrixFactor(table, row_conditional=row_conditional))

                # 50 random index pairs, each index in [0, dim)
                pairs = numpy.random.randint(0, high=dim, size=(50, 2))
                cls.examples.append(
                    [(first, second) for (first, second) in pairs])
Beispiel #8
0
    def construct_factors(self, model, reduced_data, noise_weight=0, pseudocount_weight=1e-10):
        """Build a tied transition factor from reduced observation data.

        Counts are pooled into one slot per tied index (``self.index_map``),
        optionally perturbed with noise, divided by ``self.index_weights``,
        and written back to every cell sharing that index. Pseudocounts are
        then added, cells are multiplied by ``self.pseudocount_mask``, and
        rows are normalized.

        Parameters
        ----------
        model : :class:`~minihmm.hmm.FirstOrderHMM` or subclass
            Not read by this implementation.

        reduced_data : numpy.ndarray
            sufficient statistics for observations, from
            :meth:`TiedTransitionEstimator.reduce_data`

        noise_weight : float, optional
            weight of noise to add, relative to number of observations in
            data set. (Default: 0)

        pseudocount_weight : float, optional
            weight of pseudocounts to add, relative to number of observations
            in data set. The pseudocount mass is distributed in proportion to
            ``self.pseudocount_array``. (Default: 1e-10)

        Returns
        -------
        :class:`~minihmm.factors.MatrixFactor`
            Tied state transition probability table
        """
        raw_counts = sum(reduced_data)
        total_count = raw_counts.sum()

        tie_index = self.index_map
        n_rows = raw_counts.shape[0]
        n_cols = raw_counts.shape[1]

        # pool the counts of all cells that share a tied index
        pooled = numpy.zeros(1 + tie_index.max())
        for i, j in numpy.ndindex(n_rows, n_cols):
            pooled[tie_index[i, j]] += raw_counts[i, j]

        # add noise
        # FIXME: THIS WILL ADD NOISE TO FORBIDDEN CELLS
        noise = get_model_noise(
            pooled,
            noise_weight,
            assymetric_weights=self.index_weights
        )
        pooled += noise

        # divide each pooled slot by the number of destination cells
        pooled /= self.index_weights

        # copy each pooled value back to every cell tied to it
        tied_table = numpy.zeros_like(raw_counts, dtype=float)
        for i, j in numpy.ndindex(tied_table.shape[0], tied_table.shape[1]):
            tied_table[i, j] = pooled[tie_index[i, j]]

        # add pseudocounts
        pseudocounts = (pseudocount_weight * total_count *
                        self.pseudocount_array) / self.pseudocount_array.sum()
        tied_table += pseudocounts

        # re-zero forbidden cells that became non-zero via noise addition
        tied_table *= self.pseudocount_mask

        # normalize each row to sum to one
        row_totals = tied_table.sum(1)
        tied_table = (tied_table.T / row_totals).T

        return MatrixFactor(tied_table)
Beispiel #9
0
    def remap_from_first_order(self, native_hmm):
        """Copy parameters of a native first-order HMM into the first-order
        translation of the high-order model described by `self`.

        This can provide a non-random starting point for refinement training
        of the high-order HMM.

        Parameters
        ----------
        native_hmm : :class:`minihmm.hmm.FirstOrderHMM`
            Native, first-order HMM, preferably with trained parameters

        Returns
        -------
        :class:`~minihmm.hmm.FirstOrderHMM`
            First-order representation of the high-order HMM structure
            described by `self`, with parameters from `native_hmm` remapped
            into corresponding positions.

        Raises
        ------
        ValueError
            If `native_hmm.num_states` differs from `self.high_order_states`
        """
        state_lookup = self.high_states_to_low

        # the two models must agree on the number of native states
        if self.high_order_states != native_hmm.num_states:
            message = (
                "Native HMM (%d states), has different number of states than `self` (%d states)" %
                (native_hmm.num_states, self.high_order_states)
            )
            raise ValueError(message)

        # Transitions: each high-order transition
        #     (n-i, ..., n-1) -> (n-i+1, ..., n)
        # receives the native transition parameter for (n-1, n).
        #
        # State priors and emissions: each high-order state (n-i, ..., n)
        # receives the parameters of native state `n`.
        n_low = self.low_order_states

        native_priors = native_hmm.state_priors.data
        native_trans = native_hmm.trans_probs.data
        native_emissions = native_hmm.emission_probs

        remapped_priors = numpy.zeros(n_low, dtype=float)
        remapped_trans = numpy.zeros((n_low, n_low), dtype=float)
        remapped_emissions = [None] * n_low

        for high_tuple, low_state in state_lookup.items():
            # the last element of the tuple is the native state
            current_native = high_tuple[-1]
            remapped_priors[low_state] = native_priors[current_native]
            remapped_emissions[low_state] = copy.deepcopy(native_emissions[current_native])

            # a successor drops the oldest element and appends the next state
            history = list(high_tuple)[1:]
            for following_native in range(self.high_order_states):
                successor = state_lookup[tuple(history + [following_native])]
                remapped_trans[low_state, successor] = native_trans[current_native,
                                                                    following_native]

        # renormalize priors, since several remapped states can share one
        # native prior
        remapped_priors = remapped_priors / remapped_priors.sum()
        prior_factor = ArrayFactor(remapped_priors)

        # rows should already sum to one; renormalized anyway (unverified)
        row_totals = remapped_trans.sum(1)
        trans_factor = MatrixFactor((remapped_trans.T / row_totals).T)

        return FirstOrderHMM(
            state_priors=prior_factor,
            emission_probs=remapped_emissions,
            trans_probs=trans_factor,
        )