Beispiel #1
0
    def _sample_forwards(self,Als,aBls,betals,possible_states):
        """Sample a state sequence forwards in time and store it in ``self.stateseq``.

        At each time step a label is drawn from the entries of
        ``possible_states[t]`` using log-weights formed from ``aBls[t]``,
        ``betals[t]`` and the log of the prior/transition weights into that
        step. The last step uses ``aBls[-1]`` and the transition weights only
        (no ``betals`` term).

        Parameters
        ----------
        Als :
            Not used by this method; kept for interface compatibility.
        aBls, betals :
            Per-time-step sequences of log-weight arrays, each aligned with the
            corresponding entry of ``possible_states``.
            NOTE(review): presumably log-likelihoods and backward messages
            restricted to the allowed states -- confirm against the caller.
        possible_states :
            Per-time-step sequences of candidate state labels; the labels are
            used as indices into ``self.pi_0`` and ``self.A``.

        Returns
        -------
        None. The sampled sequence (length ``self.T``) is written to
        ``self.stateseq``.
        """
        # ``np.int`` was deprecated in NumPy 1.20 and removed in 1.24; it was
        # only ever an alias for the builtin ``int``, so the dtype is unchanged.
        stateseq = np.empty(self.T,dtype=int)

        # Log of the initial-state weights, restricted to the first step's candidates.
        log_nextstate_unsmoothed = np.log(np.array([self.pi_0[k] for k in possible_states[0]]))
        for t,(labels,aBl,betal) in enumerate(zip(possible_states[:-1],aBls[:-1],betals[:-1])):
            stateseq[t] = labels[sample_discrete_from_log(aBl + betal + log_nextstate_unsmoothed)]
            # Log of the transition weights out of the state just sampled,
            # restricted to the next step's candidates.
            log_nextstate_unsmoothed = np.log(np.array([self.A[stateseq[t],k] for k in possible_states[t+1]]))
        # Final step: no ``betals`` term is added here.
        stateseq[-1] = possible_states[-1][sample_discrete_from_log(aBls[-1] + log_nextstate_unsmoothed)]

        self.stateseq = stateseq
    def _resample_a_word(self, hsmm_states):
        """Pick one of the words currently held by ``hsmm_states`` by sampling.

        Each state contributes ``tuple(state.stateseq_norep)`` as a candidate
        word. With no states, ``self.generate_word()`` is returned; if all
        states hold the same word, that word is returned directly. Otherwise
        each state's candidate gets a score equal to the state's own
        ``log_likelihood()`` plus, for every *other* state, the last element
        of that state's ``likelihood_block_word(candidate)``. An index is then
        drawn with ``sample_discrete_from_log`` and the matching candidate is
        returned.
        """
        words = [tuple(state.stateseq_norep) for state in hsmm_states]
        distinct_words = list(set(words))
        # word_of[i] is the row of distinct_words that state i currently holds.
        word_of = np.array([distinct_words.index(word) for word in words])

        if len(words) == 0:
            return self.generate_word()
        if len(distinct_words) == 1:
            return distinct_words[0]

        n_states = len(words)
        block_scores = np.empty((len(distinct_words), n_states))
        own_loglikes = np.array(
            [state.log_likelihood() for state in hsmm_states])

        for row, word in enumerate(distinct_words):
            holders = np.where(word_of == row)[0]
            # A word held by exactly one state is not scored against that
            # state; the entry would only land on the diagonal, which is
            # zeroed below. -1 never matches a column index.
            skipped = holders[0] if holders.size == 1 else -1
            for col in range(n_states):
                if col == skipped:
                    continue
                block_scores[row, col] = hsmm_states[col].likelihood_block_word(
                    word)[-1]

        # One row per state (fancy indexing copies), then drop each state's
        # score against itself.
        per_state = block_scores[word_of]
        diagonal = np.arange(n_states)
        per_state[diagonal, diagonal] = 0.0
        scores = per_state.sum(axis=1) + own_loglikes

        return words[sample_discrete_from_log(scores)]
    def resample_label_version(self):
        """Resample every label except the first, visiting them in random order.

        For each visited index the current label is replaced by the
        ``SAMPLING`` marker, the occupied labels are fetched from the model,
        and a new label is drawn from the scores of the occupied labels plus
        one extra score for a brand-new label.
        """
        # NOTE index 0 is never visited: the initial state distribution is
        # assumed to be a delta at the first label.
        visit_order = np.random.permutation(self.T-1)+1
        for t in visit_order:
            self.stateseq[t] = SAMPLING
            occupied = self.model._occupied()
            # housekeeping receives the object returned by _occupied(), not the list copy
            self.beta.housekeeping(occupied)
            labels = list(occupied)

            # one score per occupied label, followed by the new-label score
            existing_scores = [self._label_score(t,k) for k in labels]
            scores = np.array(existing_scores + [self._new_label_score(t,labels)])
            choice = sample_discrete_from_log(scores)

            # the last slot stands for "open a new label"
            if choice != scores.shape[0]-1:
                self.stateseq[t] = labels[choice]
            else:
                self.stateseq[t] = self._new_label(labels)
    def resample(self):
        """Resample all ``self.T`` entries of ``self.stateseq`` in random order.

        Each visited entry is first replaced by the ``SAMPLING`` marker. A new
        value is then drawn from the scores of the model's occupied labels
        plus one extra score for a new label.
        """
        for t in np.random.permutation(self.T):
            # discard the current value before scoring
            self.stateseq[t] = SAMPLING
            occupied = list(self.model._occupied())
            self.beta.housekeeping(occupied)

            # scores for the occupied labels, with the new-label score last
            occupied_scores = [self._score(k,t) for k in occupied]
            scores = np.array(occupied_scores+[self._new_score(occupied,t)])
            drawn = sample_discrete_from_log(scores)

            # the final index means "create a new label"
            if drawn != scores.shape[0]-1:
                self.stateseq[t] = occupied[drawn]
            else:
                self.stateseq[t] = self._new_label(occupied)
Beispiel #5
0
    def resample(self,data=np.array([]),niter=1,**kwargs):
        """Resample the weights and every component, optionally given data.

        With empty ``data``, ``self.weights`` and each entry of
        ``self.components`` are resampled with no arguments. Otherwise
        ``niter`` sweeps are run: labels are drawn from
        ``self._log_scores(data)`` along axis 0, the weights are resampled
        from those labels, and each component is resampled from the rows of
        ``data`` carrying its index. ``**kwargs`` is accepted but not used.

        NOTE(review): ``data[labels == idx]`` assumes ``data`` supports
        boolean-mask indexing (e.g. a NumPy array) -- confirm with callers.
        """
        if len(data) == 0:
            # no observations: resample everything without data
            self.weights.resample()
            for component in self.components:
                component.resample()
            return

        for _ in range(niter):
            # draw a label for every datum
            labels = sample_discrete_from_log(self._log_scores(data),axis=0)

            # update the weights from the sampled labels
            self.weights.resample(labels)

            # update each component from the data assigned to it
            for idx, component in enumerate(self.components):
                assigned = data[labels == idx]
                component.resample(assigned)