def _sample_forwards(self,Als,aBls,betals,possible_states):
    """Sample a state sequence from first step to last and store it.

    For every time step one label is drawn from that step's entry of
    ``possible_states``.  The log-scores used for the draw are
    ``aBl + betal`` plus the log of the incoming probabilities:
    ``self.pi_0`` for the first step, and afterwards the row of ``self.A``
    selected by the previously sampled state.  The last step uses
    ``aBls[-1]`` without a ``betals`` term.

    Args:
        Als: Not used by this method.
        aBls: Per-time-step log-score arrays, each aligned with the matching
            entry of ``possible_states``.
        betals: Per-time-step arrays added to ``aBls`` (presumably backward
            messages -- confirm against the caller).
        possible_states: Per-time-step sequences of candidate state labels.

    Returns:
        None.  The sampled sequence is written to ``self.stateseq``.
    """
    # Builtin ``int`` instead of ``np.int``: the alias was removed in
    # NumPy 1.24 and always meant the builtin type.
    stateseq = np.empty(self.T, dtype=int)

    # Log of the initial-state probabilities restricted to the first step's
    # candidates.
    log_nextstate_unsmoothed = np.log(
        np.array([self.pi_0[k] for k in possible_states[0]]))

    steps = zip(possible_states[:-1], aBls[:-1], betals[:-1])
    for t, (labels, aBl, betal) in enumerate(steps):
        choice = sample_discrete_from_log(
            aBl + betal + log_nextstate_unsmoothed)
        stateseq[t] = labels[choice]
        # Log transition probabilities out of the state just sampled,
        # restricted to the next step's candidates.
        log_nextstate_unsmoothed = np.log(
            np.array([self.A[stateseq[t], k] for k in possible_states[t+1]]))

    # Final step: no betals term is added.
    last_choice = sample_discrete_from_log(aBls[-1] + log_nextstate_unsmoothed)
    stateseq[-1] = possible_states[-1][last_choice]

    self.stateseq = stateseq
def _resample_a_word(self, hsmm_states): # hsmm_states = [letter_state for letter_state in self.letter_hsmm.states_list if letter_state.word_idx == word_idx] candidates = [ tuple(letter_state.stateseq_norep) for letter_state in hsmm_states ] unique_candidates = list(set(candidates)) ref_array = np.array( [unique_candidates.index(candi) for candi in candidates]) if len(candidates) == 0: return self.generate_word() elif len(unique_candidates) == 1: return unique_candidates[0] cache_score = np.empty((len(unique_candidates), len(candidates))) likelihoods = np.array( [letter_state.log_likelihood() for letter_state in hsmm_states]) range_tmp = list(range(len(candidates))) for candi_idx, candi in enumerate(unique_candidates): tmp = range_tmp[:] if (ref_array == candi_idx).sum() == 1: tmp.remove(np.where(ref_array == candi_idx)[0][0]) for tmp_idx in tmp: # print(hsmm_states[tmp_idx].likelihood_block_word(candi)[-1]) cache_score[candi_idx, tmp_idx] = hsmm_states[ tmp_idx].likelihood_block_word(candi)[-1] cache_scores_matrix = cache_score[ref_array] for i in range_tmp: cache_scores_matrix[i, i] = 0.0 scores = cache_scores_matrix.sum(axis=1) + likelihoods sampled_candi_idx = sample_discrete_from_log(scores) return candidates[sampled_candi_idx]
def resample_label_version(self):
    """Resample the labels in ``self.stateseq`` one at a time, in random order.

    Index 0 is never visited: the first label is left untouched on the
    assumption that the initial state distribution is a delta at that label.
    """
    visit_order = np.random.permutation(self.T-1) + 1
    for t in visit_order:
        # Mark the slot as being resampled before the occupied labels are read.
        self.stateseq[t] = SAMPLING

        occupied = self.model._occupied()
        self.beta.housekeeping(occupied)
        ks = list(occupied)

        # One score per occupied label, followed by the score of opening a
        # new label.
        log_scores = [self._label_score(t, k) for k in ks]
        log_scores.append(self._new_label_score(t, ks))
        scores = np.array(log_scores)

        picked = sample_discrete_from_log(scores)
        opens_new_label = picked == scores.shape[0] - 1
        self.stateseq[t] = self._new_label(ks) if opens_new_label else ks[picked]
def resample(self):
    """Resample every entry of ``self.stateseq`` once, in random order.

    Each visited entry is first set to ``SAMPLING``, then replaced either by
    one of the currently occupied labels or by a fresh label from
    ``self._new_label``.
    """
    owning_model = self.model
    for t in np.random.permutation(self.T):
        # Discard the current value so it is not counted while scoring.
        self.stateseq[t] = SAMPLING

        ks = list(owning_model._occupied())
        self.beta.housekeeping(ks)

        # Scores for each occupied label; the final entry is the score of
        # opening a new label.
        candidate_scores = [self._score(k, t) for k in ks]
        candidate_scores.append(self._new_score(ks, t))
        scores = np.array(candidate_scores)

        drawn = sample_discrete_from_log(scores)
        new_label_slot = scores.shape[0] - 1
        self.stateseq[t] = self._new_label(ks) if drawn == new_label_slot else ks[drawn]
def resample(self,data=np.array([]),niter=1,**kwargs):
    """Resample the weights and components, optionally conditioned on data.

    With empty ``data`` the weights and every component are resampled with
    no arguments.  Otherwise ``niter`` sweeps are run; each sweep samples a
    label per datum from ``self._log_scores(data)``, resamples the weights
    from those labels, and resamples each component from the data carrying
    its index as label.  Extra keyword arguments are accepted and ignored.
    """
    if len(data) == 0:
        self.weights.resample()
        for component in self.components:
            component.resample()
        return

    for _ in range(niter):
        # Draw one label per datum.
        assignments = sample_discrete_from_log(self._log_scores(data), axis=0)

        self.weights.resample(assignments)

        # Each component sees only the data assigned to it.
        for k, component in enumerate(self.components):
            component.resample(data[assignments == k])