def update_weight(self, xseq, yseq, **kwargs):
    """Run one structured-perceptron update on a single training sequence.

    The sequence is decoded with Viterbi using the *current* weights only.
    Wherever the decoded labels disagree with the gold labels, the weights
    are moved towards the gold features and away from the predicted ones.

    Args:
        xseq (list): list of np.arrays, each of size (n_feat).
        yseq (list): list of class labels (int), one per element of xseq.
        **kwargs: accepted for interface compatibility; not used here.

    Returns:
        int: 1 if the decoded sequence differed from ``yseq`` (and the
        weights were therefore updated), 0 otherwise.
    """
    length = len(yseq)
    if length == 0:
        # Nothing to decode and nothing to learn from.
        return 0

    # Build the decoding scores from the model weights alone. The gold
    # labels must not enter the scores, otherwise decoding is biased by the
    # very answer it is supposed to predict. Fresh arrays are filled so the
    # decoder never receives the weight arrays themselves.
    initial_scores = np.zeros(self.n_classes)
    initial_scores[:] = self.W_start
    final_scores = np.zeros(self.n_classes)
    final_scores[:] = self.W_stop
    transition_scores = np.zeros(
        (length - 1, self.n_classes, self.n_classes))
    transition_scores[:] = self.W_bigrams  # same table at every position
    emission_scores = np.dot(np.array(xseq), self.W_unigrams.T)

    yseq_hat = viterbi(initial_scores, transition_scores, final_scores,
                       emission_scores)

    # Element-wise comparison: works whether the sequences are lists or
    # arrays (``yseq_hat != yseq`` is ambiguous for arrays).
    if all(pred == gold for pred, gold in zip(yseq_hat, yseq)):
        return 0

    for t in range(length):
        gold, pred = yseq[t], yseq_hat[t]

        # Emission (unigram) features.
        if pred != gold:
            self.W_unigrams[pred] -= xseq[t]
            self.W_unigrams[gold] += xseq[t]

        # Transition (bigram) features, indexed as [current, previous].
        if t > 0:
            prev_gold, prev_pred = yseq[t - 1], yseq_hat[t - 1]
            if (pred, prev_pred) != (gold, prev_gold):
                self.W_bigrams[pred, prev_pred] -= 1
                self.W_bigrams[gold, prev_gold] += 1

    # Start-of-sequence feature.
    if yseq_hat[0] != yseq[0]:
        self.W_start[yseq_hat[0]] -= 1
        self.W_start[yseq[0]] += 1

    # End-of-sequence feature.
    if yseq_hat[-1] != yseq[-1]:
        self.W_stop[yseq_hat[-1]] -= 1
        self.W_stop[yseq[-1]] += 1

    return 1
def main():
    """Decode the weather/activity toy HMM and print the results.

    Builds log-space scores from the hand-specified emission and transition
    tables, runs ``viterbi`` and ``forward_backward`` on a fixed sequence of
    observed activities, and prints the weather sequence chosen by each
    decoding method.
    """
    weather = ["Sunny", "Windy", "Rainy"]
    activities = ["Surf", "Beach", "Videogame", "Study"]
    str2state = {w: i for i, w in enumerate(weather)}
    str2emission = {e: i for i, e in enumerate(activities)}

    # emissions probabilities from the handout: emission_probabilities[i, j]
    # is the probability of the emission i given the state j
    emission_probabilities = np.array([[0.4, 0.5, 0.1],
                                       [0.4, 0.1, 0.1],
                                       [0.1, 0.2, 0.3],
                                       [0.1, 0.2, 0.5]])

    # transition probabilities from the handout: transition_probabilities[i, j]
    # is the probability of transitioning to state i from state j
    transition_probabilities = np.array([[0.6, 0.3, 0.2],
                                         [0.3, 0.5, 0.3],
                                         [0.1, 0.2, 0.5]])

    observations = [
        "Videogame", "Study", "Study", "Surf", "Beach", "Videogame", "Beach"
    ]
    initial_weather = "Rainy"
    final_weather = "Sunny"

    # Row t of emission_scores holds the log-probability of the t-th observed
    # activity under each weather state.
    x = [str2emission[observation] for observation in observations]
    emission_scores = np.log(emission_probabilities[x])

    # The same transition table is stacked once per observation.
    # NOTE(review): this yields len(observations) matrices; confirm whether
    # viterbi/forward_backward expect len(observations) - 1 instead.
    transition_scores = np.log(
        np.array([transition_probabilities for _ in observations]))

    initial_state = str2state[initial_weather]
    final_state = str2state[final_weather]
    # Column `initial_state`: log-probability of each state following the
    # known initial weather.
    initial_scores = np.log(transition_probabilities[:, initial_state])
    # Row `final_state`: log-probability of reaching the known final weather
    # from each state.
    final_scores = np.log(transition_probabilities[final_state])

    viterbi_path = viterbi(initial_scores, transition_scores, final_scores,
                           emission_scores)
    viterbi_weather = [weather[i] for i in viterbi_path]

    posteriors, _, _ = forward_backward(initial_scores, transition_scores,
                                        final_scores, emission_scores)
    # Pick the highest-posterior state independently at each position
    # (assumes axis 1 of `posteriors` ranges over states — TODO confirm).
    posterior_path = posteriors.argmax(1)
    posterior_weather = [weather[i] for i in posterior_path]

    print("Sequence given by Viterbi: %s" % " -> ".join(viterbi_weather))
    print("Sequence given by posterior: %s" % " -> ".join(posterior_weather))
def evaluate(self, X, y):
    """Return the per-position labelling accuracy of the model on (X, y).

    Every sequence in ``X`` is decoded with ``viterbi`` using the current
    weights, and the decoded labels are compared position by position with
    the matching gold sequence in ``y``. If ``self.feature_function`` is
    set, it is applied to each element of a sequence before scoring.

    Returns:
        The number of correctly labelled positions divided by the total
        number of positions.
    """
    n_correct = 0
    n_positions = 0
    for inputs, gold in zip(X, y):
        if self.feature_function is not None:
            inputs = [self.feature_function(item) for item in inputs]
        seq_len = len(inputs)

        # Emission scores: one row of class scores per position.
        emission_scores = np.zeros((seq_len, self.n_classes))
        # Transition scores: the bigram table repeated between each pair of
        # consecutive positions.
        transition_scores = np.zeros(
            (seq_len - 1, self.n_classes, self.n_classes))
        initial_scores = np.zeros(self.n_classes)
        final_scores = np.zeros(self.n_classes)

        initial_scores[:] = self.W_start
        final_scores[:] = self.W_stop
        for pos, features in enumerate(inputs):
            emission_scores[pos] = self.W_unigrams.dot(features)
        transition_scores[:] = self.W_bigrams

        predicted = viterbi(initial_scores, transition_scores, final_scores,
                            emission_scores)

        n_correct += sum(
            label == predicted[pos] for pos, label in enumerate(gold))
        n_positions += len(gold)

    return n_correct / n_positions