def inference(self, X, w):
    """Run Viterbi inference and return the best label sequence.

    This method is a wrapper that converts the CRF weights into separate
    arrays of scores (start, end, transition and emission) and then calls
    the general-purpose Viterbi code in ``viterbi.py`` to compute the best
    label sequence.

    Args:
        X: Indexable sequence of positions; ``X[i]`` is iterated to obtain
            the feature indices that are active at position ``i``.
        w: Weight container read as ``w[0, idx]``.

    Returns:
        The label sequence ``yhat`` produced by ``run_viterbi``; the score
        that comes with it is discarded.
    """
    from viterbi import run_viterbi

    L = self.num_classes
    N = len(X)
    start_scores = np.zeros(L)
    end_scores = np.zeros(L)
    trans_scores = np.zeros((L, L))
    emission_scores = np.zeros((N, L))

    # Fill the score arrays from the weight vector.  ``range`` is used
    # instead of ``xrange`` so the loops run on both Python 2 and Python 3.
    for j in range(L):
        start_scores[j] = w[0, self.get_start_trans_idx(j)]
        end_scores[j] = w[0, self.get_end_trans_idx(j)]

        # transition: score of moving from label j to label k
        for k in range(L):
            trans_scores[j, k] = w[0, self.get_trans_idx(j, k)]

        # emission: sum of the weights of every active feature paired
        # with label j (0.0 when a position has no active features)
        for i in range(N):
            emission_scores[i, j] = sum(
                (w[0, self.get_ftr_idx(fidx, j)] for fidx in X[i]), 0.0)

    # now run the viterbi code!
    _, yhat = run_viterbi(emission_scores, trans_scores,
                          start_scores, end_scores)
    return yhat
def task(self):
    """Build a column chart comparing two HMMs on the robot data.

    One HMM is trained per data file (without and with momentum), each is
    evaluated with ``run_viterbi``, and ``1 - result`` is charted as the
    "Fraction Correct" (presumably ``run_viterbi`` returns an error
    fraction here -- confirm against its definition).

    Returns:
        dict: chart description with one series of two values, ordered
        "Without momentum" then "With momentum".
    """
    fraction_correct = []
    for source in ("robot_no_momentum.data", "robot_with_momentum.data"):
        model, dataset = train_hmm_from_data(source)
        fraction_correct.append(1 - run_viterbi(model, dataset))

    categories = ["Without momentum", "With momentum"]
    series = [{"name": "Test set performance", "data": fraction_correct}]
    return {
        "chart": {"defaultSeriesType": "column"},
        "xAxis": {"categories": categories},
        "yAxis": {"title": {"text": "Fraction Correct"}},
        "title": {"text": "HMM performance on"
                          " inferring robot location."},
        "series": series,
    }
def main():
    """Evaluate the CNN and the Viterbi step on the testing data set.

    Steps, in order:
      1. Load the "testing" data set and a stored CNN model.
      2. Save the CNN predictions to ``V.VITERBI_PREDICTION_PATH_TESTING``
         and read them back.
      3. Call ``run_viterbi()`` (no arguments -- presumably it reads the
         file written in step 2; confirm), save its result to
         ``V.VITERBI_RESULT_TESTING`` and read it back.
      4. Build one row ``[actual, cnn, viterbi]`` per sample, drop rows
         whose actual label is -100, save them to
         ``V.PREDICTION_RESULT_TESTING`` and read them back.
      5. Pass the rows to ``produce_statistics_json`` and ``visualize``.
    """

    def _read_matrix(path):
        # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values``
        # returns the same 2-D array on old and new versions.  The
        # separator is a regex (python engine), written as a raw string.
        return pd.read_csv(path, header=None, sep=r'\,',
                           engine='python').values

    # Load test data
    # Input: Testing, generate new windows, oversampling, viterbi training
    DATA_TYPE = "testing"
    GENERATE_NEW_WINDOWS = True
    OVERSAMPLING = False
    VITERBI = False
    data_set = get_data_set(DATA_TYPE, GENERATE_NEW_WINDOWS, OVERSAMPLING,
                            VITERBI)

    # Create network
    cnn = Convolutional_Neural_Network()
    cnn.set_data_set(data_set)
    cnn.load_model()

    actual = data_set._labels
    cnn_result = cnn.get_predictions()
    np.savetxt(V.VITERBI_PREDICTION_PATH_TESTING, cnn_result, delimiter=",")
    cnn_result = _read_matrix(V.VITERBI_PREDICTION_PATH_TESTING)

    viterbi_result = run_viterbi()
    np.savetxt(V.VITERBI_RESULT_TESTING, viterbi_result, delimiter=",")
    viterbi_result = _read_matrix(V.VITERBI_RESULT_TESTING)

    # Add results in array with actual label
    result = np.zeros((len(cnn_result), 3))
    for i in range(len(cnn_result)):
        a = np.argmax(actual[i])
        c = np.argmax(cnn_result[i])
        # The matrix read back from CSV is 2-D with one value per row;
        # take that value as a scalar rather than packing a 1-element
        # array into the row.  The "- 1" shifts the Viterbi labels down
        # by one (presumably they are 1-based -- confirm).
        v = viterbi_result[i, 0] - 1
        result[i] = [a, c, v]

    # Remove activities labelled as -100 - activities such as shuffling,
    # transition ... See data.py
    boolean_actual = np.invert(actual[:, 0] == -100).T
    result = result[boolean_actual]

    np.savetxt(V.PREDICTION_RESULT_TESTING, result, delimiter=",")
    result = _read_matrix(V.PREDICTION_RESULT_TESTING)

    produce_statistics_json(result)
    visualize(result)
def inference(self, X, w):
    """Decode the best label sequence for ``X`` under the weights ``w``.

    Thin wrapper: ``self.load_weights`` turns the CRF weights into the
    start, end, transition and emission score arrays, which are then
    handed to the general-purpose Viterbi code in ``viterbi.py``.

    Returns:
        Only the best sequence from ``run_viterbi``; its score is dropped.
    """
    from viterbi import run_viterbi

    num_labels = self.num_classes
    score_arrays = self.load_weights(w, num_labels, X)
    start, end, transition, emission = score_arrays

    # run_viterbi expects emission and transition scores first
    _, best_sequence = run_viterbi(emission, transition, start, end)
    return best_sequence
def main():
    """Run the stored CNN and the Viterbi step on the "predicting" data set.

    Loads the data set, restores the CNN model, asks it for predictions,
    calls ``run_viterbi()`` and finally prints the path
    ``V.VITERBI_RESULT_PREDICTING``.  The return values of
    ``cnn.get_predictions()`` and ``run_viterbi()`` are not used here, so
    both are called only for their effects.
    """
    # Load test data
    # Input: Testing, generate new windows, oversampling, viterbi training
    DATA_TYPE = "predicting"
    GENERATE_NEW_WINDOWS = True
    OVERSAMPLING = False
    VITERBI = False
    data_set = get_data_set(DATA_TYPE, GENERATE_NEW_WINDOWS, OVERSAMPLING,
                            VITERBI)

    # Create network
    cnn = Convolutional_Neural_Network()
    cnn.set_data_set(data_set)
    cnn.load_model()

    # NOTE(review): unlike the testing entry point, the predictions are not
    # written to disk here before run_viterbi() -- confirm that is intended.
    cnn.get_predictions()
    run_viterbi()

    # Single-argument print call: same output under Python 2 and Python 3
    # (the original print statement is a SyntaxError on Python 3).
    print('Prediction saved at path {}'.format(V.VITERBI_RESULT_PREDICTING))
def run_viterbi_test():
    """A simple tester for the Viterbi algorithm.

    Generates random emission, transition, start and end scores, finds the
    best sequence by brute force over all possible label sequences, and
    compares both the sequence and its score with what ``run_viterbi``
    returns.

    Prints the percentage of cases where the sequence matched and where the
    score matched, then asserts that both are 100%.

    Raises:
        AssertionError: if any sequence or score differs from brute force.
    """
    from viterbi import run_viterbi
    from numpy import random
    import numpy as np
    from itertools import product

    maxN = 7  # maximum length of a sentence (min is 1)
    maxL = 4  # maximum number of labels (min is 2)
    num_tests = 1000  # number of sentences to generate
    random.seed(0)
    tolerance = 1e-5  # how close do the scores have to be?
    # Both values are passed to random.normal as its scale argument, i.e.
    # the standard deviation (identical to the variance only at 1.0).
    emission_var = 1.0
    trans_var = 1.0
    passed_y = 0  # how many times the correct sequence was predicted
    passed_s = 0  # how many times the correct score was returned

    for _ in range(num_tests):
        N = random.randint(1, maxN + 1)
        L = random.randint(2, maxL + 1)

        # Generate the scores
        emission_scores = random.normal(0.0, emission_var, (N, L))
        trans_scores = random.normal(0.0, trans_var, (L, L))
        start_scores = random.normal(0.0, trans_var, L)
        end_scores = random.normal(0.0, trans_var, L)

        # run viterbi
        viterbi_s, viterbi_y = run_viterbi(emission_scores, trans_scores,
                                           start_scores, end_scores)

        # compute the best sequence and score by brute force
        best_y = []
        best_s = -np.inf
        for y in product(range(L), repeat=N):  # all possible ys
            score = 0.0
            score += start_scores[y[0]]
            for i in range(N - 1):
                score += trans_scores[y[i], y[i + 1]]
                score += emission_scores[i, y[i]]
            score += emission_scores[N - 1, y[N - 1]]
            score += end_scores[y[N - 1]]
            # update the best
            if score > best_s:
                best_s = score
                best_y = list(y)

        # Mismatch if any label differs.  A path of the wrong length also
        # counts as a mismatch instead of raising IndexError (too short)
        # or being accepted on its prefix (too long).
        match_y = len(viterbi_y) == len(best_y) and all(
            viterbi_y[i] == best_y[i] for i in range(len(best_y)))
        if match_y:
            passed_y += 1

        # the scores should also be very close
        if abs(viterbi_s - best_s) < tolerance:
            passed_s += 1

    # Single-argument print calls give the same output on Python 2 and 3.
    print("Passed(y) {}".format(passed_y * 100.0 / num_tests))
    print("Passed(s) {}".format(passed_s * 100.0 / num_tests))
    assert passed_y == num_tests
    assert passed_s == num_tests
def viterbi_tags(self,
                 logits: torch.Tensor,
                 mask: torch.Tensor) -> List[Tuple[List[int], float]]:
    """
    Uses viterbi algorithm to find most likely tags for the given inputs.
    If constraints are applied, disallows all other transitions.

    Parameters
    ----------
    logits : ``torch.Tensor``
        Three-dimensional scores; the last two sizes are read as the
        maximum sequence length and the number of tags.  It is iterated
        along the first dimension, one sequence at a time.
    mask : ``torch.Tensor``
        Iterated in lockstep with ``logits``; the sum of each entry is
        used as that sequence's length.

    Returns
    -------
    One ``(viterbi_path, viterbi_score)`` pair per sequence, exactly as
    returned by ``viterbi.run_viterbi`` (note the swapped order).
    """
    _, max_seq_length, num_tags = logits.size()

    # Get the tensors out of the variables
    logits, mask = logits.data, mask.data

    # Augment transitions matrix with start and end transitions
    start_tag = num_tags
    end_tag = num_tags + 1
    transitions = torch.Tensor(num_tags + 2, num_tags + 2).fill_(-10000.)

    # Apply transition constraints: allowed entries keep their learned
    # score, disallowed entries are pushed to -10000.
    constrained_transitions = (
        self.transitions * self._constraint_mask[:num_tags, :num_tags] +
        -10000.0 * (1 - self._constraint_mask[:num_tags, :num_tags])
    )
    transitions[:num_tags, :num_tags] = constrained_transitions.data

    if self.include_start_end_transitions:
        transitions[start_tag, :num_tags] = (
            self.start_transitions.detach() *
            self._constraint_mask[start_tag, :num_tags].data +
            -10000.0 * (1 - self._constraint_mask[start_tag, :num_tags].detach())
        )
        transitions[:num_tags, end_tag] = (
            self.end_transitions.detach() *
            self._constraint_mask[:num_tags, end_tag].data +
            -10000.0 * (1 - self._constraint_mask[:num_tags, end_tag].detach())
        )
    else:
        # No learned start/end scores: allowed entries are 0, disallowed
        # entries are -10000.
        transitions[start_tag, :num_tags] = (
            -10000.0 * (1 - self._constraint_mask[start_tag, :num_tags].detach()))
        transitions[:num_tags, end_tag] = -10000.0 * (
            1 - self._constraint_mask[:num_tags, end_tag].detach())

    transitions = transitions.cpu().numpy()

    best_paths = []
    # Pad the max sequence length by 2 to account for start_tag + end_tag.
    tag_sequence = torch.Tensor(max_seq_length + 2, num_tags + 2)

    for prediction, prediction_mask in zip(logits, mask):
        # torch.sum returns a 0-dim tensor; convert it once to a Python
        # int so it is a valid slice bound whatever the mask's dtype is.
        sequence_length = int(torch.sum(prediction_mask))

        # Start with everything totally unlikely
        tag_sequence.fill_(-10000.)
        # At timestep 0 we must have the START_TAG
        tag_sequence[0, start_tag] = 0.
        # At steps 1, ..., sequence_length we just use the incoming prediction
        tag_sequence[1:(sequence_length + 1), :num_tags] = prediction[:sequence_length]
        # And at the last timestep we must have the END_TAG
        tag_sequence[sequence_length + 1, end_tag] = 0.

        # We pass the tags and the transitions to ``run_viterbi``.  Only
        # the inner rows/columns (real timesteps and real tags) go in as
        # emissions; start and end scores are passed separately.
        target_tag_sequence = tag_sequence[:(sequence_length + 2)].cpu().numpy()
        viterbi_score, viterbi_path = viterbi.run_viterbi(
            target_tag_sequence[1:-1, :num_tags],
            transitions[:num_tags, :num_tags],
            transitions[start_tag, :num_tags],
            transitions[:num_tags, end_tag])
        best_paths.append((viterbi_path, viterbi_score))

    return best_paths
def run_viterbi_test():
    """A small fixed-input tester for the Viterbi algorithm.

    Uses one hand-written example (2 positions, 3 labels), finds the best
    sequence by brute force over all possible label sequences, and compares
    both the sequence and its score with what ``run_viterbi`` returns.

    Prints the Viterbi result, the brute-force result, both scores and the
    pass percentages, then asserts that sequence and score both matched.

    Raises:
        AssertionError: if the sequence or the score differs from brute
            force.
    """
    from viterbi import run_viterbi
    from numpy import random
    from itertools import product
    # np is used throughout this function; import it locally so the block
    # does not depend on a module-level import.
    import numpy as np

    num_tests = 1  # number of sentences to check
    # The scores below are fixed, but NumPy's global RNG is still seeded
    # exactly as before so that this side effect is unchanged.
    random.seed(0)
    tolerance = 1e-5  # how close do the scores have to be?
    passed_y = 0  # how many times the correct sequence was predicted
    passed_s = 0  # how many times the correct score was returned

    for _ in range(num_tests):
        N = 2
        L = 3

        # Fixed scores
        emission_scores = np.array([[0.1, 0.3, 0.25],
                                    [0.2, 0.45, 0.31]])
        trans_scores = np.array([[0.3, 0.2, 0.5],
                                 [0.12, 0.4, 0.3],
                                 [0.1, 0.6, 0.5]])
        start_scores = np.array([0.2, 0.5, 0.7])
        end_scores = np.array([0.5, 0.4, 0.1])

        # run viterbi
        viterbi_s, viterbi_y = run_viterbi(emission_scores, trans_scores,
                                           start_scores, end_scores)
        # Single-argument print calls give the same output on Python 2
        # and Python 3.
        print("Viterbi {} {}".format(viterbi_s, viterbi_y))

        # compute the best sequence and score by brute force
        best_y = []
        best_s = -np.inf
        for y in product(range(L), repeat=N):  # all possible ys
            score = 0.0
            score += start_scores[y[0]]
            for i in range(N - 1):
                score += trans_scores[y[i], y[i + 1]]
                score += emission_scores[i, y[i]]
            score += emission_scores[N - 1, y[N - 1]]
            score += end_scores[y[N - 1]]
            # update the best
            if score > best_s:
                best_s = score
                best_y = list(y)
        print("Brute {} {}".format(best_s, best_y))

        # Mismatch if any label differs.  A path of the wrong length also
        # counts as a mismatch instead of raising IndexError (too short)
        # or being accepted on its prefix (too long).
        match_y = len(viterbi_y) == len(best_y) and all(
            viterbi_y[i] == best_y[i] for i in range(len(best_y)))
        if match_y:
            passed_y += 1

        # the scores should also be very close
        print("scores: ")
        print("{} {}".format(viterbi_s, best_s))
        if abs(viterbi_s - best_s) < tolerance:
            passed_s += 1

    print("Passed(y) {}".format(passed_y * 100.0 / num_tests))
    print("Passed(s) {}".format(passed_s * 100.0 / num_tests))
    assert passed_y == num_tests
    assert passed_s == num_tests
def test_small_robot_dataset(self):
    """Check the value ``run_viterbi`` returns on the small robot data set.

    Trains an HMM from the normalized path of ``robot_small.data`` and
    expects ``run_viterbi(hmm, data, True)`` to be approximately 2/9.
    """
    path = normalize_filename("robot_small.data")
    model, dataset = train_hmm_from_data(path)
    observed = run_viterbi(model, dataset, True)
    expected = 2.0 / 9
    self.assertAlmostEqual(observed, expected)