def presicionTest(self, cv_data, res_label):
    '''Simplest precision test: per-sentence fraction of correct labels.

    cv_data   -- data passed to ``eachSent(cv_data, label=True)``; every item
                 it yields is treated as the gold label sequence of one
                 sentence (assumed from usage here -- confirm against
                 ``eachSent``).
    res_label -- predicted label sequences, indexed in the same order as the
                 sentences yielded by ``eachSent``.

    Returns a list of floats, one per sentence: the number of positions where
    the gold and predicted label are equal, divided by the gold length.

    Positions are paired with ``zip``, so a prediction shorter than the gold
    sequence simply contributes fewer matches.  An empty gold sequence raises
    ZeroDivisionError, and a ``res_label`` with fewer entries than there are
    sentences raises IndexError.
    '''
    precisions = []
    for sent_no, gold_labels in enumerate(eachSent(cv_data, label=True)):
        predicted_labels = res_label[sent_no]
        matches = sum(
            1 for gold, predicted in zip(gold_labels, predicted_labels)
            if gold == predicted
        )
        # float() keeps the division fractional even under Python 2 semantics.
        precisions.append(matches / float(len(gold_labels)))
    return precisions
def makePrediction(self, te_data):
    '''Label every sentence of te_data with the Viterbi algorithm.

    The score of a label sequence is the product, over positions, of
    ``self.linear_2nd[(w, i, j)]`` (weight of label ``j`` following the label
    pair ``(w, i)``, with ``'*'`` padding the sentence start) and
    ``self.emission[label_index, word_index]``.  Words missing from
    ``self.ele_dict`` use the column ``self.ele_num``.

    te_data -- data passed to ``eachSent(te_data)``; every item it yields is
               treated as one sentence, i.e. a sequence of words (assumed
               from usage here -- confirm against ``eachSent``).

    Returns ``(res_label, res)``:
      res_label -- one list of predicted labels per sentence;
      res       -- one list of ``(word, label)`` pairs per sentence.

    Sentences with fewer than two words are handled explicitly: an empty
    sentence gets an empty labelling, and a one-word sentence gets the label
    maximising ``linear_2nd[('*', '*', i)] * emission[i, word]``.

    NOTE: like the original, this multiplies raw scores rather than summing
    logs, and assumes ``self.label[self.label_idx[x]] == x``.
    '''
    def word_index(word):
        # Unknown words share the extra emission column self.ele_num.
        # Returning the lookup directly keeps the index an integer; storing
        # it in a float array first makes NumPy reject it as an index.
        try:
            return self.ele_dict[word]
        except KeyError:
            return self.ele_num

    label_num = self.label_num
    # Resolve every label's matrix index once instead of in each inner loop.
    labels = [(lab, self.label_idx[lab]) for lab in self.label]

    res = []
    res_label = []
    for sent in eachSent(te_data):
        th_len = len(sent)

        if th_len == 0:
            best_label = []
        elif th_len == 1:
            # Only the start transition and one emission are available.
            only_idx = word_index(sent[0])
            start_score = np.zeros(label_num)
            for i, ii in labels:
                start_score[ii] = self.linear_2nd[('*', '*', i)] * \
                    self.emission[ii, only_idx]
            best_label = [self.label[start_score.argmax()]]
        else:
            # pai[k, ii, jj]: best score of any labelling of words 0..k that
            # ends with labels (ii, jj) at positions (k-1, k).
            pai = np.zeros((th_len, label_num, label_num))
            # backpointer[k, ii, jj]: label index at position k-2 achieving it.
            backpointer = np.zeros((th_len, label_num, label_num),
                                   dtype='int')

            # move forward: initialise with the first two words
            first_idx = word_index(sent[0])
            second_idx = word_index(sent[1])
            for i, ii in labels:
                for j, jj in labels:
                    pai[1, ii, jj] = \
                        self.linear_2nd[('*', '*', i)] * \
                        self.emission[ii, first_idx] * \
                        self.linear_2nd[('*', i, j)] * \
                        self.emission[jj, second_idx]

            for k in range(2, th_len):
                th_idx = word_index(sent[k])
                for i, ii in labels:
                    for j, jj in labels:
                        candidate = np.zeros(label_num)
                        for w, ww in labels:
                            candidate[ww] = pai[k - 1, ww, ii] * \
                                self.linear_2nd[(w, i, j)] * \
                                self.emission[jj, th_idx]
                        pai[k, ii, jj] = candidate.max()
                        backpointer[k, ii, jj] = candidate.argmax()

            # move backward: start from the best final label pair
            prev_idx, last_idx = np.unravel_index(
                pai[th_len - 1].argmax(), (label_num, label_num))
            # Built back to front: last label first, then the one before it.
            best_label_idx = [last_idx, prev_idx]
            for k in range(th_len - 1, 1, -1):
                # best_label_idx[-1] / [-2] are the labels at k-1 / k.
                best_label_idx.append(
                    backpointer[k, best_label_idx[-1], best_label_idx[-2]])
            best_label_idx.reverse()
            best_label = [self.label[e] for e in best_label_idx]

        res_label.append(best_label)
        # list() so callers get reusable pairs on Python 3 as well, where a
        # bare zip() is a one-shot iterator.
        res.append(list(zip(sent, best_label)))
    return res_label, res