Exemple #1
0
 def presicionTest(self, cv_data, res_label):
     '''Simplest precision test: fraction of correctly predicted labels.

     Each reference label sequence yielded by
     ``eachSent(cv_data, label=True)`` is compared, position by position,
     with the predicted sequence stored at the same index of ``res_label``.

     Returns a list holding one float per sentence: the number of matching
     positions divided by the length of the reference sequence.
     '''
     per_sentence = []
     for sent_no, gold in enumerate(eachSent(cv_data, label=True)):
         predicted = res_label[sent_no]
         # zip stops at the shorter sequence; the denominator is always
         # the length of the reference sequence.
         hits = sum(1 for g, p in zip(gold, predicted) if g == p)
         per_sentence.append(hits / float(len(gold)))
     return per_sentence
Exemple #2
0
    def makePrediction(self, te_data):
        '''Label every sentence of ``te_data`` with second-order Viterbi decoding.

        For a sentence of length n the dynamic-programming table
        ``pai[k, ii, jj]`` holds the best score of any label sequence for
        tokens 0..k that ends with label index ``ii`` at position k-1 and
        ``jj`` at position k.  A step multiplies the previous score by
        ``self.linear_2nd[(w, i, j)]`` (keyed by the two previous labels and
        the current one, with '*' padding before the sentence start) and by
        ``self.emission[jj, token_column]``.  Tokens missing from
        ``self.ele_dict`` use column ``self.ele_num``.

        Sentences with fewer than two tokens cannot fill the pair table:
        an empty sentence yields an empty labelling, and a single token gets
        the label maximising ``linear_2nd[('*', '*', i)] * emission[ii, tok]``.

        NOTE(review): scores are multiplied as raw values, not summed in
        log-space; if they are probabilities, very long sentences may
        underflow to 0 -- confirm against typical input lengths.

        Returns:
            (res_label, res): ``res_label`` is a list with one list of labels
            per sentence; ``res`` is a list with one list of
            ``(token, label)`` pairs per sentence.
        '''
        res = []
        res_label = []
        for sent in eachSent(te_data):
            th_len = len(sent)

            # Emission column of every token.  Kept as plain Python values:
            # storing them in a float ndarray (np.zeros(2)) produced float
            # indices, which NumPy rejects with IndexError.
            obs_idx = []
            for token in sent:
                try:
                    obs_idx.append(self.ele_dict[token])
                except KeyError:
                    obs_idx.append(self.ele_num)

            if th_len == 0:
                res_label.append([])
                res.append([])
                continue

            if th_len == 1:
                # Only one position: score each label on its own.
                single = np.zeros(self.label_num)
                for i in self.label:
                    ii = self.label_idx[i]
                    single[ii] = self.linear_2nd[('*', '*', i)] * \
                        self.emission[ii, obs_idx[0]]
                best_label = [self.label[single.argmax()]]
                res_label.append(best_label)
                res.append(list(zip(sent, best_label)))
                continue

            pai = np.zeros((th_len, self.label_num, self.label_num))
            backpointer = np.zeros(
                (th_len, self.label_num, self.label_num), dtype='int')

            # move forward: initialise with the first two tokens
            for i in self.label:
                ii = self.label_idx[i]
                for j in self.label:
                    jj = self.label_idx[j]
                    pai[1, ii, jj] = \
                        self.linear_2nd[('*', '*', i)] * \
                        self.emission[ii, obs_idx[0]] * \
                        self.linear_2nd[('*', i, j)] * \
                        self.emission[jj, obs_idx[1]]

            for k in range(2, th_len):
                th_idx = obs_idx[k]
                for i in self.label:
                    ii = self.label_idx[i]
                    for j in self.label:
                        jj = self.label_idx[j]
                        # candidate[ww]: best score when the label two
                        # positions back (at k-2) is w.
                        candidate = np.zeros(self.label_num)
                        for w in self.label:
                            ww = self.label_idx[w]
                            candidate[ww] = pai[k-1, ww, ii] * \
                                self.linear_2nd[(w, i, j)] * \
                                self.emission[jj, th_idx]
                        pai[k, ii, jj] = candidate.max()
                        backpointer[k, ii, jj] = candidate.argmax()

            # move backward: best (second-to-last, last) label pair
            end_two_bp = np.unravel_index(
                pai[th_len-1, :, :].argmax(), pai.shape[1:3])

            # Labels are collected from the end of the sentence backwards:
            # first the last label, then the second-to-last.
            best_label_idx = list(end_two_bp[::-1])
            for k in range(th_len - 1, 1, -1):
                # best_label_idx[-1] is the label at k-1, [-2] the one at k;
                # the backpointer gives the label at k-2.
                th_label_idx = backpointer[
                    k, best_label_idx[-1], best_label_idx[-2]]
                best_label_idx.append(th_label_idx)

            # reverse into sentence order and map indices back to labels
            best_label = [self.label[e] for e in best_label_idx[::-1]]
            res_label.append(best_label)
            # list(...) so the pairs can be read more than once on Python 3,
            # where zip returns a one-shot iterator.
            res.append(list(zip(sent, best_label)))

        return res_label, res