Example #1
def test_crf_marginals(xseq, yseq, algorithm):
    crf = CRF(algorithm=algorithm)
    crf.fit([xseq], [yseq])

    y_pred_marginals = crf.predict_marginals([xseq])
    assert len(y_pred_marginals) == 1
    marginals = y_pred_marginals[0]
    assert len(marginals) == len(yseq)

    labels = crf.tagger_.labels()
    for m in marginals:
        assert isinstance(m, dict)
        assert set(m.keys()) == set(labels)
        assert abs(sum(m.values()) - 1.0) < 1e-6
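
The test above assumes pytest fixtures named xseq, yseq, and algorithm defined elsewhere in the suite. A minimal sketch of such fixtures for sklearn-crfsuite (the toy feature dicts and labels here are illustrative, not the originals):

import pytest

@pytest.fixture
def xseq():
    # one sequence: a list of per-token feature dicts
    return [{'word': 'hello'}, {'word': 'world'}]

@pytest.fixture
def yseq():
    return ['B', 'O']

@pytest.fixture(params=['lbfgs', 'l2sgd', 'ap', 'pa', 'arow'])
def algorithm(request):
    # the training algorithms CRFsuite exposes
    return request.param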
Example #3
class ConditionalRandomFields(Tagger):
    """A Conditional Random Fields model."""
    @staticmethod
    def _predict_proba(X):
        # intentionally unimplemented for this tagger
        del X

    @staticmethod
    def load(model_path):
        # intentionally unimplemented for this tagger
        del model_path

    def fit(self, X, y):
        self._clf.fit(X, y)
        return self

    def set_params(self, **parameters):
        self._clf = CRF()
        self._clf.set_params(**parameters)
        return self

    def get_params(self, deep=True):
        return self._clf.get_params()

    def predict(self, X, dynamic_resource=None):
        return self._clf.predict(X)

    def predict_proba(self, examples, config, resources):
        """
        Args:
            examples (list of mindmeld.core.Query): a list of queries to predict on
            config (ModelConfig): The ModelConfig which may contain information used for feature
                                  extraction
            resources (dict): Resources which may be used for this model's feature extraction

        Returns:
            list of tuples of (mindmeld.core.QueryEntity): a list of predicted labels \
             with confidence scores
        """
        X, _, _ = self.extract_features(examples, config, resources)
        seq = self._clf.predict(X)
        marginals_dict = self._clf.predict_marginals(X)
        marginal_tuples = []
        for query_index, query_seq in enumerate(seq):
            query_marginal_tuples = []
            for i, tag in enumerate(query_seq):
                query_marginal_tuples.append(
                    [tag, marginals_dict[query_index][i][tag]])
            marginal_tuples.append(query_marginal_tuples)
        return marginal_tuples

    def extract_features(self, examples, config, resources, y=None, fit=True):
        """Transforms a list of examples into a feature matrix.

        Args:
            examples (list of mindmeld.core.Query): a list of queries
            config (ModelConfig): The ModelConfig which may contain information used for feature
                                  extraction
            resources (dict): Resources which may be used for this model's feature extraction

        Returns:
            (list of list of str): features in CRF suite format
        """
        # Extract features and classes
        feats = []
        for example in examples:
            feats.append(
                self.extract_example_features(example, config, resources))
        X = self._preprocess_data(feats, fit)
        return X, y, None

    @staticmethod
    def extract_example_features(example, config, resources):
        """Extracts feature dicts for each token in an example.

        Args:
            example (mindmeld.core.Query): A query.
            config (ModelConfig): The ModelConfig which may contain information used for feature \
                                  extraction.
            resources (dict): Resources which may be used for this model's feature extraction.

        Returns:
            list[dict]: Features.
        """
        return extract_sequence_features(example, config.example_type,
                                         config.features, resources)

    def _preprocess_data(self, X, fit=False):
        """Converts data into formats of CRF suite.

        Args:
            X (list of dict): features of an example
            fit (bool, optional): True if processing data at fit time, false for predict time.

        Returns:
            (list of list of str): features in CRF suite format
        """
        if fit:
            self._feat_binner.fit(X)

        new_X = []
        for feat_seq in self._feat_binner.transform(X):
            feat_list = []
            for feature in feat_seq:
                temp_list = []
                for feat_type in sorted(feature.keys()):
                    temp_list.append("{}={}".format(feat_type,
                                                    str(feature[feat_type])))
                feat_list.append(temp_list)
            new_X.append(feat_list)
        return new_X

    def setup_model(self, config):
        self._feat_binner = FeatureBinner()
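
_preprocess_data above flattens each token's feature dict into the sorted "key=value" string list that python-crfsuite consumes. A standalone sketch of the same conversion, assuming a pass-through in place of the FeatureBinner:

def to_crfsuite_format(feat_seqs):
    # feat_seqs: a list of sequences, each a list of per-token feature dicts
    return [
        [["{}={}".format(k, v) for k, v in sorted(feature.items())]
         for feature in feat_seq]
        for feat_seq in feat_seqs
    ]

# e.g. to_crfsuite_format([[{'word': 'hi', 'len': 2}]])
# -> [[['len=2', 'word=hi']]]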
Example #4
class SeqModel:
    def __init__(self, data):

        print("build batched lstmcrf...")

        self.label_alphabet=data.label_alphabet
        self.word_alphabet=data.word_alphabet

        self.crf = CRF(
            algorithm='lbfgs',
            c1=0.1,
            c2=0.1,
            max_iterations=100,
            all_possible_states=False,
            all_possible_transitions=True
        )
        self.examiner = Examiner(data)
        self.useExaminer = False
        self.loss_function = nn.NLLLoss()
        self.topk=5
        self.X_train=[]
        self.Y_train=[]
        self.pos_mask_list=[]
        self.instances=[]
        self.scores_refs=[]
        self.pos_mask=None
        self.tag_size=data.label_alphabet_size


    def masked_label(self, pos_mask, mask, batch_label, tag_seq):
        """Generate a masked label sequence: keep the gold label where
        pos_mask is 0 and substitute the CRF prediction where it is 1."""
        batch_label = batch_label.mul(1 - pos_mask)
        tag_seq = Variable(tag_seq).mul(pos_mask)

        return batch_label + tag_seq
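
    # Worked example (illustrative values): with pos_mask = [0, 1, 0],
    # batch_label = [3, 5, 2] and tag_seq = [1, 4, 1], the method returns
    # [3, 0, 2] + [0, 4, 0] = [3, 4, 2]: gold labels survive where
    # pos_mask is 0 and the CRF prediction is substituted where it is 1.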

    def ner(self, sentence):
        sentence_features = [self.features(sentence, index) for index in range(len(sentence))]
        return list(zip(sentence, self.crf.predict([sentence_features])[0]))

    def rand_mask(self, word_inputs, mask):
        """Generate a random position mask that zeroes out up to
        self.topk positions per sequence."""
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        rand_vec = Variable(torch.rand(batch_size, seq_len), requires_grad=False)

        rand_vec = mask.cpu().float() * rand_vec

        if seq_len >= self.topk:
            topk, indices = rand_vec.topk(self.topk, dim=1)
        else:
            topk, indices = rand_vec.topk(seq_len, dim=1)
        pos_mask = Variable(torch.ones(batch_size, seq_len))
        pos_mask = pos_mask.scatter(1, indices, 0).long()
        return pos_mask.cuda()

    def sent2features(self, sent):
        return [self.features(sent, i) for i in range(len(sent))]

    @staticmethod
    def sent2labels(sent):
        return [label for token, postag, label in sent]

    @staticmethod
    def sent2tokens(sent):
        return [token for token, postag, label in sent]

    def tensor_to_sequence(self, _alphabet, word_inputs, label=True):
        if label:
            return [[_alphabet.get_instance(x) for x in word_inputs[0]]]
        else:
            return [self.sent2features([_alphabet.get_instance(x) for x in word_inputs[0]])]

    def sequence_to_tensor(self, _alphabet, word_inputs):
        return torch.LongTensor([[_alphabet.get_index(x) for x in word_inputs[0]]])

    def pos_selection(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, batch_label, mask, t=None, pos_mask=None):
        """Directly compute the reward while generating the new label sequence."""
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        # get the CRF's predicted tags and per-token marginal distributions
        tag_seq = self.sequence_to_tensor(self.label_alphabet, self.crf.predict(self.tensor_to_sequence(self.word_alphabet, word_inputs, label=False)))
        distributions = self.crf.predict_marginals(self.tensor_to_sequence(self.word_alphabet, word_inputs, label=False))
        tag_seq = tag_seq.cuda()

        # densify the marginals into a (1, seq_len, tag_size) probability tensor
        tag_prob = Variable(torch.zeros(1, word_seq_lengths[0], self.tag_size))
        for j, key in enumerate(self.label_alphabet.instances):
            for i in range(word_seq_lengths[0]):
                tag_prob[0, i, j] = distributions[0][i].get(key, 0.0)
        if t is not None:
            t_mask = self.pos_mask_list[t]

            indices, pos_mask, scores_ref, score = self.examiner.neg_log_likelihood_loss(
                word_inputs, word_seq_lengths, char_inputs, char_seq_lengths,
                char_seq_recover, batch_label, tag_seq, tag_prob,
                mask * (1 - t_mask).byte(), self.crf)
        else:
            indices, pos_mask, scores_ref, score = self.examiner.neg_log_likelihood_loss(
                word_inputs, word_seq_lengths, char_inputs, char_seq_lengths,
                char_seq_recover, batch_label, tag_seq, tag_prob, mask, self.crf)

        self.pos_mask = pos_mask
        new_batch_label = self.masked_label(pos_mask, mask, batch_label, tag_seq)

        return new_batch_label, tag_seq, tag_prob, pos_mask, score, indices, scores_ref

    def add_instance(self, word_inputs, batch_label, pos_mask):
        """Add an instance to the training dataset."""
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        self.X_train.append(self.tensor_to_sequence(self.word_alphabet, word_inputs, label=False)[0])
        self.Y_train.append(self.tensor_to_sequence(self.label_alphabet, batch_label)[0])

        if pos_mask is None:
            self.pos_mask_list.append(Variable(torch.zeros(batch_size, seq_len).long()))
        else:
            self.pos_mask_list.append(pos_mask)



    def reinforcement_reward(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, batch_label, tag_seq, tag_prob, mask, mode):
        """Directly compute the reward instead of generating the new label sequence."""
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        tag_seq = self.sequence_to_tensor(self.label_alphabet, self.crf.predict(self.tensor_to_sequence(self.word_alphabet, word_inputs, label=False)))
        distributions = self.crf.predict_marginals(self.tensor_to_sequence(self.word_alphabet, word_inputs, label=False))
        tag_seq = tag_seq.cuda()

        tag_prob = Variable(torch.zeros(1, word_seq_lengths[0], self.tag_size))
        for j, key in enumerate(self.label_alphabet.instances):
            for i in range(word_seq_lengths[0]):
                tag_prob[0, i, j] = distributions[0][i].get(key, 0.0)

        # indices: the selected positions as indices
        # pos_mask: the selected positions as a mask vector
        indices, pos_mask, scores_ref, full_loss = self.examiner.neg_log_likelihood_loss(
            word_inputs, word_seq_lengths, char_inputs, char_seq_lengths,
            char_seq_recover, batch_label, tag_seq, tag_prob, mask, self.crf)

        if mode == "supervised_partial":
            return pos_mask, (full_loss * (1 - pos_mask.float())).sum()
        elif mode == "supervised_full":
            return pos_mask, full_loss.sum()
        else:
            return pos_mask, scores_ref

    def features(self,sent, i):
        # obtain some overall information of the point name string
        num_part = 4
        len_string = len(sent)
        mod = len_string % num_part
        part_size = int(math.floor(len_string/num_part))
        # determine which part the current character belongs to
        # larger part will be at the beginning if the whole sequence can't be divided evenly
        size_list = []
        mod_count = 0
        for j in range(num_part):
            if mod_count < mod:
                size_list.append(part_size+1)
                mod_count += 1
            else:
                size_list.append(part_size)
        # for current character
        part_cumulative = [0]*num_part
        for j in range(num_part):
            if j > 0:
                part_cumulative[j] = part_cumulative[j-1] + size_list[j]
            else:
                part_cumulative[j] = size_list[j] - 1   # indices start from 0
        part_indicator = [0]*num_part
        for j in range(num_part):
            if part_cumulative[j] >= i:
                part_indicator[j] = 1
                break
        word = sent[i][0]
        if word.isdigit():
            itself = 'NUM'
        else:
            itself = word
        features = {
            'word': itself,
            'part0': part_indicator[0] == 1,
            'part1': part_indicator[1] == 1,
            'part2': part_indicator[2] == 1,
            'part3': part_indicator[3] == 1,
        }
        # for previous character
        if i > 0:
            part_indicator = [0] * num_part
            for j in range(num_part):
                if part_cumulative[j] >= i-1:
                    part_indicator[j] = 1
                    break
            word1 = sent[i-1]
            if word1.isdigit():
                itself1 = 'NUM'
            else:
                itself1 = word1
            features.update({
                '-1:word': itself1,
                '-1:part0': part_indicator[0] == 1,
                '-1:part1': part_indicator[1] == 1,
                '-1:part2': part_indicator[2] == 1,
                '-1:part3': part_indicator[3] == 1,
            })
        else:
            features['BOS'] = True
        # for next character
        if i < len(sent)-1:
            part_indicator = [0] * num_part
            for j in range(num_part):
                if part_cumulative[j] >= i + 1:
                    part_indicator[j] = 1
                    break
            word1 = sent[i+1]
            if word1.isdigit():
                itself1 = 'NUM'
            else:
                itself1 = word1
            features.update({
                '+1:word': itself1,
                '+1:part0': part_indicator[0] == 1,
                '+1:part1': part_indicator[1] == 1,
                '+1:part2': part_indicator[2] == 1,
                '+1:part3': part_indicator[3] == 1,
            })
        else:
            features['EOS'] = True
        return features
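
    # Worked example (illustrative): for a 10-character name with num_part = 4,
    # size_list = [3, 3, 2, 2] and part_cumulative = [2, 5, 7, 9], so the
    # character at index i = 4 satisfies part_cumulative[1] = 5 >= 4 first
    # and is flagged part1 = True.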
    
    def train(self):
        self.crf.fit(self.X_train, self.Y_train)

    def sample_train(self, left, right):
        self.crf.fit(self.X_train[left:right], self.Y_train[left:right])

    def clear(self):
        self.X_train = []
        self.Y_train = []
        self.pos_mask_list = []

    def test(self, word_inputs):
        tag_seq = self.sequence_to_tensor(self.label_alphabet, self.crf.predict(self.tensor_to_sequence(self.word_alphabet, word_inputs, label=False)))
        return Variable(tag_seq)
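
As a standalone illustration of how these character-position features drive the CRF, here is a minimal sketch against the same sklearn-crfsuite API on toy data (extract_char_features is a simplified stand-in for SeqModel.features, and the tag scheme is invented):

from sklearn_crfsuite import CRF

def extract_char_features(sent, i):
    # simplified stand-in: the character itself plus its neighbours,
    # with digits collapsed to 'NUM' as in SeqModel.features
    feats = {'word': 'NUM' if sent[i].isdigit() else sent[i]}
    if i > 0:
        feats['-1:word'] = 'NUM' if sent[i - 1].isdigit() else sent[i - 1]
    else:
        feats['BOS'] = True
    if i < len(sent) - 1:
        feats['+1:word'] = 'NUM' if sent[i + 1].isdigit() else sent[i + 1]
    else:
        feats['EOS'] = True
    return feats

X_train = [[extract_char_features(s, i) for i in range(len(s))]
           for s in ['AHU-1', 'VAV-12']]
Y_train = [['B-dev', 'I-dev', 'I-dev', 'O', 'B-id'],
           ['B-dev', 'I-dev', 'I-dev', 'O', 'B-id', 'I-id']]

crf = CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100,
          all_possible_transitions=True)
crf.fit(X_train, Y_train)
print(crf.predict(X_train)[0])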



        
Example #5
class EntityExtractor:
    def __init__(
        self,
        hyper_params: Dict[str, float] = None,
        model_path: str = None,
    ):
        self.fe = FeatureExtractor()
        if model_path:
            self.load_model(model_path=model_path)
        else:
            hyper_params = hyper_params or {}
            algorithm = hyper_params.get("algorithm", "lbfgs")
            c1 = hyper_params.get("c1", 0.1)
            c2 = hyper_params.get("c2", 0.1)
            max_iters = hyper_params.get("max_iterations", 100)
            apt = hyper_params.get("all_possible_transitions", True)

            self.crf = CRF(
                algorithm=algorithm,
                c1=c1,
                c2=c2,
                max_iterations=max_iters,
                all_possible_transitions=apt,
            )

    def save_model(self, output_path: str):
        """
        save model
        """
        with open(output_path, "wb") as f:
            pickle.dump(self.crf, f)

    def load_model(self, model_path: str):
        """
        load model
        """
        with open(model_path, "rb") as f:
            self.crf = pickle.load(f)

    def train(self, sentences: List[Sentence]):
        """
        execute training
        """
        x = [self.fe.extract_feature(s) for s in sentences]
        y = [s.labels for s in sentences]
        self.crf.fit(x, y)

    def evaluate(self,
                 sentences: List[Sentence],
                 fix_invalid_labels=True,
                 decoder='greedy',
                 k=5,
                 debug=False):
        """
        return predicted class
        """
        x = [self.fe.extract_feature(s) for s in sentences]
        y_test = [s.labels for s in sentences]
        y_pred = self.__predict(x, decoder, k)
        if debug:
            for t, p in list(zip(y_test, y_pred))[:10]:
                print(t)
                print(p)

        print('raw, ', decoder)
        print(
            metrics.flat_classification_report(y_test,
                                               y_pred,
                                               labels=sorted(LABELS),
                                               digits=3))
        if fix_invalid_labels:
            print('fix-invalid, ', decoder)
            y_pred = self.fix_labels(y_pred)
            if debug:
                for t, p in list(zip(y_test, y_pred))[:10]:
                    print(t)
                    print(p)
            print(
                metrics.flat_classification_report(y_test,
                                                   y_pred,
                                                   labels=sorted(LABELS),
                                                   digits=3))
        return metrics.flat_f1_score(y_test,
                                     y_pred,
                                     average='weighted',
                                     labels=LABELS)

    def fix_labels(self, labels):
        labels_fixed = []
        with multiprocessing.Pool() as p:
            labels_fixed.append(p.map(_fix_valid, labels))
        return labels_fixed[0]

    def __predict(self, x, decoder='greedy', k=5):
        if decoder == 'greedy':
            return self.crf.predict(x)
        else:
            y_probs = self.__predict_prob(x)
            # (n_data, max_sequence_length, n_labels) -> (n_data, k, max_sequence_length)
            args = [(probability, k) for probability in y_probs]
            results = []
            with multiprocessing.Pool(multiprocessing.cpu_count()) as p:
                results.append(p.map(_beam_search_decoder, args))
            beams_list = results[0]

            # keep only the valid label sequences
            args = [[[LABELS[idx - 1] if idx > 0 else 'O' for idx in beam[0]]
                     for beam in beams] for beams in beams_list]
            labels_selected = []
            with multiprocessing.Pool(multiprocessing.cpu_count()) as p:
                labels_selected.append(p.map(_filter_valid, args))
                labels_selected = labels_selected[0]

            return labels_selected

    def predict(self, sentences: List[Sentence], decoder='greedy', k=5):
        """
        return predicted class
        """
        x = [self.fe.extract_feature(s) for s in sentences]
        return self.__predict(x, decoder, k)

    def __predict_prob(self, x):
        probs = self.crf.predict_marginals(x)
        # (n_data, max_sequence_length, n_labels)
        return [[[p_token['O']] + [p_token[label] for label in LABELS]
                 for p_token in prob] for prob in probs]

    def predict_prob(self, sentences: List[Sentence]):
        """
        return probabilities for each class
        :param data:
        :return:
        """
        x = [self.fe.extract_feature(s) for s in sentences]
        return self.__predict_prob(x)
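
Typical usage of this class, assuming Sentence, FeatureExtractor, LABELS, and the beam-search helpers come from the surrounding project (the sentence lists and file path below are placeholders):

# hypothetical driver code
extractor = EntityExtractor(hyper_params={'c1': 0.2, 'max_iterations': 50})
extractor.train(train_sentences)                 # List[Sentence]
f1 = extractor.evaluate(test_sentences, decoder='greedy')
extractor.save_model('crf_model.pkl')

# restore and decode again; any decoder other than 'greedy' takes the
# beam-search path over predict_marginals shown in __predict above
restored = EntityExtractor(model_path='crf_model.pkl')
pred = restored.predict(test_sentences, decoder='beam', k=5)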
Example #6
class BiLSTM_CRF:
    def __init__(self, data):

        print("build batched lstmcrf...")

        self.label_alphabet = data.label_alphabet
        self.word_alphabet = data.word_alphabet

        self.crf = CRF(algorithm='lbfgs',
                       c1=0.1,
                       c2=0.1,
                       max_iterations=100,
                       all_possible_transitions=False)
        self.examiner = Examiner(data)
        self.useExaminer = False
        self.full = False  # checked by rand_mask below; never set elsewhere
        self.loss_function = nn.NLLLoss()
        self.topk = 5
        self.X_train = []
        self.Y_train = []
        self.pos_mask_list = []
        self.instances = []
        self.scores_refs = []
        self.pos_mask = None
        self.tag_size = data.label_alphabet_size

    # for updating the CRF afterwards
    def masked_label(self, pos_mask, mask, batch_label, tag_seq):

        batch_label = batch_label.mul(1 - pos_mask)

        tag_seq = Variable(tag_seq).cuda().mul(pos_mask)

        return batch_label + tag_seq

    def ner(self, sentence):
        sentence_features = [
            self.features(sentence, index) for index in range(len(sentence))
        ]
        return list(zip(sentence, self.crf.predict([sentence_features])[0]))

    def rand_mask(self, word_inputs, mask):
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        if self.full:
            return Variable(torch.zeros(batch_size, seq_len).cuda().long(),
                            requires_grad=False)
        rand_vec = Variable(torch.rand(batch_size, seq_len),
                            requires_grad=False)

        rand_vec = mask.float() * rand_vec.cuda()

        if seq_len >= self.topk:
            topk, indices = rand_vec.topk(self.topk, dim=1)
        else:
            topk, indices = rand_vec.topk(seq_len, dim=1)
        pos_mask = Variable(torch.ones(batch_size, seq_len).cuda())
        pos_mask = pos_mask.scatter(1, indices, 0).long()
        return pos_mask

    # for updating the CRF afterwards
    def sent2features(self, sent):
        return [self.features(sent, i) for i in range(len(sent))]

    @staticmethod
    def sent2labels(sent):
        return [label for token, postag, label in sent]

    @staticmethod
    def sent2tokens(sent):
        return [token for token, postag, label in sent]

    def tensor_to_sequence(self, _alphabet, word_inputs, label=True):
        if label:
            return [[
                _alphabet.get_instance(x.data[0]) for x in word_inputs[0]
            ]]
        else:
            return [
                self.sent2features([
                    _alphabet.get_instance(x.data[0]) for x in word_inputs[0]
                ])
            ]

    def sequence_to_tensor(self, _alphabet, word_inputs):
        return torch.LongTensor(
            [[_alphabet.get_index(x) for x in word_inputs[0]]])

    def crf_loss(self,
                 word_inputs,
                 word_seq_lengths,
                 char_inputs,
                 char_seq_lengths,
                 char_seq_recover,
                 batch_label,
                 mask,
                 t=None,
                 pos_mask=None):

        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        # get score and tag_seq from the CRF

        tag_seq = self.sequence_to_tensor(
            self.label_alphabet,
            self.crf.predict(
                self.tensor_to_sequence(self.word_alphabet,
                                        word_inputs,
                                        label=False)))
        marginals = self.crf.predict_marginals(
            self.tensor_to_sequence(self.word_alphabet,
                                    word_inputs,
                                    label=False))
        # densify the marginals into a (1, seq_len, tag_size) probability tensor
        tag_prob = Variable(
            torch.zeros(1, word_seq_lengths[0], self.tag_size).cuda())
        for j, key in enumerate(self.label_alphabet.instances):
            for i in range(word_seq_lengths[0]):
                tag_prob[0, i, j] = marginals[0][i].get(key, 0.0)
        if t is not None:
            t_mask = self.pos_mask_list[t]
            indices, pos_mask, scores_ref, score, correct = self.examiner.neg_log_likelihood_loss(
                word_inputs, word_seq_lengths, char_inputs, char_seq_lengths,
                char_seq_recover, batch_label, tag_seq, tag_prob,
                mask * t_mask.byte())
        else:
            indices, pos_mask, scores_ref, score, correct = self.examiner.neg_log_likelihood_loss(
                word_inputs, word_seq_lengths, char_inputs, char_seq_lengths,
                char_seq_recover, batch_label, tag_seq, tag_prob, mask)
        self.pos_mask = pos_mask
        batch_label = self.masked_label(pos_mask, mask, batch_label, tag_seq)

        return batch_label, tag_seq, tag_prob, pos_mask, score, indices, scores_ref

    def add_instance(self, word_inputs, batch_label, pos_mask, instance,
                     scores_ref):
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        self.X_train.append(
            self.tensor_to_sequence(self.word_alphabet,
                                    word_inputs,
                                    label=False)[0])
        self.Y_train.append(
            self.tensor_to_sequence(self.label_alphabet, batch_label)[0])
        #print("self.tag_mask",self.tag_mask.size())

        if pos_mask is None:
            self.pos_mask_list.append(
                Variable(torch.zeros(batch_size, seq_len).long()).cuda())
        else:
            self.pos_mask_list.append(pos_mask)
        self.instances.append(instance)
        self.scores_refs.append(scores_ref)

    def readd_instance(self, batch_label, mask, pos_mask, i, scores_ref):
        tag_seq = self.sequence_to_tensor(self.label_alphabet,
                                          self.crf.predict([self.X_train[i]]))

        pos_mask = self.pos_mask_list[i].long() * pos_mask.long()

        batch_label = self.masked_label(pos_mask, mask, batch_label, tag_seq)
        self.Y_train[i] = self.tensor_to_sequence(self.label_alphabet,
                                                  batch_label)[0]
        self.pos_mask_list[i] = pos_mask
        self.scores_refs[i] = scores_ref

    def reinforment_reward(self, word_inputs, word_seq_lengths, char_inputs,
                           char_seq_lengths, char_seq_recover, batch_label,
                           tag_seq, tag_prob, mask):
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)

        indices, pos_mask, scores_ref, score, correct = self.examiner.neg_log_likelihood_loss(
            word_inputs, word_seq_lengths, char_inputs, char_seq_lengths,
            char_seq_recover, batch_label, tag_seq, tag_prob, mask)

        return pos_mask, scores_ref, (score * (1 - pos_mask.float())).sum()

    def reinforment_supervised(self, word_inputs, word_seq_lengths,
                               char_inputs, char_seq_lengths, char_seq_recover,
                               batch_label, tag_seq, tag_prob, mask):
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)

        # get the selected positions from the examiner
        indices, pos_mask, scores_ref, score, correct = self.examiner.neg_log_likelihood_loss(
            word_inputs, word_seq_lengths, char_inputs, char_seq_lengths,
            char_seq_recover, batch_label, tag_seq, tag_prob, mask)

        return pos_mask, score

    def pop_instance(self, x):
        self.X_train.pop(0)
        self.Y_train.pop(0)

    def reevaluate_instance(self, mask):
        for i in range(len(self.X_train)):
            #X_train[i]
            tag_seq = self.sequence_to_tensor(
                self.label_alphabet, self.crf.predict([self.X_train[i]]))

            pos_mask = self.pos_mask_list[i]

            batch_label = self.masked_label(
                pos_mask, mask,
                Variable(
                    self.sequence_to_tensor(self.label_alphabet,
                                            [self.Y_train[i]])).cuda(),
                tag_seq)
            self.Y_train[i] = self.tensor_to_sequence(self.label_alphabet,
                                                      batch_label)[0]

    def features(self, sent, i):
        # obtain some overall information of the point name string
        num_part = 4
        len_string = len(sent)
        mod = len_string % num_part
        part_size = int(math.floor(len_string / num_part))
        # determine which part the current character belongs to
        # larger part will be at the beginning if the whole sequence can't be divided evenly
        size_list = []
        mod_count = 0
        for j in range(num_part):
            if mod_count < mod:
                size_list.append(part_size + 1)
                mod_count += 1
            else:
                size_list.append(part_size)
        # for current character
        part_cumulative = [0] * num_part
        for j in range(num_part):
            if j > 0:
                part_cumulative[j] = part_cumulative[j - 1] + size_list[j]
            else:
                part_cumulative[j] = size_list[j] - 1  # indices start from 0
        part_indicator = [0] * num_part
        for j in range(num_part):
            if part_cumulative[j] >= i:
                part_indicator[j] = 1
                break
        word = sent[i][0]
        if word.isdigit():
            itself = 'NUM'
        else:
            itself = word
        features = {
            'word': itself,
            'part0': part_indicator[0] == 1,
            'part1': part_indicator[1] == 1,
            'part2': part_indicator[2] == 1,
            'part3': part_indicator[3] == 1,
        }
        # for previous character
        if i > 0:
            part_indicator = [0] * num_part
            for j in range(num_part):
                if part_cumulative[j] >= i - 1:
                    part_indicator[j] = 1
                    break
            word1 = sent[i - 1]
            if word1.isdigit():
                itself1 = 'NUM'
            else:
                itself1 = word1
            features.update({
                '-1:word': itself1,
                '-1:part0': part_indicator[0] == 1,
                '-1:part1': part_indicator[1] == 1,
                '-1:part2': part_indicator[2] == 1,
                '-1:part3': part_indicator[3] == 1,
            })
        else:
            features['BOS'] = True
        # for next character
        if i < len(sent) - 1:
            part_indicator = [0] * num_part
            for j in range(num_part):
                if part_cumulative[j] >= i + 1:
                    part_indicator[j] = 1
                    break
            word1 = sent[i + 1]
            if word1.isdigit():
                itself1 = 'NUM'
            else:
                itself1 = word1
            features.update({
                '+1:word': itself1,
                '+1:part0': part_indicator[0] == 1,
                '+1:part1': part_indicator[1] == 1,
                '+1:part2': part_indicator[2] == 1,
                '+1:part3': part_indicator[3] == 1,
            })
        else:
            features['EOS'] = True
        return features

    def train(self):
        self.crf.fit(self.X_train, self.Y_train)

    def sample_train(self, left, right):
        self.crf.fit(self.X_train[left:right], self.Y_train[left:right])

    def test(self, word_inputs):
        tag_seq = self.sequence_to_tensor(
            self.label_alphabet,
            self.crf.predict(
                self.tensor_to_sequence(self.word_alphabet,
                                        word_inputs,
                                        label=False)))
        return Variable(tag_seq).cuda()
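
The alphabet round-trip that tensor_to_sequence and sequence_to_tensor rely on can be reproduced with a minimal stand-in for the data.label_alphabet object (the Alphabet class below is hypothetical; the real one comes from the NCRF++-style data loader):

import torch

class Alphabet:
    # minimal bidirectional map between instances (labels/words) and indices
    def __init__(self, instances):
        self.instances = list(instances)
        self._index = {w: i for i, w in enumerate(self.instances)}

    def get_index(self, instance):
        return self._index[instance]

    def get_instance(self, index):
        return self.instances[index]

label_alphabet = Alphabet(['O', 'B-PER', 'I-PER'])
tensor = torch.LongTensor([[label_alphabet.get_index(t)
                            for t in ['B-PER', 'I-PER', 'O']]])
labels = [[label_alphabet.get_instance(int(x)) for x in tensor[0]]]
assert labels == [['B-PER', 'I-PER', 'O']]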