Example #1
0
 def __init__(self, test_data_path, feature_template_list,
              feature_weight_vector, output_path):
     """Remember the input/output paths and build the tagging model.

     The model is a ``FeatureVectorWeight`` created from the feature
     templates, a list copy of ``LABEL_SET`` and the given weight vector.
     """
     self.test_data_path = test_data_path
     label_list = list(LABEL_SET)
     model = FeatureVectorWeight(feature_template_list, label_list,
                                 feature_weight_vector)
     self.featureModel = model
     self.output_path = output_path
Example #2
0
class PosTagger(object):
    """Tag the sentences of a test file with a weighted feature model.

    The model is a ``FeatureVectorWeight`` built from the feature templates,
    a list copy of ``LABEL_SET`` and the supplied weight vector.
    """

    def __init__(self, test_data_path, feature_template_list, feature_weight_vector, output_path):
        """Store the input/output paths and build the feature model."""
        self.test_data_path = test_data_path
        self.featureModel = FeatureVectorWeight(feature_template_list, list(LABEL_SET), feature_weight_vector)
        self.output_path = output_path

    ######################################################
    #@func: calculate precision when testing our model
    ######################################################
    def _cal_precision(self, sentence, predict_label):
        """Return the fraction of terms whose predicted label is correct.

        @param sentence:      sequence of terms; ``term[1]`` is read as the
                              reference label. Labels are only considered
                              present when the first term has more than one
                              element.
        @param predict_label: predicted labels, index-aligned with sentence.
        @return: float ratio correct/total, or 0.0 when there is nothing to
                 score (empty sentence or no reference labels) -- this case
                 previously raised ZeroDivisionError.
        @raise IndexError: if predict_label is shorter than sentence.
        """
        # Guard: nothing to compare against, so avoid dividing by zero.
        if len(sentence) == 0 or len(sentence[0]) <= 1:
            return 0.0
        ideal_label = [term[1] for term in sentence]
        correct = sum(1 for i, gold in enumerate(ideal_label)
                      if gold == predict_label[i])
        return correct * 1.0 / len(ideal_label)

    #####################################################
    #@func: tag a sequence using trained model
    #####################################################
    def tag(self):
        """Tag every sentence from ``test_data_path`` and write the result.

        Each word is written to ``output_path`` as ``word<TAB>tag`` on its own
        line, and every sentence is followed by one empty line.
        """
        # The context manager guarantees the file is flushed and closed,
        # even if reading or decoding fails part-way through.
        with open(self.output_path, 'w') as output:
            for sentence in read_data(self.test_data_path):
                word_seq = [part[0] for part in sentence]
                predict_label = self.featureModel.viterbi(word_seq)
                for (word, predict_tag) in zip(word_seq, predict_label):
                    output.write(word + '\t' + predict_tag + '\n')
                output.write('\n')
Example #3
0
class PosTagger(object):
    """Tag the sentences of a test file with a weighted feature model.

    The model is a ``FeatureVectorWeight`` built from the feature templates,
    a list copy of ``LABEL_SET`` and the supplied weight vector.
    """

    def __init__(self, test_data_path, feature_template_list,
                 feature_weight_vector, output_path):
        """Store the input/output paths and build the feature model."""
        self.test_data_path = test_data_path
        self.featureModel = FeatureVectorWeight(feature_template_list,
                                                list(LABEL_SET),
                                                feature_weight_vector)
        self.output_path = output_path

    ######################################################
    #@func: calculate precision when testing our model
    ######################################################
    def _cal_precision(self, sentence, predict_label):
        """Return the fraction of terms whose predicted label is correct.

        @param sentence:      sequence of terms; ``term[1]`` is read as the
                              reference label. Labels are only considered
                              present when the first term has more than one
                              element.
        @param predict_label: predicted labels, index-aligned with sentence.
        @return: float ratio correct/total, or 0.0 when there is nothing to
                 score (empty sentence or no reference labels) -- this case
                 previously raised ZeroDivisionError.
        @raise IndexError: if predict_label is shorter than sentence.
        """
        # Guard: nothing to compare against, so avoid dividing by zero.
        if len(sentence) == 0 or len(sentence[0]) <= 1:
            return 0.0
        ideal_label = [term[1] for term in sentence]
        correct = sum(1 for i, gold in enumerate(ideal_label)
                      if gold == predict_label[i])
        return correct * 1.0 / len(ideal_label)

    #####################################################
    #@func: tag a sequence using trained model
    #####################################################
    def tag(self):
        """Tag every sentence from ``test_data_path`` and write the result.

        Each word is written to ``output_path`` as ``word<TAB>tag`` on its own
        line, and every sentence is followed by one empty line.
        """
        # The context manager guarantees the file is flushed and closed,
        # even if reading or decoding fails part-way through.
        with open(self.output_path, 'w') as output:
            for sentence in read_data(self.test_data_path):
                word_seq = [part[0] for part in sentence]
                predict_label = self.featureModel.viterbi(word_seq)
                for (word, predict_tag) in zip(word_seq, predict_label):
                    output.write(word + '\t' + predict_tag + '\n')
                output.write('\n')
Example #4
0
class Perceptron(object):
    """Train a ``FeatureVectorWeight`` model on labelled sentences.

    The model is built from the feature templates and a list copy of
    ``LABEL_SET``; its learned weights can be saved as text or as a pickle.
    """

    def __init__(self, train_data_path, feature_template_list,
                 model_params_path):
        """Store the data/model paths and build an untrained feature model."""
        self.train_data_path = train_data_path
        self.featureModel = FeatureVectorWeight(feature_template_list,
                                                list(LABEL_SET))
        self.model_params_path = model_params_path

    ######################################################
    #@func: train process
    #@param iteration:  iter_num, hyperparam in algorithm
    ######################################################
    def train(self, iteration):
        """Run ``iteration`` passes over the training data.

        For every sentence the current model predicts a label sequence with
        ``viterbi`` and the model is then updated from the observed words,
        the reference labels and the prediction.
        """
        for i in range(iteration):
            # Single pre-formatted argument: prints the same text as the old
            # statement form ``print 'iteration: ', n`` on Python 2 and 3.
            print('iteration:  %s' % (i + 1))
            for sentence in read_data(self.train_data_path):
                observe_data = [pair[0] for pair in sentence]
                ideal_label = [pair[1] for pair in sentence]
                predict_label = self.featureModel.viterbi(observe_data)
                #update feature vector
                self.featureModel.update(observe_data, ideal_label,
                                         predict_label)

    def _nonzero_params(self):
        """Return ``(feature, weight)`` pairs whose weight is not zero."""
        params = self.featureModel.params
        return [(hashstr, params[hashstr]) for hashstr in params
                if params[hashstr] != 0]

    #save feature vector weight
    def save_params_to_file(self):
        """Write non-zero weights to ``model_params_path`` as text.

        One ``feature<TAB>weight`` entry per line, with no trailing newline.
        """
        lines = [hashstr + '\t' + str(weight)
                 for hashstr, weight in self._nonzero_params()]
        with open(self.model_params_path, 'w') as f:
            f.write('\n'.join(lines))

    def save_params_to_pickle(self):
        """Pickle a dict of the non-zero weights to ``model_params_path``."""
        final = dict(self._nonzero_params())
        # Pickle output is a byte stream: binary mode is required on
        # Python 3 and avoids newline translation on Windows.
        with open(self.model_params_path, 'wb') as f:
            pickle.dump(final, f)
class Perceptron(object):
    """Train a ``FeatureVectorWeight`` model on labelled sentences.

    The model is built from the feature templates and a list copy of
    ``LABEL_SET``; its learned weights can be saved as text or as a pickle.
    """

    def __init__(self, train_data_path, feature_template_list, model_params_path):
        """Store the data/model paths and build an untrained feature model."""
        self.train_data_path = train_data_path
        self.featureModel = FeatureVectorWeight(feature_template_list, list(LABEL_SET))
        self.model_params_path = model_params_path

    ######################################################
    #@func: train process
    #@param iteration:  iter_num, hyperparam in algorithm
    ######################################################
    def train(self, iteration):
        """Run ``iteration`` passes over the training data.

        For every sentence the current model predicts a label sequence with
        ``viterbi`` and the model is then updated from the observed words,
        the reference labels and the prediction.
        """
        for i in range(iteration):
            # Single pre-formatted argument: prints the same text as the old
            # statement form ``print 'iteration: ', n`` on Python 2 and 3.
            print('iteration:  %s' % (i + 1))
            for sentence in read_data(self.train_data_path):
                observe_data = [pair[0] for pair in sentence]
                ideal_label = [pair[1] for pair in sentence]
                predict_label = self.featureModel.viterbi(observe_data)
                #update feature vector
                self.featureModel.update(observe_data, ideal_label, predict_label)

    def _nonzero_params(self):
        """Return ``(feature, weight)`` pairs whose weight is not zero."""
        params = self.featureModel.params
        return [(hashstr, params[hashstr]) for hashstr in params
                if params[hashstr] != 0]

    #save feature vector weight
    def save_params_to_file(self):
        """Write non-zero weights to ``model_params_path`` as text.

        One ``feature<TAB>weight`` entry per line, with no trailing newline.
        """
        lines = [hashstr + '\t' + str(weight)
                 for hashstr, weight in self._nonzero_params()]
        with open(self.model_params_path, 'w') as f:
            f.write('\n'.join(lines))

    def save_params_to_pickle(self):
        """Pickle a dict of the non-zero weights to ``model_params_path``."""
        final = dict(self._nonzero_params())
        # Pickle output is a byte stream: binary mode is required on
        # Python 3 and avoids newline translation on Windows.
        with open(self.model_params_path, 'wb') as f:
            pickle.dump(final, f)
Example #6
0
 def __init__(self, test_data_path, feature_template_list, feature_weight_vector, output_path):
     """Remember the input/output paths and build the tagging model.

     The model is a ``FeatureVectorWeight`` created from the feature
     templates, a list copy of ``LABEL_SET`` and the given weight vector.
     """
     self.test_data_path = test_data_path
     label_list = list(LABEL_SET)
     model = FeatureVectorWeight(feature_template_list, label_list, feature_weight_vector)
     self.featureModel = model
     self.output_path = output_path
 def __init__(self, train_data_path, feature_template_list, model_params_path):
     """Remember the training-data and parameter paths and build the model.

     The model is a ``FeatureVectorWeight`` created from the feature
     templates and a list copy of ``LABEL_SET``.
     """
     self.train_data_path = train_data_path
     label_list = list(LABEL_SET)
     model = FeatureVectorWeight(feature_template_list, label_list)
     self.featureModel = model
     self.model_params_path = model_params_path
Example #8
0
 def __init__(self, train_data_path, feature_template_list,
              model_params_path):
     """Remember the training-data and parameter paths and build the model.

     The model is a ``FeatureVectorWeight`` created from the feature
     templates and a list copy of ``LABEL_SET``.
     """
     self.train_data_path = train_data_path
     label_list = list(LABEL_SET)
     model = FeatureVectorWeight(feature_template_list, label_list)
     self.featureModel = model
     self.model_params_path = model_params_path