class PosTagger(object):
    """Tag the sentences of a test file with a feature-weight model.

    Decoding is delegated to ``FeatureVectorWeight.viterbi``; this class
    only wires the data source, the model and the output file together.
    """

    def __init__(self, test_data_path, feature_template_list,
                 feature_weight_vector, output_path):
        """Store the I/O paths and build the scoring model.

        Args:
            test_data_path: path handed to ``read_data`` by ``tag``.
            feature_template_list: feature templates forwarded to
                ``FeatureVectorWeight``.
            feature_weight_vector: weights forwarded to
                ``FeatureVectorWeight`` (presumably the trained
                parameters -- confirm against its definition).
            output_path: file that ``tag`` writes its predictions to.
        """
        self.test_data_path = test_data_path
        self.featureModel = FeatureVectorWeight(
            feature_template_list, list(LABEL_SET), feature_weight_vector)
        self.output_path = output_path

    def _cal_precision(self, sentence, predict_label):
        """Return the fraction of tokens whose predicted tag is correct.

        Args:
            sentence: sequence of items whose element ``[1]`` is the gold
                tag (element ``[0]`` is the word).
            predict_label: predicted tags, aligned with ``sentence``.

        Returns:
            A float in ``[0.0, 1.0]``. ``0.0`` is returned when the
            sentence is empty or carries no gold tags, instead of dividing
            by zero.
        """
        # Nothing to score: empty sentence, or items without a gold tag.
        if not sentence or len(sentence[0]) <= 1:
            return 0.0

        ideal_label = [term[1] for term in sentence]
        # zip() stops at the shorter sequence, so tokens without a
        # prediction are counted as wrong rather than raising IndexError.
        correct = sum(
            1 for gold, guess in zip(ideal_label, predict_label)
            if gold == guess)
        return correct * 1.0 / len(ideal_label)

    def tag(self):
        """Tag every sentence of the test data and write the result.

        Each token is written as ``word<TAB>tag`` on its own line and each
        sentence is followed by one blank line.
        """
        # The context manager guarantees the file is flushed and closed,
        # even if decoding raises part-way through.
        with open(self.output_path, 'w') as output:
            for sentence in read_data(self.test_data_path):
                word_seq = [part[0] for part in sentence]
                predict_label = self.featureModel.viterbi(word_seq)
                for word, predict_tag in zip(word_seq, predict_label):
                    output.write(word + '\t' + predict_tag + '\n')
                output.write('\n')
class PosTagger(object):
    """Tag the sentences of a test file with a feature-weight model.

    Decoding is delegated to ``FeatureVectorWeight.viterbi``; this class
    only wires the data source, the model and the output file together.
    """

    def __init__(self, test_data_path, feature_template_list,
                 feature_weight_vector, output_path):
        """Store the I/O paths and build the scoring model.

        Args:
            test_data_path: path handed to ``read_data`` by ``tag``.
            feature_template_list: feature templates forwarded to
                ``FeatureVectorWeight``.
            feature_weight_vector: weights forwarded to
                ``FeatureVectorWeight`` (presumably the trained
                parameters -- confirm against its definition).
            output_path: file that ``tag`` writes its predictions to.
        """
        self.test_data_path = test_data_path
        self.featureModel = FeatureVectorWeight(
            feature_template_list, list(LABEL_SET), feature_weight_vector)
        self.output_path = output_path

    def _cal_precision(self, sentence, predict_label):
        """Return the fraction of tokens whose predicted tag is correct.

        Args:
            sentence: sequence of items whose element ``[1]`` is the gold
                tag (element ``[0]`` is the word).
            predict_label: predicted tags, aligned with ``sentence``.

        Returns:
            A float in ``[0.0, 1.0]``. ``0.0`` is returned when the
            sentence is empty or carries no gold tags, instead of dividing
            by zero.
        """
        # Nothing to score: empty sentence, or items without a gold tag.
        if not sentence or len(sentence[0]) <= 1:
            return 0.0

        ideal_label = [term[1] for term in sentence]
        # zip() stops at the shorter sequence, so tokens without a
        # prediction are counted as wrong rather than raising IndexError.
        correct = sum(
            1 for gold, guess in zip(ideal_label, predict_label)
            if gold == guess)
        return correct * 1.0 / len(ideal_label)

    def tag(self):
        """Tag every sentence of the test data and write the result.

        Each token is written as ``word<TAB>tag`` on its own line and each
        sentence is followed by one blank line.
        """
        # The context manager guarantees the file is flushed and closed,
        # even if decoding raises part-way through.
        with open(self.output_path, 'w') as output:
            for sentence in read_data(self.test_data_path):
                word_seq = [part[0] for part in sentence]
                predict_label = self.featureModel.viterbi(word_seq)
                for word, predict_tag in zip(word_seq, predict_label):
                    output.write(word + '\t' + predict_tag + '\n')
                output.write('\n')
class Perceptron(object):
    """Train a ``FeatureVectorWeight`` model and persist its weights.

    Each training step decodes a sentence with the current weights via
    ``viterbi`` and then hands the gold and predicted tag sequences to the
    model's ``update`` method.
    """

    def __init__(self, train_data_path, feature_template_list,
                 model_params_path):
        """Store the paths and build an untrained model.

        Args:
            train_data_path: path handed to ``read_data`` by ``train``.
            feature_template_list: feature templates forwarded to
                ``FeatureVectorWeight``.
            model_params_path: file the ``save_params_to_*`` methods
                write to.
        """
        self.train_data_path = train_data_path
        self.featureModel = FeatureVectorWeight(
            feature_template_list, list(LABEL_SET))
        self.model_params_path = model_params_path

    def train(self, iteration):
        """Run ``iteration`` passes over the training data.

        Args:
            iteration: number of passes (hyper-parameter of the
                algorithm).
        """
        for i in range(iteration):
            # Single-argument print works on Python 2 and 3 alike; the two
            # spaces reproduce the output of the former
            # ``print 'iteration: ', i + 1`` statement exactly.
            print('iteration:  %d' % (i + 1))
            for sentence in read_data(self.train_data_path):
                observe_data = [pair[0] for pair in sentence]
                ideal_label = [pair[1] for pair in sentence]
                predict_label = self.featureModel.viterbi(observe_data)
                # Update the feature weights from gold vs. predicted tags.
                self.featureModel.update(
                    observe_data, ideal_label, predict_label)

    def _nonzero_weights(self):
        """Yield ``(feature_key, weight)`` for every non-zero weight."""
        params = self.featureModel.params
        for hashstr in params:
            weight = params[hashstr]
            if weight != 0:
                yield hashstr, weight

    def save_params_to_file(self):
        """Write the non-zero weights as ``key<TAB>weight`` text lines.

        Lines are joined with ``'\\n'`` and no trailing newline is
        written.
        """
        # Build the text first so a failure while reading the weights
        # cannot leave a truncated file behind.
        tmp_str = '\n'.join(
            hashstr + '\t' + str(weight)
            for hashstr, weight in self._nonzero_weights())
        with open(self.model_params_path, 'w') as f:
            f.write(tmp_str)

    def save_params_to_pickle(self):
        """Pickle a dict of the non-zero weights to ``model_params_path``."""
        final = dict(self._nonzero_weights())
        # Pickle data is a byte stream: the file must be opened in binary
        # mode ('w' fails on Python 3 and is not portable on Python 2).
        with open(self.model_params_path, 'wb') as f:
            pickle.dump(final, f)
class Perceptron(object):
    """Train a ``FeatureVectorWeight`` model and persist its weights.

    Each training step decodes a sentence with the current weights via
    ``viterbi`` and then hands the gold and predicted tag sequences to the
    model's ``update`` method.
    """

    def __init__(self, train_data_path, feature_template_list,
                 model_params_path):
        """Store the paths and build an untrained model.

        Args:
            train_data_path: path handed to ``read_data`` by ``train``.
            feature_template_list: feature templates forwarded to
                ``FeatureVectorWeight``.
            model_params_path: file the ``save_params_to_*`` methods
                write to.
        """
        self.train_data_path = train_data_path
        self.featureModel = FeatureVectorWeight(
            feature_template_list, list(LABEL_SET))
        self.model_params_path = model_params_path

    def train(self, iteration):
        """Run ``iteration`` passes over the training data.

        Args:
            iteration: number of passes (hyper-parameter of the
                algorithm).
        """
        for i in range(iteration):
            # Single-argument print works on Python 2 and 3 alike; the two
            # spaces reproduce the output of the former
            # ``print 'iteration: ', i + 1`` statement exactly.
            print('iteration:  %d' % (i + 1))
            for sentence in read_data(self.train_data_path):
                observe_data = [pair[0] for pair in sentence]
                ideal_label = [pair[1] for pair in sentence]
                predict_label = self.featureModel.viterbi(observe_data)
                # Update the feature weights from gold vs. predicted tags.
                self.featureModel.update(
                    observe_data, ideal_label, predict_label)

    def _nonzero_weights(self):
        """Yield ``(feature_key, weight)`` for every non-zero weight."""
        params = self.featureModel.params
        for hashstr in params:
            weight = params[hashstr]
            if weight != 0:
                yield hashstr, weight

    def save_params_to_file(self):
        """Write the non-zero weights as ``key<TAB>weight`` text lines.

        Lines are joined with ``'\\n'`` and no trailing newline is
        written.
        """
        # Build the text first so a failure while reading the weights
        # cannot leave a truncated file behind.
        tmp_str = '\n'.join(
            hashstr + '\t' + str(weight)
            for hashstr, weight in self._nonzero_weights())
        with open(self.model_params_path, 'w') as f:
            f.write(tmp_str)

    def save_params_to_pickle(self):
        """Pickle a dict of the non-zero weights to ``model_params_path``."""
        final = dict(self._nonzero_weights())
        # Pickle data is a byte stream: the file must be opened in binary
        # mode ('w' fails on Python 3 and is not portable on Python 2).
        with open(self.model_params_path, 'wb') as f:
            pickle.dump(final, f)