def init_opt():
    """Parse command-line options, load the scan rules and reset ``report/``.

    Options:
        -f / --file  path of the rules config file (default ``rule.json``)
        -m / --mode  scan mode as a string, ``0``: code, ``1``: repos
                     (default ``'0'``)

    Returns:
        The object returned by ``common.read_data`` for the rules file, with
        its ``'mode'`` key set to the selected mode string.

    Side effects:
        * Exits the process with status 1 when no rules were loaded.
        * Ensures the ``report`` directory exists (relative to the current
          working directory) and removes the non-hidden files inside it.
          Sub-directories are left alone, as ``rm -f report/*`` would do.

    Raises:
        OSError: if ``report`` cannot be created (for example because a
            regular file with that name already exists).
    """
    # Local import: glob is needed only here, for clearing the report dir.
    import glob

    parser = optparse.OptionParser(
        'usage: ./%prog [options] \n'
        'Example:\n'
        ' ./scan.py -f rule.json')
    parser.add_option('-f', '--file', dest='rfile', default='rule.json',
                      type='string', help='rules config file.')
    parser.add_option('-m', '--mode', dest='mode', default='0',
                      type='string', help='scan mode, 0:code, 1:repos')
    options, _args = parser.parse_args()

    rules = common.read_data(options.rfile)
    # Truthiness also covers a ``None`` result, which ``len()`` would choke on.
    if not rules:
        print("no rule find, exit.")
        sys.exit(1)
    rules['mode'] = options.mode

    # Portable replacement for ``mkdir -p report``.
    report_dir = 'report'
    if not os.path.isdir(report_dir):
        os.makedirs(report_dir)

    # Portable replacement for ``rm -f report/*``: the ``*`` pattern skips
    # dot-files exactly like the shell glob did.
    for entry in glob.glob(os.path.join(report_dir, '*')):
        if os.path.isdir(entry) and not os.path.islink(entry):
            continue  # plain ``rm`` (no -r) never removes directories
        try:
            os.remove(entry)
        except OSError:
            # Best-effort cleanup: the original ignored rm's exit status.
            pass
    return rules
def tag(self):
    """Tag every sentence of the test data and write the result to disk.

    Reads sentences via ``read_data(self.test_data_path)``, takes the first
    element of each item in a sentence as the word, decodes a tag sequence
    with ``self.featureModel.viterbi`` and writes to ``self.output_path``:

        word<TAB>tag    (one line per word)
        <empty line>    (after every sentence)

    An existing output file is overwritten.
    """
    # ``with`` closes (and therefore flushes) the file even when decoding
    # raises; the previous version never closed the handle.
    with open(self.output_path, 'w') as output:
        for sentence in read_data(self.test_data_path):
            word_seq = [part[0] for part in sentence]
            predict_label = self.featureModel.viterbi(word_seq)
            for word, predict_tag in zip(word_seq, predict_label):
                output.write(word + '\t' + predict_tag + '\n')
            # Blank line separates consecutive sentences.
            output.write('\n')
def train(self, iteration): for i in xrange(iteration): print 'iteration: ', i + 1 for sentence in read_data(self.train_data_path): observe_data = [pair[0] for pair in sentence] ideal_label = [pair[1] for pair in sentence] predict_label = self.featureModel.viterbi(observe_data) #update feature vector self.featureModel.update(observe_data, ideal_label, predict_label)
# -*- coding: utf-8 -*-
'''
Created on Dec 27, 2015

@author: jielin

Script entry point: builds a PosTagger from the configured test-data,
parameter and output paths and runs its tag() method.
'''
import sys
from GlobalLinearModel.PerceptronOnlineLearning import Perceptron
from GlobalLinearModel.PosTagger import PosTagger
from utils.common import read_data, PARAM_PATH, TEST_DATA_PATH, OUTPUT_PATH
from GlobalLinearModel.gen_features import features

if __name__ == '__main__':
    # count all tags: one full pass over the test data with an empty loop
    # body. NOTE(review): this only matters if read_data has side effects
    # (presumably it registers the tags it sees) -- confirm before removing.
    for _sentence in read_data(TEST_DATA_PATH):
        pass

    tagger = PosTagger(TEST_DATA_PATH, features, PARAM_PATH, OUTPUT_PATH)
    tagger.tag()
# -*- coding: utf-8 -*-
'''
Created on Dec 27, 2015

@author: myue

Script entry point: builds a Perceptron from the configured training-data
and parameter paths, trains it for ITER_NUM passes and saves the parameters
to file.
'''
import sys
from GlobalLinearModel.PerceptronOnlineLearning import Perceptron
from utils.common import read_data, TRAIN_DATA_PATH, PARAM_PATH, ITER_NUM, BATCH_NUM
from GlobalLinearModel.gen_features import features

if __name__ == '__main__':
    # load all label: one full pass over the training data, counting the
    # samples. The count is not used further in this script.
    # NOTE(review): the pass presumably exists for read_data's side effects
    # (label collection) -- confirm before removing.
    sample_num = sum(1 for _ in read_data(TRAIN_DATA_PATH))

    trainer = Perceptron(TRAIN_DATA_PATH, features, PARAM_PATH)
    trainer.train(ITER_NUM)
    trainer.save_params_to_file()
    # Disabled alternative kept from the original script:
    # trainer.save_params_to_pickle()