コード例 #1
0
def init_opt():
    """Parse the command line, load the rules and reset the report directory.

    Options:
        -f/--file  rules config file (default ``rule.json``).
        -m/--mode  scan mode as a string, ``0`` for code or ``1`` for repos
                   (default ``'0'``).

    Returns:
        The object produced by ``common.read_data`` for the rules file, with
        the selected mode stored under the ``'mode'`` key.

    Raises:
        SystemExit: with status 1 when no rules were loaded.
        OSError: when the ``report`` directory cannot be created.
    """
    # Local import: only the report clean-up below needs it.
    import glob

    parser = optparse.OptionParser('usage: ./%prog [options] \n'
                                   'Example:\n'
                                   '		./scan.py -f rule.json')
    parser.add_option('-f',
                      '--file',
                      dest='rfile',
                      default='rule.json',
                      type='string',
                      help='rules config file.')
    parser.add_option('-m',
                      '--mode',
                      dest='mode',
                      default='0',
                      type='string',
                      help='scan mode, 0:code, 1:repos')
    options, _args = parser.parse_args()

    rules = common.read_data(options.rfile)
    # Truthiness also covers a loader that hands back None instead of an
    # empty container, which len() would reject with a TypeError.
    if not rules:
        print("no rule find, exit.")
        sys.exit(1)

    rules['mode'] = options.mode

    # Equivalent of `mkdir -p report` without spawning a shell.
    if not os.path.isdir('report'):
        os.makedirs('report')

    # Equivalent of `rm -f report/*`: plain files and links only, hidden
    # entries and sub-directories are left alone.
    for stale in glob.glob(os.path.join('report', '*')):
        if os.path.isfile(stale) or os.path.islink(stale):
            try:
                os.remove(stale)
            except OSError:
                # Best effort, as before: the old shell call's exit status
                # was never checked either.
                pass
    return rules
コード例 #2
0
ファイル: PosTagger.py プロジェクト: jiellin/GlobeLinearModel
 def tag(self):
     """Write the predicted tag of every word to ``self.output_path``.

     Each sentence yielded by ``read_data(self.test_data_path)`` is reduced
     to the first element of each of its parts (the word), decoded with
     ``self.featureModel.viterbi`` and written as one ``word<TAB>tag`` line
     per word, followed by an empty line after the sentence.
     """
     # The context manager guarantees the file is flushed and closed, even
     # when decoding raises half-way through.
     with open(self.output_path, 'w') as output:
         for sentence in read_data(self.test_data_path):
             word_seq = [part[0] for part in sentence]
             predict_label = self.featureModel.viterbi(word_seq)
             for (word, predict_tag) in zip(word_seq, predict_label):
                 output.write(word + '\t' + predict_tag + '\n')
             # Blank line separates consecutive sentences.
             output.write('\n')
コード例 #3
0
 def tag(self):
     """Write the predicted tag of every word to ``self.output_path``.

     Each sentence yielded by ``read_data(self.test_data_path)`` is reduced
     to the first element of each of its parts (the word), decoded with
     ``self.featureModel.viterbi`` and written as one ``word<TAB>tag`` line
     per word, followed by an empty line after the sentence.
     """
     # The context manager guarantees the file is flushed and closed, even
     # when decoding raises half-way through.
     with open(self.output_path, 'w') as output:
         for sentence in read_data(self.test_data_path):
             word_seq = [part[0] for part in sentence]
             predict_label = self.featureModel.viterbi(word_seq)
             for (word, predict_tag) in zip(word_seq, predict_label):
                 output.write(word + '\t' + predict_tag + '\n')
             # Blank line separates consecutive sentences.
             output.write('\n')
コード例 #4
0
 def train(self, iteration):
     """Run ``iteration`` passes over the training data.

     In every pass each sentence yielded by
     ``read_data(self.train_data_path)`` is split into its observations
     (element 0 of each pair) and its reference labels (element 1), decoded
     with ``self.featureModel.viterbi`` and handed to
     ``self.featureModel.update`` together with the reference labels.

     Args:
         iteration: number of passes; zero or a negative value does nothing.
     """
     # range() and the single-argument print() call run on Python 2 and 3.
     for i in range(iteration):
         # Two spaces keep the exact text the old print statement produced.
         print('iteration:  ' + str(i + 1))
         for sentence in read_data(self.train_data_path):
             observe_data = [pair[0] for pair in sentence]
             ideal_label = [pair[1] for pair in sentence]
             predict_label = self.featureModel.viterbi(observe_data)
             # update feature vector
             self.featureModel.update(observe_data, ideal_label, predict_label)
コード例 #5
0
 def train(self, iteration):
     """Run ``iteration`` passes over the training data.

     In every pass each sentence yielded by
     ``read_data(self.train_data_path)`` is split into its observations
     (element 0 of each pair) and its reference labels (element 1), decoded
     with ``self.featureModel.viterbi`` and handed to
     ``self.featureModel.update`` together with the reference labels.

     Args:
         iteration: number of passes; zero or a negative value does nothing.
     """
     # range() and the single-argument print() call run on Python 2 and 3.
     for i in range(iteration):
         # Two spaces keep the exact text the old print statement produced.
         print('iteration:  ' + str(i + 1))
         for sentence in read_data(self.train_data_path):
             observe_data = [pair[0] for pair in sentence]
             ideal_label = [pair[1] for pair in sentence]
             predict_label = self.featureModel.viterbi(observe_data)
             # update feature vector
             self.featureModel.update(observe_data, ideal_label,
                                      predict_label)
コード例 #6
0
ファイル: predict.py プロジェクト: jiellin/GlobeLinearModel
# -*- coding: utf-8 -*-
'''
Created on Dec 27, 2015

@author: jielin
'''

import sys
from GlobalLinearModel.PerceptronOnlineLearning import Perceptron
from GlobalLinearModel.PosTagger import PosTagger
from utils.common import read_data, PARAM_PATH, TEST_DATA_PATH, OUTPUT_PATH
from GlobalLinearModel.gen_features import features

if __name__ == '__main__':
    # Walk the test data once and discard every item before tagging.
    # NOTE(review): the original comment said "count all tags", so
    # read_data presumably records tags as a side effect -- confirm before
    # dropping this pass.
    for _sentence in read_data(TEST_DATA_PATH):
        continue

    # Build the tagger from the configured paths and write its predictions.
    tagger = PosTagger(TEST_DATA_PATH, features, PARAM_PATH, OUTPUT_PATH)
    tagger.tag()
コード例 #7
0
# -*- coding: utf-8 -*-
'''
Created on Dec 27, 2015

@author: myue
'''

import sys
from GlobalLinearModel.PerceptronOnlineLearning import Perceptron
from utils.common import read_data, TRAIN_DATA_PATH, PARAM_PATH, ITER_NUM, BATCH_NUM
from GlobalLinearModel.gen_features import features

if __name__ == '__main__':
    # One full pass over the training data, counting the samples.
    # NOTE(review): the count is not used afterwards; the original comment
    # said "load all label", so the pass itself presumably matters through
    # a side effect of read_data -- confirm before removing it.
    sample_num = sum(1 for _ in read_data(TRAIN_DATA_PATH))

    # Train the perceptron for the configured number of iterations and
    # store the learned parameters.
    model = Perceptron(TRAIN_DATA_PATH, features, PARAM_PATH)
    model.train(ITER_NUM)
    model.save_params_to_file()
    # The original also carried a disabled save_params_to_pickle() call.