Esempio n. 1
0
parser.add_option("-s", "--seq_size", action="store", dest="seq_size", default=False, help=u"序列长度")
(options, args) = parser.parse_args()

# All three inputs are mandatory. seq_size is declared above with
# default=False, so a value equal to False means the flag was not supplied;
# train_file and model_file are presumably declared the same way -- TODO confirm.
# The checks short-circuit in the original order: train_file, seq_size, model_file.
if (options.train_file == False
        or options.seq_size == False
        or options.model_file == False):
    parser.print_help()
    sys.exit()

trainFile = options.train_file
# Read the option that was just validated. The previous code read
# options.save_file, which is not validated anywhere in the visible code.
modelFile = options.model_file
# seq_size arrives as a string from the command line; int() raises ValueError
# if it is not numeric.
maxSeqLen = int(options.seq_size)

text    =   open(trainFile).read().decode("utf8")
util    =   TaggingUtil()
(X,Y)   =   util.textToTrainData(text,maxSeqLen)

taggingModel   =   TaggingModel()
taggingModel.loadModel(modelFile)

while True:
    query       = sys.stdin.readline().decode("utf8").strip()
    if query == "":
        break
    (qX,qY)     = util.textToTrainData(" ".join([w for w in query]),maxSeqLen)
    qY          = taggingModel.predict(qX)
    result      = util.trainDataToText((qX,qY), maxSeqLen)
    print result
Esempio n. 2
0
    parser.print_help()
    sys.exit()
# iter_count and model_file are both required: show the usage text and stop
# when either one still equals False (i.e. was not supplied).
if options.iter_count == False or options.model_file == False:
    parser.print_help()
    sys.exit()

# Pull the validated options into plain variables; the two numeric options
# are converted with int().
trainFile, modelFile = options.train_file, options.model_file
maxSeqLen = int(options.seq_size)
iterCount = int(options.iter_count)
    
# Read the whole training corpus; the with-block guarantees the file handle is
# closed even if reading or decoding fails.
with open(trainFile) as corpus:
    text = corpus.read().decode("utf8")

util = TaggingUtil()
(X, Y) = util.textToTrainData(text, maxSeqLen)
# 80% of the samples go to training and the remainder to testing (assuming
# scikit-learn's train_test_split -- TODO confirm); random_state is fixed so
# the split is repeatable between runs.
trainX, testX, trainY, testY = train_test_split(X, Y, train_size=0.8, random_state=1)

# Size the model from the vocabulary collected by util and the sequence length.
taggingModel = TaggingModel()
taggingModel.init(len(util.words), len(util.tags), maxSeqLen)

print "----------数据信息---------"
print "    Tag数 :",len(util.tags)
print "    Word数 :",len(util.words)
print "    训练样本数 :",len(X)
print

print "----------数据抽样---------"
print "X :"
print X[:3]