class JDHandler: def __init__(self): self.extr = JdParser() def parser(self, jdstr): jdstr = jdstr.encode('utf-8') result = self.extr.parser(jdstr) for k in result: if re.search(u"不限", result[k]): result[k] = NO_LIMIT_FIELD elif len(result[k].encode('utf-8')) < 2: print 'k', k result[k] = EMPTY_FIELD result[k] = result[k].encode('utf-8') print k, result[k] output = JD_RESULT_DATA(**result) return output def get_multi(self, jdstr): jdstr = jdstr.encode('utf-8') result = self.extr.split_multi_jd(jdstr) return result
class JDHandler: def __init__(self): self.extr = JdParser() def parser(self,jdstr): jdstr = jdstr.encode('utf-8') result = self.extr.parser(jdstr) for k in result: if re.search(u"不限",result[k]): result[k] = NO_LIMIT_FIELD elif len(result[k].encode('utf-8'))<2: print 'k',k result[k] = EMPTY_FIELD result[k] = result[k].encode('utf-8') print k,result[k] output = JD_RESULT_DATA(**result) return output def get_multi(self,jdstr): jdstr = jdstr.encode('utf-8') result = self.extr.split_multi_jd(jdstr) return result
#!/usr/bin/env python
# coding=utf-8
import tornado.ioloop
import tornado.web
import json
from jd_parser import JdParser


class StringHandler(tornado.web.RequestHandler):
    """Handle POST requests carrying a ``source`` text argument."""

    def post(self):
        """Run the module-level ``clf`` on ``source`` and write the JSON result.

        ``get_argument('source')`` has no default, so Tornado rejects a
        request that omits the argument before this body continues.
        """
        source_text = self.get_argument('source').encode('utf-8')
        # NOTE(review): JdParser.predict is not visible from here - confirm
        # that the parser class actually provides this method.
        result = clf.predict(source_text)
        self.write(json.dumps(result))


# The pattern used to be 'r/string' (the raw-string prefix was typed inside
# the quotes). Request paths always start with '/', so that pattern could
# never match and the handler was unreachable.
app = tornado.web.Application(handlers=[(r'/string', StringHandler)])
clf = JdParser()

if __name__ == "__main__":
    app.listen(8080)
    print('starting tornado')
    tornado.ioloop.IOLoop.instance().start()
def get_output_data(self,jd_parser,fname='./data/lagou_train.txt'): res = [] for jdstr in self.read_train_iter(fname): single_res = jd_parser.parser(jdstr.decode('utf-8')) res.append(deepcopy(single_res)) return res if __name__ == "__main__": test = Evaluate() jdparser = JdParser() input = test.load_test_data() output = test.get_output_data(jdparser) res = test.evaluate(input,output) for k in res: print k,res[k]
def post(self):
    """Match the skills of a posted ``jd`` against those of a posted ``cv``.

    Both arguments default to "" and are UTF-8 encoded. When either encoded
    value is 5 bytes or shorter, an error dict is rendered instead of a
    match result. Otherwise the 'skill' entry of each parse is passed to
    ``extr.match``. The outcome is rendered into "match.html" together with
    the encoded inputs.
    """
    jd = self.get_argument("jd", "").encode('utf-8')
    cv = self.get_argument("cv", "").encode('utf-8')
    # NOTE(review): ``extr`` is a module global that is only bound inside
    # the ``__main__`` block below.
    if len(jd) <= 5 or len(cv) <= 5:
        res = {'error': 'input is too short,length must >5 '}
    else:
        jd_skill = extr.parser(jd)['skill']
        cv_skill = extr.parser(cv)['skill']
        res = extr.match(jd_skill, cv_skill)
    self.render("match.html", jd=jd, cv=cv, result=res)


if __name__ == "__main__":
    # Globals created here are read by the request handlers at runtime.
    genjd = AutoGenJD()
    extr = JdParser()
    tornado.options.parse_command_line()
    # NOTE(review): autoescape=None switches off Tornado's template
    # auto-escaping, and post() renders request-supplied text - confirm
    # the templates escape it themselves.
    app = tornado.web.Application(
        handlers=[
            (r'/', GenJdHandler),
            (r"/genjd", GenJdHandler),
            (r'/jdparser', JdParserHandler),
            (r'/string', StringHandler),
            (r'/match', MatchHandler),
        ],
        template_path=os.path.join(os.path.dirname(__file__), "templates"),
        debug=True,
        autoescape=None,
    )
    print("starting tornado...")
    http_server = tornado.httpserver.HTTPServer(app)
    http_server.listen(options.port)
    tornado.ioloop.IOLoop.instance().start()
def __init__(self):
    """Create the ``JdParser`` instance and keep it on ``self.extr``."""
    jd_extractor = JdParser()
    self.extr = jd_extractor