def index():
    """Serve the index page.

    On a POST request the submitted ``sentence`` form field is passed to
    ``api.segment`` and ``api.run``; the raw text and both results are handed
    to the ``index.html`` template. Any other request method renders the
    template with ``raw`` only. The rendered page is always passed through
    ``myRender`` before being returned.
    """
    if request.method != "POST":
        # NOTE(review): `raw` is not assigned inside this function --
        # presumably a module-level name; confirm it exists.
        return myRender(render_template("index.html", raw=raw))

    sentence_raw = request.form["sentence"]
    # Keyword arguments are evaluated left to right, so api.segment still
    # runs before api.run.
    page = render_template(
        "index.html",
        raw=sentence_raw,
        seg=api.segment(sentence_raw),
        contexts=api.run(sentence_raw),
    )
    return myRender(page)
def segmentation(args, mode, result_dir, model, rst_data, logger):
    """Segment every JSON document of one dataset split and save merged results.

    For each file under ``<args.input_dir>/<mode>`` whose name ends with
    ``json``, the entries of its ``'article'`` list are written one per line
    into a scratch ``article`` sub-directory. ``segment`` is then called on
    that directory, and for each document the ``'edu'`` and ``'sentence'``
    entries of the matching file in ``args.result_dir`` are copied into the
    original document, which is dumped to ``<result_dir>/<mode>``.

    Args:
        args: Namespace-like object. ``input_dir`` is read; ``file_dir`` and
            ``result_dir`` are overwritten before ``segment`` is called.
        mode: Name of the sub-directory (dataset split) to process.
        result_dir: Root output directory; results go to ``result_dir/mode``.
        model: Passed through to ``segment`` unchanged.
        rst_data: Passed through to ``segment`` unchanged.
        logger: Logger used for progress and failure messages.

    Raises:
        OSError: If the input directory is missing or unreadable, or if a
            scratch directory cannot be removed at the end (for example
            because a file that failed to merge was left behind in it).
    """
    logger.info('Start segmenting {} dataset'.format(mode))

    path = os.path.join(args.input_dir, mode)
    article_path = os.path.join(path, 'article')
    result_dir = os.path.join(result_dir, mode)

    # Only an already-existing scratch directory is tolerated; a missing
    # input directory surfaces here instead of being silently ignored.
    try:
        os.mkdir(article_path)
    except FileExistsError:
        pass
    # Create missing parents too, otherwise every later write would fail.
    os.makedirs(result_dir, exist_ok=True)

    data_list = [name for name in os.listdir(path) if name.endswith('json')]

    # Export each article as plain text, one entry per line.
    for data_name in data_list:
        with open(os.path.join(path, data_name)) as f:
            data = json.load(f)
        with open(os.path.join(article_path, data_name), 'w') as f:
            for sent in data['article']:
                f.write(sent + '\n')

    args.file_dir = article_path
    args.result_dir = os.path.join(path, 'segment')
    segment(args, model, rst_data, logger)

    # Merge the segmenter output back into each document. This stays
    # best-effort per file, but failures are now logged instead of hidden.
    for data_name in data_list:
        try:
            os.remove(os.path.join(article_path, data_name))
            with open(os.path.join(path, data_name)) as f:
                data = json.load(f)
            with open(os.path.join(args.result_dir, data_name)) as f:
                segmented = json.load(f)
            data['edu'] = segmented['edu']
            data['sentence'] = segmented['sentence']
            os.remove(os.path.join(args.result_dir, data_name))
            with open(os.path.join(result_dir, data_name), 'w') as f:
                json.dump(data, f, indent=4, separators=(',', ':'))
        except Exception:
            logger.exception(
                'Failed to merge segmentation result for %s', data_name)

    # Remove the (expected to be empty) scratch directories.
    os.rmdir(article_path)
    os.rmdir(args.result_dir)
    logger.info('Finish segmenting {} dataset'.format(mode))
def index(self):
    """Render the tokenizer page for the ``content`` query parameter.

    The ``content`` query argument (default ``""``) is URL-unquoted,
    converted with ``str`` and stripped, then passed to ``api.segment``
    together with the ``algorithm`` query argument. The ``word`` attribute of
    each returned item is joined with single spaces and handed to
    ``tokenizer.html`` along with the cleaned content and the algorithm.
    """
    raw_content = request.args.get("content", "")
    content = str(urllib2.unquote(raw_content)).strip()
    algorithm = request.args.get("algorithm")

    words = []
    for token in api.segment(content, algorithm):
        words.append(token.word)
    sentence = " ".join(words)

    return self.render(
        'tokenizer.html',
        algorithm=algorithm,
        content=content,
        sentence=sentence,
    )
'%(asctime)s - %(name)s - %(levelname)s - %(message)s') if args.log_path: file_handler = logging.FileHandler(args.log_path) file_handler.setLevel(logging.INFO) file_handler.setFormatter(formatter) logger.addHandler(file_handler) else: console_handler = logging.StreamHandler() console_handler.setLevel(logging.INFO) console_handler.setFormatter(formatter) logger.addHandler(console_handler) logger.info('Running with args : {}'.format(args)) os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" if args.gpu is not None: os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu random.seed(args.seed) np.random.seed(args.seed) tf.set_random_seed(args.seed) if args.prepare: prepare(args) if args.train: train(args) if args.evaluate: evaluate(args) if args.segment: segment(args)