Exemple #1
0
def index():
    """Serve the index page; on POST, analyse the submitted sentence.

    A POST request reads the ``sentence`` form field, passes it to
    ``api.segment`` and ``api.run``, and renders ``index.html`` with the raw
    text, the segmentation and the contexts. Any other request method renders
    the page with ``raw`` only.
    """
    if request.method != "POST":
        # NOTE(review): `raw` is not assigned anywhere in this function, so it
        # must resolve to a module-level name -- confirm that it exists.
        return myRender(render_template("index.html", raw=raw))

    submitted = request.form["sentence"]
    segmented = api.segment(submitted)
    found_contexts = api.run(submitted)
    page = render_template(
        "index.html",
        raw=submitted,
        seg=segmented,
        contexts=found_contexts,
    )
    return myRender(page)
Exemple #2
0
def segmentation(args, mode, result_dir, model, rst_data, logger):
    """Segment every JSON document of one dataset split and store the results.

    For each file in ``<args.input_dir>/<mode>`` whose name ends with ``json``
    the strings under its ``article`` key are written, one per line, to a
    temporary ``article`` sub-directory. ``segment`` is then run, and for every
    file the ``edu`` and ``sentence`` entries read from the temporary
    ``segment`` sub-directory are merged into the original document, which is
    saved under ``<result_dir>/<mode>``. Both temporary directories are
    removed at the end.

    Args:
        args: Options object; ``input_dir`` is read, and ``file_dir`` and
            ``result_dir`` are overwritten to point ``segment`` at the
            temporary directories.
        mode: Name of the split; used as a sub-directory name and in logs.
        result_dir: Root directory for the merged output files.
        model: Passed through to ``segment`` unchanged.
        rst_data: Passed through to ``segment`` unchanged.
        logger: Logger used for progress and per-file failure messages.

    Raises:
        OSError: If the input directory is missing, the output directory
            cannot be created, or a temporary directory is not empty when it
            is removed (e.g. after a per-file merge failure).
    """
    logger.info('Start segmenting {} dataset'.format(mode))
    path = os.path.join(args.input_dir, mode)
    article_path = os.path.join(path, 'article')
    result_dir = os.path.join(result_dir, mode)

    # Only "already exists" is an expected, harmless outcome here; the input
    # directory itself is deliberately not created on the caller's behalf.
    try:
        os.mkdir(article_path)
    except FileExistsError:
        pass
    # Create missing parents as well: otherwise every output write below
    # would fail and be reported only as a per-file merge error.
    os.makedirs(result_dir, exist_ok=True)

    data_list = [name for name in os.listdir(path) if name.endswith('json')]
    for data_name in data_list:
        with open(os.path.join(path, data_name)) as f:
            data = json.load(f)
        with open(os.path.join(article_path, data_name), 'w') as f:
            f.writelines(sent + '\n' for sent in data['article'])

    args.file_dir = article_path
    args.result_dir = os.path.join(path, 'segment')
    segment(args, model, rst_data, logger)

    for data_name in data_list:
        # Merging is best-effort per file: one bad document must not abort
        # the whole split, but the failure is logged instead of being hidden.
        try:
            os.remove(os.path.join(article_path, data_name))
            with open(os.path.join(path, data_name)) as f:
                data = json.load(f)
            segmented_path = os.path.join(args.result_dir, data_name)
            with open(segmented_path) as f:
                segmented = json.load(f)
            data['edu'] = segmented['edu']
            data['sentence'] = segmented['sentence']
            os.remove(segmented_path)
            with open(os.path.join(result_dir, data_name), 'w') as f:
                json.dump(data, f, indent=4, separators=(',', ':'))
        except Exception:
            logger.exception('Failed to merge segmentation of %s', data_name)

    os.rmdir(article_path)
    os.rmdir(args.result_dir)
    logger.info('Finish segmenting {} dataset'.format(mode))
Exemple #3
0
    def index(self):
        """Render the tokenizer page for the ``content`` query parameter.

        The URL-unquoted, stripped ``content`` is segmented by ``api.segment``
        with the requested ``algorithm``; the ``word`` attributes of the
        resulting items are joined with two spaces and handed to
        ``tokenizer.html`` together with the content and the algorithm.
        """
        raw_content = request.args.get("content", "")
        content = str(urllib2.unquote(raw_content)).strip()
        algorithm = request.args.get("algorithm")

        tokens = api.segment(content, algorithm)
        words = [token.word for token in tokens]

        return self.render('tokenizer.html',
                           algorithm=algorithm,
                           content=content,
                           sentence="  ".join(words))
Exemple #4
0
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    if args.log_path:
        file_handler = logging.FileHandler(args.log_path)
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
    else:
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.INFO)
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)

    logger.info('Running with args : {}'.format(args))

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    if args.gpu is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    random.seed(args.seed)
    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    if args.prepare:
        prepare(args)
    if args.train:
        train(args)
    if args.evaluate:
        evaluate(args)
    if args.segment:
        segment(args)