コード例 #1
0
ファイル: web_server.py プロジェクト: peterwilliams97/blank
def markup(text_path, ann_path, html_path):
    """Render the text in `text_path`, marked up with its annotations, as HTML in `html_path`.

    The text and its annotations are read with
    `get_entities_from_brat(text_path, ann_path)`. Each annotation is rendered as
    `<b>text</b> [type] `, and the abridged stretches of text before, between and
    after the annotations are emitted around them in document order.

    Everything taken from the input files is HTML-escaped, so characters such as
    `<`, `>` and `&` are displayed literally instead of being parsed as markup.

    If there are no annotations, nothing is written and the function returns None.
    """
    import html  # local import so the block does not depend on file-level imports

    text, ann = get_entities_from_brat(text_path, ann_path)
    print(len(ann), text_path, html_path)
    if not ann:
        return

    def esc(s):
        # The text only ever lands in element content (never in an attribute),
        # so quotes can be left untouched.
        return html.escape(str(s), quote=False)

    # Text before the first annotation, between consecutive annotations, and after
    # the last one: always len(ann) + 1 pieces.
    gaps = [text[:ann[0]['start']]]
    gaps.extend(text[a['end']:b['start']] for a, b in zip(ann[:-1], ann[1:]))
    gaps.append(text[ann[-1]['end']:])
    # Escape after abridging so that shortening works on raw characters and cannot
    # cut an entity such as `&amp;` in half.
    # NOTE(review): assumes abridge() returns plain text, not HTML -- confirm.
    gaps = [esc(abridge(g)) for g in gaps]
    words = ['<b>%s</b> [%s] ' % (esc(a['text']), esc(a['type'])) for a in ann]

    # Interleave: gap, word, gap, word, ..., final gap.
    gw = [g + w for g, w in zip(gaps, words)]
    gw.append(gaps[-1])
    body = '<body>%s</body>' % ''.join(gw)
    marked = '<html>%s</html>' % body

    write_file(html_path, marked)
コード例 #2
0
    def predict(self, text):
        """Run a prediction on `text` and return the entities read back from brat output.

        Steps, in order:
          1. Increment `self.prediction_count`; on the first call, point
             `dataset_text_folder` at `../data/temp` and create the stats graph folder.
          2. Remove every existing `deploy*` file or folder under the dataset text
             folder, then write `text` (UTF-8) to a fresh `deploy/temp_NNNNN.txt`.
          3. Refresh the deploy file paths / brat folders and update the dataset.
          4. Run `train.prediction_step` and convert its output with
             `conll_to_brat.output_brat`.
          5. Read text and entities back with `brat_to_conll.get_entities_from_brat`,
             assert the text is unchanged, and return the entities.

        NOTE(review): the leading `assert False` makes this method raise
        AssertionError immediately whenever assertions are enabled -- presumably a
        deliberate way of disabling it; confirm before relying on this method.
        """
        assert False
        self.prediction_count += 1

        first_call = self.prediction_count == 1
        if first_call:
            self.parameters['dataset_text_folder'] = os.path.join('..', 'data', 'temp')
            self.stats_graph_folder, _ = self._create_stats_graph_folder(self.parameters)

        # --- Rebuild the deploy folder, file, and dataset ---
        deploy = 'deploy'

        # Wipe whatever is left over from a previous deployment.
        stale_pattern = os.path.join(self.parameters['dataset_text_folder'],
                                     '{0}*'.format(deploy))
        for stale in glob.glob(stale_pattern):
            if not os.path.isdir(stale):
                os.remove(stale)
            else:
                shutil.rmtree(stale)

        # Write the input text where the dataset loader will look for it.
        deploy_folder = os.path.join(self.parameters['dataset_text_folder'], deploy)
        utils.create_folder_if_not_exists(deploy_folder)
        padded_count = str(self.prediction_count).zfill(5)
        deploy_txt = os.path.join(deploy_folder, 'temp_{0}.txt'.format(padded_count))
        with codecs.open(deploy_txt, 'w', 'UTF-8') as out:
            out.write(text)

        # Register the new deploy file paths, then reload the deploy set.
        new_filepaths, new_brat_folders = self._get_valid_dataset_filepaths(
            self.parameters, dataset_types=[deploy])
        self.dataset_filepaths.update(new_filepaths)
        self.dataset_brat_folders.update(new_brat_folders)
        self.dataset.update_dataset(self.dataset_filepaths, [deploy])

        # --- Predict labels and emit brat output ---
        _, _, deploy_output = train.prediction_step(
            self.sess, self.dataset, deploy, self.model,
            self.transition_params_trained, self.stats_graph_folder,
            self.prediction_count, self.parameters, self.dataset_filepaths)
        output_filepaths = {deploy: deploy_output}
        conll_to_brat.output_brat(output_filepaths, self.dataset_brat_folders,
                                  self.stats_graph_folder, overwrite=True)

        # --- Read the result back ---
        txt_name = os.path.basename(deploy_txt)
        text_filepath = os.path.join(self.stats_graph_folder, 'brat', 'deploy', txt_name)
        ann_name = '{0}.ann'.format(utils.get_basename_without_extension(deploy_txt))
        annotation_filepath = os.path.join(self.stats_graph_folder, 'brat', 'deploy',
                                           ann_name)
        text2, entities = brat_to_conll.get_entities_from_brat(
            text_filepath, annotation_filepath, verbose=True)
        # The brat round trip must not alter the input text.
        assert text == text2
        return entities