コード例 #1
0
    def _from_json_entity_data_returner(self, json_path: str):
        """Read truncated entity title tokens and description tokens.

        The object returned by ``jopen(json_path)`` is indexed with
        ``'doc_title2sents'`` and the resulting mapping's values are
        processed one by one. The mapping's keys are not used: the "name"
        returned for each entity is its ``'sudachi_tokenized_title'`` value.

        Args:
            json_path: Path handed to ``jopen``.

        Returns:
            A tuple ``(entity_names, descriptions)`` of two parallel lists
            with one element per entity, in the mapping's iteration order:

            * ``entity_names[i]`` is the entity's
              ``'sudachi_tokenized_title'`` cut to at most
              ``self.max_token_in_one_entity_name`` items.
            * ``descriptions[i]`` is a flat list built from the first
              ``self.max_sent_from_one_entity`` entries of
              ``'sudachi_tokenized_sents'``, each cut to at most
              ``self.max_token_in_one_sentence_of_entity_desc`` items and
              concatenated in order.

        Raises:
            KeyError: If ``'doc_title2sents'`` or one of the per-entity keys
                is missing.
        """
        entity_data = jopen(json_path)['doc_title2sents']

        max_title_tokens = self.max_token_in_one_entity_name
        max_sents = self.max_sent_from_one_entity
        max_sent_tokens = self.max_token_in_one_sentence_of_entity_desc

        entity_names, descriptions = [], []
        for tokenized_data in entity_data.values():
            title = tokenized_data['sudachi_tokenized_title']
            sents = tokenized_data['sudachi_tokenized_sents']

            entity_names.append(title[:max_title_tokens])
            # Truncate the sentence list, truncate every kept sentence, and
            # flatten the result into one token list in a single pass.
            descriptions.append([
                token
                for sent in sents[:max_sents]
                for token in sent[:max_sent_tokens]
            ])

        return entity_names, descriptions
コード例 #2
0
def _m2_collect_from_one_json(json_path: str) -> List[Tuple[str, str]]:
    """Collect ``(mention, linked document title)`` pairs from one file.

    Args:
        json_path: Path handed to ``jopen``. The loaded object must provide
            an ``'annotations'`` iterable whose items each have the keys
            ``'mention'`` and ``'annotation_doc_entity_title'``.

    Returns:
        One ``(mention, title)`` tuple for every annotation whose
        ``'annotation_doc_entity_title'`` is not ``None``, in the iteration
        order of ``'annotations'``.

    Raises:
        KeyError: If ``'annotations'`` or one of the per-annotation keys is
            missing.
    """
    annotations = jopen(json_path)['annotations']

    m2e = []
    for annotation in annotations:
        mention = annotation['mention']
        title = annotation['annotation_doc_entity_title']
        # Identity check: only a real ``None`` means "no linked title";
        # other falsy values such as an empty string are kept.
        if title is not None:
            m2e.append((mention, title))

    return m2e
コード例 #3
0
ファイル: dataset_reader.py プロジェクト: izuna385/jel
 def _title2doc_loader(self) -> dict:
     """Return what ``jopen`` loads from ``self.config.title2doc_file_path``.

     The result is annotated as a ``dict``; its key/value layout is not
     visible here (presumably document title -> document; confirm against
     the file producer).
     """
     title2doc_path = self.config.title2doc_file_path
     return jopen(file_path=title2doc_path)
コード例 #4
0
ファイル: dataset_reader.py プロジェクト: izuna385/jel
    def _test_loader(self) -> List[Dict]:
        """Return the ``'test'`` entry of the bi-encoder dataset file.

        The file at ``self.config.biencoder_dataset_file_path`` is loaded
        with ``jopen`` and indexed with ``'test'``; a missing key raises
        ``KeyError``.
        """
        dataset_path = self.config.biencoder_dataset_file_path
        return jopen(file_path=dataset_path)['test']