예제 #1
0
 def test_dataset_path_iterator(self):
     """dataset_path_iterator yields every .gold_conll file under the fixture root."""
     reader = Ontonotes()
     found_paths = list(reader.dataset_path_iterator('tests/fixtures/conll_2012/'))
     expected_paths = [
         'tests/fixtures/conll_2012/subdomain/example.gold_conll',
         'tests/fixtures/conll_2012/subdomain2/example.gold_conll',
     ]
     # Compare as sets: only membership matters, not traversal order.
     assert len(found_paths) == len(expected_paths)
     assert set(found_paths) == set(expected_paths)
예제 #2
0
 def test_dataset_path_iterator(self):
     """Both fixture subdomains' example.gold_conll files are discovered."""
     reader = Ontonotes()
     fixture_dir = self.FIXTURES_ROOT / 'conll_2012'
     found_paths = list(reader.dataset_path_iterator(fixture_dir))
     expected_paths = [
         str(fixture_dir / subdomain / 'example.gold_conll')
         for subdomain in ('subdomain', 'subdomain2')
     ]
     # Order of directory traversal is irrelevant; compare as sets.
     assert len(found_paths) == len(expected_paths)
     assert set(found_paths) == set(expected_paths)
예제 #3
0
    def read(self, file_path: str):
        """
        Read every CoNLL-2012 formatted document found under ``file_path``
        and build one coreference instance per document.

        Parameters
        ----------
        file_path : str
            A local directory path or a URL; URLs are resolved to a local
            cached copy via ``cached_path``.

        Returns
        -------
        A ``Dataset`` containing one instance per document.

        Raises
        ------
        ConfigurationError
            If no instances could be read from ``file_path``.
        """
        # if `file_path` is a URL, redirect to the cache
        file_path = cached_path(file_path)

        instances = []
        ontonotes_reader = Ontonotes()
        for document in ontonotes_reader.dataset_path_iterator(file_path):
            # Maps each cluster id to its token spans, expressed in
            # document-level token offsets.
            clusters: DefaultDict[int, List[Tuple[
                int, int]]] = collections.defaultdict(list)
            # `list(...)` instead of a pass-through comprehension.
            sentences = list(ontonotes_reader.sentence_iterator(document))

            total_tokens = 0
            for sentence in sentences:
                # Coref annotations are on a _per sentence_ basis, so we
                # need to adjust them to be relative to the length of the
                # document.
                for span_id, (start, end) in sentence.coref_spans:
                    clusters[span_id].append(
                        (start + total_tokens, end + total_tokens))
                total_tokens += len(sentence.words)

            canonical_clusters = canonicalize_clusters(clusters)
            instances.append(
                self.text_to_instance([s.words for s in sentences],
                                      canonical_clusters))

        if not instances:
            raise ConfigurationError(
                "No instances were read from the given filepath {}. "
                "Is the path correct?".format(file_path))
        return Dataset(instances)
예제 #4
0
 def test_dataset_path_iterator(self):
     """dataset_path_iterator finds every .gold_conll file under the root.

     Compare length + set rather than an exact list: directory traversal
     order is filesystem-dependent, so pinning the order makes the test
     flaky. This also matches how the sibling tests assert.
     """
     reader = Ontonotes()
     files = list(
         reader.dataset_path_iterator('tests/fixtures/conll_2012/'))
     expected_paths = [
         'tests/fixtures/conll_2012/subdomain/example.gold_conll',
         'tests/fixtures/conll_2012/subdomain2/example.gold_conll'
     ]
     assert len(files) == len(expected_paths)
     assert set(files) == set(expected_paths)
예제 #5
0
 def test_dataset_path_iterator(self):
     """The iterator locates exactly the two fixture .gold_conll files."""
     reader = Ontonotes()
     fixture_root = self.FIXTURES_ROOT / "conll_2012"
     discovered = list(reader.dataset_path_iterator(fixture_root))
     expected_paths = [
         str(fixture_root / subdomain / "example.gold_conll")
         for subdomain in ("subdomain", "subdomain2")
     ]
     # Membership, not ordering, is what we care about here.
     assert len(discovered) == len(expected_paths)
     assert set(discovered) == set(expected_paths)
예제 #6
0
 def test_dataset_path_iterator(self):
     """Each fixture subdomain contributes exactly one .gold_conll file."""
     reader = Ontonotes()
     discovered = list(
         reader.dataset_path_iterator('tests/fixtures/conll_2012/'))
     expected_paths = [
         'tests/fixtures/conll_2012/subdomain/example.gold_conll',
         'tests/fixtures/conll_2012/subdomain2/example.gold_conll'
     ]
     # Set comparison: traversal order is not part of the contract.
     assert len(discovered) == len(expected_paths)
     assert set(discovered) == set(expected_paths)
예제 #7
0
 def _ontonotes_subset(
         ontonotes_reader: Ontonotes, file_path: str,
         domain_identifier: str) -> Iterable[OntonotesSentence]:
     """
     Iterates over the Ontonotes 5.0 dataset using an optional domain identifier.
     If the domain identifier is present, only examples which contain the domain
     identifier in the file path are yielded.
     """
     for conll_file in ontonotes_reader.dataset_path_iterator(file_path):
         # Guard clause: skip files outside the requested domain (if any).
         if domain_identifier is not None and f"/{domain_identifier}/" not in conll_file:
             continue
         yield from ontonotes_reader.sentence_iterator(conll_file)
예제 #8
0
 def _ontonotes_subset(ontonotes_reader: Ontonotes,
                       file_path: str,
                       domain_identifier: str) -> Iterable[OntonotesSentence]:
     """
     Iterates over the Ontonotes 5.0 dataset using an optional domain identifier.
     If the domain identifier is present, only examples which contain the domain
     identifier in the file path are yielded. Files whose path contains "/pt/"
     are always skipped.
     """
     for conll_file in ontonotes_reader.dataset_path_iterator(file_path):
         # Unconditionally exclude the "pt" subdirectory.
         if "/pt/" in conll_file:
             continue
         # Skip files outside the requested domain, when one is given.
         if domain_identifier is not None and f"/{domain_identifier}/" not in conll_file:
             continue
         yield from ontonotes_reader.sentence_iterator(conll_file)
예제 #9
0
 def _ontonotes_subset(
         ontonotes_reader: Ontonotes, file_path: str,
         domain_identifier: str) -> Iterable[OntonotesSentence]:
     """
     Yield every sentence from every CoNLL file found under ``file_path``.

     NOTE(review): ``domain_identifier`` is accepted but never used — no
     domain filtering happens in this variant; confirm that is intentional.
     """
     for conll_path in ontonotes_reader.dataset_path_iterator(file_path):
         for sentence in ontonotes_reader.sentence_iterator(conll_path):
             yield sentence