def extract(args):
    # Unpack a single (title, text) pair; the one-argument signature keeps
    # the function mappable (e.g., over a multiprocessing pool).
    # n_wiki_sentences and replace_title_mentions are free variables taken
    # from the enclosing scope.
    title, text = args
    sentences = extract_wiki_sentences(
        title, text, n_wiki_sentences,
        replace_title_mentions=replace_title_mentions)
    return title, sentences
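# Usage sketch (illustrative, not from the repo): because extract takes one
# tuple argument, it can be fed straight to Pool.map. This assumes extract
# and its free variables live at module scope so the function pickles; the
# page titles and texts below are placeholders.
def demo_extract_pages():
    from multiprocessing import Pool
    pages = [
        ('Albert Einstein', 'Albert Einstein was a theoretical physicist.'),
        ('Japan', 'Japan is an island country in East Asia.'),
    ]
    with Pool() as pool:
        # Each call returns (title, sentences); collect the pairs into a dict.
        return dict(pool.map(extract, pages))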
def training_data(self) -> TrainingData:
    # Build (sentence, answer) training pairs from the Wikipedia pages of
    # the known answers, skipping answers with no page or an empty page.
    wiki_lookup = Wikipedia()
    wiki_content = []
    wiki_answers = []
    for ans in self.answers:
        if ans not in wiki_lookup:
            continue
        wiki_page = wiki_lookup[ans]
        if len(wiki_page.text) != 0:
            sentences = extract_wiki_sentences(
                ans, wiki_page.text, self.n_sentences,
                replace_title_mentions=self.replace_title_mentions
            )
            for sent in sentences:
                wiki_content.append([sent])
                wiki_answers.append(ans)
    return wiki_content, wiki_answers, None
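# Usage sketch (illustrative): wiki_content and wiki_answers are parallel
# lists, each example being a one-element list holding a single Wikipedia
# sentence; the third element of the returned tuple is unused here. `guesser`
# is a hypothetical instance of the class defining training_data.
def demo_training_data(guesser):
    wiki_content, wiki_answers, _ = guesser.training_data()
    for sents, page in zip(wiki_content, wiki_answers):
        print(page, '->', sents[0][:60])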
def __init__(self, path, qnum_field, sent_field, page_field, text_field,
             unigram_field, bigram_field, trigram_field,
             example_mode='sentence', use_wiki=False, n_wiki_sentences=3,
             replace_title_mentions='', **kwargs):
    from unidecode import unidecode

    # For training splits, download the cached Wikipedia dump from S3 (if
    # needed) and load it as a page -> content lookup.
    if use_wiki and 'train' in path:
        base_path = os.path.dirname(path)
        filename = os.path.basename(s3_wiki)
        output_file = os.path.join(base_path, filename)
        if not os.path.exists(output_file):
            download_from_url(s3_wiki, output_file)
        with open(output_file) as f:
            self.wiki_lookup = json.load(f)
    else:
        self.wiki_lookup = {}
    self.path = path
    self.example_mode = example_mode

    # Only wire up the text representations whose fields were provided.
    text_dependent_fields = []
    if text_field is not None:
        text_dependent_fields.append(('text', text_field))
    if unigram_field is not None:
        text_dependent_fields.append(('unigram', unigram_field))
    if bigram_field is not None:
        text_dependent_fields.append(('bigram', bigram_field))
    if trigram_field is not None:
        text_dependent_fields.append(('trigram', trigram_field))

    example_fields = {
        'qnum': [('qnum', qnum_field)],
        'sent': [('sent', sent_field)],
        'page': [('page', page_field)],
        'text': text_dependent_fields
    }

    examples = []
    answer_set = set()
    with open(path) as f:
        for ex in json.load(f)['questions']:
            if example_mode == 'sentence':
                # One Example per sentence, indexed by position within the
                # question.
                for i, s in enumerate(ex['sentences']):
                    examples.append(Example.fromdict({
                        'qnum': ex['qnum'],
                        'sent': i,
                        'text': unidecode(s),
                        'page': ex['page']
                    }, example_fields))
                answer_set.add(ex['page'])
            elif example_mode == 'question':
                raise NotImplementedError(
                    'Question tokenization is not implemented yet, submit a PR!')
            elif example_mode == 'runs':
                raise NotImplementedError(
                    'Run tokenization is not implemented yet, submit a PR!')
            else:
                raise ValueError(
                    f"Valid modes are 'sentence', 'question', and 'runs', "
                    f"but '{example_mode}' was given")

    # Augment training data with Wikipedia sentences for each answer page,
    # marking them with qnum = -1 to distinguish them from real questions.
    if use_wiki and n_wiki_sentences > 0 and 'train' in path:
        for page in answer_set:
            if page in self.wiki_lookup:
                sentences = extract_wiki_sentences(
                    page, self.wiki_lookup[page]['text'], n_wiki_sentences,
                    replace_title_mentions=replace_title_mentions)
                for i, s in enumerate(sentences):
                    examples.append(Example.fromdict({
                        'qnum': -1,
                        'sent': i,
                        'text': s,
                        'page': page
                    }, example_fields))

    dataset_fields = {
        'qnum': qnum_field,
        'sent': sent_field,
        'page': page_field,
    }
    if text_field is not None:
        dataset_fields['text'] = text_field
    if unigram_field is not None:
        dataset_fields['unigram'] = unigram_field
    if bigram_field is not None:
        dataset_fields['bigram'] = bigram_field
    if trigram_field is not None:
        dataset_fields['trigram'] = trigram_field

    super(QuizBowl, self).__init__(examples, dataset_fields, **kwargs)
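# Usage sketch (illustrative, assuming the legacy torchtext Field API; the
# path and Field settings are placeholders, not the repo's configuration).
def demo_quizbowl_dataset():
    from torchtext.data import Field
    qnum_field = Field(sequential=False, use_vocab=False)
    sent_field = Field(sequential=False, use_vocab=False)
    page_field = Field(sequential=False)
    text_field = Field(lower=True)
    return QuizBowl(
        'data/quizbowl.train.json',  # placeholder path to a questions JSON
        qnum_field, sent_field, page_field, text_field,
        unigram_field=None, bigram_field=None, trigram_field=None,
        example_mode='sentence',
        use_wiki=True, n_wiki_sentences=3,
    )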