コード例 #1
0
    def test_less_than_3(self):
        """entry1 compares less than entry2 when POS and definitions differ."""
        first = Entry(
            entry='1',
            part_of_speech='2',
            language='kl',
            definitions=['3'],
        )
        second = Entry(
            entry='1',
            part_of_speech='3',
            language='kl',
            definitions=['4'],
        )
        self.assertLess(first, second)
コード例 #2
0
    def test_less_than_2(self):
        """entry1 compares less than entry2 when only the word differs."""
        smaller = Entry(
            entry='a',
            part_of_speech='i',
            language='kk',
            definitions=['3'],
        )
        bigger = Entry(
            entry='b',
            part_of_speech='i',
            language='kk',
            definitions=['3'],
        )
        self.assertLess(smaller, bigger)
コード例 #3
0
ファイル: __init__.py プロジェクト: radomd92/botjagwar
 def setUp(self) -> None:
     """Build three distinct Entry fixtures shared by the tests."""
     fixtures = [
         ('test1', 'l1', ['def1-1', 'def2-1']),
         ('test2', 'l2', ['def2-2', 'def2-2']),
         ('test3', 'l3', ['def3-3', 'def2-3']),
     ]
     self.entry1, self.entry2, self.entry3 = [
         Entry(entry=word,
               language=lang,
               part_of_speech='ana',
               definitions=defs)
         for word, lang, defs in fixtures
     ]
コード例 #4
0
ファイル: __init__.py プロジェクト: radomd92/botjagwar
    def translate(self,
                  entry: Entry,
                  source_language: str,
                  target_language: str = 'mg') -> Entry:
        """Translate every definition of ``entry`` into ``target_language``.

        Each definition is run through all registered translation methods;
        results returned as ``TranslatedDefinition`` instances are kept.

        :param entry: entry whose definitions are to be translated
        :param source_language: language code of the source definitions
        :param target_language: language code to translate into (default 'mg')
        :return: a copy of ``entry`` whose definitions are the translations
        """
        out_definitions = []
        for definition in entry.definitions:
            for method in self.methods:
                extracted_definition = method(entry.part_of_speech, definition,
                                              source_language, target_language)
                if isinstance(extracted_definition, TranslatedDefinition):
                    out_definitions.append(extracted_definition)

        # BUGFIX: the Entry was previously built and returned inside the
        # outer loop, so only the first definition was ever translated.
        out_entry_dict = entry.serialise()
        out_entry_dict['definitions'] = out_definitions
        return Entry(**out_entry_dict)
コード例 #5
0
ファイル: core.py プロジェクト: radomd92/botjagwar
    def process_entry_in_foreign_language(self, entry: Entry, title: str,
                                          language: str, unknowns: list):
        """Translate a foreign-language entry into the target language.

        :param entry: entry whose first definition is the word to translate
        :param title: page title the entry was found on
        :param language: wiktionary-edition language code the entry came from
        :param unknowns: accumulator of (word, language) pairs that had no
            translation
        :return: a new Entry carrying the matched translations, or None when
            the language is blacklisted or nothing matched
        """
        if entry.language in self.language_blacklist:
            log.debug(
                "language '%s' is blacklisted, so not translating or processing."
                % language)
            return

        try:
            log.debug("Translating word in foreign language (%s in '%s')" %
                      (entry.definitions[0], language))
            target_language_translations = []
            for translation in self.translate_word(entry.definitions[0],
                                                   language):
                # Keep only translations with the same part of speech.
                if translation['part_of_speech'] == entry.part_of_speech:
                    target_language_translations.append(
                        translation['definition'])
            if len(target_language_translations) == 0:
                log.debug("No matching translations found")
                return
        except NoWordException:
            log.debug("No translation found")
            # BUGFIX: unknowns holds (word, language) tuples, so membership
            # must be tested against the tuple, not the bare title —
            # otherwise the dedup check could never succeed.
            if (entry.definitions[0], language) not in unknowns:
                unknowns.append((entry.definitions[0], language))
            return

        infos = Entry(entry=title,
                      part_of_speech=str(entry.part_of_speech),
                      definitions=target_language_translations,
                      language=entry.language,
                      origin_wiktionary_edition=language,
                      origin_wiktionary_page_name=entry.definitions[0])

        return infos
コード例 #6
0
    def run(self, word_additional_data_info, counter=0):
        """Interactively review one translated word and upload it on approval.

        :param word_additional_data_info: dict with 'word', 'word_id' and
            'part_of_speech' keys
        :param counter: zero-based index shown in the user prompt
        """
        word = word_additional_data_info['word']
        word_id = word_additional_data_info['word_id']
        part_of_speech = word_additional_data_info['part_of_speech']
        definitions = self.get_definitions(word_id)
        translated_definitions = self.get_translated_definitions(word_id)
        print(f'{word} ({word_id}) ->', definitions, translated_definitions)

        # Bail out before building anything: no translations at all, or a
        # placeholder-only ('.') first translation.
        if not translated_definitions:
            return
        if translated_definitions[0] == '.':
            return

        translation = Translation()
        translation.output.wikipage_renderer.pages_to_link = self.malagasy_words_to_link
        entry = Entry(entry=word,
                      part_of_speech=part_of_speech,
                      language=self.language,
                      definitions=translated_definitions)

        response = pywikibot.input(
            f'Entry # {counter + 1}: Accept and upload? (y/n)').strip()
        if response == 'y':
            translation.publish_to_wiktionary(entry.entry, [entry])
            translation._save_translation_from_page([entry])
            self.mark_definition(word_id, 'done')
        elif response == 'n':
            self.mark_definition(word_id, 'rejected')
コード例 #7
0
    def run_from_csv(self, csv_path, language='la'):
        """Create wiki entries from a semicolon-delimited CSV file.

        Each row is expected to hold (title, part_of_speech,
        english_definition, malagasy_definition); rows without a Malagasy
        definition are skipped, as are words raising SkippedWord.

        :param csv_path: path to the CSV file
        :param language: language code for the created entries (default 'la')
        """
        with open(csv_path, 'r') as csv_file:
            reader = csv.reader(csv_file, delimiter=';')
            for row in reader:
                title, pos, en_defn, mg_defn = row[:4]
                if not mg_defn.strip():
                    continue

                pos = pos.strip()
                # Sentence-case the Malagasy definition.
                mg_defn = mg_defn[0].upper() + mg_defn[1:].lower()

                print('>>>>> ' + title + ' <<<<<')

                try:
                    # Direct keyword construction; the previous
                    # Entry(**{**entry_data}) round-trip through a dict added
                    # nothing.
                    entry = Entry(entry=title,
                                  language=language,
                                  part_of_speech=pos,
                                  definitions=[mg_defn])
                    wiki_string = self.renderer.render(entry)
                    summary_if_new = wiki_string.replace('\n', ' ')
                    summary_if_already_exists = '/* {{=' + language + '=}} */'

                    summary_if_new = "Pejy voaforona amin'ny « " + summary_if_new + ' »'
                    print(entry)
                    self.publisher.publish(entry, title, wiki_string,
                                           summary_if_already_exists,
                                           summary_if_new)
                except SkippedWord:
                    continue
                else:
                    # Only record in the database after a successful publish.
                    self.output.db(entry)
コード例 #8
0
 def test_to_tuple(self):
     """Constructor keyword arguments are exposed as attributes."""
     entry = Entry(entry='1',
                   part_of_speech='2',
                   definitions=['3'],
                   language='fr')
     expected = {'entry': '1', 'part_of_speech': '2', 'definitions': ['3']}
     for attribute, value in expected.items():
         self.assertEqual(getattr(entry, attribute), value)
コード例 #9
0
 def test_deep_copy(self):
     """A deep copy is fully independent of the original entry."""
     original = Entry(entry='tasumaki',
                      part_of_speech='2',
                      language='mg',
                      definitions=['3'])
     clone = copy.deepcopy(original)
     clone.entry = 'wrong'
     clone.definitions = ['potomaki']
     self.assertNotEqual(clone.entry, original.entry)
     self.assertNotEqual(clone.definitions, original.definitions)
コード例 #10
0
ファイル: fr.py プロジェクト: radomd92/botjagwar
    def get_all_entries(self, keepNativeEntries=False, **kw):
        """Return the language sections of the current page as Entry objects.

        Scans ``self.content`` (fr.wiktionary markup) for ``{{S|pos|lang}}``
        section headers and extracts the first definition of each section.
        Original note: language sections in a given page, formatting:
        [(POS, lang, definition), ...]
        """
        items = []

        if self.content is None:
            # Message is Malagasy: "self.page is undefined; self.process()
            # has not been called yet".
            raise Exception(
                "self.page tsy voafaritra. self.process() tsy mbola nantsoina")

        ct_content = self.content
        for lang in re.findall('{{S\\|([a-z]+)\\|([a-z]{2,3})', self.content):
            # print(ct_content)
            # word DEFINITION Retrieving
            # Slice out the text between this section header and the next
            # language header. NOTE: `find() + 1` maps "not found" (-1) to
            # the falsy 0, which the `if not` fallbacks below rely on.
            d1 = ct_content.find("{{S|%s|%s" % lang)
            d2 = ct_content.find("=={{langue|", d1) + 1
            if not d2:
                d2 = ct_content.find("== {{langue|", d1 + 50) + 1
            d_ptr = ct_content.find("=={{langue|%s" % lang[1], d1) + 1
            if not d_ptr:
                d_ptr = ct_content.find("== {{langue|%s" % lang[1], d1) + 1

            if d2 > d1:
                definition = ct_content[d1:d2]
            else:
                # No next header found: the section runs to end of page.
                definition = ct_content[d1:]
            try:
                # The first "# " list item is taken as the definition;
                # wiki-links are collapsed to their target page name.
                definition = definition.split('\n# ')[1]
                definition = re.sub("\\[\\[(.*)#(.*)\\|?[.*]?\\]?\\]?", "\\1",
                                    definition)
            except IndexError:
                # Section has no "# " definition line: advance past it.
                ct_content = ct_content[d_ptr:]
                continue

            ct_content = ct_content[d_ptr:]
            if definition.find('\n') + 1:
                # Keep only the first line of the definition.
                definition = definition[:definition.find('\n')]

            definition = stripwikitext(definition)
            if not definition:
                ct_content = ct_content[d_ptr:]
                continue

            pos = frpos = lang[0].strip()  # POS
            if frpos in self.postran:
                # Map the French part-of-speech label to the local one.
                pos = self.postran[frpos]

            i = Entry(entry=self.title,
                      part_of_speech=pos,
                      language=lang[1].strip(),
                      definitions=[definition.strip()])

            items.append(i)

        # print("Nahitana dikanteny ", len(items))
        return items
コード例 #11
0
ファイル: core.py プロジェクト: radomd92/botjagwar
    def process_entry_in_native_language(self, content: str, title: str,
                                         language: str, unknowns: list):
        """
        Yields each translation found on a native-language wiki page.
        :param content: raw wiki text of the page
        :param title: page title (the word being translated)
        :param language: language code of the wiktionary edition
        :param unknowns: accumulator of (word, language) pairs that had no
            translation
        :return: generator of Entry objects
        """
        wiktionary_processor_class = entryprocessor.WiktionaryProcessorFactory.create(
            language)
        wiktionary_processor = wiktionary_processor_class()
        try:
            wiktionary_processor.set_text(content)
            wiktionary_processor.set_title(title)
            translations = wiktionary_processor.retrieve_translations()
        except Exception as exc:
            log.exception(exc)
            return

        for translation in translations:
            entry = translation.entry
            pos = translation.part_of_speech
            entry_language = translation.language
            if entry_language in self.language_blacklist:  # check in language blacklist
                continue

            try:
                target_language_translations = [
                    t['definition']
                    for t in self.translate_word(title, language)
                    if t['part_of_speech'] == str(pos)
                ]
            except NoWordException:
                # The source word itself is unknown, so no remaining
                # translation can succeed either: record it once and stop.
                log.debug('No translation found for %s in %s' %
                          (title, language))
                # BUGFIX: unknowns holds (word, language) tuples, so
                # membership must be tested against the tuple, not the title.
                if (title, language) not in unknowns:
                    unknowns.append((title, language))
                break

            infos = Entry(entry=entry,
                          part_of_speech=str(pos),
                          definitions=target_language_translations,
                          language=entry_language,
                          origin_wiktionary_edition=language,
                          origin_wiktionary_page_name=title)

            yield infos
コード例 #12
0
    def render(self, info: Entry, link=True) -> str:
        data = info.serialise()
        s = """
{{-%(language)s-}}
'''{{subst:BASEPAGENAME}}'''""" % data
        if link:
            s += "\n# %s" % ', '.join(
                ['[[%s]]' % (d) for d in info.definitions])
        else:
            s += "\n# %s" % ', '.join(['%s' % (d) for d in info.definitions])
        additional_note = '\n{{bot-made translation|%s}}' % info.origin_wiktionary_page_name
        s = s + additional_note
        try:
            return s
        except UnicodeDecodeError:
            return s.decode('utf8')
コード例 #13
0
ファイル: mg.py プロジェクト: radomd92/botjagwar
    def get_all_entries(self, keep_native_entries=False, **kw):
        """Return all entries of the current mg.wiktionary page.

        Scans ``self.content`` for form-of and lemma section headers and
        collects the definitions that follow each ``{{-pos-|lang}}`` marker.

        :param keep_native_entries: unused, kept for interface compatibility
        :return: list of Entry objects (empty when there is no content)
        """
        items = []
        if self.content is None:
            return []
        for regex in [self.form_of_regex, self.lemma_regex]:
            for pos, lang in re.findall(regex, self.content):
                pos = pos.strip()
                # BUGFIX: `in ('etim')` was a substring test against the
                # string 'etim' (the parentheses made no tuple), so any POS
                # like 'ti' or 'e' was silently skipped too. Only etymology
                # sections are meant to be skipped.
                if pos in ('etim',):
                    continue
                # word DEFINITION Retrieving
                d1 = self.content.find("{{-%s-|%s}}" %
                                       (pos, lang)) + len("{{-%s-|%s}}" %
                                                          (pos, lang))
                # `find() + 1` maps "not found" (-1) to falsy 0, so `or`
                # falls through to the spaced-header variant.
                d2 = self.content.find("=={{=", d1) + 1 or self.content.find(
                    "== {{=", d1) + 1
                if d2:
                    definition = self.content[d1:d2]
                else:
                    definition = self.content[d1:]
                try:
                    definitions = definition.split('\n# ')[1:]
                except IndexError:
                    # print(" Hadisoana : Tsy nahitana famaritana")
                    continue

                entry_definition = []
                for definition in definitions:
                    if definition.find('\n') + 1:
                        # Keep only the first line; collapse wiki links to
                        # their target page name.
                        definition = definition[:definition.find('\n')]
                        definition = re.sub("\\[\\[(.*)#(.*)\\|?\\]?\\]?",
                                            "\\1", definition)
                    definition = stripwikitext(definition)
                    if not definition:
                        continue
                    else:
                        entry_definition.append(definition)

                # Drop single-character leftovers from markup stripping.
                entry_definition = [d for d in entry_definition if len(d) > 1]

                if entry_definition:
                    i = Entry(entry=self.title,
                              part_of_speech=pos.strip(),
                              language=lang.strip(),
                              definitions=entry_definition)
                    items.append(i)
        # print("Nahitana dikanteny", len(items))
        return items
コード例 #14
0
ファイル: word_importer.py プロジェクト: radomd92/botjagwar
    def worker(self, entry: Entry):
        """
        Updates the wiki page with the given entry.
        If entry exists in database, skip;
        else, check language's existence on-wiki and if it exists, skip;
        else, add the entry on-wiki
        :param entry: entry to create
        :return:
        """
        if entry.language in LANGUAGE_BLACKLIST:
            # BUGFIX: message previously read 'blackisted'.
            print('blacklisted: ', entry.language)
            return

        if self.lookup_cache.lookup(entry):
            # Already known in the database: nothing to do.
            return
        else:
            pprint(entry)
            output = Output()
            output.db(entry)

        if not self.update_on_wiki:
            print('not updating on wiki')
            return

        print('attempts to update on wiki...')
        wikipage = output.wikipage(entry)
        if entry.language in CYRILLIC_ALPHABET_LANGUAGES:
            # Page titles for Cyrillic-alphabet languages drop accent marks.
            entry.entry = _get_unaccented_word(entry.entry)

        page = pywikibot.Page(self.site, entry.entry)
        try:
            if page.isRedirectPage():
                return
        except Exception:
            # Broken/invalid titles raise here; best-effort skip.
            return
        if page.exists():
            content = page.get()
            if '{{=%s=}}' % entry.language in content:
                print('exists on-wiki')
                return
            else:
                # Prepend the new language section to the existing page.
                content = wikipage + '\n' + content
        else:
            content = wikipage

        page.put(content, self.summary)
コード例 #15
0
    def lookup(self, word) -> Entry:
        """Fetch ``word``'s page and build an Entry from it.

        Definitions come from ``definition_xpath``; the part of speech is
        read from ``pos_xpath`` when configured, otherwise it defaults
        to 'ana'.
        """
        content = self.load_page(word)

        extracted = []
        for node in content.xpath(self.definition_xpath):
            extracted.append(self.reprocess_definition(node))

        part_of_speech = 'ana'
        if self.pos_xpath is not None:
            part_of_speech = content.xpath(self.pos_xpath)[0].text.strip('\n')

        return Entry(
            entry=word,
            part_of_speech=part_of_speech,
            language=self.language,
            definitions=extracted,
        )
コード例 #16
0
ファイル: word_importer.py プロジェクト: radomd92/botjagwar
    def do_import(self, workers=100):
        """Import every word that has a Malagasy definition in the export DB.

        Builds an in-memory (word, language, part_of_speech) -> [definitions]
        map from the SQL export, then feeds one Entry per key to
        ``self.worker``.

        :param workers: unused here, kept for interface compatibility
        """
        input_database = DictionaryDatabaseManager(
            database_file=self.export_path)
        with input_database.engine.connect() as connection:
            query = connection.execute("""
                select
                    word.id,
                    word.word,
                    word.language,
                    word.part_of_speech,
                    definitions.definition,
                    definitions.definition_language
                from
                    dictionary,
                    word,
                    definitions
                where
                    dictionary.definition = definitions.id
                    and word.id = dictionary.word
                    and definition_language = 'mg'
                """)
            print('-- build tree --')
            for w in query.fetchall():
                word, language, part_of_speech, definition = w[1], w[2], w[
                    3], w[4]
                key = (word, language, part_of_speech)
                # Group definitions by (word, language, part_of_speech).
                if key in self.fast_tree:
                    self.fast_tree[key].append(definition)
                else:
                    self.fast_tree[key] = [definition]

            print('-- using tree --')
            # Iterate items() directly instead of re-looking up each key.
            for key, definitions in self.fast_tree.items():
                word, language, part_of_speech = key
                entry = Entry(entry=word,
                              language=language,
                              part_of_speech=part_of_speech,
                              definitions=definitions)
                try:
                    self.worker(entry)
                except Exception:
                    # Best-effort import: skip entries whose worker fails.
                    continue
コード例 #17
0
ファイル: output.py プロジェクト: radomd92/botjagwar
 def dictionary_service_update_database(self, info: Entry):
     """Push ``info`` to the dictionary service, creating or updating it.

     A create is attempted first; when the word already exists, its ID is
     fetched and an edit request is issued instead.
     """
     log.info(info.serialise())
     # Adapt to the payload format the service expects.
     definitions = []
     for d in info.definitions:
         definitions.append({
             'definition': d,
             'definition_language': self.content_language,
         })
     data = {
         'definitions': definitions,
         'word': info.entry,
         'part_of_speech': info.part_of_speech,
         'translation_method': getattr(info, 'translation_method', None),
     }
     response = dictionary_service.post('entry/%s/create' % info.language,
                                        json=data)
     if response.status_code == WordAlreadyExists.status_code:
         # Fetch the existing word's ID, then edit it in place.
         word_response = dictionary_service.get(
             'entry/%s/%s' %
             (info.language, info.entry)).json()
         edit_response = dictionary_service.put(
             'entry/%d/edit' % word_response[0]['id'],
             json=data)
         if edit_response.status_code == WordAlreadyExists.status_code:
             log.debug(
                 '%s [%s] > Attempted to create an already-existing entry.'
                 % (info.entry, info.language))
         elif edit_response.status_code != 200:
             log.error(
                 '%s [%s] > Entry update failed (%d).' %
                 (info.entry, info.language, edit_response.status_code))
コード例 #18
0
ファイル: output.py プロジェクト: radomd92/botjagwar
 def batchfile(self, info: Entry):
     """Return the batch format line for ``info`` (see doc)."""
     serialised = info.serialise()
     template = ("%(entry)s -> %(entry_definition)s -> "
                 "%(part_of_speech)s -> %(language)s\n")
     return template % serialised
コード例 #19
0
    def generate_wikipage_and_summaries(self, translation):
        """Build the rendered wiki page and edit summaries for a translation.

        :param translation: dict holding at least 'word_id', 'word',
            'language' and 'part_of_speech'
        :return: tuple (entry, wiki_string, summary_if_new,
            summary_if_already_exists)
        :raises SkippedWord: when the base dictionary infos cannot be fetched
            or no convergent definition exists
        """
        # Fetching base information
        json_dictionary_infos_params = {
            'id': 'eq.' + str(translation["word_id"])
        }
        json_dictionary_rq = requests.get(dyn_backend.backend +
                                          '/vw_json_dictionary',
                                          params=json_dictionary_infos_params)

        if json_dictionary_rq.status_code == 200:
            json_dictionary_infos = json_dictionary_rq.json()
            additional_data = json_dictionary_infos[0]['additional_data']
        else:
            print('json_dictionary_rq.status_code',
                  json_dictionary_rq.status_code)
            raise SkippedWord()

        definitions = []
        request_convergent_definition_rq = requests.get(
            dyn_backend.backend + '/convergent_translations',
            params={'word_id': 'eq.' + str(translation["word_id"])})
        if request_convergent_definition_rq.status_code == 200:
            definitions = [
                e['suggested_definition']
                for e in request_convergent_definition_rq.json()
            ]
        else:
            # Non-fatal: fall through with an empty definitions list, which
            # raises SkippedWord at the bottom of this method.
            print('request_convergent_definition_rq.status_code ',
                  request_convergent_definition_rq.status_code)

        # Fetching and mapping additional data
        additional_data_list = json_dictionary_infos[0]['additional_data']
        if additional_data_list is not None:
            # p = self.get_additional_data(
            # additional_data_list, translation['word_id'], 'pronunciation',
            # list)
            raw_additional_data_dict = {
                'synonyms':
                self.get_additional_data(additional_data_list,
                                         translation['word_id'], 'synonym',
                                         list),
                'antonyms':
                self.get_additional_data(additional_data_list,
                                         translation['word_id'], 'antonym',
                                         list),
                'ipa':
                self.get_additional_data(additional_data_list,
                                         translation['word_id'], 'ipa', list),
                # 'pronunciation': p[0] if p else [],
                # 'ipa': ['{{fanononana-ko}}'],
                'audio_pronunciations':
                self.get_additional_data(additional_data_list,
                                         translation['word_id'], 'audio',
                                         list),
                'related_terms':
                self.get_additional_data(additional_data_list,
                                         translation['word_id'], 'related',
                                         list),
                'derived_terms':
                self.get_additional_data(additional_data_list,
                                         translation['word_id'], 'derived',
                                         list),
                # 'references': ['{{Tsiahy:vortaro.net}}'],
                # 'references': self.get_additional_data(
                #     additional_data_list, translation['word_id'], 'reference', list),
                # 'etymology': self.get_additional_data(
                #     additional_data_list, translation['word_id'], 'etym/en', str)
            }
            # Drop keys whose additional data came back empty.
            additional_data_dict = {
                k: v
                for k, v in raw_additional_data_dict.items() if v
            }
            print(raw_additional_data_dict)
        else:
            additional_data_dict = {}

        # Compiling final object
        if definitions:
            entry_data = {
                'entry': translation["word"],
                'language': translation["language"],
                'part_of_speech': translation["part_of_speech"],
                'definitions': definitions,
            }

            for data_type in self.additional_data_types:
                if data_type in additional_data:
                    entry_data[data_type] = additional_data[data_type]

            entry = Entry(**{**entry_data, **additional_data_dict})
            wiki_string = self.renderer.render(entry)
            summary_if_new = wiki_string.replace('\n', ' ')
            summary_if_already_exists = '/* {{=' + \
                translation["language"] + '=}} */'
            # Wiki edit summaries are length-limited; truncate politely.
            if len(summary_if_new) > 147:
                summary_if_new = summary_if_new[:147] + '...'
            return entry, wiki_string, summary_if_new, summary_if_already_exists
        else:
            print('definitions', definitions)
            raise SkippedWord()
コード例 #20
0
    def create_missing_entries(self, xml_buffer: str):
        """Parse one XML page dump and write out translated entries.

        Entries in the bot's own language are translated through the lookup
        table; foreign entries are translated definition-by-definition,
        falling back to the inflection-template parser for form-of words.

        :param xml_buffer: raw XML of a single wiki page
        """
        title_node, content_node = self.base_worker(xml_buffer)

        assert title_node is not None
        if ':' in title_node:
            # Namespaced pages (Talk:, User:, ...) are not dictionary entries.
            return
        if self.processor_class is None:
            self.processor_class = WiktionaryProcessorFactory.create('en')
            assert self.processor_class is not None

        processor = self.processor_class()
        processor.set_title(title_node)
        processor.set_text(content_node)
        entries = processor.get_all_entries()

        for entry in entries:
            if entry.language == self.language:
                if self.translation_lookup_table.lookup(entry):
                    translation = self.translation_lookup_table.translate(
                        entry)
                    new_entry = Entry(entry=entry.entry,
                                      definitions=translation,
                                      language=entry.language,
                                      part_of_speech=entry.part_of_speech)
                    #print('local >', new_entry)
                    self.entry_writer.add(new_entry)
                    for e in processor.retrieve_translations():
                        e.definitions = translation
                        self.entry_writer.add(e)
                        #print('local translation >', e)
            else:
                # RIP cyclomatic complexity.
                translations = []
                pos = entry.part_of_speech
                for definition in entry.definitions:
                    try:
                        # BUGFIX: `language` was an undefined name here
                        # (NameError on this path); the entry's own language
                        # is the intended source language.
                        translation = self.translation_lookup_table.translate_word(
                            definition, entry.language, entry.part_of_speech)
                    except LookupError:  # Translation couldn't be found in lookup table
                        if entry.part_of_speech in TEMPLATE_TO_OBJECT:
                            try:
                                # Try inflection template parser
                                elements = templates_parser.get_elements(
                                    TEMPLATE_TO_OBJECT[entry.part_of_speech],
                                    definition)
                            except Exception:
                                # add to missing translations
                                self.missing_translation_writer.add(definition)
                            else:
                                if elements:
                                    # part of speech changes to become a
                                    # form-of part of speech
                                    if not pos.startswith('e-'):
                                        pos = 'e-' + pos
                                    translations.append(
                                        elements.to_malagasy_definition())
                    else:
                        translations.append(translation[0])

                if translations:
                    new_entry = Entry(entry=entry.entry,
                                      definitions=list(set(translations)),
                                      language=entry.language,
                                      part_of_speech=pos)
                    #print('foreign >', new_entry)
                    self.entry_writer.add(new_entry)
0
ファイル: word_forms.py プロジェクト: radomd92/botjagwar
def create_non_lemma_entry(entry: Entry):
    """Create or update the Malagasy wiki page for a non-lemma (form-of) word.

    The entry's first definition is an inflection template: it is parsed,
    rendered into a Malagasy definition and published, provided the lemma it
    points to already exists on-wiki.

    :param entry: the non-lemma entry (word, POS, language code, definition)
    :return: 1 when a page edit was attempted or a retryable error occurred,
        0 when the word was skipped
    """
    word, pos, code, definition = (entry.entry, entry.part_of_speech,
                                   entry.language, entry.definitions[0])
    page_output = Output()
    mg_page = pywikibot.Page(pywikibot.Site(SITELANG, SITENAME), word)

    # Translate template's content into malagasy
    try:
        if pos not in TEMPLATE_TO_OBJECT:  # unsupported template
            print("Unsupported template")
            return 0

        output_object_class = TEMPLATE_TO_OBJECT[pos]
        try:
            elements = templates_parser.get_elements(
                output_object_class, definition)
        except Exception:
            return 1

        if code in POST_PROCESSORS:
            elements = POST_PROCESSORS[code](elements)

        if elements is None:
            print("No elements")
            return 0

        malagasy_definition = elements.to_malagasy_definition()
        lemma = elements.lemma
        # lemma = get_lemma(output_object_class, definition)
        print(elements, malagasy_definition, lemma)
    except ParserError as exc:
        print(exc)
        return 0

    # Do not create page if lemma does not exist
    if lemma:
        mg_lemma_page = pywikibot.Page(
            pywikibot.Site(SITELANG, SITENAME), lemma)
    else:
        return 1

    try:
        if not mg_lemma_page.exists():
            print('No lemma (%s) :/' % lemma)
            return 0
        else:
            # Follow redirects until we reach a real page (or a dead end).
            broken_redirect = False
            while mg_lemma_page.isRedirectPage():
                mg_lemma_page = mg_lemma_page.getRedirectTarget()
                if not mg_lemma_page.exists():
                    broken_redirect = True
                    break

            if not broken_redirect:
                content = mg_lemma_page.get()
                # BUGFIX: `language_code` was an undefined name here
                # (NameError); the entry's language code `code` is meant.
                if '{{=' + code + '=}}' not in content:
                    print('No lemma (%s) :/' % lemma)
                    return 0
            else:
                print('No lemma : broken redirect (%s)' % lemma)
                return 0
    except pywikibot.exceptions.InvalidTitle:  # doing something wrong at this point
        return 0
    except Exception:
        return 1

    form_of_template = FORM_OF_TEMPLATE[pos] if pos in FORM_OF_TEMPLATE else pos

    mg_entry = Entry(
        entry=word,
        part_of_speech=form_of_template,
        definitions=[malagasy_definition],
        language=code,
    )

    # Check ability to overwrite page
    if not os.path.isfile('/tmp/%s' % code):  # overwrite existing content!
        overwrite = False
    else:
        overwrite = True
        print(
            ('PAGE OVERWRITING IS ACTIVE. DELETE /tmp/%s TO DISABLE IT MID-SCRIPT.' % code))

    # Create or update the generated page
    if mg_page.exists() and not overwrite:
        new_entry = page_output.wikipage(mg_entry, link=False)
        page_content = mg_page.get()
        if page_content.find('{{=%s=}}' % code) != -1:
            if page_content.find(
                '{{-%s-|%s}}' %
                    (form_of_template, code)) != -1:
                print('section already exists : No need to go further')
                return 0
            else:  # Add part of speech subsection
                page_content = re.sub(
                    r'==[ ]?{{=%s=}}[ ]?==' %
                    code, new_entry, page_content)
        else:  # Add language section
            page_content = new_entry + '\n' + page_content
    else:  # Create a new page.
        page_content = page_output.wikipage(mg_entry, link=False)

    pywikibot.output('\03{blue}%s\03{default}' % page_content)
    try:
        mg_page.put(page_content, f'endriky ny teny [[{lemma}]]')
    except Exception:
        # Page save failures are treated as retryable; fall through to 1.
        pass

    return 1
コード例 #22
0
ファイル: en.py プロジェクト: radomd92/botjagwar
    def get_all_entries(
        self,
        keepNativeEntries=False,
        get_additional_data=False,
        cleanup_definitions=False,
        translate_definitions_to_malagasy=False,
        human_readable_form_of_definition=True,
        **kw) -> list:
        """
        Retrieves all necessary information in the form of a list of Entry objects
        :param keepNativeEntries:
        :param get_additional_data:
        :param cleanup_definitions:
        :param translate_definitions_to_malagasy:
        :param human_readable_form_of_definition:
        :param kw:
        :return:
        """
        content = self.content
        entries = []
        content = re.sub("{{l/en\\|(.*)}}", "\\1 ", content)  # remove {{l/en}}
        for l in re.findall("[\n]?==[ ]?([A-Za-z]+)[ ]?==\n", content):
            last_part_of_speech = None
            ct_content = content
            try:
                last_language_code = self.lang2code(l)
            except KeyError:
                # Unknown language name: skip the whole section.
                continue

            definitions = {}
            # Language sections on en.wiktionary run until the '----' rule.
            section_init = ct_content.find('==%s==' % l)
            section_end = ct_content.find('----', section_init)
            if section_end != -1:
                ct_content = ct_content[section_init:section_end]
            else:
                ct_content = ct_content[section_init:]

            lines = ct_content.split('\n')
            for line in lines:
                if last_part_of_speech is None:
                    last_part_of_speech = self.get_part_of_speech(line)

                # We assume en.wikt definitions start with a "# " and proceed to extract all definitions from there.
                # Definitions are then added as a list of strings then added as a list of strings. They are grouped
                #   by part of speech to ensure correctness, as we can only have one part of speech for a given entry.
                if line.startswith('# '):
                    defn_line = line.lstrip('# ')
                    if last_part_of_speech is None:
                        continue

                    definition = self.extract_definition(
                        last_part_of_speech,
                        defn_line,
                        cleanup_definition=cleanup_definitions,
                        translate_definitions_to_malagasy=translate_definitions_to_malagasy,
                        human_readable_form_of_definition=human_readable_form_of_definition,
                        advanced=kw.get('advanced', False)
                    )
                    if last_part_of_speech in definitions:
                        definitions[last_part_of_speech].append(definition)
                    else:
                        definitions[last_part_of_speech] = [definition]

            # Fetch additional data if flag is set, else put it to none
            if get_additional_data:
                additional_data = self.get_additional_data(
                    ct_content, last_language_code)
            else:
                additional_data = None

            # Create the Entry object to add to the list.
            # BUGFIX: the loop variable previously shadowed the `definitions`
            # dict being iterated, which is fragile and confusing.
            for pos, pos_definitions in definitions.items():
                entry = Entry(
                    entry=self.title,
                    part_of_speech=pos,
                    language=last_language_code,
                    definitions=pos_definitions,
                )
                if additional_data is not None and get_additional_data:
                    entry.additional_data = {}
                    for data_type, data in additional_data.items():
                        if data:
                            entry.additional_data[data_type] = data

                entries.append(entry)

        return entries