Python FillerSectionの例、wiktionary_parser.sections.FillerSection Pythonの例

コード例 #1

0

ファイルを表示

ファイル: wortart_title.py プロジェクト: spandanagella/wiktionary-parser

 def parse(self):
     """Split the title text into child sections and flag name-like words.

     The text is chopped with ``WortartTitlePieceBlock`` and
     ``WortartTitleTagBlock``.  Filler blocks and tag blocks become
     ``FillerSection`` children created with ``correct=True``; every other
     piece becomes a ``deWortartTitlePieceSection``.  Each child is parsed
     before it is appended to ``self.children``.

     Returns:
         This section (``self``).
     """
     super(deWortartTitleSection, self).parse()
     block_types = [self.WortartTitlePieceBlock, self.WortartTitleTagBlock]
     chopped = Chopper(self.text,
                       block_types,
                       filler_blocks=True,
                       include_tags=True)
     for chunk in chopped:
         if isinstance(chunk, (FillerBlock, self.WortartTitleTagBlock)):
             child = FillerSection(text=chunk.text, parent=self, correct=True)
         else:
             child = deWortartTitlePieceSection(text=chunk.text, parent=self)
         self.children.append(child.parse())
     # Low-tech check for whether the word is a kind of name: look for a
     # telltale keyword anywhere in the raw text.  Improve later.
     word = self.get_property('word')
     if word:
         for name_hint in ('Vorname', 'Nachname', 'Eigenname'):
             if name_hint in self.text:
                 word.is_name = True
     return self

コード例 #2

0

ファイルを表示

ファイル: sections.py プロジェクト: ravelab/wiktionary-parser

 def parse(self):
     """Build child sections, chopping the text only for German entries.

     When the ``language`` property equals ``"Deutsch"`` the text is chopped
     with ``block_classes`` (using ``NewFillerBlock`` as the filler block
     class) and each block supplies its own section through
     ``make_section``.  Otherwise the whole text is kept as one
     ``FillerSection``.  Every child is parsed before being appended.

     Returns:
         This section (``self``).
     """
     super(deWortartContentSection, self).parse()
     is_german = self.get_property("language") == "Deutsch"
     if not is_german:
         # Other languages are not analysed; keep the text as one filler.
         filler = FillerSection(text=self.text, parent=self)
         self.children.append(filler.parse())
         return self
     chopper = Chopper(self.text, block_classes,
                       filler_blocks=True,
                       filler_block_class=NewFillerBlock)
     # Materialise all blocks first so chopping finishes before any child
     # section is created.
     for blk in list(chopper):
         child = blk.make_section(parent=self)
         self.children.append(child.parse())
     return self

コード例 #3

0

ファイルを表示

ファイル: sections.py プロジェクト: spandanagella/wiktionary-parser

 def parse(self):
     """Populate ``self.children`` from the section text.

     German content (``language`` property equal to ``'Deutsch'``) is split
     by ``Chopper`` using ``block_classes`` with ``NewFillerBlock`` as the
     filler block class; each resulting block creates its section via
     ``make_section``.  Any other content becomes a single
     ``FillerSection``.  Children are parsed before they are appended.

     Returns:
         This section (``self``).
     """
     super(deWortartContentSection, self).parse()
     if self.get_property('language') == 'Deutsch':
         # Collect every block up front, then turn each into a section.
         chopped_blocks = list(
             Chopper(self.text,
                     block_classes,
                     filler_blocks=True,
                     filler_block_class=NewFillerBlock))
         for chopped_block in chopped_blocks:
             child = chopped_block.make_section(parent=self)
             self.children.append(child.parse())
         return self
     # Not German: keep the text untouched inside one filler section.
     fallback = FillerSection(text=self.text, parent=self)
     self.children.append(fallback.parse())
     return self

コード例 #4

0

ファイルを表示

 def parse(self):
     """Read the slug from the heading and split the body into alerts.

     The heading must match ``== <title> ==``; the captured title is stored
     as the ``einsatz_slug`` property.  The text is then chopped on
     ``Level3Block``.  Filler text is parsed as a ``FillerSection`` and
     remembered in ``self.top_section``; every level-3 block is parsed as an
     ``AlertSection`` and registered in ``self.alert_sections`` under its
     ``alert_slug`` property.  All parsed sections are appended to
     ``self.children``.

     Returns:
         This section (``self``).

     Raises:
         ParsingError: If the heading does not match, or if two alert
             sections share the same ``alert_slug``.
     """
     heading_pattern = re.compile('== (?P<title>.+) ==', re.UNICODE)
     heading_match = heading_pattern.match(self.heading)
     if not heading_match:
         raise ParsingError('Cannot parse log page.')
     self.set_property('einsatz_slug', heading_match.groupdict()['title'])
     chopped = Chopper(self.text, [Level3Block],
                       filler_blocks=True,
                       include_tags=True)
     for blk in chopped:
         if isinstance(blk, FillerBlock):
             # Unknown text is allowed before the first level-3 block.
             child = FillerSection(text=blk.text, parent=self).parse()
             self.top_section = child
         else:
             child = AlertSection(text=blk.text,
                                  parent=self,
                                  heading=blk.start_tag).parse()
             slug = child.get_property('alert_slug')
             if slug in self.alert_sections:
                 raise ParsingError('Two EinsatzSections with same title.')
             self.alert_sections[slug] = child
         self.children.append(child)
     return self

コード例 #5

0

ファイルを表示

 def parse(self, shallow=False):
     """Chop the page text into top-level child sections.

     Args:
         shallow: When true, child sections are created and attached but
             their own ``parse`` is not called.

     Returns:
         This page (``self``).  When ``title_OK()`` is false the page is
         flagged with ``ignore = True`` and no children are added.
     """
     super(dePage, self).parse()
     if not self.title_OK():
         # No words on this page
         self.ignore = True
         return self
     chopped = Chopper(self.text, [Level2Block],
                       filler_blocks=True, include_tags=True)
     for blk in chopped:
         # Unknown text is allowed before the first level-2 block; it is
         # kept as filler rather than treated as a language section.
         if isinstance(blk, FillerBlock):
             section_cls = FillerSection
         else:
             section_cls = deLanguageSection
         child = section_cls(text=blk.text, parent=self)
         if not shallow:
             child = child.parse()
         self.children.append(child)
     return self

コード例 #6

0

ファイルを表示

ファイル: wortart_title.py プロジェクト: benreynwar/wiktionary-parser

 def parse(self):
     """Turn the title text into parsed child sections and detect names.

     ``Chopper`` splits the text using ``WortartTitlePieceBlock`` and
     ``WortartTitleTagBlock``.  Pieces become
     ``deWortartTitlePieceSection`` children; filler and tag blocks become
     ``FillerSection`` children created with ``correct=True``.  Afterwards
     the ``word`` property, if set, is flagged as a name when the text
     contains one of a few known keywords.

     Returns:
         This section (``self``).
     """
     super(deWortartTitleSection, self).parse()
     pieces = Chopper(
         self.text,
         [self.WortartTitlePieceBlock, self.WortartTitleTagBlock],
         filler_blocks=True,
         include_tags=True,
     )
     for part in pieces:
         keep_as_filler = isinstance(
             part, (FillerBlock, self.WortartTitleTagBlock))
         if keep_as_filler:
             child = FillerSection(text=part.text, parent=self, correct=True)
         else:
             child = deWortartTitlePieceSection(text=part.text, parent=self)
         self.children.append(child.parse())
     # Check to see if it is a kind of name.
     # This is a low tech check.  Improve later.
     name_markers = ('Vorname', 'Nachname', 'Eigenname')
     word = self.get_property('word')
     if not word:
         return self
     for marker in name_markers:
         if marker in self.text:
             word.is_name = True
     return self

コード例 #7

0

ファイルを表示

 def parse(self, shallow=False):
     """Parse a language section into its title and word-type sections.

     The text must chop into exactly one ``Level2Block``.  Its start tag is
     parsed as a ``deLangTitleSection``; its body is chopped on
     ``Level3Block`` into ``deWortartSection`` children, with filler text
     kept as ``FillerSection`` children created with ``correct=True``.

     Args:
         shallow: When true, the level-3 children are created and attached
             but their own ``parse`` is not called.  The title section is
             always parsed.

     Returns:
         This section (``self``), or a parsed ``FillerSection`` attached to
         ``self.parent`` (``correct=False``) when the title is not readable.

     Raises:
         ParsingError: If the text does not contain exactly one level-2
             block.
     """
     super(deLanguageSection, self).parse()
     # The only thing that a deLanguageSection should contain
     # is the title and deWortartSections.
     level2_blocks = list(Chopper(self.text, [Level2Block]))
     if len(level2_blocks) != 1:
         raise ParsingError()
     only_block = level2_blocks[0]
     heading = only_block.start_tag
     body = only_block.text
     title_section = deLangTitleSection(text=heading, parent=self).parse()
     if not title_section.readable():
         # Unreadable title: replace this section with a filler.
         replacement = FillerSection(text=self.text,
                                     parent=self.parent,
                                     correct=False)
         return replacement.parse()
     self.children.append(title_section)
     level3_blocks = Chopper(body, [Level3Block],
                             filler_blocks=True, include_tags=True)
     for blk in level3_blocks:
         if isinstance(blk, FillerBlock):
             child = FillerSection(text=blk.text, parent=self, correct=True)
         else:
             child = deWortartSection(text=blk.text, parent=self)
         if not shallow:
             child = child.parse()
         self.children.append(child)
     return self

コード例 #8

0

ファイルを表示

ファイル: sections.py プロジェクト: ravelab/wiktionary-parser

 def parse(self, shallow=False):
     """Split this language section into a title and word-type children.

     Exactly one ``Level2Block`` is expected in the text.  The block's start
     tag is parsed as the language title; the block's text is chopped on
     ``Level3Block`` and each piece becomes either a ``deWortartSection`` or,
     for filler text, a ``FillerSection`` created with ``correct=True``.

     Args:
         shallow: If true, level-3 children are attached without being
             parsed.  The title section is parsed regardless.

     Returns:
         ``self`` on success.  When the title section reports it is not
         readable, a parsed ``FillerSection`` (``correct=False``) whose
         parent is ``self.parent`` is returned instead.

     Raises:
         ParsingError: If the number of level-2 blocks is not exactly one.
     """
     super(deLanguageSection, self).parse()
     # The only thing that a deLanguageSection should contain
     # is the title and deWortartSections.
     found = list(Chopper(self.text, [Level2Block]))
     if len(found) != 1:
         raise ParsingError()
     title_tag = found[0].start_tag
     inner_text = found[0].text
     lang_title = deLangTitleSection(text=title_tag, parent=self).parse()
     if not lang_title.readable():
         return FillerSection(
             text=self.text, parent=self.parent, correct=False).parse()
     self.children.append(lang_title)
     for level3 in Chopper(inner_text, [Level3Block],
                           filler_blocks=True, include_tags=True):
         child_kwargs = {'text': level3.text, 'parent': self}
         if isinstance(level3, FillerBlock):
             child = FillerSection(correct=True, **child_kwargs)
         else:
             child = deWortartSection(**child_kwargs)
         if not shallow:
             child = child.parse()
         self.children.append(child)
     return self

コード例 #9

0

ファイルを表示

 def parse(self, shallow=False):
     """Chop the text on level-3 headings and build child sections.

     Filler text becomes a ``simpleTopTopSection``.  A level-3 block whose
     start tag is exactly ``===Pronunciation===`` becomes a
     ``PronunciationSection``; any other level-3 block is kept as a
     ``FillerSection`` created with ``correct=True``.

     Args:
         shallow: When true, children are attached without being parsed.

     Returns:
         This section (``self``).
     """
     super(simpleTopSection, self).parse()
     chopped = Chopper(self.text, [Level3Block],
                       filler_blocks=True, include_tags=True)
     for blk in chopped:
         if isinstance(blk, FillerBlock):
             child = simpleTopTopSection(text=blk.text, parent=self)
         elif blk.start_tag == '===Pronunciation===':
             child = PronunciationSection(text=blk.text, parent=self)
         else:
             child = FillerSection(text=blk.text, parent=self, correct=True)
         if not shallow:
             child = child.parse()
         self.children.append(child)
     return self

コード例 #10

0

ファイルを表示

 def parse(self):
     """Read the Substantiv-Tabelle and collect its singular/plural fields.

     Nothing is done (``self`` is returned) when the section has no ``word``
     property or the word has no genders.  If ``self.pattern`` does not
     match the text, an ``UnreadableAlert`` is recorded on this section and
     an unparsed ``FillerSection`` attached to ``self.parent`` is returned.
     Otherwise the matched fields are stored per case in ``self.s_data``
     (singular) and ``self.p_data`` (plural), and the singular forms are
     processed to collect the possible flexion classes and genders.
     """
     super(SubstantivTabelleSection, self).parse()
     word = self.get_property('word')
     if not word or not word.genders:
         return self
     # Use the real page title in alerts.  It was previously overwritten
     # with a placeholder string, which made every alert unattributable.
     page_title = self.get_property('page').title
     self.fixed_text = None
     match = self.pattern.match(self.text)
     if not match:
         message = u'%s: Substantiv-Tabelle in unreadable format.' % page_title
         alert = UnreadableAlert(message=message, title=page_title)
         self.alerts.append(alert)
         return FillerSection(text=self.text, parent=self.parent)
     data = match.groupdict()
     self.s_data = {}
     self.p_data = {}
     for case in CASES:
         # cps(case, plural) gives the key of the matched group for that
         # case and number.
         self.s_data[case] = data[cps(case, False)]
         self.p_data[case] = data[cps(case, True)]
     all_assigned = True
     poss_fks = fks
     poss_genders = GENDERS
     smfc = SubstantivMultipleFormColl(self.s_data, plural=False)
     try:
         smfc.process()
         # This is to make plural match singular
         # But we don't check that every gender and flexionklasse present in
         # singular is present in plural.  They just can't be something
         # completely different.
         # I should really lump singular and plural together into one MultipleFormColl
         if smfc.form_colls:
             poss_fks = set()
             poss_genders = set()
             for form_coll in smfc.form_colls:
                 poss_fks = poss_fks | form_coll.poss_fks
                 poss_genders = poss_genders | form_coll.poss_genders
     except SubstantivTabelleException as e:
         # "as" form is valid on Python 2.6+ and Python 3.
         smfc = None

コード例 #11

0

ファイルを表示

ファイル: log.py プロジェクト: benreynwar/wiktionary-parser

 def parse(self):
     """Split the text into Einsatz sections indexed by their slug.

     ``self.einsatz_sections`` is reset to an empty dict.  The text is
     chopped on ``Level2Block``: filler text is parsed as a
     ``FillerSection``; each level-2 block is parsed as an
     ``EinsatzSection`` and stored in ``self.einsatz_sections`` under its
     ``einsatz_slug`` property.  Every parsed section is appended to
     ``self.children``.

     Returns:
         This object (``self``).

     Raises:
         ParsingError: If two Einsatz sections have the same slug.
     """
     self.einsatz_sections = {}
     chopped = Chopper(self.text, [Level2Block],
                       filler_blocks=True, include_tags=True)
     for blk in chopped:
         if isinstance(blk, FillerBlock):
             # Unknown text is allowed before the first level-2 block.
             child = FillerSection(text=blk.text, parent=self).parse()
         else:
             child = EinsatzSection(text=blk.text, parent=self,
                                    heading=blk.start_tag).parse()
             slug = child.get_property('einsatz_slug')
             if slug in self.einsatz_sections:
                 raise ParsingError('EinsatzSection title is not unique.')
             self.einsatz_sections[slug] = child
         self.children.append(child)
     return self

コード例 #12

0

ファイルを表示

 def parse(self):
     """Build the child sections and the slug index of Einsatz sections.

     The text is chopped on ``Level2Block`` with filler blocks included.
     Filler becomes a parsed ``FillerSection``.  Each level-2 block becomes
     a parsed ``EinsatzSection`` (given the block's start tag as heading)
     and is recorded in ``self.einsatz_sections`` keyed by its
     ``einsatz_slug`` property.

     Returns:
         This object (``self``).

     Raises:
         ParsingError: If an ``einsatz_slug`` occurs more than once.
     """
     self.einsatz_sections = {}
     level2_blocks = Chopper(self.text, [Level2Block],
                             filler_blocks=True,
                             include_tags=True)
     for level2 in level2_blocks:
         # It's allowed to have some unknown text before the first
         # level2 block.
         is_filler = isinstance(level2, FillerBlock)
         if is_filler:
             parsed = FillerSection(text=level2.text, parent=self)
         else:
             parsed = EinsatzSection(text=level2.text,
                                     parent=self,
                                     heading=level2.start_tag)
         parsed = parsed.parse()
         if not is_filler:
             key = parsed.get_property('einsatz_slug')
             if key in self.einsatz_sections:
                 raise ParsingError('EinsatzSection title is not unique.')
             self.einsatz_sections[key] = parsed
         self.children.append(parsed)
     return self

コード例 #13

0

ファイルを表示

ファイル: log.py プロジェクト: benreynwar/wiktionary-parser

 def parse(self):
     """Extract the slug from the heading and parse the alert sections.

     ``self.heading`` must match ``== <title> ==``; the title is saved as
     the ``einsatz_slug`` property.  The text is chopped on ``Level3Block``.
     Filler text is parsed as a ``FillerSection`` and stored in
     ``self.top_section``.  Level-3 blocks are parsed as ``AlertSection``
     objects and stored in ``self.alert_sections`` by their ``alert_slug``
     property.  All parsed sections end up in ``self.children``.

     Returns:
         This section (``self``).

     Raises:
         ParsingError: If the heading cannot be matched or an
             ``alert_slug`` is seen twice.
     """
     title_match = re.compile('== (?P<title>.+) ==',
                              re.UNICODE).match(self.heading)
     if title_match:
         self.set_property('einsatz_slug', title_match.group('title'))
     else:
         raise ParsingError('Cannot parse log page.')
     for level3 in Chopper(self.text, [Level3Block],
                           filler_blocks=True, include_tags=True):
         # It's allowed to have some unknown text before the first
         # level3 block.
         if isinstance(level3, FillerBlock):
             parsed = FillerSection(text=level3.text, parent=self).parse()
             self.top_section = parsed
             self.children.append(parsed)
             continue
         parsed = AlertSection(text=level3.text, parent=self,
                               heading=level3.start_tag).parse()
         alert_key = parsed.get_property('alert_slug')
         if alert_key in self.alert_sections:
             raise ParsingError('Two EinsatzSections with same title.')
         self.alert_sections[alert_key] = parsed
         self.children.append(parsed)
     return self

コード例 #14

0

ファイルを表示

 def make_section(self, parent):
     """Wrap this block's text in a new, unparsed ``FillerSection``.

     Args:
         parent: Passed through as the ``parent`` of the new section.

     Returns:
         A ``FillerSection`` built from ``self.text``.
     """
     filler = FillerSection(text=self.text, parent=parent)
     return filler

コード例 #15

0

ファイルを表示

 def parse(self, shallow=False):
     """Parse a word-type section and register the word it describes.

     The text must chop into exactly one ``Level2Block``.  Its start tag is
     parsed as a ``simpleWordTypeTitleSection``, after which the
     ``wordtype`` property selects a word class from ``level2_mapping``.
     A word object is obtained through the class's ``get_and_update``,
     stored as the ``word`` property and appended to ``self.parent.words``.
     The block's text is then chopped on ``Level3Block``: filler text
     becomes a ``simpleWordTypeHeaderSection`` and level-3 blocks are kept
     as ``FillerSection`` children created with ``correct=True``.

     Args:
         shallow: When true, the level-3 children are attached without
             being parsed.

     Returns:
         ``self`` on success.  A ``FillerSection`` attached to
         ``self.parent`` is returned instead when:

         * the word type is not in ``level2_mapping`` (unparsed, with an
           alert appended to it);
         * the mapped word class is ``None`` (unparsed);
         * the title section is not readable (parsed, ``correct=False``).

     Raises:
         ParsingError: If the text does not contain exactly one level-2
             block.
     """
     super(simpleWordTypeSection, self).parse()
     level2_blocks = list(Chopper(self.text, [Level2Block]))
     if len(level2_blocks) != 1:
         raise ParsingError()
     only_block = level2_blocks[0]
     heading = only_block.start_tag
     body = only_block.text
     title_section = simpleWordTypeTitleSection(text=heading,
                                                parent=self).parse()
     wordtype = self.get_property('wordtype')

     # If we don't get a recognisable word type then we can't parse this
     # section.
     if wordtype not in level2_mapping:
         page_title = self.get_property('page').title
         filler = FillerSection(text=self.text, parent=self.parent)
         if wordtype in level3_mapping:
             # This should be a level 3 heading.
             message = "%s: The heading %s should be level 3 not level 2." % (
                 page_title, wordtype)
             fixed_text = u"===%s===%s" % (wordtype, body)
             alert = Level2_not_Level3(filler, fixed_text, message,
                                       page_title)
         else:
             message = '%s: The word type "%s" is not known.' % (
                 page_title, wordtype)
             alert = UnknownType(message=message, title=page_title)
         filler.alerts.append(alert)
         return filler

     # Get the Word Class associated with this type.  If there is no Word
     # Class then this section can be ignored.
     word_class = level2_mapping[wordtype]
     if word_class is None:
         return FillerSection(text=self.text, parent=self.parent)

     # Otherwise create a new Word object.  The per-class counter on the
     # parent supplies the order of this word among words of its class.
     order = self.parent.wordtypes.get(word_class, 0)
     self.parent.wordtypes[word_class] = order + 1
     new_word = word_class.get_and_update(
         title=self.parent.title,
         order=order,
         session=Session.object_session(self.parent),
         tags=self.get_property('tags'))
     self.set_property('word', new_word)
     self.parent.words.append(new_word)

     if not title_section.readable():
         replacement = FillerSection(text=self.text,
                                     parent=self.parent,
                                     correct=False)
         return replacement.parse()
     self.children.append(title_section)

     level3_blocks = Chopper(body, [Level3Block],
                             filler_blocks=True, include_tags=True)
     for blk in level3_blocks:
         if isinstance(blk, FillerBlock):
             child = simpleWordTypeHeaderSection(text=blk.text, parent=self)
         else:
             child = FillerSection(text=blk.text, parent=self, correct=True)
         if not shallow:
             child = child.parse()
         self.children.append(child)
     return self