Exemple #1
0
 def parse(self):
     """Parse the level-2 heading and split the text into alert sections.

     The heading must begin with ``== <title> ==``; the captured title is
     stored as the ``einsatz_slug`` property.  The text is then chopped
     into level-3 blocks.  Filler blocks become parsed ``FillerSection``
     children (the most recent one is also kept as ``self.top_section``);
     every other block becomes a parsed ``AlertSection`` registered in
     ``self.alert_sections`` under its ``alert_slug`` property.  All
     children are appended to ``self.children`` in document order.

     Returns:
         This section (``self``).

     Raises:
         ParsingError: if the heading does not match, or if two alert
             sections share the same ``alert_slug``.
     """
     heading_match = re.match('== (?P<title>.+) ==', self.heading,
                              re.UNICODE)
     if heading_match is None:
         raise ParsingError('Cannot parse log page.')
     self.set_property('einsatz_slug', heading_match.group('title'))

     blocks = Chopper(self.text, [Level3Block],
                      filler_blocks=True,
                      include_tags=True)
     for block in blocks:
         if not isinstance(block, FillerBlock):
             child = AlertSection(text=block.text,
                                  parent=self,
                                  heading=block.start_tag).parse()
             if child.get_property('alert_slug') in self.alert_sections:
                 raise ParsingError('Two EinsatzSections with same title.')
             self.alert_sections[child.get_property('alert_slug')] = child
         else:
             # Unknown text ahead of the first level-3 block is tolerated.
             child = FillerSection(text=block.text, parent=self).parse()
             self.top_section = child
         self.children.append(child)
     return self
Exemple #2
0
 def parse(self, shallow=False):
     """Split a language section into its title and word-type children.

     The section text must contain exactly one level-2 block.  Its start
     tag is parsed as a ``deLangTitleSection``; if that title section is
     not readable, the whole text is handed back as a parsed
     ``FillerSection`` (``correct=False``) attached to this section's
     parent.  Otherwise the title is appended to ``self.children`` and
     the block's text is chopped into level-3 blocks: filler blocks become
     ``FillerSection`` (``correct=True``), all others ``deWortartSection``.

     Args:
         shallow: when true, the level-3 children are appended without
             being parsed.

     Returns:
         ``self``, or the replacement ``FillerSection`` parse result when
         the title is unreadable.

     Raises:
         ParsingError: if the text does not hold exactly one level-2 block.
     """
     super(deLanguageSection, self).parse()

     level2_blocks = list(Chopper(self.text, [Level2Block]))
     if len(level2_blocks) != 1:
         raise ParsingError()
     language_block = level2_blocks[0]
     heading_text = language_block.start_tag
     body_text = language_block.text

     title_section = deLangTitleSection(text=heading_text,
                                        parent=self).parse()
     if not title_section.readable():
         # Unreadable title: treat the entire section as incorrect filler.
         return FillerSection(text=self.text,
                              parent=self.parent,
                              correct=False).parse()
     self.children.append(title_section)

     sub_blocks = Chopper(body_text, [Level3Block],
                          filler_blocks=True,
                          include_tags=True)
     for block in sub_blocks:
         if not isinstance(block, FillerBlock):
             child = deWortartSection(text=block.text, parent=self)
         else:
             child = FillerSection(text=block.text,
                                   parent=self,
                                   correct=True)
         parsed_child = child if shallow else child.parse()
         self.children.append(parsed_child)
     return self
Exemple #3
0
 def parse(self):
     """Extract the alert slug from this section's level-3 heading.

     The heading must begin with ``=== <title> ===`` where the title is
     made of word characters and whitespace; the captured title is stored
     as the ``alert_slug`` property.

     Returns:
         This section (``self``).

     Raises:
         ParsingError: if the heading does not have the expected form.
     """
     # Raw string so ``\w`` and ``\s`` reach the regex engine unchanged
     # instead of being treated as (invalid) string escapes.
     match = re.match(r'=== (?P<title>[\w\s]+) ===', self.heading,
                      re.UNICODE)
     if match is None:
         # Show the offending heading before failing.  The parenthesised
         # form is valid as both a print statement and a print() call.
         print(self.heading)
         raise ParsingError('Cannot parse log page.')
     self.set_property('alert_slug', match.group('title'))
     return self
    def parse(self):
        """Split a word-type section into its title and content children.

        The section text must contain exactly one level-3 block.  The
        block's start tag is parsed as a ``deWortartTitleSection`` and its
        text as a ``deWortartContentSection``; both are appended to
        ``self.children``, title first.

        Returns:
            This section (``self``).

        Raises:
            ParsingError: if the text does not hold exactly one level-3
                block.
        """
        super(deWortartSection, self).parse()
        l3bs = list(Chopper(self.text, [Level3Block]))
        if len(l3bs) != 1:
            # Fail immediately; a debugger breakpoint here would block
            # the process waiting for console input.
            raise ParsingError()
        block = l3bs[0]
        title = block.start_tag
        content = block.text
        wortart_title_sec = deWortartTitleSection(text=title,
                                                  parent=self).parse()
        # NOTE: the title section is not checked with readable(); it is
        # appended as a child regardless.
        wortart_content = deWortartContentSection(text=content,
                                                  parent=self).parse()
        self.children.append(wortart_title_sec)
        self.children.append(wortart_content)
        return self
Exemple #5
0
 def parse(self):
     """Chop the text into level-2 blocks and build the child sections.

     ``self.einsatz_sections`` is reset to an empty dict.  Filler blocks
     become parsed ``FillerSection`` children; every other block becomes a
     parsed ``EinsatzSection`` that is registered in
     ``self.einsatz_sections`` under its ``einsatz_slug`` property.  All
     children are appended to ``self.children`` in document order.

     Returns:
         This object (``self``).

     Raises:
         ParsingError: if two Einsatz sections share the same
             ``einsatz_slug``.
     """
     self.einsatz_sections = {}
     blocks = Chopper(self.text, [Level2Block],
                      filler_blocks=True,
                      include_tags=True)
     for block in blocks:
         if not isinstance(block, FillerBlock):
             child = EinsatzSection(text=block.text,
                                    parent=self,
                                    heading=block.start_tag).parse()
             if child.get_property('einsatz_slug') in self.einsatz_sections:
                 raise ParsingError('EinsatzSection title is not unique.')
             self.einsatz_sections[child.get_property('einsatz_slug')] = child
         else:
             # Unknown text ahead of the first level-2 block is tolerated.
             child = FillerSection(text=block.text, parent=self).parse()
         self.children.append(child)
     return self
Exemple #6
0
 def parse(self, shallow=False):
     """Parse a level-2 word-type section and register its word.

     The section text must contain exactly one level-2 block.  Its start
     tag is parsed as a ``simpleWordTypeTitleSection``, after which the
     ``wordtype`` property is read from this section.

     * If the word type is not in ``level2_mapping``, an unparsed
       ``FillerSection`` carrying an alert is returned
       (``Level2_not_Level3`` when the type is in ``level3_mapping``,
       ``UnknownType`` otherwise).
     * If the mapped word class is ``None``, an unparsed ``FillerSection``
       without alerts is returned.
     * Otherwise a word is obtained through ``word_class.get_and_update``,
       stored as the ``word`` property and appended to
       ``self.parent.words``; ``self.parent.wordtypes`` counts how many
       words of each class have been seen and supplies ``order``.

     When the title section is not readable, a parsed ``FillerSection``
     (``correct=False``) is returned instead of ``self``.  Otherwise the
     block's text is chopped into level-3 blocks: filler blocks become
     ``simpleWordTypeHeaderSection`` children and all other blocks become
     ``FillerSection`` children (``correct=True``).

     Args:
         shallow: when true, the level-3 children are appended without
             being parsed.

     Returns:
         ``self``, or a ``FillerSection`` in the fallback cases above.

     Raises:
         ParsingError: if the text does not hold exactly one level-2 block.
     """
     super(simpleWordTypeSection, self).parse()

     level2_blocks = list(Chopper(self.text, [Level2Block]))
     if len(level2_blocks) != 1:
         raise ParsingError()
     type_block = level2_blocks[0]
     heading_text = type_block.start_tag
     body_text = type_block.text

     title_section = simpleWordTypeTitleSection(text=heading_text,
                                                parent=self).parse()
     wordtype = self.get_property('wordtype')

     # Without a recognisable word type this section cannot be parsed.
     if wordtype not in level2_mapping:
         page_title = self.get_property('page').title
         filler = FillerSection(text=self.text, parent=self.parent)
         if wordtype not in level3_mapping:
             message = '%s: The word type "%s" is not known.' % (page_title,
                                                                 wordtype)
             alert = UnknownType(message=message, title=page_title)
         else:
             # The heading is valid, but only at level 3.
             message = "%s: The heading %s should be level 3 not level 2." % (
                 page_title, wordtype)
             fixed_text = u"===%s===%s" % (wordtype, body_text)
             alert = Level2_not_Level3(filler, fixed_text, message,
                                       page_title)
         filler.alerts.append(alert)
         return filler

     # A mapping to None marks a word type that is deliberately ignored.
     word_class = level2_mapping[wordtype]
     if word_class is None:
         return FillerSection(text=self.text, parent=self.parent)

     # ``order`` is the number of words of this class seen before this one.
     counters = self.parent.wordtypes
     if word_class in counters:
         order = counters[word_class]
         counters[word_class] += 1
     else:
         counters[word_class] = 1
         order = 0

     word = word_class.get_and_update(
         title=self.parent.title,
         order=order,
         session=Session.object_session(self.parent),
         tags=self.get_property('tags'))
     self.set_property('word', word)
     self.parent.words.append(word)

     if not title_section.readable():
         return FillerSection(text=self.text,
                              parent=self.parent,
                              correct=False).parse()
     self.children.append(title_section)

     sub_blocks = Chopper(body_text, [Level3Block],
                          filler_blocks=True,
                          include_tags=True)
     for block in sub_blocks:
         if not isinstance(block, FillerBlock):
             child = FillerSection(text=block.text,
                                   parent=self,
                                   correct=True)
         else:
             child = simpleWordTypeHeaderSection(text=block.text,
                                                 parent=self)
         parsed_child = child if shallow else child.parse()
         self.children.append(parsed_child)
     return self