Beispiel #1
0
 def start(self, content_handler: 'BoilerpipeBaseParser', tag_name: str,
           attrs: AttributesImpl) -> bool:
     """Build markup labels for an opening tag and register them.

     One label is created from the tag name, and further labels from the
     ``class`` and ``id`` attributes when they are present and non-empty.
     Before use, everything ``self.PAT_NUM`` matches in those attribute
     values is replaced with ``#``. Each label is then combined with every
     ancestor label, the result is handed to ``content_handler`` as a
     ``LabelAction``, and this tag's own labels are pushed onto
     ``self.label_stack``.

     :param content_handler: parser that receives the ``LabelAction`` via
         ``add_label_action``.
     :param tag_name: name of the tag being opened; appended to
         ``DefaultLabels.MARKUP_PREFIX`` to form the base label.
     :param attrs: attributes of the tag; only ``class`` and ``id`` are read.
     :return: ``self.is_block_level``.
     """
     prefix = DefaultLabels.MARKUP_PREFIX
     labels = [prefix + tag_name]

     # AttributesImpl.getValue() raises KeyError for a missing attribute;
     # get() returns None instead, which is what the guard below expects.
     class_val = attrs.get("class")
     if class_val:
         class_val = self.PAT_NUM.sub("#", class_val).strip()
         labels.append(f"{prefix}.{class_val.replace(' ', '.')}")
         # str.split() takes a literal separator, not a regex. Split on
         # single spaces and drop the empty strings produced by runs of
         # spaces, which is what splitting on "[ ]+" was meant to do.
         vals = [v for v in class_val.split(" ") if v]
         if len(vals) > 1:
             labels.extend(f"{prefix}.{v}" for v in vals)

     block_id = attrs.get("id")
     # Only a non-empty id yields a label (the length check was inverted).
     if block_id:
         block_id = self.PAT_NUM.sub("#", block_id)
         labels.append(f"{prefix}#{block_id}")

     ancestors = self.get_ancestor_labels()
     labels_with_ancestors = []
     for label in labels:
         # Every ancestor is emitted on its own and combined with the label.
         for ancestor in ancestors:
             labels_with_ancestors.append(ancestor)
             labels_with_ancestors.append(f"{ancestor} {label}")
         labels_with_ancestors.append(label)

     content_handler.add_label_action(LabelAction(*labels_with_ancestors))
     self.label_stack.append(labels)
     return self.is_block_level
Beispiel #2
0
 def start(self, content_handler: 'BoilerpipeBaseParser', tag_name: str,
           attrs: AttributesImpl) -> bool:
     """Push the font size implied by the tag's ``size`` attribute.

     An unsigned value is taken as an absolute size. A value with a leading
     sign is applied relative to the most recently pushed non-``None`` size
     on ``content_handler.font_size_stack``, or to 3 when there is none.
     ``None`` is pushed when the attribute is missing or does not match
     ``self.PAT_FONT_SIZE``, so exactly one entry is appended per call.

     :param content_handler: parser whose ``font_size_stack`` is read and
         appended to.
     :param tag_name: name of the tag being opened (unused).
     :param attrs: attributes of the tag; only ``size`` is read.
     :return: always ``False``.
     """
     size = None
     # AttributesImpl.getValue() raises KeyError for a missing attribute;
     # get() returns None so a tag without ``size`` pushes None.
     size_attr = attrs.get("size")
     if size_attr is not None:
         match = self.PAT_FONT_SIZE.match(size_attr)
         if match is not None:
             # NOTE(review): assumes PAT_FONT_SIZE captures the optional
             # sign as group 1 and the digit(s) as group 2 -- confirm
             # against the pattern. group(0) is the whole match, so it
             # cannot be used as the sign.
             rel = match.group(1)
             val = int(match.group(2))
             if not rel:
                 # absolute
                 size = val
             else:
                 # relative: last non-None element from stack, default 3
                 prev_size = next(
                     (s for s in reversed(content_handler.font_size_stack)
                      if s is not None),
                     3)
                 if rel[0] == '+':
                     size = prev_size + val
                 else:
                     size = prev_size - val
     content_handler.font_size_stack.append(size)
     return False