def start(self, content_handler: 'BoilerpipeBaseParser', tag_name: str, attrs: AttributesImpl) -> bool:
    """
    Build the markup labels for the element being opened and register them.

    Labels are derived from the tag name, the ``class`` attribute (the whole value plus one label
    per class name when there are several) and the ``id`` attribute. Every match of
    ``self.PAT_NUM`` in the class/id values is replaced with ``#`` before the label is built.
    Each label is then emitted on its own and combined with every ancestor label, the result is
    handed to ``content_handler.add_label_action`` and the plain labels are pushed onto
    ``self.label_stack``.

    :param content_handler: parser that receives the resulting ``LabelAction``.
    :param tag_name: name of the element being opened.
    :param attrs: attributes of the element being opened.
    :return: ``self.is_block_level``.
    """
    prefix = DefaultLabels.MARKUP_PREFIX
    labels = [prefix + tag_name]

    # Use .get() for both attributes: it yields None when the attribute is absent, which is what
    # the checks below expect (xml.sax's getValue() raises KeyError instead).
    class_val = attrs.get("class")
    if class_val:
        class_val = self.PAT_NUM.sub("#", class_val).strip()
        labels.append(f"{prefix}.{class_val.replace(' ', '.')}")
        # str.split() takes a literal separator, not a regex; split on whitespace runs instead.
        class_names = class_val.split()
        if len(class_names) > 1:
            labels.extend(f"{prefix}.{name}" for name in class_names)

    block_id = attrs.get("id")
    # The previous ``len(block_id) < 0`` test could never be true, so id labels were never added.
    if block_id:
        block_id = self.PAT_NUM.sub("#", block_id)
        labels.append(f"{prefix}#{block_id}")

    ancestors = self.get_ancestor_labels()
    labels_with_ancestors = []
    for label in labels:
        for ancestor in ancestors:
            labels_with_ancestors.append(ancestor)
            labels_with_ancestors.append(f"{ancestor} {label}")
        labels_with_ancestors.append(label)

    content_handler.add_label_action(LabelAction(*labels_with_ancestors))
    self.label_stack.append(labels)
    return self.is_block_level
def start(self, content_handler: 'BoilerpipeBaseParser', tag_name: str, attrs: AttributesImpl) -> bool:
    """
    Push the font size implied by this tag's ``size`` attribute.

    The attribute is matched against ``self.PAT_FONT_SIZE``. An unsigned value is taken as an
    absolute size; a value prefixed with ``+`` or ``-`` is applied to the most recent
    non-``None`` entry of ``content_handler.font_size_stack`` (3 when there is none). ``None`` is
    pushed when the attribute is missing or cannot be parsed, so exactly one entry is appended
    per call.

    :param content_handler: parser whose ``font_size_stack`` receives the new entry.
    :param tag_name: name of the tag being opened (unused).
    :param attrs: attributes of the tag being opened.
    :return: always ``False``.
    """
    size = None
    # .get() yields None when the attribute is absent; xml.sax's getValue() raises KeyError.
    size_attr = attrs.get("size")
    match = self.PAT_FONT_SIZE.match(size_attr) if size_attr is not None else None
    if match is not None:
        # Parse the whole matched text so the result does not depend on the pattern's group layout.
        # NOTE(review): assumes PAT_FONT_SIZE matches an optional sign followed by digits - confirm.
        matched = match.group(0)
        sign = matched[:1] if matched[:1] in ("+", "-") else ""
        digits = matched[len(sign):]
        if digits.isdecimal():
            val = int(digits)
            if not sign:
                # absolute
                size = val
            else:
                # relative to the last non-None element of the stack, default 3
                stack = content_handler.font_size_stack
                prev_size = next((s for s in reversed(stack) if s is not None), 3)
                size = prev_size + val if sign == "+" else prev_size - val
    content_handler.font_size_stack.append(size)
    return False