Exemple #1
0
def handle_col_floats(root):
    """Adjust the float-related classes of infobox elements.

    Every element whose ``class`` attribute contains the substring
    ``infobox`` is measured with ``node_size``. The element is tagged with
    ``pp_no_float`` when its height is below ``config.min_float_height``,
    or when its height is below twice that value and
    ``node_is_floatable(node, width, height)`` is truthy. Otherwise, if the
    element's ``class`` mentions ``pp_float_table``, that class is removed.
    """
    for box in root.xpath('//*[contains(@class, "infobox")]'):
        width, height = node_size(box)
        limit = config.min_float_height

        if height < limit:
            add_no_float = True
        elif height < 2 * limit:
            # Only elements in the [limit, 2 * limit) band are passed to
            # node_is_floatable, exactly as a short-circuiting `or`/`and`
            # would do.
            add_no_float = node_is_floatable(box, width, height)
        else:
            add_no_float = False

        if add_no_float:
            utils.append_class(box, "pp_no_float")
            continue

        if "pp_float_table" in box.get("class", ""):
            utils.remove_class(box, "pp_float_table")
Exemple #2
0
def map_classes(article):
    """Rename CSS classes on the elements of ``article.dom``.

    The rename table comes from ``get_map(_class_map, article.language)``
    and maps an existing class name to its replacement. When the lookup
    yields nothing (a falsy value) the DOM is left untouched.

    For every element that has a ``class`` attribute, each key of the table
    that appears as a whole class token on the element is removed and the
    mapped class is appended in its place. Tokens are evaluated against the
    element's class list as it was before any renaming, so a replacement
    added here is not itself renamed again on the same element.
    """
    class_map = get_map(_class_map, article.language)
    if not class_map:
        return
    for node in article.dom.xpath("//*[@class]"):
        # split() with no argument splits on any run of whitespace, so
        # class tokens separated by tabs, newlines or repeated spaces are
        # recognised too (split(" ") would miss them or yield "" tokens).
        present = set(node.get("class").split())
        for old_cls in class_map:
            if old_cls in present:
                utils.remove_class(node, old_cls)
                utils.append_class(node, class_map[old_cls])
Exemple #3
0
def map_class_to_style(article):
    """Replace selected CSS classes with an equivalent inline style.

    The table comes from ``get_map(_class_to_style_map, article.language)``
    and maps a class name to a ``(style_attr, style_val)`` pair. When the
    lookup yields nothing (a falsy value) the DOM is left untouched.

    For each class in the table, every element of ``article.dom`` that
    carries that class receives the style via ``utils.add_node_style`` and
    then has the class removed via ``utils.remove_class``.
    """
    class_to_style_map = get_map(_class_to_style_map, article.language)
    if not class_to_style_map:
        return
    for css_class in class_to_style_map:
        style_attr, style_val = class_to_style_map[css_class]
        # Space-padded so the check below only matches on token boundaries.
        needle = " {} ".format(css_class)
        candidates = article.dom.xpath(
            '//*[contains(@class, "{}")]'.format(css_class))
        for node in candidates:
            # XPath contains() is a plain substring test, so a query for
            # "foo" also returns elements whose class is "foobar". Confirm
            # the class is present as a whole token before touching the
            # node; whitespace is normalised first so tabs/newlines between
            # tokens are handled as well.
            tokens = " {} ".format(" ".join(node.get("class", "").split()))
            if needle not in tokens:
                continue
            utils.add_node_style(node, style_attr, style_val)
            utils.remove_class(node, css_class)
Exemple #4
0
def add_figure_numbers(root):
    """Number figure-like elements per article and attach captions.

    For every ``<article>`` returned by ``root.xpath("//article")``, each
    descendant whose ``class`` attribute contains ``pp_singlecol``,
    ``pp_figure`` or ``pp_twocol_span`` is numbered
    ``<pp_article_num>.<n>``, where ``n`` restarts at 1 in each article.
    The ``infobox`` class is removed from every such element.

    Each numbered element gets a ``<p class="pp_figure_ref">`` reference
    paragraph added as its next sibling (or as the next sibling of its
    wrapper), plus a bold "Figure x.y " label:

    * A ``pp_figure`` element that contains a descendant whose class
      contains ``thumbcaption`` has the label inserted at the start of the
      first such caption, which is also tagged ``pp_figure_caption``.
    * Any other element is wrapped in a ``<div>`` carrying the matched
      class, a new ``<div class="pp_figure_caption">`` holding the label is
      appended to the wrapper, and the matched class is removed from the
      original element.

    ``_combine_references(root)`` is called once at the end.

    NOTE(review): assumes every article has a ``pp_article_num`` attribute;
    a missing one would make the ``".".join`` below fail.
    """
    # "infobox" is deliberately absent from this list: infoboxes are not
    # referenced despite floating.
    classes = [
        "pp_singlecol",
        "pp_figure",
        "pp_twocol_span",
    ]
    pred = " or ".join('contains(@class, "{}")'.format(cls) for cls in classes)
    for article in root.xpath("//article"):
        article_num = article.get("pp_article_num")
        figures = article.xpath(".//*[{}]".format(pred))
        for figure_num, node in enumerate(figures, 1):
            utils.remove_class(node, "infobox")
            # First entry of `classes` (in list order) found in the
            # element's class attribute; the XPath predicate guarantees at
            # least one match.
            cls = [c for c in classes if c in node.get("class")][0]
            nr = ".".join([article_num, str(figure_num)])
            caption_txt = "Figure {nr} ".format(nr=nr)
            reference = E.p({"class": "pp_figure_ref"}, u"\u21AA " + caption_txt)
            if cls == "pp_figure":
                caption = node.xpath('.//*[contains(@class, "thumbcaption")]')
                if caption:
                    node.addnext(reference)
                    caption = caption[0]
                    prefix = E.b(caption_txt)
                    caption.insert(0, prefix)
                    # Move the caption's leading text behind the new <b>
                    # so the label comes first in document order.
                    prefix.tail = caption.text
                    caption.text = None
                    utils.append_class(caption, "pp_figure_caption")
                    continue
            # No existing caption to reuse: wrap the element and give the
            # wrapper its own caption block.
            wrapper = utils.wrap_node(node, "div", {"class": cls})
            caption = E.div({"class": "pp_figure_caption"}, E.b(caption_txt))
            wrapper.append(caption)
            utils.remove_class(node, cls)
            wrapper.addnext(reference)
    _combine_references(root)
Exemple #5
0
def remove_low_ppi(root):
    """Remove the ``low-ppi`` class from matching ``<img>`` elements.

    Images are selected with an XPath ``contains()`` test on the ``class``
    attribute, i.e. any ``<img>`` whose class string includes ``low-ppi``
    as a substring is passed to ``utils.remove_class``.
    """
    for image in root.xpath('//img[contains(@class, "low-ppi")]'):
        utils.remove_class(image, "low-ppi")