Beispiel #1
0
def limit_widths(root):
    """
    Clamp the CSS width of every descendant of ``root`` to its measured width.

    For each descendant the width obtained from
    ``utils.get_node_width(node, "px")`` is compared with the width reported
    by ``node_size(node)``. If the CSS value is larger, the node width is
    rewritten through ``utils.change_node_width``.

    :param root: element whose descendants are processed; it has to provide
        ``iterdescendants()``
    """
    for node in root.iterdescendants():
        css_width = utils.get_node_width(node, "px")
        # only the measured width is needed here, the height is ignored
        width, _height = node_size(node)
        # A missing CSS width (None) never exceeded the measured width under
        # Python 2 ordering; the explicit check keeps that behaviour and also
        # makes the comparison valid on Python 3.
        if css_width is not None and css_width > width:
            # NOTE(review): the CSS width is read in px while the measured
            # width is written back as pt - confirm the unit of node_size().
            utils.change_node_width(node, "{:.2f}pt".format(width))
Beispiel #2
0
def fix_image_tables(root):
    """
    Adjust the layout of short, non-infobox tables that contain image links.

    Every matching table loses its "margin" style and gets the class
    "image-table". The widest image per column position is collected and the
    maxima are summed up:

    * if the sum, scaled by ``config.px2pt``, exceeds ``config.page_width_pt``
      the table additionally gets the class "wide-image-table", explicit
      image widths are removed from its cells and each cell receives a
      percentage width proportional to its widest image;
    * otherwise, if any width was found, every image of the table is passed
      to ``_resize_image_node_width_to_pt``.

    :param root: parsed document tree providing ``xpath()``
    """
    img_tables = root.xpath(
        '//table[contains(@class, "short-table") and not(contains(@class, "infobox")) and .//a[contains(@class, "image")]]'
    )
    for table in img_tables:
        utils.remove_node_styles(table, "margin")
        utils.append_class(table, "image-table")

        # widest image per column index, in px
        max_widths = {}
        for row in table.xpath(".//tr"):
            for n, column in enumerate(row.xpath(".//td")):
                for img in column.xpath(".//img"):
                    width = utils.get_node_width(img, target_unit="px")
                    # "or 0" treats a missing width like 0 without relying on
                    # the Python 2 ordering of None
                    max_widths[n] = max(width or 0, max_widths.get(n, 0))

        total_width = sum(max_widths.values())
        if total_width * config.px2pt > config.page_width_pt:
            utils.append_class(table, "wide-image-table")
            for row in table.xpath(".//tr"):
                for n, column in enumerate(row.xpath(".//td")):
                    _remove_inner_image_node_width(column, "image")
                    utils.remove_node_styles(column, ["padding-left", "padding", "margin"])
                    # float() forces true division: with integer widths the
                    # Python 2 "/" operator would truncate every share to 0
                    share = float(max_widths.get(n, 0)) / total_width * 100
                    utils.add_node_style(column, "width", "{}%".format(share))
        elif total_width > 0:
            for img in table.xpath(".//img"):
                _resize_image_node_width_to_pt(img)
Beispiel #3
0
def remove_img_style_size(root):
    """
    Replace explicit sizes of thumbnail containers with a column class.

    Processes every ``div`` whose class contains "thumb" but none of
    "tmulti", "thumbinner", "thumbcaption" or "thumbimage". Containers whose
    class contains "map" are skipped. For the remaining ones the size styles
    of inner "thumbinner" nodes are dropped, the width of the first image (in
    pt) is mapped to a class "col-N" with N being 0, 4, 8 or 12, and the
    explicit widths of container and image are removed.

    :param root: parsed document tree providing ``xpath()``
    """
    xpath_conditions = [
        'contains(@class,"thumb") ',
        'and not(contains(@class, "tmulti"))',
        'and not(contains(@class, "thumbinner"))',
        'and not(contains(@class, "thumbcaption"))',
        'and not(contains(@class, "thumbimage"))',
    ]
    containers = root.xpath("//div[{}]".format(" ".join(xpath_conditions)))
    for container in containers:
        css_class = container.attrib.get("class", "")
        if "map" in css_class:
            continue

        for inner in container.xpath('.//*[contains(@class,"thumbinner")]'):
            utils.remove_node_styles(inner, ["width", "height", "max-width"])

        images = container.xpath(".//img")
        if not images:
            log.debug("No <img> found in {}".format(etree.tostring(container)))
            continue

        first_img = images[0]
        width = utils.get_node_width(first_img, target_unit="pt")
        utils.remove_node_styles(first_img, ["width", "height"])

        # one step spans four layout columns; at most three steps are used
        steps = min(int(round(width / (column_width_pt * 4))), 3)
        utils.append_class(container, "col-{}".format(steps * 4))
        utils.remove_node_width(container)
        utils.remove_node_width(first_img)
Beispiel #4
0
def fix_abspos_overlays(root):
    """
    Convert pixel offsets of absolutely positioned overlays to percentages.

    For every element whose style mentions "position" and "relative" the
    reference size is taken from the element itself or, if width or height
    is missing, from the first image inside it (containers without an image
    are skipped). The "left" and "top" styles of all descendants whose style
    mentions "position" and "absolute" are then rewritten relative to that
    size. Values already given in percent, values in other units and values
    that cannot be converted are left untouched.

    :param root: parsed document tree providing ``xpath()``
    """
    for container in root.xpath(
        '//*[contains(@style, "position") and contains(@style, "relative")]'
    ):
        w = utils.get_node_width(container, target_unit="px")
        h = utils.get_node_height(container, target_unit="px")
        if not (w and h):
            img = container.xpath(".//img")
            if not img:
                continue
            w, h = get_img_size(img[0])

        for node in container.xpath(
            './/*[contains(@style, "position") and contains(@style, "absolute")]'
        ):
            style = utils.get_node_style(node)
            # pair each offset with the extent it is relative to; both values
            # are read before any style of the node is modified
            offsets = [("left", style.get("left"), w), ("top", style.get("top"), h)]
            for attr, val, extent in offsets:
                if not val or val.endswith("%"):
                    continue
                if val.endswith("px"):
                    val = val[:-2]
                elif not val.isdigit():
                    # any other unit cannot be converted here
                    continue
                try:
                    # float(extent) avoids Python 2 integer truncation; a
                    # non-numeric or missing extent is skipped, not raised
                    new_val = 100 * int(float(val)) / float(extent)
                except (TypeError, ValueError, ZeroDivisionError):
                    continue
                utils.add_node_style(node, attr, "{}%".format(new_val))
Beispiel #5
0
def limit_size(root):
    """
    Scale images that are not marked "inline" to fit the page limits.

    The limits are 6.03 cm width and 7.5 cm height (5 cm for images inside a
    table), converted with ``config.cm2px``. Images exceeding a limit are
    scaled down. Images wider than 70% of the maximum width are scaled up to
    the full width, provided the resulting height stays below the height
    limit. When an image is scaled, the explicit widths and heights of all
    its ancestors are scaled by the same factor.

    Images whose size is zero or not numeric are left untouched.

    :param root: parsed document tree providing ``xpath()``
    """
    import numbers

    max_height_outside = 7.5 * config.cm2px
    max_height_in_table = 5 * config.cm2px
    max_width = 6.03 * config.cm2px

    for img in root.xpath('//img[not(contains(@class, "inline"))]'):
        w, h = get_img_size(img)
        # both dimensions are used as divisors below, so both have to be
        # real numbers (this skips strings and None alike)
        if not (isinstance(w, numbers.Real) and isinstance(h, numbers.Real)):
            continue
        if w == 0 or h == 0:
            continue

        in_table = any(node.tag == "table" for node in img.iterancestors())
        max_height = max_height_in_table if in_table else max_height_outside

        # downscale if dimensions too big
        scale_factor = min(1, max_height / h, max_width / w)
        # upscale image to full width, if the image is wider than 70% of max width
        if scale_factor == 1:
            scaled_height = max_width / w * h
            if 0.7 * max_width < w < max_width and scaled_height < max_height:
                scale_factor = max_width / w

        if scale_factor == 1:
            continue

        img.set("width", str(w * scale_factor))
        img.set("height", str(h * scale_factor))

        # separate names keep the image size from being overwritten
        for ancestor in list(img.iterancestors()):
            ancestor_w = utils.get_node_width(ancestor, "px")
            ancestor_h = utils.get_node_height(ancestor, "px")
            if ancestor_w:
                utils.change_node_width(ancestor, ancestor_w * scale_factor, unit="px")
            if ancestor_h:
                utils.change_node_height(ancestor, ancestor_h * scale_factor, unit="px")
Beispiel #6
0
def get_img_size(node):
    """
    Return the size of a node as a ``(width, height)`` tuple.

    The values come from ``utils.get_node_width`` and
    ``utils.get_node_height`` called without a target unit.
    NOTE(review): presumably the default unit is px - confirm in utils.

    :param node: node to measure
    :return: ``(width, height)``, or ``(0, 0)`` if a lookup raises TypeError
    """
    try:
        size = (utils.get_node_width(node), utils.get_node_height(node))
    except TypeError:
        size = (0, 0)
    return size
Beispiel #7
0
def _resize_image_node_width_to_pt(node):
    """
    Shrink the width of an ``img`` node by the factor ``config.px2pt``.

    The width is read in px, all explicit width/height information is removed
    from the node and the scaled width is written back as a "width" style in
    px. Nodes with any other tag are ignored.

    :param node: node to resize
    """
    if node.tag == "img":
        px_width = utils.get_node_width(node, target_unit="px")
        utils.remove_node_styles(node, ["width", "height"])
        utils.remove_node_width(node)
        scaled_width = px_width * config.px2pt
        utils.add_node_style(node, "width", "{}px".format(scaled_width))
Beispiel #8
0
def _remove_inner_image_node_width(node, inner_class="thumbinner"):
    """
    Strip explicit size information from an image node and its wrappers.

    The styles "width", "height" and "max-width" are removed from ``node``
    and from every descendant whose class contains ``inner_class``. The first
    image below ``node`` additionally loses its width/height styles and its
    explicit width.

    Side effect: ``node`` is removed if it does not contain an image!

    :param node: node that is expected to contain an image
    :param inner_class: class of the wrapper nodes, "thumbinner" or "thumbimage"
    :return: width of the first image in pt before it was removed, 0 if the
        node contains no image
    """
    utils.remove_node_styles(node, ["width", "height", "max-width"])
    for wrapper in node.xpath('.//*[contains(@class,"{}")]'.format(inner_class)):
        utils.remove_node_styles(wrapper, ["width", "height", "max-width"])

    images = node.xpath(".//img")
    if images:
        first_img = images[0]
        original_width = utils.get_node_width(first_img, target_unit="pt")
        utils.remove_node_styles(first_img, ["width", "height"])
        utils.remove_node_width(first_img)
        return original_width

    log.debug("No <img> found in {}. Removing node.".format(etree.tostring(node)))
    utils.remove_node(node)
    return 0