def fix_election_charts(root):
    """Give election chart nodes a full width when they declare none.

    Selects every ``div`` that is a child of a ``div`` whose style contains
    ``relative;`` which in turn is a child of a ``div`` with the exact class
    ``float-right``. Each selected node without a ``width`` entry in its
    style gets ``width: 100%`` added; nodes that already have one are left
    untouched.
    """
    chart_query = (
        '//div[@class="float-right"]'
        '/div[contains(@style, "relative;")]'
        "/div"
    )
    for chart_node in root.xpath(chart_query):
        if "width" in utils.get_node_style(chart_node):
            continue
        utils.add_node_style(chart_node, "width", "100%")
def fix_image_tables(root):
    """Tag tables that hold images and fit their images to the page.

    A table qualifies when its class contains ``short-table`` but not
    ``infobox`` and it has a descendant ``a`` whose class contains ``image``.
    Each such table loses its ``margin`` style and gets the class
    ``image-table``.

    The widest image (in px) is then collected per column index over all
    rows, and the sum of these widths decides what happens next:

    * if the sum, converted with ``config.px2pt``, exceeds
      ``config.page_width_pt`` the table is additionally classed
      ``wide-image-table`` and every cell gets a percentage width
      proportional to its column's widest image;
    * otherwise, if the sum is positive, every image in the table is passed
      to ``_resize_image_node_width_to_pt``.
    """
    table_query = (
        '//table[contains(@class, "short-table")'
        ' and not(contains(@class, "infobox"))'
        ' and .//a[contains(@class, "image")]]'
    )
    for table in root.xpath(table_query):
        utils.remove_node_styles(table, "margin")
        utils.append_class(table, "image-table")

        # Widest image per column index, gathered across all rows.
        widest_by_col = {}
        for row in table.xpath(".//tr"):
            for col_idx, cell in enumerate(row.xpath(".//td")):
                for img in cell.xpath(".//img"):
                    img_px = utils.get_node_width(img, target_unit="px")
                    widest_by_col[col_idx] = max(
                        img_px, widest_by_col.get(col_idx, 0)
                    )
        total_width = sum(widest_by_col.values())

        if total_width * config.px2pt > config.page_width_pt:
            # Too wide for the page: switch the cells to relative widths.
            utils.append_class(table, "wide-image-table")
            for row in table.xpath(".//tr"):
                for col_idx, cell in enumerate(row.xpath(".//td")):
                    _remove_inner_image_node_width(cell, "image")
                    utils.remove_node_styles(
                        cell, ["padding-left", "padding", "margin"]
                    )
                    share = widest_by_col.get(col_idx, 0) / total_width * 100
                    utils.add_node_style(cell, "width", "{}%".format(share))
            continue

        if total_width > 0:
            for img in table.xpath(".//img"):
                _resize_image_node_width_to_pt(img)
def fix_abspos_overlays(root):
    """Convert pixel offsets of absolutely positioned overlays to percent.

    Every node whose style contains both ``position`` and ``relative`` is
    treated as a container. Its reference size is its own width and height
    in px; if either is missing or zero, the size of the first ``img`` below
    the container (as reported by ``get_img_size``) is used instead, and
    containers without any image are skipped.

    Inside a container, each descendant whose style contains ``position``
    and ``absolute`` has its ``left`` and ``top`` style values rewritten as a
    percentage of the container width and height respectively. Values that
    are absent, already a percentage, use a unit other than ``px``, or
    cannot be converted are left as they are.
    """
    container_query = (
        '//*[contains(@style, "position")'
        ' and contains(@style, "relative")]'
    )
    overlay_query = (
        './/*[contains(@style, "position")'
        ' and contains(@style, "absolute")]'
    )
    for container in root.xpath(container_query):
        w = utils.get_node_width(container, target_unit="px")
        h = utils.get_node_height(container, target_unit="px")
        if not (w and h):
            images = container.xpath(".//img")
            if not images:
                continue
            w, h = get_img_size(images[0])

        # "left" is measured against the width, "top" against the height.
        reference = (("left", w), ("top", h))
        for node in container.xpath(overlay_query):
            style = utils.get_node_style(node)
            for attr, extent in reference:
                val = style.get(attr)
                if not val or val.endswith("%"):
                    continue
                if val.endswith("px"):
                    val = val[:-2]
                elif not val.isdigit():
                    # Any other unit (em, pt, ...) cannot be converted here.
                    continue
                try:
                    new_val = 100 * int(float(val)) / extent
                except (ValueError, ZeroDivisionError):
                    continue
                utils.add_node_style(node, attr, "{}%".format(new_val))
def set_figure_div_size(root):
    """Set the width of ``thumb`` divs from the ``width`` attribute of an image.

    Only divs that carry ``thumb`` as a complete, space-separated class token
    are considered (``thumbinner`` alone does not count). When such a div has
    at least one descendant ``img`` with a ``width`` attribute, the first of
    these values is written to the div's ``width`` style with ``px``
    appended.
    """
    for div in root.xpath("//div"):
        class_tokens = div.get("class", "").split(" ")
        if "thumb" in class_tokens:
            width_attrs = div.xpath(".//img/@width")
            if width_attrs:
                utils.add_node_style(div, "width", width_attrs[0] + "px")
def handle_span_all(node, width, height, two_col_max_size, debug):
    """Mark nodes wider than ``two_col_max_size`` as single-column.

    Nodes whose ``width`` exceeds ``two_col_max_size`` get the class
    ``pp_singlecol``; with ``debug`` set they are also given a red
    background first. All other nodes are left untouched. ``height`` is
    accepted but not used.
    """
    if not width > two_col_max_size:
        return
    if debug:
        utils.add_node_style(node, "background-color", "red")
    utils.append_class(node, "pp_singlecol")
def map_class_to_style(article):
    """Replace mapped CSS classes by inline styles.

    The mapping for ``article.language`` is obtained with ``get_map`` from
    ``_class_to_style_map``; each key is a class name and each value a
    ``(style attribute, style value)`` pair. Every node in ``article.dom``
    whose class attribute contains the class name gets that style added and
    the class removed. Nothing is done when the lookup yields no mapping.
    """
    styles_by_class = get_map(_class_to_style_map, article.language)
    if styles_by_class:
        for css_class in styles_by_class:
            style_attr, style_val = styles_by_class[css_class]
            # contains() is a substring test on the whole class attribute.
            node_query = '//*[contains(@class, "{}")]'.format(css_class)
            for node in article.dom.xpath(node_query):
                utils.add_node_style(node, style_attr, style_val)
                utils.remove_class(node, css_class)
def resize_overwide_tables(root):
    """Scale over-wide tables to the ``col-12`` width using a CSS transform.

    For each ``table`` whose class contains ``over-wide`` the parent element
    gets ``transform-origin: 0 0`` and a ``scale(...)`` transform. The factor
    is ``config.columns["col-12"]`` divided by the table's ``box_width``
    attribute, formatted with two decimals.

    Tables are skipped when ``box_width`` is missing, not a number, not a
    positive finite value, or when the table has no parent element; each of
    these would otherwise raise or yield a meaningless factor.
    """
    for table in root.xpath('//table[contains(@class, "over-wide")]'):
        try:
            width = float(table.attrib.get("box_width"))
        except (TypeError, ValueError):
            # Attribute absent (None) or not parseable as a number.
            continue
        if not 0 < width < float("inf"):
            # Also rejects NaN, for which every comparison is false.
            continue
        wrapper = table.getparent()
        if wrapper is None:
            continue
        utils.add_node_style(wrapper, "transform-origin", "0 0")
        utils.add_node_style(
            wrapper,
            "transform",
            "scale({:.2f}) ".format(config.columns["col-12"] / width),
        )
def _resize_image_node_width_to_pt(node):
    """Shrink an image by the px-to-pt factor.

    Reading a px width as pt means 96px -> 72pt, i.e. shrinking to 75%. The
    scale factor is more or less deliberate but looks decent in sample pages.

    Nodes other than ``img`` are ignored. For images the current width (px)
    is read first, then the ``width``/``height`` styles and the node width
    are removed, and finally a ``width`` style of the old width multiplied by
    ``config.px2pt`` is written, still with the unit ``px``.
    """
    if node.tag == "img":
        # Must be read before the width information is stripped below.
        px_width = utils.get_node_width(node, target_unit="px")
        utils.remove_node_styles(node, ["width", "height"])
        utils.remove_node_width(node)
        scaled_width = "{}px".format(px_width * config.px2pt)
        utils.add_node_style(node, "width", scaled_width)
def fix_galleries(root):
    """Rework gallery lists so images are rendered as background images.

    For each ``ul`` whose class contains ``gallery``:

    * every descendant loses its width, its height and its ``margin`` style;
    * every ``li`` whose class contains ``gallerybox`` gets the class
      ``col-4``. The node five levels down the first-child chain is taken to
      be the image: it gets the class ``thumbimage`` and its ``src`` is set
      as ``background-image`` on the node three levels down that chain.

    Gallery boxes whose first-child chain is shorter than that still get
    ``col-4`` but are otherwise left alone, and no background image is set
    when the image has no ``src``.
    """
    for gallery in root.xpath('.//ul[contains(@class, "gallery")]'):
        for descendant in gallery.xpath(".//*"):
            utils.remove_node_width(descendant)
            utils.remove_node_height(descendant)
            utils.remove_node_styles(descendant, "margin")

        for box in gallery.xpath('.//li[contains(@class, "gallerybox")]'):
            utils.append_class(box, "col-4")
            try:
                frame = box[0][0][0]
                img = frame[0][0]
            except IndexError:
                # Nesting differs from the expected one; nothing to restyle.
                continue
            utils.append_class(img, "thumbimage")
            url = img.attrib.get("src")
            if url:
                # Without this check a missing src would emit "url(None)".
                utils.add_node_style(
                    frame, "background-image", "url({})".format(url)
                )
def handle_two_col(node, width, height, reg_width, ext_width, debug):
    """Decide how a node wider than the regular width is laid out.

    Only nodes with ``reg_width < width <= ext_width`` are handled:

    * taller than ``config.max_two_col_float_height``: the node gets the
      class ``pp_singlecol`` (orange background in debug mode);
    * otherwise it gets the class ``pp_twocol_span`` so that it spans two
      columns (yellow background in debug mode).
    """
    if not (reg_width < width <= ext_width):
        return

    if height > config.max_two_col_float_height:
        if debug:
            utils.add_node_style(node, "background-color", "orange")
        utils.append_class(node, "pp_singlecol")
        return

    utils.append_class(node, "pp_twocol_span")
    if debug:
        utils.add_node_style(node, "background-color", "yellow")
def fix_img_style_size_tmulti(root):
    """Replace explicit widths of multi-image thumbs by columns and percentages.

    Operates on every ``div`` whose class contains ``thumb`` and ``tmulti``
    but none of ``thumbinner``, ``thumbcaption`` or ``thumbimage``. For each
    such container the first descendant with ``thumbinner`` in its class
    provides the total width (its ``max-width`` in pt); that style is then
    removed and the container is handed to ``resize_node_width_to_columns``.
    Each descendant of the inner node with ``tsingle`` in its class has its
    inner image width removed and receives a percentage ``width`` style
    relative to the total width.

    Containers without a ``thumbinner`` descendant are skipped, and no
    percentage is written when the total width is missing or zero or when
    no image width was obtained, since it could not be computed.
    """
    xpath_conditions = [
        'contains(@class,"thumb") ',
        'and contains(@class, "tmulti")',
        'and not(contains(@class, "thumbinner"))',
        'and not(contains(@class, "thumbcaption"))',
        'and not(contains(@class, "thumbimage"))',
    ]
    container_query = "//div[{}]".format(" ".join(xpath_conditions))
    for img_container in root.xpath(container_query):
        inner_nodes = img_container.xpath('.//*[contains(@class, "thumbinner")]')
        if not inner_nodes:
            continue
        thumbinner = inner_nodes[0]
        total_width = utils.get_node_size(
            thumbinner, attr="max-width", target_unit="pt"
        )
        utils.remove_node_styles(thumbinner, "max-width")
        resize_node_width_to_columns(img_container, total_width)
        for tsingle in thumbinner.xpath('.//*[contains(@class, "tsingle")]'):
            # Always strip the inner width, even if no share can be computed.
            width = _remove_inner_image_node_width(
                tsingle, inner_class="thumbimage"
            )
            if width is None or not total_width:
                continue
            single_width = width / total_width * 100
            utils.add_node_style(tsingle, "width", "{}%".format(single_width))
def handle_reg_shrink(node, width, reg_width, debug):
    """Scale a node horizontally by ``config.reg_width / width``.

    Applies only to nodes with ``0 < width <= reg_width``. Such a node gets a
    ``transform-origin`` style and a ``scalex(...)`` transform whose factor
    is formatted with two decimals; in debug mode it is also given a light
    blue background. Nodes with a zero or negative width are left untouched,
    because the factor would be undefined or negative.
    """
    if not (0 < width <= reg_width):
        return
    if debug:
        utils.add_node_style(node, "background", "lightblue")
    # NOTE(review): this value ends in ";" whereas resize_overwide_tables
    # passes "0 0" -- confirm add_node_style copes with the extra semicolon.
    utils.add_node_style(node, "transform-origin", "0px 0px;")
    # NOTE(review): the factor is based on config.reg_width, not on the
    # reg_width argument used in the check above -- confirm this is intended.
    utils.add_node_style(
        node, "transform", "scalex({:.2f})".format(config.reg_width / width)
    )