# When `h` has no <a> child and `c` is 2, 3 or 4, hand it to util.h_to_a
# together with the running counter `ct`, then advance the counter.
# NOTE(review): `h`, `c` and `ct` are bound before this chunk and the original
# indentation was lost. This `if` presumably sits inside a loop whose header
# is not visible here, and `ct` is presumably advanced only when h_to_a is
# called -- confirm both and re-indent to match the enclosing loop.
if not h.a and c in (2, 3, 4):
    util.h_to_a(out, h, ct)
    ct = ct + 1

# An <ol> whose direct parent is another <ol> is replaced by its own children.
for ol in out.findAll("ol"):
    if ol.parent and ol.parent.name == "ol":
        ol.unwrap()

# Inside <pre>, swap inline styling on <strong> for the "resaltar" CSS class,
# leaving alone any <strong> that already carries a class.
for pre in out.findAll("pre"):
    for strong in pre.findAll("strong"):
        if "style" in strong.attrs and "class" not in strong.attrs:
            del strong.attrs["style"]
            strong.attrs["class"] = "resaltar"

# Protocol-relative iframe sources ("//host/...") get an explicit http: scheme.
for frame in out.findAll("iframe"):
    src = frame.attrs.get("src")
    if src and src.startswith("//"):
        frame.attrs["src"] = "http:" + src

util.set_menu(out)
html = util.get_html(out, True)

# En dash and em dash both become a plain hyphen.
html = html.replace(u"–", "-")
html = html.replace(u"—", "-")

# "r=22", "w=21", "x=20", ... -> the digit after "=2" is re-marked as a
# superscript: "r=2<sup>2</sup>".
html = re.sub(r"([rwx])=2([210])", r"\1=2<sup>\2</sup>", html)

# An <li> that directly follows </p>, or a <p> that directly follows </li>,
# gets the missing <ul> / </ul> inserted between them.
# Raw strings: "\s" in a normal string literal is an invalid escape sequence.
html = re.sub(r"</p>\s*<li>", "</p><ul><li>", html)
html = re.sub(r"</li>\s*<p>", "</li></ul><p>", html)

util.escribir(html, oht)
# For every text node matching one of these captions, take the <div> of the
# enclosing <fieldset> and rewrite each of its text nodes with every match of
# `sp` (a pattern defined outside this chunk) replaced by a single space.
for caption in soup.findAll(text=re.compile(u"(Estado de todos los servicios en el sistema|Archivos de configuración de upstart|Estado de un servicio en sistemas Red Hat)")):
    container = caption.find_parent("fieldset").div
    for node in container.find_all(text=True):
        node.replace_with(sp.sub(" ", node.string))

# In the fieldset holding the "Eventos de Linux Foundation" link, strip the
# <strong> wrappers from the list in its <div>, keeping their contents.
for link in soup.findAll("a", text=re.compile(u".*(Eventos de Linux Foundation).*")):
    for bold in link.find_parent("fieldset").div.ul.find_all("strong"):
        bold.unwrap()

# Remove the list item that holds the first <span class="enlace">, if any.
e = soup.find("span", attrs={'class': "enlace"})
if e and e.parent.name == "li":
    e.parent.extract()

util.set_menu(soup)
h = util.get_html(soup)

# Literal text corrections applied to the serialised HTML, in this order.
# The trailing "# 7 11" / "# 31" markers are the original author's notes
# (presumably chapter numbers -- TODO confirm).
for wrong, right in (
    ("Objectivos de aprendizaje", "Objetivos de aprendizaje"),  # 7 11
    (">31</a></h1>", ">31. zypper</a></h1>"),  # 31
    (">31</option>", ">31. zypper</option>"),  # 31
    (" del sisco ", " del disco "),
    ("miltihebra", "multihebra"),
    ("el ajusta de E/S", "el ajuste de E/S"),
    (". Se este", ". Si este"),
    (" tital ", " total "),
    (" para para ", " para "),
    ("revision_umber", "revision_number"),
    ("cuentasde", "cuentas de"),
):
    h = h.replace(wrong, right)