def _has_true_false(div, strict):
    """Tell whether *div* holds both a "Verdadero" and a "Falso" paragraph.

    With *strict* set (tipo 1) the block must additionally have more than
    two direct children, the second of which cleans to "Verdadero".
    """
    if not (div.find("p", text="Verdadero") and div.find("p", text="Falso")):
        return False
    if not strict:
        return True
    hjs = div.select(" > *")
    return len(hjs) > 2 and util.sclean(hjs[1]) == "Verdadero"


def _starts_with_paragraph(div):
    """Tell whether *div* has several direct children, the first being a <p>."""
    hjs = div.select(" > *")
    return len(hjs) > 1 and hjs[0].name == "p"


def _first_paragraph_is(div, txt):
    """Tell whether the first <p> found in *div* has exactly the text *txt*."""
    first = div.p
    if not first:
        return False
    return first.get_text().strip() == txt


def _is_heading_plus_dash_items(div):
    """Tell whether the leading <p> run is one plain line plus 2+ "- " lines."""
    dashed = 0
    plain = 0
    for child in div.select(" > *"):
        if child.name != "p":
            # Only the uninterrupted run of leading paragraphs counts.
            break
        if child.get_text().strip().startswith("- "):
            dashed += 1
        else:
            plain += 1
            if plain > 1:
                # A second plain paragraph already rules the block out.
                break
    return dashed > 1 and plain == 1


def find_fld(soup, r, tipo=None, txt=None):
    """Collect the parents of the <legend> tags whose text matches *r*.

    Parameters:
        soup: object exposing ``findAll('legend')`` (a BeautifulSoup tree).
        r:    compiled pattern; ``r.match`` is applied to each legend text.
        tipo: optional filter applied to the parent's ``div``:
              None - no filter, every matching parent is returned;
              1    - has "Verdadero" and "Falso" paragraphs, more than two
                     direct children, and the second one is "Verdadero";
              2    - more than one direct child and the first one is a <p>;
              3    - the first <p> has exactly the text *txt*;
              4    - has "Verdadero" and "Falso" paragraphs;
              5    - the leading run of <p> children contains exactly one
                     plain paragraph and at least two starting with "- ".
              Any other value matches nothing.
        txt:  text compared against the first paragraph (tipo 3 only).

    Returns:
        A list with the matching parent elements, in document order.

    A parent without a ``div`` can never satisfy a *tipo* filter, so it is
    skipped instead of raising ``AttributeError`` on ``None``.
    """
    rt = []
    for legend in soup.findAll('legend'):
        if not r.match(legend.get_text()):
            continue
        f = legend.parent
        if tipo is None:
            rt.append(f)
            continue
        div = f.div
        if div is None:
            # Every filter below inspects the div; nothing to test here.
            continue
        if tipo == 1 or tipo == 4:
            found = _has_true_false(div, strict=(tipo == 1))
        elif tipo == 2:
            found = _starts_with_paragraph(div)
        elif tipo == 3:
            found = _first_paragraph_is(div, txt)
        elif tipo == 5:
            found = _is_heading_plus_dash_items(div)
        else:
            found = False
        if found:
            rt.append(f)
    return rt
def find_fld(soup, r, tipo=None, txt=None):
    """Return the parents of every <legend> whose text matches *r*.

    Parameters:
        soup: object exposing ``findAll('legend')`` (a BeautifulSoup tree).
        r:    compiled pattern; ``r.match`` is applied to each legend text.
        tipo: optional extra condition on the parent's ``div``:
              None - no condition;
              1    - "Verdadero" and "Falso" paragraphs present, more than
                     two direct children, the second cleaning to "Verdadero";
              2    - more than one direct child, the first being a <p>;
              3    - first <p> text (stripped) equal to *txt*;
              4    - "Verdadero" and "Falso" paragraphs present;
              5    - the leading <p> children are one plain paragraph plus
                     two or more paragraphs starting with "- ".
              Other values select nothing.
        txt:  expected first-paragraph text, only read when tipo is 3.

    Returns:
        List of matching parent elements in the order they were found.

    Parents that have no ``div`` are ignored when a *tipo* is given; they
    used to make the whole search fail with ``AttributeError``.
    """
    rt = []
    for legend in soup.findAll('legend'):
        if not r.match(legend.get_text()):
            continue
        f = legend.parent

        if tipo is None:
            rt.append(f)
            continue

        div = f.div
        if div is None:
            # All tipo-specific checks look inside the div.
            continue

        if tipo == 1 or tipo == 4:
            if div.find("p", text="Verdadero") and div.find("p", text="Falso"):
                hjs = div.select(" > *")
                # tipo 4 only needs both answers to exist; tipo 1 also pins
                # the position of the "Verdadero" paragraph.
                if tipo == 4 or (len(hjs) > 2
                                 and util.sclean(hjs[1]) == "Verdadero"):
                    rt.append(f)
        elif tipo == 2:
            hjs = div.select(" > *")
            if len(hjs) > 1 and hjs[0].name == "p":
                rt.append(f)
        elif tipo == 3:
            if div.p and div.p.get_text().strip() == txt:
                rt.append(f)
        elif tipo == 5:
            cg = 0  # paragraphs starting with "- "
            sg = 0  # paragraphs not starting with "- "
            for h in div.select(" > *"):
                if h.name != "p":
                    # Stop at the first non-paragraph child.
                    break
                if h.get_text().strip().startswith("- "):
                    cg += 1
                else:
                    sg += 1
                    if sg > 1:
                        break
            if cg > 1 and sg == 1:
                rt.append(f)
    return rt
# Post-processing of the blocks selected by find_fld().
# NOTE(review): this fragment was recovered from a whitespace-collapsed
# source; the nesting below is the most plausible reading and must be
# confirmed against the original file. `d`, `cono`, `soup`, `preguntaVF`,
# `util` and `bs4` are defined outside this fragment.

# Step 1: when several tipo-1 blocks match `cono`, fold them into the first.
fs=find_fld(d, cono, 1)
if len(fs)>1:
    v1=fs[0]
    del fs[0]
    for v in fs:
        # Move the block's div into the first block, drop the div wrapper
        # (its children stay in place) and remove the emptied block.
        dv=v.div
        v1.append(dv)
        dv.unwrap()
        v.extract()

# Step 2: in every tipo-4 block, turn the paragraphs that are directly
# followed by a "Verdadero" paragraph into list items.
fs=find_fld(d, cono, 4)
for v1 in fs:
    uls=[]  # lists already closed for this block
    ul=soup.new_tag("ul")  # list currently being filled
    for p in v1.findAll("p"):
        if p.next_sibling and p.next_sibling.name=="p" and util.sclean(p.next_sibling)=="Verdadero":
            p.name="li"
            # Raises IndexError if the paragraph has no contents.
            c=p.contents[0]
            # `unicode` is a Python 2 builtin.
            if isinstance(c, bs4.NavigableString) or isinstance(c, unicode):
                cs=c.strip()
                # Leading text of the form "<digit>.": remove that text node
                # and make the current list an ordered one.
                if len(cs)>1 and cs[0].isdigit() and cs[1]==".":
                    c.extract()
                    ul.name="ol"
                    # A "1." while the current list already has items:
                    # store the current list and start a new one.
                    if cs[0]=="1" and len(ul.contents)>0:
                        uls.append(ul)
                        ul=soup.new_tag("ul")
            ul.append(p)
        elif util.sclean(p)=="Verdadero" or util.sclean(p)=="Falso":
            # The answer paragraphs themselves are removed from the tree.
            p.extract()
    # New paragraph holding the `preguntaVF` text; the fragment ends before
    # showing where it is inserted - TODO confirm against the full file.
    p=soup.new_tag("p")
    p.string=preguntaVF
ttxt = soup.select("div.ttxt") for t in ttxt: t.replaceWithChildren() comments = soup.findAll(text=lambda text: isinstance(text, bs4.Comment)) for n in comments: n.extract() tags = util.vacio(soup, ['table', 'p', 'div', 'ul', 'ol', 'li']) for t in tags: t.extract() spans = soup.select("span") for s in spans: txt = util.sclean(s) if len(txt) == 0 or txt == ":": s.unwrap() elif 'style' not in s.attrs: s.unwrap() elif "rgb(0, 150, 200)" in s.attrs[ 'style'] and s.parent.name != "a" and not ( len(s.select(" > *")) == 1 and s.select(" > *")[0].name == "a"): s.attrs['class'] = "enlace" elif util.has(s.attrs['style'], [ "rgb(0, 0, 255)", "rgb(0, 0, 205)", "color:#0000CD", "rgb(41, 1, 208)", "color:#0000FF" ]): s.attrs['class'] = "comando" elif util.has(s.attrs['style'],