def parse_year(x):
    """Queue every page linked from the tables of a fetched index page.

    ``x`` is the fetch result object; only ``x.data`` (the raw page body)
    is read here.  Each ``<a>`` found under ``//table/tr/td`` has its
    ``href`` appended to ``http://logs.zub.cc`` and handed to
    ``fetch.addUrl``.  Returns ``None``.
    """
    # Same empty-payload guard that parse() applies before building a DOM.
    if not x.data:
        return
    dom = fromHTML(x.data)
    if len(dom) == 0:
        return
    # Use a distinct loop name: the original comprehension reused ``x``,
    # which in Python 2 leaks out of the comprehension and rebinds the
    # parameter.
    for anchor in dom.xpath("//table/tr/td/a"):
        href = anchor.get("href")
        if href is None:
            # Anchors without href would make the concatenation below
            # raise TypeError.
            continue
        fetch.addUrl('http://logs.zub.cc' + href)
def parse(x):
    """Handle one fetch result: either a POST response or a page with a form.

    Reads ``x.save``, ``x.head``, ``x.data`` and ``x.url``.

    * If ``x.save`` contains the key "post", the result is treated as the
      response to an earlier form submission: the raw headers from
      ``x.head.getvalue()`` are parsed into a dict with lower-cased keys and
      one of two status lines is printed, depending on the "location" header.
    * Otherwise the body is parsed with ``fromHTML``, a POST form is picked,
      its author/e-mail fields are filled from ``genName()`` / ``genEmail()``,
      and one POST request per entry of the module-level ``comments`` is
      queued through ``fetch.addUrl``.

    Python 2 only: uses print statements and indexes the result of filter().

    NOTE(review): this submits generated author/e-mail values and every entry
    of ``comments`` to a third-party comment form; confirm that such automated
    posting is authorised for the target sites before running it.
    """
    if "post" in x.save:
        # Keep only header lines that have ':' somewhere after column 0, then
        # split each on ': ' into a [name, value] pair.
        # NOTE(review): a line with ':' but no ': ', or with ': ' inside the
        # value, does not yield exactly two items and makes dict() raise.
        header=dict([xx.strip().split(': ') for xx in filter(lambda x: x.find(":") > 0,x.head.getvalue().split("\n"))])
        # Re-key the dict with lower-cased header names.
        header=dict(map(lambda h:[h.lower(),header[h]],header))
        # NOTE(review): str.find() returns -1 (truthy) when "comment" is
        # absent and 0 (falsy) when it is at the very start, so this test is
        # true for almost any Location value -- confirm the intent.
        if "location" in header and header["location"].find("comment"):
            print "забеись",x.save["original"],x.save["comment"]
        else:
            print "плохо",x.save["url"],header,x.data
    else:
        if len(x.data) == 0: return
        dom=fromHTML(x.data)
        if len(dom) == 0: return
        # Take the first POST form accepted by the filter below.
        # NOTE(review): .find() is used as a boolean here as well, so any
        # action that does not *start* with "wp-comments-post.php" passes.
        # The [0] raises IndexError when no form matches.
        form=filter(lambda x: x.get("action") and x.get("action").find("wp-comments-post.php"),dom.xpath("//form[@method='post']"))[0].forms[0]
        fields = form.form_values()
        # Name of the field that will receive the comment text; stays False
        # when none of the known names is present.
        commentName=False
        for field in fields:
            # field[0] is the form field name.
            if field[0] in ('author','login','name'):
                form.fields[field[0]]=genName()
            elif field[0] in ('email','e-mail','mail'):
                form.fields[field[0]]=genEmail()
            # Disabled branch, kept from the original:
            #elif field[0] in ('site','url'):
            #    form.fields[field[0]]=genURL()
            elif field[0] in ('comment','text','body'):
                commentName=field[0]
        # First input whose type is "submit"; its name/value pair is appended
        # to the POST body below.  Raises IndexError if the form has none.
        submit= filter(lambda x: x.get('type') == "submit",form.inputs)[0]
        for comment in comments:
            # NOTE(review): if commentName is still False this assigns to
            # form.fields[False] -- confirm how that should be handled.
            form.fields[commentName]=comment
            # The queued dict carries "post", "original" and "comment"; the
            # branch at the top of this function reads those keys from
            # x.save (presumably x.save is this dict -- verify in fetch).
            fetch.addUrl({"url":getRealAction(x.url,form.action),"post":urlencode(form.form_values()+[(submit.name,submit.value)]),"original":x.url,"comment":comment})
def parse_page(x):
    """Save the ``<misskitten>`` lines of one log page and queue the next page.

    ``x`` is the fetch result object; ``x.data`` (cp1251-encoded bytes) and
    ``x.url`` are read.

    * The text of the first ``/html/body/font`` element is split into lines;
      for every line containing ``<misskitten>`` past column 0, the text
      after the first ``"> "`` is passed to ``save``.
    * Every ``/html/body/a`` whose href starts with ``/viewlog/`` and whose
      trailing ``=N`` value is exactly one more than that of ``x.url`` is
      queued through ``fetch.addUrl``.

    Returns ``None``.  ``ValueError`` from ``int()`` propagates when either
    trailing value is not numeric.
    """
    # Same empty-payload guard that parse() applies before building a DOM.
    if not x.data:
        return
    dom = fromHTML(x.data.decode('cp1251'))
    if len(dom) == 0:
        return

    # A page without a <font> block used to raise IndexError here, which
    # also prevented the next-page link from being queued.
    log_blocks = dom.xpath("/html/body/font")
    if log_blocks:
        for msg in log_blocks[0].text_content().split("\n"):
            # "> 0" (not ">= 0") skips a line that starts with the nick at
            # column 0 -- presumably every log line carries a prefix before
            # the nick; TODO confirm.
            if msg.find("<misskitten>") > 0:
                # Keep everything after the first "> ".  The previous
                # "".join(msg.split("> ")[1:]) silently removed every later
                # "> " occurring inside the message text.
                save(msg.partition("> ")[2])

    for anchor in dom.xpath("/html/body/a"):
        uri = anchor.get("href")
        # Anchors without href would raise AttributeError on startswith().
        if uri is None or not uri.startswith("/viewlog/"):
            continue
        # Follow only the link whose trailing number is current + 1.
        if int(x.url.split("=")[-1]) + 1 == int(str(uri).split("=")[-1]):
            fetch.addUrl('http://logs.zub.cc' + uri)