def query_info1(self, url):
    """Extract a title and a video URL from a single page.

    Example url: 'http://www.dayi.ca/ys/?p=2386&page=52'

    The page is fetched with ``self.get_hutf(url)``. Inside the first
    element matching ``div#content-outer div#content``, the first ``h3``
    supplies the base title and the first ``p`` supplies both the title
    suffix (its first whitespace-separated token) and the video URL
    (the non-whitespace run following ``video:``).

    Returns:
        A 4-tuple ``(title, "m3u8", video_url, None)``.
    """
    hutf = self.get_hutf(url)
    content = SelStr("div#content-outer div#content", hutf)[0]
    title = content.select('h3')[0].text
    para = content.select('p')[0]
    title = title + '_' + para.text.split()[0]
    echo(title)

    # Raw string: '\S' in a plain literal is an invalid escape sequence
    # (a SyntaxWarning on recent Python 3). The pattern itself is unchanged.
    # NOTE(review): match1 presumably returns the captured group, or None
    # when nothing matches -- in which case the indexing below would raise.
    # Confirm against match1's definition.
    video_url = match1(para.text, r'video:(\S+)')

    # The URL may be wrapped in quotes; splitting on the opening quote
    # character and taking the second piece yields the text between the
    # opening quote and the next occurrence of the same quote.
    if video_url[0] in ("'", '"'):
        video_url = video_url.split(video_url[0])[1]
    echo(video_url)

    return title, "m3u8", video_url, None
def download_one(self, url):
    """Save the text of a document page to ``<title>.txt``.

    Example url: "https://shimo.im/docs/gJQufddR72AZJcna/read"

    The page is fetched with ``self.get_hutf(url)``. Within the first
    ``div#editor`` element, the ``data-value`` attribute of the first
    ``div.ql-title div.ql-title-box`` becomes the file name (with ".txt"
    appended) and the text of the first ``div.ql-editor`` becomes the file
    content. A newline is appended to every ``p`` inside the editor before
    its text is read, so paragraphs end up on separate lines.

    The file is created relative to the current working directory.
    """
    hutf = self.get_hutf(url)
    doc = SelStr("div#editor", hutf)[0]

    title_box = doc.select("div.ql-title div.ql-title-box")[0]
    # NOTE(review): the title comes straight from the fetched page and is
    # used as a file name unchanged; path separators are not filtered.
    title = title_box["data-value"] + ".txt"

    editor = doc.select("div.ql-editor")[0]
    for p in editor.select("p"):
        # Terminate each paragraph with a newline: extend the trailing
        # DataNode when there is one, otherwise add a new DataNode child.
        if p.children and isinstance(p.children[-1], DataNode):
            p.children[-1].append("\n")
        else:
            p.children.append(DataNode(p, "\n"))

    # Context manager guarantees the handle is closed even if computing
    # editor.text or writing it raises.
    with open(title, "w") as fout:
        fout.write(editor.text)