def tudou_download(url, vidfmt):
    """Resolve a Tudou page URL to its download information.

    Fetches the page at ``url``, extracts the video's item id (``iid``) and
    title from the page source, and delegates to ``tudou_download_by_iid``.

    Args:
        url: Address of the Tudou video page to fetch.
        vidfmt: Not used by this function; kept so the signature stays
            compatible with existing callers.

    Returns:
        Whatever ``tudou_download_by_iid(iid, title)`` returns.

    Raises:
        AssertionError: If no iid or no title can be found in the page.
            Raised explicitly (rather than via ``assert``) so the check is
            still enforced when Python runs with ``-O``.
    """
    http = HttpUtil()
    html = http.get(url)
    # parse_charset() is called after get(); presumably it reports the
    # charset of the response just fetched -- confirm against HttpUtil.
    charset = http.parse_charset()
    html = html.decode(charset)

    # Try the `"k":<value>,` form first, then fall back to `iid = <digits>`.
    iid = r1(r'"k":([^,]*),', html)
    if not iid:
        iid = r1(r'iid\s*[:=]\s*(\d+)', html)
    if not iid:
        raise AssertionError('tudou: no video iid found in page %s' % (url,))

    # NOTE(review): the capture class only excludes single quotes, so a
    # double-quoted `kw` value may match past its closing quote -- confirm
    # against real pages before tightening the pattern.
    title = r1(r"kw\s*[:=]\s*['\"]([^']+)['\"]", html)
    if not title:
        raise AssertionError('tudou: no video title found in page %s' % (url,))
    title = unescape_html(title)

    return tudou_download_by_iid(iid, title)
def tudou_download_by_iid(iid, title):
    """Look up the media URL for a Tudou item id.

    Requests ``http://v2.tudou.com/f`` for the given ``iid``, unescapes the
    response, parses it with BeautifulSoup and reads the text of the first
    ``<f>`` element as the media URL.

    Args:
        iid: Item id of the video. Interpolated into the query string with
            ``%s``, so both strings and integers are accepted.
        title: Title to pass through unchanged in the result.

    Returns:
        A 5-tuple ``([url], title, 'flv', 1, headers)``. ``headers`` is a
        name resolved from the enclosing module (not defined in this
        function); the meaning of the constant ``1`` is not visible here.

    Raises:
        AttributeError: If the response contains no ``<f>`` element. The
            exception type matches what the unguarded ``.text`` access
            raised before; only the message is more descriptive.
    """
    url = ('http://v2.tudou.com/f?id=%s&sid=11000&hd=2&sj=1&areaCode=110000'
           % (iid,))
    xml = HttpUtil().get(url)
    # The payload is unescaped *before* parsing, as in the original flow.
    xml = unescape_html(xml)

    node = BeautifulSoup(xml).find('f')
    if node is None:
        raise AttributeError(
            'tudou: no <f> element in response for iid %s' % (iid,))
    url = node.text

    return [url], title, 'flv', 1, headers