def get_vod(self):
    """Work through QULIST, storing one video record per resolvable item.

    Each queued item is a dict. Its 'yuan_url' is appended to av911_root to
    fetch the detail page. When the page's ``playview`` div contains a
    ``mac_url=unescape('...')`` value, the record is completed, turned into
    SQL with get_sql() and handed to insert(), and RUN_COUNT is incremented.
    Every item, stored or not, is followed by a one-second pause and a
    task_done() call.

    NOTE(review): the item is presumed to already hold 'title' -- it is read
    here but never set; confirm against whatever fills the queue.
    """
    global RUN_COUNT

    while not QULIST.empty():
        record = QULIST.get()

        detail_page = download_html(av911_root + record['yuan_url'])
        found = None
        if detail_page:
            player_div = BeautifulSoup(detail_page, 'lxml').find(
                'div', id="playview")
            # str() of a missing div is just 'None', which never matches.
            found = re.search(r'mac_url=unescape\(\'(.*?)\'\)',
                              str(player_div), re.S)

        if found is not None:
            record['play_url'] = av911_video_api + found.group(1)
            record['en_title'] = en(record['title'])
            record['vkey'] = ikey()
            record['description'] = "暂无简介"
            record['vod_long'] = ''
            record['play_type'] = online_type[1]
            record['inputtime'] = getTime()

            # Insert into the database
            insert(get_sql(record, vod_type[0]))
            RUN_COUNT += 1

        time.sleep(1)
        QULIST.task_done()
def get_vodbt_2(self):
    """Drain QULIST, turning each queued thread page into a BT record.

    Each queued item is a dict. Its 'yuan_url' is joined to sex8_root and
    fetched with ``download_html(..., result_type='sex8')``. From the page
    the method reads the subject, a description, the preview images and the
    attachment link, then stores the record through get_sql()/insert() and
    increments RUN_COUNT.

    An item is skipped (nothing stored) when the page cannot be downloaded,
    has no subject span, yields no preview images, or has no attachment
    link. A KeyError while assembling the record (for example an image
    without a 'file' attribute) also skips the item. Every dequeued item is
    followed by a one-second pause and task_done(), including when
    processing raises, so the queue's unfinished-task count stays accurate.
    """
    global RUN_COUNT

    while not QULIST.empty():
        vinfo = QULIST.get()
        try:
            thread_url = sex8_root + '/' + vinfo['yuan_url']
            page_html = download_html(thread_url, result_type='sex8')
            if not page_html:
                continue
            soup = BeautifulSoup(page_html, 'lxml')

            title_tag = soup.find('span', id="thread_subject")
            if title_tag is None:
                # No subject on the page: skip instead of crashing on None.
                continue
            vinfo['title'] = title_tag.get_text()

            # str() of a missing post cell is 'None', so the regex simply
            # finds nothing and the placeholder description is used.
            post_html = str(soup.find(
                'td', attrs={'id': re.compile('postmessage_[0-9]*')}))
            description = re.findall(r'【影片(.*?)</div>', post_html, re.S)
            if description:
                vinfo['description'] = html(
                    "【影片{0}".format(description[0]))
            else:
                vinfo['description'] = '暂无简介'

            # Preview images. The last two matches are dropped.
            # NOTE(review): presumably those are not previews -- confirm.
            vinfo['images'] = ''
            images_arr = soup.find_all(
                'img', attrs={'class': 'zoom', 'lazyloadthumb': '1'})[0:-2]
            if not images_arr:
                continue

            try:
                vinfo['images'] = ''.join(
                    img['file'] + '$' for img in images_arr)

                # BT torrent attachment
                attachment = soup.find(
                    'span', attrs={'id': re.compile('attach_[0-9]*')})
                link = attachment.a if attachment is not None else None
                if link is None:
                    # NOTE(review): records are stored only when a torrent
                    # link exists -- confirm this is the intended rule.
                    continue
                vinfo['bt_url'] = link['href']
                vinfo['vkey'] = ikey()
                vinfo['en_title'] = en(vinfo['title'])
                vinfo['inputtime'] = getTime()

                # Insert into the database
                sql = get_sql(vinfo, vod_type[1])
                insert(sql)
                RUN_COUNT += 1
            except KeyError:
                # Best effort: a tag lacking an expected attribute means
                # the record is incomplete, so it is dropped.
                pass
        finally:
            # Throttle, then acknowledge the item on every path.
            time.sleep(1)
            QULIST.task_done()
def get_vodbt(self):
    """Drain QULIST, turning each queued thread page into a BT record.

    Each queued item is a dict. Its 'yuan_url' is joined to taohuazu_root
    and downloaded. The subject, description, preview images and attachment
    links are read from the page, and the record is stored through
    get_sql()/insert(), after which RUN_COUNT is incremented.

    An item is skipped when the page cannot be downloaded or has no subject
    span. Images without a 'file' attribute and attachment blocks without a
    usable ``?``-style link are left out rather than aborting the item.
    Every dequeued item is followed by a one-second pause and task_done(),
    including when processing raises, so the queue's unfinished-task count
    stays accurate.
    """
    global RUN_COUNT

    while not QULIST.empty():
        vinfo = QULIST.get()
        try:
            thread_url = taohuazu_root + '/' + vinfo['yuan_url']
            page_html = download_html(thread_url)
            if not page_html:
                continue
            page = BeautifulSoup(page_html, 'lxml')

            title_tag = page.find('span', id="thread_subject")
            if title_tag is None:
                # No subject on the page: skip instead of crashing on None.
                continue
            vinfo['title'] = title_tag.get_text()

            # Restrict further parsing to the post container. When it is
            # missing, str() gives 'None' and nothing below matches.
            post_html = str(page.find('div', class_="t_fsz"))
            post = BeautifulSoup(post_html, 'lxml')

            vinfo['vkey'] = ikey()
            vinfo['en_title'] = en(vinfo['title'])

            description = re.findall(
                r'<td class="t_f" id="postmessage_[0-9]*">(.*?)<ignore_js_op>',
                post_html, re.S)
            if description:
                vinfo['description'] = html(strip(description[0]))
            else:
                vinfo['description'] = '暂无简介'

            # Preview images: '$'-terminated list of 'file' attributes.
            vinfo['images'] = ''.join(
                img['file'] + '$'
                for img in post.find_all('img', attrs={'class': 'zoom'})
                if img.has_attr('file'))

            # BT torrent links: the first query-string segment of every
            # attachment href, appended to taohuazu_bt_api.
            bt_links = []
            for item_bt in post.find_all("dl", class_="tattl"):
                anchor = item_bt.find("a")
                if anchor is None or not anchor.has_attr('href'):
                    continue
                href_parts = str(anchor['href']).split('?')
                if len(href_parts) < 2:
                    continue
                bt_links.append(taohuazu_bt_api + href_parts[1] + "$")
            vinfo['bt_url'] = ''.join(bt_links)

            vinfo['inputtime'] = getTime()

            # Insert into the database
            sql = get_sql(vinfo, vod_type[1])
            insert(sql)
            RUN_COUNT += 1
        finally:
            # Throttle, then acknowledge the item on every path.
            time.sleep(1)
            QULIST.task_done()
def pron91_do(self):
    """Drain QULIST, resolving the mp4 source URL for each queued item.

    Each queued item is a dict whose 'yuan_url' is passed to download_html
    as-is. When the page contains a ``<source ... type='video/mp4'>`` tag,
    its ``src`` becomes 'play_url', a placeholder description is set, and
    the record is stored through get_sql()/insert(); RUN_COUNT is then
    incremented.

    An item is skipped when the download yields nothing (empty string or
    None) or no source tag is found. Every dequeued item is followed by a
    one-second pause and task_done(), including when processing raises, so
    the queue's unfinished-task count stays accurate.
    """
    global RUN_COUNT

    while not QULIST.empty():
        vinfo = QULIST.get()
        try:
            play_html = download_html(vinfo['yuan_url'])
            # Plain truthiness covers both '' and None; calling len() on a
            # None result would raise TypeError.
            if not play_html:
                continue

            source = re.search(
                r'<source src="(.*?)" type=\'video\/mp4\'>',
                play_html, re.S)
            if source is None:
                continue

            vinfo['play_url'] = source.group(1)
            vinfo['description'] = "暂无简介"

            # Insert the record
            sql = get_sql(vinfo, vod_type[0])
            insert(sql=sql)
            RUN_COUNT += 1
        finally:
            # Throttle, then acknowledge the item on every path.
            time.sleep(1)
            QULIST.task_done()
def get_vod_av(self):
    """Drain QULIST, building one video record per queued page path.

    Unlike a dict item, each queued item here is a URL fragment (string)
    that is joined to taoyin_root and downloaded. The video id is read from
    the 'datav' attribute of the ``div#tylongvideo`` element and the title
    from the element with id ``thread_subject``. When both are present a
    fresh record is filled in and stored through get_sql()/insert(), and
    RUN_COUNT is incremented.

    An item is skipped when the page cannot be downloaded or either the
    player div (with its 'datav' attribute) or the title is missing. Every
    dequeued item is followed by a one-second pause and task_done(),
    including when processing raises, so the queue's unfinished-task count
    stays accurate.
    """
    global RUN_COUNT

    while not QULIST.empty():
        vod_url = QULIST.get()
        try:
            # Parse a single video page
            page_html = download_html(url=taoyin_root + '/' + vod_url)
            if not page_html:
                continue
            soup = BeautifulSoup(page_html, 'lxml')

            # NOTE(review): only the 'tylongvideo' container is looked up;
            # if pages use another player container id, add it here.
            player = soup.find('div', id="tylongvideo")
            vid = player.get('datav') if player is not None else None
            title_tag = soup.find(id="thread_subject")
            if vid is None or title_tag is None:
                continue

            vinfo = {}
            vinfo['vkey'] = ikey()
            vinfo['title'] = title_tag.get_text()
            vinfo['en_title'] = en(vinfo['title'])
            vinfo['description'] = "暂无简介"
            vinfo['images'] = taoyin_image_av(vid)
            vinfo['vod_long'] = ""
            vinfo['yuan_url'] = vod_url
            vinfo['play_url'] = taoyin_video_av(vid)
            vinfo['play_type'] = online_type[1]
            vinfo['movie_type'] = movie_type[3]
            vinfo['inputtime'] = getTime()

            # Insert the record
            sql = get_sql(vinfo, vod_type[0])
            insert(sql=sql)
            RUN_COUNT += 1
        finally:
            # Throttle, then acknowledge the item on every path.
            time.sleep(1)
            QULIST.task_done()
def get_vod(self):
    """Work through QULIST, storing one video record per resolvable item.

    Each queued item is a dict. Its 'yuan_url' is appended to papax_root to
    fetch the listing page, whose first ``<iframe>`` points (relative to
    papax_root) at the player page. The first ``<video>`` tag on the player
    page supplies the source path; ``'../../..'`` is removed from it and
    papax_root is prepended to form 'play_url'. The completed record goes
    through get_sql()/insert() and RUN_COUNT is incremented.

    Every item, stored or not, is followed by a one-second pause and a
    task_done() call.

    NOTE(review): the item is presumed to already hold 'title' -- it is read
    here but never set; confirm against whatever fills the queue.
    """
    global RUN_COUNT

    while not QULIST.empty():
        record = QULIST.get()

        # Stage 1: listing page -> iframe tag (None when unavailable).
        iframe_tag = None
        listing_page = download_html(papax_root + record['yuan_url'])
        if listing_page:
            iframe_tag = BeautifulSoup(listing_page, 'lxml').find('iframe')

        # Stage 2: player page behind the iframe -> video tag.
        video_tag = None
        if iframe_tag is not None:
            player_page = download_html(papax_root + iframe_tag['src'])
            if player_page:
                video_tag = BeautifulSoup(player_page, 'lxml').find('video')

        # Stage 3: complete and store the record.
        if video_tag is not None:
            relative_src = video_tag['src'].replace('../../..', '')
            record['vkey'] = ikey()
            record['play_url'] = papax_root + relative_src
            record['play_type'] = online_type[1]
            record['en_title'] = en(record['title'])
            record['description'] = "暂无简介"
            record['vod_long'] = ''
            record['inputtime'] = getTime()

            # Insert into the database
            insert(sql=get_sql(record, vod_type[0]))
            RUN_COUNT += 1

        time.sleep(1)
        QULIST.task_done()