예제 #1
0
    def get_vod(self):
        """Process queued video records until ``QULIST`` reports empty.

        For each ``vinfo`` dict taken from the queue, the page at
        ``av911_root + vinfo['yuan_url']`` is downloaded and the ``mac_url``
        value inside the ``div#playview`` markup is extracted.  When one is
        found, the record is completed (play URL, English title, key,
        placeholder description, play type, input time) and handed to
        ``get_sql`` / ``insert``; ``RUN_COUNT`` is incremented per insert.

        Every dequeued item is followed by a one-second pause and a
        ``task_done()`` call.  Both live in a ``finally`` clause so that an
        exception while handling one item cannot leave the queue's
        unfinished-task counter stuck, which would block ``QULIST.join()``.

        NOTE(review): ``empty()`` followed by a blocking ``get()`` is not
        atomic; if several workers share the queue, one may block in
        ``get()`` after another took the last item -- confirm how workers
        are started and joined.
        """
        global RUN_COUNT
        while not QULIST.empty():
            vinfo = QULIST.get()
            try:
                video_html = download_html(av911_root + vinfo['yuan_url'])
                if not video_html:
                    continue

                soup = BeautifulSoup(video_html, 'lxml')
                video_script = soup.find('div', id="playview")
                # str(None) simply yields no match when the div is missing.
                video_id = re.findall(r'mac_url=unescape\(\'(.*?)\'\)',
                                      str(video_script), re.S)
                if not video_id:
                    continue

                vinfo['play_url'] = av911_video_api + video_id[0]
                vinfo['en_title'] = en(vinfo['title'])
                vinfo['vkey'] = ikey()
                vinfo['description'] = "暂无简介"
                vinfo['vod_long'] = ''
                vinfo['play_type'] = online_type[1]
                vinfo['inputtime'] = getTime()

                # Insert into the database.
                sql = get_sql(vinfo, vod_type[0])
                insert(sql)
                RUN_COUNT += 1
            finally:
                # Throttle, then always acknowledge the dequeued item.
                time.sleep(1)
                QULIST.task_done()
예제 #2
0
    def get_vodbt_2(self):
        """Process queued thread records and store those with a BT attachment.

        For each ``vinfo`` dict taken from ``QULIST`` the page at
        ``sex8_root + '/' + vinfo['yuan_url']`` is downloaded.  The thread
        title, a description (text following ``【影片`` in the post body, or
        a placeholder), the preview image list (``file`` attributes joined
        with ``$``) and the attachment link are collected.  A record is
        inserted only when preview images and an attachment link are both
        present; ``RUN_COUNT`` is incremented per insert.

        Pages without a ``span#thread_subject`` and attachment spans without
        an ``<a>`` child are skipped instead of raising.  The pause and
        ``task_done()`` run in ``finally`` so an exception on one item
        cannot leave ``QULIST.join()`` waiting forever.

        NOTE(review): ``empty()`` followed by a blocking ``get()`` is not
        atomic; with several workers one may block in ``get()`` after
        another took the last item -- confirm how workers are joined.
        """
        global RUN_COUNT
        while not QULIST.empty():
            vinfo = QULIST.get()
            try:
                vodbt_url = sex8_root + '/' + vinfo['yuan_url']
                vodbt_html = download_html(vodbt_url, result_type='sex8')
                if not vodbt_html:
                    continue

                soup = BeautifulSoup(vodbt_html, 'lxml')
                title_tag = soup.find('span', id="thread_subject")
                if title_tag is None:
                    # No thread title on the page: nothing usable to store.
                    continue
                vinfo['title'] = title_tag.get_text()

                post_html = str(
                    soup.find('td',
                              attrs={'id': re.compile('postmessage_[0-9]*')}))
                description = re.findall(r'【影片(.*?)</div>', post_html, re.S)
                if description:
                    vinfo['description'] = html("【影片{0}".format(
                        description[0]))
                else:
                    vinfo['description'] = '暂无简介'

                # Preview images.  The last two matches are dropped, as in
                # the original code (presumably non-preview images -- TODO
                # confirm against the page layout).
                vinfo['images'] = ''
                images_arr = soup.find_all('img',
                                           attrs={
                                               'class': 'zoom',
                                               'lazyloadthumb': '1'
                                           })[0:-2]
                if not images_arr:
                    continue

                try:
                    vinfo['images'] = ''.join(
                        img['file'] + '$' for img in images_arr)

                    # BT torrent attachment.
                    attach = soup.find(
                        'span', attrs={'id': re.compile('attach_[0-9]*')})
                    if attach is None or attach.a is None:
                        continue
                    vinfo['bt_url'] = attach.a['href']
                    vinfo['vkey'] = ikey()
                    vinfo['en_title'] = en(vinfo['title'])
                    vinfo['inputtime'] = getTime()

                    # Insert into the database.
                    sql = get_sql(vinfo, vod_type[1])
                    insert(sql)
                    RUN_COUNT += 1
                except KeyError:
                    # Best effort, as before: a KeyError raised here (e.g. an
                    # image without ``file`` or a link without ``href``)
                    # means the post is skipped rather than stored.
                    pass
            finally:
                # Throttle, then always acknowledge the dequeued item.
                time.sleep(1)
                QULIST.task_done()
예제 #3
0
    def get_vodbt(self):
        """Process queued thread records and store them with their BT links.

        For each ``vinfo`` dict taken from ``QULIST`` the page at
        ``taohuazu_root + '/' + vinfo['yuan_url']`` is downloaded.  The
        thread title is read from ``span#thread_subject``; description,
        preview images and attachment links are read from the first
        ``div.t_fsz``.  Image ``file`` attributes and attachment URLs
        (``taohuazu_bt_api`` + the part of the link after ``?``) are each
        joined with a trailing ``$``.  The record is then passed to
        ``get_sql`` / ``insert`` and ``RUN_COUNT`` is incremented.

        Pages without a title are skipped; images without a ``file``
        attribute and attachments without a usable ``href`` are left out
        instead of raising.  The pause and ``task_done()`` run in
        ``finally`` so an exception on one item cannot leave
        ``QULIST.join()`` waiting forever.

        NOTE(review): ``empty()`` followed by a blocking ``get()`` is not
        atomic; with several workers one may block in ``get()`` after
        another took the last item -- confirm how workers are joined.
        """
        global RUN_COUNT
        while not QULIST.empty():
            vinfo = QULIST.get()
            try:
                vodbt_url = taohuazu_root + '/' + vinfo['yuan_url']
                vodbt_html = download_html(vodbt_url)
                if not vodbt_html:
                    continue

                page = BeautifulSoup(vodbt_html, 'lxml')
                title_tag = page.find('span', id="thread_subject")
                if title_tag is None:
                    # No thread title on the page: nothing usable to store.
                    continue
                vinfo['title'] = title_tag.get_text()

                # Restrict further parsing to the first post container.
                post_html = str(page.find('div', class_="t_fsz"))
                post = BeautifulSoup(post_html, 'lxml')
                vinfo['vkey'] = ikey()
                vinfo['en_title'] = en(vinfo['title'])
                description = re.findall(
                    r'<td class="t_f" id="postmessage_[0-9]*">(.*?)<ignore_js_op>',
                    post_html, re.S)
                if description:
                    vinfo['description'] = html(strip(description[0]))
                else:
                    vinfo['description'] = '暂无简介'

                # Preview images; tags lacking ``file`` are left out.
                image_files = (
                    img.get('file')
                    for img in post.find_all('img', attrs={'class': 'zoom'}))
                vinfo['images'] = ''.join(
                    src + '$' for src in image_files if src is not None)

                # BT torrent links.
                bt_links = []
                for item_bt in post.find_all("dl", class_="tattl"):
                    anchor = item_bt.find("a")
                    href = anchor.get('href') if anchor is not None else None
                    if href is None:
                        continue
                    parts = str(href).split('?')
                    if len(parts) < 2:
                        # No query string, so there is no id to forward.
                        continue
                    bt_links.append(taohuazu_bt_api + parts[1] + "$")
                vinfo['bt_url'] = ''.join(bt_links)

                vinfo['inputtime'] = getTime()
                # Insert into the database.
                sql = get_sql(vinfo, vod_type[1])
                insert(sql)
                RUN_COUNT += 1
            finally:
                # Throttle, then always acknowledge the dequeued item.
                time.sleep(1)
                QULIST.task_done()
예제 #4
0
    def pron91_do(self):
        """Process queued video records and store those with an MP4 source.

        For each ``vinfo`` dict taken from ``QULIST`` the page at
        ``vinfo['yuan_url']`` is downloaded and searched for a
        ``<source src="..." type='video/mp4'>`` tag.  When one is found its
        ``src`` becomes ``vinfo['play_url']``, a placeholder description is
        set, and the record is passed to ``get_sql`` / ``insert``;
        ``RUN_COUNT`` is incremented per insert.

        A falsy download result (empty string or ``None``) skips the item;
        the previous ``len()`` check raised ``TypeError`` on ``None``.  The
        pause and ``task_done()`` run in ``finally`` so an exception on one
        item cannot leave ``QULIST.join()`` waiting forever.

        NOTE(review): ``empty()`` followed by a blocking ``get()`` is not
        atomic; with several workers one may block in ``get()`` after
        another took the last item -- confirm how workers are joined.
        """
        global RUN_COUNT
        while not QULIST.empty():
            vinfo = QULIST.get()
            try:
                play_html = download_html(vinfo['yuan_url'])
                if not play_html:
                    continue

                v_url = re.findall(r'<source src="(.*?)" type=\'video\/mp4\'>',
                                   play_html, re.S)
                if not v_url:
                    continue

                vinfo['play_url'] = v_url[0]
                vinfo['description'] = "暂无简介"
                # Insert the record.
                sql = get_sql(vinfo, vod_type[0])
                insert(sql=sql)
                RUN_COUNT += 1
            finally:
                # Throttle, then always acknowledge the dequeued item.
                time.sleep(1)
                QULIST.task_done()
예제 #5
0
    def get_vod_av(self):
        """Process queued video paths and store the videos that can be parsed.

        Each queue item is a relative URL.  The page at
        ``taoyin_root + '/' + vod_url`` is downloaded; the video id is read
        from the ``datav`` attribute of the first ``div#tylongvideo`` and
        the title from the first element with ``id="thread_subject"``.
        When both are present a record is built (image and play URL come
        from ``taoyin_image_av`` / ``taoyin_video_av``) and passed to
        ``get_sql`` / ``insert``; ``RUN_COUNT`` is incremented per insert.

        The pause and ``task_done()`` run in ``finally`` so an exception on
        one item cannot leave ``QULIST.join()`` waiting forever.

        NOTE(review): the earlier code retried the lookup with the very
        same ``id="tylongvideo"`` selector when the first attempt found
        nothing, which could never succeed; a different fallback id may
        have been intended -- confirm against the site markup.

        NOTE(review): ``empty()`` followed by a blocking ``get()`` is not
        atomic; with several workers one may block in ``get()`` after
        another took the last item -- confirm how workers are joined.
        """
        global RUN_COUNT
        while not QULIST.empty():
            vod_url = QULIST.get()
            try:
                # Parse a single video page.
                page_html = download_html(url=taoyin_root + '/' + vod_url)
                if not page_html:
                    continue

                soup = BeautifulSoup(page_html, 'lxml')
                player = soup.find('div', id="tylongvideo")
                title_tag = soup.find(id="thread_subject")
                if player is None or title_tag is None:
                    continue
                vid = player.get('datav')
                if vid is None:
                    # Player container without a video id: nothing to store.
                    continue

                vinfo = {}
                vinfo['vkey'] = ikey()
                vinfo['title'] = title_tag.get_text()
                vinfo['en_title'] = en(vinfo['title'])
                vinfo['description'] = "暂无简介"
                vinfo['images'] = taoyin_image_av(vid)
                vinfo['vod_long'] = ""
                vinfo['yuan_url'] = vod_url
                vinfo['play_url'] = taoyin_video_av(vid)
                vinfo['play_type'] = online_type[1]
                vinfo['movie_type'] = movie_type[3]
                vinfo['inputtime'] = getTime()

                # Insert the record.
                sql = get_sql(vinfo, vod_type[0])
                insert(sql=sql)
                RUN_COUNT += 1
            finally:
                # Throttle, then always acknowledge the dequeued item.
                time.sleep(1)
                QULIST.task_done()
예제 #6
0
    def get_vod(self):
        """Process queued video records by following the player iframe.

        For each ``vinfo`` dict taken from ``QULIST`` the page at
        ``papax_root + vinfo['yuan_url']`` is downloaded, its first
        ``<iframe>`` is followed (``papax_root`` + ``src``), and the first
        ``<video>`` tag of that second page supplies the play URL
        (``papax_root`` + ``src`` with ``'../../..'`` removed).  The
        completed record is passed to ``get_sql`` / ``insert`` and
        ``RUN_COUNT`` is incremented per insert.

        Any missing page, tag or ``src`` attribute skips the item.  The
        pause and ``task_done()`` run in ``finally`` so an exception on one
        item cannot leave ``QULIST.join()`` waiting forever.

        NOTE(review): ``empty()`` followed by a blocking ``get()`` is not
        atomic; with several workers one may block in ``get()`` after
        another took the last item -- confirm how workers are joined.
        """
        global RUN_COUNT
        while not QULIST.empty():
            vinfo = QULIST.get()
            try:
                page_html = download_html(papax_root + vinfo['yuan_url'])
                if not page_html:
                    continue

                vod_iframe = BeautifulSoup(page_html, 'lxml').find('iframe')
                iframe_src = vod_iframe.get('src') if vod_iframe else None
                if iframe_src is None:
                    continue

                play_iframe_html = download_html(papax_root + iframe_src)
                if not play_iframe_html:
                    continue

                video_tag = BeautifulSoup(play_iframe_html,
                                          'lxml').find('video')
                video_src = video_tag.get('src') if video_tag else None
                if video_src is None:
                    continue

                vinfo['vkey'] = ikey()
                # Strip the relative '../../..' prefix before joining with
                # the site root.
                vinfo['play_url'] = papax_root + video_src.replace(
                    '../../..', '')
                vinfo['play_type'] = online_type[1]
                vinfo['en_title'] = en(vinfo['title'])
                vinfo['description'] = "暂无简介"
                vinfo['vod_long'] = ''
                vinfo['inputtime'] = getTime()

                # Insert into the database.
                sql = get_sql(vinfo, vod_type[0])
                insert(sql=sql)
                RUN_COUNT += 1
            finally:
                # Throttle, then always acknowledge the dequeued item.
                time.sleep(1)
                QULIST.task_done()