Python SelStr Examples, mybs.SelStr Python Examples

Example #1

0

Show file

File: shuquge.py Project: pastebt/dwm

    def get_playlist(self, url):
        """Collect the chapter links listed on a book index page.

        The page at ``url`` is fetched, the book title (first
        ``div.book div.info h2``) is echoed, and the children of the first
        ``div.listmain dl`` are walked in order.  Every ``dt`` child switches
        collection on or off: ``dd`` children are collected only while the
        most recent ``dt`` does not contain the "latest chapters" marker text.

        Returns a list of ``(chapter_text, chapter_url)`` tuples, or ``[]``
        when the title or the chapter list cannot be found.
        """
        # e.g. url = "https://www.shuquge.com/txt/12236/index.html"
        site_dir = os.path.dirname(url)
        page = self.get_hutf(url)

        headings = SelStr("div.book div.info h2", page)
        if not headings:
            return []
        echo(headings[0].text)

        chapter_lists = SelStr("div.listmain dl", page)
        if not chapter_lists:
            return []

        chapters = []
        wanted = True
        for node in chapter_lists[0].children:
            if node.tag == 'dt':
                # A section header decides whether the following dd's count.
                wanted = u"最新章节" not in node.text
            elif wanted and node.tag == 'dd':
                # Chapter hrefs are joined onto the directory of the index URL.
                link = os.path.join(site_dir, node.select("a")[0]['href'])
                echo(link, node.text)
                chapters.append((node.text, link))
        return chapters

Example #2

0

Show file

    def query_info(self, url):
        """Resolve a rapidvideo embed page to a video URL.

        The title comes from the page's ``<title>``; a trailing ``.mp4`` is
        stripped and reported as the kind.  If the page has a
        ``video#videojs source`` element its ``src`` is returned directly.
        Otherwise the page is re-requested with ``block=1`` posted, the
        jwplayer ``setup(...)`` argument is extracted, and among its
        ``"sources"`` the entry whose label has the highest index in
        ``self.labels`` is chosen.

        Returns ``(title, kind, [video_url], None)``.  ``video_url`` is ``''``
        when no source carries a label listed in ``self.labels``.
        """
        hutf = self.get_hutf(url)
        debug(hutf)
        title = SelStr('title', hutf)[0].text
        k = None
        if title.endswith('.mp4'):
            title, k = title[:-4], 'mp4'

        # e.g. url = "https://www.rapidvideo.com/embed/FUZ35WDLM7"
        v = SelStr("video#videojs source", hutf)
        if v:
            return title, k, [v[0]["src"]], None

        # e.g. url = 'https://www.rapidvideo.com/embed/ZsNSciBj'
        hutf = self.get_hutf(url, postdata='block=1')
        data = match1(hutf,
                      r'jwplayer\("home_video"\)\.setup\(([^\(\)]+)\);')
        debug(data)
        data = match1(data, r'"sources":\s*(\[[^\[\]]+\])')

        # Start below every valid index so a label at index 0 can still win.
        best, u = -1, ''
        for src in json.loads(data):
            label = src['label']
            if label not in self.labels:
                # Report the unknown label and skip it; calling
                # self.labels.index() on it would raise ValueError.
                echo("new label", label)
                continue
            rank = self.labels.index(label)
            if rank > best:
                best, u = rank, src['file']
        debug(title, u)
        return title, k, [u], None

Example #3

0

Show file

    def test(self, args):
        """Post every 'daily' link of the page at ``args.url`` to a local service.

        Each ``div#playsource a`` link is numbered in page order (starting at
        1, counting skipped links too).  Links whose href contains ``daily``
        are echoed and submitted as a form to ``http://127.0.0.1:8080/``,
        with a two second pause after each submission.
        """
        import time
        from urllib2 import urlopen
        from urllib import urlencode

        # e.g. args.url = "http://qdrama.org/k2/"
        page = self.get_hutf(args.url)
        title = SelStr("div.title.sizing h1", page)[0].text
        links = SelStr("div#playsource a", page)
        for idx, link in enumerate(links, 1):
            name = "%s_%02d" % (title.encode('utf8'), idx)
            target = link['href']
            if 'daily' not in target:
                continue
            echo(name, target)
            # Manual resume knob: never true as written (idx starts at 1).
            if idx < 0:
                continue
            form = urlencode({"aviurl": target,
                              "avitil": name,
                              "destdn": "../dwm/xman/",
                              "sub": "Start"})
            urlopen("http://127.0.0.1:8080/", form).read()
            time.sleep(2)
Example #4

0

Show file

 def handle_sp_list(self, url):
     """Download every episode of a bilibili "special" (serial) page.

     The ``spid`` is read from the page script, then for each entry of
     ``ul#season_selector li`` the matching ``sppage/bangumi-<spid>-<season>-1``
     listing is fetched and its ``div.season_list li a.t`` links collected as
     ``(title, url)`` pairs.  A copy of ``self.parsed_args`` is then used to
     run a fresh ``BILIBILI`` instance per episode, honouring the original
     ``playlist_skip`` / ``playlist_top`` values here.  The process always
     ends with ``sys.exit(1)``.
     """
     # e.g. http://www.bilibili.com/sp/<name>
     # see base.special.js line 25, loadBgmPage:
     #   http://www.bilibili.com/sppage/bangumi-21542-913-1.html
     page = self.get_hutf(url)
     spid = search_first(page, 'var spid = "(\d+)";').group(1)
     echo("spid=", spid)

     episodes = []
     for season in SelStr('ul#season_selector li', page):
         listing = self.get_hutf(
             "http://www.bilibili.com/sppage/bangumi-%s-%s-1.html"
             % (spid, season['season_id']))
         for link in SelStr('div.season_list li a.t', listing):
             episodes.append((link['title'].strip(),
                              'http://www.bilibili.com' + link['href']))

     args = copy(self.parsed_args)
     skip = args.playlist_skip
     # NOTE(review): presumably these overrides stop the per-episode run from
     # applying skip/top again - confirm against run().
     args.playlist_skip = -1315
     top = args.playlist_top
     args.playlist_top = 0

     for pos, (name, link) in enumerate(episodes, 1):
         if pos > top > 0:
             break
         if pos < skip:
             continue
         echo(name, link)
         worker = BILIBILI()
         worker.title = name
         args.url = link
         run(worker, args)
     sys.exit(1)

Example #5

0

Show file

File: rapidvideo.py Project: pastebt/dwm

    def query_info(self, url):
        """Resolve a rapidvideo embed page to a video URL.

        The title comes from the page's ``<title>``; a trailing ``.mp4`` is
        stripped and reported as the kind.  If the page has a
        ``video#videojs source`` element its ``src`` is returned directly.
        Otherwise the page is re-requested with ``block=1`` posted, the
        jwplayer ``setup(...)`` argument is extracted, and among its
        ``"sources"`` the entry whose label has the highest index in
        ``self.labels`` is chosen.

        Returns ``(title, kind, [video_url], None)``.  ``video_url`` is ``''``
        when no source carries a label listed in ``self.labels``.
        """
        hutf = self.get_hutf(url)
        debug(hutf)
        title = SelStr('title', hutf)[0].text
        k = None
        if title.endswith('.mp4'):
            title, k = title[:-4], 'mp4'

        # e.g. url = "https://www.rapidvideo.com/embed/FUZ35WDLM7"
        v = SelStr("video#videojs source", hutf)
        if v:
            return title, k, [v[0]["src"]], None

        # e.g. url = 'https://www.rapidvideo.com/embed/ZsNSciBj'
        hutf = self.get_hutf(url, postdata='block=1')
        data = match1(hutf,
                      r'jwplayer\("home_video"\)\.setup\(([^\(\)]+)\);')
        debug(data)
        data = match1(data, r'"sources":\s*(\[[^\[\]]+\])')

        # Start below every valid index so a label at index 0 can still win.
        best, u = -1, ''
        for src in json.loads(data):
            label = src['label']
            if label not in self.labels:
                # Report the unknown label and skip it; calling
                # self.labels.index() on it would raise ValueError.
                echo("new label", label)
                continue
            rank = self.labels.index(label)
            if rank > best:
                best, u = rank, src['file']
        debug(title, u)
        return title, k, [u], None

Example #6

0

Show file

 def get_playlist(self, url):
     """Return ``(name, url)`` pairs for every link in ``div#playlist1``.

     Each name is the stripped ``h2.title a`` text, an underscore, and the
     stripped link text; each URL is the link href prefixed with
     ``https://www.duboku.co``.
     """
     page = self.get_hutf(url)
     series = SelStr("h2.title a", page)[0].text.strip()
     playlist = []
     for link in SelStr('div#playlist1 a', page):
         playlist.append((series + "_" + link.text.strip(),
                          "https://www.duboku.co" + link['href']))
     return playlist

Example #7

0

Show file

File: tv8.py Project: pastebt/dwm

    def get_playlist(self, url):
        """Build the per-episode dayi.ca URLs for a tv8 series page.

        URLs that already point at ``dayi.ca/`` are single episodes and yield
        ``[]``.  Otherwise the series name and the episode count are parsed
        from the ``h1.entry-title`` text, and the post id ``p`` is taken from
        the first ``div.entry-content p a`` link whose query string carries
        both ``p`` and ``page``.

        Returns a list of ``(episode_title, episode_url)`` tuples for episodes
        ``1..count``, or ``[]`` when the heading or the post id cannot be
        parsed.
        """
        if 'dayi.ca/' in url:
            return []
        hutf = self.get_hutf(url)

        t = SelStr("h1.entry-title", hutf)[0]
        # Two heading layouts are known; try them in order.
        m = re.search(U("(.+) .+第(\\d+)集"), t.text)
        if not m:
            m = re.search(u"(.+) (\\d+)集全", t.text)
        if not m:
            # Without a match title/max_id would be unbound further down.
            debug("can not parse title and episode count from", t.text)
            return []
        title, max_id = m.group(1).strip(), int(m.group(2))

        pn = None
        for a in SelStr("div.entry-content p a", hutf):
            qs = urlparse.parse_qs(urlparse.urlparse(a['href']).query)
            if 'p' in qs and 'page' in qs:
                pn = int(qs['p'][0])
                break
        if pn is None:
            # No episode link found, so no URL can be built.
            debug("can not find an episode link with p= and page=")
            return []

        us = [(U("%s_第%02d集") % (title, i),
               "http://www.dayi.ca/ys/?p=%d&page=%d" % (pn, i))
              for i in range(1, max_id + 1)]
        debug(us)
        return us

Example #8

0

Show file

File: tv8.py Project: pastebt/dwm

 def test(self, argv):
     """Scratch test: fetch one tv8.fun page and echo its HTML.

     The method returns right after echoing the page; the statements below
     the bare ``return`` are unreachable experiments with the title and
     episode-count regexes and are kept for manual use.
     """
     # Other pages tried while developing:
     #   self.get_playlist('http://tv8.fun/20170328-.../')
     #   'http://www.dayi.ca/ys/?p=3004&page=2'
     #   'http://www.dayi.ca/ys/?p=2386&page=52'
     #   'http://www.dayi.ca/ys/?p=3004&page=1'
     #   'http://www.dayi.ca/ys/?p=4076&&page=1'
     #   'http://tv8.fun/%e4%b8%8a%e9%98%b3%e8%b5%8b/'
     page_url = 'http://tv8.fun/%e8%a5%bf%e4%ba%ac%e6%95%85%e4%ba%8b/'
     hutf = self.get_hutf(page_url)
     echo(hutf)
     return

     # --- unreachable below this line ---
     heading = SelStr("h1.entry-title", hutf)[0]
     found = re.search(u"(.+) 至第(\d+)集", heading.text)
     echo(found.group(1), found.group(2))
     paras = SelStr("div.entry-content p", hutf)
     echo(paras[3].text)
     found = re.search(u"通用版.+第(\d+)集", paras[3].text)
     echo(found.group(1))
     found = re.search(U("首播:.+共(\d+)集"), paras[0].text)
     echo(found.group(1))
     for link in paras[1].select("a"):
         if 'page=' in link['href']:
             parts = urlparse.urlparse(link['href'])
             echo(urlparse.parse_qs(parts.query))
             break

Example #9

0

Show file

 def get_playlist(self, url):
     """Return ``(name, url)`` pairs for the links of the playlist box.

     The name prefix is the first ``div.video_title h2.title`` text, or
     ``"Unknown"`` when the page has none; URLs are the link hrefs prefixed
     with ``https://www.olevod.com``.
     """
     page = self.get_hutf(url)
     heading = SelStr("div.video_title h2.title", page)
     title = heading[0].text if heading else "Unknown"
     playlist = []
     for link in SelStr("div#playlistbox ul.content_playlist li a", page):
         playlist.append((u"%s_%s" % (title, link.text),
                          "https://www.olevod.com" + link['href']))
     return playlist

Example #10

0

Show file

File: shuquge.py Project: pastebt/dwm

 def query_info(self, url):
     """Return the title and text nodes of one chapter page.

     Returns ``(title, "book", nodes, 1)`` where ``title`` is the text of the
     first ``div.content h1`` and ``nodes`` is the ``div.showtxt`` selection.
     """
     # e.g. url = "https://www.shuquge.com/txt/12236/46252712.html"
     page = self.get_hutf(url)
     body_nodes = SelStr("div.showtxt", page)
     heading = SelStr("div.content h1", page)[0]
     return heading.text, "book", body_nodes, 1

Example #11

0

Show file

 def query_info(self, url):
     """Delegate to ``OpenLoad`` for the iframe embedded in the page.

     The page ``<title>`` becomes the ``OpenLoad`` instance's title and the
     ``src`` of the first ``div.entry-content>p iframe`` is passed to its
     ``query_info``; that call's result is returned unchanged.
     """
     # e.g. url = 'http://vmus.online/The-Outpost-S01EP01.html'
     page = self.get_hutf(url)
     loader = OpenLoad()
     loader.title = SelStr("title", page)[0].text
     frames = SelStr("div.entry-content>p iframe", page)
     return loader.query_info(frames[0]['src'])

Example #12

0

Show file

File: ondemandchina.py Project: pastebt/dwm

 def query_info(self, url):
     """Render the page with phantomjs and pull out the video source.

     Runs ``./phantomjs dwm.js 300 <url>``, decodes its stdout as UTF-8 and
     returns ``(title, None, [video_src], None)`` using the first
     ``video source`` element and the first ``h1.entry-title``.
     """
     # e.g. http://8drama.com/122804/
     echo('phantomjs wait ...')
     proc = Popen(["./phantomjs", "dwm.js", "300", url], stdout=PIPE)
     raw = proc.stdout.read()
     page = raw.decode('utf8')
     proc.wait()
     video_src = SelStr('video source', page)[0]['src']
     heading = SelStr('h1.entry-title', page)[0].text
     return heading, None, [video_src], None

Example #13

0

Show file

File: tv8.py Project: pastebt/dwm

 def query_info(self, url):
     """Extract the m3u8 reference and a title from an episode page.

     The playlist address is read from the `` video:`` entry inside the
     text of the first ``div.post-entry p`` and passed through
     ``self.last_m3u8``.  The paragraph is then reduced to its ``DataNode``
     children so that its text can be appended to the ``h3`` heading.

     Returns ``(title, "m3u8", playlist, None)``.
     """
     page = self.get_hutf(url)
     heading = SelStr("h3", page)[0]
     para = SelStr("div.post-entry p", page)[0]

     # Must run before the children are filtered: the address is read from
     # the paragraph's full text.
     playlist = match1(para.text, ' video:\s*(\S+)').strip('"')
     playlist = self.last_m3u8(playlist)

     text_only = []
     for child in para.children:
         if isinstance(child, DataNode):
             text_only.append(child)
     para.children = text_only

     head_text = heading.text.strip()
     para_text = para.text.strip()
     return head_text + "_" + para_text, "m3u8", playlist, None

Example #14

0

Show file

File: ttwanda.py Project: pastebt/dwm

    def query_info(self, url):
        """Resolve a ttwanda page to ``(title, kind, urls, size)``.

        When ``url`` has no query string, the href of the first
        ``section.p5 div a`` is appended and the page re-fetched.  The title
        is the first ``div.video-content article p strong`` text, narrowed to
        the part inside the book-title brackets for ``/films/`` URLs.  The
        ``iframe.player`` source is then followed: ``v.php?url=`` players are
        handed to ``LETV``; otherwise the player page's ``play_url`` variable
        decides between an m3u8 list, a cookie-protected direct file, or a
        plain direct URL.  Exits the process when ``play_url`` is missing.
        """
        # e.g. url = 'http://www.ttwanda.com/films/us/1693.html?xf'
        page = self.get_hutf(url)
        if '?' not in url:
            url = url + SelStr('section.p5 div a', page)[0]['href']
            page = self.get_hutf(url)

        title = SelStr("div.video-content article p strong", page)[0].text
        pattern = "《(.+)》"
        if not py3:
            pattern = pattern.decode('utf8')
        inner = match1(title, pattern)
        if inner and '/films/' in url:
            title = inner

        src = SelStr('iframe.player', page)[0]['src']
        if '/player/v.php?url=' in src:
            # e.g. http://www.ttwanda.com/tv/ustv/945.html embeds
            # ../../player/v.php?url=www.le.com/ptv/vplay/20723618.html
            from letv import LETV
            return LETV().query_info('http://' + src.split('?url=', 1)[1])

        if not src.startswith(("http://", "https://")):
            src = 'http://www.ttwanda.com/' + src
        echo(src)
        self.extra_headers['Referer'] = url     # this is important
        player_page = self.get_hutf(src)
        dst = match1(player_page, 'var play_url \= "([^"]+)"')
        echo(dst)
        if not dst:
            echo("Can not find var play_url")
            sys.exit(1)

        wants_m3u8 = (('youku.com/' in dst and '/m3u8' in dst)
                      or 'lecloud.com/' in dst
                      or '/letv-uts/' in dst)
        if wants_m3u8:
            return title, None, self.try_m3u8(dst), None

        if 'ttwanda.com/ftn_handler/' in dst:
            # Forward every cookie except the PHP session id.
            pairs = []
            for c in self.cookie.cookiejar:
                if c.name != 'PHPSESSID':
                    pairs.append("%s=%s" % (c.name, c.value))
            echo(pairs)
            self.wget_cookie = "; ".join(pairs)
            kind, size = get_kind_size(dst, self.wget_cookie)
            return title, kind, [dst], size

        # Any other host (e.g. mgtv.com, 189.cn) is returned as a direct URL.
        return title, None, [dst], None

Example #15

0

Show file

 def query_info(self, url):
     """Concatenate the URLs of every play-source link on the page.

     Each ``div#playsource a`` href is resolved through one shared ``DM``
     instance; the URL lists of all results are joined in page order.

     Returns ``(title, None, urls, None)`` with the ``div.title.sizing h1``
     text as title.
     """
     page = self.get_hutf(url)
     title = SelStr("div.title.sizing h1", page)[0].text
     links = SelStr("div#playsource a", page)
     collected = []
     resolver = DM()
     for link in links:
         _title, _ext, part_urls, _size = resolver.query_info(link['href'])
         echo(part_urls)
         collected.extend(part_urls)
     return title, None, collected, None

Example #16

0

Show file

File: bookdown.py Project: pastebt/dwm

 def test1(self, args):
     """Echo the text of book 31314 from m.bookdown.com.cn, page by page.

     Starting at the book's first page, each page's ``div.articlecon`` text
     is echoed with ``&nbsp;`` turned into spaces and the "continue on the
     next page" notice removed.  The loop follows the "next chapter" button
     until a page has none.  Uses the Python 2 ``print >>`` statement to log
     each URL to stderr.
     """
     url = 'http://m.bookdown.com.cn/read/31314.html'
     while True:
         print >> sys.stderr, url
         page = self.get_hutf(url)
         for block in SelStr('div.articlecon', page):
             spaced = re.sub("&nbsp;", " ", block.text)
             echo(re.sub(u"分节阅读.+，请点击下一页继续阅读。", "", spaced))
         next_links = re.findall(
             u'''<a class="btn" href="(http://m\.bookdown\.com\.cn/read/31314_.+\.html)">下一章</a>''',
             page)
         if not next_links:
             break
         url = next_links[0]

Example #17

0

Show file

File: iqiyi.py Project: pastebt/dwm

 def query_info(self, url):
     """Resolve an iqiyi video page to ``(title, ext, urls, totalsize)``.

     The title comes from the first of the ``irTitle`` / ``og:title`` meta
     tags that exists, falling back to ``self.title``.  ``tvid`` and ``vid``
     are read from the page script and passed to ``I2().getVMS``; the URL
     picked by ``self.get_vd_url`` is fetched and expanded through
     ``self._get_m3u8_urls``.  ``ext`` is ``"ts"`` when the first URL
     contains ``.ts?`` and ``None`` otherwise; ``totalsize`` is always
     ``None``.
     """
     # e.g. url = "https://www.iqiyi.com/v_19rr04z9is.html"
     page = self.get_hutf(url)
     for selector in ('meta[name=irTitle]', 'meta[property=og:title]'):
         try:
             title = SelStr(selector, page)[0]["content"]
         except IndexError:
             title = self.title
         else:
             break

     tvid = match1(page, """param\['tvid'\] = "(\d+)";""")
     vid = match1(page, """param\['vid'\] = "([^"]+)";""")
     echo("tvid=", tvid, ", vid=", vid)
     vms = I2().getVMS(tvid, vid)
     _vd, url = self.get_vd_url(vms)
     echo(title)

     stream_page = self.get_hutf(url)
     us = self._get_m3u8_urls(url, stream_page)
     ext = "ts" if '.ts?' in us[0] else None
     return title, ext, us, None

Example #18

0

Show file

 def query_info(self, url):
     """Return the video URL, using a chrome session only when needed.

     If the page fetched by ``self.chrome_hutf`` contains
     ``video#video_player``, its ``src`` is returned as
     ``(title, None, [src], None)``.  Otherwise a chrome interface from
     ``get_ci(DEBUG)`` is handed to ``self.query_info_chrome`` and is always
     stopped afterwards.
     """
     page = self.chrome_hutf(url)
     title = SelStr("html head title", page)[0].text
     echo("title =", title)
     players = SelStr("video#video_player", page)
     echo(players)
     if not players:
         ci = get_ci(DEBUG)
         try:
             return self.query_info_chrome(ci, url)
         finally:
             print("ci.stop()")
             ci.stop()
     return title, None, [players[0]["src"]], None

Example #19

0

Show file

 def test(self, args):
     """Smoke test: fetch a fixed embed page and read its video source.

     Raises if the page has no ``video#videojs source`` element; the value
     read is not used further.
     """
     # Expected to resolve to a playercdn.net .mp4 URL.
     page = self.get_hutf("https://www.rapidvideo.com/embed/FUZ35WDLM7")
     sources = SelStr("video#videojs source", page)
     src = sources[0]["src"]

Example #20

0

Show file

 def get_playlist(self, url):
     """Return ``(text, href)`` for each non-preview episode link.

     Links under ``div.tvlists div.item`` that contain a
     ``span.sn_ispreview`` element are left out.
     """
     page = self.get_hutf(url)
     return [(link.text, link['href'])
             for link in SelStr('div.tvlists div.item a', page)
             if not link.select("span.sn_ispreview")]

Example #21

0

Show file

File: tv8.py Project: pastebt/dwm

 def get_playlist1(self, url):
     """Return the links of the first paragraph marked as an M3U list.

     The title is the ``alt`` of the first ``div.entry-content p img``, or
     the page ``<title>`` when there is no such image.  The first
     ``div.entry-content p`` whose first ``strong`` contains ``M3U`` supplies
     the links, returned as ``(title_linktext, href)`` tuples; ``[]`` when no
     paragraph qualifies.
     """
     # e.g. url = 'http://tv8.fun/20170328-.../'
     page = self.get_hutf(url)
     images = SelStr("div.entry-content p img", page)
     if images:
         title = images[0]['alt']
     else:
         title = SelStr("title", page)[0].text
     echo(title)

     for para in SelStr("div.entry-content p", page):
         bold = para.select("strong")
         if not bold or "M3U" not in bold[0].text:
             continue
         found = [(title + '_' + a.text, a['href'])
                  for a in para.select("a")]
         echo(found)
         return found
     return []

Example #22

0

Show file

 def query_info(self, url):
     """Read the ``player_data`` object of the page and return its stream.

     The ``url`` field of the JSON assigned to ``var player_data`` is passed
     through ``self.last_m3u8``; the title is the ``h2.title`` text with
     whitespace runs replaced by underscores.

     Returns ``(title, "m3u8", playlist, None)``.
     """
     page = self.get_hutf(url)
     player_json = match1(page, r"var\s+player_data\s*\=\s*({[^}]+})")
     debug(player_json)
     playlist = self.last_m3u8(json.loads(player_json)['url'])
     heading = SelStr("h2.title", page)[0]
     return '_'.join(heading.text.split()), "m3u8", playlist, None

Example #23

0

Show file

 def test1(self, argv):
     """Echo ``(0, href)`` for every ``a.fasc-button`` on a fixed page."""
     page = self.get_hutf('http://vmus.online/the-outpost-s01.html')
     pairs = []
     for button in SelStr("a.fasc-button", page):
         pairs.append((0, button['href']))
     echo(pairs)

Example #24

0

Show file

File: tv8.py Project: pastebt/dwm

 def query_info1(self, url):
     """Extract the title and the ``video:`` address from an episode page.

     The title is the first ``h3`` of ``div#content`` joined by an underscore
     to the first whitespace-separated word of the first paragraph.  The
     address is the token following ``video:`` in that paragraph; when it
     starts with a quote character, the part between the first pair of that
     quote is used.

     Returns ``(title, "m3u8", address, None)``.
     """
     # e.g. url = 'http://www.dayi.ca/ys/?p=2386&page=52'
     page = self.get_hutf(url)
     content = SelStr("div#content-outer div#content", page)[0]
     heading = content.select('h3')[0].text
     para = content.select('p')[0]
     title = heading + '_' + para.text.split()[0]
     echo(title)

     address = match1(para.text, 'video:(\S+)')
     quote = address[0]
     if quote in ("'", '"'):
         address = address.split(quote)[1]
     echo(address)
     return title, "m3u8", address, None

Example #25

0

Show file

 def get_playlist(self, page_url):
     """Return ``(image_title, href)`` for each episode link with a title.

     Looks at ``div.list.active > dl > dt > a`` links; a link is included
     only when its first ``img`` reports having a ``title``.
     """
     # e.g. http://www.letv.com/tv/10003313.html
     #      http://www.le.com/tv/10009472.html
     page = self.get_hutf(page_url)
     playlist = []
     for link in SelStr('div.list.active > dl > dt > a', page):
         image = link.select("img")[0]
         if 'title' not in image:
             continue
         playlist.append((image['title'], link['href']))
     return playlist

Example #26

0

Show file

File: bookdown.py Project: pastebt/dwm

 def test(self, args):
     """Echo the text of a fixed bookdown.com.cn book, page by page.

     The book id is parsed from the hard-coded info URL, reading starts at
     ``/read/<id>_1.html`` and follows ``a#nextPage`` until a page has none.
     Each ``div#view_content_txt`` is echoed with ``&nbsp;`` turned into
     spaces and the "continue on the next page" notice removed.
     """
     info_url = "http://www.bookdown.com.cn/bookinfo/30258.html"
     # The second pattern covers URLs of the /read/<id>_<n>.html form.
     ids = match1(info_url, "/bookinfo/(\d+)\.html", "/read/(\d+).*\.html")
     bid = int(ids[0])
     echo("bid =", bid)

     url = "http://www.bookdown.com.cn/read/%d_1.html" % bid
     while True:
         page = self.get_hutf(url)
         for block in SelStr('div#view_content_txt', page):
             spaced = re.sub("&nbsp;", " ", block.text)
             echo(re.sub(u"分节阅读.+，请点击下一页继续阅读。", "", spaced))
         next_links = SelStr("a#nextPage", page)
         if not next_links:
             break
         url = next_links[0]['href']

Example #27

0

Show file

File: iqiyi.py Project: pastebt/dwm

    def get_playlist(self, page_url):
        """Return the episode list of an iqiyi playlist or album page.

        ``/playlist`` pages are fetched directly and yield
        ``(title, href)`` of each picture link.  Any other page is rendered
        with ``./phantomjs dwm.js 200`` and yields ``(text, href)`` of each
        ``div.smalList > ul > li > a``; in that case ``self.align_num`` is
        set to the number of digits of the list length.
        """
        # e.g. http://www.iqiyi.com/playlist521743802.html
        if '/playlist' in page_url:
            page = self.get_hutf(page_url)
            links = SelStr("div.site-piclist_pic > a.site-piclist_pic_link",
                           page)
            return [(link['title'], link['href']) for link in links]

        # e.g. http://www.iqiyi.com/a_19rrhb9eet.html (an album page)
        echo("get_list phantomjs wait 200 ...")
        proc = Popen(["./phantomjs", "dwm.js", "200", page_url], stdout=PIPE)
        raw = proc.stdout.read()
        proc.wait()
        page = raw.decode("utf8")

        urls = []
        for link in SelStr('div.smalList > ul > li > a', page):
            urls.append((link.text, link['href']))
        self.align_num = len(str(len(urls)))
        return urls

Example #28

0

Show file

 def query_info1(self, url):
     """Resolve a dnvod play page to its resource address.

     The ``id`` and ``key`` values are read from the first ``<script>``
     whose text mentions ``PlayerConfig``; the key is then posted to
     ``GetResource.ashx`` with the page as Referer, and the response body is
     returned as the single URL of ``(title, None, [durl], None)``.

     Raises ``NameError`` if no script contains ``PlayerConfig``, because
     ``vid`` and ``key`` are only assigned inside the loop.
     """
     # e.g. url = 'https://www.dnvod.eu/Movie/Readyplay.aspx?id=deYM01Pf0bo%3d'
     page = self.get_hutf(url)
     title = SelStr('span#bfy_title >', page)[0].data.strip()
     debug('title =', title)

     for script in SelStr('script', page):
         txt = script.text
         debug('txt =', txt)
         if 'PlayerConfig' in txt:
             debug('got PlayerConfig')
             vid = match1(txt, "id:\s*'([^']+)',")
             key = match1(txt, "key:\s*'([^']+)',")
             debug('vid =', vid, ', key =', key)
             break

     resource = ("https://www.dnvod.eu/Movie/GetResource.ashx?id=%s&type=htm"
                 % vid)
     self.extra_headers['Referer'] = url
     durl = self.get_html(resource, postdata="key=" + key)
     debug(durl)
     return title, None, [durl], None

Example #29

0

Show file

File: shimo.py Project: pastebt/dwm

 def download_one(self, url):
     """Save the text of a shimo document to ``<title>.txt``.

     The file name is the ``data-value`` of the title box plus ``.txt``.
     A newline is added at the end of every ``<p>`` of ``div.ql-editor``
     before the editor's text is written to the file in the current
     directory.  Returns ``None``.
     """
     # e.g. url = "https://shimo.im/docs/gJQufddR72AZJcna/read"
     hutf = self.get_hutf(url)
     d = SelStr("div#editor", hutf)[0]
     t = d.select("div.ql-title div.ql-title-box")[0]
     title = t["data-value"] + ".txt"
     t = d.select("div.ql-editor")[0]
     for p in t.select("p"):
         # Terminate each paragraph with a newline: extend the trailing text
         # node when there is one, otherwise add a new text node.
         if p.children and isinstance(p.children[-1], DataNode):
             p.children[-1].append("\n")
         else:
             p.children.append(DataNode(p, "\n"))
     # The context manager closes the file even if building or writing the
     # text raises.
     with open(title, "w") as fout:
         fout.write(t.text)

Example #30

0

Show file

File: openload.py Project: pastebt/dwm

    def query_info1(self, url):
        """Resolve an openload embed page to its stream URL via phantomjs.

        The page is rendered with ``./phantomjs dwm.js 300``.  If it contains
        an ``h6`` element, that text is echoed and an empty URL list is
        returned.  Otherwise the stream id is read from the ``streamurl``
        span; when the embed id can be parsed from ``url`` and the stream id
        does not start with it, a fixed fallback suffix is appended to the
        embed id instead.  The title is taken from the ``og:title`` meta tag
        while ``self.title`` still equals ``UTITLE``, and a trailing
        ``.<kind>`` extension is removed from it.

        Returns ``(title, kind, [stream_url], size)`` with kind and size as
        reported by ``get_kind_size``.
        """
        # e.g. https://openload.co/embed/isCWWnlsZLE/
        # <span id="streamurl">isCWWnlsZLE~1481138074~208.91.0.0~g617lYdo</span>
        echo("phantomjs wait 300 ...")
        p = Popen(["./phantomjs", "dwm.js", "300", url], stdout=PIPE)
        html = p.stdout.read()
        hutf = html.decode('utf8')
        p.wait()
        debug(hutf)
        n = SelStr('h6', hutf)
        if n:
            echo(n[0].text)
            return self.title, None, [], None

        # uid stays None for URLs that do not match, instead of being
        # unbound when it is tested below.
        uid = None
        m = re.search('''openload.co/embed/([^/]+)/''', url)
        if m:
            uid = m.group(1)
            echo(uid)
        m = re.search('''<span id="streamurl">([^<>]+)</span>''', hutf)
        vid = m.group(1)
        if uid is not None and not vid.startswith(uid):
            # TODO, try to decode it
            vid = uid + "~1497803146~64.180.0.0~eBodZDZa"
        echo(vid)
        url = "https://openload.co/stream/%s?mime=true" % vid

        # e.g. "https://openload.co/embed/kUEfGclsU9o/"
        n = SelStr("meta[name=og:title]", hutf)
        if n and self.title == UTITLE:
            self.title = n[0]['content']  # e.g. "skyrim_no-audio_1080.mp4"

        # e.g. get_kind_size reports "video/mp4", 584989307
        k, tsize = get_kind_size(url)
        k = k.split('/')[-1]
        suffix = '.' + k
        if self.title.endswith(suffix):
            # Trim exactly the extension, whatever its length.
            self.title = self.title[:-len(suffix)]
        return self.title, k, [url], tsize

Example #31

0

Show file

    def query_info(self, url):
        """Resolve a ttwanda page to ``(title, kind, urls, size)``.

        When ``url`` has no query string, the href of the first
        ``section.p5 div a`` is appended and the page re-fetched.  The title
        is the first ``div.video-content article p strong`` text, narrowed to
        the part inside the book-title brackets for ``/films/`` URLs.  The
        ``iframe.player`` source is then followed: ``v.php?url=`` players are
        handed to ``LETV``; otherwise the player page's ``play_url`` variable
        decides between an m3u8 list, a cookie-protected direct file, or a
        plain direct URL.  Exits the process when ``play_url`` is missing.
        """
        # e.g. url = 'http://www.ttwanda.com/films/us/1693.html?xf'
        page = self.get_hutf(url)
        if '?' not in url:
            url = url + SelStr('section.p5 div a', page)[0]['href']
            page = self.get_hutf(url)

        title = SelStr("div.video-content article p strong", page)[0].text
        pattern = "《(.+)》"
        if not py3:
            pattern = pattern.decode('utf8')
        inner = match1(title, pattern)
        if inner and '/films/' in url:
            title = inner

        src = SelStr('iframe.player', page)[0]['src']
        if '/player/v.php?url=' in src:
            # e.g. http://www.ttwanda.com/tv/ustv/945.html embeds
            # ../../player/v.php?url=www.le.com/ptv/vplay/20723618.html
            from letv import LETV
            return LETV().query_info('http://' + src.split('?url=', 1)[1])

        if not src.startswith(("http://", "https://")):
            src = 'http://www.ttwanda.com/' + src
        echo(src)
        self.extra_headers['Referer'] = url  # this is important
        player_page = self.get_hutf(src)
        dst = match1(player_page, 'var play_url \= "([^"]+)"')
        echo(dst)
        if not dst:
            echo("Can not find var play_url")
            sys.exit(1)

        wants_m3u8 = (('youku.com/' in dst and '/m3u8' in dst)
                      or 'lecloud.com/' in dst
                      or '/letv-uts/' in dst)
        if wants_m3u8:
            return title, None, self.try_m3u8(dst), None

        if 'ttwanda.com/ftn_handler/' in dst:
            # Forward every cookie except the PHP session id.
            pairs = []
            for c in self.cookie.cookiejar:
                if c.name != 'PHPSESSID':
                    pairs.append("%s=%s" % (c.name, c.value))
            echo(pairs)
            self.wget_cookie = "; ".join(pairs)
            kind, size = get_kind_size(dst, self.wget_cookie)
            return title, kind, [dst], size

        # Any other host (e.g. mgtv.com, 189.cn) is returned as a direct URL.
        return title, None, [dst], None

Example #32

0

Show file

 def get_playlist(self, url):
     """Return ``(text, url)`` for each paging link of a ``/tv/`` page.

     URLs without ``/tv/`` yield ``[]``.  The query string of ``url`` is
     dropped before fetching.  For links whose href carries ``vid=<digits>``
     only that parameter is kept; other hrefs are appended to the page URL
     unchanged.
     """
     if '/tv/' not in url:
         return []
     url = url.split('?')[0]
     page = self.get_hutf(url)
     playlist = []
     # hrefs look like "?vid=20723618&amp;title=<episode title>"
     for link in SelStr('div.article-paging a', page):
         vid = match1(link['href'], 'vid=(\d+)')
         target = url + '?vid=' + vid if vid else url + link['href']
         playlist.append((link.text, target))
     return playlist