Beispiel #1
0
    def get_link(self, url):
        """Follow the page's first iframe and read the video from it.

        The document at ``url`` is fetched and its first ``<iframe src>``
        is taken as the player page.  That page is fetched in turn; its
        first ``<source src>`` (passed through ``get_orig_url``) is the
        video and the ``poster`` of its first ``<video>`` is the image.

        Returns a dict with the keys ``vid``, ``img`` and ``desc``;
        ``img`` is ``''`` when no poster exists and ``desc`` is always
        empty.  Raises ``VideoNotFound(url)`` when either the iframe or
        the ``<source>`` element is missing.
        """
        page = etree.parse(
            StringIO(requests.get(url, timeout=10).text), etree.HTMLParser())
        iframes = page.xpath('//iframe[@src]')
        if not iframes:
            raise VideoNotFound(url)

        # Second hop: the iframe target holds the actual <video> markup.
        player_url = iframes[0].get("src")
        player = etree.parse(
            StringIO(requests.get(player_url, timeout=10).text),
            etree.HTMLParser())
        sources = player.xpath('//source[@src]')
        if not sources:
            raise VideoNotFound(url)

        video = get_orig_url(sources[0].get("src"))
        posters = player.xpath('//video[@poster]')
        poster = posters[0].get("poster") if posters else ''
        return {"vid": video, "img": poster, "desc": ""}
Beispiel #2
0
    def get_link(self, url):
        """Look up a video and its cover through the xiaoying.co web API.

        The video id is the path segment following ``/v/`` in ``url``.
        Two JSONP endpoints are queried with it: one for the publish info
        (cover image) and one for the video URL.  The JSON object is cut
        out of the ``callback({...})`` wrapper with a regular expression.

        Returns a dict with the keys ``vid``, ``img`` and ``desc``;
        ``img`` stays ``""`` when the cover reply carries no JSON payload
        and ``desc`` is always empty.  Raises ``VideoNotFound`` when the
        id cannot be extracted from ``url`` or the video reply carries no
        JSON payload.
        """
        id_match = re.search(r"/v/([^/]*)/", urlparse.urlsplit(url).path)
        if id_match is None:
            raise VideoNotFound()

        puid = id_match.group(1)
        img_url = 'http://w.api.xiaoying.co/webapi2/rest/video/publishinfo.get?callback=videocallbackinfo&appkey=30000000&puid=%s' % (puid)
        vid_url = 'http://w.api.xiaoying.co/webapi2/rest/video/videourl?callback=videocallbackvideosrc&appkey=30000000&puid=%s' % (puid)

        # Matches the "{...}" argument of a JSONP "callback({...})" reply.
        jsonp = re.compile(r"\((\{.*\})\)")

        cover_reply = jsonp.search(requests.get(img_url, timeout=10).text)
        if cover_reply:
            cover = json.loads(cover_reply.group(1))["videoinfo"]["coverurl"]
        else:
            cover = ""

        video_reply = jsonp.search(requests.get(vid_url, timeout=10).text)
        if video_reply is None:
            raise VideoNotFound()
        video = json.loads(video_reply.group(1))["url"]

        return {"vid": video, "img": cover, "desc": ""}
Beispiel #3
0
    def get_link(self, url):
        """Resolve a weibo / sinajs / sinaimg URL to a video link.

        Three kinds of host are recognised:

        * ``weibo.com`` / ``weibo.cn``: the page is fetched, the playlist
          URL is read from the ``list=`` part of the first
          ``<embed flashvars>`` and the first ``<img src>`` is used as
          the image.
        * ``sinajs.cn``: the playlist URL is the URL-decoded ``file=``
          part of ``url`` itself.
        * ``sinaimg.cn``: ``url`` already is the playlist URL.

        The playlist is then handed to ``self.extract_mp4`` and the
        returned path is joined with the playlist host.

        Returns a dict with the keys ``vid``, ``img`` and ``desc``
        (``desc`` is always empty).  Raises ``VideoNotFound`` when the
        host is not one of the above or the playlist URL cannot be found.
        """
        netloc = urlparse.urlsplit(url).netloc
        sinaimg_url = ""
        img_link = ""
        if re.search(r"weibo\.com|weibo\.cn", netloc):
            r = requests.get(url, timeout=10)
            tree = etree.parse(StringIO(r.text), etree.HTMLParser())
            links = tree.xpath('//embed/@flashvars')
            if not links:
                raise VideoNotFound()

            match = re.search(r'list=(.*)', links[0])
            if not match:
                raise VideoNotFound()
            sinaimg_url = urllib.unquote(match.group(1))
            img_links = tree.xpath('//img/@src')
            if img_links:
                img_link = img_links[0]
        elif re.search(r"sinajs\.cn", netloc):
            match = re.search(r"file=(.*)", url)
            if not match:
                # Without a "file=" part there is no playlist to follow.
                raise VideoNotFound()
            sinaimg_url = urllib.unquote(match.group(1))
        elif re.search(r"sinaimg\.cn", netloc):
            sinaimg_url = url

        if not sinaimg_url:
            # Unsupported host or empty playlist parameter: fail with the
            # domain error instead of requesting an empty URL.
            raise VideoNotFound()

        path = self.extract_mp4(sinaimg_url)
        netloc = urlparse.urlsplit(sinaimg_url).netloc
        vid_link = "http://%s/%s" % (netloc, path)
        return {"vid": vid_link, "img": img_link, "desc": ""}
Beispiel #4
0
    def get_link(self, url):
        """Extract video, image and description from a post page.

        The ``<div id="post_content">`` of the page is serialised and
        searched for a ``setCuSunPlayerVideo(...)`` call.  The third and
        fourth comma-separated arguments of that call, with their first
        and last character (the quotes) removed, are the image path and
        the video link.  The description is produced by
        ``get_inner_html`` from the second styled ``<p>`` of the post.

        When the page has no such call the lookup is delegated to
        ``self.get_sinaimg_video``.

        Returns a dict with the keys ``vid``, ``img`` and ``desc``;
        ``desc`` is ``""`` when the post has fewer than two styled
        paragraphs.  Raises ``VideoNotFound(url)`` when the post
        container is missing or the player call has fewer than four
        arguments.
        """
        r = requests.get(url, timeout=20)
        tree = etree.parse(StringIO(r.text), etree.HTMLParser())
        links = tree.xpath('//div[@id="post_content"]')
        if not links:
            raise VideoNotFound(url)

        vid_node = links[0]
        html = etree.tostring(vid_node)
        match = re.search(r"setCuSunPlayerVideo\((.*)\)", html)
        if not match:
            return self.get_sinaimg_video(tree)

        params = match.group(1).split(",")
        if len(params) < 4:
            # The image and the video are the 3rd and 4th arguments.
            raise VideoNotFound(url)

        # [1:-1] drops the quote characters surrounding each argument.
        img_link = "http://www.aishipin.net" + params[2][1:-1]
        vid_link = params[3][1:-1]

        p_nodes = vid_node.findall('.//p[@style]')
        # The description is optional; do not fail the whole lookup when
        # the second styled paragraph is absent.
        desc = get_inner_html(p_nodes[1]) if len(p_nodes) > 1 else ""
        return {"vid": vid_link, "img": img_link, "desc": desc}
Beispiel #5
0
    def get_link(self, url):
        """Build the miaopai stream URL from the player parameter.

        The page's first ``<param name="src">`` value carries a ``scid``
        query argument, which is inserted into the
        ``gslb.miaopai.com/stream/<scid>.mp4`` template.  Pages without
        such a ``<param>`` are handed to ``self.__search_mp4``.

        Returns a dict with the keys ``vid``, ``img`` and ``desc``.  The
        image is the first ``<img>`` inside ``div.video_img`` (``''``
        when absent); the description is the ``text`` of the first
        ``<p>`` inside ``div.introduction`` (``''`` when there is no such
        paragraph).  Raises ``VideoNotFound(url)`` when the parameter
        value has no ``?scid=...&`` part.
        """
        page = etree.parse(
            StringIO(requests.get(url, timeout=10).text), etree.HTMLParser())
        player_srcs = page.xpath('//param[@name="src"]/@value')
        if not player_srcs:
            return self.__search_mp4(page)

        scid_match = re.search(r"\?scid=(.*?)&", player_srcs[0])
        if scid_match is None:
            raise VideoNotFound(url)
        video = "http://gslb.miaopai.com/stream/%s.mp4" % (scid_match.group(1))

        covers = page.xpath('//div[@class="video_img"]/img/@src')
        cover = covers[0] if covers else ''

        paragraphs = page.xpath('//div[@class="introduction"]/p')
        summary = paragraphs[0].text if paragraphs else ''

        return {"vid": video, "img": cover, "desc": summary}
Beispiel #6
0
    def get_link(self, url):
        """Resolve a weipai page to its video link.

        Steps:

        1. Read the video id from the ``playVideo('<id>'`` call in the
           ``onclick`` of the play link inside ``div.video_player``.
        2. Fetch the share page for that id and take the first
           single-quoted ``http...`` string in it as the wrapper URL.
        3. Base64-decode the wrapper URL's ``s`` query argument; the
           decoded text is itself a query string whose ``p`` argument is
           the video link.

        Returns a dict with the keys ``vid``, ``img`` and ``desc``;
        ``img`` is ``''`` when the player has no image and ``desc`` is
        always empty.  Raises ``VideoNotFound`` when any of the steps
        above cannot find its value.
        """
        r = requests.get(url, timeout=10)
        tree = etree.parse(StringIO(r.text), etree.HTMLParser())
        onclicks = tree.xpath(
            '//div[@class="video_player"]/a[@class="play"]/@onclick')
        if not onclicks:
            raise VideoNotFound(url)
        match = re.search(r"playVideo\('(.*?)'", onclicks[0])
        if not match:
            raise VideoNotFound(url)

        vid = match.group(1)
        share_link = 'http://share.weipai.cn/video/play/id/%s/type/theater/source/undefine' % (
            vid)
        r = requests.get(share_link, timeout=10)
        match_url = re.search(r"'(http.*?)'", r.text)
        if not match_url:
            raise VideoNotFound(url)

        wrapper_query = urlparse.urlsplit(match_url.group(1)).query
        # parse_qs omits absent keys instead of mapping them to an empty
        # list, so the lookup must tolerate a missing key.
        codes = urlparse.parse_qs(wrapper_query).get('s')
        if not codes:
            raise VideoNotFound()
        try:
            vid_params = base64.b64decode(codes[0])
        except (TypeError, ValueError):
            # Malformed base64 (TypeError on Python 2, a ValueError
            # subclass on Python 3) means there is no usable link.
            raise VideoNotFound()
        links = urlparse.parse_qs(vid_params).get('p')
        if not links:
            raise VideoNotFound()
        vid_link = links[0]

        img_links = tree.xpath(
            '//div[@class="video_player"]/div/span/img/@src')
        img_link = img_links[0] if img_links else ''
        return {"vid": vid_link, "img": img_link, "desc": ""}
Beispiel #7
0
    def get_sinaimg_video(self, tree):
        """Read the video link from the first ``<source src>`` of ``tree``.

        ``tree`` is an already parsed document offering ``xpath()``.

        Returns a dict with the keys ``vid``, ``img`` and ``desc``; the
        latter two are always empty strings.  Raises ``VideoNotFound``
        when the document has no ``<source>`` element with a ``src``.
        """
        links = tree.xpath('//source[@src]')
        if not links:
            # No page URL is available in this method, so the error is
            # raised without an argument.
            raise VideoNotFound()

        return {"vid": links[0].get("src"), "img": "", "desc": ""}
Beispiel #8
0
    def get_link(self, url):
        """Read video and image from a ``jwplayer(...).setup({...})`` call.

        The raw page text is searched for the player setup call; inside
        its option block the ``file: "..."`` entry is the video and the
        following ``image: "..."`` entry is the image.

        Returns a dict with the keys ``vid``, ``img`` and ``desc``
        (``desc`` is always empty).  Raises ``VideoNotFound`` when the
        setup call is missing or does not contain both entries in that
        order.
        """
        result = requests.get(url, timeout=10).text
        setup = re.search(
            r'jwplayer\(".*"\).setup\(\{(.*?)\}\)', result, re.M | re.S)
        if not setup:
            raise VideoNotFound()

        # Explicit check instead of a catch-all "except": only a failed
        # match is a "not found" condition.
        link = re.search(
            r'file: "(.*?)".*image: "(.*?)"', setup.group(1), re.M | re.S)
        if not link:
            raise VideoNotFound()

        return {"vid": link.group(1), "img": link.group(2), "desc": ''}
Beispiel #9
0
    def extract_mp4(self, url):
        """Return the first entry of the line-based listing at ``url``.

        The response is read line by line; blank lines and lines starting
        with ``#`` are skipped.  Surrounding whitespace (including the
        ``\\r`` of CRLF line endings) is removed from the returned entry.

        Raises ``VideoNotFound`` when no entry line remains.
        """
        r = requests.get(url, timeout=5)
        stripped = (line.strip() for line in r.text.splitlines())
        # A list (not a lazy filter object) so emptiness can be tested on
        # both Python 2 and Python 3.
        entries = [line for line in stripped
                   if line and not line.startswith("#")]
        if not entries:
            raise VideoNotFound()

        return entries[0]
Beispiel #10
0
    def __search_mp4(self, tree):
        """Read the video from the first ``div.vid_img`` of ``tree``.

        The video link is the ``data-url`` attribute of that ``<div>``
        and the image is the ``src`` of the first ``<img>`` below it.

        Returns a dict with the keys ``vid``, ``img`` and ``desc``;
        ``img`` is ``""`` when the ``<div>`` contains no ``<img>`` and
        ``desc`` is always empty.  Raises ``VideoNotFound`` when the
        document has no ``div.vid_img``.
        """
        divs = tree.xpath('//div[@class="vid_img"]')
        if not divs:
            raise VideoNotFound()

        container = divs[0]
        vid_link = container.get('data-url')
        # find() returns None when there is no <img>; test for that
        # directly rather than swallowing every exception.
        img_node = container.find('.//img')
        img_link = img_node.get('src') if img_node is not None else ""
        return {"vid": vid_link, "img": img_link, "desc": ""}
Beispiel #11
0
    def get_link(self, url):
        """Return the ``src`` of the page's first ``<source>`` element.

        Returns a dict with the keys ``vid``, ``img`` and ``desc``; the
        latter two are always empty strings.  Raises
        ``VideoNotFound(url)`` when the page has no ``<source src>``.
        """
        page = etree.parse(
            StringIO(requests.get(url, timeout=10).text), etree.HTMLParser())
        sources = page.xpath('//source[@src]')
        if not sources:
            raise VideoNotFound(url)

        return {"vid": sources[0].get('src'), "img": "", "desc": ""}
Beispiel #12
0
    def get_link(self, url):
        """Query the xiaokaxiu.com play-video API for a page URL.

        The video id is the part of the URL path between ``/v/`` and
        ``.html``.  The API reply is JSON; its ``data.cover`` and
        ``data.linkurl`` fields become the image and the video link.

        Returns a dict with the keys ``vid``, ``img`` and ``desc``
        (``desc`` is always empty).  Raises ``VideoNotFound`` when the
        id cannot be extracted from ``url``.
        """
        id_match = re.search(r"/v/(.*).html", urlparse.urlsplit(url).path)
        if id_match is None:
            raise VideoNotFound()

        api_url = 'http://api.xiaokaxiu.com/video/web/get_play_video?scid=%s' % (
            id_match.group(1))
        reply = json.loads(requests.get(api_url, timeout=10).text)
        cover = reply["data"]["cover"]
        video = reply["data"]["linkurl"]

        return {"vid": video, "img": cover, "desc": ''}
Beispiel #13
0
    def get_link(self, url):
        """Read video, image and description from ``div#detailVideo``.

        The video link is the ``data-video`` attribute of the
        ``<div id="detailVideo">``, the image is the ``src`` of an
        ``<img>`` directly inside it, and the description is the text of
        the ``<h1 class="detail-description break">`` joined with spaces.

        Returns a dict with the keys ``vid``, ``img`` and ``desc``;
        ``img`` is ``''`` when there is no image.  Raises
        ``VideoNotFound(url)`` when the ``data-video`` attribute is
        missing.
        """
        page = etree.parse(
            StringIO(requests.get(url, timeout=10).text), etree.HTMLParser())
        videos = page.xpath('//div[@id="detailVideo"]/@data-video')
        if not videos:
            raise VideoNotFound(url)

        covers = page.xpath('//div[@id="detailVideo"]/img/@src')
        cover = covers[0] if covers else ''

        text_parts = page.xpath(
            '//h1[@class="detail-description break"]/text()')
        return {"vid": videos[0], "img": cover, "desc": " ".join(text_parts)}
Beispiel #14
0
    def get_link(self, url):
        """Read the video from the ``player_src=`` value in the page text.

        The raw page is searched for ``player_src=...`` (up to the next
        double quote); the URL-decoded value is passed through
        ``get_orig_url`` to obtain the video link.  The image is the
        URL-decoded ``player_poster=...`` value (up to the next ``&``)
        and the description is the ``text`` of the first
        ``<div class="detail_des">``.

        Returns a dict with the keys ``vid``, ``img`` and ``desc``;
        ``img`` is ``''`` without a poster and ``desc`` is ``""`` without
        a description block.  Raises ``VideoNotFound(url)`` when the page
        has no ``player_src=`` value.
        """
        body = requests.get(url, timeout=10).text
        src_match = re.search(r'player_src=([^"]*)"', body)
        if src_match is None:
            raise VideoNotFound(url)
        video = get_orig_url(urllib.unquote(src_match.group(1)))

        poster_match = re.search(r'player_poster=([^&]*)&', body)
        if poster_match:
            poster = urllib.unquote(poster_match.group(1))
        else:
            poster = ''

        page = etree.parse(StringIO(body), etree.HTMLParser())
        blocks = page.xpath('//div[@class="detail_des"]')
        summary = blocks[0].text if blocks else ""
        return {"vid": video, "img": poster, "desc": summary}