Ejemplo n.º 1
0
 def get_sources(self, video):
     """Build the list of direct hosters for *video*.

     The video page is fetched and searched for a ``proxy.link`` token.
     The token is run through ``scraper_utils.gk_decrypt`` (GK_KEY1 for
     tokens of at most 224 characters, GK_KEY2 otherwise) and, when the
     result resolves to the 'gvideo' host, every stream returned by
     ``_parse_google`` becomes one hoster dict.

     Returns an empty list when the video has no usable URL, the page
     holds no token, or the decrypted URL is not a 'gvideo' one.
     """
     page_path = self.get_url(video)
     if not page_path or page_path == FORCE_NO_MATCH:
         return []

     page_url = urlparse.urljoin(self.base_url, page_path)
     page = self._http_get(page_url, cache_limit=.5)
     found = re.search('proxy\.link=([^"&]+)', page)
     if not found:
         return []

     # Keep only what follows the first '*' (the whole token if there is none).
     token = found.group(1).split('*', 1)[-1]
     key = GK_KEY1 if len(token) <= 224 else GK_KEY2
     decrypted_url = scraper_utils.gk_decrypt(self.get_name(), key, token)
     if self._get_direct_hostname(decrypted_url) != 'gvideo':
         return []

     return [
         {
             'multi-part': False,
             'url': link,
             'class': self,
             'quality': scraper_utils.gv_get_quality(link),
             'host': self._get_direct_hostname(link),
             'rating': None,
             'views': None,
             'direct': True
         }
         for link in self._parse_google(decrypted_url)
     ]
Ejemplo n.º 2
0
 def __get_gk_links2(self, html):
     """Map every gvideo stream reachable from *html* to its metadata.

     Looks for a ``proxy.link`` token in *html*, runs it through
     ``scraper_utils.gk_decrypt`` and, if the result is a 'gvideo' URL,
     returns ``{stream_url: {'quality': ..., 'direct': True}}`` for each
     stream ``_parse_gdocs`` yields.  Returns an empty dict otherwise.
     """
     found = re.search('proxy\.link=([^"&]+)', html)
     if not found:
         return {}

     # Keep only what follows the first '*' (the whole token if there is none).
     token = found.group(1).split('*', 1)[-1]
     # Tokens of at most 224 characters use GK_KEY1, longer ones GK_KEY2.
     key = GK_KEY1 if len(token) <= 224 else GK_KEY2
     vid_url = scraper_utils.gk_decrypt(self.get_name(), key, token)
     if scraper_utils.get_direct_hostname(self, vid_url) != 'gvideo':
         return {}

     return {
         link: {'quality': scraper_utils.gv_get_quality(link), 'direct': True}
         for link in self._parse_gdocs(vid_url)
     }
 def __get_gk_links2(self, html):
     """Map every gvideo stream reachable from *html* to its quality.

     Looks for a ``proxy.link`` token in *html*, runs it through
     ``scraper_utils.gk_decrypt`` and, if the result is a 'gvideo' URL,
     returns ``{stream_url: quality}`` for each stream ``_parse_gdocs``
     yields.  Returns an empty dict otherwise.
     """
     found = re.search('proxy\.link=([^"&]+)', html)
     if not found:
         return {}

     # Keep only what follows the first '*' (the whole token if there is none).
     token = found.group(1).split('*', 1)[-1]
     # Tokens of at most 224 characters use GK_KEY1, longer ones GK_KEY2.
     key = GK_KEY1 if len(token) <= 224 else GK_KEY2
     vid_url = scraper_utils.gk_decrypt(self.get_name(), key, token)
     if self._get_direct_hostname(vid_url) != 'gvideo':
         return {}

     return {
         link: scraper_utils.gv_get_quality(link)
         for link in self._parse_gdocs(vid_url)
     }
Ejemplo n.º 4
0
    def get_sources(self, video):
        """Return the direct hosters found on the page for *video*.

        Every ``embeds[n] = '...'`` snippet on the page is searched for an
        iframe ``src``:

        * ``play-en.php`` iframes carry an ``id`` token that is run through
          ``scraper_utils.gk_decrypt`` with GK_KEY and then handed to
          ``_parse_google``;
        * any other iframe is fetched and passed to ``_parse_sources_list``;
          ``download.php`` entries are re-keyed to the response of a
          non-redirecting HEAD request when that response starts with
          'http'.

        Returns:
            A list of hoster dicts, empty when the video has no usable URL.
        """
        source_url = self.get_url(video)
        sources = {}
        hosters = []
        if source_url and source_url != FORCE_NO_MATCH:
            url = urlparse.urljoin(self.base_url, source_url)
            html = self._http_get(url, cache_limit=.5)
            for embed in re.finditer(r"embeds\[(\d+)\]\s*=\s*'([^']+)", html):
                iframe = re.search('src="([^"]+)', embed.group(2))
                if not iframe:
                    continue

                iframe_url = iframe.group(1)
                if 'play-en.php' in iframe_url:
                    token = re.search('id=([^"&]+)', iframe_url)
                    if token:
                        # Keep only what follows the first '*', if any.
                        proxy_link = token.group(1).split('*', 1)[-1]
                        picasa_url = scraper_utils.gk_decrypt(
                            self.get_name(), GK_KEY, proxy_link)
                        for stream_url in self._parse_google(picasa_url):
                            sources[stream_url] = {
                                'quality':
                                scraper_utils.gv_get_quality(stream_url),
                                'direct':
                                True
                            }
                else:
                    iframe_html = self._http_get(iframe_url, cache_limit=0)
                    temp_sources = self._parse_sources_list(iframe_html)
                    # Walk a snapshot of the keys: entries are re-keyed
                    # below, and inserting/deleting while iterating the
                    # dict itself can skip or revisit entries.
                    for source in list(temp_sources):
                        if 'download.php' in source:
                            redir_url = self._http_get(
                                source,
                                allow_redirect=False,
                                method='HEAD',
                                cache_limit=0)
                            if redir_url.startswith('http'):
                                temp_sources[redir_url] = temp_sources.pop(
                                    source)
                    sources.update(temp_sources)

        for source, values in sources.items():
            host = self._get_direct_hostname(source)
            stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
            quality = QUALITY_MAP.get(values['quality'], QUALITIES.HIGH)
            hoster = {
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': True
            }
            hosters.append(hoster)

        return hosters
    def get_sources(self, video):
        """Return the direct hosters found on the page for *video*.

        Every ``embeds[n] = '...'`` snippet on the page is searched for an
        iframe ``src``:

        * ``play-en.php`` iframes carry an ``id`` token that is run through
          ``scraper_utils.gk_decrypt`` with GK_KEY and then handed to
          ``_parse_google``;
        * any other iframe is fetched; its ``_parse_sources_list`` entries
          are collected (``download.php`` entries re-keyed to the response
          of a non-redirecting HEAD request when it starts with "http"),
          plus every ``<source type="video/mp4">`` ``src`` at HD720.

        Returns:
            A list of hoster dicts, empty when the video has no usable URL.
        """
        source_url = self.get_url(video)
        sources = {}
        hosters = []
        if source_url and source_url != FORCE_NO_MATCH:
            url = urlparse.urljoin(self.base_url, source_url)
            html = self._http_get(url, cache_limit=0.5)
            for embed in re.finditer(r"embeds\[(\d+)\]\s*=\s*'([^']+)", html):
                iframe = re.search('src="([^"]+)', embed.group(2))
                if not iframe:
                    continue

                iframe_url = iframe.group(1)
                if "play-en.php" in iframe_url:
                    token = re.search('id=([^"&]+)', iframe_url)
                    if token:
                        # Keep only what follows the first '*', if any.
                        proxy_link = token.group(1).split("*", 1)[-1]
                        picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, proxy_link)
                        for stream_url in self._parse_google(picasa_url):
                            sources[stream_url] = {
                                "quality": scraper_utils.gv_get_quality(stream_url),
                                "direct": True,
                            }
                else:
                    iframe_html = self._http_get(iframe_url, cache_limit=0.25)
                    temp_sources = self._parse_sources_list(iframe_html)
                    # Walk a snapshot of the keys: entries are re-keyed
                    # below, and inserting/deleting while iterating the
                    # dict itself can skip or revisit entries.
                    for source in list(temp_sources):
                        if "download.php" in source:
                            redir_url = self._http_get(source, allow_redirect=False, method="HEAD", cache_limit=0)
                            if redir_url.startswith("http"):
                                temp_sources[redir_url] = temp_sources.pop(source)
                    sources.update(temp_sources)
                    for source in dom_parser.parse_dom(iframe_html, "source", {"type": "video/mp4"}, ret="src"):
                        sources[source] = {"quality": QUALITIES.HD720, "direct": True}

        for source, values in sources.items():
            host = self._get_direct_hostname(source)
            stream_url = source + "|User-Agent=%s" % (scraper_utils.get_ua())
            if host == "gvideo":
                quality = scraper_utils.gv_get_quality(source)
            else:
                quality = values["quality"]
                # Values that are not already in Q_ORDER are treated as
                # labels and translated through QUALITY_MAP.
                if quality not in Q_ORDER:
                    quality = QUALITY_MAP.get(values["quality"], QUALITIES.HIGH)

            hoster = {
                "multi-part": False,
                "url": stream_url,
                "host": host,
                "class": self,
                "quality": quality,
                "views": None,
                "rating": None,
                "direct": True,
            }
            hosters.append(hoster)

        return hosters
    def get_sources(self, video):
        """Return the direct hosters found on the page for *video*.

        Every ``embeds[n] = '...'`` snippet on the page is searched for an
        iframe ``src``:

        * ``play-en.php`` iframes carry an ``id`` token that is run through
          ``scraper_utils.gk_decrypt`` with GK_KEY and then handed to
          ``_parse_google``;
        * any other iframe is fetched and its ``sources: [...]`` list is
          scanned for ``file``/``label`` pairs; ``download.php`` files are
          replaced by the response of a non-redirecting HEAD request when
          that response starts with 'http'.

        Each distinct stream URL yields one hoster dict.  Non-gvideo
        streams take their quality from the label parsed alongside them
        (QUALITIES.HIGH when there is none or it is not in QUALITY_MAP).

        Returns:
            A list of hoster dicts, empty when the video has no usable URL.
        """
        source_url = self.get_url(video)
        sources = []
        stream_urls = []
        # Label seen next to each stream URL, so quality is looked up per
        # stream instead of reusing whatever label was parsed last.
        labels = {}
        if source_url and source_url != FORCE_NO_MATCH:
            url = urlparse.urljoin(self.base_url, source_url)
            html = self._http_get(url, cache_limit=.5)
            for embed in re.finditer(r"embeds\[(\d+)\]\s*=\s*'([^']+)", html):
                iframe = re.search('src="([^"]+)', embed.group(2))
                if not iframe:
                    continue

                iframe_url = iframe.group(1)
                if 'play-en.php' in iframe_url:
                    token = re.search('id=([^"&]+)', iframe_url)
                    if token:
                        # Keep only what follows the first '*', if any.
                        proxy_link = token.group(1).split('*', 1)[-1]
                        picasa_url = scraper_utils.gk_decrypt(
                            self.get_name(), GK_KEY, proxy_link)
                        stream_urls += self._parse_google(picasa_url)
                else:
                    iframe_html = self._http_get(iframe_url, cache_limit=0)
                    source_list = re.search(r'sources\s*:\s*\[(.*?)\]',
                                            iframe_html, re.DOTALL)
                    if not source_list:
                        continue

                    for entry in re.finditer(
                            r'''['"]*file['"]*\s*:\s*['"]*([^'"]+).*?['"]*label['"]*\s*:\s*['"]*([^'"]+)''',
                            source_list.group(1), re.DOTALL):
                        stream_url, label = entry.groups()
                        if 'download.php' in stream_url:
                            redir_html = self._http_get(
                                stream_url,
                                allow_redirect=False,
                                method='HEAD',
                                cache_limit=0)
                            # Only swap in the response when it is itself
                            # a URL; otherwise keep the original link.
                            if redir_html.startswith('http'):
                                stream_url = redir_html
                        labels[stream_url] = label
                        stream_urls.append(stream_url)

        for stream_url in set(stream_urls):
            host = self._get_direct_hostname(stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = QUALITY_MAP.get(labels.get(stream_url),
                                          QUALITIES.HIGH)
            stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
            source = {
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': True
            }
            sources.append(source)

        return sources
Ejemplo n.º 7
0
 def __get_google_links(self, link):
     """Return whatever ``_parse_google`` finds behind the page at *link*.

     The page is expected to embed a ``base64.decode("...")`` payload
     whose decoded text contains a ``proxy.link=tunemovie*<token>`` entry;
     the token is run through ``scraper_utils.gk_decrypt`` with GK_KEY and
     the result is given to ``_parse_google``.  An empty dict is returned
     when either pattern is missing.
     """
     page = self._http_get(link, cache_limit=.5)
     encoded = re.search('base64\.decode\("([^"]+)', page, re.I)
     if not encoded:
         return {}

     decoded = base64.b64decode(encoded.group(1))
     token = re.search('proxy\.link=tunemovie\*([^&]+)', decoded)
     if not token:
         return {}

     picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, token.group(1))
     return self._parse_google(picasa_url)
Ejemplo n.º 8
0
 def get_sources(self, video):
     """Build the list of direct hosters for *video*.

     Fetches the video page, extracts its ``proxy.link`` token, runs it
     through ``scraper_utils.gk_decrypt`` (GK_KEY1 when the token has at
     most 224 characters, GK_KEY2 otherwise) and, when the result is a
     'gvideo' URL, emits one hoster dict per stream from ``_parse_google``.

     Returns an empty list whenever one of those steps comes up empty.
     """
     results = []
     page_path = self.get_url(video)
     if not page_path or page_path == FORCE_NO_MATCH:
         return results

     page = self._http_get(urlparse.urljoin(self.base_url, page_path), cache_limit=.5)
     found = re.search('proxy\.link=([^"&]+)', page)
     if not found:
         return results

     # Keep only what follows the first '*' (the whole token if there is none).
     token = found.group(1).split('*', 1)[-1]
     key = GK_KEY1 if len(token) <= 224 else GK_KEY2
     decrypted_url = scraper_utils.gk_decrypt(self.get_name(), key, token)
     if self._get_direct_hostname(decrypted_url) != 'gvideo':
         return results

     for link in self._parse_google(decrypted_url):
         results.append({
             'multi-part': False,
             'url': link,
             'class': self,
             'quality': scraper_utils.gv_get_quality(link),
             'host': self._get_direct_hostname(link),
             'rating': None,
             'views': None,
             'direct': True,
         })
     return results
Ejemplo n.º 9
0
 def __get_google_links(self, link):
     """Return whatever ``_parse_google`` finds behind the page at *link*.

     The page is expected to embed a ``base64.decode("...")`` payload,
     which is logged and then decoded; the decoded text must contain a
     ``proxy.link=tunemovie*<token>`` entry whose token is run through
     ``scraper_utils.gk_decrypt`` with GK_KEY before being handed to
     ``_parse_google``.  An empty dict is returned when either pattern is
     missing.
     """
     page = self._http_get(link, cache_limit=.5)
     encoded = re.search('base64\.decode\("([^"]+)', page, re.I)
     if not encoded:
         return {}

     # The raw (still encoded) payload is logged before decoding.
     log_utils.log(encoded.group(1))
     decoded = base64.b64decode(encoded.group(1))
     token = re.search('proxy\.link=tunemovie\*([^&]+)', decoded)
     if not token:
         return {}

     picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, token.group(1))
     return self._parse_google(picasa_url)
Ejemplo n.º 10
0
 def __get_gk_links2(self, html):
     """Map every stream reachable from *html* to its quality.

     *html* is expected to embed a ``base64.decode("...")`` payload whose
     decoded text contains a ``proxy.link=tunemovie*<token>`` entry.  The
     token is run through ``scraper_utils.gk_decrypt`` with GK_KEY and the
     result is parsed by ``_parse_google``.

     Returns ``{link: quality}``; empty when either pattern is missing.
     """
     encoded = re.search('base64\.decode\("([^"]+)', html, re.I)
     if not encoded:
         return {}

     decoded = base64.b64decode(encoded.group(1))
     token = re.search('proxy\.link=tunemovie\*([^&]+)', decoded)
     if not token:
         return {}

     picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, token.group(1))
     return {
         g_link: scraper_utils.gv_get_quality(g_link)
         for g_link in self._parse_google(picasa_url)
     }
Ejemplo n.º 11
0
    def get_sources(self, video):
        """Return the direct hosters found on the page for *video*.

        Every ``embeds[n] = '...'`` snippet on the page is searched for an
        iframe ``src``:

        * ``play-en.php`` iframes carry an ``id`` token that is run through
          ``scraper_utils.gk_decrypt`` with GK_KEY and then handed to
          ``scraper_utils.parse_google``;
        * any other iframe is fetched; its ``parse_sources_list`` entries
          are collected (``download.php`` entries re-keyed to the response
          of a non-redirecting HEAD request when it starts with 'http'),
          plus every ``<source type="video/mp4">`` ``src`` at HD720 with
          the iframe URL recorded as referer.

        Returns:
            A list of hoster dicts, empty when the video has no usable URL.
        """
        source_url = self.get_url(video)
        sources = {}
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=0)
        for embed in re.finditer(r"embeds\[(\d+)\]\s*=\s*'([^']+)", html):
            iframe = re.search('src="([^"]+)', embed.group(2))
            if not iframe:
                continue

            iframe_url = iframe.group(1)
            if 'play-en.php' in iframe_url:
                token = re.search('id=([^"&]+)', iframe_url)
                if token:
                    # Keep only what follows the first '*', if any.
                    proxy_link = token.group(1).split('*', 1)[-1]
                    picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, proxy_link)
                    for stream_url in scraper_utils.parse_google(self, picasa_url):
                        sources[stream_url] = {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}
            else:
                iframe_html = self._http_get(iframe_url, cache_limit=0)
                temp_sources = scraper_utils.parse_sources_list(self, iframe_html)
                # Walk a snapshot of the keys: entries are re-keyed below,
                # and inserting/deleting while iterating the dict itself
                # can skip or revisit entries.
                for source in list(temp_sources):
                    if 'download.php' in source:
                        redir_url = self._http_get(source, allow_redirect=False, method='HEAD', cache_limit=0)
                        if redir_url.startswith('http'):
                            temp_sources[redir_url] = temp_sources.pop(source)
                sources.update(temp_sources)
                for source in dom_parser2.parse_dom(iframe_html, 'source', {'type': 'video/mp4'}, req='src'):
                    sources[source.attrs['src']] = {'quality': QUALITIES.HD720, 'direct': True, 'referer': iframe_url}

        for source, values in sources.iteritems():
            host = scraper_utils.get_direct_hostname(self, source)
            headers = {'User-Agent': scraper_utils.get_ua()}
            if 'referer' in values: headers['Referer'] = values['referer']
            stream_url = source + scraper_utils.append_headers(headers)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(source)
            else:
                quality = values['quality']
                # Values that are not already in Q_ORDER are treated as
                # labels and translated through QUALITY_MAP.
                if quality not in Q_ORDER:
                    quality = QUALITY_MAP.get(values['quality'], QUALITIES.HIGH)

            hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
            hosters.append(hoster)

        return hosters
Ejemplo n.º 12
0
    def get_sources(self, video):
        """Collect every hoster advertised on the page for *video*.

        Three places on the page are inspected, in this order:

        1. the first ``<source src>`` inside the ``<video>`` element(s),
           added as a direct HD720 hoster (quality re-derived with
           ``gv_get_quality`` when the host is 'gvideo');
        2. every ``<iframe src>`` not containing 'facebook', added as a
           non-direct HIGH-quality hoster;
        3. the ``proxy.link`` token, run through
           ``scraper_utils.gk_decrypt`` with GK_KEY: a 'vk.com' URL is
           added as-is, 'picasaweb' URLs go through ``_parse_google`` and
           'docs.google' URLs through ``_parse_gdocs``.

        Returns a list of hoster dicts, empty when the video has no URL.
        """
        found_hosters = []
        page_path = self.get_url(video)
        if not page_path or page_path == FORCE_NO_MATCH:
            return found_hosters

        page = self._http_get(urlparse.urljoin(self.base_url, page_path), cache_limit=.5)

        video_tags = dom_parser.parse_dom(page, 'video')
        if video_tags:
            video_srcs = dom_parser.parse_dom(video_tags, 'source', ret='src')
            if video_srcs:
                first_src = video_srcs[0]
                entry = {'multi-part': False, 'url': first_src, 'class': self, 'quality': QUALITIES.HD720, 'host': self._get_direct_hostname(first_src), 'rating': None, 'views': None, 'direct': True}
                if entry['host'] == 'gvideo':
                    entry['quality'] = scraper_utils.gv_get_quality(entry['url'])
                found_hosters.append(entry)

        for frame_src in dom_parser.parse_dom(page, 'iframe', ret='src'):
            if 'facebook' in frame_src:
                continue
            found_hosters.append({'multi-part': False, 'url': frame_src, 'class': self, 'quality': QUALITIES.HIGH, 'host': urlparse.urlparse(frame_src).hostname, 'rating': None, 'views': None, 'direct': False})

        found = re.search('proxy\.link=([^"&]+)', page)
        if not found:
            return found_hosters

        # Keep only what follows the first '*' (the whole token if there is none).
        token = found.group(1).split('*', 1)[-1]
        decrypted_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, token)
        lowered = decrypted_url.lower()

        if 'vk.com' in lowered:
            found_hosters.append({'multi-part': False, 'host': 'vk.com', 'class': self, 'url': decrypted_url, 'quality': QUALITIES.HD720, 'views': None, 'rating': None, 'direct': False})

        # The checks are independent: a URL matching both markers is parsed
        # by both parsers, picasaweb first.
        parsers = []
        if 'picasaweb' in lowered:
            parsers.append(self._parse_google)
        if 'docs.google' in lowered:
            parsers.append(self._parse_gdocs)
        for parse in parsers:
            for link in parse(decrypted_url):
                link_quality = scraper_utils.gv_get_quality(link)
                found_hosters.append({'multi-part': False, 'url': link, 'class': self, 'quality': link_quality, 'host': self._get_direct_hostname(link), 'rating': None, 'views': None, 'direct': True})

        return found_hosters
Ejemplo n.º 13
0
    def get_sources(self, video):
        """Collect every hoster advertised on the page for *video*.

        Three places on the page are inspected, in this order:

        1. the first ``<source src>`` inside the ``<video>`` element(s),
           added as a direct HD720 hoster (quality re-derived with
           ``gv_get_quality`` when the host is 'gvideo');
        2. every ``<iframe src>`` not containing 'facebook', added as a
           non-direct HIGH-quality hoster;
        3. the ``proxy.link`` token, run through
           ``scraper_utils.gk_decrypt`` with GK_KEY: a 'vk.com' URL is
           added as-is, while 'picasaweb' and 'docs.google' URLs are both
           expanded with ``_parse_google``.

        Returns a list of hoster dicts, empty when the video has no URL.
        """
        found_hosters = []
        page_path = self.get_url(video)
        if not page_path or page_path == FORCE_NO_MATCH:
            return found_hosters

        page = self._http_get(urlparse.urljoin(self.base_url, page_path), cache_limit=.5)

        video_tags = dom_parser.parse_dom(page, 'video')
        if video_tags:
            video_srcs = dom_parser.parse_dom(video_tags, 'source', ret='src')
            if video_srcs:
                first_src = video_srcs[0]
                entry = {'multi-part': False, 'url': first_src, 'class': self, 'quality': QUALITIES.HD720, 'host': self._get_direct_hostname(first_src), 'rating': None, 'views': None, 'direct': True}
                if entry['host'] == 'gvideo':
                    entry['quality'] = scraper_utils.gv_get_quality(entry['url'])
                found_hosters.append(entry)

        for frame_src in dom_parser.parse_dom(page, 'iframe', ret='src'):
            if 'facebook' in frame_src:
                continue
            found_hosters.append({'multi-part': False, 'url': frame_src, 'class': self, 'quality': QUALITIES.HIGH, 'host': urlparse.urlparse(frame_src).hostname, 'rating': None, 'views': None, 'direct': False})

        found = re.search('proxy\.link=([^"&]+)', page)
        if not found:
            return found_hosters

        # Keep only what follows the first '*' (the whole token if there is none).
        token = found.group(1).split('*', 1)[-1]
        decrypted_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, token)
        lowered = decrypted_url.lower()

        if 'vk.com' in lowered:
            found_hosters.append({'multi-part': False, 'host': 'vk.com', 'class': self, 'url': decrypted_url, 'quality': QUALITIES.HD720, 'views': None, 'rating': None, 'direct': False})

        # Each marker is checked independently, so a URL containing both is
        # expanded (and its streams appended) twice.
        for marker in ('picasaweb', 'docs.google'):
            if marker not in lowered:
                continue
            for link in self._parse_google(decrypted_url):
                link_quality = scraper_utils.gv_get_quality(link)
                found_hosters.append({'multi-part': False, 'url': link, 'class': self, 'quality': link_quality, 'host': self._get_direct_hostname(link), 'rating': None, 'views': None, 'direct': True})

        return found_hosters
    def get_sources(self, video):
        """Return the direct hosters found on the page for *video*.

        Every ``embeds[n] = '...'`` snippet on the page is searched for an
        iframe ``src``:

        * ``play-en.php`` iframes carry an ``id`` token that is run through
          ``scraper_utils.gk_decrypt`` with GK_KEY and then handed to
          ``_parse_google``;
        * any other iframe is fetched and passed to ``_parse_sources_list``;
          ``download.php`` entries are re-keyed to the response of a
          non-redirecting HEAD request when that response starts with
          'http'.

        Returns:
            A list of hoster dicts, empty when the video has no usable URL.
        """
        source_url = self.get_url(video)
        sources = {}
        hosters = []
        if source_url and source_url != FORCE_NO_MATCH:
            url = urlparse.urljoin(self.base_url, source_url)
            html = self._http_get(url, cache_limit=.5)
            for embed in re.finditer(r"embeds\[(\d+)\]\s*=\s*'([^']+)", html):
                iframe = re.search('src="([^"]+)', embed.group(2))
                if not iframe:
                    continue

                iframe_url = iframe.group(1)
                if 'play-en.php' in iframe_url:
                    token = re.search('id=([^"&]+)', iframe_url)
                    if token:
                        # Keep only what follows the first '*', if any.
                        proxy_link = token.group(1).split('*', 1)[-1]
                        picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, proxy_link)
                        for stream_url in self._parse_google(picasa_url):
                            sources[stream_url] = {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}
                else:
                    iframe_html = self._http_get(iframe_url, cache_limit=.25)
                    temp_sources = self._parse_sources_list(iframe_html)
                    # Walk a snapshot of the keys: entries are re-keyed
                    # below, and inserting/deleting while iterating the
                    # dict itself can skip or revisit entries.
                    for source in list(temp_sources):
                        if 'download.php' in source:
                            redir_url = self._http_get(source, allow_redirect=False, method='HEAD', cache_limit=0)
                            if redir_url.startswith('http'):
                                temp_sources[redir_url] = temp_sources.pop(source)
                    sources.update(temp_sources)

        for source, values in sources.items():
            host = self._get_direct_hostname(source)
            stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(source)
            else:
                quality = QUALITY_MAP.get(values['quality'], QUALITIES.HIGH)
            hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
            hosters.append(hoster)

        return hosters
Ejemplo n.º 15
0
    def get_sources(self, video):
        """Return the direct hosters found on the page for *video*.

        Every ``embeds[n] = '...'`` snippet on the page is searched for an
        iframe ``src``:

        * ``play-en.php`` iframes carry an ``id`` token that is run through
          ``scraper_utils.gk_decrypt`` with GK_KEY and then handed to
          ``_parse_google``;
        * any other iframe is fetched and its ``sources: [...]`` list is
          scanned for ``file``/``label`` pairs; ``download.php`` files are
          replaced by the response of a non-redirecting request when that
          response starts with 'http'.

        Each distinct stream URL yields one hoster dict.  Non-gvideo
        streams take their quality from the label parsed alongside them
        (QUALITIES.HIGH when there is none or it is not in QUALITY_MAP).

        Returns:
            A list of hoster dicts, empty when the video has no usable URL.
        """
        source_url = self.get_url(video)
        sources = []
        stream_urls = []
        # Label seen next to each stream URL, so quality is looked up per
        # stream instead of reusing whatever label was parsed last.
        labels = {}
        if source_url and source_url != FORCE_NO_MATCH:
            url = urlparse.urljoin(self.base_url, source_url)
            html = self._http_get(url, cache_limit=.5)
            for embed in re.finditer(r"embeds\[(\d+)\]\s*=\s*'([^']+)", html):
                iframe = re.search('src="([^"]+)', embed.group(2))
                if not iframe:
                    continue

                iframe_url = iframe.group(1)
                if 'play-en.php' in iframe_url:
                    token = re.search('id=([^"&]+)', iframe_url)
                    if token:
                        # Keep only what follows the first '*', if any.
                        proxy_link = token.group(1).split('*', 1)[-1]
                        picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, proxy_link)
                        stream_urls += self._parse_google(picasa_url)
                else:
                    iframe_html = self._http_get(iframe_url, cache_limit=0)
                    source_list = re.search(r'sources\s*:\s*\[(.*?)\]', iframe_html, re.DOTALL)
                    if not source_list:
                        continue

                    for entry in re.finditer(r'''['"]*file['"]*\s*:\s*['"]*([^'"]+).*?['"]*label['"]*\s*:\s*['"]*([^'"]+)''', source_list.group(1), re.DOTALL):
                        stream_url, label = entry.groups()
                        if 'download.php' in stream_url:
                            redir_html = self._http_get(stream_url, allow_redirect=False, cache_limit=0)
                            # Only swap in the response when it is itself
                            # a URL; otherwise keep the original link.
                            if redir_html.startswith('http'):
                                stream_url = redir_html
                        labels[stream_url] = label
                        stream_urls.append(stream_url)

        for stream_url in set(stream_urls):
            host = self._get_direct_hostname(stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = QUALITY_MAP.get(labels.get(stream_url), QUALITIES.HIGH)
            stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
            source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
            sources.append(source)

        return sources