Code Example #1
    def sources(self, url, hostDict, hostprDict):
        try:
            sources = []
            if url is None: return sources
            html = client.request(url)
            links = re.compile('<iframe width=".+?src="(.+?)"',
                               re.DOTALL).findall(html)
            for link in links:
                quality, info = source_utils.get_release_quality(link, url)
                host = link.split('//')[1].replace('www.', '')
                host = host.split('/')[0].split('.')[0].title()
                sources.append({
                    'source': host,
                    'quality': quality,
                    'language': 'en',
                    'url': link,
                    'direct': False,
                    'debridonly': False
                })
            return sources
        except:
            failure = traceback.format_exc()
            log_utils.log('MyBobMovies - sources - Exception: \n' +
                          str(failure))
            return sources
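
A note on the shared interface: every example on this page implements the same informal contract. sources(self, url, hostDict, hostprDict) fetches one or more pages, extracts candidate stream links, and returns a list of dicts with the keys seen above ('source', 'quality', 'language', 'url', 'direct', 'debridonly', plus the optional 'info' and 'checkquality'). The sketch below is a minimal, hedged rendering of that contract; it assumes the Exodus/Covenant-style helpers (client, source_utils, re) used throughout these examples, and the href regex is illustrative only:

    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if not url:
                return sources
            html = client.request(url)  # fetch the page to scrape
            for link in re.findall('href="([^"]+)"', html):
                # map the link to a known hoster; skip anything unrecognised
                valid, host = source_utils.is_host_valid(link, hostDict)
                if not valid: continue
                # guess the release quality from the link text
                quality, info = source_utils.get_release_quality(link)
                sources.append({'source': host, 'quality': quality,
                                'language': 'en', 'url': link,
                                'direct': False, 'debridonly': False})
            return sources
        except:
            return sources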
Code Example #2
    def sources(self, url, hostDict, hostprDict):
        sources = []

        try:
            if not url:
                return sources

            query = urlparse.urljoin(self.base_link, url)

            r = client.request(query)

            quality = dom_parser.parse_dom(
                r, 'span',
                attrs={'id': 'release_text'})[0].content.split('&nbsp;')[0]
            quality, info = source_utils.get_release_quality(quality)

            r = dom_parser.parse_dom(r,
                                     'ul',
                                     attrs={'class': 'currentStreamLinks'})
            r = [(dom_parser.parse_dom(i, 'p', attrs={'class': 'hostName'}),
                  dom_parser.parse_dom(i,
                                       'a',
                                       attrs={'class': 'stream-src'},
                                       req='data-id')) for i in r]
            r = [(re.sub(' hd$', '', i[0][0].content.lower()),
                  [x.attrs['data-id'] for x in i[1]]) for i in r
                 if i[0] and i[1]]

            for hoster, id in r:
                valid, hoster = source_utils.is_host_valid(hoster, hostDict)
                if not valid: continue

                sources.append({
                    'source': hoster,
                    'quality': quality,
                    'language': 'de',
                    'info': ' | '.join(info + ['' if len(id) == 1 else 'multi-part']),
                    'url': id,
                    'direct': False,
                    'debridonly': False,
                    'checkquality': True
                })

            return sources
        except:
            return sources
Code Example #3
File: seriescr.py Project: puppyrambo18/rambo-repo
    def sources(self, url, hostDict, hostprDict):
        try:
            sources = []

            if url is None: return sources

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode']))
            query = '%s S%02dE%02d' % (data['tvshowtitle'], int(
                data['season']), int(data['episode']))
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
            url = self.search_link % urllib.quote_plus(query)
            r = urlparse.urljoin(self.base_link, url)
            r = client.request(r)
            r = client.parseDOM(r, 'item')
            title = client.parseDOM(r, 'title')[0]
            if hdlr in title:
                r = re.findall(
                    '<h3.+?>(.+?)</h3>\s*<h5.+?<strong>(.+?)</strong.+?h3.+?adze.+?href="(.+?)">.+?<h3',
                    r[0], re.DOTALL)
                for name, size, url in r:
                    quality, info = source_utils.get_release_quality(name, url)
                    try:
                        size = re.sub('i', '', size)
                        div = 1 if size.endswith(('GB', 'GiB')) else 1024
                        size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                        size = '%.2f GB' % size
                        info.append(size)
                    except:
                        pass

                    info = ' | '.join(info)

                    valid, host = source_utils.is_host_valid(url, hostDict)
                    sources.append({
                        'source': host,
                        'quality': quality,
                        'language': 'en',
                        'url': url,
                        'info': info,
                        'direct': False,
                        'debridonly': True
                    })
            return sources
        except:
            failure = traceback.format_exc()
            log_utils.log('SeriesCR - Exception: \n' + str(failure))
            return sources
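
Examples #3, #6, #8, #10 and #16 all repeat the same size-normalisation idiom: find a '1.4 GB' / '700 MiB' style token, divide megabyte values by 1024, and format the result as gigabytes. Factored out, it looks roughly like this (the helper name is hypothetical):

    def size_to_gb(text):
        # returns 'x.xx GB' for the first size token in text, or '' if none
        try:
            size = re.findall('([0-9,\.]+ ?(?:GB|GiB|MB|MiB))', text)[0]
            div = 1 if size.endswith(('GB', 'GiB')) else 1024
            return '%.2f GB' % (float(re.sub('[^0-9\.]', '', size)) / div)
        except:
            return ''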
Code Example #4
    def sources(self, url, hostDict, hostprDict):
        sources = []

        try:
            if not url:
                return sources

            query = urlparse.urljoin(self.base_link, url)
            r = client.request(query)

            r = dom_parser.parse_dom(r, 'div', attrs={'id': 'tab-plot_german'})
            r = dom_parser.parse_dom(r, 'tbody')
            r = dom_parser.parse_dom(r, 'tr')

            for i in r:
                match = re.search('(?<=">)(\n.*?)(?=<\/a>)', i[1])
                if match and match.group().strip():
                    hoster = match.group().strip()
                    link = re.search('(?<=href=\")(.*?)(?=\")', i[1]).group()
                    rel = re.search(
                        '(?<=oddCell qualityCell">)(\n.*?)(?=<\/td>)',
                        i[1]).group().strip()
                    quality, info = source_utils.get_release_quality(rel)
                    if not quality:
                        quality = 'SD'

                    valid, hoster = source_utils.is_host_valid(
                        hoster, hostDict)
                    if not valid: continue

                    sources.append({
                        'source': hoster,
                        'quality': quality,
                        'language': 'de',
                        'url': link,
                        'direct': False,
                        'debridonly': False
                    })

            return sources
        except:
            return sources
Code Example #5
    def sources(self, url, hostDict, hostprDict):
        sources = []

        try:
            if not url:
                return sources

            url = urlparse.urljoin(self.base_link, url)

            r = client.request(url)

            rel = dom_parser.parse_dom(r, 'div', attrs={'id': 'info'})
            rel = dom_parser.parse_dom(rel, 'div', attrs={'itemprop': 'description'})
            rel = dom_parser.parse_dom(rel, 'p')
            rel = [re.sub('<.+?>|</.+?>', '', i.content) for i in rel]
            rel = [re.findall('release:\s*(.*)', i, re.I) for i in rel]
            rel = [source_utils.get_release_quality(i[0]) for i in rel if i]
            quality, info = (rel[0]) if rel else ('SD', [])

            r = dom_parser.parse_dom(r, 'div', attrs={'id': 'links'})
            r = dom_parser.parse_dom(r, 'table')
            r = dom_parser.parse_dom(r, 'tr', attrs={'id': re.compile('\d+')})
            r = [dom_parser.parse_dom(i, 'td') for i in r]
            r = [(i[0], re.sub('<.+?>|</.+?>', '', i[1].content).strip()) for i in r if len(i) >= 2]
            r = [(dom_parser.parse_dom(i[0], 'a', req='href'), i[1]) for i in r]
            r = [(i[0][0].attrs['href'], i[1]) for i in r if i[0]]

            info = ' | '.join(info)

            for link, hoster in r:
                valid, hoster = source_utils.is_host_valid(hoster, hostDict)
                if not valid: continue

                sources.append({'source': hoster, 'quality': quality, 'language': 'de', 'url': link, 'info': info, 'direct': False, 'debridonly': False, 'checkquality': True})

            return sources
        except:
            return sources
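
Examples #2, #5 and #12 share a pipeline style: the variable r is rebuilt step by step with list comprehensions that parse, reshape and filter. A compact illustration of the pattern, using the same dom_parser helper as above (the HTML structure is hypothetical):

    r = dom_parser.parse_dom(html, 'tr')                 # select rows
    r = [(dom_parser.parse_dom(i, 'a', req='href'),      # extract the parts
          dom_parser.parse_dom(i, 'td')) for i in r]     # of each row
    r = [(i[0][0].attrs['href'], i[1][0].content)        # unwrap, dropping
         for i in r if i[0] and i[1]]                    # incomplete rows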
Code Example #6
    def sources(self, url, hostDict, hostprDict):
        try:
            sources = []
            if url is None: return sources
            if debrid.status() is False: raise Exception()
            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
            query = '%s S%02dE%02d' % (
                data['tvshowtitle'], int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (
                data['title'], data['year'])
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
            url = self.search_link % urllib.quote_plus(query)
            url = urlparse.urljoin(self.base_link, url)
            html = client.request(url)
            posts = client.parseDOM(html, 'item')
            hostDict = hostprDict + hostDict
            items = []
            for post in posts:
                try:
                    t = client.parseDOM(post, 'title')[0]
                    u = client.parseDOM(post, 'a', ret='href')
                    s = re.search('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))', post)
                    s = s.groups()[0] if s else '0'
                    items += [(t, i, s) for i in u]
                except:
                    pass
            for item in items:
                try:
                    url = item[1]
                    if any(x in url for x in ['.rar', '.zip', '.iso']): raise Exception()
                    url = client.replaceHTMLCodes(url)
                    url = url.encode('utf-8')
                    valid, host = source_utils.is_host_valid(url, hostDict)
                    if not valid: raise Exception()
                    host = client.replaceHTMLCodes(host)
                    host = host.encode('utf-8')
                    name = item[0]
                    name = client.replaceHTMLCodes(name)
                    t = re.sub('(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)', '', name, flags=re.I)
                    if not cleantitle.get(t) == cleantitle.get(title): raise Exception()
                    y = re.findall('[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]', name)[-1].upper()
                    if not y == hdlr: raise Exception()
                    quality, info = source_utils.get_release_quality(name, url)
                    try:
                        size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))', item[2])[-1]
                        div = 1 if size.endswith(('GB', 'GiB')) else 1024
                        size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                        size = '%.2f GB' % size
                        info.append(size)
                    except:
                        pass
                    info = ' | '.join(info)
                    sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': url, 'info': info,
                                    'direct': False, 'debridonly': True})
                except:
                    pass
            check = [i for i in sources if not i['quality'] == 'CAM']
            if check: sources = check
            return sources
        except:
            return sources
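
Example #6 (like #11 and #13 below) filters release names in three steps: strip everything from the year/SxxExx tail onwards, compare cleaned titles, then require an exact year or episode tag (hdlr). A hedged standalone version of that filter (the function name is hypothetical; cleantitle comes from the same scraper framework):

    def matches_release(name, title, hdlr):
        # strip everything from the year / SxxExx / 3D marker onwards
        t = re.sub('(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)',
                   '', name, flags=re.I)
        if not cleantitle.get(t) == cleantitle.get(title):
            return False
        # the last year-or-episode token must equal the expected tag
        y = re.findall('[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]', name)
        return bool(y) and y[-1].upper() == hdlr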
Code Example #7
File: rlsscn.py Project: danny1990/ScraperPackages
    def sources(self, url, hostDict, hostprDict):
        try:
            hostDict = hostDict + hostprDict
        
            log_utils.log("RLSSCN debug")
            
            sources      = []
            query_bases  = []
            options      = []
            html         = None

            log_utils.log("RLSSCN url : "+ str(url))

            if url is None: return sources

            

            data = url
            #data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])        
            title = (data['tvshowtitle'] if 'tvshowtitle' in data else data['title'])
            hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
            premDate = ''

            # tvshowtitle
            if 'tvshowtitle' in data:
                query_bases.append('%s ' % (data['tvshowtitle']))  # (e.g. 9-1-1 becomes 911)
                # tvshowtitle + year (e.g. Titans-2018-s01e1 or Insomnia-2018-S01)
                query_bases.append('%s %s ' % (data['tvshowtitle'], data['year']))

                # season and episode (classic)
                options.append('S%02dE%02d' % (int(data['season']), int(data['episode'])))
                # season space episode (for double episodes like S02E02-E03)
                options.append('S%02d E%02d' % (int(data['season']), int(data['episode'])))
                # season and episode1 - episode2 (two episodes at a time)
                #options.append('S%02dE%02d-E%02d' % (int(data['season']), int(data['episode']),   int(data['episode'])+1))
                #options.append('S%02dE%02d-E%02d' % (int(data['season']), int(data['episode'])-1, int(data['episode'])))
                # season only (e.g. ozark-S02, a group of episodes)
                options.append('S%02d' % (int(data['season'])))

                html = self.search(query_bases, options)

            else:
                #log_utils.log("RLSSCN Movie")
                #  Movie
                query_bases.append('%s ' % (data['title']))
                options.append('%s' % (data['year']))
                html = self.search(query_bases, options)
       
            # this split is based on TV shows, soooo... might screw up movies
            # grab the relevant div and chop off the footer
            if html is not None:
                html = client.parseDOM(html, "div", attrs={"id": "content"})[0]
                html = re.sub('class="wp-post-navigation.+', '', html, flags=re.DOTALL)
                sects = html.split('<p>')

                log_utils.log("RLSSCN html links : " + str(sects))

                for sect in sects:
                    hrefs = client.parseDOM(sect, "a", attrs={"class": "autohyperlink"}, ret='href')
                    if not hrefs: continue

                    # filenames (with useful info) seem predictably located
                    try: fn = re.match('(.+?)</strong>', sect).group(1)
                    except: fn = ''
                    log_utils.log('*** fn: %s' % fn)

                    # sections under filenames usually have sizes (for tv at least)
                    size = ""
                    try:
                        size = re.findall('([0-9,\.]+ ?(?:GB|GiB|MB|MiB))', sect)[0]
                        div = 1 if size.endswith(('GB', 'GiB')) else 1024
                        size = float(re.sub('[^0-9\.]', '', size)) / div
                        size = '%.2f GB' % size
                    except: pass

                    for url in hrefs:
                        quality, info = source_utils.get_release_quality(url, fn)
                        info.append(size)
                        info = ' | '.join(info)
                        log_utils.log(' ** (%s %s) url=%s' % (quality, info, url))  #~~~~~~~~~~~

                        url = url.encode('utf-8')

                        valid, host = source_utils.is_host_valid(url, hostDict)
                        if not valid: continue

                        log_utils.log(' ** VALID! (host=%s)' % host)  #~~~~~~~~~~~~~~~
                        sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': url,
                                        'info': info, 'direct': False, 'debridonly': False})

            return sources
        except:
            log_utils.log("RLSSCN oups..." + str(traceback.format_exc()))
            return sources
Code Example #8
    def sources(self, url, hostDict, hostprDict):
        try:
            sources = []
            scraper = cfscrape.create_scraper()

            if url is None: return sources

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']

            hdlr = 'S%02dE%02d' % (int(data['season']), int(
                data['episode'])) if 'tvshowtitle' in data else data['year']

            query = '%s s%02de%02d' % (
                data['tvshowtitle'], int(data['season']),
                int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (
                    data['title'], data['year'])
            query = re.sub('[\\\\:;*?"<>|/ \+\']+', '-', query)

            url = self.search_link % urllib.quote_plus(query)
            url = urlparse.urljoin(self.base_link, url)
            #log_utils.log('\n\n\n\n\n\nquery, url: %s, %s' % (query,url))
            r = scraper.get(url).content

            # grab the (only?) relevant div and cut off the footer
            r = client.parseDOM(r, "div", attrs={'class': 'entry-content'})[0]
            r = re.sub('shareaholic-canvas.+', '', r, flags=re.DOTALL)

            # gather actual <a> links then clear all <a>/<img> to prep for naked-url scan
            # inner text could be useful if url looks like http://somehost.com/ugly_hash_377cbc738eff
            a_txt = ''
            a_url = ''
            a_txt = client.parseDOM(r, "a", attrs={'href': '.+?'})
            a_url = client.parseDOM(r, "a", ret="href")
            r = re.sub('<a .+?</a>', '', r, flags=re.DOTALL)
            r = re.sub('<img .+?>', '', r, flags=re.DOTALL)

            # check pre blocks for size and gather naked-urls
            size = ''
            pre_txt = []
            pre_url = []
            pres = client.parseDOM(r, "pre", attrs={'style': '.+?'})
            for pre in pres:
                try:
                    size = re.findall('([0-9,\.]+ ?(?:GB|GiB|MB|MiB))', pre)[0]
                except:
                    pass

                url0 = re.findall(
                    'https?://[^ <"\'\s]+', pre,
                    re.DOTALL)  # bad form but works with this site
                txt0 = [size] * len(url0)
                pre_url = pre_url + url0
                pre_txt = pre_txt + txt0  # we're just grabbing raw urls so there's no other info

            r = re.sub('<pre .+?</pre>', '', r, flags=re.DOTALL)

            # assume info at page top is true for all movie links, and only movie links
            #  (and that otherwise, only <pre>'s have scrapable sizes)
            size = ''
            if not 'tvshowtitle' in data:
                try:
                    size = " " + re.findall('([0-9,\.]+ ?(?:GB|GiB|MB|MiB))',
                                            r)[0]
                except:
                    pass

            # get naked urls (after exhausting <a>'s and <pre>'s)
            # note: all examples use full titles in links, so we can be careful
            raw_url = re.findall(
                'https?://[^ <"\'\s]+', r,
                re.DOTALL)  # bad form but works with this site
            raw_txt = [size] * len(
                raw_url
            )  # we're just grabbing raw urls so there's no other info

            # combine the 3 types of scrapes
            pairs = zip(a_url + pre_url + raw_url, a_txt + pre_txt + raw_txt)

            # merge the hoster lists once, rather than on every loop pass
            hostDict = hostDict + hostprDict

            for pair in pairs:
                try:
                    url = str(pair[0])
                    info = re.sub(
                        '<.+?>', '',
                        pair[1])  #+ size  # usually (??) no <span> inside

                    # immediately abandon pairs with undesired traits
                    #  (if they stop using urls w/ titles, would need to accomodate here)
                    if any(x in url for x in ['.rar', '.zip', '.iso']):
                        raise Exception()
                    if not query.lower() in re.sub('[\\\\:;*?"<>|/ \+\'\.]+',
                                                   '-', url + info).lower():
                        raise Exception()

                    # establish size0 for this pair: 'size' is pre-loaded for movies only...
                    #  ...but prepend 'info' to lead with more-specific sizes (from a <pre>)
                    size0 = info + " " + size

                    # grab first reasonable data size from size0 string
                    try:
                        size0 = re.findall('([0-9,\.]+ ?(?:GB|GiB|MB|MiB))',
                                           size0)[0]
                        div = 1 if size0.endswith(('GB', 'GiB')) else 1024
                        size0 = float(re.sub('[^0-9\.]', '', size0)) / div
                        size0 = '%.2f GB' % size0
                    except:
                        size0 = ''

                    # process through source_tools and hint with size0
                    quality, info = source_utils.get_release_quality(url, info)
                    info.append(size0)
                    info = ' | '.join(info)
                    #log_utils.log('** pair: [%s / %s] %s' % (quality,info,url))

                    url = url.encode('utf-8')

                    valid, host = source_utils.is_host_valid(url, hostDict)
                    if not valid: continue
                    sources.append({
                        'source': host,
                        'quality': quality,
                        'language': 'en',
                        'url': url,
                        'info': info,
                        'direct': False,
                        'debridonly': True
                    })

                except:
                    pass

            return sources
        except:
            return sources
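
Example #8 merges three scrape passes (named <a> links, naked urls inside <pre> blocks, and the remaining naked urls) by keeping two parallel lists per pass and zipping them at the end. Stripped down, with hypothetical values:

    a_url,   a_txt   = ['http://host/a'], ['named link text']
    pre_url, pre_txt = ['http://host/b'], ['1.4 GB']
    raw_url, raw_txt = ['http://host/c'], ['']
    pairs = zip(a_url + pre_url + raw_url, a_txt + pre_txt + raw_txt)
    # -> [('http://host/a', 'named link text'),
    #     ('http://host/b', '1.4 GB'),
    #     ('http://host/c', '')]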
Code Example #9
File: furk.py Project: danny1990/ScraperPackages
    def sources(self, url, hostDict, hostprDict):

        api_key = self.get_api()

        sources = []

        if not api_key:
            return sources

        try:

            content_type = 'episode' if 'tvshowtitle' in url else 'movie'
            match = 'extended'
            moderated = 'no' if content_type == 'episode' else 'yes'
            search_in = ''

            if content_type == 'movie':
                title = url['title'].replace(':', ' ').replace(' ',
                                                               '+').replace(
                                                                   '&', 'and')
                title = title.replace("'", "")
                year = url['year']
                link = '@name+%s+%s+@files+%s+%s' \
                        % (title, year, title, year)

            elif content_type == 'episode':
                title = url['tvshowtitle'].replace(':', ' ').replace(
                    ' ', '+').replace('&', 'and')
                season = int(url['season'])
                episode = int(url['episode'])
                season00_ep00_SE = 's%02de%02d' % (season, episode)
                season0_ep0_SE = 's%de%d' % (season, episode)
                season00_ep00_X = '%02dx%02d' % (season, episode)
                season0_ep0_X = '%dx%d' % (season, episode)
                season0_ep00_X = '%dx%02d' % (season, episode)
                link = '@name+%s+@files+%s+|+%s+|+%s+|+%s+|+%s' \
                        % (title, season00_ep00_SE, season0_ep0_SE, season00_ep00_X, season0_ep0_X, season0_ep00_X)

            s = requests.Session()
            link = (
                self.base_link + self.meta_search_link %
                (api_key, link, match, moderated, search_in, self.search_limit)
            )

            p = s.get(link)
            p = json.loads(p.text)

            if p['status'] != 'ok':
                return sources

            files = p['files']

            for i in files:
                if i['is_ready'] == '1' and i['type'] == 'video':
                    try:
                        source = 'SINGLE'
                        if int(i['files_num_video']) > 3:
                            source = 'PACK [B](x%02d)[/B]' % int(
                                i['files_num_video'])
                        file_name = i['name']
                        file_id = i['id']
                        file_dl = i['url_dl']
                        if content_type == 'episode':
                            url = '%s<>%s<>%s<>%s<>%s<>%s' % (
                                file_id, season00_ep00_SE, season0_ep0_SE,
                                season00_ep00_X, season0_ep0_X, season0_ep00_X)
                            details = self.details(file_name, i['size'],
                                                   i['video_info'])
                        else:
                            url = '%s<>%s<>%s+%s' % (file_id, 'movie', title,
                                                     year)
                            details = self.details(file_name, i['size'],
                                                   i['video_info']).split('|')
                            details = details[0] + ' | ' + file_name.replace(
                                '.', ' ')

                        quality = source_utils.get_release_quality(
                            file_name, file_dl)
                        sources.append({
                            'source': source,
                            'quality': quality[0],
                            'language': "en",
                            'url': url,
                            'info': details,
                            'direct': True,
                            'debridonly': False
                        })
                    except:
                        pass

                else:
                    continue

            return sources

        except:
            print("Unexpected error in Furk Script: source", sys.exc_info()[0])
            exc_type, exc_obj, exc_tb = sys.exc_info()
            print(exc_type, exc_tb.tb_lineno)
            return sources
Code Example #10
    def sources(self, url, hostDict, hostprDict):

        hostDict = hostDict + hostprDict

        sources = []

        # they use a peculiar full-word url scheme for tv eps query
        # also prepare here a by-date scheme query
        if 'tvshowtitle' in url:
            request2 = '%s %s' % (url['tvshowtitle'],
                                  re.sub('\D+', '-', url['premiered']))
            request2 = self.base_link + self.search_link % re.sub(
                '\W+', '-', request2)
            log_utils.log('*** request2: %s' % request2)

            request = '%s season %s episode %s' % (
                url['tvshowtitle'], int(url['season']), int(url['episode']))
        else:
            request = '%s %s' % (url['title'], url['year'])

        request = self.base_link + self.search_link % re.sub(
            '\W+', '-', request)
        log_utils.log('*** request: %s' % request)

        # pull html but if ep got a 404, try ep by premiere date
        html = client.request(request)
        if html is None and 'tvshowtitle' in url:
            html = client.request(request2)

        # grab the relevent div and chop off the footer
        html = client.parseDOM(html, "div", attrs={"id": "content"})[0]
        html = re.sub('class="crp_related.+', '', html, flags=re.DOTALL)

        # pre-load *some* size (for movies, mostly)
        try:
            size0 = re.findall('([0-9,\.]+ ?(?:GB|GiB|MB|MiB))', html)[0]
        except:
            size0 = ''

        # this split is based on TV shows, soooo... might screw up movies
        sects = html.split('<strong>')

        for sect in sects:
            hrefs = client.parseDOM(sect,
                                    "a",
                                    attrs={"class": "autohyperlink"},
                                    ret='href')
            if not hrefs: continue

            # filenames (with useful info) seem predictably located
            try:
                fn = re.match('(.+?)</strong>', sect).group(1)
            except:
                fn = ''
            log_utils.log('*** fn: %s' % fn)

            # sections under filenames usually have sizes (for tv at least)
            try:
                size = re.findall('([0-9,\.]+ ?(?:GB|GiB|MB|MiB))', sect)[0]
                div = 1 if size.endswith(('GB', 'GiB')) else 1024
                size = float(re.sub('[^0-9\.]', '', size)) / div
                size = '%.2f GB' % size
            except:
                size = size0

            for url in hrefs:
                quality, info = source_utils.get_release_quality(url, fn)
                info.append(size)
                info = ' | '.join(info)
                log_utils.log(' ** (%s %s) url=%s' %
                              (quality, info, url))  #~~~~~~~~~~~

                url = url.encode('utf-8')

                valid, host = source_utils.is_host_valid(url, hostDict)
                if not valid: continue

                log_utils.log(' ** VALID! (host=%s)' % host)  #~~~~~~~~~~~~~~~
                sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': url,
                                'info': info, 'direct': False, 'debridonly': False})

        return sources
Code Example #11
File: 300mbfilms.py Project: puppyrambo18/rambo-repo
    def sources(self, url, hostDict, hostprDict):
        try:
            sources = []

            if url is None: return sources

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']

            hdlr = 'S%02dE%02d' % (int(data['season']), int(
                data['episode'])) if 'tvshowtitle' in data else data['year']

            query = '%s S%02dE%02d' % (
                data['tvshowtitle'], int(data['season']),
                int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (
                    data['title'], data['year'])
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            url = self.search_link % urllib.quote_plus(query)
            url = urlparse.urljoin(self.base_link, url)

            r = client.request(url)

            posts = client.parseDOM(r, 'item')

            hostDict = hostprDict + hostDict

            items = []

            for post in posts:
                try:
                    t = client.parseDOM(post, 'title')[0]
                    u = client.parseDOM(post, 'link')[0]
                    s = re.findall(
                        '((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', t)
                    s = s[0] if s else '0'

                    items += [(t, u, s)]

                except:
                    pass

            urls = []
            for item in items:

                try:
                    name = item[0]
                    name = client.replaceHTMLCodes(name)

                    t = re.sub(
                        '(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)',
                        '', name)

                    if not cleantitle.get(t) == cleantitle.get(title):
                        raise Exception()

                    y = re.findall(
                        '[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]',
                        name)[-1].upper()

                    if not y == hdlr: raise Exception()

                    quality, info = source_utils.get_release_quality(
                        name, item[1])
                    if any(x in quality for x in ['CAM', 'SD']): continue

                    try:
                        size = re.sub('i', '', item[2])
                        div = 1 if size.endswith('GB') else 1024
                        size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                        size = '%.2f GB' % size
                        info.append(size)
                    except:
                        pass

                    info = ' | '.join(info)

                    url = item[1]
                    links = self.links(url)
                    urls += [(i, quality, info) for i in links]

                except:
                    pass

            for item in urls:

                if 'earn-money' in item[0]: continue
                if any(x in item[0] for x in ['.rar', '.zip', '.iso']):
                    continue
                url = client.replaceHTMLCodes(item[0])
                url = url.encode('utf-8')

                valid, host = source_utils.is_host_valid(url, hostDict)
                if not valid: continue
                host = client.replaceHTMLCodes(host)
                host = host.encode('utf-8')

                sources.append({
                    'source': host,
                    'quality': item[1],
                    'language': 'en',
                    'url': url,
                    'info': item[2],
                    'direct': False,
                    'debridonly': True
                })

            return sources
        except:
            return sources
Code Example #12
File: iload.py Project: danny1990/ScraperPackages
    def sources(self, url, hostDict, hostprDict):
        sources = []

        try:
            if not url:
                return sources

            query = urlparse.urljoin(self.base_link, url)

            r = client.request(query)
            r = dom_parser.parse_dom(r, 'div', attrs={'id': 'Module'})
            r = [(r,
                  dom_parser.parse_dom(
                      r,
                      'a',
                      attrs={
                          'href': re.compile('[^\'"]*xrel_search_query[^\'"]*')
                      },
                      req='href'))]
            r = [(i[0], i[1][0].attrs['href'] if i[1] else '') for i in r]

            rels = dom_parser.parse_dom(
                r[0][0],
                'a',
                attrs={'href': re.compile('[^\'"]*ReleaseList[^\'"]*')},
                req='href')
            if rels and len(rels) > 1:
                r = []
                for rel in rels:
                    relData = client.request(
                        urlparse.urljoin(self.base_link, rel.attrs['href']))
                    relData = dom_parser.parse_dom(
                        relData, 'table', attrs={'class': 'release-list'})
                    relData = dom_parser.parse_dom(relData,
                                                   'tr',
                                                   attrs={'class': 'row'})
                    relData = [
                        (dom_parser.parse_dom(
                            i,
                            'td',
                            attrs={
                                'class': re.compile('[^\'"]*list-name[^\'"]*')
                            }),
                         dom_parser.parse_dom(i,
                                              'img',
                                              attrs={'class': 'countryflag'},
                                              req='alt'),
                         dom_parser.parse_dom(i,
                                              'td',
                                              attrs={'class':
                                                     'release-types'}))
                        for i in relData
                    ]
                    relData = [(i[0][0].content, i[1][0].attrs['alt'].lower(),
                                i[2][0].content) for i in relData
                               if i[0] and i[1] and i[2]]
                    relData = [(i[0], i[2]) for i in relData
                               if i[1] == 'deutsch']
                    relData = [(i[0],
                                dom_parser.parse_dom(
                                    i[1],
                                    'img',
                                    attrs={'class': 'release-type-stream'}))
                               for i in relData]
                    relData = [i[0] for i in relData if i[1]]
                    #relData = dom_parser.parse_dom(relData, 'a', req='href')[:3]
                    relData = dom_parser.parse_dom(relData, 'a', req='href')

                    for i in relData:
                        i = client.request(
                            urlparse.urljoin(self.base_link, i.attrs['href']))
                        i = dom_parser.parse_dom(i,
                                                 'div',
                                                 attrs={'id': 'Module'})
                        i = [(i,
                              dom_parser.parse_dom(
                                  i,
                                  'a',
                                  attrs={
                                      'href':
                                      re.compile(
                                          '[^\'"]*xrel_search_query[^\'"]*')
                                  },
                                  req='href'))]
                        r += [(x[0], x[1][0].attrs['href'] if x[1] else '')
                              for x in i]

            r = [(dom_parser.parse_dom(i[0],
                                       'div',
                                       attrs={'id':
                                              'ModuleReleaseDownloads'}), i[1])
                 for i in r]
            r = [(dom_parser.parse_dom(
                i[0][0],
                'a',
                attrs={'class': re.compile('.*-stream.*')},
                req='href'), i[1]) for i in r if len(i[0]) > 0]

            for items, rel in r:
                rel = urlparse.urlparse(rel).query
                rel = urlparse.parse_qs(rel)['xrel_search_query'][0]

                quality, info = source_utils.get_release_quality(rel)

                items = [(i.attrs['href'], i.content) for i in items]
                items = [(i[0], dom_parser.parse_dom(i[1], 'img', req='src'))
                         for i in items]
                items = [(i[0], i[1][0].attrs['src']) for i in items if i[1]]
                items = [(i[0], re.findall('.+/(.+\.\w+)\.\w+', i[1]))
                         for i in items]
                items = [(i[0], i[1][0]) for i in items if i[1]]

                info = ' | '.join(info)

                for link, hoster in items:
                    valid, hoster = source_utils.is_host_valid(
                        hoster, hostDict)
                    if not valid: continue

                    sources.append({
                        'source': hoster,
                        'quality': quality,
                        'language': 'de',
                        'url': link,
                        'info': info,
                        'direct': False,
                        'debridonly': False,
                        'checkquality': True
                    })

            return sources
        except:
            return sources
Code Example #13
File: scenerls.py Project: danny1990/ScraperPackages
    def sources(self, url, hostDict, hostprDict):
        try:
            sources = []
            if url is None: return sources

            hostDict = hostprDict + hostDict
            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']
            hdlr = '%sS%02dE%02d' % (
                data['year'], int(data['season']), int(data['episode'])
            ) if 'tvshowtitle' in data else data['year']
            query = '%s %s S%02dE%02d' % (
                data['tvshowtitle'], data['year'], int(data['season']),
                int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (
                    data['title'], data['year'])
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
            try:
                url = self.search_link % urllib.quote_plus(query)
                url = urlparse.urljoin(self.base_link, url)
                r = self.scraper.get(url).content
                posts = client.parseDOM(r, 'div', attrs={'class': 'post'})
                items = []
                dupes = []
                for post in posts:
                    try:
                        t = client.parseDOM(post, 'a')[0]
                        t = re.sub('<.+?>|</.+?>', '', t)
                        x = re.sub(
                            '(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)',
                            '', t)
                        if not cleantitle.get(title) in cleantitle.get(x):
                            raise Exception()
                        y = re.findall(
                            '[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]',
                            t)[-1].upper()
                        if not y == hdlr: raise Exception()
                        fmt = re.sub(
                            '(.+)(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*)(\.|\)|\]|\s)',
                            '', t.upper())
                        fmt = re.split('\.|\(|\)|\[|\]|\s|\-', fmt)
                        fmt = [i.lower() for i in fmt]
                        #if not any(i in ['1080p', '720p'] for i in fmt): raise Exception()
                        if len(dupes) > 2: raise Exception()
                        dupes += [x]
                        u = client.parseDOM(post, 'a', ret='href')[0]
                        r = self.scraper.get(u).content
                        u = client.parseDOM(r, 'a', ret='href')
                        u = [(i.strip('/').split('/')[-1], i) for i in u]
                        items += u
                    except:
                        pass
            except:
                pass
            for item in items:
                try:
                    name = item[0]
                    name = client.replaceHTMLCodes(name)
                    t = re.sub(
                        '(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)',
                        '', name)
                    if not cleantitle.get(t) == cleantitle.get(title):
                        raise Exception()
                    y = re.findall(
                        '[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]',
                        name)[-1].upper()
                    if not y == hdlr: raise Exception()
                    quality, info = source_utils.get_release_quality(
                        name, item[1])
                    url = item[1]
                    if any(x in url for x in ['.rar', '.zip', '.iso']):
                        raise Exception()
                    url = client.replaceHTMLCodes(url)
                    url = url.encode('utf-8')
                    host = re.findall(
                        '([\w]+[.][\w]+)$',
                        urlparse.urlparse(url.strip().lower()).netloc)[0]
                    if not host in hostDict: raise Exception()
                    host = client.replaceHTMLCodes(host)
                    host = host.encode('utf-8')
                    sources.append({
                        'source': host,
                        'quality': quality,
                        'language': 'en',
                        'url': url,
                        'info': info,
                        'direct': False,
                        'debridonly': True
                    })
                except:
                    pass
            return sources
        except:
            return sources
Code Example #14
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url is None:
                raise Exception()

            if not self.api or self.api == '':
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']
            year = int(data['year']) if 'year' in data and data['year'] is not None else None
            season = int(data['season']) if 'season' in data and data['season'] is not None else None
            episode = int(data['episode']) if 'episode' in data and data['episode'] is not None else None
            query = '%s S%02dE%02d' % (
                title, season,
                episode) if 'tvshowtitle' in data else '%s %d' % (title, year)

            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            query += ' lang:%s' % self.language[0]
            query = urllib.quote_plus(query)
            url = urlparse.urljoin(self.base_link, self.search_link)

            hostDict = hostprDict + hostDict

            iterations = self.streamLimit / self.streamIncrease
            last = self.streamLimit - (iterations * self.streamIncrease)
            if not last:
                iterations = iterations - 1
                last = self.streamIncrease
            iterations = iterations + 1

            seen_urls = set()
            for type in self.types:
                searchFrom = 0
                searchCount = self.streamIncrease
                for offset in range(iterations):
                    if iterations == offset + 1: searchCount = last
                    urlNew = url % (type, self.api, query, searchCount,
                                    searchFrom)
                    searchFrom = searchFrom + self.streamIncrease

                    results = client.request(urlNew)
                    results = json.loads(results)

                    apistatus = results['status']
                    if apistatus != 'success': break

                    results = results['result']

                    added = False
                    for result in results:
                        jsonName = result['title']
                        jsonSize = result['sizeinternal']
                        jsonExtension = result['extension']
                        jsonLanguage = result['lang']
                        jsonHoster = result['hostername'].lower()
                        jsonLink = result['hosterurls'][0]['url']

                        if jsonLink in seen_urls: continue
                        seen_urls.add(jsonLink)

                        if not jsonHoster in hostDict: continue

                        if not self.extensionValid(jsonExtension): continue

                        quality, info = source_utils.get_release_quality(
                            jsonName)
                        info.append(self.formatSize(jsonSize))
                        info.append(jsonName)
                        info = '|'.join(info)

                        sources.append({
                            'source': jsonHoster,
                            'quality': quality,
                            'language': jsonLanguage,
                            'url': jsonLink,
                            'info': info,
                            'direct': False,
                            'debridonly': False
                        })
                        added = True

                    if not added:
                        break

            return sources
        except:
            return sources
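
The paging arithmetic in example #14 splits streamLimit results into requests of streamIncrease each, with a shorter final page. A worked example with assumed values (Python 2 integer division, as in the scraper itself):

    streamLimit, streamIncrease = 50, 20
    iterations = streamLimit / streamIncrease         # 2
    last = streamLimit - iterations * streamIncrease  # 10
    if not last:                     # the limit divides evenly: fold the
        iterations = iterations - 1  # full-size last page into 'last'
        last = streamIncrease
    iterations = iterations + 1      # 3 requests of 20, 20 and 10 results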
Code Example #15
    def sources(self, url, hostDict, hostprDict):
        try:
            sources = []

            if url is None: return sources

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
            aliases = eval(data['aliases'])
            mozhdr = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'}
            headers = mozhdr
            headers['X-Requested-With'] = 'XMLHttpRequest'
            
            self.s = cfscrape.create_scraper()
            if 'tvshowtitle' in data:
                episode = int(data['episode'])
                url = self.searchShow(data['tvshowtitle'], data['season'], aliases, headers)
            else:
                episode = 0
                url = self.searchMovie(data['title'], data['year'], aliases, headers)

            headers['Referer'] = url
            ref_url = url
            mid = re.findall('-(\d*)\.',url)[0]
            data = {'id': mid}
            r = self.s.post(url, data=data, headers=headers)
            try:
                u = urlparse.urljoin(self.base_link, self.server_link % mid)
                r = self.s.get(u, headers=mozhdr).content
                r = json.loads(r)['html']
                rl = client.parseDOM(r, 'div', attrs = {'class': 'pas-list'})
                rh = client.parseDOM(r, 'div', attrs = {'class': 'pas-header'})
                ids = client.parseDOM(rl, 'li', ret='data-id')
                servers = client.parseDOM(rl, 'li', ret='data-server')
                labels = client.parseDOM(rl, 'a', ret='title')
                r = zip(ids, servers, labels)
                rrr = zip(client.parseDOM(rh, 'li', ret='data-id'), client.parseDOM(rh, 'li', ret='class'))
                types = {}
                for rr in rrr:
                    types[rr[0]] = rr[1] 
                               
                for eid in r:
                    try:
                        try:
                            ep = re.findall('episode.*?(\d+).*?',eid[2].lower())[0]
                        except:
                            ep = 0
                        if (episode == 0) or (int(ep) == episode):
                            t = str(int(time.time()*1000))
                            quali = source_utils.get_release_quality(eid[2])[0]
                            if 'embed' in types[eid[1]]:
                                url = urlparse.urljoin(self.base_link, self.embed_link % (eid[0]))
                                xml = self.s.get(url, headers=headers).content
                                url = json.loads(xml)['src']
                                valid, hoster = source_utils.is_host_valid(url, hostDict)
                                if not valid: continue
                                q = source_utils.check_sd_url(url)
                                q = q if q != 'SD' else quali
                                sources.append({'source': hoster, 'quality': q, 'language': 'en', 'url': url, 'direct': False, 'debridonly': False })
                                continue
                            else:
                                url = urlparse.urljoin(self.base_link, self.token_link % (eid[0], mid, t))
                            script = self.s.get(url, headers=headers).content
                            if '$_$' in script:
                                params = self.uncensored1(script)
                            elif script.startswith('[]') and script.endswith('()'):
                                params = self.uncensored2(script)
                            elif '_x=' in script:
                                x = re.search('''_x=['"]([^"']+)''', script).group(1)
                                y = re.search('''_y=['"]([^"']+)''', script).group(1)
                                params = {'x': x, 'y': y}
                            else:
                                raise Exception()
                            u = urlparse.urljoin(self.base_link, self.source_link % (eid[0], params['x'], params['y']))
                            length = 0
                            count = 0
                            while length == 0 and count < 11:
                                r = self.s.get(u, headers=headers).text
                                length = len(r)
                                if length == 0: count += 1
                            uri = None
                            uri = json.loads(r)['playlist'][0]['sources']
                            try:
                                uri = [i['file'] for i in uri if 'file' in i]
                            except:
                                try:
                                    uri = [uri['file']]
                                except:
                                    continue
                            
                            for url in uri:
                                if 'googleapis' in url:
                                    q = source_utils.check_sd_url(url)
                                    sources.append({'source': 'gvideo', 'quality': q, 'language': 'en', 'url': url, 'direct': True, 'debridonly': False})
                                    continue

                                valid, hoster = source_utils.is_host_valid(url, hostDict)
                                #urls, host, direct = source_utils.check_directstreams(url, hoster)
                                q = quali                        
                                if valid:
                                    #for z in urls:
                                    if hoster == 'gvideo':
                                        direct = True
                                        try:
                                            q = directstream.googletag(url)[0]['quality']
                                        except:
                                            pass
                                        url = directstream.google(url, ref=ref_url)
                                    else: direct = False
                                    sources.append({'source': hoster, 'quality': q, 'language': 'en', 'url': url, 'direct': direct, 'debridonly': False})                             
                                else:
                                    sources.append({'source': 'CDN', 'quality': q, 'language': 'en', 'url': url, 'direct': True, 'debridonly': False})
                    except:
                        pass
            except:
                pass

            return sources
        except:
            return sources
Code Example #16
    def sources(self, url, hostDict, hostprDict):
        try:
            sources = []

            if url is None: return sources

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])
            imdb = data['imdb']

            try:
                query = urlparse.urljoin(self.base_link, self.search_link)
                result = client.request(query)
                m = re.findall(
                    'Movie Size:(.+?)<.+?href="(.+?)".+?href="(.+?)"\s*onMouse',
                    result, re.DOTALL)
                m = [(i[0], i[1], i[2]) for i in m if imdb in i[1]]
                if m:
                    link = m
                else:
                    query = urlparse.urljoin(self.base_link, self.search_link2)
                    result = client.request(query)
                    m = re.findall(
                        'Movie Size:(.+?)<.+?href="(.+?)".+?href="(.+?)"\s*onMouse',
                        result, re.DOTALL)
                    m = [(i[0], i[1], i[2]) for i in m if imdb in i[1]]
                    if m:
                        link = m
                    else:
                        query = urlparse.urljoin(self.base_link,
                                                 self.search_link3)
                        result = client.request(query)
                        m = re.findall(
                            'Movie Size:(.+?)<.+?href="(.+?)".+?href="(.+?)"\s*onMouse',
                            result, re.DOTALL)
                        m = [(i[0], i[1], i[2]) for i in m if imdb in i[1]]
                        if m: link = m

            except:
                return sources

            for item in link:
                try:

                    quality, info = source_utils.get_release_quality(
                        item[2], None)

                    try:
                        size = re.findall(
                            '((?:\d+\.\d+|\d+\,\d+|\d+) (?:GB|GiB|MB|MiB))',
                            item[0])[-1]
                        div = 1 if size.endswith(('GB', 'GiB')) else 1024
                        size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                        size = '%.2f GB' % size
                        info.append(size)
                    except:
                        pass

                    info = ' | '.join(info)

                    url = item[2]
                    if any(x in url for x in ['.rar', '.zip', '.iso']):
                        raise Exception()
                    url = client.replaceHTMLCodes(url)
                    url = url.encode('utf-8')

                    sources.append({
                        'source': 'DL',
                        'quality': quality,
                        'language': 'en',
                        'url': url,
                        'info': info,
                        'direct': True,
                        'debridonly': False
                    })
                except:
                    pass

            return sources
        except:
            return sources
Code Example #17
    def sources(self, url, hostDict, hostprDict):
        sources = []

        try:
            if not url:
                return sources

            r = client.request(urlparse.urljoin(self.base_link, url))

            links = dom_parser.parse_dom(r, 'table')
            links = [
                i.content for i in links if dom_parser.parse_dom(
                    i, 'span', attrs={'class': re.compile('linkSearch(-a)?')})
            ]
            links = re.compile('(<a.+?/a>)', re.DOTALL).findall(''.join(links))
            links = [
                dom_parser.parse_dom(i, 'a', req='href') for i in links
                if re.findall('(.+?)\s*\(\d+\)\s*<', i)
            ]
            links = [i[0].attrs['href'] for i in links if i]

            url = re.sub('/streams-\d+', '', url)

            for link in links:
                if '/englisch/' in link: continue
                control.sleep(3000)
                if link != url:
                    r = client.request(urlparse.urljoin(self.base_link, link))

                quality = 'SD'
                info = []

                detail = dom_parser.parse_dom(r,
                                              'th',
                                              attrs={'class': 'thlink'})
                detail = [
                    dom_parser.parse_dom(i, 'a', req='href') for i in detail
                ]
                detail = [(i[0].attrs['href'],
                           i[0].content.replace('&#9654;', '').strip())
                          for i in detail if i]

                if detail:
                    quality, info = source_utils.get_release_quality(
                        detail[0][1])
                    r = client.request(
                        urlparse.urljoin(self.base_link, detail[0][0]))

                r = dom_parser.parse_dom(r, 'table')
                r = [
                    dom_parser.parse_dom(i, 'a', req=['href', 'title'])
                    for i in r if not dom_parser.parse_dom(i, 'table')
                ]
                r = [(l.attrs['href'], l.attrs['title']) for i in r for l in i
                     if l.attrs['title']]

                info = ' | '.join(info)

                for stream_link, hoster in r:
                    valid, hoster = source_utils.is_host_valid(
                        hoster, hostDict)
                    if not valid: continue

                    direct = False

                    if hoster.lower() == 'gvideo':
                        direct = True

                    sources.append({
                        'source': hoster,
                        'quality': quality,
                        'language': 'de',
                        'url': stream_link,
                        'info': info,
                        'direct': direct,
                        'debridonly': False,
                        'checkquality': True
                    })

            return sources
        except:
            return sources