Code Example #1
File: watchepisodes.py Project: user135711/scrapers
    def get_sources(self, episode_url, title, year, season, episode, start_time):
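        # Fetches the episode page, pulls every data-actuallink URL, keeps only
        # hosts that pass filter_host, and records at most the first 24 as sources.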
        #print '::::::::::::::'+episode_url
        try:
            headers = {'User-Agent': User_Agent}
            links = requests.get(episode_url,headers=headers,timeout=5).content   
            LINK = re.compile('<div class="link-number".+?data-actuallink="(.+?)"',re.DOTALL).findall(links)
            count = 0            
            for final_url in LINK:
                #print final_url
                host = final_url.split('//')[1].replace('www.','')
                host = host.split('/')[0].lower()
                if not filter_host(host):
                    continue
                host = host.split('.')[0].title()
                count +=1
                if count<25:
                    self.sources.append({'source': host,'quality': 'DVD','scraper': self.name,'url': final_url,'direct': False})
            if dev_log=='true':
                end_time = time.time() - start_time
                send_log(self.name,end_time,count,title,year, season=season,episode=episode)
        except Exception, argument:        
            if dev_log == 'true':
                error_log(self.name,argument)
            return self.sources
Code Example #2
File: kickass.py Project: andromeda420/andromeda420
 def get_source(self, start_url, title, year, season, episode, start_time):
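     # Parses a Kickass results page for (size, magnet, name) triples, strips the
     # mylink.cx redirector and percent-encoding from each magnet, and emits
     # debrid-only torrent sources tagged with quality and size.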
     try:
         #print 'URL PASSED OKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK'+start_url
         count = 0
         headers = {'User-Agent': client.agent()}
         r = client.request(start_url, headers=headers)
         #print r
         Endlinks = re.compile(
             'class="nobr center">(.+?)</span></td>.+?title="Torrent magnet link" href="(.+?)".+?class="cellMainLink">(.+?)</a>',
             re.DOTALL).findall(r)
         #print 'scraperchk - scrape_movie - EndLinks: '+str(Endlinks)
         for size, Magnet, qual in Endlinks:
             Magnet = Magnet.replace('https://mylink.cx/?url=', '')
             Magnet = Magnet.replace('%3A',
                                     ':').replace('%3F', '?').replace(
                                         '%3D', '=').split('%26dn')[0]
             #print Magnet + '<><><><><>'
             qual = quality_tags.get_release_quality(qual, None)[0]
             count += 1
             self.sources.append({
                 'source': 'Torrent',
                 'quality': qual + ' ' + size,
                 'scraper': self.name,
                 'url': Magnet,
                 'direct': False,
                 'debridonly': True
             })
         if dev_log == 'true':
             end_time = time.time() - start_time
             send_log(self.name, end_time, count, title, year)
     except Exception, argument:
         if dev_log == 'true':
             error_log(self.name, argument)
         return []
Code Example #3
File: filuit.py Project: andromeda420/andromeda420
    def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
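        # Builds an SxxEyy query, harvests data-clipboard-text links from each
        # search results table, checks the file name for the right title and
        # episode tag, and keeps only the whitelisted direct-download hosts.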
        try:
            start_time = time.time()
            hdlr = 'S%02dE%02d' % (int(season), int(episode))
            query = clean_search(title)
            query = urllib.quote_plus(query + ' ' + hdlr)
            urls = []
            for link in self.search_links:
                try:
                    url = urlparse.urljoin(self.base_link, link % query)
                    r = client.request(url)
                    posts = client.parseDOM(r, 'tbody')
                    posts = client.parseDOM(posts, 'tr')
                    urls += [(client.parseDOM(i, 'button', ret='data-clipboard-text')[0]) for i in posts if i]
                except:
                    pass
            count = 0
            for url in urls:
                name = url.split('/')[-1].lower()
                name = client.replaceHTMLCodes(name).replace('%20', '')
                if 'movies' in url:
                    continue
                if any(x in url for x in ['italian', 'dubbed', 'teaser', 'subs', 'sub', 'dub',
                                          'samples', 'extras', 'french', 'trailer', 'trailers', 'sample']):
                    continue

                t = re.sub('(\.|\(|\[|\s)(S\d+E\d+|S\d+)(\.|\)|\]|\s)(.+|)', '', name, flags=re.I)
                if clean_title(t) not in clean_title(title): continue
                y = re.findall('[\.|\(|\[|\s](S\d+E\d+|S\d+)[\.|\)|\]|\s]', name, re.I)[-1].upper()
                if not y == hdlr: continue

                res, info = quality_tags.get_release_quality(name, url)

                if any(x in url for x in ['hastidl', '1tehmovies', '62.210.103.107', '79.127', '213.32.113.82',
                                          'dl5.downloadha', '89.163.255.42', '185.56.20.142', 's1.0music',
                                          'dl3.yoozdl', 'dl4.lavinmovie.net', 'dl6.lavinmovie.net',
                                          'dl3.upload08.com', 'dl8.uploadt.com', '163.172.6.218',
                                          'samba.allunix.ru', 'server417']):
                    count += 1

                    url += '|User-Agent=%s&Referer=%s' % (client.agent(), self.base_link)
                    url = urllib.quote(url, '|:?/&+=_-')

                    self.sources.append(
                        {'source': 'DirectLink', 'quality': res, 'scraper': self.name, 'url': url, 'direct': True})

            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name, end_time, count, title, year)

            return self.sources
        except Exception, argument:
            if dev_log == 'true':
                error_log(self.name, argument)
            return self.sources



#filepursuit().scrape_movie('Black Panther', '2018', '')
Code Example #4
File: moviefisher.py Project: user135711/scrapers
 def get_source(self, item_url, title, year, start_time):
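     # The site appears to expose a WordPress-style JSON API: each item carries
     # rendered title/content HTML, so hoster links are read from the iframes
     # embedded in "content" after matching the release year.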
     try:
         count = 0
         data = requests.get(item_url).json()
         for item in data:
              post_title = item["title"]["rendered"]  # keep the 'title' argument intact for send_log below
             content = item["content"]["rendered"]
             year2 = item["date"][:4]
             if int(year) != int(year2):
                 continue
             #Links = re.findall(r"(http.*streamango.com\/embed\/\w{1,}|https:\/\/openload\.co\/embed\/\w{1,}\/)",content)
             Links = client.parseDOM(content, 'iframe', ret='src')
             for link in Links:
                 count += 1
                 host = link.split('//')[1].replace('www.', '')
                 host = host.split('/')[0].split('.')[0].title()
                 label = "DVD"
                 self.sources.append({
                     'source': host,
                     'quality': label,
                     'scraper': self.name,
                     'url': link,
                     'direct': False
                 })
         if dev_log == 'true':
             end_time = time.time() - start_time
             send_log(self.name, end_time, count, title, year)
     except Exception, argument:
         if dev_log == 'true':
             error_log(self.name, argument)
Code Example #5
 def scrape_movie(self, title, year, imdb, debrid=False):
     try:
         count = 0
         start_time = time.time()
         search_id = urllib.quote_plus('%s %s' % (clean_search(title), year))
         start_url = '%s/?s=%s' % (self.base_link, search_id)
         html = client.request(start_url, referer=self.base_link)
         match = re.compile('class="thumb".+?title="(.+?)".+?href="(.+?)">', re.DOTALL).findall(html)
         for name, item_url in match:
             if not year in name:
                 continue
             if not clean_title(title) == clean_title((name.split(year)[0][:-1])):
                 continue
             OPEN = client.request(item_url, referer=self.base_link)
             link = client.parseDOM(OPEN, 'iframe', ret='src')[0]
             host = link.split('//')[1].replace('www.', '')
             host = host.split('/')[0]
             if not filter_host(host):
                 continue
             count += 1
             self.sources.append({'source': host, 'quality': 'HD', 'scraper': self.name, 'url': link, 'direct': False})
         if dev_log == 'true':
             end_time = time.time() - start_time
             send_log(self.name, end_time, count, title, year)
         return self.sources
     except Exception, argument:
         if dev_log == 'true':
             error_log(self.name, argument)
         return self.sources
Code Example #6
    def get_source(self, item_url, title, year, start_time, qual):
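        # Grabs the page's main iframe (openload) plus any mirrors advertised via
        # window.open(...) markup, filtering the mirrors through filter_host.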
        try:
            #print 'PASSEDURL >>>>>>'+item_url
            count = 0
            OPEN = client.request(item_url)

            frame = client.parseDOM(OPEN, 'iframe', ret='src')[0]
            if 'openload' in frame:
                count += 1
                self.sources.append(
                    {'source': 'openload', 'quality': qual, 'scraper': self.name, 'url': frame, 'direct': False})

            extra_links = re.findall('''window.open\(['"]([^'"]+)['"]\).+?server:([^<]+)''', OPEN, re.DOTALL)
            for link, host in extra_links:
                if not filter_host(host.replace(' ', '')): continue
                link = client.replaceHTMLCodes(link).encode('utf-8')
                link = urlparse.urljoin(self.base_link, link)
                count += 1
                self.sources.append({'source': host, 'quality': qual, 'scraper': self.name, 'url': link, 'direct': False})
            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name, end_time, count, title, year)

        except Exception, argument:
            if dev_log == 'true':
                error_log(self.name, argument)
Code Example #7
File: streamdreams.py Project: user135711/scrapers
    def get_source(self, item_url, title, year, season, episode, start_time):
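        # Uses cfscrape to get past Cloudflare, then reads each row's data-href
        # host link and its quality_* CSS class from the episode's link table.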
        try:
            count = 0
            headers = {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
                'Referer': self.base_link}
            scraper = cfscrape.create_scraper()
            r = scraper.get(item_url, headers=headers).content
            data = client.parseDOM(r, 'tr')
            for item in data:
                qual = client.parseDOM(item, 'span', ret='class')[0]
                qual = qual.replace('quality_', '')

                link = client.parseDOM(item, 'a', ret='data-href')[0]

                host = link.split('//')[1].replace('www.', '')
                host = host.split('/')[0]
                if not filter_host(host): continue
                count += 1
                self.sources.append({'source': host, 'quality': qual, 'scraper': self.name, 'url': link, 'direct': False})
            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name, end_time, count, title, year, season='', episode='')

            return self.sources
        except BaseException:
            return self.sources
Code Example #8
File: freemusic.py Project: varunrai/scrapers
    def scrape_music(self, title, artist, debrid=False):
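        # Music scraper: queries a YouTube results page for "artist title", cleans
        # the entity-encoded card titles, and keeps matching watch?v= links.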
        try:
            song_search = clean_title(title.lower()).replace(' ','+')
            artist_search = clean_title(artist.lower()).replace(' ','+')
            start_url = '%sresults?search_query=%s+%s' % (self.base_link, artist_search, song_search)
            html = requests.get(start_url, headers=headers, timeout=20).content
            match = re.compile('<h4 class="card-title">.+?</i>(.+?)</h4>.+?id="(.+?)"',re.DOTALL).findall(html)
            count = 0
            for m, link in match:
                match4 = m.replace('\n','').replace('\t','').replace('  ',' ').replace('   ',' ').replace('    ',' ').replace('     ',' ')
                match5 = re.sub('&#(\d+);', '', match4)
                match5 = re.sub('(&#[0-9]+)([^;^0-9]+)', '\\1;\\2', match5)
                match5 = match5.replace('&quot;', '\"').replace('&amp;', '&')
                match5 = re.sub('\\\|/|\(|\)|\[|\]|\{|\}|-|:|;|\*|\?|"|\'|<|>|\_|\.|\?', ' ', match5)
                match5 = ' '.join(match5.split())
                match2 = m.replace('\n','').replace('\t','').replace(' ','')
                if clean_title(title).lower() in clean_title(match2).lower():
                    if clean_title(artist).lower() in clean_title(match2).lower():
                        final_link = 'https://www.youtube.com/watch?v='+link
                        count +=1
                        self.sources.append({'source':match5, 'quality':'SD', 'scraper':self.name, 'url':final_link, 'direct': False})
            if dev_log=='true':
                end_time = time.time() - self.start_time
                send_log(self.name,end_time,count)             

            return self.sources    
        except Exception, argument:
            return self.sources
Code Example #9
File: movie4u.py Project: varunrai/scrapers
    def get_source(self,url,title,year,season,episode,start_time):
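        # Follows the first link in the bwa-content block; player.php iframes are
        # resolved to their final URL and the quality is read from a meta tag.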
        try:
            headers = {'User-Agent': client.agent()}
            OPEN = client.request(url, headers=headers)
            holder = client.parseDOM(OPEN, 'div', attrs={'class':'bwa-content'})[0]
            holder = client.parseDOM(holder, 'a', ret='href')[0]
            links = client.request(holder, headers=headers)
            Regex = client.parseDOM(links, 'iframe', ret='src', attrs={'class': 'metaframe rptss'})
            count = 0
            for link in Regex:
                if 'player.php' in link:
                    link = client.request(link, headers=headers, output='geturl')
                    qual = client.request(link, headers=headers)
                    qual = client.parseDOM(qual, 'meta', ret='content')[0]
                else:
                    qual = ''  # plain embed links have no quality page to probe; falls through to 'SD' below

                host = link.split('//')[1].replace('www.','')
                host = host.split('/')[0].split('.')[0].title()

                if '1080p' in qual:
                    rez = '1080p'
                elif '720p' in qual:
                    rez = '720p'
                else: rez = 'SD'
                count += 1
                self.sources.append({'source': host, 'quality': rez, 'scraper': self.name, 'url': link, 'direct': False})
            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name, end_time, count, title, year, season=season, episode=episode)
        except:
            pass

#movie4u().scrape_movie('Wonder Woman', '2017','')
#movie4u().scrape_episode('Suits','2011','','8','5','','')
Code Example #10
    def get_source(self, item_url, title, year, start_time, qual):
        try:
            #print 'PASSEDURL >>>>>>'+item_url
            count = 0
            headers = {'User-Agent': client.agent()}
            OPEN = client.request(item_url, headers=headers)
            Endlinks = re.compile('<iframe src="(.+?)"',
                                  re.DOTALL).findall(OPEN)
            #print 'scraperchk - scrape_movie - EndLinks: '+str(Endlinks)
            for link in Endlinks:
                #print 'scraperchk - scrape_movie - link: '+str(link)
                count += 1
                host = link.split('//')[1].replace('www.', '')
                host = host.split('/')[0].split('.')[0].title()
                self.sources.append({
                    'source': host,
                    'quality': qual,
                    'scraper': self.name,
                    'url': link,
                    'direct': False
                })
            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name, end_time, count, title, year)
        except Exception, argument:
            if dev_log == 'true':
                error_log(self.name, argument)
            return self.sources


#movienolimit().scrape_movie('Upgrade', '2018', '')
Code Example #11
 def get_source(self,url,title,year,season,episode,start_time):
     try:
         #print 'URL PASSED OK'+url
         count = 0
         headers = {'User-Agent': client.agent()}
         r = client.request(url, headers=headers)
         Endlinks=re.compile("<tr id=.+?a href='(.+?)'.+?class='quality'>(.+?) BR<",re.DOTALL).findall(r)
         #print 'scraperchk - scrape_movie - EndLinks: '+str(Endlinks)
         for link1,qual in Endlinks:
             #link1=link1.replace('#038;','&')
             #print link1+qual+">>>>>>>>>>"
             headers = {'User-Agent': client.agent()}
             r = client.request(link1, headers=headers)
             #print r
             Endlinks1=re.compile('id="link".+?href="(.+?)"',re.DOTALL).findall(r)
             for link in Endlinks1:
                 #print 'scraperchk - scrape_movie - link: '+str(link)
                 count+=1
                 host = link.split('//')[1].replace('www.','')
                 host = host.split('/')[0].split('.')[0].title()
                 self.sources.append({'source':host, 'quality':qual, 'scraper':self.name, 'url':link, 'direct':False})
         if dev_log=='true':
             end_time = time.time() - start_time
             send_log(self.name,end_time,count,title,year)
     except Exception, argument:
         if dev_log=='true':
             error_log(self.name,argument)
         return[]
Code Example #12
 def get_source(self, start_url, title, year, season, episode, start_time):
     try:
         #print 'URL PASSED OKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK'+start_url
         count = 0
         headers = {'User-Agent': client.agent()}
         r = client.request(start_url, headers=headers)
         #print r
         Endlinks = re.compile(
             'class="iaconbox center floatright".+?title="Torrent magnet link" href="(.+?)">.+?class="cellMainLink">(.+?)</a>.+?class="nobr center">(.+?)</span></td>',
             re.DOTALL).findall(r)
         #print 'scraperchk - scrape_movie - EndLinks: '+str(Endlinks)
         for Magnet, quality, size in Endlinks:
             Magnet = Magnet.replace('https://mylink.me.uk/?url=', '')
             qual = quality_tags.check_sd_url(quality)
             #print Magnet + '<><><><><>'
             count += 1
             self.sources.append({
                 'source': 'Torrent',
                 'quality': size + ' ' + qual,
                 'scraper': self.name,
                 'url': Magnet,
                 'direct': False,
                 'debridonly': True
             })
         if dev_log == 'true':
             end_time = time.time() - start_time
             send_log(self.name, end_time, count, title, year)
     except Exception, argument:
         if dev_log == 'true':
             error_log(self.name, argument)
         return []
Code Example #13
    def check_for_play(self, link, title, year, season, episode, start_time):
        try:
            #print 'Pass url '+ link
            frame_page = requests.get(link).content
            links = re.compile('class="playlist".+?src="(.+?)"',
                               re.DOTALL).findall(frame_page)
            count = 0
            for url in links:
                url = url.replace(
                    'videozoo.me/embed.php', 'videozoo.me/videojs/').replace(
                        'playbb.me/embed.php', 'playbb.me/new/').replace(
                            'easyvideo.me/gogo/',
                            'easyvideo.me/gogo/new/').replace(
                                'play44.net/embed.php',
                                'play44.net/new/').replace('&file=', '&vid=')
                host = url.split('//')[1].replace('www.', '')
                host = host.split('/')[0].split('.')[0].title()

                url = self.resolve(url)
                count += 1
                self.sources.append({
                    'source': host,
                    'quality': 'SD',
                    'scraper': self.name,
                    'url': url,
                    'direct': True
                })
                #print 'PASSED for PLAY '+url
            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name, end_time, count, title, year, season,
                         episode)

        except:
            pass
Code Example #14
File: cmovies.py Project: andromeda420/andromeda420
 def scrape_movie(self, title, year, imdb, debrid=False):
     count = 0
     try:
         start_time = time.time()
         search_id = '%s %s' % (clean_search(title), year)
         start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(search_id))
         headers = {'User-Agent': client.agent()}
         html = requests.get(start_url, headers=headers, timeout=5).content
         posts = client.parseDOM(html, 'item')
         posts = [(client.parseDOM(i, 'title')[0], client.parseDOM(i, 'a', ret='href')) for i in posts if i]
         posts = [i[1] for i in posts if clean_title(i[0]) == clean_title(title)][0]
         for url in posts:
             if 'cmovies' in url:
                 continue
             link = 'https:' + url if url.startswith('//') else url
             if '1080' in link:
                 qual = '1080p'
             elif '720' in link:
                 qual = '720p'
             else:
                 qual = 'SD'
             host = url.split('//')[1].replace('www.', '')
             host = host.split('/')[0].split('.')[0].title()
             count += 1
             self.sources.append({'source': host, 'quality': qual, 'scraper': self.name, 'url': link, 'direct': False})
         if dev_log == 'true':
             end_time = time.time() - start_time
             send_log(self.name, end_time, count, title, year)
         return self.sources
     except Exception, argument:
         if dev_log == 'true':
             error_log(self.name, argument)
         return self.sources
Code Example #15
File: bnw.py Project: varunrai/scrapers
    def scrape_movie(self, title, year, imdb, debrid=False):
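        # Black-and-white era catalogue: bails out for anything after 1980, matches
        # the post title, and returns the page's direct .mp4 with UA/Referer pinned.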
        if int(year) > 1980: return self.sources
        try:
            start_time = time.time()
            query = urllib.quote_plus(clean_search(title.lower()))
            start_url = urlparse.urljoin(self.base_link, self.search_link % query)

            headers = {'User-Agent': client.agent(), 'Referer': self.base_link}
            count = 0
            html = client.request(start_url, headers=headers)
            posts = client.parseDOM(html, 'div',  attrs={'class': 'post'})
            posts = [(dom_parser.parse_dom(i, 'a', req='href')[0]) for i in posts if i]
            posts = [(i.attrs['href'], i.content) for i in posts if i]
            post = [(i[0]) for i in posts if clean_title(i[1]) == clean_title(title)][0]
            r = client.request(post, headers=headers)

            y = client.parseDOM(r, 'h1')[0]
            if not year in y: return self.sources

            links = client.parseDOM(r, 'source', ret='src')
            link = [i for i in links if i.endswith('mp4')][0]
            link += '|User-Agent=%s&Referer=%s' % (client.agent(), post)
            link = urllib.quote(link, ':/-_|&+=')
            count += 1
            self.sources.append({'source': 'bnw', 'quality': 'SD', 'scraper': self.name, 'url': link, 'direct': True})
            if dev_log=='true':
                end_time = time.time() - start_time
                send_log(self.name,end_time,count,title,year)

            return self.sources
        except Exception, argument:
            if dev_log == 'true':
                error_log(self.name,argument)
            return self.sources
Code Example #16
File: Zooqle.py Project: Kepler-22/_zips
 def get_source(self, start_url, title, year, season, episode, start_time):
     try:
         #print 'URL PASSED OKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK'+start_url
         count = 0
         headers = {'User-Agent': client.agent()}
         r = client.request(start_url, headers=headers)
         #print r
         Endlinks = re.compile(
             'class="text-muted3 smaller pad-l2".+?style="color:green"></i>(.+?)</span>.+?rel="nofollow" href="(.+?)".+?class="progress-bar prog-blue prog-l".+?>(.+?)</div></div>',
             re.DOTALL).findall(r)
         #print 'scraperchk - scrape_movie - EndLinks: '+str(Endlinks)
         for qual, Magnet, size in Endlinks:
             #print Magnet + '<><><><><>'+size
             count += 1
             self.sources.append({
                 'source': 'Torrent',
                  'quality': size + ' ' + qual,
                 'scraper': self.name,
                 'url': Magnet,
                 'direct': False,
                 'debridonly': True
             })
         if dev_log == 'true':
             end_time = time.time() - start_time
             send_log(self.name, end_time, count, title, year)
     except Exception, argument:
         if dev_log == 'true':
             error_log(self.name, argument)
         return []
Code Example #17
    def get_sources(self, episode_url, title, year, season, episode,
                    start_time):
        try:
            links = client.request(episode_url)
            links = client.parseDOM(links, 'div', attrs={'class': 'll-item'})
            count = 0
            for link in links:
                data = dom.parse_dom(link, 'a')[0]

                host = data.content
                if not filter_host(host):
                    continue
                count += 1
                url = data.attrs['href']
                self.sources.append({
                    'source': host,
                    'quality': 'DVD',
                    'scraper': self.name,
                    'url': url,
                    'direct': False
                })
            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name,
                         end_time,
                         count,
                         title,
                         year,
                         season=season,
                         episode=episode)

        except Exception, argument:
            if dev_log == 'true':
                error_log(self.name, argument)
            return self.sources
Code Example #18
File: hdvix.py Project: varunrai/scrapers
    def get_source(self,item_url,title,year,start_time):
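        # Walks the player iframe's menuPlayer list; openload links that look SD
        # get their real quality re-read from the embed page's meta content tag.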
        try:
            #print 'PASSEDURL >>>>>>'+item_url
            count = 0
            headers = {'User-Agent': client.agent()}
            OPEN = client.request(item_url, headers=headers)
            frame = client.parseDOM(OPEN, 'iframe', ret='src')[0]
            data = client.request(frame, headers=headers)
            data = client.parseDOM(data, 'ul', attrs={'class': 'menuPlayer'})[0]
            links = client.parseDOM(data, 'a', ret='href')

            for link in links:
                #print link+'<<<<<<<<<<<<<<<<<<<<<<<<<<'
                qual = quality_tags.check_sd_url(link)
                if qual == 'SD' and 'openload' in link:
                    data = client.request(link, headers=headers)
                    data = client.parseDOM(data, 'meta', ret='content')[0]
                    qual2, info = quality_tags.get_release_quality(data, None)
                else:
                    qual2 = qual
                count += 1
                host = link.split('//')[1].replace('www.','')
                host = host.split('/')[0].split('.')[0].title()
                self.sources.append({'source':host, 'quality':qual2, 'scraper': self.name, 'url':link, 'direct':False})
            if dev_log=='true':
                end_time = time.time() - start_time
                send_log(self.name,end_time,count,title,year)
        except Exception, argument:
            if dev_log=='true':
                error_log(self.name, argument)


#hdvix().scrape_movie('Black Panther', '2018', 'tt1825683', False)
Code Example #19
File: eztv.py Project: 17Q/modules4all
 def get_source(self, url, title, year, season, episode, start_time):
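     # Builds an SxxEyy query for the eztv search URL and turns each magnet item
     # into a debrid-only source, trimming tracker parameters after '&tr'.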
     sources = []
     try:
         count = 0
         if url is None:
             return sources
         data = urlparse.parse_qs(url)
         data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
         self.title = data['tvshowtitle']
         self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode']))
         query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode']))
         query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
         url = self.tvsearch.format(urllib.quote_plus(query).replace('+', '-'))
         items = self._get_items(url)
         for item in items:
             try:
                 name = item[0]
                 quality, info = quality_tags.get_release_quality(name, name)
                 info.append(item[2])
                 info = ' | '.join(info)
                 url = item[1]
                 url = url.split('&tr')[0]
                 count += 1
                 qual = '{0} | {1}'.format(quality, info)
                 self.sources.append({'source': 'MAGNET', 'quality': qual, 'scraper': self.name, 'url': url, 'direct': False, 'debridonly': True})
             except BaseException:
                 pass
         if dev_log == 'true':
             end_time = time.time() - float(start_time)
             send_log(self.name, end_time, count, title, year, season=season, episode=episode)
         return self.sources
     except Exception, argument:
         if dev_log == 'true':
             error_log(self.name, argument)
         return self.sources
Code Example #20
File: pirateiro.py Project: 17Q/modules4all
 def get_source(self, start_url, title, year, season, episode, start_time):
     try:
         #print 'URL PASSED OKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK'+start_url
         count = 0
         headers = {'User-Agent': client.agent()}
         #scraper= cfscrape.create_scraper()
         #r=scraper.get(start_url, headers=headers)
         r = client.request(start_url, headers=headers)
         #print r
         Endlinks = re.compile(
             'class="imagnet icon16" href="(.+?)">.+?<font color=#004E98>(.+?)</font>.+?><b>(.+?)</b></a',
             re.DOTALL).findall(r)
         #print 'scraperchk - scrape_movie - EndLinks: '+str(Endlinks)
         for Magnet, size, quality in Endlinks:
             #Magnet=Magnet.replace('https://mylink.me.uk/?url=', '')
             qual = quality_tags.get_release_quality(quality, None)[0]
             #print Magnet + '<><><><><>'
             count += 1
             self.sources.append({
                 'source': 'Torrent',
                 'quality': qual + ' ' + size,
                 'scraper': self.name,
                 'url': Magnet,
                 'direct': False,
                 'debridonly': True
             })
         if dev_log == 'true':
             end_time = time.time() - start_time
             send_log(self.name, end_time, count, title, year)
     except Exception, argument:
         if dev_log == 'true':
             error_log(self.name, argument)
         return []
Code Example #21
 def get_source(self, start_url, title, year, season, episode, start_time):
     try:
         #print 'URL PASSED OKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK'+start_url
         count = 0
         headers = {'User-Agent': client.agent()}
         r = client.request(start_url, headers=headers)
         #print r
         Endlinks = re.compile(
             'torrent" rel="nofollow".+?img alt="(.+?)".+?href="(.+?)".+?class="is-hidden-touch">(.+?)</td>',
             re.DOTALL).findall(r)
         #print 'scraperchk - scrape_movie - EndLinks: '+str(Endlinks)
         for qual, Magnet, size in Endlinks:
             Magnet = Magnet.replace('%3A',
                                     ':').replace('%3F', '?').replace(
                                         '%3D', '=').split('&dn=')[0]
          #print Magnet + '<><><><><>'
             qual = quality_tags.get_release_quality(qual, None)[0]
             count += 1
             self.sources.append({
                 'source': 'Torrent',
                 'quality': qual + ' ' + size,
                 'scraper': self.name,
                 'url': Magnet,
                 'direct': False,
                 'debridonly': True
             })
         if dev_log == 'true':
             end_time = time.time() - start_time
             send_log(self.name, end_time, count, title, year)
     except Exception, argument:
         if dev_log == 'true':
             error_log(self.name, argument)
         return []
Code Example #22
 def scrape_movie(self, title, year, imdb, debrid=False):
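     # Searches the site through cfscrape, matches the cleaned title, then walks
     # each result's "movie_links" block for outbound hoster links.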
     try:
         count = 0
         urls = []
         start_time = time.time()
         search_id = clean_search(title.lower())
         start_url = '%s/search/?keyword=%s' %(self.base_link, urllib.quote_plus(search_id))
         headers = {'User-Agent': client.agent(), 'Referer': self.base_link}
         scraper = cfscrape.create_scraper()
         html = scraper.get(start_url, headers=headers).content
         match = re.compile('class="ml-item".+?href="(.+?)".+?<b>(.+?)</b>.+?<b>(.+?)</b>.+?alt="(.+?)"',re.DOTALL).findall(html)
         for item_url1, date, res, name in match:
             item_url = urlparse.urljoin(self.base_link, item_url1)
             if not clean_title(search_id) == clean_title(name):
                 continue
             OPEN = scraper.get(item_url, headers=headers).content
             Endlinks = re.compile('class="movie_links"><li(.+?)<h3><b class="icon-share-alt"', re.DOTALL).findall(OPEN)[0]
             links = re.compile('target="_blank" href="(.+?)"', re.DOTALL).findall(Endlinks)
              for link in links:
                  if not link.startswith('http'):
                      continue
                  host = link.split('//')[1].replace('www.', '')
                  host = host.split('/')[0]
                  if not filter_host(host):
                      continue
                  count += 1  # count only hosts that actually get appended
                  self.sources.append({'source': host, 'quality': res, 'scraper': self.name, 'url': link, 'direct': False})
          if dev_log == 'true':
              end_time = time.time() - start_time
              send_log(self.name, end_time, count, title, year)
         return self.sources
     except Exception, argument:
         if dev_log == 'true':
             error_log(self.name, argument)
         return self.sources
Code Example #23
 def get_source(self, item_url, title, year, season, episode, debrid,
                start_time):
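     # Harvests candidate links from iframe/a/source/enclosure tags in the post,
     # maps the advertised Quality string to 720p/CAM/SD, and routes each host to
     # a debrid-only source (real-debrid domains) or a filter_host'ed free source.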
     try:
         count = 0
         frames = []
         frames += client.parseDOM(item_url, 'iframe', ret='src')
         frames += client.parseDOM(item_url, 'a', ret='href')
         frames += client.parseDOM(item_url, 'source', ret='src')
         frames += client.parseDOM(item_url, 'enclosure', ret='url')
         try:
             q = re.findall('<strong>Quality:</strong>([^<]+)', item_url,
                            re.DOTALL)[0]
             if 'high' in q.lower():
                 qual = '720p'
             elif 'cam' in q.lower():
                 qual = 'CAM'
             else:
                 qual = 'SD'
         except:
             qual = 'SD'
         for link in frames:
             if 'http://24hd.org' in link:
                 continue
             if '.pl/link/' in link:
                 continue
             if 'seehd.pl/d/' in link:
                 r = self.scraper.get(link).content
                 link = client.parseDOM(r, 'iframe', ret='src')[0]
             host = link.split('//')[1].replace('www.', '')
             host = host.split('/')[0].lower()
             if debrid is True:
                 rd_domains = get_rd_domains()
                 if host not in rd_domains:
                     continue
                 count += 1
                 self.sources.append({
                     'source': host,
                     'quality': qual,
                     'scraper': self.name,
                     'url': link,
                     'direct': False,
                     'debridonly': True
                  })
                  continue  # debrid link recorded; don't also append it as a free host below
             if not filter_host(host):
                 continue
             count += 1
             self.sources.append({
                 'source': host,
                 'quality': qual,
                 'scraper': self.name,
                 'url': link,
                 'direct': False
             })
         if dev_log == 'true':
             end_time = time.time() - start_time
             send_log(self.name, end_time, count, title, year)
     except Exception, argument:
         if dev_log == 'true':
             error_log(self.name, argument)
Code Example #24
    def get_source(self, url, title, year, season, episode, start_time):
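        # Torrent scraper: builds a movie or TV query, fetches three result pages
        # in parallel worker threads (_get_items), then resolves every collected
        # item to sources in a second thread pool (_get_sources).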
        try:
            self.items = []
            count = 0
            if url is None:
                return self.sources

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])
            self.title = data[
                'tvshowtitle'] if 'tvshowtitle' in data else data['title']
            self.hdlr = 'S%02dE%02d' % (
                int(data['season']), int(data['episode'])
            ) if 'tvshowtitle' in data else data['year']
            query = '%s S%02dE%02d' % (
                data['tvshowtitle'], int(data['season']),
                int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (
                    data['title'], data['year'])
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
            urls = []
            if 'tvshowtitle' in data:
                urls.append(self.tvsearch.format(urllib.quote(query), '1'))
                urls.append(self.tvsearch.format(urllib.quote(query), '2'))
                urls.append(self.tvsearch.format(urllib.quote(query), '3'))
            else:
                urls.append(self.moviesearch.format(urllib.quote(query), '1'))
                urls.append(self.moviesearch.format(urllib.quote(query), '2'))
                urls.append(self.moviesearch.format(urllib.quote(query), '3'))
            threads = []
            for url in urls:
                threads.append(workers.Thread(self._get_items, url))
            [i.start() for i in threads]
            [i.join() for i in threads]

            threads2 = []
            for i in self.items:
                count += 1
                threads2.append(workers.Thread(self._get_sources, i))
            [i.start() for i in threads2]
            [i.join() for i in threads2]

            if dev_log == 'true':
                end_time = time.time() - float(start_time)
                send_log(self.name,
                         end_time,
                         count,
                         title,
                         year,
                         season=season,
                         episode=episode)
            # xbmc.log('@#@SOURCES:%s' % self._sources, xbmc.LOGNOTICE)
            return self.sources
        except Exception, argument:
            if dev_log == 'true':
                error_log(self.name, argument)
            return self.sources
Code Example #25
 def get_source(self, item_url, title, year, season, episode, start_time):
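     # The download page hides FName/FSize/FSID form fields; posting them back to
     # /thanks-for-downloading/ yields a meta-refresh whose url= target is the
     # direct stream, which is appended with a pinned User-Agent.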
     count = 0
     try:
         if item_url is None:
             return self.sources
         qual = re.search('Quality\s*:(.+?)<br', item_url,
                          re.DOTALL).groups()[0]
         qual = re.sub('<.+?>', '', qual)
         qual, info = quality_tags.get_release_quality(qual, qual)
         headers = {
             'Origin': self.base_link,
             'Referer': client.parseDOM(item_url, 'link')[0],
             'X-Requested-With': 'XMLHttpRequest',
              'User-Agent': client.agent()
         }
         fn = client.parseDOM(item_url,
                              'input',
                              attrs={'name': 'FName'},
                              ret='value')[0]
         fs = client.parseDOM(item_url,
                              'input',
                              attrs={'name': 'FSize'},
                              ret='value')[0]
         fsid = client.parseDOM(item_url,
                                'input',
                                attrs={'name': 'FSID'},
                                ret='value')[0]
         #params = re.compile('<input name="FName" type="hidden" value="(.+?)" /><input name="FSize" type="hidden" value="(.+?)" /><input name="FSID" type="hidden" value="(.+?)"').findall(html)
         post_url = self.base_link + '/thanks-for-downloading/'
         form_data = {'FName': fn, 'FSize': fs, 'FSID': fsid}
         #link = requests.post(request_url, data=form_data, headers=headers).content
         link = client.request(post_url, post=form_data, headers=headers)
         stream_url = client.parseDOM(link,
                                      'meta',
                                      attrs={'http-equiv': 'refresh'},
                                      ret='content')[0]
         stream_url = client.replaceHTMLCodes(stream_url).split('url=')[-1]
         stream_url += '|User-Agent=%s' % urllib.quote(client.agent())
         count += 1
         self.sources.append({
             'source': 'DirectLink',
             'quality': qual,
             'scraper': self.name,
             'url': stream_url,
             'direct': True
         })
         if dev_log == 'true':
             end_time = time.time() - start_time
             send_log(self.name,
                      end_time,
                      count,
                      title + ' | ' + stream_url,
                      year,
                      season=season,
                      episode=episode)
     except:
         pass
Code Example #26
    def get_source(self, item_url, title, year, start_time):
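        # Reads the player-data attribute of each hoster button; vidcloud embeds
        # are expanded into a direct jwplayer file URL (with UA/Referer pinned),
        # other links stay indirect. For TV, 'year' carries a 'Season N' label.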
        try:
            #print 'PASSEDURL >>>>>>'+item_url
            count = 0
            headers = {'User-Agent': client.agent()}
            OPEN = client.request(item_url, headers=headers)
            #print OPEN
            Endlinks = dom_parser.parse_dom(OPEN, 'a', req='player-data')

            Endlinks = [(i.attrs['player-data'], i.content) for i in Endlinks
                        if i]
            if 'Season' in year:
                Endlinks = [(i[0], 'SD') for i in Endlinks if i[1] in year]
            else:
                Endlinks = [(i[0], i[1]) for i in Endlinks if i]

            #print 'series8 - scrape_movie - EndLinks: '+str(Endlinks)
            for link, quality in Endlinks:
                qual = quality_tags.check_sd_url(quality)

                if 'vidcloud' in link:
                    link = 'https:' + link if link.startswith('//') else link
                    data = client.request(link, headers=headers)
                    link = re.findall(
                        '''file\s*:\s*['"](.+?)['"].+?type['"]\s*:\s*['"](.+?)['"]''',
                        data, re.DOTALL)[0]
                    host = link[1]
                    link = link[
                        0] + '|User-Agent=%s&Referer=https://vidcloud.icu/' % urllib.quote(
                            client.agent())
                    direct = True
                else:
                    host = link.split('//')[1].replace('www.', '')
                    host = host.split('/')[0].split('.')[0].title()
                    direct = False

                count += 1
                self.sources.append({
                    'source': host,
                    'quality': qual,
                    'scraper': self.name,
                    'url': link,
                    'direct': direct
                })

            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name, end_time, count, title, year)
            return self.sources
        except Exception, argument:
            if dev_log == 'true':
                error_log(self.name, argument)
            return self.sources


#seriesonline8().scrape_movie('Black Panther', '2018', 'tt1825683', False)
#seriesonline8().scrape_episode('Suits','2011','','8','5','','')
Code Example #27
    def get_source(self, link, title, year, season, episode, start_time):
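        # Collects var link_server entries; vidnode load.php embeds are expanded
        # into their jwplayer sources with 1080p/720p/SD labels, and anything else
        # is kept as an embed when filter_host allows the host.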
        try:
            html = client.request(link)
            match = re.compile('var link_server.+?"(.+?)"',
                               re.DOTALL).findall(html)
            count = 0
            for link in match:
                if not link.startswith('https:'):
                    link = 'http:' + link
                if 'vidnode' in link:
                    if not 'load.php' in link:
                        continue
                    #print 'vidnodelink >>> '+link
                    html = client.request(link)

                    grab = re.compile("sources.+?file: '(.+?)',label: '(.+?)'",
                                      re.DOTALL).findall(html)
                    for end_link, rez in grab:
                        if '1080' in rez:
                            res = '1080p'
                        elif '720' in rez:
                            res = '720p'
                        else:
                            res = 'SD'
                        count += 1
                        self.sources.append({
                            'source': 'Vidnode',
                            'quality': res,
                            'scraper': self.name,
                            'url': end_link,
                            'direct': False
                        })

                else:
                    host = link.split('//')[1].replace('www.', '')
                    host = host.split('/')[0]
                    if not filter_host(host): continue
                    count += 1
                    self.sources.append({
                        'source': host,
                        'quality': 'SD',
                        'scraper': self.name,
                        'url': link,
                        'direct': False
                    })
            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name,
                         end_time,
                         count,
                         title,
                         year,
                         season=season,
                         episode=episode)

        except:
            pass
Code Example #28
    def get_sources(self, url, title, year, season, episode, start_time):
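            # The hoster list is behind packed JS: jsunpack exposes the string
            # fragments that assemble an ajax-*.php?slug=...&cp=... endpoint,
            # which is rebuilt here and fetched to obtain the hoster table.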
        try:
            count = 0
            url = urlparse.urljoin(self.base_link,
                                   url) if url.startswith('/') else url

            r = client.request(url)
            data = re.findall(r'\s*(eval.+?)\s*</script', r, re.DOTALL)[1]
            data = jsunpack.unpack(data).replace('\\', '')

            # https://www.primewire.ink/ajax-78583.php?slug=watch-2809620-Black-Panther&cp=7TYP4N
            # var rtv=\'aja\';var aa=\'x-7\';var ba=\'85\';var ca=\'83\';var da=\'.ph\';var ea=\'p?sl\';var fa=\'ug=\';var ia=\'&cp=7T\';var ja=\'YP\';var ka=\'4N\';var code=ia+ja+ka;var page=rtv+aa+ba+ca+da+ea+fa;function goml(loc){$(\'#div1\').load(domain+page+loc+code)}
            patern = '''rtv='(.+?)';var aa='(.+?)';var ba='(.+?)';var ca='(.+?)';var da='(.+?)';var ea='(.+?)';var fa='(.+?)';var ia='(.+?)';var ja='(.+?)';var ka='(.+?)';'''
            links_url = re.findall(patern, data, re.DOTALL)[0]
            slug = 'slug={}'.format(url.split('/')[-1])
            links_url = self.base_link + [''.join(links_url)][0].replace(
                'slug=', slug)
            links = client.request(links_url)
            links = client.parseDOM(links, 'tbody')

            #xbmc.log('@#@LINKSSSS: %s' % links, xbmc.LOGNOTICE)
            for link in links:
                try:
                    data = [(client.parseDOM(link, 'a', ret='href')[0],
                             client.parseDOM(link,
                                             'span',
                                             attrs={'class':
                                                    'version_host'})[0])][0]
                    link = urlparse.urljoin(self.base_link, data[0])

                    host = data[1]

                    if not filter_host(host): continue

                    count += 1
                    self.sources.append({
                        'source': host,
                        'quality': 'SD',
                        'scraper': self.name,
                        'url': link,
                        'direct': False
                    })
                except:
                    pass
            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name,
                         end_time,
                         count,
                         title,
                         year,
                         season=season,
                         episode=episode)

        except Exception, argument:
            if dev_log == 'true':
                error_log(self.name, argument)
            return self.sources
Code Example #29
File: cooltv.py Project: andromeda420/andromeda420
 def get_source(self, media_url, title, year, season, episode, start_time):
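      # Zero-pads season/episode into an sXXeYY tag, finds the matching episode
      # link, then collects both raw iframe embeds and "dwn-box" direct downloads
      # with their advertised resolutions.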
     season_bollox = "0%s" % season if len(season) < 2 else season
     episode_bollox = "0%s" % episode if len(episode) < 2 else episode
     all_bollox = 's%se%s' % (season_bollox, episode_bollox)
     try:
         headers = {'User-Agent': client.agent()}
         html = client.request(media_url, headers=headers)
         match = re.findall(r'<li><a href="([^"]+)">([^<>]*)<span.+?>',
                            str(html), re.I | re.DOTALL)
         count = 0
         for media_url, media_title in match:
             if all_bollox in media_title.lower():
                 link = client.request(media_url, headers=headers)
                 frame = client.parseDOM(link, 'iframe', ret='src')
                  for frame_link in frame:
                      count += 1
                      self.sources.append({
                         'source': 'Openload',
                         'quality': 'Unknown',
                         'scraper': self.name,
                         'url': frame_link,
                         'direct': False
                     })
                 cool_links = re.compile(
                     '"dwn-box".+?ref="(.+?)" rel="nofollow">(.+?)<span',
                     re.DOTALL).findall(link)
                 for vid_url, res in cool_links:
                     if '1080' in res:
                         res = '1080p'
                     elif '720' in res:
                         res = '720p'
                     elif 'HD' in res:
                         res = 'HD'
                     else:
                         res = 'SD'
                     count += 1
                     vid_url += '|User-Agent=%s&Referer=%s' % (
                         client.agent(), media_url)
                     vid_url = urllib.quote(vid_url, '|:?/&+=_-')
                     self.sources.append({
                         'source': 'Direct',
                         'quality': res,
                         'scraper': self.name,
                         'url': vid_url,
                         'direct': True
                     })
         if dev_log == 'true':
             end_time = time.time() - start_time
             send_log(self.name,
                      end_time,
                      count,
                      title,
                      year,
                      season='',
                      episode='')
     except:
         pass
Code Example #30
    def get_source(self, item_url, title, year, season, episode, start_time):
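        # Reads the "source-links" table of Watch Version rows; fruitad links are
        # dereferenced through their og:url meta tag, and only links resolveurl
        # recognises are kept (plain SD is relabelled DVD).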
        try:
            #print 'coolmovies pass ' + item_url
            headers = {'User-Agent': client.agent()}
            r = client.request(item_url, headers=headers)
            #xbmc.log('@#@HTML:%s' % r, xbmc.LOGNOTICE)

            data = client.parseDOM(r, 'table',
                                   attrs={'class': 'source-links'})[0]
            data = client.parseDOM(data, 'tr')
            data = [(client.parseDOM(i, 'a', ret='href')[0],
                     client.parseDOM(i, 'td')[1])
                    for i in data if 'version' in i.lower()]  # Watch Version
            Endlinks = [(i[0], re.sub('<.+?>', '', i[1])) for i in data if i]

            #Endlinks = re.compile('<td align="center"><strong><a href="(.+?)"',re.DOTALL).findall(r)
            #print 'coolmoviezone - scrape_movie - EndLinks: '+str(Endlinks)
            count = 0
            for link, host in Endlinks:
                if 'filebebo' in host: continue  #host with captcha
                if 'fruitad' in host:
                    link = client.request(link)
                    link = client.parseDOM(
                        link, 'meta', attrs={'name': 'og:url'},
                        ret='content')[0]  #returns the real url
                    if not link: continue

                import resolveurl
                if resolveurl.HostedMediaFile(link):
                    from universalscrapers.modules import quality_tags
                    quality, info = quality_tags.get_release_quality(link, link)
                    if quality == 'SD':
                        quality = 'DVD'
                    host = host.split('/')[0].split('.')[0].title()
                    count += 1
                    self.sources.append({
                        'source': host,
                        'quality': quality,
                        'scraper': self.name,
                        'url': link,
                        'direct': False
                    })
            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name,
                         end_time,
                         count,
                         title,
                         year,
                         season='',
                         episode='')
        except:
            pass
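A recurring idiom in these scrapers is deriving the display name of a file host from the link itself: drop the scheme and any www. prefix, keep the domain, and title-case its first component. Isolated into a runnable helper below; the example links are made up, and the resolveurl.HostedMediaFile check above, which needs the Kodi resolver package, is deliberately left out so the sketch runs standalone.

def host_label(url):
    # 'https://www.vidoza.net/abc123.html' -> 'Vidoza'
    host = url.split('//')[1].replace('www.', '')
    return host.split('/')[0].split('.')[0].title()

print(host_label('https://www.vidoza.net/abc123.html'))  # Vidoza
print(host_label('http://mixdrop.co/e/xyz'))             # Mixdrop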
Code Example #34
File: piratebay.py Project: myarchives/andromeda
    def get_source(self, url, title, year, season, episode, start_time):
        sources = []
        try:
            count = 0
            if url is None:
                return sources
            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
            tit = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
            query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) \
                    if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
            url = urlparse.urljoin(self.base_link, self.search_link % (urllib.quote(query)))
            r = client.request(url)
            r = client.parseDOM(r, 'table', attrs={'id': 'searchResult'})[0]
            posts = client.parseDOM(r, 'td')
            posts = [i for i in posts if 'detName' in i]
            for post in posts:
                post = post.replace('&nbsp;', ' ')
                name = client.parseDOM(post, 'a')[0]
                t = name.split(hdlr)[0]
                # strip parentheses before comparing cleaned titles
                if not clean_title(re.sub('[()]', '', t)) == clean_title(tit):
                    continue
                try:
                    y = re.findall('[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper()
                except BaseException:
                    y = re.findall('[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper()
                if not y == hdlr:
                    continue
                links = client.parseDOM(post, 'a', ret='href')
                magnet = [i for i in links if 'magnet:' in i][0]
                url = magnet.split('&tr')[0]
                count += 1
                quality, info = quality_tags.get_release_quality(name, name)
                try:
                    size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                    div = 1 if size.endswith(('GB', 'GiB')) else 1024
                    size = float(re.sub('[^0-9.]', '', size.replace(',', '.'))) / div
                    size = '%.2f GB' % size
                except BaseException:
                    size = '0'
                info.append(size)
                info = ' | '.join(info)
                qual = '{0} | {1}'.format(quality, info)
                self.sources.append({'source': 'Torrent', 'quality': qual, 'scraper': self.name, 'url': url, 'direct': False, 'debridonly': True})
            if dev_log == 'true':
                end_time = time.time() - float(start_time)
                send_log(self.name, end_time, count, title, year, season=season, episode=episode)
            return self.sources
        except Exception, argument:
            if dev_log == 'true':
                error_log(self.name, argument)
            return self.sources
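The size handling above normalizes index strings such as '700 MiB' or '1,4 GB' into a uniform GB figure for the quality label: commas become decimal points, everything but digits and the dot is stripped, and MiB/MB values are divided by 1024. The same logic as a standalone helper, runnable on inline samples:

import re

def size_to_gb(post):
    try:
        size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
        div = 1 if size.endswith(('GB', 'GiB')) else 1024
        value = float(re.sub(r'[^0-9.]', '', size.replace(',', '.'))) / div
        return '%.2f GB' % value
    except IndexError:
        return '0'

print(size_to_gb('detDesc: Uploaded 03-08, Size 700 MiB'))  # 0.68 GB
print(size_to_gb('detDesc: Uploaded 03-08, Size 1.4 GiB'))  # 1.40 GB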
Code Example #35
File: vkflix.py Project: varunrai/scrapers
    def scrape_movie(self, title, year, imdb, debrid=False):
        try:
            start_time = time.time()
            search_id = clean_search(title.lower())
            #print search_id
            #xbmc.log('@#@TITLE: %s' % search_id, xbmc.LOGNOTICE)
            start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(search_id))
            headers = {'User-Agent': client.agent()}

            r = client.request(start_url, headers=headers)
            posts = client.parseDOM(r, 'div', attrs={'id': 'movie-\d+'})
            posts = [(client.parseDOM(i, 'h4')[0]) for i in posts if i]
            #print posts
            posts = [(client.parseDOM(i, 'a', ret='href')[0],
                      client.parseDOM(i, 'a')[0]) for i in posts if i]

            #posts = [(i[0]) for i in posts if clean_title(search_id) == clean_title(i[1])]
            count = 0
            for link, found_title in posts:
                link = urlparse.urljoin(self.base_link, link) if link.startswith('/') else link
                if not clean_title(title) == clean_title(found_title): continue
                result = client.request(link, headers=headers)
                y = client.parseDOM(result, 'div', attrs={'class': 'showValue showValueRelease'})[0]
                if not year == y: continue

                streams = client.parseDOM(result, 'div', attrs={'class': 'linkTr'})
                for stream in streams:
                    quality = client.parseDOM(stream, 'div', attrs={'class': 'linkQualityText'})[0]
                    link = client.parseDOM(stream, 'div', attrs={'class':'linkHidden linkHiddenUrl'})[0]
                    #print link

                    if 'vidnode' in link:
                        continue

                    if 'HD' in quality:
                        quality = 'HD'
                    else:
                        quality = 'SD'

                    host = quality_tags._give_host(link)
                    #print host
                    count += 1
                    self.sources.append(
                        {'source': host, 'quality': quality, 'scraper': self.name, 'url': link, 'direct': False})
            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name, end_time, count, title, year)
            return self.sources
        except Exception, argument:
            if dev_log == 'true':
                error_log(self.name, argument)
            return self.sources
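Title matching throughout these examples goes through the shared clean_search/clean_title helpers rather than raw string comparison, so 'Black Panther', 'black-panther' and 'Black.Panther' all compare equal. The helpers themselves are not part of this listing; the sketch below is a plausible minimal version, an assumption rather than the package's actual code.

import re

def clean_title(title):
    # assumed sketch: lower-case and drop everything but letters and digits
    return re.sub(r'[^a-z0-9]', '', title.lower()) if title else title

print(clean_title('Black Panther') == clean_title('black-panther'))  # True
print(clean_title('Suits (2011)') == clean_title('suits 2011'))      # True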
Code Example #36
File: cooltv.py Project: varunrai/scrapers
    def get_source(self,media_url, title,year,season,episode,start_time):
        #print 'source season ' + media_url
        season_bollox = "0%s" % season if len(season) < 2 else season
        episode_bollox = "0%s" % episode if len(episode) < 2 else episode
        all_bollox = 's%se%s' % (season_bollox, episode_bollox)

        try:
            headers = {'User-Agent': client.agent()}
            html = client.request(media_url,headers=headers)
            match = re.findall(r'<li><a href="([^"]+)">([^<>]*)<span.+?>', str(html), re.I | re.DOTALL)
            count = 0
            for media_url, media_title in match:

                if all_bollox in media_title.lower():
             
                    link = client.request(media_url, headers=headers)

                    frame = client.parseDOM(link, 'iframe', ret='src')
                    print frame
                    for frame_link in frame:
                        self.sources.append({'source': 'Openload', 'quality': 'Unknown',
                                             'scraper': self.name, 'url': frame_link, 'direct': False})
                    

                    cool_links = re.compile('"dwn-box".+?ref="(.+?)" rel="nofollow">(.+?)<span',re.DOTALL).findall(link)
                    for vid_url, res in cool_links:
                        if '1080' in res:
                            res = '1080p'
                        elif '720' in res:
                            res = '720p'
                        elif 'HD' in res:
                            res = 'HD'
                        else:
                            res = 'SD'
                        count += 1

                        vid_url += '|User-Agent=%s&Referer=%s' % (client.agent(), media_url)
                        vid_url = urllib.quote(vid_url, '|:?/&+=_-')

                        self.sources.append({'source': 'Direct', 'quality': res, 'scraper': self.name, 'url': vid_url,
                                             'direct': True})
            if dev_log=='true':
                end_time = time.time() - start_time
                send_log(self.name, end_time, count, title, year, season='', episode='')
        except:
            pass
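The vid_url handling in both cooltv variants uses Kodi's convention for attaching HTTP headers to a playable URL: append them after a pipe character, then percent-quote the whole string while keeping the separator characters safe. In isolation (Python 2, like the snippets; both URLs below are made up):

import urllib

agent = 'Mozilla/5.0'
referer = 'http://example.com/episode-page'   # hypothetical episode page
vid_url = 'http://example.com/video.mp4'      # hypothetical stream URL

vid_url += '|User-Agent=%s&Referer=%s' % (agent, referer)
vid_url = urllib.quote(vid_url, '|:?/&+=_-')
print(vid_url)
# http://example.com/video.mp4|User-Agent=Mozilla/5.0&Referer=http://example.com/episode-page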
Code Example #37
File: seriesonline8.py Project: varunrai/scrapers
    def get_source(self, item_url, title, year, start_time):
        try:
            print 'PASSEDURL >>>>>>'+item_url
            count = 0
            headers = {'User-Agent': client.agent()}
            OPEN = client.request(item_url, headers=headers)
            #print OPEN
            Endlinks = dom_parser.parse_dom(OPEN, 'a', req='player-data')

            Endlinks = [(i.attrs['player-data'], i.content) for i in Endlinks if i]
            if 'Season' in year:
                Endlinks = [(i[0], 'SD') for i in Endlinks if i[1] in year]
            else:
                Endlinks = [(i[0], i[1]) for i in Endlinks if i]

            #print 'series8 - scrape_movie - EndLinks: '+str(Endlinks)
            for link, quality in Endlinks:
                qual = quality_tags.check_sd_url(quality)

                if 'vidcloud' in link:
                    link = 'https:' + link if link.startswith('//') else link
                    data = client.request(link, headers=headers)
                    link = re.findall('''file\s*:\s*['"](.+?)['"].+?type['"]\s*:\s*['"](.+?)['"]''', data, re.DOTALL)[0]
                    host = link[1]
                    link = link[0] + '|User-Agent=%s&Referer=https://vidcloud.icu/' % client.agent()
                    direct = True
                else:
                    host = link.split('//')[1].replace('www.', '')
                    host = host.split('/')[0].split('.')[0].title()
                    direct = False

                count += 1
                self.sources.append({'source': host, 'quality': qual, 'scraper': self.name, 'url': link, 'direct': direct})
            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name, end_time, count, title, year)
        except Exception, argument:
            if dev_log == 'true':
                error_log(self.name, argument)
            return []

#seriesonline8().scrape_movie('Black Panther', '2018', 'tt1825683', False)
#seriesonline8().scrape_episode('Suits','2011','','8','5','','')
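The vidcloud branch above extracts the stream URL and its type from a JavaScript player setup block by regex; note the pattern expects a bare file key but a quoted type key. Against a made-up sample of such a block (the real page markup may differ):

import re

# hypothetical player setup excerpt
data = 'player.setup({file: "https://host.example/stream.m3u8", "type": "hls"});'

link = re.findall(r'''file\s*:\s*['"](.+?)['"].+?type['"]\s*:\s*['"](.+?)['"]''',
                  data, re.DOTALL)[0]
print(link[0])  # https://host.example/stream.m3u8
print(link[1])  # hls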
Code Example #38
File: hdpopcorn.py Project: varunrai/scrapers
    def get_source(self,url, title, year, season, episode, start_time):
        try:
            scraper = cfscrape.create_scraper()
            headers = {'Origin': 'http://hdpopcorns.com', 'Referer': url,
                       'X-Requested-With': 'XMLHttpRequest',
                       'User-Agent': client.agent()}
            count = 0
            data = scraper.get(url, headers=headers).content
            data = client.parseDOM(data, 'div', attrs={'class': 'thecontent'})[0]
            FN720p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FileName720p'})[0]
            FS720p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FileSize720p'})[0]
            FSID720p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FSID720p'})[0]
            FN1080p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FileName1080p'})[0]
            FS1080p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FileSize1080p'})[0]
            FSID1080p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FSID1080p'})[0]
            post = {'FileName720p': FN720p, 'FileSize720p': FS720p, 'FSID720p': FSID720p,
                    'FileName1080p': FN1080p, 'FileSize1080p': FS1080p, 'FSID1080p': FSID1080p,
                    'x': 173, 'y': 22}
            data = scraper.post('%s/select-movie-quality.php' % self.base_link, data=post).content
            data = client.parseDOM(data, 'div', attrs={'id': 'btn_\d+p'})

            u = [client.parseDOM(i, 'a', ret='href')[0] for i in data]
            for url in u:
                quality, info = quality_tags.get_release_quality(url, url)

                url = client.replaceHTMLCodes(url)
                url = url.encode('utf-8')
                count += 1
                self.sources.append(
                    {'source': 'DirectLink', 'quality': quality, 'scraper': self.name, 'url': url, 'direct': True})

            if dev_log=='true':
                end_time = time.time() - start_time
                send_log(self.name,end_time,count,title,year, season=season,episode=episode)              
        except:
            pass

#hdpopcorn().scrape_movie('Blade Runner 2049', '2017', '', False) title contains 2 years
#hdpopcorn().scrape_movie('Deadpool 2', '2018', '', False) title contains number
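The hdpopcorns flow is a two-step round-trip: scrape six hidden <input> values from the movie page, then POST them back to select-movie-quality.php to receive the real download buttons. Here is the scraping half in isolation, with client.parseDOM swapped for a plain regex so the sketch runs standalone; the HTML sample is made up.

import re

html = '''<input type="hidden" name="FileName720p" value="Movie.2018.720p">
<input type="hidden" name="FileSize720p" value="850 MB">'''

def hidden_value(markup, name):
    # stands in for client.parseDOM(markup, 'input', ret='value', attrs={'name': name})[0]
    return re.findall('name="%s" value="([^"]+)"' % name, markup)[0]

post = {'FileName720p': hidden_value(html, 'FileName720p'),
        'FileSize720p': hidden_value(html, 'FileSize720p')}
print(post)  # {'FileName720p': 'Movie.2018.720p', 'FileSize720p': '850 MB'}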
Code Example #39
File: movienolimit.py Project: varunrai/scrapers
    def get_source(self,item_url,title,year,start_time,qual):
        try:
            #print 'PASSEDURL >>>>>>'+item_url
            count = 0
            headers={'User-Agent': client.agent()}
            OPEN = client.request(item_url, headers=headers)
            Endlinks = re.compile('<iframe src="(.+?)"',re.DOTALL).findall(OPEN)
            #print 'scraperchk - scrape_movie - EndLinks: '+str(Endlinks)
            for link in Endlinks:
                #print 'scraperchk - scrape_movie - link: '+str(link)        
                count += 1
                host = link.split('//')[1].replace('www.','')
                host = host.split('/')[0].split('.')[0].title()
                self.sources.append({'source': host, 'quality': qual, 'scraper': self.name, 'url': link, 'direct': False})
            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name, end_time, count, title, year)
        except Exception, argument:
            if dev_log == 'true':
                error_log(self.name, argument)
            return self.sources

#movienolimit().scrape_movie('Upgrade', '2018', '')
Code Example #40
File: watch32.py Project: varunrai/scrapers
    def scrape_movie(self, title, year, imdb, debrid=False):
        try:
            start_time = time.time() 
            search_id = clean_search(title.lower())
            start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(search_id))
            headers={'User-Agent': client.agent()}
            html = client.request(start_url, headers=headers)
            results = client.parseDOM(html, 'div', attrs={'class': 'video_title'})

            items = []
            for item in results:
                try:
                    data = dom_parser.parse_dom(item, 'a', req=['href', 'title'])[0]
                    t = data.content
                    y = re.findall('\((\d{4})\)', data.attrs['title'])[0]
                    qual = data.attrs['title'].split('-')[1]
                    link = data.attrs['href']

                    if not clean_title(t) == clean_title(title): continue
                    if not y == year: continue

                    items += [(link, qual)]

                except:
                    pass
            for item in items:
                count = 0
                try:
                    url = item[0] if item[0].startswith('http') else urlparse.urljoin(self.base_link, item[0])
                    r = client.request(url)

                    qual = client.parseDOM(r, 'h1')[0]
                    res = quality_tags.get_release_quality(item[1], qual)[0]

                    url = re.findall('''frame_url\s*=\s*["']([^']+)['"]\;''', r, re.DOTALL)[0]
                    url = url if url.startswith('http') else 'https:' + url  # frame_url is usually scheme-relative
                    if 'vidlink' in url:
                        html = client.request(url, headers=headers)
                        action = re.findall("action'\s*:\s*'([^']+)", html)[0]
                        postID = re.findall("postID\s*=\s*'([^']+)", html)[0]
                        url = 'https://vidlink.org' + re.findall("var\s*url\s*=\s*'([^']+)", html)[0]
                        data = {'browserName': 'Firefox',
                                'platform': 'Win32',
                                'postID': postID,
                                'action': action}

                        headers['X-Requested-With'] = 'XMLHttpRequest'
                        headers['Referer'] = url
                        html = client.request(url, post=data, headers=headers)
                        html = jsunpack.unpack(html).replace('\\', '')
                        sources = json.loads(re.findall('window\.srcs\s*=\s*([^;]+)', html, re.DOTALL)[0])
                        for src in sources:
                            r = requests.head(src['url'], headers={'User-Agent': client.agent()})
                            if r.status_code < 400:
                                movie_link = src['url']
                                count += 1
                                self.sources.append({'source': 'Googlelink', 'quality': res,
                                                    'scraper': self.name, 'url': movie_link, 'direct': True})
                            else:
                                continue

                except:
                    pass
                if dev_log=='true':
                    end_time = time.time() - start_time
                    send_log(self.name,end_time, count, title,year)
            #print self.sources
            return self.sources
        except Exception, argument:
            print argument
            if dev_log == 'true':
                error_log(self.name,argument)
            return self.sources

#watch32().scrape_movie('Black Panther', '2018', 'tt1825683', False)
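watch32 probes every vidlink source with a HEAD request and only lists URLs that answer below 400, which filters dead links without downloading anything. The check in isolation (requests assumed available; the timeout is an addition the original does not set):

import requests

def is_alive(url, agent='Mozilla/5.0'):
    # a HEAD request returns the status code without fetching the body
    try:
        r = requests.head(url, headers={'User-Agent': agent}, timeout=5)
        return r.status_code < 400
    except requests.RequestException:
        return False

print(is_alive('https://example.com/'))  # True while the host answers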
Code Example #41
File: vkflix.py Project: varunrai/scrapers
    def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
        try:
            start_time = time.time()
            search_id = clean_search(title.lower())
            start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(search_id))
            #print start_url
            headers = {'User-Agent': client.agent()}

            r = client.request(start_url, headers=headers)
            posts = client.parseDOM(r, 'div', attrs={'id': 'movie-\d+'})
            posts = [(client.parseDOM(i, 'h4')[0]) for i in posts if i]
            for item in posts:
                name = client.parseDOM(item, 'a')[0]
                link = client.parseDOM(item, 'a', ret='href')[0]
                if not clean_title(title) == clean_title(name): continue

                link = urlparse.urljoin(self.base_link, link)
                html = client.request(link)
                #<div class="season" id="season8">
                sep_id = 'Season %s Serie %s' % (int(season), int(episode))
                #print sep_id
                seasons = client.parseDOM(html, 'div', attrs={'class': 'season'})
                seasons = [i for i in seasons if 'season %s' % int(season) in i.lower()][0]

                epis = re.findall('<h3>(.+?)</div>\s+</div>\s+</div>\s+</div>', seasons, re.DOTALL | re.MULTILINE)
                epis = [i for i in epis if sep_id in i][0]

                count = 0
                streams = client.parseDOM(epis, 'div', attrs={'class': 'linkTr'})
                for stream in streams:
                    quality = client.parseDOM(stream, 'div', attrs={'class': 'linkQualityText'})[0]
                    link = client.parseDOM(stream, 'div', attrs={'class': 'linkHidden linkHiddenUrl'})[0]
                    #print link

                    if 'vidnode' in link:
                        continue

                    if 'HD' in quality:
                        quality = 'HD'
                    else:
                        quality = 'SD'

                    host = quality_tags._give_host(link)
                    # print host
                    count += 1
                    self.sources.append(
                        {'source': host, 'quality': quality, 'scraper': self.name, 'url': link, 'direct': False})

                if dev_log == 'true':
                    end_time = time.time() - start_time
                    send_log(self.name, end_time, count, title, year, season=season, episode=episode)

            return self.sources
        except Exception as argument:
            if dev_log == 'true':
                error_log(self.name, argument)
            return []


#vkflix().scrape_movie('Black Panther', '2018', '', False)
#vkflix().scrape_episode('Suits', '2011','','8','5','','')