def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() search_id = clean_search(title.lower()) start_url = '%s?s=%s' % (self.base_link, search_id.replace( ' ', '+')) print start_url headers = {'User_Agent': User_Agent} scraper = cfscrape.create_scraper() html = scraper.get(start_url, headers=headers, timeout=5).content # print html match = re.compile( '<div class="result-item">.+?href="(.+?)".+?alt="(.+?)".+?class="year">(.+?)</span>', re.DOTALL).findall(html) for item_url, name, yrs in match: #print item_url #print name ##print yrs if clean_title(search_id).lower() == clean_title(name).lower(): if year in yrs: print 'pass me ' + item_url self.get_source(item_url, title, year, start_time) return self.sources except Exception, argument: print argument if dev_log == 'true': error_log(self.name, argument) return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    """Search the site for a movie, then harvest external hoster links.

    Matches search results on the cleaned title, opens the detail page
    and collects every http(s) hoster link that passes filter_host().
    Returns self.sources (list of source dicts) on success or failure.
    """
    try:
        count = 0
        urls = []
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = '%s/search/?keyword=%s' %(self.base_link, urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent(), 'Referer': self.base_link}
        scraper = cfscrape.create_scraper()
        html = scraper.get(start_url, headers=headers).content
        match = re.compile('class="ml-item".+?href="(.+?)".+?<b>(.+?)</b>.+?<b>(.+?)</b>.+?alt="(.+?)"',re.DOTALL).findall(html)
        for item_url1, date, res, name in match:
            item_url = urlparse.urljoin(self.base_link, item_url1)
            # Skip results whose cleaned title is not an exact match.
            if not clean_title(search_id) == clean_title(name):
                continue
            OPEN = scraper.get(item_url, headers=headers).content
            # Hoster links sit between the "movie_links" list and the share icon.
            Endlinks = re.compile('class="movie_links"><li(.+?)<h3><b class="icon-share-alt"', re.DOTALL).findall(OPEN)[0]
            links = re.compile('target="_blank" href="(.+?)"', re.DOTALL).findall(Endlinks)
            for link in links:
                if not link.startswith('http'):
                    continue
                # NOTE(review): count is incremented before the filter_host
                # check, so filtered hosts are counted too — confirm intended.
                count += 1
                # Derive the bare host name (drop scheme, www. and path).
                host = link.split('//')[1].replace('www.', '')
                host = host.split('/')[0]
                if not filter_host(host):
                    continue
                self.sources.append({'source': host, 'quality': res, 'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() scraper = cfscrape.create_scraper() scrape = urllib.quote_plus(title.lower()) start_url = '%s/search.html?keyword=%s' %(self.base_link,scrape) #print 'SEARCH > '+start_url headers = {'User_Agent':User_Agent} html = scraper.get(start_url, headers=headers,timeout=10).content #print html thumbs = re.compile('<ul class="listing items">(.+?)</ul> ',re.DOTALL).findall(html) thumb = re.compile('href="(.+?)".+?alt="(.+?)"',re.DOTALL).findall(str(thumbs)) for link,link_title in thumb: if clean_title(title).lower() == clean_title(link_title).lower(): #print "<<<<<<<<<<<<<link>>>>>>>>>>"+link page_link = self.base_link+link headers = {'User_Agent':User_Agent} holdpage = scraper.get(page_link, headers=headers,timeout=5).content datecheck = re.compile('<span>Release: </span>(.+?)</li>',re.DOTALL).findall(holdpage)[0] if year in datecheck: movie_link = re.compile('<li class="child_episode".+?href="(.+?)"',re.DOTALL).findall(holdpage)[0] movie_link = self.base_link + movie_link #print 'GW >>>'+movie_link self.get_source(movie_link,title,year,'','',start_time) else:pass return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name,argument) return self.sources
def get_source(self,movie_link, title, year, season, episode, start_time): try: print 'passed show '+movie_link headers = {'User_Agent':User_Agent} scraper = cfscrape.create_scraper() html = scraper.get(movie_link,headers=headers,timeout=5).content # grab_id = re.compile('data-ids="(.+?)"',re.DOTALL).findall(html)[0] # nonce = re.compile('ajax_get_video_info":"(.+?)"',re.DOTALL).findall(html)[0] # print grab_id # print nonce # req_post = '%s/wp-admin/admin-ajax.php' %(self.base_link) # headers = {'User-Agent':User_Agent,'Referer':movie_link} # data = {'action':'ajax_get_video_info','ids':grab_id, # 'server':'1','nonce':nonce} # get_links = scraper.post(req_post,headers=headers,data=data,verify=False).content # print get_links links = re.compile('"file":"(.+?)","label":"(.+?)"',re.DOTALL).findall(html) count = 0 for final_url,res in links: final_url = final_url.replace('\\','') if '1080' in res: rez = '1080p' elif '720' in res: rez = '720p' else: rez = 'SD' count +=1 self.sources.append({'source': 'DirectLink','quality': rez,'scraper': self.name,'url': final_url,'direct': True}) if dev_log=='true': end_time = time.time() - start_time send_log(self.name,end_time,count,title,year, season=season,episode=episode) except: pass
def scrape_movie(self, title, year, imdb, debrid=False):
    """Find a movie via the site's search page using fuzzy title matching.

    Uses difflib.SequenceMatcher (> 80% ratio) to tolerate punctuation
    differences, confirms the release year on the detail page, then
    delegates to self.get_sources. Returns self.sources, or [] on error.
    """
    try:
        start_time = time.time()
        start_url = self.base_link + self.search_link + title.replace(' ', '%20')
        scraper = cfscrape.create_scraper()
        html = scraper.get(start_url, timeout=10).content
        match = re.compile(
            '<div class="video-thumbimg">.+?href="(.+?)".+?title="(.+?)"',
            re.DOTALL).findall(html)
        for url, name in match:
            # Compare squashed lower-case names; hyphens/spaces ignored.
            season_name_check = title.lower().replace(' ', '')
            name_check = name.replace('-', '').replace(' ', '').lower()
            check = difflib.SequenceMatcher(a=season_name_check, b=name_check)
            d = check.ratio() * 100
            if int(d) > 80:
                html2 = scraper.get(self.base_link + url, timeout=10).content
                final_page_match = re.compile(
                    '<div class="vc_col-sm-8 wpb_column column_container">.+?Released:(.+?)<.+?/series/(.+?)"',
                    re.DOTALL).findall(html2)
                for release_year, fin_url in final_page_match:
                    release_year = release_year.replace(' ', '')
                    fin_url = self.base_link + '/series/' + fin_url
                    # Require an exact year match before scraping sources.
                    if release_year == year:
                        self.get_sources(fin_url, title, year, '', '', start_time)
        return self.sources
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() if not debrid: return [] search_id = clean_search(title.lower()) start_url = "%s/?s=%s+%s" % (self.base_link, search_id.replace(' ', '+'), year) #print start_url headers = {'User_Agent': User_Agent} scraper = cfscrape.create_scraper() OPEN = scraper.get(start_url, headers=headers, timeout=5).content content = re.compile('<h2><a href="(.+?)"', re.DOTALL).findall(OPEN) for url in content: if 'truehd' in url: continue if not clean_title(title).lower() in clean_title(url).lower(): continue #print 'PASS '+url self.get_source(url, title, year, '', '', start_time) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, 'Check Search') return self.sources
def scrape_episode(self,title, show_year, year, season, episode, imdb, tvdb, debrid = False): try: start_time = time.time() scraper = cfscrape.create_scraper() scrape = urllib.quote_plus(title.lower()) start_url = '%s/search.html?keyword=%s' %(self.base_link,scrape) #print 'SEARCH > '+start_url headers = {'User_Agent':User_Agent} html = scraper.get(start_url, headers=headers,timeout=10).content thumbs = re.compile('<ul class="listing items">(.+?)</ul> ',re.DOTALL).findall(html) thumb = re.compile('href="(.+?)".+?alt="(.+?)"',re.DOTALL).findall(str(thumbs)) for link,link_title in thumb: if clean_title(title).lower() in clean_title(link_title).lower(): season_chk = '-season-%s' %season #print 'season chk% '+season_chk if season_chk in link: page_link = self.base_link + link #print 'page_link:::::::::::::: '+page_link headers = {'User_Agent':User_Agent} holdpage = scraper.get(page_link, headers=headers,timeout=5).content series_links = re.compile('<li class="child_episode".+?href="(.+?)"',re.DOTALL).findall(holdpage) for movie_link in series_links: episode_chk = '-episode-%sBOLLOX' %episode spoof_link = movie_link + 'BOLLOX' if episode_chk in spoof_link: movie_link = self.base_link + movie_link #print 'pass TWS episode check: '+movie_link self.get_source(movie_link,title,year,season,episode,start_time) else:pass return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name,argument) return self.sources
def scrape_movie(self, title, year, imdb, debrid = False): try: start_time = time.time() search_id = clean_search(title.lower()) start_url = '%s?s=%s' %(self.base_link,search_id.replace(' ','+')) print start_url headers = {'User_Agent':User_Agent} scraper = cfscrape.create_scraper() html = scraper.get(start_url,headers=headers, timeout=5).content # print html match = re.compile('<div class="result-item">.+?href="(.+?)".+?alt="(.+?)".+?class="year">(.+?)</span>',re.DOTALL).findall(html) for item_url,name,yrs in match: #print item_url #print name ##print yrs if clean_title(search_id).lower() == clean_title(name).lower(): if year in yrs: print 'pass me '+item_url self.get_source(item_url,title,year,start_time) return self.sources except Exception, argument: print argument if dev_log == 'true': error_log(self.name,argument) return self.sources
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() search_id = clean_search(title.lower()) start_url = '%s/?s=%s' % (self.base_link, search_id.replace(' ', '+')) headers = {'User_Agent': User_Agent} scraper = cfscrape.create_scraper() html = scraper.get(start_url, headers=headers, timeout=5).content Regex = re.compile( 'class="result-item".+?href="(.+?)".+?alt="(.+?)"', re.DOTALL).findall(html) for item_url, name in Regex: if not clean_title(title).lower() == clean_title(name).lower(): continue if not year in name: continue movie_link = item_url print 'Grabbed movie url to pass > ' + movie_link self.get_source(movie_link, title, year, '', '', start_time) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, argument) return self.sources
def get_source(self, item_url, title, year, season, episode, start_time):
    """Parse a detail page's table rows into hoster sources.

    Each <tr> carries a quality span (class "quality_<qual>") and an
    <a data-href> hoster link; hosts failing filter_host() are skipped.
    Returns self.sources in every case.
    """
    try:
        count = 0
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
            'Referer': self.base_link}
        scraper = cfscrape.create_scraper()
        r = scraper.get(item_url, headers=headers).content
        data = client.parseDOM(r, 'tr')
        for item in data:
            # Quality is encoded in the span's class name, e.g. "quality_720p".
            qual = client.parseDOM(item, 'span', ret='class')[0]
            qual = qual.replace('quality_', '')
            link = client.parseDOM(item, 'a', ret='data-href')[0]
            # Derive the bare host name (drop scheme, www. and path).
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0]
            if not filter_host(host):
                continue
            count += 1
            self.sources.append({'source': host, 'quality': qual,
                                 'scraper': self.name,
                                 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year,
                     season='', episode='')
        return self.sources
    except BaseException:
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() if not debrid: return [] search_id = clean_search(title.lower()) start_url = "%s/?s=%s+%s" % (self.base_link, search_id.replace(' ','+'),year) #print start_url headers = {'User_Agent':User_Agent} scraper = cfscrape.create_scraper() OPEN = scraper.get(start_url,headers=headers,timeout=5).content content = re.compile('<h2><a href="(.+?)"',re.DOTALL).findall(OPEN) for url in content: if 'truehd' in url: continue if not clean_title(title).lower() in clean_title(url).lower(): continue #print 'PASS '+url self.get_source(url,title,year,'','',start_time) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name,'Check Search') return self.sources
def get_source(self, movie_link, title, year, season, episode, start_time):
    """Collect lazy-loaded embed links from a movie page as hoster sources.

    Reads every data-lazy-src URL, skips YouTube trailers, buckets quality
    from the URL text and appends one source dict per embed.
    """
    try:
        page = cfscrape.create_scraper().get(movie_link).content
        embeds = re.compile('data-lazy-src="(.+?)"', re.DOTALL).findall(page)
        count = 0
        for embed in embeds:
            if 'youtube' in embed:
                continue  # trailer, not a hoster
            if '1080p' in embed:
                qual = '1080p'
            elif '720p' in embed:
                qual = '720p'
            else:
                qual = 'SD'
            # Bare host name: drop scheme, www. prefix, path and TLD.
            hoster = embed.split('//')[1].replace('www.', '')
            hoster = hoster.split('/')[0].split('.')[0].title()
            count += 1
            self.sources.append({'source': hoster, 'quality': qual,
                                 'scraper': self.name, 'url': embed,
                                 'direct': False})
        if dev_log == 'true':
            elapsed = time.time() - start_time
            send_log(self.name, elapsed, count, title, year,
                     season=season, episode=episode)
    except:
        pass
def scrape_episode(self,title, show_year, year, season, episode, imdb, tvdb, debrid = False): try: start_time = time.time() if not debrid: return [] season_url = "0%s"%season if len(season)<2 else season episode_url = "0%s"%episode if len(episode)<2 else episode sea_epi ='s%se%s'%(season_url,episode_url) search_id = clean_search(title.lower()) start_url = "%s/?s=%s+%s" % (self.base_link, search_id.replace(' ','+'),sea_epi) print start_url headers = {'User_Agent':User_Agent} scraper = cfscrape.create_scraper() OPEN = scraper.get(start_url,headers=headers,timeout=5).content content = re.compile('<h2><a href="(.+?)"',re.DOTALL).findall(OPEN) for url in content: if not clean_title(title).lower() in clean_title(url).lower(): continue #print 'PASS '+url self.get_source(url,title,year,season,episode,start_time) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name,'Check Search') return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    """Search with '<title> <year>', parse name/year from result titles.

    Extracts the trailing 4-digit year from each result title (after
    dropping 720p/1080p tokens), matches the cleaned name and year, then
    delegates the matching URL to self.get_source.
    """
    try:
        start_time = time.time()
        search_id = '%s %s' % (clean_search(title.lower()), year)
        start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent()}
        scraper = cfscrape.create_scraper()
        html = scraper.get(start_url, headers=headers).content
        data = client.parseDOM(html, 'div', attrs={'id': 'content_box'})[0]
        data = client.parseDOM(data, 'h2')  # list with all search results
        data = [dom_parser.parse_dom(i, 'a', req=['href', 'title'])[0] for i in data if i]  # scraping url-title
        links = [(i.attrs['href'], i.attrs['title']) for i in data if i]  # (link, title) per result
        for m_url, m_title in links:
            # Last 4-digit group (resolution tokens removed) is the year.
            movie_year = re.findall("(\d{4})", re.sub('\d{3,4}p', '', m_title))[-1]
            movie_name = m_title.split(movie_year)[0]
            if not clean_title(title) == clean_title(movie_name):
                continue
            if not year in movie_year:
                continue
            url = m_url
            self.get_source(url, title, year, '', '', start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() search_id = clean_search(title.lower()) start_url = '%s/?s=%s' % (self.base_link, search_id.replace(' ', '+')) #print 'search>>>'+start_url headers = {'User_Agent': User_Agent} scraper = cfscrape.create_scraper() html = scraper.get(start_url, headers=headers, timeout=5).content Regex = re.compile( 'id="mt-.+?href="(.+?)".+?class="tt">(.+?)</span>', re.DOTALL).findall(html) for item_url, item_name in Regex: if not clean_title(title).lower() == clean_title( item_name).lower(): continue if not year in item_name: continue #print item_url self.get_source(item_url, title, year, '', '', start_time) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, argument) return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    """Search self.search_url, pick the exact-title/year post, delegate on.

    Picks the first result card whose HTML mentions the year and whose
    title attribute cleans to the requested title. A miss raises
    IndexError, which is absorbed by the broad except below.
    """
    try:
        start_time = time.time()
        search_id = urllib.quote_plus(clean_search(title))
        query = self.search_url % search_id
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
            'Referer': self.base_link
        }
        scraper = cfscrape.create_scraper()
        r = scraper.get(query, headers=headers).content
        posts = client.parseDOM(
            r, 'div', attrs={'class': 'col-xs-4 col-sm-4 col-md-3 col-lg-3'})
        # Keep only result cards that mention the year anywhere in their HTML.
        posts = [
            dom.parse_dom(i, 'a', req='href')[0] for i in posts if year in i
        ]
        # First card whose title attribute cleans to the requested title;
        # IndexError when nothing matches (caught below).
        post = [
            i.attrs['href'] for i in posts
            if clean_title(title) == clean_title(i.attrs['title'])
        ][0]
        self.get_source(post, title, year, '', '', start_time)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, 'Check Search')
    return self.sources
def get_source(self, url, title, year, season, episode, start_time):
    """Scrape <singlelink> blocks for Real-Debrid-capable download links.

    Skips .rar/.srt files, buckets quality from the URL text and keeps
    only hosts present in the Real-Debrid domain list.
    """
    try:
        # BUG FIX: header key must be 'User-Agent' (hyphen), not 'User_Agent'.
        headers = {'User-Agent': User_Agent}
        scraper = cfscrape.create_scraper()
        links = scraper.get(url, headers=headers, timeout=3).content
        Regex = re.compile('<singlelink>(.+?)</strong><br', re.DOTALL).findall(links)
        LINK = re.compile('href="([^"]+)"', re.DOTALL).findall(str(Regex))
        count = 0
        # Hoist the RD domain lookup out of the loop; it is loop-invariant.
        rd_domains = get_rd_domains()
        for url in LINK:
            if '.rar' in url or '.srt' in url:
                continue  # skip archives and subtitle files
            if '1080' in url:
                res = '1080p'
            elif '720' in url:
                res = '720p'
            elif 'HDTV' in url:
                res = 'DVD'
            else:
                # BUG FIX: res was previously left unbound here ('else: pass'),
                # raising NameError (silently swallowed by the bare except) on
                # the first link without a quality marker, or reusing the
                # previous link's stale value. Default to SD like the sibling
                # scrapers do.
                res = 'SD'
            host = url.split('//')[1].replace('www.', '')
            host = host.split('/')[0].lower()
            if host in rd_domains:
                count += 1
                self.sources.append({'source': host, 'quality': res,
                                     'scraper': self.name, 'url': url,
                                     'direct': False, 'debridonly': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year,
                     season=season, episode=episode)
    except:
        pass
def get_source(self, item_url, title, year, start_time, res):
    """Decode base64 'TrvideoFirst' embeds and resolve openlinks pages.

    Each embed payload is base64-encoded iframe HTML; the iframe src is
    extracted by string splitting, and 'openlinks' pages are fetched to
    read the final hoster URL from their meta tag.
    """
    try:
        count = 0
        scraper = cfscrape.create_scraper()
        headers = {'User-Agent': random_agent()}
        OPEN = scraper.get(item_url, headers=headers, timeout=5).content
        Endlinks = re.compile('TrvideoFirst\">(.+?)</div>', re.DOTALL).findall(OPEN)
        for link2 in Endlinks:
            # Embed payloads are base64-encoded iframe HTML.
            link1 = base64.b64decode(link2)
            # Pull the iframe src URL out of the decoded markup.
            Endlink = link1.split('src=')[1].split('allowfullscreen')[0].replace('"', '').rstrip()
            if 'openlinks' in Endlink:
                headers = {'User-Agent': random_agent()}
                OPEN = requests.get(Endlink, headers=headers, timeout=5, allow_redirects=True).content
                # Final hoster URL sits in a <meta ...url" content="..."> tag.
                finalurl = re.compile('url\" content="(.+?)">', re.DOTALL).findall(OPEN)
                for link in finalurl:
                    count += 1
                    host = link.split('//')[1].replace('www.', '')
                    host = host.split('/')[0].split('.')[0].title()
                    self.sources.append({'source': host, 'quality': res, 'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False): try: start_time = time.time() if not debrid: return [] season_url = "0%s" % season if len(season) < 2 else season episode_url = "0%s" % episode if len(episode) < 2 else episode sea_epi = 's%se%s' % (season_url, episode_url) start_url = "%s/?s=%s+%s" % (self.base_link, title.replace( ' ', '+').lower(), sea_epi) headers = {'User_Agent': User_Agent} scraper = cfscrape.create_scraper() OPEN = scraper.get(start_url, headers=headers, timeout=5).content content = re.compile('<h2><a href="([^"]+)"', re.DOTALL).findall(OPEN) for url in content: if not clean_title(title).lower() in clean_title(url).lower(): continue self.get_source(url, title, year, season, episode, start_time) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, argument) return self.sources
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() search_id = clean_search(title.lower()) start_url = '%s?s=%s' % (self.base_link, search_id.replace( ' ', '+')) #print 'scraperchk - scrape_movie - start_url: ' + start_url headers = {'User-Agent': random_agent()} scraper = cfscrape.create_scraper() html = scraper.get(start_url, headers=headers, timeout=5).content match = re.compile( '<li class="TPostMv".+?class="TPMvCn">.+?<a href="(.+?)"><div class="Title">(.+?)</div></a>.+?class="Date">(.+?)</span><span class="Qlty">(.+?)</span>', re.DOTALL).findall(html) for item_url, name, date, res in match: #print 'scraperchk - scrape_movie - name: '+name+ ' '+date #print 'scraperchk - scrape_movie - item_url: '+item_url+' '+res if year in date: if clean_title(search_id).lower() == clean_title( name).lower(): #print 'scraperchk - scrape_movie - Send this URL: ' + item_url self.get_source(item_url, title, year, start_time, res) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, argument)
def get_source(self, item_url, title, year, start_time): # print item_url try: count = 0 headers = {'User_Agent': User_Agent} scraper = cfscrape.create_scraper() OPEN = scraper.get(item_url, headers=headers, timeout=10).content # print OPEN # match = re.compile('<iframe.+?src="(.+?)"',re.DOTALL).findall(OPEN) # for link in match: # if 'youtube' not in link: # host = link.split('//')[1].replace('www.','') # host = host.split('/')[0].lower() # host = host.split('.')[0] # count+=1 # self.sources.append({'source': host, 'quality': 'SD', 'scraper': self.name, 'url': link,'direct': False}) match2 = re.compile('href="(https://tvmoviestream.me/links/.+?)"', re.DOTALL).findall(OPEN) for altlink in match2: # print altlink headers = {'User-Agent': User_Agent} r = requests.get(altlink, headers=headers, allow_redirects=False) final_url = r.headers['location'] host = final_url.split('//')[1].replace('www.', '') host = host.split('/')[0].lower() host = host.split('.')[0] if '1080' in final_url: res = '1080p' elif '720' in final_url: res = '720p' else: res = 'SD' count += 1 self.sources.append({ 'source': host, 'quality': res, 'scraper': self.name, 'url': final_url, 'direct': False }) if dev_log == 'true': end_time = time.time() - start_time send_log(self.name, end_time, count, title, year) except Exception, argument: print argument if dev_log == 'true': error_log(self.name, argument) return self.sources
def get_source(self, url, title, year, season, episode, start_time): try: headers = {'User_Agent': User_Agent} scraper = cfscrape.create_scraper() links = scraper.get(url, headers=headers, timeout=3).content LINK = re.compile('href="([^"]+)" rel="nofollow"', re.DOTALL).findall(links) count = 0 for url in LINK: if '.rar' not in url: if '.srt' not in url: if '1080' in url: res = '1080p' elif '720' in url: res = '720p' elif 'HDTV' in url: res = 'HD' else: res = "SD" host = url.split('//')[1].replace('www.', '') host = host.split('/')[0].lower() # if not filter_host(host): # continue # if debrid == "true": rd_domains = get_rd_domains() if host in rd_domains: count += 1 self.sources.append({ 'source': host, 'quality': res, 'scraper': self.name, 'url': url, 'direct': False, 'debridonly': True }) if dev_log == 'true': end_time = time.time() - start_time send_log(self.name, end_time, count, title, year, season=season, episode=episode) except Exception, argument: if dev_log == 'true': error_log(self.name, argument) return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    """Probe guessable /film/watch-<title>-<suffix> URLs for a movie page.

    For each candidate URL, verifies the page's hidden title field, then
    harvests 'metaframe rptss' iframe links — resolving streamango embed
    quality and goo.gl shortener redirects specially.
    """
    try:
        start_time = time.time()
        mock_ID = clean_search(title.lower())
        # The site uses several URL suffixes; try each until one matches.
        loop_url = ['online-free', 'for-free', 'online-free-movies', 'free', '']
        for attempt in loop_url:
            movie_url = '%s/film/watch-%s-%s' % (self.base_link, mock_ID.replace(' ', '-'), attempt)
            if movie_url.endswith('-'):
                # Empty suffix: drop both the trailing dash and 'watch-'.
                movie_url = movie_url.replace('watch-', '')[:-1]
            headers = {'User-Agent': User_Agent}
            scraper = cfscrape.create_scraper()
            html = scraper.get(movie_url, headers=headers, timeout=5).content
            match = re.compile('name="title" value="(.+?)"', re.DOTALL).findall(html)
            for item_title in match:
                if not clean_title(title.lower()) == clean_title(item_title.lower()):
                    continue
                Regex = re.compile('</iframe>.+?class="metaframe rptss" src="(.+?)"', re.DOTALL).findall(html)
                count = 0
                for link in Regex:
                    host = link.split('//')[1].replace('www.', '')
                    host = host.split('/')[0].split('.')[0].title()
                    if 'streamango.com' in link:
                        # Read the real height from the embed page for quality.
                        holder = requests.get(link).content
                        qual = re.compile('type:"video/mp4".+?height:(.+?),', re.DOTALL).findall(holder)[0]
                        count += 1
                        self.sources.append({'source': host, 'quality': qual + 'p', 'scraper': self.name, 'url': link, 'direct': False})
                    elif 'goo.gl' in link:
                        # Shortener: read the redirect target (Waaw hoster).
                        headers = {'User-Agent': User_Agent}
                        r = requests.get(link, headers=headers, allow_redirects=False)
                        link = r.headers['location']
                        count += 1
                        self.sources.append({'source': 'Waaw', 'quality': '720p', 'scraper': self.name, 'url': link, 'direct': False})
                    else:
                        count += 1
                        self.sources.append({'source': host, 'quality': '720p', 'scraper': self.name, 'url': link, 'direct': False})
                if dev_log == 'true':
                    end_time = time.time() - start_time
                    send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def get_source(self, movie_link, title, year, season, episode, start_time): try: print 'passed show ' + movie_link headers = {'User_Agent': User_Agent} scraper = cfscrape.create_scraper() html = scraper.get(movie_link, headers=headers, timeout=5).content # grab_id = re.compile('data-ids="(.+?)"',re.DOTALL).findall(html)[0] # nonce = re.compile('ajax_get_video_info":"(.+?)"',re.DOTALL).findall(html)[0] # print grab_id # print nonce # req_post = '%s/wp-admin/admin-ajax.php' %(self.base_link) # headers = {'User-Agent':User_Agent,'Referer':movie_link} # data = {'action':'ajax_get_video_info','ids':grab_id, # 'server':'1','nonce':nonce} # get_links = scraper.post(req_post,headers=headers,data=data,verify=False).content # print get_links links = re.compile('"file":"(.+?)","label":"(.+?)"', re.DOTALL).findall(html) count = 0 for final_url, res in links: final_url = final_url.replace('\\', '') if '1080' in res: rez = '1080p' elif '720' in res: rez = '720p' else: rez = 'SD' count += 1 self.sources.append({ 'source': 'DirectLink', 'quality': rez, 'scraper': self.name, 'url': final_url, 'direct': True }) if dev_log == 'true': end_time = time.time() - start_time send_log(self.name, end_time, count, title, year, season=season, episode=episode) except: pass
def _get_items(self, url):
    """Parse a torrent-index table into (name, link, size) tuples.

    Filters rows to those matching self.title and self.hdlr (SxxExx tag
    or year) and extracts a human-readable size, defaulting to '0' when
    no size is present. Returns self.items in every case.
    """
    try:
        headers = {'User-Agent': client.agent()}
        scraper = cfscrape.create_scraper()
        r = scraper.get(url, headers=headers)
        posts = client.parseDOM(r.content, 'tbody')[0]
        posts = client.parseDOM(posts, 'tr')
        for post in posts:
            data = dom.parse_dom(post, 'a', req='href')[1]
            link = urlparse.urljoin(self.base_link, data.attrs['href'])
            name = data.content
            t = name.split(self.hdlr)[0]
            # BUG FIX: the old pattern '(|)' only matches the empty string,
            # so the sub was a no-op; '[()]' actually strips literal
            # parentheses from the release name before title comparison.
            if not clean_title(re.sub('[()]', '', t)) == clean_title(
                    self.title):
                continue
            try:
                # Prefer an SxxExx / Sxx tag; fall back to a bare year.
                y = re.findall(
                    '[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]',
                    name, re.I)[-1].upper()
            except BaseException:
                y = re.findall(
                    '[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]',
                    name, re.I)[-1].upper()
            if not y == self.hdlr:
                continue
            try:
                size = re.findall(
                    '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))',
                    post)[0]
                # Normalise MiB/MB to GB for a uniform display value.
                div = 1 if size.endswith(('GB', 'GiB')) else 1024
                size = float(
                    re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except BaseException:
                size = '0'
            self.items.append((name, link, size))
        return self.items
    except BaseException:
        return self.items
def get_source(self,item_url, title, year, start_time): # print item_url try: count = 0 headers = {'User_Agent':User_Agent} scraper = cfscrape.create_scraper() OPEN = scraper.get(item_url,headers=headers,timeout=10).content # print OPEN # match = re.compile('<iframe.+?src="(.+?)"',re.DOTALL).findall(OPEN) # for link in match: # if 'youtube' not in link: # host = link.split('//')[1].replace('www.','') # host = host.split('/')[0].lower() # host = host.split('.')[0] # count+=1 # self.sources.append({'source': host, 'quality': 'SD', 'scraper': self.name, 'url': link,'direct': False}) match2 = re.compile('href="(https://tvmoviestream.me/links/.+?)"',re.DOTALL).findall(OPEN) for altlink in match2: # print altlink headers = {'User-Agent': User_Agent} r = requests.get(altlink,headers=headers,allow_redirects=False) final_url = r.headers['location'] host = final_url.split('//')[1].replace('www.','') host = host.split('/')[0].lower() host = host.split('.')[0] if '1080' in final_url: res = '1080p' elif '720' in final_url: res = '720p' else: res='SD' count += 1 self.sources.append({'source': host, 'quality': res, 'scraper': self.name, 'url': final_url,'direct': False}) if dev_log=='true': end_time = time.time() - start_time send_log(self.name,end_time,count,title,year) except Exception, argument: print argument if dev_log == 'true': error_log(self.name,argument) return self.sources
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    """Fuzzy-match a show+season on the search page, then find the episode.

    SequenceMatcher (> 80% ratio) matches '<title>season<N>'; the season
    page is scanned for the numbered episode, whose URL is handed to
    self.get_sources. Returns self.sources, or [] on error.
    """
    try:
        start_time = time.time()
        start_url = self.base_link + self.search_link + title.replace(' ', '%20') + '%20season%20' + season
        scraper = cfscrape.create_scraper()
        html = scraper.get(start_url, timeout=10).content
        match = re.compile(
            '<div class="video-thumbimg">.+?href="(.+?)".+?title="(.+?)"',
            re.DOTALL).findall(html)
        for url, name in match:
            # Compare squashed lower-case names; hyphens/spaces ignored.
            season_name_check = title.lower().replace(' ', '') + 'season' + season
            name_check = name.replace('-', '').replace(' ', '').lower()
            check = difflib.SequenceMatcher(a=season_name_check, b=name_check)
            d = check.ratio() * 100
            if int(d) > 80:
                html2 = scraper.get(self.base_link + url + '/season', timeout=10).content
                episodes = re.findall(
                    '<div class="video_container">.+?<a href="(.+?)" class="view_more"></a></div>.+?class="videoHname"><b>(.+?)</b></a></span>.+?<div class="video_date icon-calendar">.+?, (.+?)</div>',
                    html2, re.DOTALL)
                for url2, ep_no, aired_year in episodes:
                    url2 = self.base_link + url2
                    # Displayed label looks like 'Episode 5:'; reduce to '5'.
                    ep_no = ep_no.replace('Episode ', '').replace(':', '')
                    if ep_no == episode:
                        self.get_sources(url2, title, year, season, episode, start_time)
        return self.sources
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() if not debrid: return self.sources start_url = "%s/?s=%s" % (self.base_link, title.replace(' ', '+').lower()) headers = {'User-Agent': client.agent()} scraper = cfscrape.create_scraper() OPEN = scraper.get(start_url, headers=headers).content content = re.compile('<h2><a href="(.+?)"', re.DOTALL).findall(OPEN) for url in content: self.get_source(url, title, year, '', '', start_time) except Exception, argument: if dev_log == 'true': error_log(self.name, argument) return self.sources
def get_source(self, item_url, title, year, start_time, res):
    """Decode base64 'TrvideoFirst' embeds and resolve openlinks pages.

    Each embed payload is base64-encoded iframe HTML; the iframe src is
    extracted by string splitting, and 'openlinks' pages are fetched to
    read the final hoster URL from their meta tag.
    """
    try:
        count = 0
        scraper = cfscrape.create_scraper()
        headers = {'User-Agent': random_agent()}
        OPEN = scraper.get(item_url, headers=headers, timeout=5).content
        Endlinks = re.compile('TrvideoFirst\">(.+?)</div>',
                              re.DOTALL).findall(OPEN)
        for link2 in Endlinks:
            # Embed payloads are base64-encoded iframe HTML.
            link1 = base64.b64decode(link2)
            # Pull the iframe src URL out of the decoded markup.
            Endlink = link1.split('src=')[1].split(
                'allowfullscreen')[0].replace('"', '').rstrip()
            if 'openlinks' in Endlink:
                headers = {'User-Agent': random_agent()}
                OPEN = requests.get(Endlink, headers=headers, timeout=5,
                                    allow_redirects=True).content
                # Final hoster URL sits in a <meta ...url" content="..."> tag.
                finalurl = re.compile('url\" content="(.+?)">',
                                      re.DOTALL).findall(OPEN)
                for link in finalurl:
                    count += 1
                    host = link.split('//')[1].replace('www.', '')
                    host = host.split('/')[0].split('.')[0].title()
                    self.sources.append({
                        'source': host,
                        'quality': res,
                        'scraper': self.name,
                        'url': link,
                        'direct': False
                    })
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
def scrape_movie(self, title, year, imdb, debrid=False):
    """Debrid-only: query the JSON search endpoint, keep quality-tagged
    posts whose title matches, and scrape each candidate in a worker
    thread; blocks until all workers finish."""
    try:
        start_time = time.time()
        if not debrid:
            return self.sources
        query = '%s %s' % (title, year)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        query = urllib.quote_plus(query).replace('+', '%2B')
        url = urlparse.urljoin(self.search_base_link, self.search_link % query)
        headers = {'User-Agent': client.agent(), 'Referer': self.base_link}
        scraper = cfscrape.create_scraper()
        response = scraper.get(url, headers=headers).content
        results = json.loads(response)['results']
        candidates = [(item['post_title'], item['post_name']) for item in results]
        # Keep results whose title (before the year) matches the request.
        candidates = [(post_title, post_name)
                      for post_title, post_name in candidates
                      if clean_title(post_title.lower().split(year)[0]) == clean_title(title)]
        # Renamed from `filter` to avoid shadowing the builtin.
        wanted_tags = ['uhd', '4K', '2160', '1080', '720', 'hevc', 'bluray', 'web']
        posts = [(urlparse.urljoin(self.base_link, post_name), year)
                 for _, post_name in candidates
                 if any(tag in post_name for tag in wanted_tags)]
        threads = [workers.Thread(self.get_sources, post, title, year, '', '',
                                  str(start_time))
                   for post in posts]
        for worker in threads:
            worker.start()
        # Poll until every worker has finished.
        while any(worker.is_alive() for worker in threads):
            time.sleep(0.1)
        return self.sources
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    """Build the episode URL directly from the slugged title and collect
    every iframe embed on the page, probing streamango and goo.gl links
    for a quality label (everything else defaults to 720p)."""
    try:
        start_time = time.time()
        movie_id = clean_search(title.lower().replace(' ', '-'))
        show_url = '%s/episodes/%s-%sx%s' % (self.base_link, movie_id, season, episode)
        headers = {'User-Agent': User_Agent}
        scraper = cfscrape.create_scraper()
        html = scraper.get(show_url, headers=headers, timeout=5).content
        count = 0
        for link in re.compile('class="metaframe rptss" src="(.+?)"').findall(html):
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0].split('.')[0].title()
            if 'streamango.com' in link:
                # Streamango exposes the mp4 height, used as the quality.
                holder = requests.get(link).content
                qual = re.compile('type:"video/mp4".+?height:(.+?),',
                                  re.DOTALL).findall(holder)[0]
                count += 1
                self.sources.append({'source': host, 'quality': qual + 'p',
                                     'scraper': self.name, 'url': link,
                                     'direct': False})
            elif 'goo.gl' in link:
                # Shortener: follow one redirect hop manually to the Waaw host.
                r = requests.get(link, headers={'User-Agent': User_Agent},
                                 allow_redirects=False)
                link = r.headers['location']
                count += 1
                self.sources.append({'source': 'Waaw', 'quality': '720p',
                                     'scraper': self.name, 'url': link,
                                     'direct': False})
            else:
                count += 1
                self.sources.append({'source': host, 'quality': '720p',
                                     'scraper': self.name, 'url': link,
                                     'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year,
                     season=season, episode=episode)
        return self.sources
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    """Search the site for the movie, match on title and year, and pass the
    detail page (with its listed quality) to get_source().

    Fix: the except path had no return, so callers got None on any scrape
    failure; it now returns self.sources like the success path and like
    every other scraper in this file.
    """
    try:
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = '%s?s=%s' % (self.base_link, search_id.replace(' ', '+'))
        headers = {'User-Agent': random_agent()}
        scraper = cfscrape.create_scraper()
        html = scraper.get(start_url, headers=headers, timeout=5).content
        match = re.compile('<li class="TPostMv".+?class="TPMvCn">.+?<a href="(.+?)"><div class="Title">(.+?)</div></a>.+?class="Date">(.+?)</span><span class="Qlty">(.+?)</span>', re.DOTALL).findall(html)
        for item_url, name, date, res in match:
            # Require both the release year and an exact cleaned-title match.
            if year in date:
                if clean_title(search_id).lower() == clean_title(name).lower():
                    self.get_source(item_url, title, year, start_time, res)
        return self.sources
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        # Fix: always return the (possibly empty) sources list, never None.
        return self.sources
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    """Search for the show, rewrite its /series/ URL into the matching
    /episodes/ URL for the requested season/episode, and hand it to
    get_source().

    Fix: the request header key was 'User_Agent' (underscore), which is not
    a valid HTTP header name and would not be honored as a User-Agent.
    """
    try:
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = '%s/?s=%s' % (self.base_link, search_id.replace(' ', '+'))
        # Fix: 'User_Agent' -> 'User-Agent'.
        headers = {'User-Agent': User_Agent}
        scraper = cfscrape.create_scraper()
        html = scraper.get(start_url, headers=headers, timeout=5).content
        results = re.compile('class="result-item".+?href="(.+?)".+?alt="(.+?)"',
                             re.DOTALL).findall(html)
        for item_url, name in results:
            if not clean_title(title).lower() == clean_title(name).lower():
                continue
            if "/series/" in item_url:
                # Rewrite ".../series/<slug>/" to ".../episodes/<slug>-SxE/".
                movie_link = item_url[:-1].replace('/series/', '/episodes/') + '-%sx%s/' % (season, episode)
                self.get_source(movie_link, title, year, season, episode, start_time)
        return self.sources
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def get_source(self, url, title, year, season, episode, start_time):
    """Read the hidden quality-form fields from a movie page, POST them back
    to select-movie-quality.php and append the resulting direct links."""
    try:
        scraper = cfscrape.create_scraper()
        headers = {'Origin': 'http://hdpopcorns.com', 'Referer': url,
                   'X-Requested-With': 'XMLHttpRequest',
                   'User-Agent': client.agent()}
        count = 0
        page = scraper.get(url, headers=headers).content
        content = client.parseDOM(page, 'div', attrs={'class': 'thecontent'})[0]

        def _field(name):
            # First value of the named hidden <input> inside the content div.
            return client.parseDOM(content, 'input', ret='value',
                                   attrs={'name': name})[0]

        post = {'FileName720p': _field('FileName720p'),
                'FileSize720p': _field('FileSize720p'),
                'FSID720p': _field('FSID720p'),
                'FileName1080p': _field('FileName1080p'),
                'FileSize1080p': _field('FileSize1080p'),
                'FSID1080p': _field('FSID1080p'),
                'x': 173, 'y': 22}
        result = scraper.post('%s/select-movie-quality.php' % self.base_link,
                              data=post).content
        buttons = client.parseDOM(result, 'div', attrs={'id': 'btn_\d+p'})
        for block in buttons:
            link = client.parseDOM(block, 'a', ret='href')[0]
            quality, info = quality_tags.get_release_quality(link, link)
            link = client.replaceHTMLCodes(link)
            link = link.encode('utf-8')
            count += 1
            self.sources.append({'source': 'DirectLink', 'quality': quality,
                                 'scraper': self.name, 'url': link,
                                 'direct': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year,
                     season=season, episode=episode)
    except:
        pass

#hdpopcorn().scrape_movie('Blade Runner 2049', '2017', '', False) title contains 2 years
#hdpopcorn().scrape_movie('Deadpool 2', '2018', '', False) title contains number
def get_source(self, movie_link, title, year, season, episode, start_time):
    """Collect lazy-loaded embed URLs from an episode page, tagging quality
    from the link text (1080p/720p, otherwise SD); youtube links skipped."""
    try:
        scraper = cfscrape.create_scraper()
        html = scraper.get(movie_link).content
        count = 0
        for link in re.compile('data-lazy-src="(.+?)"', re.DOTALL).findall(html):
            if 'youtube' in link:
                continue
            if '1080p' in link:
                qual = '1080p'
            elif '720p' in link:
                qual = '720p'
            else:
                qual = 'SD'
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0].split('.')[0].title()
            count += 1
            self.sources.append({
                'source': host,
                'quality': qual,
                'scraper': self.name,
                'url': link,
                'direct': False
            })
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year,
                     season=season, episode=episode)
    except:
        pass
def __init__(self):
    """Scraper setup for 321movies.cc with a Cloudflare-aware session."""
    self.scraper = cfscrape.create_scraper()
    self.base_link = 'https://321movies.cc'
    if dev_log == 'true':
        self.start_time = time.time()
def get_source(self,movie_link,title,year,season,episode,start_time):
    # Harvest every data-video embed from the page, resolving vidnode,
    # openload and streamango hosts into quality-tagged source entries.
    # Debug trace of the page being scraped.
    print '###'+movie_link
    try:
        scraper = cfscrape.create_scraper()
        html = scraper.get(movie_link).content
        links = re.compile('data-video="(.+?)"',re.DOTALL).findall(html)
        count = 0
        for link in links:
            print '::::::::::::::::::::::final link> ' + link
            if 'vidnode.net' in link:
                # Only the load.php player pages carry the sources list.
                if not 'load.php' in link: continue
                link = 'http:'+link
                page = requests.get(link,timeout=10).content
                try:
                    # Each match is a (file url, quality label) pair; direct links.
                    grab = re.compile("sources.+?file: '(.+?)',label: '(.+?)'",re.DOTALL).findall(page)
                    for end_link,rez in grab:
                        if '1080' in rez:
                            res = '1080p'
                        elif '720' in rez:
                            res= '720p'
                        else:
                            res = 'unknown'
                        count +=1
                        self.sources.append({'source': 'Vidnode','quality': res,'scraper': self.name,'url': end_link,'direct': True})
                except:pass
                # vid_url = re.compile("sources.+?file: '(.+?)'",re.DOTALL).findall(page)[0]
                # vid_url = 'http:'+vid_url
                # #count +=1
                # self.sources.append({'source': 'GoogleLink','quality': '720p','scraper': self.name,'url': vid_url,'direct': True})
            elif 'openload' in link:
                try:
                    # Openload exposes the release name in the meta description;
                    # parse a quality hint from it, falling back to DVD.
                    chk = requests.get(link).content
                    rez = re.compile('"description" content="(.+?)"',re.DOTALL).findall(chk)[0]
                    if '1080' in rez:
                        res='1080p'
                    elif '720' in rez:
                        res='720p'
                    else:
                        res ='DVD'
                except:
                    res = 'DVD'
                count +=1
                self.sources.append({'source': 'Openload', 'quality': res, 'scraper': self.name, 'url': link,'direct': False})
            elif 'streamango.com' in link:
                get_res=requests.get(link).content
                try:
                    # The mp4 stream height doubles as the quality label.
                    res = re.compile('{type:"video/mp4".+?height:(.+?),',re.DOTALL).findall(get_res)[0]
                    count +=1
                    self.sources.append({'source': 'Streamango', 'quality': res, 'scraper': self.name, 'url': link,'direct': False})
                except: pass
            else:
                # Unknown host: derive the source name from the domain.
                host = link.split('//')[1].replace('www.','')
                host = host.split('/')[0].split('.')[0].title()
                count +=1
                self.sources.append({'source': host,'quality': 'DVD','scraper': self.name,'url': link,'direct': False})
        if dev_log=='true':
            end_time = time.time() - start_time
            send_log(self.name,end_time,count,title,year, 
                     season=season,episode=episode)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name,argument)
        return self.sources
def __init__(self):
    """Scraper setup for oceanofmovies.de."""
    self.scraper = cfscrape.create_scraper()
    self.base_link = 'http://oceanofmovies.de'
def __init__(self):
    """Scraper setup for 123movieshubz.com, including its watch-URL template."""
    self.scraper = cfscrape.create_scraper()
    self.base_link = 'http://123movieshubz.com'
    self.search_link = '/watch/%s-%s-online-123movies.html'
def scrape_movie(self, x, y, z, debrid=False):
    """Search for movie *x* (year *y*; *z* is the unused imdb id), perform the
    gomostream token exchange and collect putvid/openload/streamango links
    into self.headache (this scraper's sources list).

    Fix: the except path returned self.sources, but this scraper accumulates
    results in self.headache (see the success-path return) — referencing
    self.sources would itself raise inside the handler. It now returns
    self.headache. The stray `print argument` debug leftover was removed.
    Parameter names x/y/z are kept for caller compatibility.
    """
    try:
        count = 0
        start_headache = time.time()
        scraper = cfscrape.create_scraper()
        search_term = clean_search(x.lower())
        # `self.i` is the base link and `c` the module-level headers dict.
        search_url = '%s/?s=%s+%s' % (self.i, search_term.replace(' ', '+'), y)
        search_html = scraper.get(search_url, headers=c, timeout=5).content
        results = re.findall(
            '<div class="thumbnail animation-2">.+?href="(.+?)">.+?alt="(.+?)"',
            search_html)
        for result_url, result_label in results:
            # Listing label looks like "Title (Year)".
            found_title, found_year = re.findall('(.+?)\((.+?)\)', str(result_label))[0]
            if clean_title(found_title) == clean_title(x) and found_year == y:
                detail_html = scraper.get(result_url).content
                embed_url = re.findall(
                    '<iframe class="metaframe rptss" src="(.+?)"', detail_html)[0]
                embed_html = scraper.get(embed_url).content
                token_blocks = re.findall(
                    "var tc = '(.+?)'.+?url: \"(.+?)\".+?\"_token\": \"(.+?)\".+?function.+?\(s\)(.+?)</script>",
                    embed_html, re.DOTALL)
                for token_code, post_url, csrf_token, token_js in token_blocks:
                    # Replay the site's JS token computation server-side.
                    x_token = self.get_x_token(token_code, token_js)
                    headers = {
                        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0',
                        'Host': 'gomostream.com',
                        'Referer': embed_url,
                        'x-token': x_token
                    }
                    payload = {'tokenCode': token_code, '_token': csrf_token}
                    # `b` is the module-level HTTP session used for the POST.
                    host_links = b.post(post_url, headers=headers, data=payload).json()
                    for host_link in host_links:
                        if 'putvid' in host_link:
                            player_html = scraper.get(host_link).content
                            packed = re.findall(
                                "<script type='text/javascript'>(.+?)</script>",
                                player_html, re.DOTALL)[0]
                            # Unpack the p.a.c.k.e.d JS to expose the stream URL.
                            unpacked = unpack(packed)
                            stream = re.findall('sources:\["(.+?)"', str(unpacked))[0]
                            count += 1
                            self.headache.append({
                                'source': 'Putvid',
                                'quality': 'Unknown - Probably good',
                                'scraper': self.name,
                                'url': stream,
                                'direct': False
                            })
                        elif 'openload' in host_link or 'streamango' in host_link:
                            source_name = 'Openload' if 'openload' in host_link else 'Streamango'
                            player_html = scraper.get(host_link).content
                            description = re.findall(
                                '"description" content="(.+?)"', player_html)[0]
                            if '1080p' in description:
                                quality = '1080p'
                            elif '720p' in description:
                                quality = '720p'
                            else:
                                quality = 'SD'
                            count += 1
                            self.headache.append({
                                'source': source_name,
                                'quality': quality,
                                'scraper': self.name,
                                'url': host_link,
                                'direct': False
                            })
        if dev_log == 'true':
            end_it_all = time.time() - start_headache
            send_log(self.name, end_it_all, count, x, y)
        return self.headache
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        # Fix: was `return self.sources`, which does not exist on this class.
        return self.headache
def __init__(self):
    """Scraper setup for openloadmovie.me; records start time in dev mode."""
    self.scraper = cfscrape.create_scraper()
    self.base_link = 'https://openloadmovie.me'
    if dev_log == 'true':
        self.start_time = time.time()
def __init__(self):
    """Scraper setup for extramovies.cc."""
    self.scraper = cfscrape.create_scraper()
    self.base_link = 'http://extramovies.cc'
def __init__(self):
    """Scraper setup for 2ddl.io with an empty sources accumulator."""
    self.scraper = cfscrape.create_scraper()
    self.sources = []
    self.base_link = 'http://2ddl.io'