def get_source(self, item_url, qual, title, year, start_time): try: print 'hi' count = 0 headers = {'User-Agent': random_agent()} OPEN = requests.get(item_url, headers=headers, timeout=5).content Endlinks = re.compile( 'class="entry-content">.+?<iframe src="(.+?)".+?</iframe>', re.DOTALL).findall(OPEN) for link in Endlinks: headers = { 'User-Agent': random_agent(), 'Host': 'consistent.stream', 'Referer': item_url } consistant = requests.get(link, headers=headers, timeout=5).content print consistant consistant = self.replace_html_entities(consistant) final_links = re.compile('"src":"(.+?)"', re.DOTALL).findall(consistant) print str(final_links) for links in final_links: print links irl = 'irl-' gool = 'googleusercontent' if irl not in links: if gool not in links: links = links.replace('\\', '') host = links.split('//')[1].replace('www.', '') hostname = host.split('/')[0].split('.')[0].title() if 'mentor' in links: links = links + '|' + 'User-Agent=Mozilla/5.0 (Windows NT 6.3; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0&Referer=' + link count += 1 self.sources.append({ 'source': hostname, 'quality': qual, 'scraper': self.name, 'url': links, 'direct': False }) if dev_log == 'true': end_time = time.time() - start_time send_log(self.name, end_time, count, title, year) except Exception, argument: print argument if dev_log == 'true': error_log(self.name, argument)
def get(url, check, headers=None, data=None):
    # Fetch *url* and return the body only when the marker string *check*
    # appears in it; otherwise retry (twice) through a rotating proxy.
    # Returns None when every attempt fails or the marker never shows up.
    if headers is None:
        headers = {
            'User-Agent': random_agent(),
        }
    # Direct attempt first (passing data makes this a POST, as before).
    try:
        request = urllib2.Request(url, headers=headers, data=data)
        html = urllib2.urlopen(request, timeout=10).read()
        if check in str(html):
            return html
    except:
        pass
    # The original duplicated this proxy block verbatim three times and
    # leaked the response in two of them; dedupe into a bounded retry
    # loop and always close the response.
    for _ in range(2):
        try:
            new_url = get_proxy_url() % urllib.quote_plus(url)
            headers['Referer'] = 'http://%s/' % urlparse.urlparse(new_url).netloc
            request = urllib2.Request(new_url, headers=headers)
            response = urllib2.urlopen(request, timeout=10)
            html = response.read()
            response.close()
            if check in html:
                return html
        except:
            pass
    return
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() get_link = self.base_link + '%s/' % (imdb) headers = {'User-Agent': random_agent(), 'referrer': get_link} data = {'tmp_chk': '1'} html = requests.post(get_link, headers=headers, data=data, verify=False, timeout=5).content #print html try: link = re.compile('<iframe src="(.+?)"', re.DOTALL).findall(html)[0] except: link = '' #print link count = 0 if link != '': try: chk = requests.get(link).content rez = re.compile('"description" content="(.+?)"', re.DOTALL).findall(chk)[0] if '1080' in rez: res = '1080p' elif '720' in rez: res = '720p' else: res = 'DVD' except: res = 'DVD' count += 1 if 'http' not in link: link = 'http:' + link self.sources.append({ 'source': 'Openload', 'quality': res, 'scraper': self.name, 'url': link, 'direct': False }) if dev_log == 'true': end_time = time.time() - start_time send_log(self.name, end_time, count, title, year) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, argument) return self.sources
def get_raw(url, headers=None, data=None):
    """Open *url* through the proxy and return the raw urllib2 response.

    Returns None (implicitly) when the proxied request fails for any
    reason.  *data* is accepted for signature compatibility with get()
    but is not used here.
    """
    if headers is None:
        headers = {
            'User-Agent': random_agent(),
        }
    try:
        proxied = get_proxy_url() % urllib.quote_plus(url)
        headers['Referer'] = 'http://%s/' % urlparse.urlparse(proxied).netloc
        return urllib2.urlopen(urllib2.Request(proxied, headers=headers),
                               timeout=10)
    except:
        pass
def get_source(self,item_url,qual,title,year,start_time): try: #print item_url #print 'hi' count = 0 headers={'User-Agent':random_agent()} OPEN = session.get(item_url,headers=headers,timeout=5).content Endlinks = re.compile('class="entry-content">.+?<iframe src="(.+?)".+?</iframe>',re.DOTALL).findall(OPEN) for link in Endlinks: headers={'User-Agent':user_agent, 'Host':'consistent.stream', 'Referer':item_url} #print link consistant = session.get(link,headers=headers,timeout=5).content video_, Hash = re.findall('video="(.+?)" hash="(.+?)"',consistant)[0] api_url = 'https://consistent.stream/api/getVideo' headers={'User-Agent':user_agent, 'Host':'consistent.stream','Referer': 'https://consistent.stream/titles/'+video_} #print video_ #print Hash data = {'key':Hash,'referrer':item_url,'video':video_} api_html = session.post(api_url,headers=headers,data=data).content #print str(api_html) #### trying to get response from api_url but being a nob #consistant = self.replace_html_entities(consistant) final_links = re.compile('"src":"(.+?)"',re.DOTALL).findall(api_html) #print str(final_links) for links in final_links: irl= 'irl-' gool = 'googleusercontent' if irl not in links: if gool not in links: links = links.replace('\\','') host = links.split('//')[1].replace('www.','') hostname = host.split('/')[0].split('.')[0].title() if 'mentor' in links: links = links +'|'+'User-Agent=Mozilla/5.0 (Windows NT 6.3; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0&Referer='+link count += 1 self.sources.append({'source': hostname, 'quality': qual, 'scraper': self.name, 'url': links,'direct': False}) else: count += 1 self.sources.append({'source': hostname, 'quality': qual, 'scraper': self.name, 'url': links,'direct': False}) if dev_log=='true': end_time = time.time() - start_time send_log(self.name,end_time,count,title,year) except Exception, argument: #print argument if dev_log == 'true': error_log(self.name,argument)
def get_source(self, item_url, title, year, start_time, season, episode): try: headers = {'User-Agent': random_agent()} OPEN = requests.get(item_url, headers=headers, timeout=5).content Endlinks = re.compile('<IFRAME SRC="(.+?)"', re.DOTALL).findall(OPEN) count = 0 for link in Endlinks: print link + '::::::::::::::::::::::::::::' html = requests.get(link, headers=headers, timeout=5).content packed = packed = re.findall( "id='flvplayer'.+?<script type='text/javascript'>(.+?)</script>", html.replace('\\', ''), re.DOTALL) for item in packed: #print item item = unpack(item) print item item = item.split('file:"')[1].split('",')[0] print item + '>>>>>>>>>>>>>..split?' host = item.split('//')[1].replace('www.', '') hostname = host.split('/')[0].split('.')[0].title() count += 1 self.sources.append({ 'source': hostname, 'quality': 'DVD', 'scraper': self.name, 'url': item, 'direct': False }) if dev_log == 'true': end_time = time.time() - start_time send_log(self.name, end_time, count, title, year, season=season, episode=episode) except Exception, argument: if dev_log == 'true': error_log(self.name, argument) return self.sources
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() search_id = clean_search(title.lower()) start_url = '%s/?s=%s' % (self.base_link, search_id.replace(' ', '+')) headers = {'User-Agent': random_agent()} html = requests.get(start_url, headers=headers, timeout=5).content match = re.compile( 'id="mt-.+?href="(.+?)">.+?alt="(.+?)".+?<span class="year">(.+?)</span>.+?class="calidad2">(.+?)</span>', re.DOTALL).findall(html) for item_url, name, release, qual in match: if year == release: if clean_title(search_id).lower() == clean_title( name).lower(): self.get_source(item_url, qual, title, year, start_time) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, argument)
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() search_id = clean_search(title.lower()) start_url = '%s%s%s' % (self.base_link, self.search, search_id.replace(' ', '%20')) #print start_url headers = {'User-Agent': random_agent()} html = requests.get(start_url, headers=headers, timeout=5).content match = re.compile( 'class="title search title".+?href="(.+?)">(.+?)</a>', re.DOTALL).findall(html) for item_url, name in match: #print item_url +' ><><><><><> '+name if clean_title(search_id).lower() == clean_title(name).lower(): self.get_source(item_url, title, year, start_time, '', '') #print 'passedurl>>>>>>>>>>>>>>>>>>>>>>'+item_url return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, argument)
# -*- coding: utf-8 -*- # Universal Scrapers import xbmc,xbmcaddon,time import re import requests from universalscrapers.common import clean_title,clean_search,random_agent,send_log,error_log from ..scraper import Scraper dev_log = xbmcaddon.Addon('script.module.universalscrapers').getSetting("dev_log") headers = {"User-Agent": random_agent()} class freemusic(Scraper): domains = ['freemusicdownloads'] name = "Freemusic" sources = [] def __init__(self): self.base_link = 'http://down.freemusicdownloads.world/' self.sources = [] if dev_log=='true': self.start_time = time.time() def scrape_music(self, title, artist, debrid=False): try: song_search = clean_title(title.lower()).replace(' ','+') artist_search = clean_title(artist.lower()).replace(' ','+') start_url = '%sresults?search_query=%s+%s' %(self.base_link,artist_search,song_search) html = requests.get(start_url, headers=headers, timeout=20).content match = re.compile('<h4 class="card-title">.+?</i>(.+?)</h4>.+?id="(.+?)"',re.DOTALL).findall(html)
def get_source(self, url, title, year, season, episode, start_time): try: # print url get_cookies = session.get(url, timeout=5).content cookie_frame = re.findall('<iframe.+?src="(.+?)"', get_cookies) for cookie_page in cookie_frame: # print cookie_page if 'gomostream' in cookie_page: # print cookie_page html2 = session.get(cookie_page, allow_redirects=True).content # print html2 get_cookie_info = re.compile( "var tc = '(.+?)'.+?url: \"(.+?)\".+?\"_token\": \"(.+?)\".+?function _tsd_tsd_ds\(s\)(.+?)</script>", re.DOTALL).findall(html2) for tokencode, url_to_open, _token, xtokenscript in get_cookie_info: # print tokencode,xtokenscript, url_to_open,_token x_token = self.get_x_token(tokencode, xtokenscript) # print x_token headers = { 'User-Agent': random_agent(), 'Host': 'gomostream.com', 'Referer': cookie_page, 'x-token': x_token } data = {'tokenCode': tokencode, '_token': _token} html3 = session.post(url_to_open, headers=headers, data=data, timeout=5).json() count = 0 for playlink in html3: if playlink != '': try: if playlink[0] == ' ': playlink = playlink[1:] except: playlink = playlink try: source = re.findall( '//(.+?)/', str(playlink))[0] except: source = self.name count += 1 self.sources.append({ 'source': source, 'quality': 'HD', 'scraper': self.name, 'url': playlink, 'direct': False }) if dev_log == 'true': end_time = time.time() - start_time send_log(self.name, end_time, count, title, year, season=season, episode=episode) except Exception, argument: xbmc.log('YESMOVIE : sources ' + argument, xbmc.LOGNOTICE) print argument if dev_log == 'true': error_log(self.name, arguemnt) return self.sources
# -*- coding: utf-8 -*- # Universal Scrapers import xbmc,xbmcaddon,time import re import requests from universalscrapers.common import clean_title,clean_search,random_agent,send_log,error_log from ..scraper import Scraper dev_log = xbmcaddon.Addon('script.module.universalscrapers').getSetting("dev_log") headers = {"User-Agent": random_agent()} class freemusic(Scraper): domains = ['down.getfreemusic.world'] name = "FreeMusic" sources = [] def __init__(self): self.base_link = 'https://down.getfreemusic.world/' self.sources = [] if dev_log=='true': self.start_time = time.time() def scrape_music(self, title, artist, debrid=False): try: song_search = clean_title(title.lower()).replace(' ','+') artist_search = clean_title(artist.lower()).replace(' ','+') start_url = '%sresults?search_query=%s+%s' % (self.base_link,artist_search,song_search) html = requests.get(start_url, headers=headers, timeout=20).content match = re.compile('<h4 class="card-title">.+?</i>(.+?)</h4>.+?id="(.+?)"',re.DOTALL).findall(html)