Example #1
    def __init__(self, *, proxies, ssl_context):
        proxies = _normalize_proxies(proxies)
        super().__init__(proxies=proxies, ssl_context=ssl_context)

        # A `ProxyHandler` should be present even when there are
        # actually no proxies; `build_opener` adds one anyway. By
        # specifying it here explicitly we can disable system proxies
        # (i.e. from the HTTP_PROXY env var) by setting `proxies` to
        # `{}`. Otherwise, if we didn't specify a ProxyHandler for
        # empty `proxies`, `build_opener` would have used one
        # internally, which could have unwillingly picked up the
        # system proxies.
        opener = build_opener(
            HTTPSHandler(context=ssl_context),
            ProxyHandler(proxies),
        )
        self.urlopen = opener.open
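
The comment above is the key point: passing an explicit `ProxyHandler` built from an empty dict keeps `build_opener` from constructing its own handler out of the environment. A minimal sketch of the difference (the environment-variable proxy is an assumption for illustration, not part of the original code):

from urllib.request import ProxyHandler, build_opener

opener_no_proxy = build_opener(ProxyHandler({}))  # never consults HTTP_PROXY / HTTPS_PROXY
opener_default = build_opener()                   # adds a ProxyHandler from getproxies(), i.e. the environment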
Example #2
def use_proxy(proxies, url):
    # 1. Create a urllib.request.ProxyHandler for the given proxies
    proxy_support = ProxyHandler(proxies=proxies)
    # 2. Build an opener (used much like urlopen)
    opener = build_opener(proxy_support)
    # 3. Install the opener globally
    install_opener(opener)

    # user_agent =  "Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Firefox/45.0"
    # user_agent =  "Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Firefox/45.0"
    user_agent = 'Mozilla/5.0 (iPad; CPU OS 5_0 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A334 Safari/7534.48.3'
    # pretend to be a browser
    opener.addheaders = [('User-agent', user_agent)]
    urlObj = urlopen(url)
    content = urlObj.read().decode('utf-8')
    return content
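
A hedged usage sketch for use_proxy; the proxy address and target URL below are placeholders, not taken from the original example:

if __name__ == '__main__':
    proxies = {'http': 'http://127.0.0.1:8080'}   # hypothetical local proxy
    print(use_proxy(proxies, 'http://httpbin.org/ip'))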
Example #3
    def GetHandlers(self):
        """Retrieve the appropriate urllib handlers for the given configuration.

    Returns:
      A list of urllib.request.BaseHandler subclasses to be used when making
      calls with proxy.
    """
        handlers = []

        if self.ssl_context:
            handlers.append(HTTPSHandler(context=self.ssl_context))

        if self.proxies:
            handlers.append(ProxyHandler(self.proxies))

        return handlers
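
A minimal sketch of how the returned handler list is typically consumed; `config` stands in for a hypothetical instance of the surrounding class:

opener = build_opener(*config.GetHandlers())   # an empty list simply yields the default opener
response = opener.open('https://example.com')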
Example #4
def gen_tags(rss, stops, proxy=''):
    # proxy support: expects a full proxy URL such as 'http://host:port'
    protocol = proxy.split("/")[0][:-1]
    our_proxy = ProxyHandler({protocol: proxy})
    headlines = []
    # used for stemming
    ps = PorterStemmer()
    # our counter for word frequency
    ct = Counter()
    ct2 = Counter()
    word_tokenize = RegexpTokenizer(r'\w+')
    # stores tokenized headlines
    list_titles = []
    # go through each news outlet
    for source in rss:
        # get the parsed RSS feed
        d = feedparser.parse(source,  handlers=[our_proxy])
        # extract each head line
        a = d['entries']
        for title in a:
            # removes punctuation and tokenizes the word
            words = word_tokenize.tokenize(title['title'])
            # removes stop words
            filtered_words = [word for word in words if (word.lower()) not in stops]
            list_titles.append(filtered_words)
            # stem the words and add them to the counter
            for fw in filtered_words:
                ct[ps.stem(fw)] += 1
                ct2[fw] += 1

    tags = dict(ct.most_common(20))
    processed_set = Counter()
    # go through each title and see if we can combine adjacent tags, i.e. #south #korea -> #south korea
    is_prev_tag = False
    prev = ""
    for title in list_titles:
        for index in range(1, len(title)):
            if (title[index-1].lower() in tags) and (title[index].lower() in tags):
                combined = title[index-1] + " " + title[index]
                processed_set[combined] += 1
                is_prev_tag = True
            else:
                if (title[index-1].lower() in tags) and not is_prev_tag:
                    processed_set[title[index - 1]] += 1
                is_prev_tag = False

    return processed_set.most_common(5)
Example #5
    def loadTestImage(self, _listImageFileName):
        """
        This method checks the presence of all the images in the list of image file names
        in the $EDNA_HOME/tests/data/images directory. If one image is not present
        this method tries to download it from http://www.edna-site.org/data/tests/images
        """
        if not os.path.isdir(EDUtilsPath.EDNA_TESTIMAGES):
            os.makedirs(EDUtilsPath.EDNA_TESTIMAGES)
        for strImageName in _listImageFileName:
            strImagePath = os.path.join(EDUtilsPath.EDNA_TESTIMAGES,
                                        strImageName)
            if (not os.path.exists(strImagePath)):
                EDVerbose.unitTest(
                    "Trying to download image %s, timeout set to %d s" %
                    (strImagePath, iMAX_DOWNLOAD_TIME))
                if "http_proxy" in os.environ:
                    dictProxies = {'http': os.environ["http_proxy"]}
                    proxy_handler = ProxyHandler(dictProxies)
                    opener = build_opener(proxy_handler).open
                else:
                    opener = urlopen

                timer = threading.Timer(iMAX_DOWNLOAD_TIME + 1,
                                        timeoutDuringDownload)
                timer.start()
                data = opener("%s/%s" % (self.URL_EDNA_SITE, strImageName),
                              data=None,
                              timeout=iMAX_DOWNLOAD_TIME).read()
                timer.cancel()

                try:
                    open(strImagePath, "wb").write(data)
                except IOError:
                    raise IOError(
                        "unable to write downloaded data to disk at %s" %
                        strImagePath)

                if os.path.exists(strImagePath):
                    EDVerbose.unitTest("Image %s successfully downloaded." %
                                       strImagePath)
                else:
                    raise RuntimeError(
                        ("Could not automatically download test images %r!\n"
                         "If you are behind a firewall, please set the environment "
                         "variable http_proxy.\n"
                         "Otherwise please try to download the images manually from\n"
                         "http://www.edna-site.org/data/tests/images")
                        % _listImageFileName)
Example #6
def setup_urllib_proxies():
    global _urllib_proxies_installed, SYSTEM_PROXIES

    if _urllib_proxies_installed:
        return
    _urllib_proxies_installed = True
    if not SYSTEM_PROXIES:
        return
    proxies = {
        k: "%s://%s:%s" % (k, SYSTEM_PROXIES[k][0], SYSTEM_PROXIES[k][1])
        for k in SYSTEM_PROXIES
    }
    from urllib.request import ProxyHandler, build_opener, install_opener

    proxy_handler = ProxyHandler(proxies)
    opener = build_opener(proxy_handler)
    install_opener(opener)
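
Once install_opener has run, module-level urlopen calls are routed through the proxy handler; a minimal sketch, assuming SYSTEM_PROXIES was populated elsewhere:

from urllib.request import urlopen

setup_urllib_proxies()                         # installs the proxy opener globally (no-op without proxies)
body = urlopen('http://example.com').read()    # subsequent urlopen calls use the installed opener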
Example #7
 def _get_fresh_token(self, refresh_token, token_name, token_variant=None):
     ''' Refresh an expired X-VRT-Token, vrtlogin-at or vrtlogin-rt token '''
     token = None
     refresh_url = self._TOKEN_GATEWAY_URL + '/refreshtoken'
     cookie_value = 'vrtlogin-rt=' + refresh_token
     headers = {'Cookie': cookie_value}
     cookiejar = cookielib.CookieJar()
     opener = build_opener(HTTPCookieProcessor(cookiejar),
                           ProxyHandler(self._proxies))
     log(2, 'URL get: {url}', url=refresh_url)
     req = Request(refresh_url, headers=headers)
     opener.open(req)
     token = TokenResolver._create_token_dictionary(cookiejar, token_name)
     if token is not None:
         self._set_cached_token(token, token_variant)
         token = list(token.values())[0]
     return token
Example #8
    def __init__(self):
        # error message
        self.error = None

        # establish connection
        self.session = build_opener()

        # add proxy handler if needed
        if config['proxy']:
            if any(config['proxies'].values()):
                self.session.add_handler(ProxyHandler(config['proxies']))
                logger.debug("Proxy is set!")
            else:
                self.error = "Proxy enabled, but not set!"

        # change user-agent
        self.session.addheaders = [('User-Agent', config['ua'])]
Example #9
def test_proxy_2():
    aurl = 'https://www.rainasmoon.com/me.html'
    proxy = call_a_new_proxy()
    print('use proxy:' + proxy)
    proxy_handler = ProxyHandler({
        'http': 'http://' + proxy,
        'https': 'http://' + proxy,
    })

    opener = build_opener(proxy_handler)
    try:
        f = opener.open(aurl)
        r = f.read().decode('utf-8')

        print(r)
    except URLError as err:
        print('ERROR PROXY', err)
Example #10
    def __init__(self):
        # establish connection
        #
        # make cookie
        cj = CookieJar()
        # if we want to use https we must add ssl=enable_ssl to the cookie
        c = Cookie(0, 'ssl', "enable_ssl", None, False, '.nnm-club.me', True,
                   False, '/', True, False, None, 'ParserCookie', None, None,
                   None)
        cj.set_cookie(c)
        self.session = build_opener(HTTPCookieProcessor(cj))

        # avoid endless waiting
        self.blocked = False

        # add proxy handler if needed
        if self.config['proxy'] and any(self.config['proxies'].keys()):
            self.session.add_handler(ProxyHandler(self.config['proxies']))

        # change user-agent
        self.session.addheaders.pop()
        self.session.addheaders.append(('User-Agent', self.config['ua']))

        response = self._catch_error_request(self.url + 'login.php')
        if not self.blocked:
            code = re.search(r'code"\svalue="(.+?)"',
                             response.read().decode('cp1251'))[1]
            form_data = {
                "username": self.config['username'],
                "password": self.config['password'],
                "autologin": "******",
                "code": code,
                "login": "******"
            }
            # encode the values to cp1251 first, then urlencode and encode the whole query string
            data_encoded = urlencode(
                {k: v.encode('cp1251')
                 for k, v in form_data.items()}).encode()

            self._catch_error_request(self.url + 'login.php', data_encoded)

            if 'phpbb2mysql_4_sid' not in [cookie.name for cookie in cj]:
                logging.warning(
                    "not authorized, please check your credentials")
            else:
                logging.info('We successfully authorized')
Example #11
			def perform_custom_request(self, method, url, headers, data):
				result = None
				try:
					proxy_setting = Settings().get_string('downloadClient.httpsProxy')
					if proxy_setting:
						opener = build_opener(ProxyHandler({'https': proxy_setting}))
						install_opener(opener)

					if b"Content-Length" in headers:
						del headers[b"Content-Length"]

					req = PythonDownloadInstance.CustomRequest(pyNativeStr(url), data=data, headers=headers, method=pyNativeStr(method))
					result = urlopen(req)
				except HTTPError as he:
					result = he
				except URLError as e:
					core.BNSetErrorForDownloadInstance(self.handle, e.__class__.__name__)
					log.log_error(str(e))
					return None
				except:
					core.BNSetErrorForDownloadInstance(self.handle, "Unknown Exception!")
					log.log_error(traceback.format_exc())
					return None

				total_size = int(result.headers.get('content-length', 0))
				bytes_sent = 0
				while True:
					data = result.read(4096)
					if not data:
						break
					raw_bytes = (ctypes.c_ubyte * len(data)).from_buffer_copy(data)
					bytes_wrote = core.BNWriteDataForDownloadInstance(self.handle, raw_bytes, len(raw_bytes))
					if bytes_wrote != len(raw_bytes):
						core.BNSetErrorForDownloadInstance(self.handle, "Bytes written mismatch!")
						return None
					bytes_sent = bytes_sent + bytes_wrote
					continue_download = core.BNNotifyProgressForDownloadInstance(self.handle, bytes_sent, total_size)
					if continue_download is False:
						core.BNSetErrorForDownloadInstance(self.handle, "Download aborted!")
						return None

				if not bytes_sent:
					core.BNSetErrorForDownloadInstance(self.handle, "Received no data!")
					return None

				return DownloadInstance.Response(result.getcode(), result.headers, None)
Example #12
def get_all_comment_with(url):
    detail_url = 'https://www.qiushibaike.com' + url
    print(detail_url)
    proxy_handle = ProxyHandler(proxies)
    request = Request(detail_url, headers=headers)
    opener = build_opener(proxy_handle)
    response = opener.open(request)
    code = response.read().decode()
    # print(code)
    pattern = re.compile(
        r'<a.*?class="userlogin".*?title="(.*?)">.*?<span class="body">(.*?)</span>',
        re.S)
    result = pattern.findall(code)
    print(result)
    print(
        '---------------------------------------------------------------------'
    )
Example #13
    def get_page(self):
        # key/value parameters for the request URL
        params = {
            "g_tk": 776011230,
            "callback": "viewer_Callback",
            "t": 455717206,
            "topicId": "V13LWQts0VPfSK",
            "picKey": "NDR0Vq0mL2C1Fln8EiUvbAEAAAAAAAA!",
            "shootTime": '',
            "cmtOrder": 1,
            "fupdate": 1,
            "plat": "qzone",
            "source": "qzone",
            "cmtNum": 10,
            "likeNum": 5,
            "inCharset": "utf-8",
            "outCharset": "utf-8",
            "callbackFun": "viewer",
            "offset": 0,
            "number": 15,
            "uin": self.fang_wen_qq,
            "appid": 4,
            "isFirst": 1,
            "hostUin": self.qq,
            "sortOrder": 1,
            "showMode": 1,
            "need_private_comment": 1,
            "prevNum": 9,
            "postNum": 18,
            "_": str(time.time()).replace('.', '')[0:13]
        }
        url = self.base_url + urlencode(params)
        request = urllib.request.Request(url, headers=self.headers)
        ip_proxyhandler = ProxyHandler(self.ip)
        opener = build_opener(ip_proxyhandler)
        urllib.request.install_opener(opener)
        try:
            response = urllib.request.urlopen(request)
            if response.status == 200:
                html = response.read().decode('utf-8')
                print(html)
                # call parse_json to parse the JSON data
                # self.parse_json(html)

        except Exception as e:
            return None
Example #14
 def do_socks(self, line):
     headers = ["Protocol", "Target", "Username", "AdminStatus", "Port"]
     url = "http://localhost:9090/ntlmrelayx/api/v1.0/relays"
     try:
         proxy_handler = ProxyHandler({})
         opener = build_opener(proxy_handler)
         response = Request(url)
         r = opener.open(response)
         result = r.read()
         items = json.loads(result)
     except Exception as e:
         logging.error("ERROR: %s" % str(e))
     else:
         if len(items) > 0:
             self.printTable(items, header=headers)
         else:
             logging.info('No Relays Available!')
Example #15
 def get_total_page(self):
     # create the request object
     request = urllib.request.Request(url=self.base_url,
                                      headers=self.headers)
     # create the IP proxy handler
     ip_proxyhandler = ProxyHandler(self.ip)
     # create the opener object
     opener = build_opener(ip_proxyhandler)
     # open the page through the opener and read the decoded source
     response = opener.open(request).read().decode('utf-8')
     # regular expression for the total page count
     pattern_obj = re.compile(r'<span class="red">(.*?)</span>', re.S)
     # re.search pulls the total page count out of the source code;
     # it comes back as a string, so convert it to an int
     total_page = int(re.search(pattern_obj, response)[1])
     # hand total_page over to get_user_and_comment
     self.get_user_and_comment(total_page)
Example #16
 def __init__(self, proxy=""):
     # create the cookie jar
     self.cookie = http.cookiejar.CookieJar()
     self.opener = urllib.request.build_opener(
         HTTPCookieProcessor(self.cookie))
     if proxy:
         # proxy format '127.0.0.1:8087'
         # self.p = urllib2.ProxyHandler({'http': proxy})
         # self.proxy_opener = urllib2.build_opener(self.p)
         self.opener = urllib.request.build_opener(
             HTTPCookieProcessor(self.cookie), ProxyHandler({'http':
                                                             proxy}))
     urllib.request.install_opener(self.opener)
     self.headers = {
         'User-Agent':
         'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'
     }
Example #17
 def download(self):
     print('Downloading...')
     headers = {
         'User-Agent':
         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36'
     }
     # proxy_params = {
     #     'http': 'http://*****:*****@zproxy.lum-superproxy.io:22225',
     #     'https': 'http://*****:*****@zproxy.lum-superproxy.io:22225'}
     password = '******'
     username = '******'
     proxy_url = 'http://gate.smartproxy.com'
     countries = ['us', 'ca', 'gb', 'de', 'fr', 'es', 'it', 'se', 'gr']
     country = countries[randint(0, len(countries) - 1)]
     proxy_url = f'http://{username}:{password}@{country}.smartproxy.com:23827'
     print('Proxy URL: ' + proxy_url)
     proxy_params = {'http': proxy_url}
     ua = UserAgent()
     headers = {'User-Agent': ua.random}
     request = Request(self.url, headers=headers)
     opener = build_opener()
     if proxy_params:
         if self.attempts != 3:
             opener.add_handler(ProxyHandler(proxy_params))
     try:
         response = opener.open(request)
         html = response.read()
     except (URLError, RemoteDisconnected, requests.ConnectionError,
             IncompleteRead, HTTPError) as e:
         print('Failing on {}'.format(self.url))
         print(e)
         print(type(e))
         if self.attempts > 4:
             html = '<html><title>Page Not Found</title></html>'
             return html
         # if hasattr(e, 'code'):
         #     if e.code >= 400 or e.code < 500:
         #         html = '<html><title>Page Not Found</title></html>'
         #         return html
         self.attempts += 1
         minutes = 0.5 * self.attempts
         print('Attempt number {}'.format(self.attempts))
         print('Retrying in {} min'.format(minutes))
         time.sleep(minutes * 60)
         html = self.download()
     return html
Example #18
def get_value_from_url(url, timeout_secs=5, return_errors=False):
    try:
        # http://www.webuseragents.com/my-user-agent
        user_agents = [
            'Mozilla/5.0 (X11; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0',
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36',
            'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'
        ]

        # Create proxy handler
        proxy = ProxyHandler({})
        auth = HTTPBasicAuthHandler()
        opener = build_opener(proxy, auth, HTTPHandler)
        install_opener(opener)

        # Create a request object with given url
        req = Request(url)

        # Get a random user agent and add that to the request object
        ua = choice(user_agents)
        req.add_header('User-Agent', ua)

        # Get the output of the URL
        output = urlopen(req, timeout=timeout_secs)

        # Decode to text
        txt = output.read().decode('utf-8')

        # Return the text
        return txt

    except (HTTPError, URLError) as error:
        err = 'ERROR: could not connect to {}: {}'.format(url, error)
        if return_errors:
            return err
        else:
            print((err))
            return None
    except timeout:
        err = 'ERROR: socket timeout on: {}'.format(url)
        if return_errors:
            return err
        else:
            print((err))
            return None
Example #19
 def get_user_and_comment(self, total_page_1):
     # iterate over every page; page_num is the current page number
     for page_num in range(1, total_page_1 + 1):
         print('正在爬取第{}页,请稍候'.format(page_num))
         # build the complete url for this page
         abs_url = self.base_url + '?pn' + str(page_num)
         request = urllib.request.Request(url=abs_url, headers=self.headers)
         ip_proxy_handler = ProxyHandler(self.ip)
         opener = build_opener(ip_proxy_handler)
         response = opener.open(request).read().decode('utf-8')
         # regular expression for extracting the wanted data from each page
         pattern_obj = re.compile(
             r'<a.*?class="p_author_name.*?>(.*?)</a>.*?<div id="post_content.*?>(.*?)</div>',
             re.S)
         # findall returns a list of tuples, one (nickname, reply) pair per post
         result_list = re.findall(pattern_obj, response)
         # hand result_list off to update_data for processing
         self.update_data(result_list)
Example #20
 def get_origin_page_code(self, page_num):
     # page_num is the current page number
     print('正在爬取第{}页数据'.format(page_num))
     # build the complete url for this page
     abs_url = self.base_url + "page/" + str(page_num) + "/"
     # create the request object
     request = urllib.request.Request(abs_url, headers=self.headers)
     # create the IP proxy handler
     proxy_handler = ProxyHandler(self.ip)
     # create the opener object
     opener = build_opener(proxy_handler)
     try:
         response = opener.open(request)
     except Exception as e:
         print('链接失败的原因是:', e)
     else:
         html_string = response.read().decode('utf-8')
         return html_string
Example #21
def test_ip(agreement, ip, port, number):
    url = 'http://httpbin.org/get'
    headers = {
        'User-Agent': UserAgent().chrome
    }
    request = Request(url, headers=headers)
    handler = ProxyHandler({agreement.lower(): ip + ':' + port})
    opener = build_opener(handler)
    print('\r正在测试第' + str(number) + '个ip', end=' ')
    try:
        response = opener.open(request, timeout=5)
        print(response.getcode())
        if response.getcode() == 200:
            return True
        else:
            return False
    except:
        return False
Example #22
def check_proxy():
    global proxy_info
    global result_ip
    connectinfo = json.loads(conn_info)
    proxy_info = (connectinfo['IP_Address_td']) + \
        ':' + (connectinfo['Port_td'])
    result_ip = connectinfo['IP_Address_td']
    opener = build_opener(ProxyHandler({'http': proxy_info}))
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
    install_opener(opener)
    try:
        with urlopen('http://ifconfig.co/ip', timeout=1) as n:
            result = (n.read().decode('UTF-8') + ':' +
                      connectinfo['Port_td']).replace('\n', '')
    except Exception as e:
        return('proxy_invalid')
    else:
        return(result)
Example #23
def authenticate(mail_add, passwd):
    """Activate nicovideo."""
    proxy = ProxyHandler({
        'http': 'http://proxy.example.co.jp:xxxx',
        'https': 'http://proxy.example.xo.jp:xxxx'
    })
    post = {'mail_tel': mail_add, 'password': passwd}

    data = urlencode(post).encode('utf-8')
    cj = CookieJar()
    opener = build_opener(HTTPCookieProcessor(cj), proxy)
    res = opener.open('https://secure.nicovideo.jp/secure/login', data)

    if not 'user_session' in cj._cookies['.nicovideo.jp']['/']:
        raise Exception('PermissionError')

    else:
        return opener
Example #24
    def _call_verify(self, params, proxy):
        """Performs a call to reCaptcha API with given params"""
        data = []
        if proxy:
            proxy_handler = ProxyHandler({'http': proxy})
            opener = build_opener(proxy_handler)
            install_opener(opener)

        try:
            response = urlopen('http://www.google.com/recaptcha/api/verify',
                               data=urlencode(params).encode('utf-8'))
            data = response.read().decode('utf-8').splitlines()
            response.close()
        except Exception as e:
            logger.error(str(e))
            raise ValidationError(self.errors['recaptcha-not-reachable'])

        return data
Example #25
 def get_total_page(self):
     # create the request object
     request = urllib.request.Request(url=self.base_url,
                                      headers=self.headers)
     # create the IP proxy handler
     ip_proxyhandler = ProxyHandler(self.ip)
     # create the opener
     opener = build_opener(ip_proxyhandler)
     # open the page through the opener and read the decoded source
     response = opener.open(request).read().decode('utf-8')
     # regular expression for the total page count
     pattern_obj = re.compile(r'<span class="red">(.*?)</span>', re.S)
     # re.search pulls the total page count out of the source code;
     # it comes back as a string, so convert it to an int
     total_page = int(re.search(pattern_obj, response)[1])
     # in get_total_page, call get_user_and_comment and pass total_page
     # as the argument for its total_page_1 parameter
     self.get_user_and_comment(total_page)
Example #26
    def __init__(self,
                 package=None,
                 version=None,
                 component=None,
                 dscfile=None,
                 lp=None,
                 mirrors=(),
                 workdir='.',
                 quiet=False):
        "Can be initialised either using package, version or dscfile"
        assert ((package is not None and version is not None)
                or dscfile is not None)

        self.source = package
        self._lp = lp
        self.workdir = workdir
        self.quiet = quiet

        # Cached values:
        self._component = component
        self._dsc = None
        self._spph = None

        # State:
        self._dsc_fetched = False

        # Mirrors
        self._dsc_source = dscfile
        self.mirrors = list(mirrors)
        if self.distribution:
            self.masters = [
                UDTConfig.defaults['%s_MIRROR' % self.distribution.upper()]
            ]
        if dscfile is not None:
            if self.source is None:
                self.source = 'unknown'
            if version is None:
                version = 'unknown'

        self.version = debian.debian_support.Version(version)

        # uses default proxies from the environment
        proxy = ProxyHandler()
        self.url_opener = build_opener(proxy)
Example #27
 def openUrl(self, proxyAddr):
     totalS = 0
     # speed test: average over 10 runs
     for i in range(TEST_SPEED_COUNT):
         try:
             starttime = datetime.datetime.now()
             # use a proxy without authentication
             # proxy_handler = urllib.ProxyHandler({"http": proxyAddr})
             typestr = 'https' if proxyAddr.find('https') >= 0 else 'http'
             proxy_handler = ProxyHandler({typestr: proxyAddr})
             opener = build_opener(proxy_handler)
             # 3 second timeout
             opener.open(VALIDATOR_BASE_URL, timeout=3)
             endtime = datetime.datetime.now()
             logger.info(str(endtime - starttime) + "|" + proxyAddr)
             totalS += (endtime - starttime).seconds * 1000 + (
                 endtime - starttime).microseconds
         except error.URLError as e:
             # log the error; a proxy that keeps failing should be discarded
             logger.info(proxyAddr + "|" + str(e))
             totalS += 10 * 1000000
             if (str(e) ==
                     "<urlopen error (10054, 'Connection reset by peer')>"
                     or str(e)
                     == "<urlopen error (10060, 'Operation timed out')>"
                     or str(e)
                     == "<urlopen error (10061, 'Connection refused')>" or
                     str(e) == "<urlopen error (10065, 'No route to host')>"
                     or str(e) == "HTTP Error 502: Bad Gateway"
                     or str(e) == "HTTP Error 503: Service Unavailable"
                     or str(e) == "HTTP Error 504: Gateway Time-out"
                     or str(e) == "HTTP Error 404: Not Found"):
                 # openUrl(proxyAddr)
                 # return
                 totalS += 10 * 1000000
         except Exception as e:
             logger.info(proxyAddr + "|" + "httplib.BadStatusLine")
             # retry on error
             # openUrl(proxyAddr)
             # return
             totalS += 10 * 1000000
     logger.info(totalS)
     # return the average over the 10 runs, in seconds
     return totalS / TEST_SPEED_COUNT / 1000000
Example #28
def download5(url, user_agent='wswp', proxy=None, num_retries=2):
    """Download function with support for proxies"""
    print('Downloading:', url)
    headers = {'User-agent': user_agent}
    request = Request(url, headers=headers)
    opener = build_opener()
    if proxy:
        proxy_params = {urllib.parse.urlparse(url).scheme: proxy}
        opener.add_handler(ProxyHandler(proxy_params))
    try:
        html = opener.open(request).read()
    except URLError as e:
        print('Download error:', e.reason)
        html = None
        if num_retries > 0:
            if hasattr(e, 'code') and 500 <= e.code < 600:
                # retry 5XX HTTP errors
                html = download5(url, user_agent, proxy, num_retries - 1)
    return html
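
A hedged usage sketch for download5; the target URL and proxy endpoint below are placeholders:

html = download5('http://httpbin.org/ip', proxy='http://127.0.0.1:8080')  # hypothetical proxy
if html:
    print(html[:200])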
Example #29
 def get(self, url):
     # request = build_opener()
     if self.proxies is not None:
         proxy_support = ProxyHandler(self.proxies)
         request = build_opener(proxy_support)
     else:
         request = build_opener()
     request.addheaders = [('X-OTX-API-KEY', self.key)]
     response = None
     try:
         response = request.open(url)
     except URLError as e:
         if e.code == 403:
             raise InvalidAPIKey("Invalid API Key")
         elif e.code == 400:
             raise BadRequest("Bad Request")
     data = response.read()
     json_data = json.loads(data)
     return json_data
Example #30
    def get_file(self, fname, lan_ip=lan_gateway):
        """Download the file via a webproxy from webserver of OpenWrt routers.

        E.g. A device on the board's LAN
        """
        if not self.web_proxy:
            raise Exception('No web proxy defined to access board.')
        url = 'http://%s/TEMP' % lan_ip
        self.sendline("\nchmod a+r %s" % fname)
        self.expect('chmod ')
        self.expect(self.prompt)
        self.sendline("ln -sf %s /www/TEMP" % fname)
        self.expect(self.prompt)
        proxy = ProxyHandler({'http': self.web_proxy + ':8080'})
        opener = build_opener(proxy)
        install_opener(opener)
        print("\nAttempting download of %s via proxy %s" %
              (url, self.web_proxy + ':8080'))
        return urlopen(url, timeout=30)