예제 #1
0
    def urlopen(self,
                url,
                timeout=30,
                data=None,
                headers=None,
                files=None,
                show_error=True):
        """Fetch ``url`` through the shared ``requests`` opener and return its body.

        POST is used when ``data`` or ``files`` is supplied, GET otherwise.
        A host that keeps failing (more than 5 times) is temporarily
        disabled for 15 minutes; calls to a disabled host short-circuit.

        :param url: target URL; percent-quoted before use.
        :param timeout: per-request timeout in seconds.
        :param data: optional dict of form data to POST.
        :param headers: optional dict of headers merged over the defaults.
        :param files: optional multipart files mapping (forces POST).
        :param show_error: log failures when True; also selects the
            raise-vs-return behaviour for disabled hosts below.
        :return: response content, or '' when the host is disabled.
        """
        url = urllib2.quote(ss(url), safe="%/:=&?~#+!$,;'@()*[]")

        if not headers: headers = {}
        if not data: data = {}

        # Fill in some headers (caller-supplied values take precedence)
        parsed_url = urlparse(url)
        host = '%s%s' % (parsed_url.hostname,
                         (':' +
                          str(parsed_url.port) if parsed_url.port else ''))

        headers['Referer'] = headers.get('Referer',
                                         '%s://%s' % (parsed_url.scheme, host))
        headers['Host'] = headers.get('Host', host)
        headers['User-Agent'] = headers.get('User-Agent', self.user_agent)
        headers['Accept-encoding'] = headers.get('Accept-encoding', 'gzip')
        headers['Connection'] = headers.get('Connection', 'keep-alive')
        headers['Cache-Control'] = headers.get('Cache-Control', 'max-age=0')

        r = self.http_opener

        # Don't try for failed requests
        if self.http_failed_disabled.get(host, 0) > 0:
            if self.http_failed_disabled[host] > (time.time() - 900):
                log.info2(
                    'Disabled calls to %s for 15 minutes because so many failed requests.',
                    host)
                if not show_error:
                    # Bug fix: the '%s' placeholder was never filled in
                    raise Exception(
                        'Disabled calls to %s for 15 minutes because so many failed requests'
                        % host)
                else:
                    return ''
            else:
                # Cool-down period is over: re-enable this host
                del self.http_failed_request[host]
                del self.http_failed_disabled[host]

        self.wait(host)
        try:

            kwargs = {
                'headers': headers,
                'data': data if len(data) > 0 else None,
                'timeout': timeout,
                'files': files,
            }
            method = 'post' if len(data) > 0 or files else 'get'

            log.info('Opening url: %s %s, data: %s',
                     (method, url, [x for x in data.keys()] if isinstance(
                         data, dict) else 'with data'))
            response = r.request(method, url, verify=False, **kwargs)

            if response.status_code == requests.codes.ok:
                data = response.content
            else:
                response.raise_for_status()

            self.http_failed_request[host] = 0
        except (IOError, MaxRetryError, Timeout):
            if show_error:
                log.error('Failed opening url in %s: %s %s',
                          (self.getName(), url, traceback.format_exc(0)))

            # Save failed requests by hosts
            try:
                if not self.http_failed_request.get(host):
                    self.http_failed_request[host] = 1
                else:
                    self.http_failed_request[host] += 1

                    # Disable temporarily after more than 5 failures
                    if self.http_failed_request[host] > 5 and not isLocalIP(
                            host):
                        self.http_failed_disabled[host] = time.time()

            except:
                log.debug('Failed logging failed requests for %s: %s',
                          (url, traceback.format_exc()))

            raise

        self.http_last_use[host] = time.time()

        return data
예제 #2
0
    def urlopen(self,
                url,
                timeout=30,
                params=None,
                headers=None,
                opener=None,
                multipart=False,
                show_error=True):
        """Fetch ``url`` with urllib2 and return the (gunzipped) response body.

        POSTs ``params`` when given (url-encoded, raw when already a string,
        or multipart when ``multipart`` is True); GETs otherwise. A host that
        keeps failing (more than 5 times) is disabled for 15 minutes.

        :param url: target URL; percent-quoted before use.
        :param timeout: per-request timeout in seconds.
        :param params: optional dict (or pre-encoded string) of POST data.
        :param headers: optional dict of headers merged over the defaults.
        :param opener: optional urllib2 opener to reuse; gets our headers.
        :param multipart: send params as a multipart/form-data POST.
        :param show_error: log failures when True; also selects the
            raise-vs-return behaviour for disabled hosts below.
        :return: response body, or '' when the host is disabled.
        """
        url = urllib2.quote(ss(url), safe="%/:=&?~#+!$,;'@()*[]")

        if not headers: headers = {}
        if not params: params = {}

        # Fill in some headers (caller-supplied values take precedence)
        parsed_url = urlparse(url)
        host = '%s%s' % (parsed_url.hostname,
                         (':' +
                          str(parsed_url.port) if parsed_url.port else ''))

        headers['Referer'] = headers.get('Referer',
                                         '%s://%s' % (parsed_url.scheme, host))
        headers['Host'] = headers.get('Host', host)
        headers['User-Agent'] = headers.get('User-Agent', self.user_agent)
        headers['Accept-encoding'] = headers.get('Accept-encoding', 'gzip')
        headers['Connection'] = headers.get('Connection', 'keep-alive')
        headers['Cache-Control'] = headers.get('Cache-Control', 'max-age=0')

        # Don't try for failed requests
        if self.http_failed_disabled.get(host, 0) > 0:
            if self.http_failed_disabled[host] > (time.time() - 900):
                log.info2(
                    'Disabled calls to %s for 15 minutes because so many failed requests.',
                    host)
                if not show_error:
                    # Bug fix: the '%s' placeholder was never filled in
                    raise Exception(
                        'Disabled calls to %s for 15 minutes because so many failed requests'
                        % host)
                else:
                    return ''
            else:
                # Cool-down period is over: re-enable this host
                del self.http_failed_request[host]
                del self.http_failed_disabled[host]

        self.wait(host)
        try:

            # Make sure opener has the correct headers
            if opener:
                opener.add_headers = headers

            if multipart:
                log.info('Opening multipart url: %s, params: %s',
                         (url, [x for x in params.iterkeys()] if isinstance(
                             params, dict) else 'with data'))
                request = urllib2.Request(url, params, headers)

                if opener:
                    opener.add_handler(MultipartPostHandler())
                else:
                    cookies = cookielib.CookieJar()
                    opener = urllib2.build_opener(
                        urllib2.HTTPCookieProcessor(cookies),
                        MultipartPostHandler)

                response = opener.open(request, timeout=timeout)
            else:
                log.info('Opening url: %s, params: %s',
                         (url, [x for x in params.iterkeys()] if isinstance(
                             params, dict) else 'with data'))

                # Pre-encoded string params go out as-is; dicts get urlencoded
                if isinstance(params, (str, unicode)) and len(params) > 0:
                    data = params
                else:
                    data = tryUrlencode(params) if len(params) > 0 else None

                request = urllib2.Request(url, data, headers)

                if opener:
                    response = opener.open(request, timeout=timeout)
                else:
                    response = urllib2.urlopen(request, timeout=timeout)

            # unzip if needed
            if response.info().get('Content-Encoding') == 'gzip':
                buf = StringIO(response.read())
                f = gzip.GzipFile(fileobj=buf)
                data = f.read()
                f.close()
            else:
                data = response.read()
            response.close()

            self.http_failed_request[host] = 0
        except IOError:
            if show_error:
                log.error('Failed opening url in %s: %s %s',
                          (self.getName(), url, traceback.format_exc(1)))

            # Save failed requests by hosts
            try:
                if not self.http_failed_request.get(host):
                    self.http_failed_request[host] = 1
                else:
                    self.http_failed_request[host] += 1

                    # Disable temporarily after more than 5 failures
                    if self.http_failed_request[host] > 5 and not isLocalIP(
                            host):
                        self.http_failed_disabled[host] = time.time()

            except:
                log.debug('Failed logging failed requests for %s: %s',
                          (url, traceback.format_exc()))

            raise

        self.http_last_use[host] = time.time()

        return data
예제 #3
0
    def urlopen(self, url, timeout = 30, data = None, headers = None, files = None, show_error = True, stream = False):
        """Fetch url through the shared http opener and return its body.

        POST is used when data or files is supplied, GET otherwise. A host
        that keeps failing (more than 5 times, or one HTTP 429) is disabled
        for 15 minutes. When stream is True the response object itself is
        returned instead of its content.

        :param url: target URL; percent-quoted before use.
        :param timeout: per-request timeout in seconds.
        :param data: optional dict of form data to POST.
        :param headers: optional dict of headers merged over the defaults.
        :param files: optional multipart files mapping (forces POST).
        :param show_error: log failures when True; also selects the
            raise-vs-return behaviour for disabled hosts below.
        :param stream: when True, return the response object for streaming.
        :return: response content (or the response when streaming), or ''
            when the host is disabled.
        """
        url = quote(ss(url), safe = "%/:=&?~#+!$,;'@()*[]")

        if not headers: headers = {}
        if not data: data = {}

        # Fill in some headers (caller-supplied values take precedence)
        parsed_url = urlparse(url)
        host = '%s%s' % (parsed_url.hostname, (':' + str(parsed_url.port) if parsed_url.port else ''))

        headers['Referer'] = headers.get('Referer', '%s://%s' % (parsed_url.scheme, host))
        # Host defaults to None here (unlike the other headers) — presumably
        # so the HTTP library derives it from the URL itself; TODO confirm
        headers['Host'] = headers.get('Host', None)
        headers['User-Agent'] = headers.get('User-Agent', self.user_agent)
        headers['Accept-encoding'] = headers.get('Accept-encoding', 'gzip')
        headers['Connection'] = headers.get('Connection', 'keep-alive')
        headers['Cache-Control'] = headers.get('Cache-Control', 'max-age=0')

        # Build a proxies mapping when proxying is enabled: an explicit
        # server (with optional credentials) or the environment's proxies
        use_proxy = Env.setting('use_proxy')
        proxy_url = None

        if use_proxy:
            proxy_server = Env.setting('proxy_server')
            proxy_username = Env.setting('proxy_username')
            proxy_password = Env.setting('proxy_password')

            if proxy_server:
                loc = "{0}:{1}@{2}".format(proxy_username, proxy_password, proxy_server) if proxy_username else proxy_server
                proxy_url = {
                    "http": "http://"+loc,
                    "https": "https://"+loc,
                }
            else:
                proxy_url = getproxies()

        r = Env.get('http_opener')

        # Don't try for failed requests
        if self.http_failed_disabled.get(host, 0) > 0:
            if self.http_failed_disabled[host] > (time.time() - 900):
                log.info2('Disabled calls to %s for 15 minutes because so many failed requests.', host)
                if not show_error:
                    raise Exception('Disabled calls to %s for 15 minutes because so many failed requests' % host)
                else:
                    return ''
            else:
                # Cool-down period is over: re-enable this host
                del self.http_failed_request[host]
                del self.http_failed_disabled[host]

        self.wait(host, url)
        # status_code stays None when the request never got a response
        status_code = None
        try:

            kwargs = {
                'headers': headers,
                'data': data if len(data) > 0 else None,
                'timeout': timeout,
                'files': files,
                'verify': False, #verify_ssl, Disable for now as to many wrongly implemented certificates..
                'stream': stream,
                'proxies': proxy_url,
            }
            method = 'post' if len(data) > 0 or files else 'get'

            log.info('Opening url: %s %s, data: %s', (method, url, [x for x in data.keys()] if isinstance(data, dict) else 'with data'))
            response = r.request(method, url, **kwargs)

            status_code = response.status_code
            if response.status_code == requests.codes.ok:
                data = response if stream else response.content
            else:
                # Non-OK responses raise and are handled below
                response.raise_for_status()

            self.http_failed_request[host] = 0
        except (IOError, MaxRetryError, Timeout):
            if show_error:
                log.error('Failed opening url in %s: %s %s', (self.getName(), url, traceback.format_exc(0)))

            # Save failed requests by hosts
            try:

                # To many requests: disable the host immediately.
                # NOTE(review): the generic counter below then still runs,
                # bumping the count from 1 to 2 — appears intentional.
                if status_code in [429]:
                    self.http_failed_request[host] = 1
                    self.http_failed_disabled[host] = time.time()

                if not self.http_failed_request.get(host):
                    self.http_failed_request[host] = 1
                else:
                    self.http_failed_request[host] += 1

                    # Disable temporarily
                    if self.http_failed_request[host] > 5 and not isLocalIP(host):
                        self.http_failed_disabled[host] = time.time()

            except:
                log.debug('Failed logging failed requests for %s: %s', (url, traceback.format_exc()))

            raise

        self.http_last_use[host] = time.time()

        return data
예제 #4
0
    def urlopen(self, url, timeout = 30, data = None, headers = None, files = None, show_error = True, verify_ssl = True):
        """Fetch url through the shared http opener and return its body.

        POST is used when data or files is supplied, GET otherwise. A host
        that keeps failing (more than 5 times) is disabled for 15 minutes.

        :param url: target URL; percent-quoted before use.
        :param timeout: per-request timeout in seconds.
        :param data: optional dict of form data to POST.
        :param headers: optional dict of headers merged over the defaults.
        :param files: optional multipart files mapping (forces POST).
        :param show_error: log failures when True; also selects the
            raise-vs-return behaviour for disabled hosts below.
        :param verify_ssl: passed through as the opener's `verify` option.
        :return: response content, or '' when the host is disabled.
        """
        url = urllib2.quote(ss(url), safe = "%/:=&?~#+!$,;'@()*[]")

        if not headers: headers = {}
        if not data: data = {}

        # Fill in some headers (caller-supplied values take precedence)
        parsed_url = urlparse(url)
        host = '%s%s' % (parsed_url.hostname, (':' + str(parsed_url.port) if parsed_url.port else ''))

        headers['Referer'] = headers.get('Referer', '%s://%s' % (parsed_url.scheme, host))
        headers['Host'] = headers.get('Host', host)
        headers['User-Agent'] = headers.get('User-Agent', self.user_agent)
        headers['Accept-encoding'] = headers.get('Accept-encoding', 'gzip')
        headers['Connection'] = headers.get('Connection', 'keep-alive')
        headers['Cache-Control'] = headers.get('Cache-Control', 'max-age=0')

        r = self.http_opener

        # Don't try for failed requests
        if self.http_failed_disabled.get(host, 0) > 0:
            if self.http_failed_disabled[host] > (time.time() - 900):
                log.info2('Disabled calls to %s for 15 minutes because so many failed requests.', host)
                if not show_error:
                    # Bug fix: the '%s' placeholder was never filled in
                    raise Exception('Disabled calls to %s for 15 minutes because so many failed requests' % host)
                else:
                    return ''
            else:
                # Cool-down period is over: re-enable this host
                del self.http_failed_request[host]
                del self.http_failed_disabled[host]

        self.wait(host)
        try:

            kwargs = {
                'headers': headers,
                'data': data if len(data) > 0 else None,
                'timeout': timeout,
                'files': files,
                'verify': verify_ssl,
            }
            method = 'post' if len(data) > 0 or files else 'get'

            log.info('Opening url: %s %s, data: %s', (method, url, [x for x in data.keys()] if isinstance(data, dict) else 'with data'))
            response = r.request(method, url, **kwargs)

            if response.status_code == requests.codes.ok:
                data = response.content
            else:
                response.raise_for_status()

            self.http_failed_request[host] = 0
        except (IOError, MaxRetryError, Timeout):
            if show_error:
                log.error('Failed opening url in %s: %s %s', (self.getName(), url, traceback.format_exc(0)))

            # Save failed requests by hosts
            try:
                if not self.http_failed_request.get(host):
                    self.http_failed_request[host] = 1
                else:
                    self.http_failed_request[host] += 1

                    # Disable temporarily after more than 5 failures
                    if self.http_failed_request[host] > 5 and not isLocalIP(host):
                        self.http_failed_disabled[host] = time.time()

            except:
                log.debug('Failed logging failed requests for %s: %s', (url, traceback.format_exc()))

            raise

        self.http_last_use[host] = time.time()

        return data
예제 #5
0
    def urlopen(self,
                url,
                timeout=30,
                data=None,
                headers=None,
                files=None,
                show_error=True,
                stream=False):
        """Fetch ``url`` through the shared http opener and return its body.

        POST is used when ``data`` or ``files`` is supplied, GET otherwise.
        A host that keeps failing (more than 5 times, or one HTTP 429) is
        disabled for 15 minutes. When ``stream`` is True the response
        object itself is returned instead of its content.

        :param url: target URL; percent-quoted before use.
        :param timeout: per-request timeout in seconds.
        :param data: optional dict of form data to POST.
        :param headers: optional dict of headers merged over the defaults.
        :param files: optional multipart files mapping (forces POST).
        :param show_error: log failures when True; also selects the
            raise-vs-return behaviour for disabled hosts below.
        :param stream: when True, return the response object for streaming.
        :return: response content (or the response when streaming), or ''
            when the host is disabled.
        """
        url = quote(ss(url), safe="%/:=&?~#+!$,;'@()*[]")

        if not headers: headers = {}
        if not data: data = {}

        # Fill in some headers (caller-supplied values take precedence)
        parsed_url = urlparse(url)
        host = '%s%s' % (parsed_url.hostname,
                         (':' +
                          str(parsed_url.port) if parsed_url.port else ''))

        headers['Referer'] = headers.get('Referer',
                                         '%s://%s' % (parsed_url.scheme, host))
        # Host defaults to None here (unlike the other headers) — presumably
        # so the HTTP library derives it from the URL itself; TODO confirm
        headers['Host'] = headers.get('Host', None)
        headers['User-Agent'] = headers.get('User-Agent', self.user_agent)
        headers['Accept-encoding'] = headers.get('Accept-encoding', 'gzip')
        headers['Connection'] = headers.get('Connection', 'keep-alive')
        headers['Cache-Control'] = headers.get('Cache-Control', 'max-age=0')

        # Bug fix: removed the dead self-assignment of an existing
        # 'Authorization' header — it was a guaranteed no-op.

        # Build a proxies mapping when proxying is enabled: an explicit
        # server (with optional credentials) or the environment's proxies
        use_proxy = Env.setting('use_proxy')
        proxy_url = None

        if use_proxy:
            proxy_server = Env.setting('proxy_server')
            proxy_username = Env.setting('proxy_username')
            proxy_password = Env.setting('proxy_password')

            if proxy_server:
                loc = "{0}:{1}@{2}".format(
                    proxy_username, proxy_password,
                    proxy_server) if proxy_username else proxy_server
                proxy_url = {
                    "http": "http://" + loc,
                    "https": "https://" + loc,
                }
            else:
                proxy_url = getproxies()

        r = Env.get('http_opener')

        # Don't try for failed requests
        if self.http_failed_disabled.get(host, 0) > 0:
            if self.http_failed_disabled[host] > (time.time() - 900):
                log.info2(
                    'Disabled calls to %s for 15 minutes because so many failed requests.',
                    host)
                if not show_error:
                    raise Exception(
                        'Disabled calls to %s for 15 minutes because so many failed requests'
                        % host)
                else:
                    return ''
            else:
                # Cool-down period is over: re-enable this host
                del self.http_failed_request[host]
                del self.http_failed_disabled[host]

        self.wait(host, url)
        # status_code stays None when the request never got a response
        status_code = None
        try:

            kwargs = {
                'headers': headers,
                'data': data if len(data) > 0 else None,
                'timeout': timeout,
                'files': files,
                'verify':
                False,  #verify_ssl, Disable for now as to many wrongly implemented certificates..
                'stream': stream,
                'proxies': proxy_url,
            }
            method = 'post' if len(data) > 0 or files else 'get'

            log.info('Opening url: %s %s, data: %s',
                     (method, url, [x for x in data.keys()] if isinstance(
                         data, dict) else 'with data'))
            response = r.request(method, url, **kwargs)

            status_code = response.status_code
            if response.status_code == requests.codes.ok:
                data = response if stream else response.content
            else:
                # Non-OK responses raise and are handled below
                response.raise_for_status()

            self.http_failed_request[host] = 0
        except (IOError, MaxRetryError, Timeout):
            if show_error:
                log.error('Failed opening url in %s: %s %s',
                          (self.getName(), url, traceback.format_exc(0)))

            # Save failed requests by hosts
            try:

                # To many requests: disable the host immediately
                if status_code in [429]:
                    self.http_failed_request[host] = 1
                    self.http_failed_disabled[host] = time.time()

                if not self.http_failed_request.get(host):
                    self.http_failed_request[host] = 1
                else:
                    self.http_failed_request[host] += 1

                    # Disable temporarily after more than 5 failures
                    if self.http_failed_request[host] > 5 and not isLocalIP(
                            host):
                        self.http_failed_disabled[host] = time.time()

            except:
                log.debug('Failed logging failed requests for %s: %s',
                          (url, traceback.format_exc()))

            raise

        self.http_last_use[host] = time.time()

        return data
예제 #6
0
    def urlopen(self, url, timeout=30, data=None, headers=None, files=None, show_error=True):
        """Fetch url through the shared http opener and return its body.

        POST is used when data or files is supplied, GET otherwise. A host
        that keeps failing (more than 5 times) is disabled for 15 minutes.

        :param url: target URL; percent-quoted before use.
        :param timeout: per-request timeout in seconds.
        :param data: optional dict of form data to POST.
        :param headers: optional dict of headers merged over the defaults.
        :param files: optional multipart files mapping (forces POST).
        :param show_error: log failures when True; also selects the
            raise-vs-return behaviour for disabled hosts below.
        :return: response content, or "" when the host is disabled.
        """
        url = urllib2.quote(ss(url), safe="%/:=&?~#+!$,;'@()*[]")

        if not headers:
            headers = {}
        if not data:
            data = {}

        # Fill in some headers (caller-supplied values take precedence)
        parsed_url = urlparse(url)
        host = "%s%s" % (parsed_url.hostname, (":" + str(parsed_url.port) if parsed_url.port else ""))

        headers["Referer"] = headers.get("Referer", "%s://%s" % (parsed_url.scheme, host))
        headers["Host"] = headers.get("Host", host)
        headers["User-Agent"] = headers.get("User-Agent", self.user_agent)
        headers["Accept-encoding"] = headers.get("Accept-encoding", "gzip")
        headers["Connection"] = headers.get("Connection", "keep-alive")
        headers["Cache-Control"] = headers.get("Cache-Control", "max-age=0")

        r = self.http_opener

        # Don't try for failed requests
        if self.http_failed_disabled.get(host, 0) > 0:
            if self.http_failed_disabled[host] > (time.time() - 900):
                log.info2("Disabled calls to %s for 15 minutes because so many failed requests.", host)
                if not show_error:
                    # Bug fix: the '%s' placeholder was never filled in
                    raise Exception("Disabled calls to %s for 15 minutes because so many failed requests" % host)
                else:
                    return ""
            else:
                # Cool-down period is over: re-enable this host
                del self.http_failed_request[host]
                del self.http_failed_disabled[host]

        self.wait(host)
        try:

            kwargs = {"headers": headers, "data": data if len(data) > 0 else None, "timeout": timeout, "files": files}
            method = "post" if len(data) > 0 or files else "get"

            log.info(
                "Opening url: %s %s, data: %s",
                (method, url, [x for x in data.keys()] if isinstance(data, dict) else "with data"),
            )
            response = r.request(method, url, verify=False, **kwargs)

            if response.status_code == requests.codes.ok:
                data = response.content
            else:
                response.raise_for_status()

            self.http_failed_request[host] = 0
        except (IOError, MaxRetryError, Timeout):
            if show_error:
                log.error("Failed opening url in %s: %s %s", (self.getName(), url, traceback.format_exc(0)))

            # Save failed requests by hosts
            try:
                if not self.http_failed_request.get(host):
                    self.http_failed_request[host] = 1
                else:
                    self.http_failed_request[host] += 1

                    # Disable temporarily after more than 5 failures
                    if self.http_failed_request[host] > 5 and not isLocalIP(host):
                        self.http_failed_disabled[host] = time.time()

            except:
                log.debug("Failed logging failed requests for %s: %s", (url, traceback.format_exc()))

            raise

        self.http_last_use[host] = time.time()

        return data
예제 #7
0
    def urlopen(self, url, timeout = 30, params = None, headers = None, opener = None, multipart = False, show_error = True):
        """Fetch url with urllib2 and return the (gunzipped) response body.

        POSTs params when given (url-encoded, raw when already a string, or
        multipart when multipart is True); GETs otherwise. A host that keeps
        failing (more than 5 times) is disabled for 15 minutes.

        :param url: target URL; percent-quoted before use.
        :param timeout: per-request timeout in seconds.
        :param params: optional dict (or pre-encoded string) of POST data.
        :param headers: optional dict of headers merged over the defaults.
        :param opener: optional urllib2 opener to reuse; gets our headers.
        :param multipart: send params as a multipart/form-data POST.
        :param show_error: log failures when True; also selects the
            raise-vs-return behaviour for disabled hosts below.
        :return: response body, or '' when the host is disabled.
        """
        url = urllib2.quote(ss(url), safe = "%/:=&?~#+!$,;'@()*[]")

        if not headers: headers = {}
        if not params: params = {}

        # Fill in some headers (caller-supplied values take precedence)
        parsed_url = urlparse(url)
        host = '%s%s' % (parsed_url.hostname, (':' + str(parsed_url.port) if parsed_url.port else ''))

        headers['Referer'] = headers.get('Referer', '%s://%s' % (parsed_url.scheme, host))
        headers['Host'] = headers.get('Host', host)
        headers['User-Agent'] = headers.get('User-Agent', self.user_agent)
        headers['Accept-encoding'] = headers.get('Accept-encoding', 'gzip')
        headers['Connection'] = headers.get('Connection', 'keep-alive')
        headers['Cache-Control'] = headers.get('Cache-Control', 'max-age=0')

        # Don't try for failed requests
        if self.http_failed_disabled.get(host, 0) > 0:
            if self.http_failed_disabled[host] > (time.time() - 900):
                log.info2('Disabled calls to %s for 15 minutes because so many failed requests.', host)
                if not show_error:
                    # Bug fix: the '%s' placeholder was never filled in
                    raise Exception('Disabled calls to %s for 15 minutes because so many failed requests' % host)
                else:
                    return ''
            else:
                # Cool-down period is over: re-enable this host
                del self.http_failed_request[host]
                del self.http_failed_disabled[host]

        self.wait(host)
        try:

            # Make sure opener has the correct headers
            if opener:
                opener.add_headers = headers

            if multipart:
                log.info('Opening multipart url: %s, params: %s', (url, [x for x in params.iterkeys()] if isinstance(params, dict) else 'with data'))
                request = urllib2.Request(url, params, headers)

                if opener:
                    opener.add_handler(MultipartPostHandler())
                else:
                    cookies = cookielib.CookieJar()
                    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookies), MultipartPostHandler)

                response = opener.open(request, timeout = timeout)
            else:
                log.info('Opening url: %s, params: %s', (url, [x for x in params.iterkeys()] if isinstance(params, dict) else 'with data'))

                # Pre-encoded string params go out as-is; dicts get urlencoded
                if isinstance(params, (str, unicode)) and len(params) > 0:
                    data = params
                else:
                    data = tryUrlencode(params) if len(params) > 0 else None

                request = urllib2.Request(url, data, headers)

                if opener:
                    response = opener.open(request, timeout = timeout)
                else:
                    response = urllib2.urlopen(request, timeout = timeout)

            # unzip if needed
            if response.info().get('Content-Encoding') == 'gzip':
                buf = StringIO(response.read())
                f = gzip.GzipFile(fileobj = buf)
                data = f.read()
                f.close()
            else:
                data = response.read()
            response.close()

            self.http_failed_request[host] = 0
        except IOError:
            if show_error:
                log.error('Failed opening url in %s: %s %s', (self.getName(), url, traceback.format_exc(1)))

            # Save failed requests by hosts
            try:
                if not self.http_failed_request.get(host):
                    self.http_failed_request[host] = 1
                else:
                    self.http_failed_request[host] += 1

                    # Disable temporarily after more than 5 failures
                    if self.http_failed_request[host] > 5 and not isLocalIP(host):
                        self.http_failed_disabled[host] = time.time()

            except:
                log.debug('Failed logging failed requests for %s: %s', (url, traceback.format_exc()))

            raise

        self.http_last_use[host] = time.time()

        return data