Example #1
0
    def _fetch(self, url, method='GET', params=None, headers=None, body='', max_redirects=5, content_parser=None):
        """
        Fetches a URL.

        This is a generator-based coroutine (it uses ``yield from``), so it
        must itself be driven with ``yield from``.

        The request parameters are merged with ``self.access_params`` and the
        headers with ``self.access_headers``. Any query string already present
        in ``url`` is discarded and replaced by the URL-encoded ``params``.
        For ``POST``, ``PUT`` and ``PATCH`` requests without a ``body`` the
        parameters are sent as a form-encoded body instead of a query string.

        :param str url:
            The URL to fetch.

        :param str method:
            HTTP method of the request.

        :param dict params:
            Dictionary of request parameters. The passed dictionary is not modified.

        :param dict headers:
            HTTP headers of the request. The passed dictionary is not modified.

        :param str body:
            Body of ``POST``, ``PUT`` and ``PATCH`` requests.

        :param int max_redirects:
            Number of maximum HTTP redirects to follow.

        :param function content_parser:
            A callable to be used to parse the :attr:`.Response.data` from :attr:`.Response.content`.

        :returns:
            :class:`authomatic.core.Response` wrapping the fetched response.

        :raises FetchError:
            If the request fails, the URL redirects to itself or the
            redirect limit is exhausted.
        """
        # Work on copies so the caller's dictionaries are never mutated.
        params = dict(params or {})
        params.update(self.access_params)

        headers = dict(headers or {})
        headers.update(self.access_headers)

        # The original query and fragment are intentionally dropped;
        # the query is rebuilt from ``params`` below.
        scheme, host, path, _, _ = parse.urlsplit(url)
        body = parse.parse_qsl(body)
        query = parse.urlencode(params)

        if method in ('POST', 'PUT', 'PATCH'):
            if not body:
                # No explicit body: send the parameters as the form body.
                body = params
                query = ''
                headers.update({'Content-Type': 'application/x-www-form-urlencoded'})
        elif method == 'GET':
            # Compare for equality: ``method in ('GET')`` would be a
            # substring test against the string 'GET'.
            body = []

        request_path = parse.urlunsplit(('', '', path or '', query or '', ''))

        self._log(logging.DEBUG, ' \u251C\u2500 host: {0}'.format(host))
        self._log(logging.DEBUG, ' \u251C\u2500 path: {0}'.format(request_path))
        self._log(logging.DEBUG, ' \u251C\u2500 method: {0}'.format(method))
        self._log(logging.DEBUG, ' \u251C\u2500 body: {0}'.format(str(body)))
        self._log(logging.DEBUG, ' \u251C\u2500 params: {0}'.format(params))
        self._log(logging.DEBUG, ' \u2514\u2500 headers: {0}'.format(headers))

        # Rebuild the absolute URL with the new query string.
        url = parse.urlunsplit((scheme.lower(), host, request_path, '', ''))

        # Connect
        try:
            response = yield from aiohttp.request(method,
                                                  url,
                                                  data=body,
                                                  headers=headers)
            response.page_content = yield from response.read()
        except Exception as e:
            # Not every exception carries arguments; fall back to str(e).
            raise FetchError('Could not connect!',
                             original_message=e.args[0] if e.args else str(e),
                             url=url) from e

        location = response.headers.get('Location')

        if response.status in (300, 301, 302, 303, 307) and location:
            if location == url:
                raise FetchError('Url redirects to itself!',
                                 url=location,
                                 status=response.status)

            if max_redirects <= 0:
                raise FetchError('Max redirects reached!',
                                 url=location,
                                 status=response.status)

            remaining_redirects = max_redirects - 1

            self._log(logging.DEBUG, 'Redirecting to {0}'.format(location))
            self._log(logging.DEBUG, 'Remaining redirects: {0}'.format(remaining_redirects))

            # Call this method again. It is a coroutine, so it has to be
            # delegated to with ``yield from``; a plain call would only
            # create a generator object.
            # NOTE(review): the recursive result is already wrapped in
            # authomatic.core.Response and gets wrapped once more below;
            # kept as in the original - confirm Response supports this.
            response = yield from self._fetch(url=location,
                                              params=params,
                                              method=method,
                                              headers=headers,
                                              max_redirects=remaining_redirects)
        else:
            self._log(logging.DEBUG, 'Got response:')
            self._log(logging.DEBUG, ' \u251C\u2500 url: {0}'.format(url))
            self._log(logging.DEBUG, ' \u251C\u2500 status: {0}'.format(response.status))
            self._log(logging.DEBUG, ' \u2514\u2500 headers: {0}'.format(list(response.headers.items())))

        return authomatic.core.Response(response, content_parser)
Example #2
0
    def _fetch(cls,
               url,
               method='GET',
               params=None,
               headers=None,
               body='',
               max_redirects=5,
               content_parser=None):
        """
        Fetches a URL.

        Any query string already present in ``url`` is discarded and replaced
        by the URL-encoded ``params``. For ``POST``, ``PUT`` and ``PATCH``
        requests without a ``body`` the encoded parameters are sent as a
        form-encoded body instead of a query string.

        :param str url:
            The URL to fetch.

        :param str method:
            HTTP method of the request.

        :param dict params:
            Dictionary of request parameters.

        :param dict headers:
            HTTP headers of the request. The passed dictionary is not modified.

        :param str body:
            Body of ``POST``, ``PUT`` and ``PATCH`` requests.

        :param int max_redirects:
            Number of maximum HTTP redirects to follow.

        :param function content_parser:
            A callable to be used to parse the :attr:`.Response.data` from :attr:`.Response.content`.

        :returns:
            :class:`authomatic.core.Response` wrapping the fetched response.

        :raises FetchError:
            If sending the request fails, the URL redirects to itself or the
            redirect limit is exhausted.
        """

        params = params or {}
        # Copy so the Content-Type update below never leaks into the
        # caller's dictionary.
        headers = dict(headers or {})

        # The original query and fragment are intentionally dropped;
        # the query is rebuilt from ``params``.
        scheme, host, path, _, _ = urlparse.urlsplit(url)

        query = urllib.urlencode(params)

        if method in ('POST', 'PUT', 'PATCH'):
            if not body:
                # Put querystring to body
                body = query
                query = None
                headers.update(
                    {'Content-Type': 'application/x-www-form-urlencoded'})

        request_path = urlparse.urlunsplit((None, None, path, query, None))

        cls._log(logging.DEBUG, u' \u251C\u2500 host: {}'.format(host))
        cls._log(logging.DEBUG, u' \u251C\u2500 path: {}'.format(request_path))
        cls._log(logging.DEBUG, u' \u251C\u2500 method: {}'.format(method))
        cls._log(logging.DEBUG, u' \u251C\u2500 body: {}'.format(body))
        cls._log(logging.DEBUG, u' \u251C\u2500 params: {}'.format(params))
        cls._log(logging.DEBUG, u' \u2514\u2500 headers: {}'.format(headers))

        # Connect
        if scheme.lower() == 'https':
            connection = httplib.HTTPSConnection(host)
        else:
            connection = httplib.HTTPConnection(host)

        try:
            connection.request(method, request_path, body, headers)
        except Exception as e:
            # ``e.message`` is empty for multi-argument exceptions (e.g.
            # socket errors) and may be missing altogether, so fall back
            # to the string form of the exception.
            raise FetchError('Could not connect!',
                             original_message=(getattr(e, 'message', None)
                                               or str(e)),
                             url=request_path)

        response = connection.getresponse()
        location = response.getheader('Location')

        if response.status in (300, 301, 302, 303, 307) and location:
            if location == url:
                raise FetchError('Url redirects to itself!',
                                 url=location,
                                 status=response.status)

            if max_redirects <= 0:
                raise FetchError('Max redirects reached!',
                                 url=location,
                                 status=response.status)

            remaining_redirects = max_redirects - 1

            cls._log(logging.DEBUG, 'Redirecting to {}'.format(location))
            cls._log(logging.DEBUG,
                     'Remaining redirects: {}'.format(remaining_redirects))

            # Call this method again.
            # NOTE(review): the recursive result is already wrapped in
            # authomatic.core.Response and gets wrapped once more below,
            # where ``content_parser`` is applied; kept as in the original.
            response = cls._fetch(url=location,
                                  params=params,
                                  method=method,
                                  headers=headers,
                                  max_redirects=remaining_redirects)
        else:
            cls._log(logging.DEBUG, u'Got response:')
            cls._log(logging.DEBUG, u' \u251C\u2500 url: {}'.format(url))
            cls._log(logging.DEBUG,
                     u' \u251C\u2500 status: {}'.format(response.status))
            cls._log(
                logging.DEBUG,
                u' \u2514\u2500 headers: {}'.format(response.getheaders()))

        return authomatic.core.Response(response, content_parser)
Example #3
0
def custom_fetch(self,
                 url,
                 method='GET',
                 params=None,
                 headers=None,
                 body='',
                 max_redirects=5,
                 content_parser=None):  # NOQA
    """
    Fetches a URL, optionally through the proxy named by ``http_proxy``.

    The request parameters are merged with ``self.access_params`` and the
    headers with ``self.access_headers``. Any query string already present
    in ``url`` is discarded and replaced by the URL-encoded ``params``.
    For ``POST``, ``PUT`` and ``PATCH`` requests without a ``body`` the
    encoded parameters are sent as a form-encoded body instead.

    When the ``http_proxy`` environment variable is set to a non-empty
    value, the connection is opened to that proxy and the request line
    carries the absolute target URL.

    :param str url:
        The URL to fetch.

    :param str method:
        HTTP method of the request.

    :param dict params:
        Dictionary of request parameters. The passed dictionary is not modified.

    :param dict headers:
        HTTP headers of the request. The passed dictionary is not modified.

    :param str body:
        Body of ``POST``, ``PUT`` and ``PATCH`` requests.

    :param int max_redirects:
        Number of maximum HTTP redirects to follow.

    :param function content_parser:
        A callable passed to :class:`authomatic.core.Response` together
        with the response.

    :returns:
        :class:`authomatic.core.Response` wrapping the fetched response.

    :raises FetchError:
        If sending the request fails, the URL redirects to itself or the
        redirect limit is exhausted.
    """
    # Work on copies so the caller's dictionaries are never mutated.
    params = dict(params or {})
    params.update(self.access_params)

    headers = dict(headers or {})
    headers.update(self.access_headers)

    # The original query and fragment are intentionally dropped;
    # the query is rebuilt from ``params``.
    scheme, host, path, _, _ = urlparse.urlsplit(url)
    query = urllib.urlencode(params)

    if method in ('POST', 'PUT', 'PATCH'):
        if not body:
            # Put querystring to body
            body = query
            query = None
            headers.update(
                {'Content-Type': 'application/x-www-form-urlencoded'})

    request_path = urlparse.urlunsplit((None, None, path, query, None))

    self._log(logging.DEBUG, u' \u251C\u2500 host: {0}'.format(host))
    self._log(logging.DEBUG, u' \u251C\u2500 path: {0}'.format(request_path))
    self._log(logging.DEBUG, u' \u251C\u2500 method: {0}'.format(method))
    self._log(logging.DEBUG, u' \u251C\u2500 body: {0}'.format(body))
    self._log(logging.DEBUG, u' \u251C\u2500 params: {0}'.format(params))
    self._log(logging.DEBUG, u' \u2514\u2500 headers: {0}'.format(headers))

    # Connect
    # An empty ``http_proxy`` value is treated the same as an unset one.
    proxy = os.environ.get('http_proxy')

    if not proxy:
        if scheme.lower() == 'https':
            connection = httplib.HTTPSConnection(host)
        else:
            connection = httplib.HTTPConnection(host)
    else:
        # Accept scheme-less values such as "proxy.local:3128".
        if '://' not in proxy:
            proxy = 'http://' + proxy

        # Let urlsplit separate host and port: this copes with a missing
        # port, embedded credentials and bracketed IPv6 literals, all of
        # which break a plain ``split(':')``.
        proxy_parts = urlparse.urlsplit(proxy)
        proxy_scheme = proxy_parts.scheme
        proxy_host = proxy_parts.hostname
        proxy_port = proxy_parts.port  # None selects the default port

        self._log(
            logging.INFO, u'Using proxy on %s://%s:%s' %
            (proxy_scheme, proxy_host, proxy_port))

        if proxy_scheme.lower() == 'https':
            connection = httplib.HTTPSConnection(proxy_host, proxy_port)
        else:
            connection = httplib.HTTPConnection(proxy_host, proxy_port)

        # A proxy expects the absolute URL in the request line. It is built
        # with plain string formatting so the separator is always "/".
        # NOTE(review): https targets are sent to the proxy as an absolute
        # URL rather than through a CONNECT tunnel - confirm the proxy
        # in use supports that.
        request_path = "%s://%s/%s" % (scheme,
                                       host.rstrip('/'),
                                       request_path.lstrip('/'))

    try:
        connection.request(method, request_path, body, headers)
    except Exception as e:
        # ``e.message`` is empty for multi-argument exceptions (e.g.
        # socket errors) and may be missing altogether, so fall back to
        # the string form of the exception.
        raise FetchError('Could not connect!',
                         original_message=(getattr(e, 'message', None)
                                           or str(e)),
                         url=request_path)

    response = connection.getresponse()
    location = response.getheader('Location')

    if response.status in (300, 301, 302, 303, 307) and location:
        if location == url:
            raise FetchError('Url redirects to itself!',
                             url=location,
                             status=response.status)

        if max_redirects <= 0:
            raise FetchError('Max redirects reached!',
                             url=location,
                             status=response.status)

        remaining_redirects = max_redirects - 1

        self._log(logging.DEBUG, 'Redirecting to {0}'.format(location))
        self._log(logging.DEBUG,
                  'Remaining redirects: {0}'.format(remaining_redirects))

        # Follow the redirect through ``self._fetch``.
        # NOTE(review): presumably this function is installed as ``_fetch``
        # on the provider, so the redirect also goes through the proxy -
        # verify against the caller.
        response = self._fetch(url=location,
                               params=params,
                               method=method,
                               headers=headers,
                               max_redirects=remaining_redirects)
    else:
        self._log(logging.DEBUG, u'Got response:')
        self._log(logging.DEBUG, u' \u251C\u2500 url: {0}'.format(url))
        self._log(logging.DEBUG,
                  u' \u251C\u2500 status: {0}'.format(response.status))
        self._log(logging.DEBUG,
                  u' \u2514\u2500 headers: {0}'.format(response.getheaders()))

    return authomatic.core.Response(response, content_parser)