Example #1
0
    def http_get(self, url, headers=None, timeout=None, cache_enabled=True):
        """ Fetch ``url`` with an HTTP GET, consulting the response cache first.

            When caching is active and an entry exists for ``url``, a minimal
            stand-in object carrying only ``status_code`` and ``text`` is
            returned and no request is made. Otherwise the response returned
            by ``requests.get`` is handed back (and stored in the cache when
            caching is active).

            Raises ProviderTimeout when requests reports a timeout, and
            ProviderHttpError for any other requests RequestException. """

        from totalimpact.api import app

        # caching needs both the global config switch and the per-call flag
        use_cache = app.config["CACHE_ENABLED"] and cache_enabled

        if use_cache:
            c = Cache(self.max_cache_duration)
            cached = c.get_cache_entry(url)
            if cached:
                # Stripped down equivalent of requests.models.Response:
                # only the status code and body text are kept in the cache.
                class CachedResponse:
                    pass
                hit = CachedResponse()
                hit.status_code = cached['status_code']
                hit.text = cached['text']
                return hit

        if headers is None:
            headers = {}

        # route both schemes through the configured proxy, if there is one
        if app.config["PROXY"]:
            proxies = {'http' : app.config["PROXY"], 'https' : app.config["PROXY"]}
        else:
            proxies = None

        # make the request, translating requests' errors into provider errors
        try:
            r = requests.get(url, headers=headers, timeout=timeout, proxies=proxies)
        except requests.exceptions.Timeout as e:
            msg = "Attempt to connect to provider timed out during GET on " + url
            logger.debug(msg)
            raise ProviderTimeout(msg, e)
        except requests.exceptions.RequestException as e:
            msg = "RequestException during GET on: " + url
            logger.info(msg)
            raise ProviderHttpError(msg, e)

        # remember the live response for next time, then hand it back
        if use_cache:
            c.set_cache_entry(url, {'text' : r.text, 'status_code' : r.status_code})
        return r
Example #2
0
    def http_get(self,
                 url,
                 headers=None,
                 timeout=20,
                 cache_enabled=True,
                 allow_redirects=False):
        """ Issue an HTTP GET for ``url``, serving from the cache when possible.

            The cache key is built from the caller-supplied headers plus the
            url and the ``allow_redirects`` flag. A cached entry is only
            reused when its stored status code is 200; otherwise a live
            request is made.

            Args:
                url: address to fetch.
                headers: optional dict of extra request headers. It is copied
                    before the User-Agent header is added, so the caller's
                    dict is never modified.
                timeout: passed through to ``requests.get``.
                cache_enabled: set to False to bypass the cache for this call
                    even when CACHE_ENABLED is on in the app config.
                allow_redirects: passed through to ``requests.get``.

            Returns:
                On a cache hit, a bare ``CachedResponse`` object carrying only
                ``status_code``, ``url`` and ``text``. Otherwise the response
                object returned by ``requests.get``.

            Raises:
                ProviderTimeout: if requests reports a timeout.
                ProviderHttpError: on any other requests RequestException.
        """

        from totalimpact import app

        # use the cache if the config parameter is set and the arg allows it
        use_cache = app.config["CACHE_ENABLED"] and cache_enabled

        # The key is derived from the caller's headers only (before the
        # User-Agent is added), together with the url and the redirect flag.
        cache_key = headers.copy() if headers else {}
        cache_key.update({"url": url, "allow_redirects": allow_redirects})

        # first thing is to try to retrieve from cache
        if use_cache:
            c = Cache(self.max_cache_duration)
            cache_data = c.get_cache_entry(cache_key)
            if cache_data:

                class CachedResponse:
                    pass

                r = CachedResponse()
                r.status_code = cache_data['status_code']

                # Return a stripped down equivalent of requests.models.Response
                # We don't store headers or other information here. If we need
                # that later, we can add it
                # use it if it was a 200, otherwise go get it again
                if r.status_code == 200:
                    r.url = cache_data['url']
                    r.text = cache_data['text']
                    self.logger.debug(u"returning from cache: %s" % (url))
                    return r

        # Ensure that a user-agent string is set. Work on a copy: writing
        # into the dict the caller passed in would silently change the
        # caller's headers.
        request_headers = headers.copy() if headers else {}
        request_headers["User-Agent"] = app.config["USER_AGENT"]

        analytics.track("CORE",
                        "Sent GET to Provider", {
                            "provider": self.provider_name,
                            "url": url
                        },
                        context={"providers": {
                            'Mixpanel': False
                        }})

        # route both schemes through the configured proxy, if there is one
        proxies = None
        if app.config["PROXY"]:
            proxies = {
                'http': app.config["PROXY"],
                'https': app.config["PROXY"]
            }

        try:
            self.logger.debug(u"/biblio_print LIVE {url}".format(url=url))
        except UnicodeDecodeError:
            # formatting the url into a unicode template failed; decode the
            # url as utf-8 explicitly and log again
            self.logger.debug(
                u"%s fyi: needing force url to unicode to print" %
                (self.provider_name))
            self.logger.debug(u"/biblio_print LIVE {url}".format(
                url=unicode(url, "utf-8")))

        # make the request; only the network call sits inside the try so the
        # handlers below cannot mask unrelated errors
        try:
            # NOTE(review): verify=False turns off TLS certificate
            # verification for every provider request -- confirm this is
            # still required before relying on these responses.
            r = requests.get(url,
                             headers=request_headers,
                             timeout=timeout,
                             proxies=proxies,
                             allow_redirects=allow_redirects,
                             verify=False)

        except requests.exceptions.Timeout as e:
            analytics.track("CORE",
                            "Received no response from Provider (timeout)", {
                                "provider": self.provider_name,
                                "url": url
                            })

            self.logger.info(u"%s Provider timed out during GET on %s" %
                             (self.provider_name, url))
            raise ProviderTimeout("Provider timed out during GET on " + url, e)

        except requests.exceptions.RequestException as e:
            analytics.track("CORE", "Received RequestException from Provider",
                            {
                                "provider": self.provider_name,
                                "url": url
                            })

            self.logger.info(u"%s RequestException during GET on %s" %
                             (self.provider_name, url))
            raise ProviderHttpError("RequestException during GET on: " + url,
                                    e)

        # fall back to utf-8 when the response does not report an encoding,
        # before r.text is read below
        if not r.encoding:
            r.encoding = "utf-8"

        # cache the response and return
        # NOTE(review): the truthiness of a requests Response reflects
        # ``r.ok``, so error responses are presumably never written to the
        # cache -- confirm that is intended.
        if r and use_cache:
            cache_data = {
                'text': r.text,
                'status_code': r.status_code,
                'url': r.url
            }
            c.set_cache_entry(cache_key, cache_data)
        return r
Example #3
0
    def http_get(self, url, headers=None, timeout=20, cache_enabled=True, allow_redirects=False):
        """ Issue an HTTP GET for ``url``, serving from the cache when possible.

            The cache key is built from the caller-supplied headers plus the
            url and the ``allow_redirects`` flag. A cached entry is only
            reused when its stored status code is 200; otherwise a live
            request is made.

            Args:
                url: address to fetch.
                headers: optional dict of extra request headers. It is copied
                    before the User-Agent header is added, so the caller's
                    dict is never modified.
                timeout: passed through to ``requests.get``.
                cache_enabled: set to False to bypass the cache for this call
                    even when CACHE_ENABLED is on in the app config.
                allow_redirects: passed through to ``requests.get``.

            Returns:
                On a cache hit, a bare ``CachedResponse`` object carrying only
                ``status_code``, ``url`` and ``text``. Otherwise the response
                object returned by ``requests.get``.

            Raises:
                ProviderTimeout: if requests reports a timeout.
                ProviderHttpError: on any other requests RequestException.
        """

        from totalimpact import app

        # use the cache if the config parameter is set and the arg allows it
        use_cache = app.config["CACHE_ENABLED"] and cache_enabled

        # The key is derived from the caller's headers only (before the
        # User-Agent is added), together with the url and the redirect flag.
        cache_key = headers.copy() if headers else {}
        cache_key.update({"url":url, "allow_redirects":allow_redirects})

        # first thing is to try to retrieve from cache
        if use_cache:
            c = Cache(self.max_cache_duration)
            cache_data = c.get_cache_entry(cache_key)
            if cache_data:
                class CachedResponse:
                    pass
                r = CachedResponse()
                r.status_code = cache_data['status_code']

                # Return a stripped down equivalent of requests.models.Response
                # We don't store headers or other information here. If we need
                # that later, we can add it
                # use it if it was a 200, otherwise go get it again
                if r.status_code == 200:
                    r.url = cache_data['url']
                    r.text = cache_data['text']
                    self.logger.debug("returning from cache: %s" %(url))
                    return r

        # Ensure that a user-agent string is set. Work on a copy: writing
        # into the dict the caller passed in would silently change the
        # caller's headers.
        request_headers = headers.copy() if headers else {}
        request_headers["User-Agent"] = app.config["USER_AGENT"]

        # route both schemes through the configured proxy, if there is one
        proxies = None
        if app.config["PROXY"]:
            proxies = {'http' : app.config["PROXY"], 'https' : app.config["PROXY"]}
        self.logger.debug("LIVE %s" %(url))

        # make the request; only the network call sits inside the try so the
        # handlers below cannot mask unrelated errors
        try:
            # NOTE(review): verify=False turns off TLS certificate
            # verification for every provider request -- confirm this is
            # still required before relying on these responses.
            r = requests.get(url, headers=request_headers, timeout=timeout, proxies=proxies, allow_redirects=allow_redirects, verify=False)
        except requests.exceptions.Timeout as e:
            self.logger.info("%s Attempt to connect to provider timed out during GET on %s" %(self.provider_name, url))
            raise ProviderTimeout("Attempt to connect to provider timed out during GET on " + url, e)
        except requests.exceptions.RequestException as e:
            raise ProviderHttpError("RequestException during GET on: " + url, e)

        # fall back to utf-8 when the response does not report an encoding,
        # before r.text is read below
        if not r.encoding:
            r.encoding = "utf-8"

        # cache the response and return
        # NOTE(review): the truthiness of a requests Response reflects
        # ``r.ok``, so error responses are presumably never written to the
        # cache -- confirm that is intended.
        if r and use_cache:
            cache_data = {'text' : r.text,
                'status_code' : r.status_code,
                'url': r.url}
            c.set_cache_entry(cache_key, cache_data)
        return r