def get_seeded_content(db: DatabaseHandler, topic_fetch_url: dict) -> typing.Optional[Response]:
    """
    Return a dummy response wrapping the content seeded for this url and topic in topic_seed_urls.

    Arguments:
    db - db handle
    topic_fetch_url - topic_fetch_url dict from db; its 'topics_id' and 'url' keys are used for the lookup

    Returns:
    dummy 200 'OK' response object whose data is the seeded content and whose request is a GET for the url, or None
    if topic_seed_urls has no non-null content for this topic and url

    """
    seeded_contents = db.query(
        "select content from topic_seed_urls where topics_id = %(a)s and url = %(b)s and content is not null",
        {'a': topic_fetch_url['topics_id'], 'b': topic_fetch_url['url']}).flat()

    # Nothing was seeded for this topic / url pair, so the caller has to get the content some other way.
    if not seeded_contents:
        return None

    # If several rows match, only the first one is used.
    response = Response(code=200, message='OK', headers={}, data=seeded_contents[0])
    response.set_request(Request('GET', topic_fetch_url['url']))

    return response
def _make_dummy_bypassed_response(url: str) -> Response:
    """Build a placeholder response for a url: status 200 / 'OK', no headers, empty content, and a GET request
    for that url attached to it."""
    dummy_response = Response(code=200, message='OK', headers={}, data='')

    dummy_request = Request('GET', url)
    dummy_response.set_request(dummy_request)

    return dummy_response
def request(self, request: Request) -> Response:
    """Send a request and hand back the resulting response.

    Every other helper is expected to go through request(), because this is the place that implements max. size,
    callbacks, blacklisted URLs etc.

    Arguments:
    request - request to execute; must not be None

    Returns:
    response for the final page; responses of any redirects that led to it are linked to it via set_previous(),
    newest first

    Raises:
    McRequestException - if the request is None, if it can't be prepared or executed, or if 'requests' returned
    no response
    """
    if request is None:
        raise McRequestException("Request is None.")

    request = self.__blacklist_request_if_needed(request=request)
    self.__log_request(request=request)

    try:
        prepared_request = self.__prepare_request(request)
    except Exception as ex:
        raise McRequestException("Unable to prepare request %s: %s" % (str(request), str(ex),))

    try:
        ua_response = self.__execute_request(prepared_request)
    except Exception as ex:
        raise McRequestException("Unable to execute request %s: %s" % (str(prepared_request), str(ex),))

    final_rq_response = ua_response.requests_response
    if final_rq_response is None:
        raise McRequestException("Response from 'requests' is None.")

    response = Response(
        requests_response=final_rq_response,
        max_size=self.max_size(),
        error_is_client_side=ua_response.error_is_client_side,
    )

    # Walk the redirect history backwards (newest hop first), hanging every older response off the one that
    # followed it
    newer_response = response
    for hop_rq_response in reversed(final_rq_response.history):
        hop_request = Request.from_requests_prepared_request(
            requests_prepared_request=hop_rq_response.request
        )

        # Reading the (chunked?) data of a redirect hop sometimes blows up with:
        #
        #     AttributeError: 'NoneType' object has no attribute 'readline'
        #
        # The data of an intermediate hop doesn't matter much, so just log it and substitute empty data.
        try:
            hop_rq_response.text
        except Exception as ex:
            log.warning("Reading previous response's data failed: %s" % str(ex))
            hop_rq_response.raw_data = io.StringIO('')

        hop_response = Response(requests_response=hop_rq_response, max_size=self.max_size())
        hop_response.set_request(request=hop_request)

        newer_response.set_previous(previous=hop_response)
        newer_response = hop_response

    # The request that actually produced the final response may differ from the one passed in (redirects), so
    # rebuild it from what 'requests' ended up sending
    final_request = Request.from_requests_prepared_request(
        requests_prepared_request=final_rq_response.request
    )
    response.set_request(final_request)

    return response
def request(self, request: Request) -> Response:
    """Execute a request, return a response.

    All other helpers are supposed to use request() internally as it implements max. size, callbacks, blacklisted
    URLs etc.

    Arguments:
    request - request to execute; must not be None

    Returns:
    response for the final page, with the responses of any redirects chained to it via set_previous()

    Raises:
    McRequestException - if the request is None, if preparing or executing it fails (the underlying exception is
    chained as the cause), or if 'requests' returned no response
    """
    if request is None:
        raise McRequestException("Request is None.")

    request = self.__blacklist_request_if_needed(request=request)
    self.__log_request(request=request)

    try:
        requests_prepared_request = self.__prepare_request(request)
    except Exception as ex:
        raise McRequestException("Unable to prepare request %s: %s" % (
            str(request),
            str(ex),
        )) from ex

    try:
        user_agent_response = self.__execute_request(requests_prepared_request)
    except Exception as ex:
        # Wrap the failure like the "prepare" step does, so that callers only have to deal with
        # McRequestException; the original exception stays reachable as the cause.
        raise McRequestException("Unable to execute request %s: %s" % (
            str(requests_prepared_request),
            str(ex),
        )) from ex

    if user_agent_response.requests_response is None:
        raise McRequestException("Response from 'requests' is None.")

    response = Response(
        requests_response=user_agent_response.requests_response,
        max_size=self.max_size(),
        error_is_client_side=user_agent_response.error_is_client_side,
    )

    # Build the previous request / response chain from the redirects
    current_response = response
    for previous_rq_response in reversed(user_agent_response.requests_response.history):
        previous_rq_request = previous_rq_response.request
        previous_response_request = Request.from_requests_prepared_request(
            requests_prepared_request=previous_rq_request)

        # Sometimes reading the (chunked?) previous response's data fails with:
        #
        #      AttributeError: 'NoneType' object has no attribute 'readline'
        #
        # Previous response's data is not that important, so fail rather silently.
        try:
            # Accessing .text is what triggers the read; the value itself is not needed here
            previous_rq_response.text
        except Exception as ex:
            log.warning("Reading previous response's data failed: %s" % str(ex))
            previous_rq_response.raw_data = io.StringIO('')

        previous_response = Response(
            requests_response=previous_rq_response,
            max_size=self.max_size())
        previous_response.set_request(request=previous_response_request)

        current_response.set_previous(previous=previous_response)
        current_response = previous_response

    # Redirects might have happened, so we have to recreate the request object from the latest page that was
    # redirected to
    response_request = Request.from_requests_prepared_request(
        requests_prepared_request=user_agent_response.requests_response.request)
    response.set_request(response_request)

    return response