def add_image_from_recipe(body: RecipeEntry) -> RecipeEntry:
    """Re-host a recipe's external image on S3 and rewrite its ``imgSrc``.

    Pops ``imgSrc`` from *body*; if it is missing, already self-hosted
    (``IMAGE_PREFIX``), or an inline data URL, the entry is returned
    unchanged (with ``imgSrc`` restored in the latter two cases).
    Otherwise the image is downloaded and uploaded to the bucket named by
    the ``images_bucket_name`` environment variable under a fresh UUID key.

    :param body: recipe entry dict; mutated in place and returned.
    :return: the same entry, with ``imgSrc`` pointing at the hosted copy
        on success, or without ``imgSrc`` on any download failure.
    """
    image_source = body.pop("imgSrc", None)
    if not image_source:
        return body
    # Already self-hosted, or an inline data URL: keep as-is.
    if image_source.startswith((IMAGE_PREFIX, "data:image/")):
        body["imgSrc"] = image_source
        return body
    key = str(uuid4())
    try:
        with requests.get(prepend_scheme_if_needed(image_source, "http"),
                          stream=True) as res:
            # The header may be missing entirely, and may carry parameters
            # ("image/png; charset=..."); strip them before deriving the
            # file extension so the S3 key stays clean.
            content_type = res.headers.get("Content-Type", "").split(";")[0].strip()
            file_type, _, extension = content_type.partition("/")
            if res.ok and file_type == "image" and extension:
                image: Object = boto3.resource("s3").Object(
                    os.environ["images_bucket_name"], f"{key}.{extension}")
                image.put(Body=res.content,
                          ContentType=content_type,
                          ACL="public-read")
                body["imgSrc"] = f"{IMAGE_PREFIX}{key}.{extension}"
    except (ConnectionError, InvalidURL):
        # Best-effort: an unreachable or invalid image URL simply leaves
        # the recipe without an image rather than failing the request.
        pass
    return body
def request_get(webapp, route, data_dict=None, scheme='', header_dict=None):
    """
    :param webapp: web app address. If the address contains schema information,
        then the schema argument should be None. Port, if any, is assumed to
        already be appended to webapp.
    :param route: web application route
    :param data_dict: dict of url parameters if any
    :param scheme: web address scheme. If the address contains schema
        information, then this argument is ignored.
    :param header_dict: dict of header for request. Default: None.
    :return: the HTTP response object
    :raises WebRequestError if HTTP status is not 200
    """
    # NOTE: the default used to be a mutable `{}` shared across calls; use
    # None as the sentinel and build a fresh dict per call instead.
    if header_dict is None:
        header_dict = {}
    webapp_url = prepend_scheme_if_needed(webapp, scheme)
    url = urljoin(webapp_url, route)
    req = requests.get(url, params=data_dict, headers=header_dict)
    # Lazy %-style args so the URL is only stringified when DEBUG is on.
    log.debug("HTTP get %s", req.url)
    if req.status_code != 200:
        raise WebRequestError("Status code: " + str(req.status_code) +
                              ", msg: " + req.text)
    return req
def get_connection(self, url, proxies=None, verify=None, cert=None):
    """Return a urllib3 connection for the given URL.

    Not intended to be called from user code; exposed only for use when
    subclassing the :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

    :param url: The URL to connect to.
    :param proxies: (optional) A Requests-style dictionary of proxies used
        on this request.
    """
    parts = urlparse(url)
    request_scheme = parts.scheme.lower()
    # All pool selection and SSL keyword mutation happens under the lock.
    with self._pool_kw_lock:
        configured_proxy = (proxies or {}).get(request_scheme)
        if configured_proxy:
            manager = self.proxy_manager_for(
                prepend_scheme_if_needed(configured_proxy, 'http'))
        else:
            manager = self.poolmanager
        if request_scheme == 'https':
            self._update_poolmanager_ssl_kw(manager, verify, cert)
        return manager.connection_from_url(parts.geturl())
def get_connection(self, url, proxies=None, verify=None, cert=None):
    """Returns a urllib3 connection for the given URL. This should not be
    called from user code, and is only exposed for use when subclassing the
    :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

    :param url: The URL to connect to.
    :param proxies: (optional) A Requests-style dictionary of proxies used
        on this request.
    """
    # Pool selection and SSL keyword updates happen under one lock so they
    # cannot interleave across threads.
    with self._pool_kw_lock:
        if url.lower().startswith('https'):
            # NOTE(review): unlike the sibling get_connection variant, this
            # _update_poolmanager_ssl_kw takes no pool argument — presumably
            # it mutates self.poolmanager's kwargs directly; confirm.
            self._update_poolmanager_ssl_kw(verify, cert)
        proxies = proxies or {}
        # Proxy lookup keys on the lowercased scheme of the request URL.
        proxy = proxies.get(urlparse(url.lower()).scheme)
        if proxy:
            proxy = prepend_scheme_if_needed(proxy, 'http')
            proxy_manager = self.proxy_manager_for(proxy)
            conn = proxy_manager.connection_from_url(url)
        else:
            # Only scheme should be lower case
            parsed = urlparse(url)
            url = parsed.geturl()
            conn = self.poolmanager.connection_from_url(url)
    return conn
def _parse_proxy_url(proxy_url):
    """Parse *proxy_url*, defaulting its scheme to ``http``.

    :return: the parsed URL object.
    :raises InvalidProxyURL: when the URL lacks a host component.
    """
    parsed = parse_url(prepend_scheme_if_needed(proxy_url, "http"))
    if parsed.host:
        return parsed
    raise InvalidProxyURL("Please check proxy URL. It is malformed"
                          " and could be missing the host.")
def send_proxy(self, request, proxy, **kwargs):
    '''Send an FTP request through a HTTP proxy'''
    # Route the request via the HTTP adapter registered for the proxy URL.
    proxy_url = prepend_scheme_if_needed(proxy, 'http')
    session = requests.Session()
    http_adapter = session.get_adapter(proxy_url)
    try:
        return http_adapter.send(request, **kwargs)
    finally:
        http_adapter.close()
def send_proxy(self, request, proxy, **kwargs):
    '''Send an FTP request through a HTTP proxy'''
    # Delegate to the HTTP adapter that a fresh session maps to the proxy.
    adapter = requests.Session().get_adapter(
        prepend_scheme_if_needed(proxy, 'http'))
    try:
        return adapter.send(request, **kwargs)
    finally:
        adapter.close()
def send(self, stream=False, timeout=None, verify=True, cert=None,
         proxies=None):
    """Kick off the async request: arm the connect timeout, resolve proxy
    settings, then open the TCP connection; completion continues in
    self._on_connect via callback.

    :param stream: whether to stream the response body (stored on self).
    :param timeout: parsed into (connect, read) via parse_timeout.
    :param verify: TLS verification flag/bundle passed to _get_ssl_options.
    :param cert: client certificate passed to _get_ssl_options.
    :param proxies: Requests-style proxy mapping consulted by select_proxy.
    """
    request = self.request
    connect_timeout, self.read_timeout = parse_timeout(timeout)
    self.stream_body = stream
    # set connect timeout
    with stack_context.ExceptionStackContext(self._handle_exception):
        if connect_timeout:
            # Fire _on_timeout if the connection isn't up in time.
            self._timeout = self.io_loop.call_later(
                connect_timeout,
                stack_context.wrap(
                    functools.partial(self._on_timeout, 'while connecting')))
        # set proxy related info
        proxy = select_proxy(request.url, proxies)
        self.headers = request.headers.copy()
        if proxy:
            proxy = prepend_scheme_if_needed(proxy, 'http')
            parsed = urlparse(proxy)
            # NOTE(review): `host` is bound to the FULL proxy URL (not
            # parsed.hostname) and is later passed as `source_ip` below —
            # this looks wrong for tornado's TCPClient; confirm intent.
            scheme, host, port = parsed.scheme, proxy, parsed.port
            # Default proxy port by scheme when the URL omits it.
            port = port or (443 if scheme == 'https' else 80)
            # Proxied requests use the absolute URL in the start line.
            self.start_line = RequestStartLine(request.method, request.url,
                                               '')
            self.headers.update(get_proxy_headers(proxy))
        else:
            host, port = None, None
            self.start_line = request.start_line
        self.tcp_client.connect(request.host, request.port, af=request.af,
                                ssl_options=self._get_ssl_options(
                                    request, verify, cert),
                                max_buffer_size=self.max_buffer_size,
                                source_ip=host, source_port=port,
                                callback=self._on_connect)
def get(self):
    """Scrape a url for recipe info."""

    def _as_list(value: Union[str, List[str]]) -> List[str]:
        """Turn a newline-joined string (stripping any leading "1." / "2:"
        markers) into a list; pass real lists through untouched."""
        if isinstance(value, str):
            return [re.sub(r"^\d+[.:]? ?", "", line)
                    for line in value.split("\n")]
        return value

    url = request.args.get("url")
    if not url:
        return ResponseData(message="No url provided."), 400
    logging.info(f"Scraping url: {url}")
    try:
        scraped = scrape_me(prepend_scheme_if_needed(url, "http"),
                            wild_mode=True)
    except NoSchemaFoundInWildMode:
        return ResponseData(message=f"No recipe schema found at {url}"), 200
    except (ConnectionError, InvalidURL):
        return ResponseData(message=f"{url} is not a valid url."), 404
    except Exception:
        # Truly unexpected failure: log the traceback and shrug.
        logging.exception(r"¯\_(ツ)_/¯")
        return ResponseData(message=r"¯\_(ツ)_/¯"), 500
    data = {
        "url": url,
        "name": scraped.title(),
        "imgSrc": scraped.image(),
        "adaptedFrom": scraped.site_name() or scraped.host(),
        "yield": scraped.yields(),
        "cookTime": scraped.total_time() or "",
        "instructions": _as_list(scraped.instructions()),
        "ingredients": _as_list(scraped.ingredients()),
    }
    logging.info(f"Found data:\n{pformat(data)}")
    return ResponseData(data=data), 200
def test_prepend_scheme_if_needed(value, expected):
    """Parametrized: prepending 'http' to *value* yields *expected*."""
    result = prepend_scheme_if_needed(value, 'http')
    assert result == expected
def to_internal_value(self, data):
    """Set the requesting user as the sole subscriber and default the
    url's scheme to http, then delegate to the parent serializer."""
    data['subscribers'] = [self.context['request'].user.id]
    url = data.get('url')
    if url:
        data['url'] = prepend_scheme_if_needed(url, 'http')
    return super().to_internal_value(data)