def __new__(cls, val='', *, encoded=False, strict=None):
    """Create a URL from *val* (a str, another URL, or an encoded SplitResult).

    When *encoded* is False the parts are re-quoted with the class quoters;
    a SplitResult input must already be encoded.  *strict* is accepted for
    backward compatibility and ignored.

    Raises ValueError for a non-encoded SplitResult or an absolute URL
    without a host, TypeError for any other input type.
    """
    if strict is not None:  # pragma: no cover
        warnings.warn("strict parameter is ignored")
    if type(val) is cls:
        # Already a URL instance: immutable, safe to return as-is.
        return val
    if type(val) is str:
        val = urlsplit(val)
    elif type(val) is SplitResult:
        if not encoded:
            raise ValueError("Cannot apply decoding to SplitResult")
    else:
        raise TypeError("Constructor parameter should be str")
    if not encoded:
        if not val[1]:  # netloc
            netloc = ''
        else:
            host = val.hostname
            if host is None:
                # BUG FIX: message previously read "abolute".
                raise ValueError(
                    "Invalid URL: host is required for absolute urls.")
            netloc = cls._make_netloc(val.username, val.password, host,
                                      val.port, encode=True)
        path = cls._PATH_QUOTER(val[2])
        if netloc:
            path = cls._normalize_path(path)
        query = cls._QUERY_QUOTER(val[3])
        fragment = cls._FRAGMENT_QUOTER(val[4])
        val = SplitResult(val[0], netloc, path, query, fragment)
    self = object.__new__(cls)
    self._val = val
    self._cache = {}
    return self
def parse_origin(url):
    """
    Return the origin of a URL or None if empty or invalid.

    Per https://tools.ietf.org/html/rfc6454#section-7 :
    Return ``<scheme> + '://' + <host> + <port>`` for a URL.

    :param url: URL string
    :rtype: str or None
    """
    if url is None:
        return None
    split = urlsplit(url)
    if not (split.scheme and split.netloc):
        # Missing scheme or host: no origin can be derived.
        return None
    # netloc contains both host and port
    rebuilt = SplitResult(split.scheme, split.netloc, "", "", "").geturl()
    return rebuilt or None
def preprocess_url(self, referrer, url):
    '''
    Clean and filter URLs before scraping.
    '''
    if not url:
        return None
    # Resolve to an absolute URL, drop trailing slash and fragments.
    parts = urlsplit(urljoin(referrer, url))._asdict()
    parts['path'] = re.sub(r'/$', '', parts['path'])
    parts['fragment'] = ''
    parts = SplitResult(**parts)
    if parts.netloc != self.domain:
        # Scrape pages of current domain only
        return None
    if parts.scheme == 'http':
        httpurl = cleanurl = parts.geturl()
        httpsurl = httpurl.replace('http:', 'https:', 1)
    else:
        httpsurl = cleanurl = parts.geturl()
        httpurl = httpsurl.replace('https:', 'http:', 1)
    # Skip URLs already seen under either scheme.
    if httpurl in self.urls or httpsurl in self.urls:
        return None
    return cleanurl
def host_port_split(host_port):
    """Like urllib.parse.splitport, but return port as int, and as None if not
    given. Also, it allows giving IPv6 addresses like a netloc:

    >> host_port_split('foo')
    ('foo', None)
    >> host_port_split('foo:5683')
    ('foo', 5683)
    >> host_port_split('[::1%eth0]:56830')
    ('::1%eth0', 56830)
    """
    # Abuse SplitResult's netloc parsing to split host and port.
    # FIX: use '' (not None) for the non-netloc fields — SplitResult's
    # components are documented to be str, and None only works by accident.
    pseudoparsed = SplitResult('', host_port, '', '', '')
    try:
        return pseudoparsed.hostname, pseudoparsed.port
    except ValueError:
        # A bare IPv6 literal looks like "host:port:..." — give a hint.
        if '[' not in host_port and host_port.count(':') > 1:
            raise ValueError(
                "Could not parse network location. "
                "Beware that when IPv6 literals are expressed in URIs, they "
                "need to be put in square brackets to distinguish them from "
                "port numbers.")
        raise
def list_files(self, first_date: dt.date, last_date: dt.date,
               test_type: str, country: str) -> Iterable[FileEntry]:
    """Yield FileEntry objects for measurement files in the given date range.

    Walks date-named S3 prefixes (one per day, starting 2020-10-20), then
    hour/country[/test_type] sub-prefixes, yielding an entry per
    ``*.jsonl.gz`` object found.  Increments ``self.num_list_requests``
    once per S3 list page fetched.
    """
    if last_date < dt.date(2020, 10, 20):
        # Bucket layout only starts at this date; nothing to list.
        return
    paginator = self._s3_client.get_paginator('list_objects_v2')
    pages = paginator.paginate(
        Bucket=_2020OoniClient._BUCKET,
        Delimiter='/',
        Prefix=_2020OoniClient._PREFIX,
        StartAfter=
        f'{_2020OoniClient._PREFIX}{first_date.strftime("%Y%m%d")}')
    for page in pages:
        self.num_list_requests += 1
        for date_entry in page.get('CommonPrefixes', []):
            date_dir = date_entry['Prefix']
            date_str = posixpath.basename(posixpath.dirname(date_dir))
            date = dt.datetime.strptime(date_str, "%Y%m%d").date()
            if date > last_date:
                return
            for hour in range(24):
                prefix = f'{date_dir}{hour:02}/{country}/'
                if test_type:
                    prefix += f'{test_type}/'
                # FIX: distinct loop variables — the original reused `page`
                # and `entry` here, shadowing the outer loop variables.
                for obj_page in paginator.paginate(Bucket=page['Name'],
                                                   Prefix=prefix):
                    self.num_list_requests += 1
                    for obj in obj_page.get('Contents', []):
                        key = obj['Key']
                        file_path = PosixPath(key)
                        if file_path.name.endswith('.jsonl.gz'):
                            file_test_type = file_path.parent.name
                            url = SplitResult('s3', obj_page['Name'], key,
                                              None, None)
                            # FIX: bind url as a default argument; a plain
                            # closure late-binds the loop variable, so every
                            # previously yielded callback would fetch the
                            # *last* url seen.
                            yield FileEntry(
                                lambda url=url: self._get_measurements(url),
                                file_test_type, country, date, url,
                                obj['Size'])
def preprocess_url(self, referrer, url):
    '''
    Clean and filter URLs before scraping.

    Returns the canonical (https) absolute URL for in-domain pages that
    have not been queued yet, otherwise None.
    '''
    if not url:
        return None
    fields = urlsplit(urljoin(referrer, url))._asdict()  # convert to absolute URLs and split
    fields['path'] = re.sub(r'/$', '', fields['path'])  # remove trailing /
    fields['fragment'] = ''  # remove targets within a page
    fields = SplitResult(**fields)
    if fields.netloc != self.domain:
        # Scrape pages of current domain only
        return None
    # Build both scheme variants so a page already queued under the other
    # scheme is not queued twice.
    if fields.scheme == 'http':
        httpurl = fields.geturl()
        httpsurl = httpurl.replace('http:', 'https:', 1)
    else:
        httpsurl = fields.geturl()
        httpurl = httpsurl.replace('https:', 'http:', 1)
    # Canonicalize to https.  BUG FIX: the original tested `cleanurl`
    # against self.urls twice, so the http/https twin of an already-seen
    # URL slipped through the duplicate check.
    cleanurl = httpsurl
    if httpurl not in self.urls and httpsurl not in self.urls:
        # Return URL only if it's not already in list
        return cleanurl
    return None
async def get_article_details(entry, fetch=True):
    """Resolve a feed entry's final link and title.

    Fetches the link (to follow proxy redirects) when crawling resolution
    is enabled or when the entry has no title; retries scheme-less links
    with https then http.  Returns ``(article_link, article_title)``,
    falling back to 'No title'.
    """
    article_link = entry.get('link')
    article_title = html.unescape(entry.get('title', ''))
    # NOTE(review): by operator precedence this is
    # (fetch and conf.CRAWLER_RESOLV and article_link) or (not article_title)
    # — i.e. a missing title triggers a fetch even when fetch=False.
    # Confirm this is intended.
    if fetch and conf.CRAWLER_RESOLV and article_link or not article_title:
        try:
            # resolves URL behind proxies (like feedproxy.google.com)
            response = await jarr_get(article_link, timeout=5)
        except MissingSchema:
            # Link had no scheme: try https first, then http.
            split, failed = urlsplit(article_link), False
            for scheme in 'https', 'http':
                new_link = urlunsplit(SplitResult(scheme, *split[1:]))
                try:
                    response = await jarr_get(new_link, timeout=5)
                except Exception as error:
                    failed = True
                    continue
                failed = False
                article_link = new_link
                break
            if failed:
                # Neither scheme worked; keep the original link.
                return article_link, article_title or 'No title'
        except Exception as error:
            logger.info(
                "Unable to get the real URL of %s. Won't fix "
                "link or title. Error: %s", article_link, error)
            return article_link, article_title or 'No title'
    # Use the URL the response actually came from (after redirects).
    article_link = response.url
    if not article_title:
        # Only parse the <head> to pull a <title> cheaply.
        bs_parsed = BeautifulSoup(response.content, 'html.parser',
                                  parse_only=SoupStrainer('head'))
        try:
            article_title = bs_parsed.find_all('title')[0].text
        except IndexError:  # no title
            pass
    return article_link, article_title or 'No title'
def build(cls, *, scheme='', user='', password='', host='', port=None,
          path='', query=None, query_string='', fragment=''):
    """Creates and returns a new URL"""
    # host and scheme must be given together (or both omitted).
    if host and not scheme:
        raise ValueError(
            'Can\'t build URL with "host" but without "scheme".')
    if not host and scheme:
        raise ValueError(
            'Can\'t build URL with "scheme" but without "host".')
    if query and query_string:
        raise ValueError(
            "Only one of \"query\" or \"query_string\" should be passed")
    netloc = cls._make_netloc(user, password, host, port)
    quoted_path = _quote(path, safe='@:', protected='/')
    if netloc:
        quoted_path = _normalize_path(quoted_path)
    split = SplitResult(scheme, netloc, quoted_path,
                        _quote(query_string), fragment)
    result = cls(split, encoded=True)
    # A structured query replaces the (empty) query string afterwards.
    return result.with_query(query) if query else result
def __init__(self, contentURL, fileName):
    '''
    Initialize the content.

    @param contentURL: string
        The URL of the icon to be downloaded.
    @param fileName: string
        The name of file under that the icon should be saved.
    '''
    Content.__init__(self, fileName, 'image', 'binary', 0)
    is_request = isinstance(contentURL, Request)
    raw_url = contentURL.full_url if is_request else contentURL
    scheme, netloc, path, query, fragment = urlsplit(raw_url)
    # Default to http when no scheme was given; percent-quote path/query.
    rebuilt = urlunsplit(SplitResult(scheme or 'http', netloc,
                                     quote(path), quote(query), fragment))
    if is_request:
        contentURL.full_url = rebuilt
    else:
        contentURL = rebuilt
    self._url = contentURL
    self._response = None
def _urlsplit(url, scheme='', allow_fragments=True):
    """Templating safe version of urllib.parse.urlsplit

    Ignores '?' and '#' inside {{}} templating tags.

    Caching disabled.
    """
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    allow_fragments = bool(allow_fragments)
    netloc = query = fragment = ''
    colon = url.find(':')
    if colon > 0:
        candidate = url[:colon]
        # Accept only a well-formed scheme prefix.
        if all(ch in scheme_chars for ch in candidate):
            scheme, url = candidate.lower(), url[colon + 1:]
    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        has_open = '[' in netloc
        has_close = ']' in netloc
        if has_open != has_close:
            raise ValueError("Invalid IPv6 URL")
    # Split on '#'/'?' only when they are not inside a {{...}} tag.
    if allow_fragments and '#' in url:
        pieces = re.split(r'#(?![^{{}}]*}})', url, maxsplit=1)
        url = pieces[0]
        if len(pieces) > 1:
            fragment = pieces[1]
    if '?' in url:
        pieces = re.split(r'\?(?![^{{}}]*}})', url, maxsplit=1)
        url = pieces[0]
        if len(pieces) > 1:
            query = pieces[1]
    return _coerce_result(SplitResult(scheme, netloc, url, query, fragment))
def build(cls, *, scheme="", authority="", user=None, password=None,
          host="", port=None, path="", query=None, query_string="",
          fragment="", encoded=False):
    """Creates and returns a new URL"""
    # Argument validation (order preserved: first failing check raises).
    if authority and (user or password or host or port):
        raise ValueError(
            'Can\'t mix "authority" with "user", "password", "host" or "port".'
        )
    if port and not host:
        raise ValueError(
            'Can\'t build URL with "port" but without "host".')
    if query and query_string:
        raise ValueError(
            'Only one of "query" or "query_string" should be passed')
    if (scheme is None or authority is None or path is None
            or query_string is None or fragment is None):
        raise TypeError(
            'NoneType is illegal for "scheme", "authority", "path", '
            '"query_string", and "fragment" args, use empty string instead.'
        )
    # Assemble the netloc from authority or from the individual pieces.
    if authority:
        if encoded:
            netloc = authority
        else:
            split_authority = SplitResult("", authority, "", "", "")
            netloc = cls._make_netloc(split_authority.username,
                                      split_authority.password,
                                      split_authority.hostname,
                                      split_authority.port, encode=True)
    elif user or password or host or port:
        netloc = cls._make_netloc(user, password, host, port,
                                  encode=not encoded)
    else:
        netloc = ""
    if not encoded:
        path = cls._PATH_QUOTER(path)
        if netloc:
            path = cls._normalize_path(path)
        cls._validate_authority_uri_abs_path(host=host, path=path)
        query_string = cls._QUERY_QUOTER(query_string)
        fragment = cls._FRAGMENT_QUOTER(fragment)
    url = cls(SplitResult(scheme, netloc, path, query_string, fragment),
              encoded=True)
    return url.with_query(query) if query else url
def test_split_result_non_decoded():
    # Passing a SplitResult without encoded=True must be rejected.
    split = SplitResult('http', 'example.com', 'path', 'qs', 'frag')
    with pytest.raises(ValueError):
        URL(split)
def handle_distrib(self, url):
    """React to a file dispatch message.

    Recognises the reception format from the dispatched file's name
    (HRPT ``.hmf``, EOS PDS ``P042*/P154*``, NPP/JPSS RDR ``R*.h5`` or
    Metop EPS), builds a ``swath`` metadata dict merged from any
    previously received pass info, and returns it.  Returns None when
    the platform is excluded or the filename is not recognised.
    """
    url = urlsplit(url)
    dummy, filename = os.path.split(url.path)
    logger.debug("filename = %s", filename)
    # TODO: Should not make any assumptions on filename formats, should
    # load a description of it from a config file instead.
    if filename.endswith(".hmf"):
        # HRPT level-0: "<risetime>_<satellite>.hmf"
        risestr, satellite = filename[:-4].split("_", 1)
        risetime = datetime.strptime(risestr, "%Y%m%d%H%M%S")
        pname = pass_name(risetime, satellite)
        satellite = satellite.replace("_", "-")
        if satellite in self._excluded_platforms:
            return None
        swath = self._received_passes.get(pname, {}).copy()
        swath.pop('satellite', None)
        swath["platform_name"] = satellite
        swath["start_time"] = risetime
        swath["type"] = "binary"
        swath["format"] = "HRPT"
        if satellite == "NOAA-15":
            swath["sensor"] = ("avhrr/3", "amsu-a", "amsu-b", "hirs/3")
        elif satellite in ["NOAA-18", "NOAA-19"]:
            swath["sensor"] = ("avhrr/3", "mhs", "amsu-a", "hirs/4")
        swath["data_processing_level"] = "0"
    elif filename.startswith("P042") or filename.startswith("P154"):
        # EOS PDS: fixed-position fields encode APIDs and time.
        pds = {}
        pds["format"] = filename[0]
        pds["apid1"] = filename[1:8]
        pds["apid2"] = filename[8:15]
        pds["apid3"] = filename[15:22]
        pds["time"] = datetime.strptime(filename[22:33], "%y%j%H%M%S")
        pds["nid"] = filename[33]
        pds["ufn"] = filename[34:36]
        pds["extension"] = filename[36:40]
        risetime = pds["time"]
        if pds["apid1"][:3] == "042":
            satellite = "EOS-Terra"
            pname = pass_name(risetime, 'TERRA')
        elif pds["apid1"][:3] == "154":
            satellite = "EOS-Aqua"
            pname = pass_name(risetime, 'AQUA')
        else:
            raise ValueError(
                "Unrecognized satellite ID: " + pds["apid1"][:3])
        if not satellite or satellite in self._excluded_platforms:
            logger.debug("Platform name %s is excluded...", str(satellite))
            return None
        swath = self._received_passes.get(pname, {}).copy()
        swath.pop('satellite', None)
        swath['platform_name'] = satellite
        swath['start_time'] = risetime
        # Map instrument APID suffix to a sensor name; fall back to the
        # raw suffix when unknown.
        instruments = {"0064": "modis",
                       "0141": "ceres+y",
                       "0157": "ceres-y",
                       "0261": "amsu-a1",
                       "0262": "amsu-a1",
                       "0290": "amsu-a2",
                       "0342": "hsb",
                       "0402": "amsr-e",
                       "0404": "airs",
                       "0405": "airs",
                       "0406": "airs",
                       "0407": "airs",
                       "0414": "airs",
                       "0415": "airs",
                       "0419": "airs",
                       "0957": "gbad",
                       }
        swath["sensor"] = instruments.get(pds["apid1"][3:],
                                          pds["apid1"][3:])
        swath["format"] = "PDS"
        swath["type"] = "binary"
        swath["data_processing_level"] = "0"
    # NPP/JPSS RDRs
    elif filename.startswith("R") and filename.endswith(".h5"):
        # Occasionally RT-STPS produce files with a nonstandard file
        # naming, lacking the 'RNSCA' field. We will try to deal with this
        # below (Adam - 2013-06-04):
        mda = {}
        mda["format"] = filename[0]
        file_ok = False
        for prefix in JPSS_INSTRUMENTS_FROM_FILENAMES:
            if filename.startswith(prefix):
                mda["sensor"] = JPSS_INSTRUMENTS_FROM_FILENAMES[prefix]
                # NOTE(review): str.strip(prefix) strips a *character set*,
                # not the literal prefix — verify it cannot eat leading
                # characters of the remainder.
                start_time_items = filename.strip(prefix).split('_')[1:3]
                end_time_item = filename.strip(prefix).split('_')[3]
                satellite = JPSS_PLATFORM_NAME.get(
                    filename.strip(prefix).split('_')[0], None)
                orbit = filename.strip(prefix).split('_')[4].strip('b')
                file_ok = True
                break
        if not file_ok:
            logger.warning("Seems to be a NPP/JPSS RDR "
                           "file but name is not standard!")
            logger.warning("filename = %s", filename)
            return None
        # satellite = "Suomi-NPP, NOAA-20, NOAA-21,..."
        if not satellite or satellite in self._excluded_platforms:
            logger.debug("Platform name %s is excluded...", str(satellite))
            return None
        mda["start_time"] = \
            datetime.strptime(start_time_items[0] + start_time_items[1],
                              "d%Y%m%dt%H%M%S%f")
        end_time = \
            datetime.strptime(start_time_items[0] + end_time_item,
                              "d%Y%m%de%H%M%S%f")
        # End time carries no date of its own: roll over midnight if the
        # pass crosses it.
        if mda["start_time"] > end_time:
            end_time += timedelta(days=1)
        mda["orbit"] = orbit
        # FIXME: swath start and end time is granule dependent.
        # Get the end time as well! - Adam 2013-06-03:
        start_time = mda["start_time"]
        pname = pass_name(start_time, SCISYS_NAMES.get(satellite))
        swath = self._received_passes.get(pname, {}).copy()
        swath.pop("satellite", None)
        swath["platform_name"] = satellite
        swath["start_time"] = start_time
        swath['end_time'] = end_time
        swath["sensor"] = mda["sensor"]
        swath["format"] = "RDR"
        swath["type"] = "HDF5"
        swath["data_processing_level"] = "0"
    # metop
    elif filename[4:12] == "_HRP_00_":
        # "AVHR": "avhrr",
        instruments = {"ASCA": "ascat",
                       "AMSA": "amsu-a",
                       "ATOV": "atovs",
                       "AVHR": "avhrr/3",
                       "GOME": "gome",
                       "GRAS": "gras",
                       "HIRS": "hirs/4",
                       "IASI": "iasi",
                       "MHSx": "mhs",
                       "SEMx": "sem",
                       "ADCS": "adcs",
                       "SBUV": "sbuv",
                       "HKTM": "vcdu34"}
        satellites = {"M02": "Metop-A", "M01": "Metop-B", "M03": "Metop-C"}
        satellite = satellites[filename[12:15]]
        risetime = datetime.strptime(filename[16:31], "%Y%m%d%H%M%SZ")
        falltime = datetime.strptime(filename[32:47], "%Y%m%d%H%M%SZ")
        pname = pass_name(risetime, satellite.upper())
        logger.debug("pname= % s", str(pname))
        swath = self._received_passes.get(pname, {}).copy()
        swath.pop('satellite', None)
        swath["start_time"] = risetime
        swath["end_time"] = falltime
        swath["platform_name"] = satellite
        swath["sensor"] = instruments[filename[:4]]
        swath["format"] = "EPS"
        swath["type"] = "binary"
        swath["data_processing_level"] = "0"
    else:
        return None
    # Rewrite the file URI so consumers fetch it over ssh from this host.
    if url.scheme in ["", "file"]:
        scheme = "ssh"
        netloc = self._emitter
        uri = urlunsplit(SplitResult(scheme, netloc, url.path,
                                     url.query, url.fragment))
    elif url.scheme == "ftp":
        scheme = "ssh"
        netloc = url.hostname
        uri = urlunsplit(SplitResult(scheme, netloc, url.path,
                                     url.query, url.fragment))
    else:
        # NOTE(review): `uri` stays unbound on this branch, so the
        # assignment below raises UnboundLocalError — confirm whether an
        # early return was intended here.
        logger.debug("url.scheme not expected: %s", url.scheme)
    swath["uid"] = os.path.split(url.path)[1]
    swath["uri"] = uri
    swath['variant'] = 'DR'
    return swath
def spliturl(url):
    """Split *url* into its (scheme://netloc, path?query) halves."""
    parts = urlsplit(url)
    base = SplitResult(parts.scheme, parts.netloc, '', '', '').geturl()
    rest = SplitResult('', '', parts.path, parts.query, '').geturl()
    return (base, rest)
def addNewParaToLink(link: str, key: str, value: str) -> str:
    """Return *link* with ``key=value`` appended to its query string."""
    parts = urlsplit(link)
    params = parse_qsl(parts.query)
    params.append((str(key), str(value)))
    rebuilt = SplitResult(parts.scheme, parts.netloc, parts.path,
                          urlencode(params), parts.fragment)
    return urlunsplit(rebuilt)
def test_split_result_non_decoded():
    # A pre-split result without encoded=True must raise.
    preparsed = SplitResult("http", "example.com", "path", "qs", "frag")
    with pytest.raises(ValueError):
        URL(preparsed)
def report_error(
    request: HttpRequest,
    user_profile: UserProfile,
    message: str = REQ(),
    stacktrace: str = REQ(),
    ui_message: bool = REQ(json_validator=check_bool),
    user_agent: str = REQ(),
    href: str = REQ(),
    log: str = REQ(),
    more_info: Mapping[str, Any] = REQ(json_validator=check_dict([]),
                                       default={}),
) -> HttpResponse:
    """Accepts an error report and stores in a queue for processing.

    The actual error reports are later handled by do_report_error"""
    if not settings.BROWSER_ERROR_REPORTING:
        # Reporting disabled: acknowledge without queuing anything.
        return json_success(request)
    # Copy so we can mutate without touching the validated mapping.
    more_info = dict(more_info)
    js_source_map = get_js_source_map()
    if js_source_map:
        # Translate minified JS frames back to original source positions.
        stacktrace = js_source_map.annotate_stacktrace(stacktrace)
    try:
        version: Optional[str] = subprocess.check_output(
            ["git", "show", "-s", "--oneline"],
            text=True,
        )
    except (FileNotFoundError, subprocess.CalledProcessError):
        # Not a git checkout (or git missing): report without a version.
        version = None
    # Get the IP address of the request
    remote_ip = request.META["REMOTE_ADDR"]
    # For the privacy of our users, we remove any actual text content
    # in draft_content (from drafts rendering exceptions). See the
    # comment on privacy_clean_markdown for more details.
    if more_info.get("draft_content"):
        more_info["draft_content"] = privacy_clean_markdown(
            more_info["draft_content"])
    if user_profile.is_authenticated:
        email = user_profile.delivery_email
        full_name = user_profile.full_name
    else:
        email = "*****@*****.**"
        full_name = "Anonymous User"
    queue_json_publish(
        "error_reports",
        dict(
            type="browser",
            report=dict(
                # SplitResult parses the Host header, dropping any port.
                host=SplitResult("", request.get_host(), "", "", "").hostname,
                ip_address=remote_ip,
                user_email=email,
                user_full_name=full_name,
                user_visible=ui_message,
                server_path=settings.DEPLOY_ROOT,
                version=version,
                user_agent=user_agent,
                href=href,
                message=message,
                stacktrace=stacktrace,
                log=log,
                more_info=more_info,
            ),
        ),
    )
    return json_success(request)
def __init__(self, url: typing.Optional[str] = None) -> None:
    """Parse *url* into split parts and query pairs (empty when None)."""
    if url is None:
        self.splitted = SplitResult("", "", "", "", "")
    else:
        self.splitted = urlsplit(url)
    # Decoded (key, value) pairs from the query string.
    self.query = parse_qsl(self.splitted.query)
def deport(netloc: str) -> str:
    """Remove the port from a hostname:port string.  Brackets on a literal
    IPv6 address are included."""
    host = SplitResult("", netloc, "", "", "").hostname
    assert host is not None
    if ":" in host:
        # IPv6 literal: restore the surrounding brackets.
        return "[" + host + "]"
    return host
def handle(self, *args: Any, **options: Any) -> None:
    """Management-command entry point: run the Tornado event server.

    Parses the ``addrport`` option (port, or host:port), configures
    logging, then runs ``inner_run`` on a fresh asyncio loop until a
    SIGINT/SIGTERM future resolves.
    """
    interactive_debug_listen()
    addrport = options["addrport"]
    assert isinstance(addrport, str)
    from tornado import httpserver

    if addrport.isdigit():
        # Bare port number: bind the default address.
        addr, port = "", int(addrport)
    else:
        # Parse host:port via SplitResult's netloc handling.
        r = SplitResult("", addrport, "", "", "")
        if r.port is None:
            raise CommandError(
                f"{addrport!r} does not have a valid port number.")
        addr, port = r.hostname or "", r.port
    if not addr:
        addr = "127.0.0.1"
    if settings.DEBUG:
        logging.basicConfig(
            level=logging.INFO,
            format="%(asctime)s %(levelname)-8s %(message)s")

    async def inner_run() -> None:
        # Runs the whole server lifecycle inside one AsyncExitStack so
        # every resource is unwound in reverse order on shutdown.
        from django.utils import translation

        AsyncIOMainLoop().install()
        loop = asyncio.get_running_loop()
        stop_fut = loop.create_future()

        def stop() -> None:
            if not stop_fut.done():
                stop_fut.set_result(None)

        # NOTE(review): the trailing commas below create throwaway tuples;
        # harmless, but presumably unintended.
        def add_signal_handlers() -> None:
            loop.add_signal_handler(signal.SIGINT, stop),
            loop.add_signal_handler(signal.SIGTERM, stop),

        def remove_signal_handlers() -> None:
            loop.remove_signal_handler(signal.SIGINT),
            loop.remove_signal_handler(signal.SIGTERM),

        async with AsyncExitStack() as stack:
            stack.push_async_callback(
                sync_to_async(remove_signal_handlers, thread_sensitive=True))
            await sync_to_async(add_signal_handlers,
                                thread_sensitive=True)()
            translation.activate(settings.LANGUAGE_CODE)
            # We pass display_num_errors=False, since Django will
            # likely display similar output anyway.
            self.check(display_num_errors=False)
            print(f"Tornado server (re)started on port {port}")
            if settings.USING_RABBITMQ:
                queue_client = TornadoQueueClient()
                set_queue_client(queue_client)
                # Process notifications received via RabbitMQ
                queue_name = notify_tornado_queue_name(port)
                stack.callback(queue_client.close)
                queue_client.start_json_consumer(
                    queue_name,
                    get_wrapped_process_notification(queue_name))
            # Application is an instance of Django's standard wsgi handler.
            application = create_tornado_application()
            # start tornado web server in single-threaded mode
            http_server = httpserver.HTTPServer(application, xheaders=True)
            stack.push_async_callback(http_server.close_all_connections)
            stack.callback(http_server.stop)
            http_server.listen(port, address=addr)
            from zerver.tornado.ioloop_logging import logging_data
            logging_data["port"] = str(port)
            await setup_event_queue(http_server, port)
            stack.callback(dump_event_queues, port)
            add_client_gc_hook(missedmessage_hook)
            if settings.USING_RABBITMQ:
                setup_tornado_rabbitmq(queue_client)
            if hasattr(__main__, "add_reload_hook"):
                autoreload.start()
            # Block here until a termination signal resolves the future.
            await stop_fut
            # Monkey patch tornado.autoreload to prevent it from continuing
            # to watch for changes after catching our SystemExit. Otherwise
            # the user needs to press Ctrl+C twice.
            __main__.wait = lambda: None

    async_to_sync(inner_run, force_new_loop=True)()
def get_base_url(url):
    """Strip path, query and fragment, keeping ``scheme://netloc/``."""
    parts = urlsplit(url)._asdict()
    parts.update(path='/', query='', fragment='')
    return urlunsplit(SplitResult(**parts))
def changeFileNameForLink(link: str, name: str) -> str:
    """Replace the file-name component of *link*'s path with *name*."""
    parts = urlsplit(link)
    dir_part = splitfn(parts.path)[0]
    rebuilt = SplitResult(parts.scheme, parts.netloc, f"{dir_part}/{name}",
                          parts.query, parts.fragment)
    return urlunsplit(rebuilt)
function_name, arguments) print("assert", call, "==", call_value(result)) except Exception: continue from urllib.parse import SplitResult, ParseResult, urlparse, urlsplit if __name__ == "__main__": assert urlparse(url='http://www.example.com', scheme='', allow_fragments=True) == ParseResult( scheme='http', netloc='www.example.com', path='', params='', query='', fragment='') assert urlsplit(url='http://www.example.com', scheme='', allow_fragments=True) == SplitResult( scheme='http', netloc='www.example.com', path='', query='', fragment='') # ### Exercise 2: Abstracting Arguments if __name__ == "__main__": print('\n### Exercise 2: Abstracting Arguments')
def handle(self, addrport: str, **options: bool) -> None:
    """Management-command entry point: run the Tornado event server.

    *addrport* is a bare port number or a host:port string; options may
    carry ``xheaders`` and ``no_keep_alive`` flags for the HTTP server.
    Blocks in the IOLoop until KeyboardInterrupt.
    """
    interactive_debug_listen()
    import django
    from tornado import httpserver

    if addrport.isdigit():
        # Bare port number: bind the default address.
        addr, port = "", int(addrport)
    else:
        # Parse host:port via SplitResult's netloc handling.
        r = SplitResult("", addrport, "", "", "")
        if r.port is None:
            raise CommandError(
                f"{addrport!r} does not have a valid port number.")
        addr, port = r.hostname or "", r.port
    if not addr:
        addr = '127.0.0.1'
    xheaders = options.get('xheaders', True)
    no_keep_alive = options.get('no_keep_alive', False)
    if settings.DEBUG:
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s %(levelname)-8s %(message)s')

    def inner_run() -> None:
        from django.conf import settings
        from django.utils import translation
        translation.activate(settings.LANGUAGE_CODE)
        # We pass display_num_errors=False, since Django will
        # likely display similar output anyway.
        self.check(display_num_errors=False)
        print(f"Tornado server is running at http://{addr}:{port}/")
        if settings.USING_RABBITMQ:
            queue_client = get_queue_client()
            # Process notifications received via RabbitMQ
            queue_name = notify_tornado_queue_name(port)
            queue_client.register_json_consumer(
                queue_name, get_wrapped_process_notification(queue_name))
        try:
            # Application is an instance of Django's standard wsgi handler.
            application = create_tornado_application(port)
            if settings.AUTORELOAD:
                zulip_autoreload_start()
            # start tornado web server in single-threaded mode
            http_server = httpserver.HTTPServer(
                application, xheaders=xheaders,
                no_keep_alive=no_keep_alive)
            http_server.listen(port, address=addr)
            from zerver.tornado.ioloop_logging import logging_data
            logging_data['port'] = str(port)
            setup_event_queue(port)
            add_client_gc_hook(missedmessage_hook)
            setup_tornado_rabbitmq()
            instance = ioloop.IOLoop.instance()
            if django.conf.settings.DEBUG:
                # Surface callbacks that block the loop for >5s.
                instance.set_blocking_log_threshold(5)
                instance.handle_callback_exception = handle_callback_exception
            # Blocks until the loop is stopped.
            instance.start()
        except KeyboardInterrupt:
            sys.exit(0)

    inner_run()
def __new__(cls, value='', encoded=False):
    """
    Creates a new ``URL`` instance from the given `value`

    Parameters
    ----------
    value : ``URL``, `str`, `urllib.parse.SplitResult`, Optional
        The value to create ``URL`` from. Defaults to empty string.
    encoded : `bool`, Optional
        Whether the given `value` is already encoded. Defaults to `False`.

    Raises
    -------
    ValueError
        - If `value` is given as `urllib.parse.SplitResult` instance, but `encoded` was given as `False`.
        - If `value` is not `encoded` and the URL is absolute, but `host` is not given.
    TypeError
        If `value` was not given neither as ``URL``, `str` nor `urllib.parse.SplitResult` instance.
    """
    if isinstance(value, cls):
        # Already a URL: immutable, return as-is.
        return value

    if isinstance(value, str):
        value = url_split(value)
    elif isinstance(value, SplitResult):
        if not encoded:
            raise ValueError(f'Cannot apply decoding to `{SplitResult.__name__}`.')
    else:
        raise TypeError(
            f'`value` should have be given as `{cls.__name__}`, `str` or `{SplitResult.__name__}` '
            f'instance, got {value.__class__.__name__}.')

    if not encoded:
        # Re-encode every component of the split URL.
        if value.netloc:
            netloc = value.hostname
            if netloc is None:
                raise ValueError('Invalid URL: host is required for absolute urls.')

            try:
                netloc.encode('ascii')
            except UnicodeEncodeError:
                # Internationalised host name: encode with IDNA.
                netloc = netloc.encode('idna').decode('ascii')
            else:
                # NOTE(review): bare `except:` below swallows every
                # exception from ip_address parsing, not just ValueError.
                try:
                    ip = ip_address(netloc)
                except:
                    pass
                else:
                    if ip.version == 6:
                        # IPv6 literals must be bracketed in a netloc.
                        netloc = f'[{netloc}]'

            value_port = value.port
            if value_port:
                netloc = f'{netloc}:{value_port}'

            value_username = value.username
            if value_username:
                user = quote(value_username)
                value_password = value.password
                if value_password:
                    user = f'{user}:{quote(value_password)}'
                netloc = f'{user}@{netloc}'
        else:
            netloc = ''

        value = SplitResult(
            value.scheme,
            netloc,
            quote(value.path, safe='@:', protected='/'),
            quote(value.query, safe='=+&?/:@', protected='=+&', query_string=True),
            quote(value.fragment, safe='?/:@'),
        )

    self = object.__new__(cls)
    self._value = value
    self._cache = {}
    return self
def test_to_bytestring(): assert util.to_bytestring('test_str', 'ascii') == b'test_str' assert util.to_bytestring('test_str®') == b'test_str\xc2\xae' assert util.to_bytestring(b'byte_test_str') == b'byte_test_str' with pytest.raises(TypeError) as err: util.to_bytestring(100) msg = '100 is not a string' assert msg in str(err) @pytest.mark.parametrize('test_input, expected', [ ('https://example.org/a/b?c=1#d', SplitResult(scheme='https', netloc='example.org', path='/a/b', query='c=1', fragment='d')), ('a/b?c=1#d', SplitResult(scheme='', netloc='', path='a/b', query='c=1', fragment='d')), ('/a/b?c=1#d', SplitResult(scheme='', netloc='', path='/a/b', query='c=1', fragment='d')), ('//a/b?c=1#d', SplitResult(scheme='', netloc='', path='//a/b', query='c=1', fragment='d')), ('///a/b?c=1#d', SplitResult( scheme='', netloc='', path='///a/b', query='c=1', fragment='d')), ]) def test_split_request_uri(test_input, expected):
def __init__(self, val='', *, encoded=False, strict=False):
    """Build a URL from *val* (str, URL, or an already-encoded SplitResult).

    When *encoded* is False, the host is IDNA-encoded if needed, IPv6
    literals are bracketed, credentials/path/query/fragment are quoted
    and the path normalized.  *strict* is forwarded to the quoter.
    """
    if isinstance(val, URL):
        # Copy-construct cheaply from another URL.
        self._val = val._val
        self._cache = val._cache
        self._strict = val._strict
        return
    if isinstance(val, str):
        val = urlsplit(val)
    elif isinstance(val, SplitResult):
        if not encoded:
            raise ValueError("Cannot apply decoding to SplitResult")
    else:
        raise TypeError("Constructor parameter should be str")
    self._strict = strict
    if not encoded:
        if not val[1]:  # netloc
            netloc = ''
        else:
            netloc = val.hostname
            if netloc is None:
                # NOTE(review): "abolute" typo in the message below.
                raise ValueError(
                    "Invalid URL: host is required for abolute urls.")
            try:
                netloc.encode('ascii')
            except UnicodeEncodeError:
                # Internationalised host name: encode with IDNA.
                netloc = netloc.encode('idna').decode('ascii')
            else:
                # NOTE(review): bare `except:` swallows everything from
                # ip_address, not just ValueError.
                try:
                    ip = ip_address(netloc)
                except:
                    pass
                else:
                    if ip.version == 6:
                        # IPv6 literals must be bracketed in a netloc.
                        netloc = '[' + netloc + ']'
            if val.port:
                netloc += ':{}'.format(val.port)
            if val.username:
                user = _quote(val.username)
            else:
                user = ''
            if val.password:
                user += ':' + _quote(val.password)
            if user:
                netloc = user + '@' + netloc
        path = _quote(val[2], safe='+@:', protected='/+', strict=strict)
        if netloc:
            path = _normalize_path(path)
        query = _quote(val[3], safe='=+&?/:@', protected=PROTECT_CHARS,
                       qs=True, strict=strict)
        fragment = _quote(val[4], safe='?/:@', strict=strict)
        val = SplitResult(val[0], netloc, path, query, fragment)
    self._val = val
    self._cache = {}
def __init__(self):
    """Start with an empty split result and no query parameters."""
    self.splitted = SplitResult("", "", "", "", "")
    self.query = []
def urlsplit(url):
    """Split *url*, separating any fragment that _urlsplit left in the path."""
    scheme, netloc, path, query, fragment = _urlsplit(url)
    head, sep, tail = path.partition("#")
    if sep:
        path, fragment = head, tail
    return SplitResult(scheme, netloc, path, query, fragment)
def run(self):
    """
    Run node, spawning entity and doing other actions as configured in program arguments.
    Returns exit code, 1 for failure, 0 for success
    """
    # Wait for entity to exist if wait flag is enabled
    if self.args.wait:
        self.entity_exists = False

        def entity_cb(entity):
            # Substring match against published model names.
            self.entity_exists = self.args.wait in entity.name

        self.subscription = self.create_subscription(
            ModelStates, '%s/model_states' % self.args.gazebo_namespace,
            entity_cb, 10)
        self.get_logger().info(
            'Waiting for entity {} before proceeding.'.format(
                self.args.wait))
        while rclpy.ok() and not self.entity_exists:
            rclpy.spin_once(self)
            pass
    # Load entity XML from file
    if self.args.file:
        self.get_logger().info('Loading entity XML from file %s' %
                               self.args.file)
        if not os.path.exists(self.args.file):
            self.get_logger().error(
                'Error: specified file %s does not exist', self.args.file)
            return 1
        if not os.path.isfile(self.args.file):
            self.get_logger().error(
                'Error: specified file %s is not a file', self.args.file)
            return 1
        # load file
        # NOTE(review): the file handle is never closed — a `with` block
        # would be the fix (left untouched in this documentation pass).
        try:
            f = open(self.args.file, 'r')
            entity_xml = f.read()
        except IOError as e:
            self.get_logger().error('Error reading file {}: {}'.format(
                self.args.file, e))
            return 1
        if entity_xml == '':
            self.get_logger().error('Error: file %s is empty',
                                    self.args.file)
            return 1
    # Load entity XML published on topic specified
    elif self.args.topic:
        self.get_logger().info('Loading entity published on topic %s' %
                               self.args.topic)
        entity_xml = ''

        def entity_xml_cb(msg):
            nonlocal entity_xml
            entity_xml = msg.data

        self.subscription = self.create_subscription(
            String, self.args.topic, entity_xml_cb,
            QoSDurabilityPolicy.RMW_QOS_POLICY_DURABILITY_TRANSIENT_LOCAL)
        while rclpy.ok() and entity_xml == '':
            self.get_logger().info('Waiting for entity xml on %s' %
                                   self.args.topic)
            rclpy.spin_once(self)
            pass
    # Generate entity XML by putting requested entity name into request template
    elif self.args.database:
        self.get_logger().info(
            'Loading entity XML from Gazebo Model Database')
        entity_xml = self.MODEL_DATABASE_TEMPLATE.format(self.args.database)
    elif self.args.stdin:
        self.get_logger().info('Loading entity XML from stdin')
        entity_xml = sys.stdin.read()
        if entity_xml == '':
            self.get_logger().error('Error: stdin buffer was empty')
            return 1
    # NOTE(review): if none of the source flags is set, `entity_xml` is
    # unbound below — presumably argument parsing guarantees one; verify.
    # Parse xml to detect invalid xml before sending to gazebo
    try:
        xml_parsed = ElementTree.fromstring(entity_xml)
    except ElementTree.ParseError as e:
        self.get_logger().error('Invalid XML: {}'.format(e))
        return 1
    # Replace package:// with model:// for mesh tags if flag is set
    if self.args.package_to_model:
        for element in xml_parsed.iterfind('.//mesh'):
            filename_tag = element.get('filename')
            if filename_tag is None:
                continue
            url = urlsplit(filename_tag)
            if url.scheme == 'package':
                # Swap only the scheme, keeping the rest of the URI.
                url = SplitResult('model', *url[1:])
                element.set('filename', url.geturl())
    # Encode xml object back into string for service call
    entity_xml = ElementTree.tostring(xml_parsed)
    # Form requested Pose from arguments
    initial_pose = Pose()
    initial_pose.position.x = float(self.args.x)
    initial_pose.position.y = float(self.args.y)
    initial_pose.position.z = float(self.args.z)
    q = quaternion_from_euler(self.args.R, self.args.P, self.args.Y)
    initial_pose.orientation.w = q[0]
    initial_pose.orientation.x = q[1]
    initial_pose.orientation.y = q[2]
    initial_pose.orientation.z = q[3]
    success = self._spawn_entity(entity_xml, initial_pose)
    if not success:
        self.get_logger().error('Spawn service failed. Exiting.')
        return 1
    # TODO(shivesh): Wait for /set_model_configuration
    # (https://github.com/ros-simulation/gazebo_ros_pkgs/issues/779)
    # Apply joint positions if any specified
    # if len(self.args.joints) != 0:
    #     joint_names = [joint[0] for joint in self.args.joints]
    #     joint_positions = [joint[1] for joint in self.args.joints]
    #     success = _set_model_configuration(joint_names, joint_positions)
    #     if not success:
    #         self.get_logger().error('SetModelConfiguration service failed. Exiting.')
    #         return 1
    # Unpause physics if user requested
    if self.args.unpause:
        client = self.create_client(
            Empty, '%s/unpause_physics' % self.args.gazebo_namespace)
        if client.wait_for_service(timeout_sec=self.args.timeout):
            self.get_logger().info('Calling service %s/unpause_physics' %
                                   self.args.gazebo_namespace)
            client.call_async(Empty.Request())
        else:
            self.get_logger().error(
                'Service %s/unpause_physics unavailable. \
Was Gazebo started with GazeboRosInit?'
            )
    # If bond enabled, setup shutdown callback and wait for shutdown
    if self.args.bond:
        self.get_logger().info(
            'Waiting for shutdown to delete entity [{}]'.format(
                self.args.entity))
        try:
            rclpy.spin(self)
        except KeyboardInterrupt:
            self.get_logger().info('Ctrl-C detected')
        self._delete_entity()
    return 0