Ejemplo n.º 1
0
    def __new__(cls, val='', *, encoded=False, strict=None):
        """Create a new URL from a str or an already-split ``SplitResult``.

        :param val: URL string, a ``SplitResult``, or an instance of this
            class (instances are immutable and returned as-is).
        :param encoded: when True, ``val`` is assumed to be already
            percent-encoded and is stored without re-quoting.
        :param strict: ignored; kept for backward compatibility.
        :raises ValueError: when decoding is requested for a SplitResult,
            or when the URL has a netloc but no parseable host.
        :raises TypeError: when ``val`` is of an unsupported type.
        """
        if strict is not None:  # pragma: no cover
            warnings.warn("strict parameter is ignored")
        if type(val) is cls:
            # URLs are immutable, so an existing instance can be shared.
            return val
        if type(val) is str:
            val = urlsplit(val)
        elif type(val) is SplitResult:
            if not encoded:
                raise ValueError("Cannot apply decoding to SplitResult")
        else:
            raise TypeError("Constructor parameter should be str")

        if not encoded:
            if not val[1]:  # netloc
                netloc = ''
            else:
                host = val.hostname
                if host is None:
                    # BUGFIX: error message previously said "abolute".
                    raise ValueError(
                        "Invalid URL: host is required for absolute urls.")
                netloc = cls._make_netloc(val.username,
                                          val.password,
                                          host,
                                          val.port,
                                          encode=True)
            path = cls._PATH_QUOTER(val[2])
            if netloc:
                path = cls._normalize_path(path)

            query = cls._QUERY_QUOTER(val[3])
            fragment = cls._FRAGMENT_QUOTER(val[4])
            val = SplitResult(val[0], netloc, path, query, fragment)

        self = object.__new__(cls)
        self._val = val
        self._cache = {}
        return self
Ejemplo n.º 2
0
def parse_origin(url):
    """
    Return the origin of a URL or None if empty or invalid.

    Per https://tools.ietf.org/html/rfc6454#section-7 :
    Return ``<scheme> + '://' + <host> + <port>``
    for a URL.

    :param url: URL string
    :rtype: str or None
    """
    if url is None:
        return None

    parts = urlsplit(url)

    # A valid origin needs both a scheme and a network location.
    if parts.scheme and parts.netloc:
        # netloc already carries host and port; drop path/query/fragment.
        origin = SplitResult(parts.scheme, parts.netloc, "", "", "")
        return origin.geturl() or None
    return None
Ejemplo n.º 3
0
    def preprocess_url(self, referrer, url):
        '''
        Clean and filter URLs before scraping.
        '''
        if not url:
            return None

        # Resolve relative links, then drop any trailing slash and the
        # in-page fragment.
        split = urlsplit(urljoin(referrer, url))
        split = split._replace(path=re.sub(r'/$', '', split.path),
                               fragment='')

        if split.netloc != self.domain:
            # Scrape pages of current domain only.
            return None

        cleanurl = split.geturl()
        if split.scheme == 'http':
            other_scheme_url = cleanurl.replace('http:', 'https:', 1)
        else:
            other_scheme_url = cleanurl.replace('https:', 'http:', 1)

        # Skip URLs already known under either scheme.
        if cleanurl in self.urls or other_scheme_url in self.urls:
            return None
        return cleanurl
Ejemplo n.º 4
0
def host_port_split(host_port):
    """Like urllib.parse.splitport, but return port as int, and as None if not
    given. Also, it allows giving IPv6 addresses like a netloc:

    >> host_port_split('foo')
    ('foo', None)
    >> host_port_split('foo:5683')
    ('foo', 5683)
    >> host_port_split('[::1%eth0]:56830')
    ('::1%eth0', 56830)
    """
    # Delegate the actual netloc parsing to the stdlib by wrapping the
    # string in a SplitResult and reading its hostname/port properties.
    faked = SplitResult(None, host_port, None, None, None)
    try:
        return (faked.hostname, faked.port)
    except ValueError:
        bracketless_ipv6 = '[' not in host_port and host_port.count(':') > 1
        if bracketless_ipv6:
            raise ValueError(
                "Could not parse network location. "
                "Beware that when IPv6 literals are expressed in URIs, they "
                "need to be put in square brackets to distinguish them from "
                "port numbers.")
        raise
Ejemplo n.º 5
0
 def list_files(self, first_date: dt.date, last_date: dt.date,
                test_type: str, country: str) -> Iterable[FileEntry]:
     """Yield a FileEntry for each measurement file in the date range.

     Walks the bucket one date directory at a time, then per hour and
     country (and optionally per test type). Stops as soon as a listed
     date exceeds *last_date*.
     """
     # This bucket layout only exists from 2020-10-20 onward.
     if last_date < dt.date(2020, 10, 20):
         return
     paginator = self._s3_client.get_paginator('list_objects_v2')
     date_pages = paginator.paginate(
         Bucket=_2020OoniClient._BUCKET,
         Delimiter='/',
         Prefix=_2020OoniClient._PREFIX,
         StartAfter=(
             f'{_2020OoniClient._PREFIX}{first_date.strftime("%Y%m%d")}'))
     for date_page in date_pages:
         self.num_list_requests += 1
         bucket = date_page['Name']
         # BUGFIX: the inner listing loops previously reused the names
         # "page" and "entry", shadowing the outer loop variables.
         for date_entry in date_page.get('CommonPrefixes', []):
             date_dir = date_entry['Prefix']
             date_str = posixpath.basename(posixpath.dirname(date_dir))
             date = dt.datetime.strptime(date_str, "%Y%m%d").date()
             if date > last_date:
                 return
             for hour in range(24):
                 prefix = f'{date_dir}{hour:02}/{country}/'
                 if test_type:
                     prefix += f'{test_type}/'
                 for file_page in paginator.paginate(Bucket=bucket,
                                                     Prefix=prefix):
                     self.num_list_requests += 1
                     for obj in file_page.get('Contents', []):
                         key = obj['Key']
                         file_path = PosixPath(key)
                         if file_path.name.endswith('.jsonl.gz'):
                             file_test_type = file_path.parent.name
                             url = SplitResult('s3', bucket, key,
                                               None, None)
                             # BUGFIX: bind url as a default argument.
                             # A bare "lambda:" captures the loop
                             # variable, so every yielded entry would
                             # have fetched the *last* url.
                             yield FileEntry(
                                 lambda url=url: self._get_measurements(url),
                                 file_test_type, country, date, url,
                                 obj['Size'])
Ejemplo n.º 6
0
    def preprocess_url(self, referrer, url):
        ''' Clean and filter URLs before scraping.

        Resolves ``url`` against ``referrer``, strips the trailing slash
        and fragment, upgrades http to https, and returns the result only
        when it belongs to the current domain and was not seen before.
        '''
        if not url:
            return None

        fields = urlsplit(urljoin(referrer, url))._asdict() # convert to absolute URLs and split
        fields['path'] = re.sub(r'/$', '', fields['path']) # remove trailing /
        fields['fragment'] = '' # remove targets within a page
        fields = SplitResult(**fields)
        if fields.netloc == self.domain:
            # Scrape pages of current domain only
            cleanurl = fields.geturl()
            if fields.scheme == 'http':
                # Prefer the https variant of the page.
                cleanurl = cleanurl.replace('http:', 'https:', 1)

            # BUGFIX: the original condition tested the very same
            # membership twice ("cleanurl not in self.urls and cleanurl
            # not in self.urls"); a single check is sufficient.
            if cleanurl not in self.urls:
                # Return URL only if it's not already in list
                return cleanurl

        return None
Ejemplo n.º 7
0
async def get_article_details(entry, fetch=True):
    """Return ``(link, title)`` for a feed entry, optionally resolving the
    real URL (and a missing title) by fetching the page.

    Falls back to the original link and ``'No title'`` on any failure.
    """
    article_link = entry.get('link')
    article_title = html.unescape(entry.get('title', ''))
    # NOTE(review): by operator precedence this reads as
    # (fetch and conf.CRAWLER_RESOLV and article_link) or (not article_title),
    # i.e. a missing title triggers a fetch even when fetch=False or
    # CRAWLER_RESOLV is off — confirm this is intended.
    if fetch and conf.CRAWLER_RESOLV and article_link or not article_title:
        try:
            # resolves URL behind proxies (like feedproxy.google.com)
            response = await jarr_get(article_link, timeout=5)
        except MissingSchema:
            # The link lacks a scheme; retry with https, then http.
            split, failed = urlsplit(article_link), False
            for scheme in 'https', 'http':
                new_link = urlunsplit(SplitResult(scheme, *split[1:]))
                try:
                    response = await jarr_get(new_link, timeout=5)
                except Exception as error:
                    failed = True
                    continue
                failed = False
                article_link = new_link
                break
            if failed:
                return article_link, article_title or 'No title'
        except Exception as error:
            logger.info(
                "Unable to get the real URL of %s. Won't fix "
                "link or title. Error: %s", article_link, error)
            return article_link, article_title or 'No title'
        article_link = response.url
        if not article_title:
            # Parse only <head> to extract the page <title> cheaply.
            bs_parsed = BeautifulSoup(response.content,
                                      'html.parser',
                                      parse_only=SoupStrainer('head'))
            try:
                article_title = bs_parsed.find_all('title')[0].text
            except IndexError:  # no title
                pass
    return article_link, article_title or 'No title'
Ejemplo n.º 8
0
    def build(cls,
              *,
              scheme='',
              user='',
              password='',
              host='',
              port=None,
              path='',
              query=None,
              query_string='',
              fragment=''):
        """Creates and returns a new URL"""
        # "scheme" and "host" must come as a pair, and "query" /
        # "query_string" are mutually exclusive.
        if host and not scheme:
            raise ValueError(
                'Can\'t build URL with "host" but without "scheme".')
        if not host and scheme:
            raise ValueError(
                'Can\'t build URL with "scheme" but without "host".')
        if query and query_string:
            raise ValueError(
                "Only one of \"query\" or \"query_string\" should be passed")

        netloc = cls._make_netloc(user, password, host, port)
        quoted_path = _quote(path, safe='@:', protected='/')
        if netloc:
            quoted_path = _normalize_path(quoted_path)

        split = SplitResult(scheme, netloc, quoted_path,
                            _quote(query_string), fragment)
        url = cls(split, encoded=True)

        # A structured "query" is applied afterwards via with_query().
        return url.with_query(query) if query else url
Ejemplo n.º 9
0
    def __init__(self, contentURL, fileName):
        '''
        Initialize the content.

        @param contentURL: string
            The URL of the icon to be downloaded.
        @param fileName: string
            The name of file under that the icon should be saved.
        '''
        Content.__init__(self, fileName, 'image', 'binary', 0)

        is_request = isinstance(contentURL, Request)
        raw_url = contentURL.full_url if is_request else contentURL
        scheme, netloc, path, query, fragment = urlsplit(raw_url)
        # Default to http when the URL carries no scheme.
        if not scheme:
            scheme = 'http'
        # Percent-encode path and query so the URL is safe to request.
        rebuilt = urlunsplit(
            SplitResult(scheme, netloc, quote(path), quote(query), fragment))
        if is_request:
            contentURL.full_url = rebuilt
        else:
            contentURL = rebuilt

        self._url = contentURL
        self._response = None
Ejemplo n.º 10
0
def _urlsplit(url, scheme='', allow_fragments=True):
    """Templating safe version of urllib.parse.urlsplit

    Ignores '?' and '#' inside {{}} templating tags.

    Caching disabled.
    """

    url, scheme, _coerce_result = _coerce_args(url, scheme)
    allow_fragments = bool(allow_fragments)
    netloc = query = fragment = ''
    # Peel off the scheme, but only when everything before ':' consists
    # of valid scheme characters.
    i = url.find(':')
    if i > 0:
        for c in url[:i]:
            if c not in scheme_chars:  # pragma: no cover
                break  # https://github.com/nedbat/coveragepy/issues/198
        else:
            scheme, url = url[:i].lower(), url[i + 1:]

    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        # Unbalanced brackets mean a malformed IPv6 literal.
        if (('[' in netloc and ']' not in netloc) or  # noqa: W504, W503
            (']' in netloc and '[' not in netloc)):
            raise ValueError("Invalid IPv6 URL")
    # The lookahead (?![^{{}}]*}}) skips separators that sit inside a
    # {{...}} templating tag, so templated URLs are not cut apart.
    if allow_fragments and '#' in url:
        result = re.split(r'#(?![^{{}}]*}})', url, maxsplit=1)
        url = result[0]
        if len(result) > 1:
            fragment = result[1]
    if '?' in url:
        result = re.split(r'\?(?![^{{}}]*}})', url, maxsplit=1)
        url = result[0]
        if len(result) > 1:
            query = result[1]
    v = SplitResult(scheme, netloc, url, query, fragment)
    return _coerce_result(v)
Ejemplo n.º 11
0
    def build(cls,
              *,
              scheme="",
              authority="",
              user=None,
              password=None,
              host="",
              port=None,
              path="",
              query=None,
              query_string="",
              fragment="",
              encoded=False):
        """Creates and returns a new URL.

        Either a complete ``authority`` string or the individual ``user``,
        ``password``, ``host`` and ``port`` parts may be given, but not
        both.  ``query`` and ``query_string`` are likewise mutually
        exclusive.  When ``encoded`` is False, all parts are
        percent-encoded before the URL is assembled.
        """

        if authority and (user or password or host or port):
            raise ValueError(
                'Can\'t mix "authority" with "user", "password", "host" or "port".'
            )
        if port and not host:
            raise ValueError(
                'Can\'t build URL with "port" but without "host".')
        if query and query_string:
            raise ValueError(
                'Only one of "query" or "query_string" should be passed')
        if (scheme is None or authority is None or path is None
                or query_string is None or fragment is None):
            raise TypeError(
                'NoneType is illegal for "scheme", "authority", "path", '
                '"query_string", and "fragment" args, use empty string instead.'
            )

        if authority:
            if encoded:
                netloc = authority
            else:
                # Split the authority with a throwaway SplitResult, then
                # rebuild it with each part properly encoded.
                tmp = SplitResult("", authority, "", "", "")
                netloc = cls._make_netloc(tmp.username,
                                          tmp.password,
                                          tmp.hostname,
                                          tmp.port,
                                          encode=True)
        elif not user and not password and not host and not port:
            netloc = ""
        else:
            netloc = cls._make_netloc(user,
                                      password,
                                      host,
                                      port,
                                      encode=not encoded)
        if not encoded:
            path = cls._PATH_QUOTER(path)
            if netloc:
                path = cls._normalize_path(path)

            cls._validate_authority_uri_abs_path(host=host, path=path)
            query_string = cls._QUERY_QUOTER(query_string)
            fragment = cls._FRAGMENT_QUOTER(fragment)

        url = cls(SplitResult(scheme, netloc, path, query_string, fragment),
                  encoded=True)

        # A structured "query" is applied afterwards via with_query().
        if query:
            return url.with_query(query)
        else:
            return url
Ejemplo n.º 12
0
def test_split_result_non_decoded():
    """URL() must reject a raw SplitResult unless encoded=True is passed."""
    with pytest.raises(ValueError):
        URL(SplitResult('http', 'example.com', 'path', 'qs', 'frag'))
Ejemplo n.º 13
0
    def handle_distrib(self, url):
        """React to a file dispatch message.

        Parses the dispatched filename to determine platform, sensor and
        timing metadata, merges it with any previously received pass
        information, and returns the resulting swath dict — or None when
        the file is unrecognized or the platform is excluded.
        """
        url = urlsplit(url)
        dummy, filename = os.path.split(url.path)
        logger.debug("filename = %s", filename)
        # TODO: Should not make any assumptions on filename formats, should
        # load a description of it from a config file instead.
        if filename.endswith(".hmf"):
            risestr, satellite = filename[:-4].split("_", 1)
            risetime = datetime.strptime(risestr, "%Y%m%d%H%M%S")
            pname = pass_name(risetime, satellite)
            satellite = satellite.replace("_", "-")
            if satellite in self._excluded_platforms:
                return None
            swath = self._received_passes.get(pname, {}).copy()
            swath.pop('satellite', None)
            swath["platform_name"] = satellite
            swath["start_time"] = risetime
            swath["type"] = "binary"
            swath["format"] = "HRPT"
            if satellite == "NOAA-15":
                swath["sensor"] = ("avhrr/3", "amsu-a", "amsu-b", "hirs/3")
            elif satellite in ["NOAA-18", "NOAA-19"]:
                swath["sensor"] = ("avhrr/3", "mhs", "amsu-a", "hirs/4")
            swath["data_processing_level"] = "0"

        elif filename.startswith("P042") or filename.startswith("P154"):
            # EOS PDS files: fixed-position fields in the filename.
            pds = {}
            pds["format"] = filename[0]
            pds["apid1"] = filename[1:8]
            pds["apid2"] = filename[8:15]
            pds["apid3"] = filename[15:22]
            pds["time"] = datetime.strptime(filename[22:33], "%y%j%H%M%S")
            pds["nid"] = filename[33]
            pds["ufn"] = filename[34:36]
            pds["extension"] = filename[36:40]
            risetime = pds["time"]
            if pds["apid1"][:3] == "042":
                satellite = "EOS-Terra"
                pname = pass_name(risetime, 'TERRA')
            elif pds["apid1"][:3] == "154":
                satellite = "EOS-Aqua"
                pname = pass_name(risetime, 'AQUA')
            else:
                raise ValueError(
                    "Unrecognized satellite ID: " + pds["apid1"][:3])

            if not satellite or satellite in self._excluded_platforms:
                logger.debug("Platform name %s is excluded...", str(satellite))
                return None

            swath = self._received_passes.get(pname, {}).copy()
            swath.pop('satellite', None)
            swath['platform_name'] = satellite
            swath['start_time'] = risetime
            instruments = {"0064": "modis",
                           "0141": "ceres+y",
                           "0157": "ceres-y",
                           "0261": "amsu-a1",
                           "0262": "amsu-a1",
                           "0290": "amsu-a2",
                           "0342": "hsb",
                           "0402": "amsr-e",
                           "0404": "airs",
                           "0405": "airs",
                           "0406": "airs",
                           "0407": "airs",
                           "0414": "airs",
                           "0415": "airs",
                           "0419": "airs",
                           "0957": "gbad",
                           }
            swath["sensor"] = instruments.get(pds["apid1"][3:],
                                              pds["apid1"][3:])
            swath["format"] = "PDS"
            swath["type"] = "binary"
            swath["data_processing_level"] = "0"

        # NPP/JPSS RDRs
        elif filename.startswith("R") and filename.endswith(".h5"):
            # Occassionaly RT-STPS produce files with a nonstandard file
            # naming, lacking the 'RNSCA' field. We will try to deal with this
            # below (Adam - 2013-06-04):
            mda = {}
            mda["format"] = filename[0]
            file_ok = False
            for prefix in JPSS_INSTRUMENTS_FROM_FILENAMES:
                if filename.startswith(prefix):
                    mda["sensor"] = JPSS_INSTRUMENTS_FROM_FILENAMES[prefix]
                    start_time_items = filename.strip(prefix).split('_')[1:3]
                    end_time_item = filename.strip(prefix).split('_')[3]
                    satellite = JPSS_PLATFORM_NAME.get(filename.strip(prefix).split('_')[0], None)
                    orbit = filename.strip(prefix).split('_')[4].strip('b')
                    file_ok = True
                    break

            if not file_ok:
                logger.warning("Seems to be a NPP/JPSS RDR "
                               "file but name is not standard!")
                logger.warning("filename = %s", filename)
                return None

            # satellite = "Suomi-NPP, NOAA-20, NOAA-21,..."
            if not satellite or satellite in self._excluded_platforms:
                logger.debug("Platform name %s is excluded...", str(satellite))
                return None

            mda["start_time"] = \
                datetime.strptime(start_time_items[0] + start_time_items[1],
                                  "d%Y%m%dt%H%M%S%f")
            end_time = \
                datetime.strptime(start_time_items[0] + end_time_item,
                                  "d%Y%m%de%H%M%S%f")
            # The end time carries no date; roll it over midnight if needed.
            if mda["start_time"] > end_time:
                end_time += timedelta(days=1)
            mda["orbit"] = orbit

            # FIXME: swath start and end time is granule dependent.
            # Get the end time as well! - Adam 2013-06-03:
            start_time = mda["start_time"]
            pname = pass_name(start_time, SCISYS_NAMES.get(satellite))

            swath = self._received_passes.get(pname, {}).copy()
            swath.pop("satellite", None)
            swath["platform_name"] = satellite
            swath["start_time"] = start_time
            swath['end_time'] = end_time
            swath["sensor"] = mda["sensor"]
            swath["format"] = "RDR"
            swath["type"] = "HDF5"
            swath["data_processing_level"] = "0"

        # metop
        elif filename[4:12] == "_HRP_00_":
            # "AVHR": "avhrr",
            instruments = {"ASCA": "ascat",
                           "AMSA": "amsu-a",
                           "ATOV": "atovs",
                           "AVHR": "avhrr/3",
                           "GOME": "gome",
                           "GRAS": "gras",
                           "HIRS": "hirs/4",
                           "IASI": "iasi",
                           "MHSx": "mhs",
                           "SEMx": "sem",
                           "ADCS": "adcs",
                           "SBUV": "sbuv",
                           "HKTM": "vcdu34"}

            satellites = {"M02": "Metop-A",
                          "M01": "Metop-B",
                          "M03": "Metop-C"}

            satellite = satellites[filename[12:15]]
            risetime = datetime.strptime(filename[16:31], "%Y%m%d%H%M%SZ")
            falltime = datetime.strptime(filename[32:47], "%Y%m%d%H%M%SZ")

            pname = pass_name(risetime, satellite.upper())
            logger.debug("pname= % s", str(pname))
            swath = self._received_passes.get(pname, {}).copy()
            swath.pop('satellite', None)
            swath["start_time"] = risetime
            swath["end_time"] = falltime
            swath["platform_name"] = satellite
            swath["sensor"] = instruments[filename[:4]]
            swath["format"] = "EPS"
            swath["type"] = "binary"
            swath["data_processing_level"] = "0"
        else:
            return None

        # Rewrite local/ftp URLs as ssh URIs pointing at the emitter/host.
        if url.scheme in ["", "file"]:
            scheme = "ssh"
            netloc = self._emitter
            uri = urlunsplit(SplitResult(scheme,
                                         netloc,
                                         url.path,
                                         url.query,
                                         url.fragment))
        elif url.scheme == "ftp":
            scheme = "ssh"
            netloc = url.hostname
            uri = urlunsplit(SplitResult(scheme,
                                         netloc,
                                         url.path,
                                         url.query,
                                         url.fragment))
        else:
            logger.debug("url.scheme not expected: %s", url.scheme)
            # BUGFIX: previously fell through with "uri" undefined and
            # crashed below with a NameError; bail out explicitly.
            return None

        swath["uid"] = os.path.split(url.path)[1]
        swath["uri"] = uri
        swath['variant'] = 'DR'
        return swath
Ejemplo n.º 14
0
 def spliturl(url):
     """Split *url* into ``(apiurl, path)``: the scheme+netloc base and
     the path+query remainder (the fragment is dropped)."""
     scheme, netloc, path_part, query, _ = urlsplit(url)
     base = SplitResult(scheme, netloc, '', '', '').geturl()
     rest = SplitResult('', '', path_part, query, '').geturl()
     return (base, rest)
Ejemplo n.º 15
0
def addNewParaToLink(link: str, key: str, value: str) -> str:
    """Append ``key=value`` to the query string of *link* and return it."""
    parts = urlsplit(link)
    params = parse_qsl(parts.query)
    params.append((str(key), str(value)))
    return urlunsplit(parts._replace(query=urlencode(params)))
Ejemplo n.º 16
0
def test_split_result_non_decoded():
    """URL() must reject a raw SplitResult unless encoded=True is passed."""
    with pytest.raises(ValueError):
        URL(SplitResult("http", "example.com", "path", "qs", "frag"))
Ejemplo n.º 17
0
def report_error(
    request: HttpRequest,
    user_profile: UserProfile,
    message: str = REQ(),
    stacktrace: str = REQ(),
    ui_message: bool = REQ(json_validator=check_bool),
    user_agent: str = REQ(),
    href: str = REQ(),
    log: str = REQ(),
    more_info: Mapping[str, Any] = REQ(json_validator=check_dict([]),
                                       default={}),
) -> HttpResponse:
    """Accepts an error report and stores in a queue for processing.  The
    actual error reports are later handled by do_report_error"""
    # Reporting can be disabled entirely via settings.
    if not settings.BROWSER_ERROR_REPORTING:
        return json_success(request)
    # Copy so the request-parsed mapping is never mutated in place.
    more_info = dict(more_info)

    js_source_map = get_js_source_map()
    if js_source_map:
        stacktrace = js_source_map.annotate_stacktrace(stacktrace)

    try:
        version: Optional[str] = subprocess.check_output(
            ["git", "show", "-s", "--oneline"],
            text=True,
        )
    except (FileNotFoundError, subprocess.CalledProcessError):
        # Not a git checkout (or git missing); version is simply unknown.
        version = None

    # Get the IP address of the request
    remote_ip = request.META["REMOTE_ADDR"]

    # For the privacy of our users, we remove any actual text content
    # in draft_content (from drafts rendering exceptions).  See the
    # comment on privacy_clean_markdown for more details.
    if more_info.get("draft_content"):
        more_info["draft_content"] = privacy_clean_markdown(
            more_info["draft_content"])

    if user_profile.is_authenticated:
        email = user_profile.delivery_email
        full_name = user_profile.full_name
    else:
        email = "*****@*****.**"
        full_name = "Anonymous User"

    queue_json_publish(
        "error_reports",
        dict(
            type="browser",
            report=dict(
                # SplitResult parses "host[:port]" so only the hostname
                # (sans port) is recorded.
                host=SplitResult("", request.get_host(), "", "", "").hostname,
                ip_address=remote_ip,
                user_email=email,
                user_full_name=full_name,
                user_visible=ui_message,
                server_path=settings.DEPLOY_ROOT,
                version=version,
                user_agent=user_agent,
                href=href,
                message=message,
                stacktrace=stacktrace,
                log=log,
                more_info=more_info,
            ),
        ),
    )

    return json_success(request)
Ejemplo n.º 18
0
 def __init__(self, url: typing.Optional[str] = None) -> None:
     """Store *url* in split form plus its parsed query pairs.

     A missing URL behaves exactly like the empty URL.
     """
     if url is None:
         self.splitted = SplitResult("", "", "", "", "")
     else:
         self.splitted = urlsplit(url)
     # Pre-parse the query string into (key, value) pairs.
     self.query = parse_qsl(self.splitted.query)
Ejemplo n.º 19
0
def deport(netloc: str) -> str:
    """Remove the port from a hostname:port string.  Brackets on a literal
    IPv6 address are included."""
    host = SplitResult("", netloc, "", "", "").hostname
    assert host is not None
    if ":" in host:
        # .hostname strips the brackets from IPv6 literals; restore them.
        return "[" + host + "]"
    return host
Ejemplo n.º 20
0
    def handle(self, *args: Any, **options: Any) -> None:
        """Run the Tornado-based event server on the "addrport" option.

        Sets up signal handling, the asyncio loop, optional RabbitMQ
        consumers and the event-queue machinery, then serves until
        SIGINT/SIGTERM arrives.
        """
        interactive_debug_listen()
        addrport = options["addrport"]
        assert isinstance(addrport, str)

        from tornado import httpserver

        # addrport is either a bare port ("9993") or "host:port"; the
        # latter is parsed by wrapping it as a netloc in a SplitResult.
        if addrport.isdigit():
            addr, port = "", int(addrport)
        else:
            r = SplitResult("", addrport, "", "", "")
            if r.port is None:
                raise CommandError(
                    f"{addrport!r} does not have a valid port number.")
            addr, port = r.hostname or "", r.port

        if not addr:
            addr = "127.0.0.1"

        if settings.DEBUG:
            logging.basicConfig(
                level=logging.INFO,
                format="%(asctime)s %(levelname)-8s %(message)s")

        async def inner_run() -> None:
            from django.utils import translation

            AsyncIOMainLoop().install()
            loop = asyncio.get_running_loop()
            stop_fut = loop.create_future()

            def stop() -> None:
                if not stop_fut.done():
                    stop_fut.set_result(None)

            def add_signal_handlers() -> None:
                # NOTE(review): the trailing commas make each statement a
                # discarded 1-tuple — harmless, but likely unintended.
                loop.add_signal_handler(signal.SIGINT, stop),
                loop.add_signal_handler(signal.SIGTERM, stop),

            def remove_signal_handlers() -> None:
                loop.remove_signal_handler(signal.SIGINT),
                loop.remove_signal_handler(signal.SIGTERM),

            async with AsyncExitStack() as stack:
                stack.push_async_callback(
                    sync_to_async(remove_signal_handlers,
                                  thread_sensitive=True))
                await sync_to_async(add_signal_handlers,
                                    thread_sensitive=True)()

                translation.activate(settings.LANGUAGE_CODE)

                # We pass display_num_errors=False, since Django will
                # likely display similar output anyway.
                self.check(display_num_errors=False)
                print(f"Tornado server (re)started on port {port}")

                if settings.USING_RABBITMQ:
                    queue_client = TornadoQueueClient()
                    set_queue_client(queue_client)
                    # Process notifications received via RabbitMQ
                    queue_name = notify_tornado_queue_name(port)
                    stack.callback(queue_client.close)
                    queue_client.start_json_consumer(
                        queue_name,
                        get_wrapped_process_notification(queue_name))

                # Application is an instance of Django's standard wsgi handler.
                application = create_tornado_application()

                # start tornado web server in single-threaded mode
                http_server = httpserver.HTTPServer(application, xheaders=True)
                stack.push_async_callback(http_server.close_all_connections)
                stack.callback(http_server.stop)
                http_server.listen(port, address=addr)

                from zerver.tornado.ioloop_logging import logging_data

                logging_data["port"] = str(port)
                await setup_event_queue(http_server, port)
                stack.callback(dump_event_queues, port)
                add_client_gc_hook(missedmessage_hook)
                if settings.USING_RABBITMQ:
                    setup_tornado_rabbitmq(queue_client)

                if hasattr(__main__, "add_reload_hook"):
                    autoreload.start()

                await stop_fut

                # Monkey patch tornado.autoreload to prevent it from continuing
                # to watch for changes after catching our SystemExit. Otherwise
                # the user needs to press Ctrl+C twice.
                __main__.wait = lambda: None

        async_to_sync(inner_run, force_new_loop=True)()
Ejemplo n.º 21
0
def get_base_url(url):
    """Return *url* reduced to ``scheme://netloc/`` — the path is set to
    '/' and the query and fragment are dropped."""
    return urlunsplit(urlsplit(url)._replace(path='/', query='', fragment=''))
Ejemplo n.º 22
0
def changeFileNameForLink(link: str, name: str) -> str:
    """Replace the final path component of *link* with *name*."""
    parts = urlsplit(link)
    dirname = splitfn(parts.path)[0]
    return urlunsplit(parts._replace(path=f"{dirname}/{name}"))
Ejemplo n.º 23
0
                    function_name, arguments)
                print("assert", call, "==", call_value(result))
            except Exception:
                continue

from urllib.parse import SplitResult, ParseResult, urlparse, urlsplit

if __name__ == "__main__":
    # urlparse yields a 6-tuple ParseResult (it has a `params` field);
    # the explicit scheme/allow_fragments defaults are spelled out above
    # in the original call — the parse result is the same either way.
    expected_parse = ParseResult(scheme='http',
                                 netloc='www.example.com',
                                 path='',
                                 params='',
                                 query='',
                                 fragment='')
    assert urlparse('http://www.example.com', '', True) == expected_parse

    # urlsplit yields a 5-tuple SplitResult (no `params` field).
    expected_split = SplitResult(scheme='http',
                                 netloc='www.example.com',
                                 path='',
                                 query='',
                                 fragment='')
    assert urlsplit('http://www.example.com', '', True) == expected_split

# ### Exercise 2: Abstracting Arguments

if __name__ == "__main__":
    print('\n### Exercise 2: Abstracting Arguments')
Ejemplo n.º 24
0
    def handle(self, addrport: str, **options: bool) -> None:
        """Run the single-threaded Tornado event server.

        Parameters
        ----------
        addrport : str
            Either a bare port number (``"9993"``) or ``"host:port"``.
        **options : bool
            Recognized keys: ``xheaders`` (default True) and
            ``no_keep_alive`` (default False), passed to the HTTPServer.

        Raises
        ------
        CommandError
            If ``addrport`` contains no valid port number.
        """
        interactive_debug_listen()

        # Deferred imports: only needed when the command actually runs.
        import django
        from tornado import httpserver

        if addrport.isdigit():
            # Bare port: bind address chosen below.
            addr, port = "", int(addrport)
        else:
            # Reuse SplitResult's netloc parsing to split "host:port";
            # .port is None when the port part is missing or non-numeric.
            r = SplitResult("", addrport, "", "", "")
            if r.port is None:
                raise CommandError(
                    f"{addrport!r} does not have a valid port number.")
            addr, port = r.hostname or "", r.port

        if not addr:
            # Default to loopback rather than all interfaces.
            addr = '127.0.0.1'

        xheaders = options.get('xheaders', True)
        no_keep_alive = options.get('no_keep_alive', False)

        if settings.DEBUG:
            logging.basicConfig(
                level=logging.INFO,
                format='%(asctime)s %(levelname)-8s %(message)s')

        def inner_run() -> None:
            # Runs the whole server lifecycle; defined as a closure so it
            # captures addr/port/xheaders/no_keep_alive from above.
            from django.conf import settings
            from django.utils import translation
            translation.activate(settings.LANGUAGE_CODE)

            # We pass display_num_errors=False, since Django will
            # likely display similar output anyway.
            self.check(display_num_errors=False)
            print(f"Tornado server is running at http://{addr}:{port}/")

            if settings.USING_RABBITMQ:
                queue_client = get_queue_client()
                # Process notifications received via RabbitMQ
                queue_name = notify_tornado_queue_name(port)
                queue_client.register_json_consumer(
                    queue_name, get_wrapped_process_notification(queue_name))

            try:
                # Application is an instance of Django's standard wsgi handler.
                application = create_tornado_application(port)
                if settings.AUTORELOAD:
                    zulip_autoreload_start()

                # start tornado web server in single-threaded mode
                http_server = httpserver.HTTPServer(
                    application,
                    xheaders=xheaders,
                    no_keep_alive=no_keep_alive)
                http_server.listen(port, address=addr)

                from zerver.tornado.ioloop_logging import logging_data
                logging_data['port'] = str(port)
                setup_event_queue(port)
                add_client_gc_hook(missedmessage_hook)
                setup_tornado_rabbitmq()

                instance = ioloop.IOLoop.instance()

                if django.conf.settings.DEBUG:
                    # Log callbacks that block the IOLoop for >5s, and route
                    # callback exceptions through our handler.
                    instance.set_blocking_log_threshold(5)
                    instance.handle_callback_exception = handle_callback_exception
                instance.start()  # blocks until the IOLoop is stopped
            except KeyboardInterrupt:
                sys.exit(0)

        inner_run()
Ejemplo n.º 25
0
    def __new__(cls, value='', encoded=False):
        """
        Creates a new ``URL`` instance from the given `value`
        
        Parameters
        ----------
        value : ``URL``, `str`, `urllib.parse.SplitResult`, Optional
            The value to create ``URL`` from. Defaults to empty string.
        encoded : `bool`, Optional
            Whether the given `value` is already encoded. Defaults to `False`.

        Raises
        -------
        ValueError
            - If `value` is given as `urllib.parse.SplitResult` instance, but `encoded` was given as `False`.
            - If `value` is not `encoded` and the URL is absolute, but `host` is not given.
        TypeError
            If `value` was not given neither as ``URL``, `str` nor `urllib.parse.SplitResult` instance.
        """
        if isinstance(value, cls):
            # Already a URL; URL instances are treated as immutable.
            return value
        
        if isinstance(value, str):
            value = url_split(value)
        elif isinstance(value, SplitResult):
            if not encoded:
                raise ValueError(f'Cannot apply decoding to `{SplitResult.__name__}`.')
        else:
            raise TypeError(f'`value` should have be given as `{cls.__name__}`, `str` or `{SplitResult.__name__}` '
                f'instance, got {value.__class__.__name__}.')
        
        if not encoded:
            if value.netloc:
                netloc = value.hostname
                if netloc is None:
                    raise ValueError('Invalid URL: host is required for absolute urls.')
                
                try:
                    netloc.encode('ascii')
                except UnicodeEncodeError:
                    # Non-ASCII host: encode with IDNA (internationalized
                    # domain names).
                    netloc = netloc.encode('idna').decode('ascii')
                else:
                    # `ip_address` raises ValueError for non-IP hostnames;
                    # catch only that instead of a bare `except:` which would
                    # also swallow KeyboardInterrupt / SystemExit.
                    try:
                        ip = ip_address(netloc)
                    except ValueError:
                        pass
                    else:
                        if ip.version == 6:
                            # IPv6 literals must be bracketed in a netloc.
                            netloc = f'[{netloc}]'
                
                value_port = value.port
                if value_port:
                    netloc = f'{netloc}:{value_port}'
                
                value_username = value.username
                if value_username:
                    user = quote(value_username)
                    value_password = value.password
                    if value_password:
                        user = f'{user}:{quote(value_password)}'
                    
                    netloc = f'{user}@{netloc}'
            else:
                netloc = ''
            
            # Percent-encode each component with its component-specific set
            # of safe / protected characters.
            value = SplitResult(value.scheme, netloc,
                quote(value.path, safe='@:', protected='/'),
                quote(value.query, safe='=+&?/:@', protected='=+&', query_string=True),
                quote(value.fragment, safe='?/:@'),
                    )
        
        self = object.__new__(cls)
        self._value = value
        self._cache = {}
        return self
def test_to_bytestring():
    """to_bytestring: strings are encoded, bytes pass through, others raise."""
    assert util.to_bytestring('test_str', 'ascii') == b'test_str'
    assert util.to_bytestring('test_str®') == b'test_str\xc2\xae'
    assert util.to_bytestring(b'byte_test_str') == b'byte_test_str'
    with pytest.raises(TypeError) as err:
        util.to_bytestring(100)
    assert '100 is not a string' in str(err)


@pytest.mark.parametrize('test_input, expected', [
    ('https://example.org/a/b?c=1#d',
     SplitResult(scheme='https',
                 netloc='example.org',
                 path='/a/b',
                 query='c=1',
                 fragment='d')),
    ('a/b?c=1#d',
     SplitResult(scheme='', netloc='', path='a/b', query='c=1', fragment='d')),
    ('/a/b?c=1#d',
     SplitResult(scheme='', netloc='', path='/a/b', query='c=1',
                 fragment='d')),
    ('//a/b?c=1#d',
     SplitResult(scheme='', netloc='', path='//a/b', query='c=1',
                 fragment='d')),
    ('///a/b?c=1#d',
     SplitResult(
         scheme='', netloc='', path='///a/b', query='c=1', fragment='d')),
])
def test_split_request_uri(test_input, expected):
Ejemplo n.º 27
0
    def __init__(self, val='', *, encoded=False, strict=False):
        """Build a URL from a str, SplitResult, or another URL.

        When ``encoded`` is False the components are percent-quoted and the
        path is normalized; a SplitResult must already be encoded.  ``strict``
        is forwarded to the quoting helpers.
        """
        if isinstance(val, URL):
            # Copy-construct: share the already-built state.
            self._val = val._val
            self._cache = val._cache
            self._strict = val._strict
            return
        if isinstance(val, str):
            val = urlsplit(val)
        elif isinstance(val, SplitResult):
            if not encoded:
                raise ValueError("Cannot apply decoding to SplitResult")
        else:
            raise TypeError("Constructor parameter should be str")

        self._strict = strict

        if not encoded:
            if not val[1]:  # netloc
                netloc = ''
            else:
                netloc = val.hostname
                if netloc is None:
                    # Fixed typo in the message ("abolute" -> "absolute").
                    raise ValueError(
                        "Invalid URL: host is required for absolute urls.")
                try:
                    netloc.encode('ascii')
                except UnicodeEncodeError:
                    # Non-ASCII host: encode via IDNA.
                    netloc = netloc.encode('idna').decode('ascii')
                else:
                    # ip_address raises ValueError for non-IP hostnames; catch
                    # only that instead of a bare `except:` which would also
                    # swallow KeyboardInterrupt / SystemExit.
                    try:
                        ip = ip_address(netloc)
                    except ValueError:
                        pass
                    else:
                        if ip.version == 6:
                            # IPv6 literals must be bracketed in a netloc.
                            netloc = '[' + netloc + ']'
                if val.port:
                    netloc += ':{}'.format(val.port)
                if val.username:
                    user = _quote(val.username)
                else:
                    user = ''
                if val.password:
                    user += ':' + _quote(val.password)
                if user:
                    netloc = user + '@' + netloc

            path = _quote(val[2], safe='+@:', protected='/+', strict=strict)
            if netloc:
                # Only absolute URLs get their path normalized.
                path = _normalize_path(path)

            query = _quote(val[3],
                           safe='=+&?/:@',
                           protected=PROTECT_CHARS,
                           qs=True,
                           strict=strict)
            fragment = _quote(val[4], safe='?/:@', strict=strict)
            val = SplitResult(val[0], netloc, path, query, fragment)

        self._val = val
        self._cache = {}
Ejemplo n.º 28
0
 def __init__(self):
     """Initialize with an empty SplitResult and no query parameters."""
     self.splitted = SplitResult("", "", "", "", "")
     self.query = []
Ejemplo n.º 29
0
def urlsplit(url):
    """Split *url* like ``_urlsplit``, but also peel a ``#fragment`` that
    ``_urlsplit`` left attached to the path."""
    scheme, netloc, path, query, fragment = _urlsplit(url)
    if "#" in path:
        path, _, fragment = path.partition("#")
    return SplitResult(scheme, netloc, path, query, fragment)
Ejemplo n.º 30
0
    def run(self):
        """
        Run node, spawning entity and doing other actions as configured in program arguments.

        Returns exit code, 1 for failure, 0 for success
        """
        # Wait for entity to exist if wait flag is enabled
        if self.args.wait:
            self.entity_exists = False

            def entity_cb(entity):
                self.entity_exists = self.args.wait in entity.name

            self.subscription = self.create_subscription(
                ModelStates, '%s/model_states' % self.args.gazebo_namespace,
                entity_cb, 10)

            self.get_logger().info(
                'Waiting for entity {} before proceeding.'.format(
                    self.args.wait))

            while rclpy.ok() and not self.entity_exists:
                rclpy.spin_once(self)

        # Load entity XML from file
        if self.args.file:
            self.get_logger().info('Loading entity XML from file %s' %
                                   self.args.file)
            # NOTE: rclpy loggers take a single message string, so the
            # filename is interpolated up front -- the previous
            # two-argument calls (message, arg) would raise TypeError.
            if not os.path.exists(self.args.file):
                self.get_logger().error(
                    'Error: specified file %s does not exist' %
                    self.args.file)
                return 1
            if not os.path.isfile(self.args.file):
                self.get_logger().error(
                    'Error: specified file %s is not a file' %
                    self.args.file)
                return 1
            # Load the file; `with` guarantees the descriptor is closed even
            # when read() raises (the previous code leaked it on error).
            try:
                with open(self.args.file, 'r') as f:
                    entity_xml = f.read()
            except IOError as e:
                self.get_logger().error('Error reading file {}: {}'.format(
                    self.args.file, e))
                return 1
            if entity_xml == '':
                self.get_logger().error('Error: file %s is empty' %
                                        self.args.file)
                return 1
        # Load entity XML published on topic specified
        elif self.args.topic:
            self.get_logger().info('Loading entity published on topic %s' %
                                   self.args.topic)
            entity_xml = ''

            def entity_xml_cb(msg):
                nonlocal entity_xml
                entity_xml = msg.data

            self.subscription = self.create_subscription(
                String, self.args.topic, entity_xml_cb,
                QoSDurabilityPolicy.RMW_QOS_POLICY_DURABILITY_TRANSIENT_LOCAL)

            while rclpy.ok() and entity_xml == '':
                self.get_logger().info('Waiting for entity xml on %s' %
                                       self.args.topic)
                rclpy.spin_once(self)

        # Generate entity XML by putting requested entity name into request template
        elif self.args.database:
            self.get_logger().info(
                'Loading entity XML from Gazebo Model Database')
            entity_xml = self.MODEL_DATABASE_TEMPLATE.format(
                self.args.database)
        elif self.args.stdin:
            self.get_logger().info('Loading entity XML from stdin')
            entity_xml = sys.stdin.read()
            if entity_xml == '':
                self.get_logger().error('Error: stdin buffer was empty')
                return 1

        # Parse xml to detect invalid xml before sending to gazebo
        try:
            xml_parsed = ElementTree.fromstring(entity_xml)
        except ElementTree.ParseError as e:
            self.get_logger().error('Invalid XML: {}'.format(e))
            return 1

        # Replace package:// with model:// for mesh tags if flag is set
        if self.args.package_to_model:
            for element in xml_parsed.iterfind('.//mesh'):
                filename_tag = element.get('filename')
                if filename_tag is None:
                    continue
                url = urlsplit(filename_tag)
                if url.scheme == 'package':
                    # Keep netloc/path/query/fragment, swap only the scheme.
                    url = SplitResult('model', *url[1:])
                    element.set('filename', url.geturl())

        # Encode xml object back into string for service call
        entity_xml = ElementTree.tostring(xml_parsed)

        # Form requested Pose from arguments
        initial_pose = Pose()
        initial_pose.position.x = float(self.args.x)
        initial_pose.position.y = float(self.args.y)
        initial_pose.position.z = float(self.args.z)

        # quaternion_from_euler returns (w, x, y, z) in this codebase.
        q = quaternion_from_euler(self.args.R, self.args.P, self.args.Y)
        initial_pose.orientation.w = q[0]
        initial_pose.orientation.x = q[1]
        initial_pose.orientation.y = q[2]
        initial_pose.orientation.z = q[3]

        success = self._spawn_entity(entity_xml, initial_pose)
        if not success:
            self.get_logger().error('Spawn service failed. Exiting.')
            return 1

        # TODO(shivesh): Wait for /set_model_configuration
        # (https://github.com/ros-simulation/gazebo_ros_pkgs/issues/779)
        # Apply joint positions if any specified
        # if len(self.args.joints) != 0:
        #     joint_names = [joint[0] for joint in self.args.joints]
        #     joint_positions = [joint[1] for joint in self.args.joints]
        #     success = _set_model_configuration(joint_names, joint_positions)
        #     if not success:
        #         self.get_logger().error('SetModelConfiguration service failed. Exiting.')
        #         return 1

        # Unpause physics if user requested
        if self.args.unpause:
            client = self.create_client(
                Empty, '%s/unpause_physics' % self.args.gazebo_namespace)
            if client.wait_for_service(timeout_sec=self.args.timeout):
                self.get_logger().info('Calling service %s/unpause_physics' %
                                       self.args.gazebo_namespace)
                client.call_async(Empty.Request())
            else:
                # The original message never interpolated its %s placeholder
                # and embedded line-continuation whitespace; fixed here.
                self.get_logger().error(
                    'Service %s/unpause_physics unavailable. '
                    'Was Gazebo started with GazeboRosInit?' %
                    self.args.gazebo_namespace)

        # If bond enabled, setup shutdown callback and wait for shutdown
        if self.args.bond:
            self.get_logger().info(
                'Waiting for shutdown to delete entity [{}]'.format(
                    self.args.entity))
            try:
                rclpy.spin(self)
            except KeyboardInterrupt:
                self.get_logger().info('Ctrl-C detected')
            self._delete_entity()

        return 0