Esempio n. 1
0
    def preprocess_url(self, referrer, url):
        ''' Normalise a link and decide whether it still has to be scraped.

        ``url`` is resolved against ``referrer``, one trailing slash is
        removed from its path and its fragment is discarded.  The cleaned URL
        is returned only when its host equals ``self.domain``, none of the
        ignored markers occurs in it and neither its http nor its https
        variant is already in ``self.urls``.  In every other case ``None``
        is returned.
        '''
        if not url:
            return None

        skip_markers = ('.pdf', '.jpg', 'tel:', '.dmg')

        # Resolve relative links, then drop the trailing "/" and the anchor.
        absolute = urlsplit(urljoin(referrer, url))
        parts = SplitResult(
            scheme=absolute.scheme,
            netloc=absolute.netloc,
            path=re.sub(r'/$', '', absolute.path),
            query=absolute.query,
            fragment='',
        )

        # Only pages of the current domain are scraped.
        if parts.netloc != self.domain:
            return None

        cleanurl = parts.geturl()
        if parts.scheme == 'http':
            httpurl = cleanurl
            httpsurl = cleanurl.replace('http:', 'https:', 1)
        else:
            httpsurl = cleanurl
            httpurl = cleanurl.replace('https:', 'http:', 1)

        # Markers are matched as plain substrings of either variant.
        if any(marker in httpsurl or marker in httpurl
               for marker in skip_markers):
            return None

        # A URL counts as known when either scheme variant is already listed.
        if httpurl in self.urls or httpsurl in self.urls:
            return None
        return cleanurl
Esempio n. 2
0
def normalize(uristr):
    """
    Return the normalized form of the given URI.

    A proxy prefix is stripped first.  Strings whose scheme is not one of
    ``URL_SCHEMES`` or which have no hostname are returned without further
    changes.  Normalized URLs are rebuilt without a fragment.

    :type uristr: unicode
    :rtype: unicode
    """

    # Proxied URLs start with the proxy prefix followed by a known scheme.
    if any(uristr.startswith(VIA_PREFIX + known + ":")
           for known in URL_SCHEMES):
        uristr = uristr[len(VIA_PREFIX):]

    parts = urlsplit(uristr)

    # Non-URLs and URLs without a hostname are passed through untouched.
    if parts.scheme.lower() not in URL_SCHEMES or parts.hostname is None:
        return uristr

    normalized = SplitResult(
        _normalize_scheme(parts),
        _normalize_netloc(parts),
        _normalize_path(parts),
        _normalize_query(parts),
        None,  # the fragment is dropped
    )
    return normalized.geturl()
Esempio n. 3
0
    def _add_params_to_url(url, params):
        """Adds parameters as a query part of the URL

        Parameters already present in the query string of ``url`` are kept
        and take precedence over entries of ``params`` with the same name.
        The dictionary passed as ``params`` is left unmodified.

        :param url: URL
        :type url: string

        :param params: Dictionary containing parameters
        :type params: Dict

        :return: URL with parameters formatted as a query string
        :rtype: string
        """
        url_parts = urlsplit(url)

        # Merge into a copy: updating ``params`` in place would leak the
        # URL's existing parameters into the caller's dictionary.
        merged_params = dict(params)

        # parse_qs maps every existing name to the list of its values.
        merged_params.update(parse_qs(url_parts.query))

        # doseq=True expands value lists into repeated ``name=value`` pairs.
        new_query = urlencode(merged_params, True)

        return SplitResult(
            url_parts.scheme,
            url_parts.netloc,
            url_parts.path,
            new_query,
            url_parts.fragment,
        ).geturl()
Esempio n. 4
0
 def url(self):
     """Rebuild the request URL from the values available through ``self.get``.

     The forwarded protocol/host entries win when they are set; otherwise
     the scheme falls back to ``wsgi.url_scheme`` (default ``http``) and the
     host to ``HTTP_HOST``.  The result never carries a fragment.
     """
     scheme = self.get('HTTP_X_FORWARDED_PROTO')
     if not scheme:
         scheme = self.get('wsgi.url_scheme', 'http')

     netloc = self.get('HTTP_X_FORWARDED_HOST')
     if not netloc:
         netloc = self.get('HTTP_HOST')

     query = self.get("QUERY_STRING")
     return SplitResult(scheme=scheme,
                        netloc=netloc,
                        path=self.path,
                        query=query,
                        fragment='').geturl()
Esempio n. 5
0
class UrlBuilder:
    """Incrementally assemble a URL from its split components.

    The query string is held separately in ``self.query`` as a list of
    ``(name, value)`` pairs and is only encoded when the builder is turned
    into a string.  Mutating methods return ``self`` so calls can be chained.
    """

    def __init__(self, url: typing.Optional[str] = None) -> None:
        # Without an initial URL every component starts out empty.
        self.splitted = (SplitResult("", "", "", "", "")
                         if url is None else urlsplit(url))
        self.query = parse_qsl(self.splitted.query)

    def set(self,
            scheme: str = None,
            netloc: str = None,
            path: str = None,
            query: typing.List[typing.Tuple[str, str]] = None,
            fragment: str = None) -> "UrlBuilder":
        """Overwrite every component passed as something other than ``None``."""
        if query is not None:
            self.query = query

        requested = {
            'scheme': scheme,
            'netloc': netloc,
            'path': path,
            'fragment': fragment,
        }
        changes = {
            field: value
            for field, value in requested.items() if value is not None
        }
        self.splitted = self.splitted._replace(**changes)
        return self

    def has_query(self, needle_query_name: str) -> bool:
        """Returns True iff the URL being constructed has a query field with name `needle_query_name`."""
        return any(name == needle_query_name for name, _ in self.query)

    def add_query(self, query_name: str, query_value: str) -> "UrlBuilder":
        """Append a query field, keeping existing fields of the same name."""
        self.query.append((query_name, query_value))
        return self

    def replace_query(self, query_name: str, query_value: str) -> "UrlBuilder":
        """
        Drop every query field called `query_name`, then append a single field
        with the name `query_name` and the value `query_value`.
        """
        remaining = [(name, value) for name, value in self.query
                     if name != query_name]
        remaining.append((query_name, query_value))
        self.query = remaining
        return self

    def __str__(self) -> str:
        # The query pairs are encoded only now, when the URL is rendered.
        encoded_query = urlencode(self.query, doseq=True)
        return urlunsplit(self.splitted._replace(query=encoded_query))
Esempio n. 6
0
    def urlsplit(url, scheme='', allow_fragments=True):
        """Parse a URL into 5 components:
        <scheme>://<netloc>/<path>?<query>#<fragment>
        Return a 5-tuple: (scheme, netloc, path, query, fragment).
        Note that we don't break the components up in smaller bits
        (e.g. netloc is a single string) and we don't expand % escapes.

        ``scheme`` is the value reported when the URL itself carries no
        scheme.  With ``allow_fragments`` false a '#' stays part of the
        preceding component.  Results are memoized in ``_parse_cache``.
        Raises ValueError when the netloc contains only one of '[' and ']'."""
        allow_fragments = bool(allow_fragments)
        # The argument types are part of the key, so equal-comparing inputs of
        # different types get separate cache entries.
        key = url, scheme, allow_fragments, type(url), type(scheme)
        cached = _parse_cache.get(key, None)
        if cached:
            return cached
        if len(_parse_cache) >= MAX_CACHE_SIZE:  # avoid runaway growth
            clear_cache()
        netloc = query = fragment = ''
        i = url.find(':')
        if i > 0:
            # Text before the first ':' is a scheme candidate.
            if url[:i] == 'http':  # optimize the common case
                scheme = url[:i].lower()
                url = url[i + 1:]
                if url[:2] == '//':
                    netloc, url = _splitnetloc(url, 2)
                    # Brackets in the netloc must come as a pair.
                    if (('[' in netloc and ']' not in netloc)
                            or (']' in netloc and '[' not in netloc)):
                        raise ValueError("Invalid IPv6 URL")
                if allow_fragments and '#' in url:
                    url, fragment = url.split('#', 1)
                if '?' in url:
                    url, query = url.split('?', 1)
                v = SplitResult(scheme, netloc, url, query, fragment)
                _parse_cache[key] = v
                return v
            # The else branch of this loop runs only when every character
            # before the ':' is a valid scheme character.
            for c in url[:i]:
                if c not in scheme_chars:
                    break
            else:
                # make sure "url" is not actually a port number (in which case
                # "scheme" is really part of the path)
                rest = url[i + 1:]
                if not rest or any(c not in '0123456789' for c in rest):
                    # not a port number
                    scheme, url = url[:i].lower(), rest

        # General path: same netloc / fragment / query splitting as the 'http'
        # fast path above.
        if url[:2] == '//':
            netloc, url = _splitnetloc(url, 2)
            if (('[' in netloc and ']' not in netloc)
                    or (']' in netloc and '[' not in netloc)):
                raise ValueError("Invalid IPv6 URL")
        if allow_fragments and '#' in url:
            url, fragment = url.split('#', 1)
        if '?' in url:
            url, query = url.split('?', 1)
        v = SplitResult(scheme, netloc, url, query, fragment)
        _parse_cache[key] = v
        return v
Esempio n. 7
0
def _urlsplit(url: str, scheme: str = '', allow_fragments: bool = True):
    """Templating safe version of urllib.parse.urlsplit

    Ignores '?' and '#' inside {{}} templating tags.

    Caching disabled.

    Each component is peeled off by its own ``_urlsplit_*`` helper, in the
    order scheme, netloc, fragment, query; whatever is left is the path.
    """

    url, scheme, _coerce_result = _coerce_args(url, scheme)
    allow_fragments = bool(allow_fragments)

    colon_at = url.find(':')
    if colon_at > 0:
        scheme, url = _urlsplit_scheme(url, colon_at)

    netloc = ''
    if url.startswith('//'):
        netloc, url = _urlsplit_netloc(url)

    fragment = ''
    if allow_fragments and '#' in url:
        fragment, url = _urlsplit_fragment(url)

    query = ''
    if '?' in url:
        query, url = _urlsplit_query(url)

    return _coerce_result(SplitResult(scheme, netloc, url, query, fragment))
Esempio n. 8
0
 def parse_element(platform: str, element: str):
     """Build the link data for one sponsorship entry.

     :param platform: one of ``'github'``, ``'patreon'``, ``'ko_fi'`` or
         ``'custom'``.
     :param element: the account name for the named platforms, or a URL
         (with or without scheme) for ``'custom'``.
     :return: a dict with the keys ``'href'`` and ``'text'``, or ``None``
         for an unknown platform.
     """
     if platform == 'github':
         return {
             'href': f'https://github.com/sponsors/{element}',
             'text': f'{element} on Github'
         }
     elif platform == 'patreon':
         return {
             'href': f'https://www.patreon.com/{element}',
             'text': f'{element} on Patreon'
         }
     elif platform == 'ko_fi':
         return {
             'href': f'https://ko-fi.com/{element}',
             'text': f'{element} on Ko-fi'
         }
     elif platform == 'custom':
         comps = urlsplit(element)
         if not comps.scheme:
             if comps.netloc:
                 # "//host/path": the host is already parsed, keep it.
                 netloc, path = comps.netloc, comps.path
             else:
                 # Without a scheme urlsplit leaves everything in ``path``;
                 # the first segment is taken as the host.
                 netloc, _, path = comps.path.partition('/')
             comps = SplitResult(scheme='https',
                                 netloc=netloc,
                                 path=path,
                                 query=comps.query,
                                 fragment=comps.fragment)
         # The path of a URL given with a scheme starts with "/"; strip it so
         # the label reads "host/path" rather than "host//path".
         label = f"{comps.netloc}/{comps.path.lstrip('/')}".rstrip('/')
         return {
             'href': urlunsplit(comps),
             'text': label
         }
     else:
         return None
Esempio n. 9
0
    def test_customer_missing_name(self):
        """Should return error message if there is not enough information about customer name"""
        url_parts = SplitResult(scheme="http",
                                netloc="fsecure.com",
                                path="/",
                                query="",
                                fragment="")
        expected = "Not enough customer's name information"

        outcome = signer._process_signed_url(url_parts, "Lebron", "aaa")

        self.assertEqual(outcome, expected)
Esempio n. 10
0
def get_local_referrer(request):
    """Get the referrer URL if it is not external to this application

    Returns the path, query and fragment of the ``Referer`` header when its
    host passes ``validate_host`` for the allowed hosts.  Returns ``None``
    when the header is missing, cannot be parsed, or names another host.
    """

    if "HTTP_REFERER" not in request.META:
        return None

    # Adopted from here:
    # https://github.com/django/django/blob/7fc317ae736e8fda1aaf4d4ede84d95fffaf5281/django/http/request.py#L124-L130
    allowed_hosts = settings.ALLOWED_HOSTS
    if settings.DEBUG and not allowed_hosts:
        allowed_hosts = [".localhost", "127.0.0.1", "[::1]"]

    try:
        referrer = urlsplit(request.META["HTTP_REFERER"])
    except ValueError:
        # The header is client-controlled; urlsplit rejects malformed values
        # (e.g. unbalanced IPv6 brackets).  Treat those as "no referrer".
        return None

    # ``hostname`` is None when the referrer has no network location.  Pass
    # an empty string so host matching always works on a string.
    # NOTE(review): Django's matching calls str methods on the host — confirm
    # against the installed version.
    if not validate_host(referrer.hostname or "", allowed_hosts):
        return None

    # If the referrer header points to "ourselves", return a "safe copy"
    # removing everything but the path+query+fragment part
    return SplitResult(
        scheme="",
        netloc="",
        path=referrer.path,
        query=referrer.query,
        fragment=referrer.fragment,
    ).geturl()
Esempio n. 11
0
 def list_files(self, first_date: dt.date, last_date: dt.date,
                test_type: str, country: str) -> Iterable[FileEntry]:
     """Yield a ``FileEntry`` for every indexed file in the date range.

     Date directories are listed through the S3 paginator, starting after
     ``first_date`` and stopping at the first directory dated after
     ``last_date``.  Nothing is yielded when ``first_date`` is later than
     2020-10-21 (presumably the last day this legacy bucket covers —
     confirm).  ``self.num_list_requests`` is incremented once per page.
     """
     if first_date > dt.date(2020, 10, 21):
         return
     paginator = self._s3_client.get_paginator('list_objects_v2')
     pages = paginator.paginate(
         Bucket=_LegacyOoniClient._BUCKET,
         Delimiter='/',
         Prefix=_LegacyOoniClient._PREFIX,
         StartAfter=
         f'{_LegacyOoniClient._PREFIX}{first_date.strftime("%Y-%m-%d")}')
     for page in pages:
         self.num_list_requests += 1
         for entry in page.get('CommonPrefixes', []):
             date_dir = entry['Prefix']
             # The directory name itself is the date, e.g. ".../2020-01-31/".
             date_str = posixpath.basename(posixpath.dirname(date_dir))
             date = dt.datetime.strptime(date_str, "%Y-%m-%d").date()
             if date > last_date:
                 return
             for file_entry in self._list_files_with_index(
                     date_dir, test_type, country):
                 url = SplitResult(
                     's3', _LegacyOoniClient._BUCKET,
                     f'{_LegacyOoniClient._PREFIX}{file_entry["filename"]}',
                     None, None)
                 # Bind the current file_entry as a default argument.  A plain
                 # closure would look the loop variable up when called, so
                 # entries collected before being invoked would all read the
                 # last file.
                 yield FileEntry(
                     lambda file_entry=file_entry: self._get_measurements(
                         file_entry), test_type, country, date, url,
                     _LegacyOoniClient._frame_bytes(file_entry['frames']))
Esempio n. 12
0
def build_url(netloc,
              port=None,
              scheme=None,
              path="",
              query=None,
              fragment=""):
    """ Builds a url given all its parts
    :netloc: string
    :port: int
    :scheme: string; defaults to the scheme of the current request
    :path: string
    :query: A dictionary with any GET parameters
    :fragment: string
    :return: URL string
    """
    # urlencode already percent-encodes names and values.  Quoting its result
    # again would escape the "=" and "&" separators and collapse all
    # parameters into a single unparsable token.
    query_string = urlencode(query) if query else ""

    if scheme is None:
        scheme = GlobalRequestMiddleware.get_current_request().scheme

    if port is not None:
        netloc = replace_netloc_port(netloc, port)

    return SplitResult(
        scheme=scheme,
        netloc=netloc,
        path=path,
        query=query_string,
        fragment=fragment,
    ).geturl()
Esempio n. 13
0
    def build(cls,
              *,
              scheme='',
              user='',
              password='',
              host='',
              port=None,
              path='',
              query=None,
              query_string='',
              fragment='',
              strict=False):
        """Creates and returns a new URL

        ``scheme`` and ``host`` must be given together, and at most one of
        ``query`` and ``query_string`` may be passed; violations raise
        ValueError.
        """

        if host and not scheme:
            raise ValueError(
                'Can\'t build URL with "host" but without "scheme".')
        if scheme and not host:
            raise ValueError(
                'Can\'t build URL with "scheme" but without "host".')
        if query and query_string:
            raise ValueError(
                "Only one of \"query\" or \"query_string\" should be passed")

        netloc = cls._make_netloc(user, password, host, port)
        quoted_path = _quote(path, safe='@:', protected='/')
        quoted_query = _quote(query_string)
        parts = SplitResult(scheme, netloc, quoted_path, quoted_query,
                            fragment)
        url = cls(parts, strict=strict, encoded=True)

        # ``query`` is applied on top of the URL built without it.
        return url.with_query(query) if query else url
Esempio n. 14
0
    def __init__(
            self, url, token=None, username=None, password=None, timeout=DEFAULT_TIMEOUT, verify=True,
            user_agent=ZMON_USER_AGENT):
        """Initialize ZMON client.

        Only the scheme and host of ``url`` are kept as the base URL.  A
        bearer ``token`` takes precedence over ``username``/``password``.
        With ``verify`` false the session skips SSL verification.
        """
        self.timeout = timeout

        parsed = urlsplit(url)
        self.base_url = SplitResult(parsed.scheme, parsed.netloc, '', '', '').geturl()
        api_path = self._join_path(['api', API_VERSION, ''])
        self.url = urljoin(self.base_url, api_path)

        self._session = requests.Session()

        self._timeout = timeout
        self.user_agent = user_agent

        # Basic auth is only used when no token was supplied.
        if token is None and username and password:
            self._session.auth = (username, password)

        headers = {'User-Agent': user_agent, 'Content-Type': 'application/json'}
        if token:
            headers['Authorization'] = 'Bearer {}'.format(token)
        self._session.headers.update(headers)

        if not verify:
            logger.warning('ZMON client will skip SSL verification!')
            requests.packages.urllib3.disable_warnings()
            self._session.verify = False
Esempio n. 15
0
    def __init__(self, schema, netloc, port, path, query, fragment, userinfo):
        """Store the given URL parts as a quoted ``SplitResult``.

        ``port`` and ``userinfo`` are folded into the netloc.  Path, query
        and fragment are quoted with ``yarl.quote`` when they are non-empty.
        """
        self._strict = False

        if port:
            netloc = '{}:{}'.format(netloc, port)
        if userinfo:
            quoted_userinfo = yarl.quote(
                userinfo, safe='@:', protected=':', strict=False)
            netloc = quoted_userinfo + '@' + netloc

        # Empty components are passed through unquoted.
        quoted_path = path
        if path:
            quoted_path = yarl.quote(path, safe='@:', protected='/',
                                     strict=False)

        quoted_query = query
        if query:
            quoted_query = yarl.quote(query,
                                      safe='=+&?/:@',
                                      protected=yarl.PROTECT_CHARS,
                                      qs=True,
                                      strict=False)

        quoted_fragment = fragment
        if fragment:
            quoted_fragment = yarl.quote(fragment, safe='?/:@', strict=False)

        self._val = SplitResult(
            scheme=schema or '',
            netloc=netloc,
            path=quoted_path,
            query=quoted_query,
            fragment=quoted_fragment)
        self._cache = {}
Esempio n. 16
0
 def human_repr(self):
     """Return the URL as a string assembled from this object's components."""
     scheme = self.scheme
     netloc = self._make_netloc(self.user, self.password, self.host,
                                self._val.port)
     parts = SplitResult(scheme, netloc, self.path, self.query_string,
                         self.fragment)
     return url_unsplit(parts)
Esempio n. 17
0
 def human_repr(self):
     """Return decoded human readable string for URL representation."""
     user = _human_quote(self.user, "#/:?@")
     password = _human_quote(self.password, "#/:?@")

     host = self.host
     if host:
         host = self._encode_host(self.host, human=True)

     path = _human_quote(self.path, "#?")

     # Each query name and value is quoted on its own before being joined.
     pairs = []
     for key, value in self.query.items():
         pairs.append("{}={}".format(_human_quote(key, "#&+;="),
                                     _human_quote(value, "#&+;=")))
     query_string = "&".join(pairs)

     fragment = _human_quote(self.fragment, "")

     scheme = self.scheme
     netloc = self._make_netloc(
         user,
         password,
         host,
         self._val.port,
         encode_host=False,
     )
     return urlunsplit(
         SplitResult(scheme, netloc, path, query_string, fragment))
Esempio n. 18
0
    def delete(url):
        """Remove the cache entry for ``url``.

        When the URL belongs to a project, that project's cache is deleted
        too.  If the URL has a query string, the entry for the same URL
        without the query is removed as well.
        """
        url = unquote(url)
        match, project = Cache.match(url)
        if match:
            path = Cache.path(url, project, include_file=True)

            # Rather then wait for last updated statistics to expire, remove the
            # project cache if applicable.
            if project:
                apiurl, _ = Cache.spliturl(url)
                if project.isdigit():
                    # Clear target project cache upon request acceptance.
                    request = osc.core.get_request(apiurl, project)
                    project = request.actions[0].tgt_project
                Cache.delete_project(apiurl, project)

            if os.path.exists(path):
                if conf.config['debug']:
                    print('CACHE_DELETE', url, file=sys.stderr)
                os.remove(path)

        # Also delete version without query. This does not handle other
        # variations using different query strings. Handy for PUT with ?force=1.
        parts = urlsplit(url)
        if parts.query == '':
            return
        url_plain = SplitResult(parts.scheme, parts.netloc, parts.path, '',
                                parts.fragment).geturl()
        Cache.delete(url_plain)
Esempio n. 19
0
def presign_v4(
    method,
    url,
    region,
    credentials,
    date,
    expires,
):
    """Do signature V4 of given presign request.

    Returns the URL produced by the canonical-request step with an
    ``X-Amz-Signature`` parameter appended to its query.
    """

    service = "s3"
    scope = _get_scope(date, region, service)
    canonical_request_hash, url = _get_presign_canonical_request_hash(
        method,
        url,
        credentials.access_key,
        scope,
        date,
        expires,
    )
    string_to_sign = _get_string_to_sign(date, scope, canonical_request_hash)
    signing_key = _get_signing_key(credentials.secret_key, date, region,
                                   service)
    signature = _get_signature(signing_key, string_to_sign)

    # Rebuild the URL with the signature appended to the existing query.
    scheme, netloc, path, _, fragment = url
    signed_query = url.query + "&X-Amz-Signature=" + queryencode(signature)
    return SplitResult(scheme, netloc, path, signed_query, fragment)
Esempio n. 20
0
def _urlsplit(url, scheme="", allow_fragments=True):
    """Split a URL into its 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    The result is a 5-tuple: (scheme, netloc, path, query, fragment).
    The components are not broken up any further (e.g. netloc stays a
    single string) and % escapes are not expanded.
    Raises ValueError when the netloc has only one of "[" and "]"."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    allow_fragments = bool(allow_fragments)

    # Text before the first ":" is the scheme only if all of it is made of
    # valid scheme characters.
    colon = url.find(":")
    if colon > 0 and all(ch in scheme_chars for ch in url[:colon]):
        scheme, url = url[:colon].lower(), url[colon + 1:]

    netloc = ""
    if url.startswith("//"):
        netloc, url = _splitnetloc(url, 2)
        # Brackets must come as a pair.
        if ("[" in netloc) != ("]" in netloc):
            raise ValueError("Invalid IPv6 URL")

    fragment = ""
    if allow_fragments and "#" in url:
        url, _, fragment = url.partition("#")

    query = ""
    if "?" in url:
        url, _, query = url.partition("?")

    return _coerce_result(SplitResult(scheme, netloc, url, query, fragment))
 def __init__(self, arg1051, arg1000, arg682, arg2280, arg192, arg1897,
              arg551):
     """Store the given URL parts as a quoted ``SplitResult``.

     Judging by how each argument is used below: ``arg1000`` is the netloc,
     ``arg682`` the port appended to it, ``arg551`` the userinfo put in
     front of it, and ``arg2280`` / ``arg192`` / ``arg1897`` the path, query
     and fragment.  ``arg1051`` is used as the scheme.
     """
     self.attribute1287 = False
     if arg682:
         arg1000 += ':{}'.format(arg682)
     if arg551:
         arg1000 = (
             (yarl.quote(arg551, safe='@:', protected=':', strict=False) +
              '@') + arg1000)
     if arg2280:
         arg2280 = yarl.quote(arg2280,
                              safe='@:',
                              protected='/',
                              strict=False)
     if arg192:
         arg192 = yarl.quote(arg192,
                             safe='=+&?/:@',
                             protected=yarl.PROTECT_CHARS,
                             qs=True,
                             strict=False)
     if arg1897:
         arg1897 = yarl.quote(arg1897, safe='?/:@', strict=False)
     # The scheme comes from the first parameter.  The name ``schema`` used
     # here before is not defined in this function and raised NameError.
     self.attribute914 = SplitResult((arg1051 or ''),
                                     netloc=arg1000,
                                     path=arg2280,
                                     query=arg192,
                                     fragment=arg1897)
     self.attribute233 = {}
Esempio n. 22
0
def search_pagination_link_url(context, start):
    """
    This simple tag analyses the full path of the request (which caused that the template with this
    search_pagination_url tag was rendered). This tag returns the link of the search request with the given start
    parameter (the old one will be overridden, if the start parameter already exists). Except of the start parameter the
    returned link is the same as the search request url.

    This tag can be used to generate the links of the search pagination.

    :param context: the used context.
    :param start: the value, which the start parameter of the search request shall have.
    :return: the given url with the new given start parameter.
    :raises ValueError: if the request has no ``search_phrase`` parameter.
    """
    scheme, netloc, path, query, fragment = urlsplit(
        context['request'].get_full_path())
    url_params = parse_qs(query, strict_parsing=False)
    if 'search_phrase' not in url_params:
        raise ValueError(
            'The pagination link can only be computed for search result requests !'
        )

    # Only the first value of these two parameters is kept.  A request
    # without ``search_for`` is valid and must not fail here.
    for name in ('search_for', 'search_phrase'):
        if name in url_params:
            url_params[name] = url_params[name][0]
    url_params['start'] = start

    # parse_qs returns every remaining value as a list.  doseq=True encodes
    # the list items instead of the list's repr ("['x']").
    query = urlencode(url_params, doseq=True)
    return urlunsplit(
        SplitResult(scheme=scheme,
                    netloc=netloc,
                    path=path,
                    query=query,
                    fragment=fragment))
Esempio n. 23
0
File: url.py Progetto: vit-001/fget3
    def add_query(self, pair_list):
        """Merge ``(name, value)`` pairs into the query string of ``self.url``.

        A name that already occurs in the query gets its value replaced
        (the last matching pair wins).  Pairs with new names are appended
        in the order given.
        """
        split = urlsplit(self.url)
        known_names = parse_qs(split.query).keys()
        current_pairs = parse_qsl(split.query)

        new_names = {name for name, _ in pair_list if name not in known_names}

        merged = []
        for name, value in current_pairs:
            for add_name, add_value in pair_list:
                if add_name == name:
                    value = add_value
            merged.append((name, value))
        merged.extend(
            (name, value) for name, value in pair_list if name in new_names)

        self.url = urlunsplit(
            SplitResult(scheme=split.scheme,
                        netloc=split.netloc,
                        path=split.path,
                        query=urlencode(merged),
                        fragment=split.fragment))
Esempio n. 24
0
def _urlsplit(url, scheme='', allow_fragments=True):
    """Split a URL into its 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    The result is a 5-tuple: (scheme, netloc, path, query, fragment).
    The components are not broken up any further (e.g. netloc stays a
    single string) and % escapes are not expanded.
    Raises ValueError when the netloc has only one of '[' and ']'."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)

    # Text before the first ':' is the scheme only if all of it is made of
    # valid scheme characters.
    colon = url.find(':')
    if colon > 0 and all(ch in scheme_chars for ch in url[:colon]):
        scheme, url = url[:colon].lower(), url[colon + 1:]

    netloc = ''
    if url.startswith('//'):
        netloc, url = _splitnetloc(url, 2)
        # Brackets must come as a pair.
        if ('[' in netloc) != (']' in netloc):
            raise ValueError("Invalid IPv6 URL")

    fragment = ''
    if allow_fragments and '#' in url:
        url, _, fragment = url.partition('#')

    query = ''
    if '?' in url:
        url, _, query = url.partition('?')

    return _coerce_result(SplitResult(scheme, netloc, url, query, fragment))
Esempio n. 25
0
    def __new__(cls, val='', encoded=False):
        """Create a URL object from a string or an encoded ``SplitResult``.

        An instance of ``cls`` is returned unchanged.  A string is split
        first.  A ``SplitResult`` is only accepted together with
        ``encoded=True``.  Unless ``encoded`` is set, the host is IDNA
        encoded (or bracketed for IPv6 literals), and the user info, path,
        query and fragment are quoted.

        Raises ValueError for a non-encoded ``SplitResult`` or a netloc
        without host, and TypeError for any other value type.
        """
        if isinstance(val, cls):
            return val

        self = object.__new__(cls)

        if isinstance(val, str):
            val = url_split(val)
        elif isinstance(val, SplitResult):
            if not encoded:
                raise ValueError('Cannot apply decoding to SplitResult')
        else:
            raise TypeError(
                f'Constructor parameter should be str, got {val.__class__.__name__}'
            )

        if not encoded:
            if not val[1]:
                netloc = ''
            else:
                netloc = val.hostname
                if netloc is None:
                    raise ValueError(
                        'Invalid URL: host is required for abolute urls.')
                try:
                    netloc.encode('ascii')
                except UnicodeEncodeError:
                    # Non-ASCII host names are stored in their IDNA form.
                    netloc = netloc.encode('idna').decode('ascii')
                else:
                    try:
                        ip = ip_address(netloc)
                    except ValueError:
                        # Not an IP literal: a regular host name.  Only
                        # ValueError is caught so that unrelated errors
                        # (and KeyboardInterrupt) are not swallowed.
                        pass
                    else:
                        if ip.version == 6:
                            netloc = f'[{netloc}]'
                val_port = val.port
                if val_port:
                    netloc = f'{netloc}:{val_port}'
                val_username = val.username
                if val_username:
                    user = quote(val_username)
                    val_password = val.password
                    if val_password:
                        user = f'{user}:{quote(val_password)}'
                    netloc = f'{user}@{netloc}'

            val = SplitResult(val[0],
                              netloc,
                              quote(val[2], safe='@:', protected='/'),
                              query=quote(val[3],
                                          safe='=+&?/:@',
                                          protected='=+&',
                                          query_string=True),
                              fragment=quote(val[4], safe='?/:@'))

        self._val = val
        self._cache = {}
        return self
Esempio n. 26
0
def report_error(request: HttpRequest, user_profile: UserProfile, message: str=REQ(),
                 stacktrace: str=REQ(), ui_message: bool=REQ(validator=check_bool),
                 user_agent: str=REQ(), href: str=REQ(), log: str=REQ(),
                 more_info: Mapping[str, Any]=REQ(validator=check_dict([]), default={}),
                 ) -> HttpResponse:
    """Accepts an error report and stores in a queue for processing.  The
    actual error reports are later handled by do_report_error"""
    if not settings.BROWSER_ERROR_REPORTING:
        return json_success()
    # Work on a copy so the validated request data is not modified.
    more_info = dict(more_info)

    js_source_map = get_js_source_map()
    if js_source_map:
        stacktrace = js_source_map.annotate_stacktrace(stacktrace)

    # The version is the one-line summary of the current git commit, when
    # git is available.
    try:
        version: Optional[str] = subprocess.check_output(
            ["git", "show", "-s", "--oneline"],
            universal_newlines=True,
        )
    except (FileNotFoundError, subprocess.CalledProcessError):
        version = None

    # Get the IP address of the request
    remote_ip = request.META['REMOTE_ADDR']

    # For the privacy of our users, we remove any actual text content
    # in draft_content (from drafts rendering exceptions).  See the
    # comment on privacy_clean_markdown for more details.
    if more_info.get('draft_content'):
        more_info['draft_content'] = privacy_clean_markdown(more_info['draft_content'])

    if user_profile.is_authenticated:
        email, full_name = user_profile.delivery_email, user_profile.full_name
    else:
        email, full_name = "*****@*****.**", "Anonymous User"

    report = {
        "host": SplitResult("", request.get_host(), "", "", "").hostname,
        "ip_address": remote_ip,
        "user_email": email,
        "user_full_name": full_name,
        "user_visible": ui_message,
        "server_path": settings.DEPLOY_ROOT,
        "version": version,
        "user_agent": user_agent,
        "href": href,
        "message": message,
        "stacktrace": stacktrace,
        "log": log,
        "more_info": more_info,
    }
    queue_json_publish('error_reports', {"type": "browser", "report": report})

    return json_success()
Esempio n. 27
0
    def __init__(self, val='', *, encoded=False, strict=None):
        """Initialise the URL from a string, another URL or a SplitResult.

        Another ``URL`` is copied by sharing its parsed value and cache.  A
        ``SplitResult`` is only accepted together with ``encoded=True``.
        Unless ``encoded`` is set, the host is IDNA encoded (or bracketed
        for IPv6 literals) and the remaining components are quoted.
        ``strict`` is ignored apart from emitting a warning.
        """
        if strict is not None:  # pragma: no cover
            warnings.warn("strict parameter is ignored")

        if isinstance(val, URL):
            self._val, self._cache = val._val, val._cache
            return

        if isinstance(val, str):
            val = urlsplit(val)
        elif not isinstance(val, SplitResult):
            raise TypeError("Constructor parameter should be str")
        elif not encoded:
            raise ValueError("Cannot apply decoding to SplitResult")

        if not encoded:
            netloc = ''
            if val[1]:  # netloc
                host = val.hostname
                if host is None:
                    raise ValueError(
                        "Invalid URL: host is required for abolute urls.")
                try:
                    host.encode('ascii')
                except UnicodeEncodeError:
                    # Non-ASCII host names are stored in their IDNA form.
                    host = host.encode('idna').decode('ascii')
                else:
                    try:
                        ip = ip_address(host)
                    except ValueError:
                        pass  # a regular host name, not an IP literal
                    else:
                        if ip.version == 6:
                            host = '[{}]'.format(host)
                netloc = host

                port = val.port
                if port:
                    netloc = '{}:{}'.format(netloc, port)

                credentials = _quote(val.username) if val.username else ''
                if val.password:
                    credentials = credentials + ':' + _quote(val.password)
                if credentials:
                    netloc = credentials + '@' + netloc

            path = _quote(val[2], safe='+@:', protected='/+')
            # The path is only normalized for URLs that carry a netloc.
            if netloc:
                path = _normalize_path(path)

            query = _quote(val[3],
                           safe='=+&?/:@',
                           protected=PROTECT_CHARS,
                           qs=True)
            fragment = _quote(val[4], safe='?/:@')
            val = SplitResult(val[0], netloc, path, query, fragment)

        self._val = val
        self._cache = {}
Esempio n. 28
0
def process_url_query(text):
    """URL query filtering: handle URLs whose parameters differ but whose page is the same.

    The ``p`` and ``ret`` parameters are dropped and only the first value of
    every other parameter is kept.
    """
    from urllib.parse import urlsplit, urlunsplit, SplitResult, parse_qs, urlencode

    parts = urlsplit(text)
    kept = {}
    for name, values in parse_qs(parts.query).items():
        if name not in ["p", "ret"]:
            kept[name] = values[0]

    scheme, netloc, path, _, fragment = parts
    return urlunsplit(
        SplitResult(scheme, netloc, path, urlencode(kept), fragment))
Esempio n. 29
0
def origin(url):
    """
    Return ``url`` reduced to its scheme and network location.

    The path, query string and fragment are removed.  ``url`` is assumed to
    be an HTTP(S) URL.
    """
    scheme, netloc, *_ = urlsplit(url)
    stripped = SplitResult(scheme, netloc, "", "", "")
    return stripped.geturl()
Esempio n. 30
0
def get_ui_url(path_extention):
    """Return the configured application URL with ``path_extention`` appended to its path.

    Scheme, netloc, query and fragment of ``conf.app.url`` are kept as they are.
    """
    base = urlsplit(conf.app.url)
    extended = SplitResult(base.scheme, base.netloc,
                           base.path + path_extention, base.query,
                           base.fragment)
    return urlunsplit(extended)
Esempio n. 31
0
 def url(self):
     """Rebuild the request URL from the values available through ``self.get``.

     The forwarded protocol/host entries win when they are set; otherwise
     the scheme falls back to ``wsgi.url_scheme`` (default ``http``) and the
     host to ``HTTP_HOST``.  The result never carries a fragment.
     """
     scheme = self.get('HTTP_X_FORWARDED_PROTO')
     if not scheme:
         scheme = self.get('wsgi.url_scheme', 'http')

     netloc = self.get('HTTP_X_FORWARDED_HOST')
     if not netloc:
         netloc = self.get('HTTP_HOST')

     query = self.get("QUERY_STRING")
     parts = SplitResult(scheme, netloc, self.path, query, '')
     return parts.geturl()