Esempio n. 1
0
def urlsplit(url: str,
             scheme: str = '',
             allow_fragments: bool = True) -> urllib_parse.SplitResult:
    """Split a URL like :func:`urllib.parse.urlsplit`, without raising on bad input.

    Args:
        url: URL to be split
        scheme: URL scheme passed on to :func:`urllib.parse.urlsplit`
        allow_fragments: passed on to :func:`urllib.parse.urlsplit`

    Returns:
        The split result.

    Note:
        A :exc:`ValueError` raised by :func:`urllib.parse.urlsplit` is
        swallowed. In that case the whole input is kept as the path and
        ``urllib.parse.SplitResult(scheme=scheme, netloc='', path=url, query='', fragment='')``
        is returned instead.

    """
    try:
        result = urllib_parse.urlsplit(url, scheme,
                                       allow_fragments=allow_fragments)
    except ValueError:
        # Unparsable input: fall back to a path-only result.
        result = urllib_parse.SplitResult(scheme, '', url, '', '')
    return result
Esempio n. 2
0
    def _decode_url(self, url, path):
        # Make sure urlsplit() doesn't choke on scheme-less URLs, like 'localhost:9200'
        if '//' not in url:
            url = '//' + url

        url = urlparse.urlsplit(url)
        if not url.netloc:
            raise ValueError('Could not parse the given URL.')

        # If the scheme isn't explicitly provided by now, try to deduce it
        # from the port number
        scheme = url.scheme
        if not scheme:
            if 9500 <= url.port <= 9600:
                scheme = 'thrift'
            else:
                scheme = 'http'

        # Use path if provided
        if not path:
            path = url.path

        # Set default ports
        netloc = url.netloc
        if not url.port:
            if url.scheme == 'http':
                netloc = "{0}:{1}".format(netloc, 9200)
            elif url.scheme == 'https':
                netloc = "{0}:{1}".format(netloc, 443)
            elif url.scheme == 'thrift':
                netloc = "{0}:{1}".format(netloc, 9500)

        # Return new url. 
        return urlparse.SplitResult(scheme=scheme, netloc=netloc, path=path, query='', fragment='')
Esempio n. 3
0
 def _get_encoded_url(self) -> str:
     """Convert any UTF-8 char in :obj:`File.file_path` into a url encoded ASCII string."""
     sres = urllib_parse.urlsplit(self.file_path)
     return urllib_parse.urlunsplit(
         urllib_parse.SplitResult(sres.scheme, sres.netloc,
                                  urllib_parse.quote(sres.path), sres.query,
                                  sres.fragment))
Esempio n. 4
0
    def normalize(self, url):
        """Reduce a URL to its normal form.

        The result is a ``(domain, normpath)`` tuple of normalized strings.
        A query string present in the source URL stays part of the path;
        a URL fragment is dropped.

        Raises ``Exception`` when the URL's network location is not a known
        domain alias.

        """
        parts = parse.urlsplit(url)

        dname = self._domain_aliases.get(parts.netloc)
        if dname is None:
            raise Exception(f'illegal domain name {parts.netloc!r} for URL {url!r}')

        # The main WWT web server, being IIS, is case-insensitive in its URL
        # paths, so the downcased path is the normal form there. The query
        # part is left alone: whether it is case-insensitive depends on how
        # a given API is implemented.
        path = parts.path
        if not self._get_domain(dname).has_case_sensitive_paths():
            path = path.lower()

        # Rebuild a scheme-less, host-less URL; the fragment is discarded
        # (for now?).
        relative = parse.SplitResult('', '', path, parts.query, '').geturl()
        return (dname, url_normalize(relative))
Esempio n. 5
0
 def extract_path(url):
     '''Extracts path (with parameters) from given url.

     If url is already a path (starts with /) it's returned without
     modifications. In case url is empty or only contains domain without
     trailing slash, returns a single slash. The fragment is never part of
     the result.'''
     # This method was actually contributed by Wiktor Bachnik (wbachnik)
     # but because I forgot to rebase before my branch, this is going to
     # appear in a different commit :-/
     if not url:
         # empty, assume /
         return '/'
     if url[0] == '/':
         # url is already a path
         return url

     # url is a proper url scheme://host/...
     parts = urlparse.urlsplit(url)
     path = urlparse.SplitResult(scheme='',
                                 netloc='',
                                 path=parts.path,
                                 query=parts.query,
                                 fragment='').geturl()
     if not path:
         # case for http://example.com
         path = '/'
     # If there was a question mark in the url, but no query string
     # then we must still preserve the question mark. Only the part before
     # the fragment is inspected: a '?' inside the fragment is not a query
     # delimiter and must not add a trailing '?'.
     if not parts.query and '?' in url.partition('#')[0]:
         path += '?'
     return path
Esempio n. 6
0
def path2url(path):
    """Turn a filesystem path into a ``file`` URL string.

    Backslashes in ``path`` are replaced by forward slashes before the URL
    is assembled.
    """
    forward_slashed = path.replace("\\", "/")
    return parse.SplitResult('file', '', forward_slashed, '', '').geturl()
Esempio n. 7
0
def make_url(scheme: typing.Optional[str] = None,
             netloc: typing.Optional[str] = None,
             path: typing.Optional[str] = None) -> str:
    """Assemble a URL string without query or fragment.

    A missing or empty ``scheme`` falls back to ``'http'`` and the scheme is
    lower-cased; a missing ``netloc`` or ``path`` becomes an empty string.
    """
    if not scheme:
        scheme = 'http'
    parts = parse.SplitResult(scheme.lower(), netloc or '', path or '', '', '')
    return parse.urlunsplit(parts)
Esempio n. 8
0
 def test_svn_plus_http(self):
     self.assertEquals(
         urlparse.SplitResult(scheme='svn+http',
                              netloc='*****@*****.**',
                              path='/svn/trunk',
                              query='',
                              fragment=''),
         urlparse.urlsplit('svn+http://[email protected]/svn/trunk'))
Esempio n. 9
0
 def _scrub_action_url(action):
     """Remove trust ID from a URL."""
     parsed = netutils.urlsplit(action)
     if not Alarm._is_trust_url(parsed):
         # Not a trust URL: hand the URL back as parsed.
         return parsed.geturl()

     # Keep only what follows the last '@' of the network location.
     host_part = parsed.netloc.rsplit('@', 1)[-1]
     scrubbed = urlparse.SplitResult(parsed.scheme, host_part, parsed.path,
                                     parsed.query, parsed.fragment)
     return scrubbed.geturl()
 def test_parse_endpoint(self):
     # parse_endpoint() on a host:port endpoint must yield the split URL
     # with empty path, query and fragment.
     endpoint = 'http://example.com:9292'
     client = http.HTTPClient(endpoint, token=u'adc123')
     parsed = client.parse_endpoint(endpoint)
     self.assertEqual(
         parse.SplitResult('http', 'example.com:9292', '', '', ''),
         parsed)
Esempio n. 11
0
 def canonical_url(self, url):
     """Return the canonical form of ``url``, chosen by its hostname.

     ``archive.ph`` links are resolved through ``follow_archive_link``,
     ``www.tiktok.com`` links lose their query string, and every other URL
     is returned unchanged.
     """
     parts = parse.urlsplit(url)
     host = parts.hostname

     if host == "archive.ph":
         return self.follow_archive_link(url)
     if host == "www.tiktok.com":
         stripped = parse.SplitResult(scheme=parts.scheme,
                                      netloc=parts.netloc,
                                      path=parts.path,
                                      query='',
                                      fragment=parts.fragment)
         return stripped.geturl()
     return url
Esempio n. 12
0
 def wp_mock(url: str, request: requests.PreparedRequest):
     """Mock response callback: verify the intercepted request, reply 200.

     Asserts the target URL, the HTTP method, the authorization and
     content-type headers and the JSON body, then returns the canned
     response description.
     """
     expected_url = parse.SplitResult(
         'https', 'example.org',
         '/wp-json/mt-wp-photo-analysis/v1/text/3001', '', '')
     assert url == expected_url
     assert request.method == 'PUT'
     assert request.headers['Authorization'] == 'Bearer asd561'
     assert request.headers['Content-Type'] == 'application/json'
     assert request.body == b'{"textAnnotations": "some Text Message (e)"}'
     return dict(status_code=200, content='OK')
Esempio n. 13
0
def sanitize_url(url, mask_url_query=True, mask_url_path=False):
    """Return ``url`` with sensitive parts removed or masked.

    The network location is rebuilt from hostname and port only, so any
    ``user:password@`` part is dropped.

    Args:
        url: the URL to sanitize.
        mask_url_query: when true, every query value is replaced by ``?``
            (url encoded as ``%3F``); the keys are kept.
        mask_url_path: when true, a non-empty path is replaced by ``/??/``.

    Returns:
        The sanitized URL as a string.
    """
    parsed = parse.urlsplit(url)

    # masking - may be give some hints in masking query and path instead of '?' ??
    host = parsed.hostname or ''
    # ``hostname`` strips the brackets of an IPv6 literal; restore them so
    # the address colons cannot be confused with the port separator.
    if ':' in host:
        host = '[{}]'.format(host)
    if parsed.port is not None:
        host = '{}:{}'.format(host, parsed.port)

    if mask_url_query:
        query = parse.urlencode({key: '?' for key in parse.parse_qs(parsed.query)})
    else:
        query = parsed.query
    path = '/??/' if parsed.path and mask_url_path else parsed.path

    components = parse.SplitResult(parsed.scheme, host, path, query, parsed.fragment)
    return parse.urlunsplit(components)
Esempio n. 14
0
def merge_url_params(redir_url):
    """
    Combine the URL params of the incoming request with those of the saved
    URL. On a conflict the saved URL's value wins. Repeated params of the
    saved URL are joined with a space, and the fragment is left out of the
    result.
    """
    saved = parse.urlsplit(redir_url)

    saved_params = {}
    for name, values in parse.parse_qs(saved.query).items():
        saved_params[name] = ' '.join(values)

    # Later entries override earlier ones, so the saved params take priority.
    combined = {**request.args, **saved_params}

    merged = parse.SplitResult(saved.scheme, saved.netloc, saved.path,
                               parse.urlencode(combined), '')
    return parse.urlunsplit(merged)
Esempio n. 15
0
def expand_uri(uri, subpath):
    """Expand ``gh:`` / ``gl:`` shorthand URIs into master archive URLs.

    Args:
        uri: a split URL (as returned by ``urllib.parse.urlsplit``). For the
            ``gh`` and ``gl`` schemes its path is read as
            ``owner/repo[/sub/dirs]``.
        subpath: sub-path to use; when falsy it is derived from the
            repository name and any extra path parts.

    Returns:
        ``(uri, subpath)``. For ``gh`` the uri becomes the github.com
        ``master.zip`` archive URL, for ``gl`` the gitlab.com one (with a
        ``path`` query when a sub-path applies). Any other scheme is
        returned unchanged.
    """
    if uri.scheme == "gh":
        # https://github.com/fholmer/make/archive/master.zip
        path_parts = uri.path.split("/")
        master = "{0[1]}-master".format(path_parts)

        if not subpath:
            # "<repo>-master", followed by any extra path parts.
            subpath = "/".join([master] + path_parts[2:])

        # Only owner/repo belong in the archive URL; extra path parts select
        # a sub-directory and must not leak into it.
        uri = parse.SplitResult(
            "https",
            "github.com",
            "/{0[0]}/{0[1]}/archive/master.zip".format(path_parts),
            "",
            "",
        )

    elif uri.scheme == "gl":
        # https://gitlab.com/fholmer/make/-/archive/master/make-master.zip
        # https://gitlab.com/fholmer/make/-/archive/master/make-master.zip?path=tests%2Fmake%2Fmake_project
        # make-master-tests-make-make_project
        path_parts = uri.path.split("/")
        master = "{0[1]}-master".format(path_parts)
        if subpath:
            qs = parse.urlencode({"path": subpath})
        elif len(path_parts) > 2:
            qs = parse.urlencode({"path": "/".join(path_parts[2:])})
            subpath_root = "-".join([master] + path_parts[2:])
            subpath = "/".join([subpath_root] + path_parts[2:])
        else:
            qs = ""
            subpath = master
        uri = parse.SplitResult(
            "https",
            "gitlab.com",
            "/{0[0]}/{0[1]}/-/archive/master/{1}.zip".format(
                path_parts, master),
            qs,
            "",
        )
    return uri, subpath
Esempio n. 16
0
    def url(self, value: parse.SplitResult):
        """Store ``value`` after making its path end with the OS path separator.

        For a URL without hostname the path is first made absolute. If the
        path had to be changed, a new ``SplitResult`` carrying the adjusted
        path (and ``None`` as fragment) is stored; otherwise ``value`` is
        stored as given.
        """
        normalized = value.path if value.hostname else os.path.abspath(value.path)
        if not normalized.endswith(os.path.sep):
            normalized = normalized + os.path.sep

        if normalized == value.path:
            self.__url = value
            return

        self.__url = parse.SplitResult(value.scheme, value.netloc, normalized,
                                       value.query, None)
Esempio n. 17
0
    def _get_base_url(self, url):
        """ Prepare the host url to get the root folder /."""
        parsed = URLParse.urlparse(url)

        # When the parsed value does not qualify as a URL, the raw input is
        # taken as the network location and the path stays empty.
        path = ""
        if is_url(parsed):
            path = parsed.path

        netloc = url
        if is_url(parsed):
            netloc = parsed.netloc

        # The base URL always uses plain http without query or fragment.
        self.base_url = URLParse.SplitResult(scheme="http",
                                             netloc=netloc,
                                             path=path,
                                             query="",
                                             fragment="")
Esempio n. 18
0
    def update_actions(self, old_alarm=None):
        """Embed a trust ID into every trust-URL action of this alarm.

        One trust ID of ``old_alarm`` (if any) is reused; otherwise a new
        one is created on the first trust action encountered. Each trust
        action URL is rewritten in place to carry ``<trust_id>@`` in its
        network location, the first one with the ``:delete`` password
        suffix. Trust IDs of ``old_alarm`` that end up unused are deleted.

        Raises ``base.ClientSideError`` if a trust URL already contains
        a trust ID.
        """
        trustor_user_id = pecan.request.headers.get('X-User-Id')
        trustor_project_id = pecan.request.headers.get('X-Project-Id')
        raw_roles = pecan.request.headers.get('X-Roles', '')
        roles = raw_roles.split(',') if raw_roles else []
        auth_plugin = pecan.request.environ.get('keystone.token_auth')

        prev_trust_ids = (set(old_alarm._get_existing_trust_ids())
                          if old_alarm else set())
        trust_id = prev_trust_ids.pop() if prev_trust_ids else None
        trust_id_used = False

        action_lists = (self.ok_actions, self.alarm_actions,
                        self.insufficient_data_actions)
        for actions in action_lists:
            if actions is None:
                continue
            # Walk a copy, since entries of ``actions`` are replaced below.
            for index, action in enumerate(actions[:]):
                url = netutils.urlsplit(action)
                if not self._is_trust_url(url):
                    continue
                if '@' in url.netloc:
                    errmsg = _("trust URL cannot contain a trust ID.")
                    raise base.ClientSideError(errmsg)
                if trust_id is None:
                    # We have a trust action without a trust ID,
                    # create it
                    trust_id = keystone_client.create_trust_id(
                        pecan.request.cfg, trustor_user_id,
                        trustor_project_id, roles, auth_plugin)
                # Only the first rewritten action carries ':delete'.
                pw = '' if trust_id_used else ':delete'
                trust_id_used = True
                netloc = '%s%s@%s' % (trust_id, pw, url.netloc)
                actions[index] = urlparse.SplitResult(
                    url.scheme, netloc, url.path, url.query,
                    url.fragment).geturl()

        # A reused trust ID that no action needed goes back to the pool of
        # IDs to delete.
        if trust_id is not None and not trust_id_used:
            prev_trust_ids.add(trust_id)
        for old_trust_id in prev_trust_ids:
            keystone_client.delete_trust_id(pecan.request.cfg, old_trust_id,
                                            auth_plugin)
Esempio n. 19
0
    def notify(self, action, alarm_id, alarm_name, severity, previous, current,
               reason, reason_data):
        """Notify through the parent notifier using a trust-based token.

        The username of ``action`` is used as trust ID to obtain a trusted
        client. The action is then rebuilt without the user part of its
        network location and without the leading six characters of its
        scheme (the trust prefix), and passed on to the parent ``notify``
        together with an ``X-Auth-Token`` header.
        """
        client = keystone_client.get_trusted_client(self.conf, action.username)

        # Strip the trust prefix from the scheme and the fake user (the part
        # before '@') from the network location.
        plain_action = parse.SplitResult(action.scheme[6:],
                                         action.netloc.split("@")[1],
                                         action.path,
                                         action.query,
                                         action.fragment)

        headers = {'X-Auth-Token': keystone_client.get_auth_token(client)}
        super(TrustAlarmNotifierMixin, self).notify(
            plain_action, alarm_id, alarm_name, severity, previous, current,
            reason, reason_data, headers)
Esempio n. 20
0
def open(url):
    """Parse a storage url, then locate and initialize a backend for it.

    A string without a scheme is treated as a local path and turned into a
    ``file:`` URL. The backend is the ``Store`` class of the module
    ``cumulus.store.<scheme>``, instantiated with the parsed URL.

    Raises:
        NotImplementedError: if the scheme is not a plain word or no
            backend module can be imported for it.
    """
    parsed_url = urlparse.urlsplit(url)

    # If there is no scheme, fall back to treating the string as local path and
    # construct a file:/// URL.
    if not parsed_url.scheme:
        parsed_url = urlparse.SplitResult("file", "", quote(url), "", "")

    scheme = parsed_url.scheme
    try:
        # TODO: Support a registry for schemes that don't map to a module.
        if re.match(r"^\w+$", scheme):
            handler = importlib.import_module("cumulus.store.%s" % scheme)
            return handler.Store(parsed_url)
    except ImportError:
        # Fall through to error below
        pass

    # ``scheme`` is bound above; the message used to reference an undefined
    # name, which raised NameError instead of the intended error.
    raise NotImplementedError("Scheme %s not implemented" % scheme)
Esempio n. 21
0
    def add_query(self, pair_list):
        """Apply ``(name, value)`` pairs to the query string of ``self.url``.

        Parameters already present in the URL keep their position and take
        the value of the last pair with the same name. Pairs whose name is
        not yet present are appended in the order given.
        """
        parts = up.urlsplit(self.url)
        present = up.parse_qs(parts.query).keys()

        updated = []
        for name, value in up.parse_qsl(parts.query):
            # Scan all pairs so that the last matching one wins.
            for new_name, new_value in pair_list:
                if new_name == name:
                    value = new_value
            updated.append((name, value))
        updated.extend((new_name, new_value)
                       for new_name, new_value in pair_list
                       if new_name not in present)

        rebuilt = up.SplitResult(parts.scheme, parts.netloc, parts.path,
                                 up.urlencode(updated), parts.fragment)
        self.url = up.urlunsplit(rebuilt)
Esempio n. 22
0
    def read_configuration(self) -> 'JobLocation':
        """
        Read the configuration file from the container subvolume and apply it
        to this location.

        The file content is obtained by running ``cat`` through
        ``exec_check_output`` and parsed with ``ConfigParser``. Only the first
        section is used; its name determines the location type.
        ``location_type``, ``uuid``, ``retention`` and ``compress`` of this
        instance are set from the parsed values.

        :return: Corresponding location (the destination for a source
            location, the source for a destination location), or ``None``
            when the file does not name one
        :raises ValueError: if the first section name is neither the source
            nor the destination type
        """
        # Read configuration file
        out = self.exec_check_output('cat "%s"' % self.configuration_filename)
        file = out.decode().splitlines()

        corresponding_location = None

        parser = ConfigParser()
        parser.read_file(file)

        # NOTE(review): a file without any section raises IndexError here.
        section = parser.sections()[0]

        # Section name implies location type
        if section == JobLocation.TYPE_SOURCE:
            location_type = JobLocation.TYPE_SOURCE
        elif section == JobLocation.TYPE_DESTINATION:
            location_type = JobLocation.TYPE_DESTINATION
        else:
            raise ValueError('invalid section name/location type [%s]' % section)

        # Parse config string values
        location_uuid = parser.get(section, self.__KEY_UUID, fallback=None)
        source = parser.get(section, self.__KEY_SOURCE, fallback=None)
        source_container = parser.get(section, self.__KEY_SOURCE_CONTAINER, fallback=None)
        destination = parser.get(section, self.__KEY_DESTINATION, fallback=None)
        # Keep has been renamed to retention.
        # Supporting the old name for backward compatibility.
        retention = parser.get(section, self.__KEY_RETENTION, fallback=None)
        if not retention:
            retention = parser.get(section, self.__KEY_KEEP, fallback=None)

        # Convert to instances where applicable
        # (missing or empty values become None)
        location_uuid = UUID(location_uuid) if location_uuid else None
        source = parse.urlsplit(source) if source else None
        source_container = source_container if source_container else None
        destination = parse.urlsplit(destination) if destination else None
        retention = RetentionExpression(retention) if retention else None
        # NOTE(review): presumably the stdlib distutils module, which was
        # removed in Python 3.12 - confirm and plan a replacement.
        compress = True if distutils.util.strtobool(parser.get(section, self.__KEY_COMPRESS, fallback='False')) \
            else False

        if location_type == JobLocation.TYPE_SOURCE:
            # Amend url/container relpath from current path for source locations
            # if container relative path was not provided
            if not self.container_subvolume_relpath:
                source_container = os.path.basename(self.container_subvolume_path.rstrip(os.path.sep))
                # Replaces the source url read from the file: same scheme,
                # netloc and query as the current url, path moved up to the
                # parent directory.
                source = parse.SplitResult(scheme=self.url.scheme,
                                           netloc=self.url.netloc,
                                           path=os.path.abspath(os.path.join(self.url.path, os.path.pardir)),
                                           query=self.url.query,
                                           fragment=None)

                self.url = source
                self.container_subvolume_relpath = source_container

            if destination:
                corresponding_location = JobLocation(destination,
                                                     location_type=JobLocation.TYPE_DESTINATION)

        elif location_type == JobLocation.TYPE_DESTINATION:
            if source:
                corresponding_location = JobLocation(source,
                                                     location_type=JobLocation.TYPE_SOURCE,
                                                     container_subvolume_relpath=source_container)

        # Apply the parsed settings to this instance
        self.location_type = location_type
        self.uuid = location_uuid
        self.retention = retention
        self.compress = compress

        return corresponding_location
Esempio n. 23
0
File: c.py Progetto: 89z/autumn
from urllib import parse

# Assemble a URL from its five components and print the resulting string.
u = parse.SplitResult(scheme='http',
                      netloc='docs.python.org',
                      path='/library',
                      query='west=left',
                      fragment='')

s = u.geturl()
print(s)
def make_abs_url(url):
    """Return ``url`` with its path passed through ``path.abspath``.

    Scheme and network location are kept; query and fragment are dropped.
    """
    pieces = parse.urlsplit(url)
    absolute = parse.SplitResult(scheme=pieces.scheme,
                                 netloc=pieces.netloc,
                                 path=path.abspath(pieces.path),
                                 query='',
                                 fragment='')
    return absolute.geturl()
Esempio n. 25
0
 def to_uri(self):
     """Render this object as a URI string.

     Scheme, path and query come from ``_get_uri_scheme_path_and_query``;
     the url encoded query uses ``;`` instead of ``&`` as separator, and
     the URI has neither network location nor fragment.
     """
     scheme, key, query = self._get_uri_scheme_path_and_query()
     query_string = parse.urlencode(query).replace("&", ";")
     parts = parse.SplitResult(scheme=scheme, netloc="", path=key,
                               query=query_string, fragment="")
     return parse.urlunsplit(parts)