Ejemplo n.º 1
0
    def get_url(self):
        """Build the base URL of the Foreman deployment being tested.

        The URL is assembled from these config file values:

        * ``[server] scheme`` (falls back to 'https' when unset or empty)
        * ``[server] hostname`` (required)
        * ``[server] port`` (omitted from the URL when unset)

        The scheme is never inferred from the port: with ``port`` set to 80
        and ``scheme`` unset, the result still uses 'https'.

        :return: A URL made of scheme and network location only.
        :rtype: str

        """
        scheme = self.scheme if self.scheme else "https"
        # Only append ":port" when a port is actually configured.
        netloc = (
            "{0}:{1}".format(self.hostname, self.port)
            if self.port
            else self.hostname
        )
        return urlunsplit((scheme, netloc, "", "", ""))
Ejemplo n.º 2
0
    def __init__(self, address, name=""):
        """Create a PUB socket and bind it to *address*.

        When the port in *address* is 0, the socket is bound to a random
        port and ``self.destination`` is rewritten to carry the port that was
        chosen; otherwise the socket is bound to *address* exactly as given.
        """
        # pylint: disable=E1103
        self.name = name
        self.destination = address
        self.publish = get_context().socket(zmq.PUB)

        parts = urlsplit(self.destination)
        requested_port = parts.port

        if requested_port != 0:
            self.publish.bind(self.destination)
            self.port_number = requested_port
        else:
            # Port 0 means "pick one for me": bind using the URL without a
            # port, then write the selected port back into the destination.
            portless = urlunsplit((parts.scheme, parts.hostname,
                                   parts.path, parts.query, parts.fragment))
            self.port_number = self.publish.bind_to_random_port(portless)
            chosen_netloc = parts.hostname + ":" + str(self.port_number)
            self.destination = urlunsplit((parts.scheme, chosen_netloc,
                                           parts.path, parts.query,
                                           parts.fragment))

        LOGGER.info("publisher started on port %s", str(self.port_number))

        # No heartbeat is configured at construction time.
        self._heartbeat = None
        self._pub_lock = Lock()
Ejemplo n.º 3
0
Archivo: dap.py Proyecto: pydap/pydap
    def __init__(self, url, application=None, session=None, output_grid=True):
        """Build the dataset described at *url* and attach data proxies.

        The ``.dds`` and ``.das`` responses for *url* are fetched, the
        dataset is built from the DDS and annotated with the parsed DAS, and
        every ``BaseType``/``SequenceType`` variable receives a proxy as its
        ``data``. Projections found in the query string of *url* are then
        applied as slices on the matching variables, and *output_grid* is
        passed to every ``GridType`` variable.
        """
        scheme, netloc, path, query, fragment = urlsplit(url)

        def fetch_text(suffix):
            # Request the same URL with ``suffix`` appended to the path.
            response = GET(
                urlunsplit((scheme, netloc, path + suffix, query, fragment)),
                application, session)
            raise_for_status(response)
            return response.text

        dds = fetch_text(".dds")
        das = fetch_text(".das")

        # Dataset structure comes from the DDS, attributes from the DAS.
        self.dataset = build_dataset(dds)
        add_attributes(self.dataset, parse_das(das))

        # Proxies use a URL that keeps the selections but no projection.
        projection, selection = parse_ce(query)
        url = urlunsplit((scheme, netloc, path, "&".join(selection), fragment))

        for variable in walk(self.dataset, BaseType):
            variable.data = BaseProxy(
                url, variable.id, variable.dtype, variable.shape,
                application=application, session=session)
        for sequence in walk(self.dataset, SequenceType):
            sequence.data = SequenceProxy(
                url, copy.copy(sequence),
                application=application, session=session)

        # Each projection is consumed token by token while descending from
        # the dataset root to the variable it addresses.
        for steps in projection:
            node = self.dataset
            while steps:
                token, index = steps.pop(0)
                node = node[token]
                if isinstance(node, BaseType):
                    node.data.slice = fix_slice(index, node.shape)
                elif isinstance(node, GridType):
                    index = fix_slice(index, node.array.shape)
                    node.array.data.slice = index
                    for axis_slice, map_name in zip(index, node.maps):
                        node[map_name].data.slice = (axis_slice,)
                elif isinstance(node, SequenceType):
                    node.data.slice = index

        for grid in walk(self.dataset, GridType):
            grid.set_output_grid(output_grid)
Ejemplo n.º 4
0
def remove_version_from_href(href):
    """Removes the first api version from the href.

    Given: 'http://www.nova.com/v1.1/123'
    Returns: 'http://www.nova.com/123'

    Given: 'http://www.nova.com/v1.1'
    Returns: 'http://www.nova.com'

    Only the first path segment is checked for a version.

    :param href: URL to strip the version from
    :returns: *href* with the leading version segment removed
    :raises ValueError: if the first path segment is not a version,
        including when *href* has no path at all
    """
    parsed_url = urlparse.urlsplit(href)
    url_parts = parsed_url.path.split('/', 2)

    # NOTE: this should match vX.X or vX
    expression = re.compile(r'^v([0-9]+|[0-9]+\.[0-9]+)(/.*|$)')
    # An empty path splits into a single element; guard the index so such an
    # href reports "no version" (ValueError) instead of raising IndexError.
    if len(url_parts) > 1 and expression.match(url_parts[1]):
        del url_parts[1]

    new_path = '/'.join(url_parts)

    if new_path == parsed_url.path:
        LOG.debug('href %s does not contain version', href)
        raise ValueError(_('href %s does not contain version') % href)

    parsed_url = list(parsed_url)
    parsed_url[2] = new_path
    return urlparse.urlunsplit(parsed_url)
Ejemplo n.º 5
0
def _get_safe_url(url):
    """Gets version of *url* with basic auth passwords obscured. This function
    returns results suitable for printing and logging.

    E.g.: https://user:secret@example.com => https://user:********@example.com

    .. note::

       The number of asterisks does not depend on the length of the basic
       auth password (or on whether one was given at all), so minimal
       information is leaked.

    A *url* without a basic auth username is returned unchanged. An explicit
    port in *url* is preserved.

    :param url: a url
    :type url: ``str``

    :return: *url* with password obscured
    :rtype: ``str``
    """
    url_parts = parse.urlsplit(url)
    username = url_parts.username
    if username is None:
        # case: no basic auth creds in url; nothing to obscure
        return url

    # Keep everything after the credentials ("host[:port]") as written;
    # ``hostname`` alone would silently drop an explicit port.
    host_and_port = url_parts.netloc.rpartition('@')[2]
    safe_netloc = '{0}:{1}@{2}'.format(username, '*' * 8, host_and_port)
    return parse.urlunsplit(url_parts._replace(netloc=safe_netloc))
Ejemplo n.º 6
0
    def get_authorization_url(self, redirect_url):
        """
        Build the authorization url from the client id and the given redirect url.

        :param redirect_url:
            An HTTPS URI or custom URL scheme where the response will be redirected. Optional if the redirect URI is
            registered with Box already.
        :type redirect_url:
            `unicode` or None
        :return:
            A tuple of the URL of Box's authorization page and the CSRF token.
            This is the URL that your application should forward the user to in first leg of OAuth 2.
        :rtype:
            (`unicode`, `unicode`)
        """
        csrf_token = self._get_state_csrf_token()
        # A list of pairs (rather than a dict) keeps the parameter order in
        # the `urlencode()` output consistent and predictable.
        pairs = [
            ('state', csrf_token),
            ('response_type', 'code'),
            ('client_id', self._client_id),
        ]
        if redirect_url:
            pairs.append(('redirect_uri', redirect_url))

        # `urlencode()` doesn't work with non-ASCII unicode characters, so
        # hand it UTF-8 encoded bytes instead.
        encoded_pairs = []
        for key, value in pairs:
            encoded_pairs.append((key.encode('utf-8'), value.encode('utf-8')))

        authorize_url = urlunsplit(
            ('', '', API.OAUTH2_AUTHORIZE_URL, urlencode(encoded_pairs), ''))
        return authorize_url, csrf_token
Ejemplo n.º 7
0
    def _proxy_request(self, instance_id, tenant_id, req):
        """Forward *req* to the nova metadata server and translate the reply.

        The request is re-issued with ``requests`` against the protocol, host
        and port taken from ``self.conf``, carrying the instance/tenant
        identity headers and a signature of the instance id.

        :param instance_id: value sent as ``X-Instance-ID`` (and signed)
        :param tenant_id: value sent as ``X-Tenant-ID``
        :param req: incoming request; its method, path, query string, body
            and ``X-Forwarded-For`` header are forwarded
        :returns: ``req.response`` filled from the upstream reply on 200, or
            a ``webob.exc`` HTTP error object for 400/403/404/409/500
        :raises Exception: for any other upstream status code
        """
        headers = {
            'X-Forwarded-For': req.headers.get('X-Forwarded-For'),
            'X-Instance-ID': instance_id,
            'X-Tenant-ID': tenant_id,
            'X-Instance-ID-Signature': self._sign_instance_id(instance_id)
        }

        nova_host_port = '%s:%s' % (self.conf.nova_metadata_host,
                                    self.conf.nova_metadata_port)
        url = urlparse.urlunsplit((
            self.conf.nova_metadata_protocol,
            nova_host_port,
            req.path_info,
            req.query_string,
            ''))

        # ``verify`` is the CA bundle path when one is configured and
        # validation is enabled; otherwise it is a plain on/off boolean.
        disable_ssl_certificate_validation = self.conf.nova_metadata_insecure
        if self.conf.auth_ca_cert and not disable_ssl_certificate_validation:
            verify_cert = self.conf.auth_ca_cert
        else:
            verify_cert = not disable_ssl_certificate_validation

        # A client certificate is sent only when both halves are configured.
        client_cert = None
        if self.conf.nova_client_cert and self.conf.nova_client_priv_key:
            client_cert = (self.conf.nova_client_cert,
                           self.conf.nova_client_priv_key)

        resp = requests.request(method=req.method, url=url,
                                headers=headers,
                                data=req.body,
                                cert=client_cert,
                                verify=verify_cert)

        if resp.status_code == 200:
            req.response.content_type = resp.headers['content-type']
            req.response.body = resp.content
            LOG.debug(str(resp))
            return req.response
        elif resp.status_code == 403:
            LOG.warning(
                'The remote metadata server responded with Forbidden. This '
                'response usually occurs when shared secrets do not match.'
            )
            return webob.exc.HTTPForbidden()
        elif resp.status_code == 400:
            return webob.exc.HTTPBadRequest()
        elif resp.status_code == 404:
            return webob.exc.HTTPNotFound()
        elif resp.status_code == 409:
            return webob.exc.HTTPConflict()
        elif resp.status_code == 500:
            msg = _(
                'Remote metadata server experienced an internal server error.'
            )
            LOG.warning(msg)
            explanation = six.text_type(msg)
            return webob.exc.HTTPInternalServerError(explanation=explanation)
        else:
            # The response object exposes ``status_code``; reading ``status``
            # raised AttributeError and hid the real status from the caller.
            raise Exception(
                _('Unexpected response code: %s') % resp.status_code)
Ejemplo n.º 8
0
    def set_url(self, url):
        """Update scheme, host, path and query string from *url*.

        For an absolute ``http://`` or ``https://`` URL, any ``user:password``
        in the netloc is turned into a Basic ``Authorization`` header and
        removed from the URL before it is parsed. Any other value is treated
        as a path with an optional ``?query`` part, after which the object's
        own ``self.url`` is parsed.

        :param url: absolute http(s) URL, or a path relative to the host
        """
        path = url.lstrip('/')

        if url.startswith("http://") or url.startswith("https://"):
            u = urlparse.urlsplit(url)
            if u.username is not None:
                password = u.password or ""
                encode = base64.b64encode("%s:%s" % (u.username, password))
                self.headers['Authorization'] = 'Basic %s' % encode

            # A stray trailing comma used to make this a 1-tuple.
            self.scheme = u.scheme
            self.host = u.netloc.split("@")[-1]
            self.path_info = u.path or "/"
            self.query_string = u.query
            # Rebuild the URL without the credentials part of the netloc.
            url = urlparse.urlunsplit((u.scheme, u.netloc.split("@")[-1],
                u.path, u.query, u.fragment))
        else:

            if '?' in path:
                path, self.query_string = path.split('?', 1)
            self.path_info = '/' + path

            url = self.url
        # Runs for both branches and has the final say on these attributes.
        self.scheme, self.host, self.path_info = urlparse.urlparse(url)[0:3]
Ejemplo n.º 9
0
def download_attachments(output_path, urls):
    """Download WordPress attachments below *output_path*.

    Returns a dict mapping each (percent-encoded) URL that downloaded
    successfully to the attachment path relative to the output directory,
    so it can be associated with a post. URLs that fail to download are
    logged and left out of the result."""
    locations = {}
    for url in urls:
        # Take the URL path apart and rebuild it piece by piece so that
        # os.path.join is never handed a segment with a leading '/'.
        segments = urlparse(url).path.split('/')
        filename = segments.pop(-1)
        localpath = ''
        for segment in segments:
            if sys.platform == 'win32' and ':' in segment:
                continue
            localpath = os.path.join(localpath, segment)
        full_path = os.path.join(output_path, localpath)

        # Percent-encode the path component of the URL
        scheme, netloc, raw_path, query, fragment = urlsplit(url)
        url = urlunsplit((scheme, netloc, quote(raw_path), query, fragment))

        if not os.path.exists(full_path):
            os.makedirs(full_path)
        print('downloading {}'.format(filename))
        try:
            urlretrieve(url, os.path.join(full_path, filename))
            locations[url] = os.path.join(localpath, filename)
        except (URLError, IOError) as e:
            # Python 2.7 throws an IOError rather than URLError
            logger.warning("No file could be downloaded from %s\n%s", url, e)
    return locations
Ejemplo n.º 10
0
    def authenticate(self):
        """Authenticate against ``self.auth_url`` and store the resulting keys.

        The API version is taken from the first path segment of the auth URL
        that starts with ``'v'``. If a token and management URL are already
        set, they are saved and nothing else is done. Otherwise the v2.0 or
        v1 auth helpers are called until they stop returning a URL to try
        next.

        :raises exceptions.Unauthorized: if no management URL is available
            after authenticating and no bypass URL is configured
        """
        magic_tuple = netutils.urlsplit(self.auth_url)
        scheme, netloc, path, query, frag = magic_tuple
        port = magic_tuple.port
        # Used only for the ':<port>' replacement that builds admin_url below.
        if port is None:
            port = 80
        path_parts = path.split('/')
        for part in path_parts:
            if len(part) > 0 and part[0] == 'v':
                self.version = part
                break

        # Already authenticated: just persist what we have.
        if self.auth_token and self.management_url:
            self._save_keys()
            return

        # TODO(sandy): Assume admin endpoint is 35357 for now.
        # Ideally this is going to have to be provided by the service catalog.
        new_netloc = netloc.replace(':%d' % port, ':%d' % (35357,))
        admin_url = parse.urlunsplit(
            (scheme, new_netloc, path, query, frag))

        auth_url = self.auth_url
        if self.version == "v2.0":  # FIXME(chris): This should be better.
            # Each helper returns the next URL to try, or a falsy value when
            # done (presumably to follow redirects -- confirm in the helpers).
            while auth_url:
                if not self.auth_system or self.auth_system == 'keystone':
                    auth_url = self._v2_auth(auth_url)
                else:
                    auth_url = self._plugin_auth(auth_url)

            # Are we acting on behalf of another user via an
            # existing token? If so, our actual endpoints may
            # be different than that of the admin token.
            if self.proxy_token:
                if self.bypass_url:
                    self.set_management_url(self.bypass_url)
                else:
                    self._fetch_endpoints_from_auth(admin_url)
                # Since keystone no longer returns the user token
                # with the endpoints any more, we need to replace
                # our service account token with the user token.
                self.auth_token = self.proxy_token
        else:
            try:
                while auth_url:
                    auth_url = self._v1_auth(auth_url)
            # In some configurations nova makes redirection to
            # v2.0 keystone endpoint. Also, new location does not contain
            # real endpoint, only hostname and port.
            except exceptions.AuthorizationFailure:
                if auth_url.find('v2.0') < 0:
                    auth_url = auth_url + '/v2.0'
                self._v2_auth(auth_url)

        # A configured bypass URL always wins over the discovered one.
        if self.bypass_url:
            self.set_management_url(self.bypass_url)
        elif not self.management_url:
            raise exceptions.Unauthorized('Nova Client')

        self._save_keys()
Ejemplo n.º 11
0
 def update_actions(self, old_alarm=None):
     """Attach trust IDs to trust actions and drop trusts no longer in use.

     Every trust action URL in this alarm's action lists that has no
     credentials in its netloc gets a newly created trust ID written into
     it as ``<trust_id>:delete@<netloc>``. If *old_alarm* is given, the
     trust of each of its trust actions that carries a password and is no
     longer present in the corresponding list of this alarm is deleted.

     :param old_alarm: previous version of the alarm, or None
     """
     trustor_user_id = pecan.request.headers.get('X-User-Id')
     trustor_project_id = pecan.request.headers.get('X-Project-Id')
     roles = pecan.request.headers.get('X-Roles', '')
     roles = roles.split(',') if roles else []
     auth_plugin = pecan.request.environ.get('keystone.token_auth')
     for actions in (self.ok_actions, self.alarm_actions,
                     self.insufficient_data_actions):
         if actions is not None:
             # Iterate over a copy: entries are replaced in place below.
             for index, action in enumerate(actions[:]):
                 url = netutils.urlsplit(action)
                 if self._is_trust_url(url):
                     if '@' not in url.netloc:
                         # We have a trust action without a trust ID,
                         # create it
                         trust_id = keystone_client.create_trust_id(
                             trustor_user_id, trustor_project_id, roles,
                             auth_plugin)
                         netloc = '%s:delete@%s' % (trust_id, url.netloc)
                         url = list(url)
                         url[1] = netloc
                         actions[index] = urlparse.urlunsplit(url)
     if old_alarm:
         for key in ('ok_actions', 'alarm_actions',
                     'insufficient_data_actions'):
             # Action lists may be None (the loop above checks for it), so
             # treat a missing list as empty instead of raising TypeError.
             current_actions = getattr(self, key) or []
             for action in getattr(old_alarm, key) or []:
                 url = netutils.urlsplit(action)
                 if (self._is_trust_url(url) and url.password and
                         action not in current_actions):
                     keystone_client.delete_trust_id(
                         url.username, auth_plugin)
Ejemplo n.º 12
0
 def update_actions(self, old_alarm=None):
     """Attach trust IDs to trust actions and clean up dropped actions.

     Every trust action URL in this alarm's action lists that has no
     credentials in its netloc gets a newly created trust ID written into
     it as ``<trust_id>:delete@<netloc>``. If *old_alarm* is given,
     ``delete_trust`` is called for each of its actions that no longer
     appears in any of this alarm's action lists.
     """
     request_headers = pecan.request.headers
     trustor_user_id = request_headers.get("X-User-Id")
     trustor_project_id = request_headers.get("X-Project-Id")
     raw_roles = request_headers.get("X-Roles", "")
     roles = raw_roles.split(",") if raw_roles else []
     auth_plugin = pecan.request.environ.get("keystone.token_auth")

     for actions in (self.ok_actions, self.alarm_actions, self.insufficient_data_actions):
         if actions is None:
             continue
         # Walk a copy, since entries are replaced in place.
         for index, action in enumerate(actions[:]):
             url = netutils.urlsplit(action)
             if not self._is_trust_url(url) or "@" in url.netloc:
                 continue
             # We have a trust action without a trust ID,
             # create it
             trust_id = keystone_client.create_trust_id(
                 pecan.request.cfg, trustor_user_id, trustor_project_id, roles, auth_plugin
             )
             pieces = list(url)
             pieces[1] = "%s:delete@%s" % (trust_id, url.netloc)
             actions[index] = urlparse.urlunsplit(pieces)

     if not old_alarm:
         return

     still_present = list(
         itertools.chain(self.ok_actions or [], self.alarm_actions or [], self.insufficient_data_actions or [])
     )
     previous_actions = itertools.chain(
         old_alarm.ok_actions or [], old_alarm.alarm_actions or [], old_alarm.insufficient_data_actions or []
     )
     for action in previous_actions:
         if action not in still_present:
             self.delete_trust(action)
Ejemplo n.º 13
0
 def __init__(self, base, relative=None):
     """Initialise from a base URL string or another ``RequestBase``.

     A string *base* is split into URL parts. A ``RequestBase`` *base*
     contributes a copy of its URL parts plus its token flag, headers and
     config. If *relative* is given, its path is joined onto the base path;
     its query is appended to the base query, or, when it has none, the base
     query is reduced to its ``X-Plex-Token`` entries.
     """
     self._has_token = False
     self._url = None
     self._loaded = False
     self._xml = None
     self._url_parts = None
     self._headers = None
     self._config = None

     if isinstance(base, six.string_types):
         base_url = base
         self._url_parts = list(parse.urlsplit(base_url))
     elif isinstance(base, RequestBase):
         base_url = base.url
         self._has_token = base.has_token
         self._url_parts = base._url_parts[:]
         self._headers = base._headers
         self._config = base.config

     if relative:
         rel_path, rel_query = parse.urlsplit(relative)[2:4]
         if rel_path:
             self._url_parts[2] = _join_plex(self._url_parts[2], rel_path)
         base_pairs = parse.parse_qsl(self._url_parts[3])
         if rel_query:
             query_pairs = base_pairs + parse.parse_qsl(rel_query)
         else:
             # Strip off all non-token parts
             query_pairs = [(key, value) for key, value in base_pairs
                            if key == 'X-Plex-Token']
         self._url_parts[3] = parse.urlencode(query_pairs)

     # A token inherited from the base counts; otherwise look in the query.
     self._has_token = self._has_token or (
         'X-Plex-Token' in parse.parse_qs(self._url_parts[3]))
     self._url = parse.urlunsplit(self._url_parts)
Ejemplo n.º 14
0
def _strip_basic_auth(url):
    """Returns *url* with basic auth credentials removed. Also returns the
    basic auth username and password if they're present in *url*.

    E.g.: https://user:pass@example.com => https://example.com

    *url* need not include basic auth credentials. An explicit port and the
    brackets of an IPv6 literal are kept in the returned url.

    :param url: url which may or may not contain basic auth credentials
    :type url: ``str``

    :return: 3-``tuple`` of:

      * (``str``) -- *url* with any basic auth creds removed
      * (``str`` or ``NoneType``) -- basic auth username or ``None`` if basic
        auth username not given
      * (``str`` or ``NoneType``) -- basic auth password or ``None`` if basic
        auth password not given

    :rtype: ``tuple``
    """
    url_parts = parse.urlsplit(url)
    # swap out "user[:pass]@host[:port]" for "host[:port]". Using
    # ``hostname`` here would also discard the port and IPv6 brackets.
    host_and_port = url_parts.netloc.rpartition('@')[2]
    stripped_url = parse.urlunsplit(url_parts._replace(netloc=host_and_port))
    return (stripped_url, url_parts.username, url_parts.password)
Ejemplo n.º 15
0
    def _get_heat_signal_url(self, project_id=None):
        """Return a heat-api signal URL for this resource.

        This URL is not pre-signed, valid user credentials are required.
        When a project_id is given, it takes the place of the leading part of
        the resource path (everything before its first '/'). This is useful
        to generate a signal URL that uses the heat stack user project
        instead of the user's.

        A URL already stored in the resource data is returned as-is; a newly
        built one is stored before being returned. Returns None while the
        resource has no id yet.
        """
        cached = self.data().get('heat_signal_url')
        if cached is not None:
            return cached

        if self.id is None:
            # it is too early
            return

        heat_endpoint = urlparse.urlparse(
            self.client_plugin('heat').get_heat_url())
        resource_path = self.identifier().url_path()
        if project_id is not None:
            first_slash = resource_path.find('/')
            resource_path = project_id + resource_path[first_slash:]

        signal_url = urlparse.urlunsplit((
            heat_endpoint.scheme,
            heat_endpoint.netloc,
            'v1/%s/signal' % resource_path,
            '',
            ''))

        self.data_set('heat_signal_url', signal_url)
        return signal_url
Ejemplo n.º 16
0
 def _update_link_prefix(self, orig_url, prefix):
     """Return *orig_url* with its scheme and netloc taken from *prefix*.

     A falsy *prefix* leaves *orig_url* untouched.
     """
     if not prefix:
         return orig_url
     original = parse.urlsplit(orig_url)
     prefix_scheme, prefix_netloc = parse.urlsplit(prefix)[:2]
     return parse.urlunsplit(
         original._replace(scheme=prefix_scheme, netloc=prefix_netloc))
Ejemplo n.º 17
0
def remove_version_from_href(href):
    """Strip the first api version segment found in the path of the href.

    Given: 'http://manila.example.com/v1.1/123'
    Returns: 'http://manila.example.com/123'

    Given: 'http://www.manila.com/v1.1'
    Returns: 'http://www.manila.com'

    Given: 'http://manila.example.com/share/v1.1/123'
    Returns: 'http://manila.example.com/share/123'

    Raises ValueError when no path segment looks like a version.
    """
    parsed_url = parse.urlsplit(href)
    segments = parsed_url.path.split('/')

    # NOTE: this should match vX.X or vX
    version_re = re.compile(r'^v([0-9]+|[0-9]+\.[0-9]+)(/.*|$)')
    version_index = next(
        (position for position, segment in enumerate(segments)
         if version_re.match(segment)),
        None)
    if version_index is not None:
        del segments[version_index]

    new_path = '/'.join(segments)
    if new_path == parsed_url.path:
        msg = 'href %s does not contain version' % href
        LOG.debug(msg)
        raise ValueError(msg)

    return parse.urlunsplit(parsed_url._replace(path=new_path))
Ejemplo n.º 18
0
def remove_version_from_href(href):
    """Removes the first api version from the href.

    Given: 'http://www.manila.com/v1.1/123'
    Returns: 'http://www.manila.com/123'

    Given: 'http://www.manila.com/v1.1'
    Returns: 'http://www.manila.com'

    Only the first path segment is checked for a version.

    :param href: URL to strip the version from
    :returns: *href* with the leading version segment removed
    :raises ValueError: if the first path segment is not a version,
        including when *href* has no path at all
    """
    parsed_url = parse.urlsplit(href)
    url_parts = parsed_url.path.split("/", 2)

    # NOTE: this should match vX.X or vX
    expression = re.compile(r"^v([0-9]+|[0-9]+\.[0-9]+)(/.*|$)")
    # An empty path splits into a single element; guard the index so such an
    # href reports "no version" (ValueError) instead of raising IndexError.
    if len(url_parts) > 1 and expression.match(url_parts[1]):
        del url_parts[1]

    new_path = "/".join(url_parts)

    if new_path == parsed_url.path:
        msg = "href %s does not contain version" % href
        LOG.debug(msg)
        raise ValueError(msg)

    parsed_url = list(parsed_url)
    parsed_url[2] = new_path
    return parse.urlunsplit(parsed_url)
Ejemplo n.º 19
0
def _send_new_pending_email(instance):
    """ Email the contents of this API access request to settings.API_ACCESS_MANAGER_EMAIL.

    SMTP failures are logged and not re-raised.
    """
    scheme = 'https' if settings.HTTPS == 'on' else 'http'
    domain = instance.site.domain
    change_path = reverse('admin:api_admin_apiaccessrequest_change', args=(instance.id,))
    context = {
        'approval_url': urlunsplit((scheme, domain, change_path, '', '')),
        'api_request': instance,
    }

    message = render_to_string('api_admin/api_access_request_email_new_request.txt', context)
    subject = _(u'API access request from {company}').format(company=instance.company_name)
    try:
        send_mail(
            subject,
            message,
            settings.API_ACCESS_FROM_EMAIL,
            [settings.API_ACCESS_MANAGER_EMAIL],
            fail_silently=False
        )
    except SMTPException:
        log.exception(u'Error sending API user notification email for request [%s].', instance.id)
Ejemplo n.º 20
0
def add_or_replace_parameter(url, name, new_value):
    """Add or replace a query parameter in the given url.

    Every existing occurrence of ``name`` gets ``new_value``; when ``name``
    is not present it is appended at the end of the query.

    >>> import w3lib.url
    >>> w3lib.url.add_or_replace_parameter('http://www.example.com/index.php', 'arg', 'v')
    'http://www.example.com/index.php?arg=v'
    >>> w3lib.url.add_or_replace_parameter('http://www.example.com/index.php?arg1=v1&arg2=v2&arg3=v3', 'arg4', 'v4')
    'http://www.example.com/index.php?arg1=v1&arg2=v2&arg3=v3&arg4=v4'
    >>> w3lib.url.add_or_replace_parameter('http://www.example.com/index.php?arg1=v1&arg2=v2&arg3=v3', 'arg3', 'v3new')
    'http://www.example.com/index.php?arg1=v1&arg2=v2&arg3=v3new'
    >>>

    """
    parsed = urlsplit(url)
    pairs = parse_qsl(parsed.query, keep_blank_values=True)

    updated = [
        (key, new_value if key == name else value)
        for key, value in pairs
    ]
    already_present = any(key == name for key, _unused in pairs)
    if not already_present:
        updated.append((name, new_value))

    return urlunsplit(parsed._replace(query=urlencode(updated)))
Ejemplo n.º 21
0
    def query_lookupd(self):
        """
        Trigger a query of the configured ``nsq_lookupd_http_addresses``.

        Addresses are used in round-robin order, one per call. The request
        goes to the address's own path, or ``/lookup`` when it has none, with
        ``topic`` set to this reader's topic; the response is handed to
        ``_finish_query_lookupd``.
        """
        endpoint = self.lookupd_http_addresses[self.lookupd_query_index]
        self.lookupd_query_index = (self.lookupd_query_index + 1) % len(self.lookupd_http_addresses)

        # urlsplit() is faulty if scheme not present
        if '://' not in endpoint:
            endpoint = 'http://' + endpoint

        scheme, netloc, path, query, fragment = urlparse.urlsplit(endpoint)

        if not path or path == "/":
            path = "/lookup"

        # ``cgi.parse_qs`` was a deprecated alias that Python 3.8 removed;
        # use the URL-parsing module this method already relies on.
        params = urlparse.parse_qs(query)
        params['topic'] = self.topic
        query = urlparse.urlencode(_utf8_params(params), doseq=1)
        lookupd_url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))

        req = tornado.httpclient.HTTPRequest(
            lookupd_url, method='GET',
            connect_timeout=self.lookupd_connect_timeout,
            request_timeout=self.lookupd_request_timeout)
        callback = functools.partial(self._finish_query_lookupd, lookupd_url=lookupd_url)
        self.http_client.fetch(req, callback=callback)
Ejemplo n.º 22
0
    def get_url(self, path="/", query=None, fragment=None):
        """Return a URL on this server for *path*, or None if not started.

        The scheme is ``https`` when ``use_ssl`` is set and ``http``
        otherwise; host and port come from this object.
        """
        if not self.started:
            return None

        scheme = "https" if self.use_ssl else "http"
        netloc = "%s:%s" % (self.host, self.port)
        return urlunsplit((scheme, netloc, path, query, fragment))
Ejemplo n.º 23
0
def fix_links_in_lxml_tree(link):
    """Fix links in an lxml tree.

    This function is called by the rewrite_links method of an lxml tree, and is
    used to normalize links in a few ways. It makes links absolute, works
    around buggy URLs and nukes anchors.

    Example: html_tree.rewrite_links(fix_links_in_lxml_tree, base_href=my_url)

    Some URLS, like the following, make no sense:
     - https://www.appeals2.az.gov/../Decisions/CR20130096OPN.pdf.
                                  ^^^^ -- This makes no sense!
    The fix is to remove any extra '/..' patterns at the beginning of the
    path.

    Others have annoying anchors on the end, like:
     - http://example.com/path/#anchor

    Note that lxml has a method generally for this purpose called
    make_links_absolute, but we cannot use it because it does not work
    around invalid relative URLS, nor remove anchors. This is a limitation
    of Python's urljoin that will be fixed in Python 3.5 according to a bug
    we filed: http://bugs.python.org/issue22118

    :param link: the link to normalize
    :returns: the link without leading '/..' path segments and without its
        anchor
    """
    url_parts = urlsplit(link)
    # Raw string: '\.' in a plain literal is an invalid escape sequence that
    # newer Python versions warn about.
    cleaned_path = re.sub(r'^(/\.\.)+', '', url_parts.path)
    url = urlunsplit(url_parts._replace(path=cleaned_path))
    return url.split('#')[0]
Ejemplo n.º 24
0
    def _parse_url(self, url, ssl=False):
        """Create a url from test data.

        A URL that already has a scheme is returned unchanged. Otherwise a
        full URL is built from this object's host, optional port and optional
        prefix, using 'https' when *ssl* is set and 'http' otherwise; any
        fragment is dropped.

        Scheme and netloc are saved for later use in comparisons.
        """
        parsed_url = urlparse.urlsplit(url)
        url_scheme = parsed_url[0]
        netloc = self.host

        if url_scheme:
            # Already a full URL: remember its parts and hand it back as-is.
            self.scheme = url_scheme
            self.netloc = parsed_url[1]
            return url

        if self.port:
            netloc = '%s:%s' % (self.host, self.port)
        scheme = 'https' if ssl else 'http'

        path = parsed_url[2]
        if self.prefix:
            path = '%s%s' % (self.prefix, path)

        self.scheme = scheme
        self.netloc = netloc
        return urlparse.urlunsplit((scheme, netloc, path, parsed_url[3], ''))
Ejemplo n.º 25
0
Archivo: case.py Proyecto: cdent/gabbi
    def _parse_url(self, url):
        """Create a url from test data.

        A URL without a scheme is first expanded with ``utils.create_url``
        from this object's host, port and prefix and the test's ``ssl``
        setting. If the test defines query parameters, they are merged into
        the query string. The returned URL never carries a fragment.

        Scheme and netloc are saved for later use in comparisons.
        """
        query_params = self.test_data['query_parameters']
        ssl = self.test_data['ssl']

        parts = urlparse.urlsplit(url)
        if not parts.scheme:
            # Expand to a full URL, then parse again so that scheme and
            # netloc reflect the expanded form.
            parts = urlparse.urlsplit(
                utils.create_url(url, self.host, port=self.port,
                                 prefix=self.prefix, ssl=ssl))

        self.scheme = parts.scheme
        self.netloc = parts.netloc

        query_string = parts.query
        if query_params:
            query_string = self._update_query_params(parts.query,
                                                     query_params)

        return urlparse.urlunsplit(
            parts._replace(query=query_string, fragment=''))
Ejemplo n.º 26
0
def _build_api_url(url, query):
    """Return *url* with *query* appended to its existing query string.

    :param url: base URL, which may already carry a query string
    :param query: already-encoded query string to add; may be empty or None
    :returns: the URL with both query strings joined by ``&``
    """
    scheme, netloc, path, base_query, fragment = urlsplit(url)

    # Join only the non-empty parts, so an empty or missing ``query`` does
    # not leave a trailing '&' (or a literal 'None') in the result.
    combined_query = '&'.join(part for part in (base_query, query) if part)

    return urlunsplit((scheme, netloc, path, combined_query, fragment))
Ejemplo n.º 27
0
def _send_decision_email(instance):
    """ Email the requesting user the decision made about their request.

    On success ``instance.contacted`` is set to True; SMTP failures are
    logged and not re-raised.
    """
    context = {'name': instance.user.username}
    context['api_management_url'] = urlunsplit((
        'https' if settings.HTTPS == 'on' else 'http',
        instance.site.domain,
        reverse('api_admin:api-status'),
        '',
        '',
    ))
    context['authentication_docs_url'] = settings.AUTH_DOCUMENTATION_URL
    context['api_docs_url'] = settings.API_DOCUMENTATION_URL
    context['support_email_address'] = settings.API_ACCESS_FROM_EMAIL
    context['platform_name'] = configuration_helpers.get_value('PLATFORM_NAME', settings.PLATFORM_NAME)

    # The template is chosen by the request's status.
    template_name = 'api_admin/api_access_request_email_{status}.txt'.format(status=instance.status)
    message = render_to_string(template_name, context)
    try:
        send_mail(
            _('API access request'),
            message,
            settings.API_ACCESS_FROM_EMAIL,
            [instance.user.email],
            fail_silently=False
        )
    except SMTPException:
        log.exception(u'Error sending API user notification email for request [%s].', instance.id)
    else:
        instance.contacted = True
Ejemplo n.º 28
0
def _deprecated_amqp_url():
    """Allow for deprecating amqp_url setting over time.

    This warns and attempts to translate an amqp_url to something
    oslo_messaging can use to load a driver.

    The ``amqp`` scheme is rewritten to ``rabbit``; any other scheme is kept.
    When the URL carries no port, the default AMQP port 5672 is appended.
    Query string and fragment are dropped.

    :returns: The translated URL, or None when ``amqp_url`` is not set.
    """
    url = cfg.CONF.amqp_url
    if not url:
        return
    LOG.warning(_LW(
        'Use of amqp_url is deprecated. Please instead use options defined in '
        'oslo_messaging_rabbit to declare your AMQP connection.'))
    url = urlparse.urlsplit(url)
    if url.scheme == 'amqp':
        scheme = 'rabbit'
    else:
        scheme = url.scheme
    netloc = url.netloc
    if url.port is None:
        # netloc has no port yet: drop a dangling ':' and add the default.
        if netloc.endswith(':'):
            netloc = netloc[:-1]
        netloc = '%s:%s' % (netloc, 5672)
    # Otherwise netloc already ends in ':<port>'; appending the port again
    # would yield an unparsable 'host:5672:5672'.
    out = urlparse.urlunsplit((
        scheme,
        netloc,
        url.path,
        '', ''
    ))
    return out
Ejemplo n.º 29
0
    def urljoin(orig_url, url):  # pragma: no cover
        """Join ``url`` onto ``orig_url`` and collapse leftover '..' segments.

        The joined URL is returned untouched when it contains no '../'.
        Otherwise its path is rewritten so that each '..' segment removes
        itself and, when one exists, the segment before it.

        NOTE(review): the inner ``urljoin`` call presumably resolves to an
        imported ``urljoin`` rather than to this function -- confirm against
        the enclosing scope, which is not visible here.
        """
        new_url = urljoin(orig_url, url)
        if '../' not in new_url:
            return new_url

        # only needed in py2 as py3 urljoin resolves '../'
        parts = urlsplit(new_url)
        scheme, netloc, path, query, frag = parts

        path_parts = path.split('/')
        i = 0
        # n is the index of the last segment; the loop stops before it, so
        # the final path segment is never examined.
        n = len(path_parts) - 1
        while i < n:
            if path_parts[i] == '..':
                # Drop the '..' itself ...
                del path_parts[i]
                n -= 1
                if i > 0:
                    # ... and the segment preceding it, then step back so the
                    # element that slid into this position is re-checked.
                    del path_parts[i - 1]
                    n -= 1
                    i -= 1
            else:
                i += 1

        # A path reduced to a single empty segment stands for the root.
        if path_parts == ['']:
            path = '/'
        else:
            path = '/'.join(path_parts)

        parts = (scheme, netloc, path, query, frag)

        new_url = urlunsplit(parts)
        return new_url
Ejemplo n.º 30
0
def update_url_query(*args, **kwargs):
    """
    Build a copy of a URL whose query parameters reflect the keyword arguments.

    Each keyword replaces every existing parameter of the same name; a new
    name is simply appended. Passing None as the value deletes the parameter
    without adding a replacement.

    The URL is taken from the first positional argument and cannot be given
    as a keyword argument.
    """
    if len(args) == 0:
        raise TypeError("URL must be passed as the first positional argument")

    parts = urlsplit(args[0])
    pairs = parse_qsl(parts.query)
    for name, new_value in kwargs.items():
        # Discard every existing occurrence of this parameter first.
        pairs = [(k, v) for (k, v) in pairs if k != name]
        if new_value is None:
            continue
        pairs.append((name, new_value))

    return urlunsplit(
        (parts.scheme, parts.netloc, parts.path, urlencode(pairs),
         parts.fragment))
Ejemplo n.º 31
0
    def get_latest_summary_url(cls, election_url):
        """Return the first reachable summary URL for the election, or None.

        The current version is looked up via ``cls.get_current_ver``; without
        one no URL can be built. Several candidate paths are then requested in
        turn and the first one that does not answer with an HTTP error wins.
        """
        election_url = cls._url_ensure_trailing_slash(election_url)
        current_ver = cls.get_current_ver(election_url)

        # Without a current version there is nothing to build a path from.
        if current_ver is None:
            return None

        url_parts = parse.urlsplit(election_url)
        if 'Web02' in election_url:
            # Keep only the leading path segments and discard the fragment.
            trimmed_path = "/".join(url_parts.path.split('/')[:3])
            url_parts = url_parts._replace(path=trimmed_path, fragment='')

        base_path = url_parts.path
        candidate_paths = (
            base_path + '/' + current_ver + "/json/en/summary.json",
            base_path + current_ver + "/Web01/en/summary.html",
            base_path + current_ver + "/en/summary.html",
        )

        for candidate_path in candidate_paths:
            candidate_url = parse.urlunsplit(
                url_parts._replace(path=candidate_path))
            response = requests.get(candidate_url)
            try:
                response.raise_for_status()
            except requests.exceptions.HTTPError:
                # Try the next known layout.
                continue
            return candidate_url

        # Every expected location answered with an HTTP error.
        return None
Ejemplo n.º 32
0
    def __getitem__(self, index):
        """Download the slice selected by ``index`` and return it as an array.

        The request URL is ``self.baseurl`` with '.dods' appended to the path
        and a constraint built from ``self.id`` and the combined slice placed
        in front of any existing query. The response body is split into its
        DDS text and binary payload, and the payload is decoded according to
        ``self.dtype``.
        """
        # build download url
        index = combine_slices(self.slice, fix_slice(index, self.shape))
        scheme, netloc, path, query, fragment = urlsplit(self.baseurl)
        # rstrip('&') removes the separator left behind when the base URL
        # had no query of its own.
        url = urlunsplit((
            scheme, netloc, path + '.dods',
            quote(self.id) + hyperslab(index) + '&' + query,
            fragment)).rstrip('&')

        # download and unpack data
        logger.info("Fetching URL: %s" % url)
        r = GET(url, self.application, self.session)
        raise_for_status(r)
        # Everything before the b'\nData:\n' marker is the DDS; the rest is
        # the binary payload. The decoded DDS is not used further below.
        dds, data = r.body.split(b'\nData:\n', 1)
        dds = dds.decode(r.content_encoding or 'ascii')

        if self.shape:
            # skip size packing
            # (4 leading bytes for string dtypes, 8 for everything else)
            if self.dtype.char in 'SU':
                data = data[4:]
            else:
                data = data[8:]

        # calculate array size
        shape = tuple(
            int(np.ceil((s.stop-s.start)/float(s.step))) for s in index)
        size = int(np.prod(shape))

        # NOTE(review): binary-mode np.fromstring is deprecated in newer NumPy
        # releases in favour of np.frombuffer -- confirm the supported range.
        if self.dtype == np.byte:
            return np.fromstring(data[:size], 'B')
        elif self.dtype.char in 'SU':
            out = []
            for word in range(size):
                # n is the array produced by np.fromstring from the 4-byte
                # big-endian length prefix, not a plain int.
                n = np.fromstring(data[:4], '>I')  # read length
                data = data[4:]
                out.append(data[:n])
                # Advance past the string plus padding up to a 4-byte multiple.
                data = data[n + (-n % 4):]
            return np.array([ text_type(x.decode('ascii')) for x in out ], 'S')
        else:
            return np.fromstring(data, self.dtype).reshape(shape)
Ejemplo n.º 33
0
 def _winrm_connect(self):
     '''
     Establish a WinRM connection over HTTP/HTTPS.

     The port defaults to 5986. Connections are cached in ``_winrm_cache``
     under a key built from the user, an MD5 digest of the password, the host
     and the port, and a cached connection is returned when one exists.
     Otherwise each (transport, scheme) pair is tried in turn -- https first,
     unless the port is 5985, in which case the order is reversed -- and the
     first protocol object that works is cached and returned.

     Raises ``errors.AnsibleError`` on a timeout or on an HTTP 401 response.

     NOTE(review): when every scheme fails with some other error, the code
     visible here falls out of the loop without returning or raising, so the
     caller would receive None -- confirm this is intended.
     '''
     port = self.port or 5986
     vvv("ESTABLISH WINRM CONNECTION FOR USER: %s on PORT %s TO %s" % \
         (self.user, port, self.host), host=self.host)
     netloc = '%s:%d' % (self.host, port)
     # The password is only stored in the cache key as an MD5 hex digest.
     cache_key = '%s:%s@%s:%d' % (self.user, hashlib.md5(self.password).hexdigest(), self.host, port)
     if cache_key in _winrm_cache:
         vvvv('WINRM REUSE EXISTING CONNECTION: %s' % cache_key, host=self.host)
         return _winrm_cache[cache_key]
     transport_schemes = [('plaintext', 'https'), ('plaintext', 'http')] # FIXME: ssl/kerberos
     # 5985 is handled as the plain-HTTP case: try http before https.
     if port == 5985:
         transport_schemes = reversed(transport_schemes)
     exc = None
     for transport, scheme in transport_schemes:
         endpoint = urlparse.urlunsplit((scheme, netloc, '/wsman', '', ''))
         vvvv('WINRM CONNECT: transport=%s endpoint=%s' % (transport, endpoint),
              host=self.host)
         protocol = Protocol(endpoint, transport=transport,
                             username=self.user, password=self.password)
         try:
             # An empty message serves as the connectivity probe.
             protocol.send_message('')
             _winrm_cache[cache_key] = protocol
             return protocol
         except WinRMTransportError, exc:
             err_msg = str(exc)
             if re.search(r'Operation\s+?timed\s+?out', err_msg, re.I):
                 raise errors.AnsibleError("the connection attempt timed out")
             # Pull a three-digit status code out of the error text, if any.
             m = re.search(r'Code\s+?(\d{3})', err_msg)
             if m:
                 code = int(m.groups()[0])
                 if code == 401:
                     raise errors.AnsibleError("the username/password specified for this server was incorrect")
                 elif code == 411:
                     # A 411 reply is treated as a usable connection.
                     _winrm_cache[cache_key] = protocol
                     return protocol
             vvvv('WINRM CONNECTION ERROR: %s' % err_msg, host=self.host)
             continue
Ejemplo n.º 34
0
def keystoneclient(request, admin=False):
    """Return a keystone client for ``request``, reusing a cached one if valid.

    Admin and non-admin clients are cached on the request under different
    attribute names. A cached client is reused when the user has no token id
    or when the client's ``auth_token`` equals the user's token id; otherwise
    a new client is created against the endpoint with its port rewritten to
    35357, stored on the request and returned.
    """
    user = request.user
    endpoint_type = 'adminURL' if admin else getattr(
        settings, 'OPENSTACK_ENDPOINT_TYPE', 'internalURL')

    api_version = VERSIONS.get_active_version()

    # Admin vs. non-admin clients are cached separately for token matching.
    if admin:
        cache_attr = "_keystoneclient_admin"
    else:
        cache_attr = backend.KEYSTONE_CLIENT_ATTR

    if hasattr(request, cache_attr):
        cached = getattr(request, cache_attr)
        if not user.token.id or cached.auth_token == user.token.id:
            return cached

    # No usable cached client: build a fresh connection.
    endpoint = _get_endpoint_url(request, endpoint_type)
    split_endpoint = netutils.urlsplit(endpoint)
    scheme, netloc, path, query, frag = split_endpoint
    port = 80 if split_endpoint.port is None else split_endpoint.port
    admin_netloc = netloc.replace(':%d' % port, ':%d' % 35357)
    endpoint = urlparse.urlunsplit((scheme, admin_netloc, path, query, frag))

    insecure = getattr(settings, 'OPENSTACK_SSL_NO_VERIFY', False)
    cacert = getattr(settings, 'OPENSTACK_SSL_CACERT', None)
    LOG.debug("Creating a new keystoneclient connection to %s." % endpoint)
    remote_addr = request.environ.get('REMOTE_ADDR', '')

    client_cls = api_version['client'].Client
    conn = client_cls(token=user.token.id,
                      endpoint=endpoint,
                      original_ip=remote_addr,
                      insecure=insecure,
                      cacert=cacert,
                      auth_url=endpoint,
                      debug=settings.DEBUG)
    setattr(request, cache_attr, conn)
    return conn
Ejemplo n.º 35
0
Archivo: dap.py Proyecto: pydap/pydap
    def __getitem__(self, index):
        """Fetch the slice selected by ``index`` from the server.

        Requests ``<baseurl path>.dods`` with a constraint made of the quoted
        variable id and the hyperslab for the combined slice, parses the
        response into a dataset and returns the data of ``self.id``.
        """
        # Combine the requested slice with the one this proxy already holds.
        index = combine_slices(self.slice, fix_slice(index, self.shape))

        base = urlsplit(self.baseurl)
        constraint = quote(self.id) + hyperslab(index) + '&' + base.query
        url = urlunsplit((
            base.scheme,
            base.netloc,
            base.path + '.dods',
            constraint,
            base.fragment,
        )).rstrip('&')

        # Download the response.
        logger.info("Fetching URL: %s" % url)
        r = GET(url, self.application, self.session,
                timeout=self.timeout, verify=self.verify)
        raise_for_status(r)
        dds, data = safe_dds_and_data(r, self.user_charset)

        # Rebuild the dataset from the DDS and attach the unpacked payload.
        dataset = build_dataset(dds)
        dataset.data = unpack_data(BytesReader(data), dataset)
        return dataset[self.id].data
Ejemplo n.º 36
0
    def call_for(self, endpoint, to, **values):
        """
        Start a Twilio call that fetches its instructions from a view.

        Parameters
        ----------
        endpoint : `str`
            The view endpoint, as would be passed to :py:func:`flask.url_for`.
        to : `str`
            The destination phone number.
        values : `dict`
            Additional keyword arguments to pass to :py:func:`flask.url_for`.
            A ``from_`` entry, if present, is removed and used as the caller
            number instead of the ``TWILIO_FROM`` config value.

        Returns
        -------
        call : `twilio.rest.resources.Call`
            An object representing the call in progress.
        """
        # Separate the arguments meant for `calls.create` from those meant
        # for `url_for`; the URL is always built as an external one.
        url_kwargs = dict(values, _external=True)
        caller = url_kwargs.pop('from_', None)
        if not caller:
            caller = current_app.config['TWILIO_FROM']

        url = url_for(endpoint, **url_kwargs)

        # Outside debug/testing mode, and only when a signer is configured,
        # embed HTTP basic auth in the URL: user `twilio`, password a random
        # token signed with `itsdangerous`.
        use_basic_auth = (not current_app.debug
                          and not current_app.testing
                          and self.signer is not None)
        if use_basic_auth:
            scheme, netloc, path, query, fragment = urlsplit(url)
            token = ''.join(rand.choice(letters_and_digits) for _ in range(32))
            password = self.signer.sign(token).decode()
            netloc = 'twilio:' + password + '@' + netloc
            url = urlunsplit([scheme, netloc, path, query, fragment])

        # Place the call.
        return self.client.calls.create(to=to, from_=caller, url=url)
Ejemplo n.º 37
0
 def create_dest_url(self, msg, client, item):
     """Build the destination URL and pick the connection parameters.

     ``host``, ``directory`` and ``filepattern`` come from *item*, falling
     back to the client's configured defaults. The path pattern is filled in
     from a copy of the message data after applying any configured aliases.

     Returns a ``(url, connection_parameters)`` tuple.
     """
     defaults = self.config[client]

     def _setting(key):
         # Per-item value wins; otherwise use the client default.
         try:
             return item[key]
         except KeyError:
             return defaults[key]

     host = _setting('host')
     directory = _setting('directory')
     filepattern = _setting('filepattern')

     connection_parameters = item.get('connection_parameters',
                                      defaults.get('connection_parameters'))

     path_pattern = os.path.join(directory, filepattern)

     # Work on a copy so the message itself is left untouched.
     metadata = msg.data.copy()
     alias_table = defaults.get('aliases', {})
     for field, replacements in alias_table.items():
         if field in metadata:
             current = metadata[field]
             metadata[field] = replacements.get(current, current)

     path = compose(path_pattern, metadata)
     host_parts = urlsplit(host)
     host_path = urlunsplit(host_parts._replace(path=path))
     return host_path, connection_parameters
Ejemplo n.º 38
0
def safe_url_string(url, encoding='utf8', path_encoding='utf8'):
    """Convert the given URL into a legal URL by escaping unsafe characters
    according to RFC-3986.

    If a bytes URL is given, it is first converted to `str` using the given
    encoding (which defaults to 'utf-8'). 'utf-8' encoding is used for
    URL path component (unless overridden by path_encoding), and given
    encoding is used for query string or form data.
    When passing an encoding, you should use the encoding of the
    original page (the page from which the URL was extracted from).

    The host is IDNA-encoded when possible; a host that cannot be
    IDNA-encoded is passed through as-is instead of aborting the conversion.

    Calling this function on an already "safe" URL will return the URL
    unmodified.

    Always returns a native `str` (bytes in Python2, unicode in Python3).
    """
    # Python3's urlsplit() chokes on bytes input with non-ASCII chars,
    # so let's decode (to Unicode) using page encoding:
    #   - it is assumed that a raw bytes input comes from a document
    #     encoded with the supplied encoding (or UTF8 by default)
    #   - if the supplied (or default) encoding chokes,
    #     percent-encode offending bytes
    parts = urlsplit(to_unicode(url, encoding=encoding,
                                errors='percentencode'))

    # IDNA encoding raises UnicodeError for labels that are too long
    # (more than 63 characters) or empty (e.g. http://.example.com).
    # Keep the netloc unchanged in that case rather than failing.
    try:
        netloc = parts.netloc.encode('idna')
    except UnicodeError:
        netloc = parts.netloc

    # quote() in Python2 return type follows input type;
    # quote() in Python3 always returns Unicode (native str)
    return urlunsplit((
        to_native_str(parts.scheme),
        to_native_str(netloc),

        # default encoding for path component SHOULD be UTF-8
        quote(to_bytes(parts.path, path_encoding), _safe_chars),

        # encoding of query and fragment follows page encoding
        # or form-charset (if known and passed)
        quote(to_bytes(parts.query, encoding), _safe_chars),
        quote(to_bytes(parts.fragment, encoding), _safe_chars),
    ))
Ejemplo n.º 39
0
    def http_request(self,
                     path="/",
                     method="GET",
                     host=None,
                     port=None,
                     json=False,
                     data=None):
        """
        Send an HTTP request through ``self.http_session``.

        :param path: str, path within the request, e.g. "/api/version"
        :param method: str, HTTP method
        :param host: str, if None, set self.get_IPv4s()[0]
        :param port: str or int, if None, set to self.get_ports()[0]
        :param json: bool, should we expect json?
        :param data: data to send (can be dict, list, str)
        :return: whatever ``self.http_session.request`` returns
        """
        # Fall back to the first known address / port when none is given.
        if not host:
            host = self.get_IPv4s()[0]
        if not port:
            port = self.get_ports()[0]

        netloc = "%s:%s" % (host, port)
        target = urlunsplit(("http", netloc, path, "", ""))
        return self.http_session.request(method, target, json=json, data=data)
Ejemplo n.º 40
0
def api_url(host, port, endpoint):
    """Build the full emby API url for an endpoint.

    The endpoint is joined onto ``host:port`` and the resulting query string
    is forced to contain ``format=json`` (replacing any existing ``format``).

    :param host: Hostname of the emby server
    :param port: Portnumber of the emby server
    :param endpoint: API endpoint
    :type host: str
    :type port: int
    :type endpoint: str
    :returns: Full API url
    :rtype: str
    """
    server = '{0}:{1}'.format(host, port)
    parts = urlsplit(urljoin(server, endpoint))

    params = parse_qs(parts.query)
    params['format'] = ['json']

    return urlunsplit(parts._replace(query=urlencode(params, doseq=True)))
Ejemplo n.º 41
0
def mask_password_from_url( url ):
    """
    Masks out passwords from connection urls like the database connection in galaxy.ini

    >>> mask_password_from_url( 'sqlite+postgresql://user:password@localhost/' )
    'sqlite+postgresql://user:********@localhost/'
    >>> mask_password_from_url( 'amqp://user:amqp@localhost' )
    'amqp://user:********@localhost'
    >>> mask_password_from_url( 'amqp://localhost')
    'amqp://localhost'
    """
    split = urlparse.urlsplit(url)
    password = split.password
    if not password:
        return url

    if url.count(password) == 1:
        # The password text is unique in the url, so a plain string
        # replacement cannot touch anything else.
        return url.replace(password, "********")

    # The password text also occurs elsewhere in the url. Replace only the
    # "user:password" pair inside the netloc and rebuild; this round trip may
    # alter the input beyond masking, which is why the direct replacement
    # above is preferred whenever possible.
    credentials = "%s:%s" % (split.username, password)
    masked_credentials = '%s:********' % split.username
    masked_netloc = split.netloc.replace(credentials, masked_credentials)
    return urlparse.urlunsplit(split._replace(netloc=masked_netloc))
Ejemplo n.º 42
0
    def _cs_request(self, url, method, **kwargs):
        """Send an authenticated request, re-authenticating once on a 401.

        :param url: Path appended to the service (or management) URL. When
            None, the management URL with its trailing version/tenant part
            removed is requested instead.
        :param method: HTTP method, passed through to ``_time_request``.
        :param kwargs: Extra request arguments; the auth headers are added to
            ``kwargs['headers']``.
        :returns: The ``(resp, body)`` pair from ``_time_request``.
        :raises exceptions.Unauthorized: the original error, when the retry
            after re-authentication is rejected as well.
        """
        # Authenticate lazily on first use.
        if not self.management_url:
            self.authenticate()
        if url is None:
            # To get API version information, it is necessary to GET
            # a nova endpoint directly without "v2/<tenant-id>".
            magic_tuple = parse.urlsplit(self.management_url)
            scheme, netloc, path, query, frag = magic_tuple
            path = re.sub(r'v[1-9](\.[1-9][0-9]*)?/[a-z0-9]+$', '', path)
            # Query and fragment of the management URL are dropped.
            url = parse.urlunsplit((scheme, netloc, path, None, None))
        else:
            # Prefer the service catalog unless a bypass URL is configured.
            if self.service_catalog and not self.bypass_url:
                url = self.get_service_url(self.service_type) + url
            else:
                url = self.management_url + url

        # Perform the request once. If we get a 401 back then it
        # might be because the auth token expired, so try to
        # re-authenticate and try again. If it still fails, bail.
        try:
            kwargs.setdefault('headers', {})['X-Auth-Token'] = self.auth_token
            if self.projectid:
                kwargs['headers']['X-Auth-Project-Id'] = self.projectid

            resp, body = self._time_request(url, method, **kwargs)
            return resp, body
        except exceptions.Unauthorized as e:
            try:
                # first discard auth token, to avoid the possibly expired
                # token being re-used in the re-authentication attempt
                self.unauthenticate()
                # overwrite bad token
                self.keyring_saved = False
                self.authenticate()
                kwargs['headers']['X-Auth-Token'] = self.auth_token
                resp, body = self._time_request(url, method, **kwargs)
                return resp, body
            except exceptions.Unauthorized:
                # Report the first failure, not the one from the retry.
                raise e
Ejemplo n.º 43
0
def remove_version_from_href(href):
    """
    Removes the first api version from the href.

    Only the first path segment is considered; it is removed when it looks
    like ``vX`` or ``vX.X``.

    :param href: URL whose path starts with an API version segment.
    :returns: The URL without that version segment.
    :raises ValueError: if the path does not start with a version segment
        (including when the href has no path at all).
    """
    parsed_url = urlparse.urlsplit(href)
    url_parts = parsed_url.path.split('/', 2)

    # NOTE: this should match vX.X or vX
    expression = re.compile(r'^v([0-9]+|[0-9]+\.[0-9]+)(/.*|$)')
    # An empty path splits into a single element; guard the index so such an
    # href is reported through the ValueError below instead of an IndexError.
    if len(url_parts) > 1 and expression.match(url_parts[1]):
        del url_parts[1]

    new_path = '/'.join(url_parts)

    # An unchanged path means no version segment was found.
    if new_path == parsed_url.path:
        msg = _('href %s does not contain version') % href
        LOG.debug(msg)
        raise ValueError(msg)

    parsed_url = list(parsed_url)
    parsed_url[2] = new_path
    return urlparse.urlunsplit(parsed_url)
Ejemplo n.º 44
0
    def verify_hostname(self, req_args, *args, **kwargs):
        """Check that the host in ``req_args['value']`` answers as expected.

        Fetches ``http://<host>/robots.txt`` and requires the body to start
        with ``User-agent: *`` and the last ``Server`` header to be
        ``globaleaks`` (compared in lower case).

        Raises ``errors.ExternalResourceError`` for failures classified as
        common network errors; any other exception is re-raised.
        """
        agent = self.state.get_agent()

        robots_url = urlunsplit(
            ('http', req_args['value'], 'robots.txt', None, None))
        url = robots_url.encode()

        try:
            response = yield agent.request(b'GET', url)
            payload = yield readBody(response)

            server_values = response.headers.getRawHeaders(b'Server', [None])
            server_name = server_values[-1].lower()

            matches = (payload.startswith(b'User-agent: *')
                       and server_name == b'globaleaks')
            if not matches:
                raise EnvironmentError('Response unexpected')

        except Exception as e:
            # Convert networking failures into a generic response
            tenant = self.state.tenant_cache[self.request.tid]
            if is_common_net_error(tenant, e):
                raise errors.ExternalResourceError()
            raise e
Ejemplo n.º 45
0
def clean_url(url):
    """
    Normalise a url so that it has a scheme and a non-empty path.

    Blank or missing input yields an empty string. Otherwise the value is
    stripped and unescaped, ``http`` is used when no scheme is present, and
    an empty path becomes ``/``.
    """
    if not url or not url.strip():
        return ''

    url = xhtml_unescape(url.strip())

    # Without '//' urlsplit would read a bare host name as a path.
    if '://' not in url:
        url = '//' + url

    scheme, netloc, path, query, fragment = parse.urlsplit(url, 'http')
    return parse.urlunsplit((scheme, netloc, path or '/', query, fragment))
Ejemplo n.º 46
0
    def _get_heat_signal_url(self):
        """Return a heat-api signal URL for this resource.

        This URL is not pre-signed, valid user credentials are required.
        """
        stored = self.data().get('heat_signal_url')
        if stored is not None:
            return stored

        if self.id is None:
            # it is too early
            return

        url = self.client_plugin('heat').get_heat_url()
        host_url = urlparse.urlparse(url)
        path = self.identifier().url_path()

        url = urlparse.urlunsplit(
            (host_url.scheme, host_url.netloc, 'v1/%s/signal' % path, '', ''))

        self.data_set('heat_signal_url', url)
        return url
Ejemplo n.º 47
0
def update_query_parameters(url, query_parameters):
    """
    Return url with updated query parameters.

    Arguments:
        url (str): Original url whose query parameters need to be updated.
        query_parameters (dict): A dictionary containing query parameters to be added to course selection url.

    Returns:
        (str): The url with ``query_parameters`` merged into its query string;
            a given parameter replaces an existing one of the same name, and
            parameters are emitted sorted by name.

    """
    parts = urlsplit(url)
    merged_params = parse_qs(parts.query)

    # Supplied parameters win over those already present in the url.
    merged_params.update(query_parameters)

    encoded_query = urlencode(sorted(merged_params.items()), doseq=True)
    return urlunsplit(parts._replace(query=encoded_query))
Ejemplo n.º 48
0
    def is_allowed(self, request, timeout=None):
        """Decide whether ``request`` may be sent.

        A request is refused when its host is in ``self.domain_blacklist`` or
        when the robots rules for the site forbid the URL for the request's
        user agent. When the rules declare a request rate and the last access
        is more recent than the resulting delay, the call sleeps for that
        delay before allowing the request.

        :param request: object exposing ``url`` and ``headers``.
        :param timeout: passed to ``self.load_rules_from_url`` when the rules
            for the site are not in ``self.robots_registry``.
        :return: True when the request is allowed, False otherwise.
        """
        s, n = urlsplit(request.url)[:2]

        if n in self.domain_blacklist:
            self.logger.error("Blocking request to a blacklisted domain: %r" %
                              n)
            return False

        #: if set to not follow the robots.txt
        if not self.obey_robots_txt:
            return True

        robots_url = urlunsplit((s, n, 'robots.txt', None, None))
        try:
            access_rules = self.robots_registry[robots_url]
        except KeyError:
            access_rules = self.load_rules_from_url(robots_url, timeout)
        if access_rules is None:  # error - everybody welcome
            return True

        user_agent = request.headers.get('User-Agent', '*')
        allowed = access_rules.can_fetch(user_agent, request.url)
        if not allowed:
            return False
        request_rate = access_rules.request_rate(user_agent)
        if request_rate is None:
            #: No worries :)
            return True

        current_time = time.time()
        diff_time = current_time - access_rules.mtime()
        delay = request_rate.seconds / request_rate.requests

        if isinstance(delay, (int, float)) and not diff_time >= delay:
            self.logger.debug("Waiting on request for [%r] seconds!" % delay)
            time.sleep(delay)
        #: Update the access time value
        access_rules.modified()
        # The request passed every check (after waiting if needed). Without
        # an explicit result this path would return None, which callers
        # testing truthiness would read as "not allowed".
        return True
Ejemplo n.º 49
0
 def hashed_name(self, name, content=None, filename=None):
     """Return ``name`` with a content hash inserted before the extension.

     :param name: file name or URL-like reference; only its path part is
         hashed, and the other URL components are kept in the result.
     :param content: file-like object to hash. When None, the file is located
         with ``finders.find`` and opened (and closed again) here.
     :param filename: only reported in the diagnostic output below.
     :return: the rebuilt name, e.g. ``css/site.<hash>.css``. When
         ``self.file_hash`` returns None the name is left without a hash part.
     :raises ValueError: if the file has to be opened here and does not exist.
     """
     parsed_name = urlsplit(unquote(name))
     clean_name = parsed_name.path.strip()
     opened = False
     if content is None:
         absolute_path = finders.find(clean_name)
         # NOTE(review): this print (and the one in the error path) looks
         # like leftover debugging output -- confirm whether it should stay.
         print(
             f"Content is none; absolute_path: {absolute_path} / filename: {filename} / name: {name}"
         )
         try:
             content = open(absolute_path, "rb")
         except (IOError, OSError) as e:
             if e.errno == errno.ENOENT:
                 print(e)
                 raise ValueError(
                     "The file '%s' could not be found with %r." %
                     (clean_name, self))
             else:
                 raise
         content = File(content)
         opened = True
     try:
         file_hash = self.file_hash(clean_name, content)
     finally:
         # Only close what was opened here; a caller-supplied file is left
         # for the caller to manage.
         if opened:
             content.close()
     path, filename = os.path.split(clean_name)
     root, ext = os.path.splitext(filename)
     # A missing hash must contribute nothing: formatting None directly would
     # put the literal text "None" into the file name.
     hash_suffix = "" if file_hash is None else ".%s" % file_hash
     hashed_name = os.path.join(path, "%s%s%s" % (root, hash_suffix, ext))
     unparsed_name = list(parsed_name)
     unparsed_name[2] = hashed_name
     # Special casing for a @font-face hack, like url(myfont.eot?#iefix")
     # http://www.fontspring.com/blog/the-new-bulletproof-font-face-syntax
     if "?#" in name and not unparsed_name[3]:
         unparsed_name[2] += "?"
     return urlunsplit(unparsed_name)
Ejemplo n.º 50
0
 def proxy_request(self, req):
     """Forward ``req`` to the remote server and translate its answer.

     A 200 reply is turned into a response via ``self.create_response``;
     403, 400, 404, 409 and 500 map to the matching ``webob.exc`` objects.
     Any other status raises an ``Exception``.
     """
     headers = self.get_headers(req)

     scheme = self.get_scheme(req)
     host = self.get_host(req)
     path_info = self.get_path_info(req)
     query_string = self.get_query_string(req)
     url = urlparse.urlunsplit((scheme, host, path_info, query_string, ''))

     h = self.create_http_client(req)
     resp, content = h.request(
         url,
         method=req.method,
         headers=headers,
         body=req.body
     )

     if resp.status == 200:
         LOG.debug(str(resp))
         return self.create_response(req, resp, content)

     if resp.status == 403:
         LOG.warning(
             'The remote metadata server responded with Forbidden. This '
             'response usually occurs when shared secrets do not match.')
         return webob.exc.HTTPForbidden()

     if resp.status == 400:
         return webob.exc.HTTPBadRequest()

     if resp.status == 404:
         return webob.exc.HTTPNotFound()

     if resp.status == 409:
         return webob.exc.HTTPConflict()

     if resp.status == 500:
         msg = (
             'Remote metadata server experienced an internal server error.'
         )
         LOG.warning(msg)
         return webob.exc.HTTPInternalServerError(
             explanation=six.text_type(msg))

     # Anything else is not an answer this proxy knows how to relay.
     raise Exception(_('Unexpected response code: %s') % resp.status)
Ejemplo n.º 51
0
def create_url(base_url, host, port=None, prefix='', ssl=False):
    """Assemble a fully qualified url from a path-based url and host details.

    Only the path and query of ``base_url`` are used. The scheme follows
    ``ssl``, the port is included only when it is not the standard one for
    that scheme, and ``prefix`` is put in front of the path unless the path
    already starts with it.
    """
    scheme = 'https' if ssl else 'http'

    # A host containing ':' at this point is taken to be an IPv6 address of
    # some form (the port has already been removed); wrap it in brackets
    # unless it already is.
    already_bracketed = host.startswith('[') and host.endswith(']')
    if ':' in host and not already_bracketed:
        host = '[%s]' % host

    netloc = host
    if port and not _port_follows_standard(port, ssl):
        netloc = '%s:%s' % (host, port)

    parsed_url = urlparse.urlsplit(base_url)
    path = parsed_url.path

    # Skip the prefix when it is empty/None or the path already carries it.
    # The startswith check is a pragmatic guard: it treats URLs written in a
    # test file and URLs handed back by the server alike, which covers the
    # common cases without more elaborate bookkeeping of where a URL came
    # from.
    if prefix and not path.startswith(prefix):
        path = '%s/%s' % (prefix.rstrip('/'), path.lstrip('/'))

    return urlparse.urlunsplit((scheme, netloc, path, parsed_url.query, ''))
Ejemplo n.º 52
0
    def base_url(self):
        """Return the base URL for the container, computing it on first use.

        With ``auto_base_url`` the URL is derived from the swift connection
        URL; an ``override_base_url`` then replaces scheme and host and has
        its path placed in front of the derived path. The container name and
        a trailing '/' are appended. Without ``auto_base_url`` the override
        is used unchanged. The result is kept in ``self._base_url``.
        """
        if self._base_url is not None:
            return self._base_url

        if not self.auto_base_url:
            self._base_url = self.override_base_url
            return self._base_url

        # Start from the URL reported by the server.
        self._base_url = self.swift_conn.url + '/'

        if self.override_base_url is not None:
            # Take scheme and host from the override and prepend its path to
            # the derived one; query and fragment are left empty.
            derived = urlparse.urlsplit(self._base_url)
            override = urlparse.urlsplit(self.override_base_url)
            combined_path = (override.path + derived.path).replace('//', '/')
            self._base_url = urlparse.urlunsplit(
                [override.scheme, override.netloc, combined_path, '', ''])

        self._base_url = urlparse.urljoin(self._base_url, self.container_name)
        self._base_url += '/'
        return self._base_url
Ejemplo n.º 53
0
    def generate_return_url(self, return_to, uid, path=""):
        """
        Build the URL the user is sent back to.

        :param return_to: Either a full URL (starting with "http") or a path.
            A path starting with '/' is absolute; otherwise it is relative
            and replaces the last segment of ``path``.
        :param uid: Passed on to ``create_return_url``.
        :param path: The verify path
        :return: The result of ``create_return_url`` for the computed target,
            with ``self.query_param`` set to "true".
        """
        split_url = urlsplit(return_to) if return_to.startswith("http") else None
        target = return_to if split_url is None else split_url.path

        if not target.startswith("/"):
            # Relative: swap the last segment of the verify path for it.
            segments = path.split("/")[:-1]
            segments.append(target)
            target = "/".join(segments)

        if split_url is not None:
            # Put the adjusted path back into the original full URL.
            target = urlunsplit(split_url._replace(path=target))

        extra_query = {self.query_param: "true"}
        return create_return_url(target, uid, **extra_query)
Ejemplo n.º 54
0
def _get_safe_url(url):
    """Gets version of *url* with basic auth passwords obscured. This function
    returns results suitable for printing and logging.

    E.g.: https://user:[email protected] => https://[email protected]

    :param url: a url
    :type url: ``str``

    :return: *url* with password removed
    :rtype: ``str``
    """
    parts = urlsplit(url)
    if parts.username is None:
        return url
    else:
        frags = list(parts)
        if parts.port:
            frags[1] = '{0}@{1}:{2}'.format(parts.username, parts.hostname, parts.port)
        else:
            frags[1] = '{0}@{1}'.format(parts.username, parts.hostname)

        return urlunsplit(frags)
Ejemplo n.º 55
0
    def _insertUsernameIntoURL(self, url, username):
        """
            Return 'url' with "<username>@" placed in front of its network
            location, for servers that must be accessed with a particular
            account, e.g.:

                url = 'svn+ssh://svnext/Libraries/Spam/42.0'
                url = self._insertUsernameIntoURL( url, 'monthy' )

            will return 'svn+ssh://monthy@svnext/Libraries/Spam/42.0'.

            The username is prepended to whatever network location the URL
            already has; existing credentials are not removed.

            Note: The function works with other protocols such as "http://"
                  as well, e.g. as used by Git.
        """
        Any.requireIsTextNonEmpty(url)

        scheme, netloc, path, query, fragment = parse.urlsplit(url)
        withUser = '%s@%s' % (username, netloc)

        result = parse.urlunsplit([scheme, withUser, path, query, fragment])
        Any.requireIsTextNonEmpty(result)

        return result
Ejemplo n.º 56
0
    def percent_encode_host(url):
        """Return *url* with a punycode host converted to a %-encoded host.

        The docstring of the original describes the input as a uri formatted
        with ``to_uri()``. Only the network location is rewritten; scheme,
        path, query and fragment are passed through unchanged.

        :param url: the URL to convert
        :return: *url* itself if it contains no ``xn--`` marker, otherwise
            the URL with its network location IDNA-decoded (best effort) and
            percent-encoded.
        """

        # only continue if punycode encoded
        if 'xn--' not in url:
            return url

        parts = urlsplit(url)
        domain = parts.netloc.encode('utf-8')
        try:
            domain = domain.decode('idna')
            if six.PY2:
                domain = domain.encode('utf-8', 'ignore')
        except Exception:
            # likely already encoded, so use as is
            # (``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed here)
            pass

        # NOTE(review): quote() is called with its default ``safe`` set, so a
        # ':' or '@' in the network location is percent-encoded too. An
        # earlier variant passed safe=r':\/'; presumably callers only hand in
        # host-only locations -- confirm before changing.
        domain = quote(domain)

        return urlunsplit((parts[0], domain, parts[2], parts[3], parts[4]))
Ejemplo n.º 57
0
def remove_trailing_version_from_href(href):
    """Strip a trailing API version segment from *href*.

    Given: 'http://www.nova.com/compute/v1.1'
    Returns: 'http://www.nova.com/compute'

    Given: 'http://www.nova.com/v1.1'
    Returns: 'http://www.nova.com'

    Only the path is altered; the other URL components are kept.

    :raises ValueError: if the last path segment is not a version.
    """
    split_href = urlparse.urlsplit(href)
    # Separate the final path segment from everything in front of it.
    leading = split_href.path.rsplit('/', 1)
    last_segment = leading.pop()

    # NOTE: this should match vX.X or vX
    if re.match(r'^v([0-9]+|[0-9]+\.[0-9]+)(/.*|$)', last_segment) is None:
        LOG.debug('href %s does not contain version', href)
        raise ValueError(_('href %s does not contain version') % href)

    components = list(split_href)
    components[2] = url_join(*leading)
    return urlparse.urlunsplit(components)
Ejemplo n.º 58
0
def open_dods(url, metadata=False, application=None, session=None,
              timeout=DEFAULT_TIMEOUT, verify=True):
    """Open a `.dods` response directly, returning a dataset.

    The response body is split at the ``Data:`` marker line: the part before
    it is decoded and handed to ``build_dataset``, the part after it is
    unpacked into ``dataset.data``.

    :param url: URL of the `.dods` response.
    :param metadata: when true, additionally fetch the matching `.das`
        response and add its attributes to the dataset.
    :param application: forwarded to ``GET``.
    :param session: forwarded to ``GET``.
    :param timeout: forwarded to ``GET`` for every request.
    :param verify: forwarded to ``GET`` for every request.
    :return: the dataset built from the response.
    """
    # ``verify`` is passed here as well as below, so the caller's setting
    # applies to the data request and not only to the metadata request.
    r = GET(url, application, session, timeout=timeout, verify=verify)
    raise_for_status(r)

    dds, data = r.body.split(b'\nData:\n', 1)
    # Fall back to ASCII when the response declares no content encoding.
    dds = dds.decode(r.content_encoding or 'ascii')
    dataset = build_dataset(dds)
    stream = StreamReader(BytesIO(data))
    dataset.data = unpack_data(stream, dataset)

    if metadata:
        scheme, netloc, path, query, fragment = urlsplit(url)
        # Swap the last four characters of the path for 'das'; this assumes
        # the path ends in 'dods'.
        dasurl = urlunsplit(
            (scheme, netloc, path[:-4] + 'das', query, fragment))
        r = GET(dasurl, application, session, timeout=timeout,
                verify=verify)
        raise_for_status(r)
        das = r.text
        add_attributes(dataset, parse_das(das))

    return dataset
Ejemplo n.º 59
0
    def get_current_ver(cls, election_url):
        """Fetch the text of ``current_ver.txt`` for *election_url*.

        The URL is first passed through ``cls._url_ensure_trailing_slash``.
        If *election_url* contains ``'Web02'``, the path is cut down to its
        first two segments, the fragment is cleared, and the resulting parts
        are also stored on ``cls.parsed_url``. Otherwise ``current_ver.txt``
        is appended to the full path.

        :param election_url: URL of the election.
        :return: the response body as text, or ``None`` if the request ends
            in an HTTP error status. Other ``requests`` exceptions are not
            caught here.
        """
        url_parts = parse.urlsplit(
            cls._url_ensure_trailing_slash(election_url))

        if 'Web02' not in election_url:
            url_parts = url_parts._replace(
                path=url_parts.path + "current_ver.txt")
        else:
            # The path starts with '/', so the first three split items are
            # '' plus the first two path segments.
            leading = "/".join(url_parts.path.split('/')[:3])
            url_parts = url_parts._replace(
                path=leading + "/current_ver.txt",
                fragment='')
            cls.parsed_url = url_parts

        response = requests.get(parse.urlunsplit(url_parts))

        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError:
            return None

        return response.text
Ejemplo n.º 60
0
def split_term_source_accession(val):
    """Split *val* into a ``(term source, accession)`` pair.

    * ``None`` gives ``('', '')``.
    * A value matching ``r_url`` is treated as a URL whose last path segment
      is the accession; the rest of the URL, looked up in ``ontology_map``,
      is the term source.
    * Any other value containing ':' is split at the first ':'; the prefix
      is looked up in ``ontology_map`` and the accession is rebuilt as
      ``'<vocab>:<rest>'``.
    * Anything else, or a URL that fails to parse, logs a warning and gives
      ``('', val)``.
    """
    # NOTE(review): if lgr is a stdlib logging.Logger, warn() is a deprecated
    # alias of warning() -- confirm and switch.
    if val is None:
        return '', ''

    if r_url.match(val):
        try:
            # this is a URL, assume simple: last path segment is accession id
            scheme, netloc, path, query, fragment = urlsplit(val)
            parent_path, accession = posixsplit(path)
            term_source = urlunsplit(
                (scheme, netloc, parent_path, query, fragment))
            return ontology_map.get(term_source, term_source), accession
        except Exception as e:
            lgr.warn("Could not identify term source REF in: '%s' [%s]", val, exc_str(e))
            return '', val

    # no URL
    if ':' not in val:
        # no idea
        lgr.warn("Could not identify term source REF in: '%s'", val)
        return '', val

    prefix, _sep, remainder = val.partition(':')
    vocab = ontology_map.get(prefix, prefix)
    return vocab, '{}:{}'.format(vocab, remainder)