コード例 #1
0
 def __init__(self, delay=5):
     """Build one www.manta.com location-search URL per row of ``keywords.csv``.

     The CSV is read from the current working directory. Column 0 of each
     row is used as the ``kw`` parameter, column 1 as ``city`` and column 2
     as ``state``. Every generated URL is appended to ``self.crawl_urls``
     and mapped back to its source row in ``self.queries``.

     :param delay: forwarded unchanged to the parent constructor.
     """
     super(SearchUrlCrawler, self).__init__(delay=delay)
     keywords_path = os.path.realpath(
         os.path.join(os.getcwd(), u'keywords.csv'))
     with codecs.open(keywords_path, u'rb', u'utf-8') as handle:
         rows = csv.reader(handle, delimiter=',', quotechar='"')
         self.queries = OrderedDict()
         for record in rows:
             # Parameter order (city, state, kw) is part of the URL text.
             fields = OrderedDict()
             fields[u'city'] = record[1]
             fields[u'state'] = record[2]
             fields[u'kw'] = record[0]
             search_url = ParseResult(
                 scheme=u'https',
                 netloc=u'www.manta.com',
                 path=u'/api/v1/location',
                 params=u'',
                 query=urlencode(fields),
                 fragment=u'').geturl()
             self.queries[search_url] = record
             self.crawl_urls.append(search_url)
コード例 #2
0
 def _url_and_destination(self, base_url, unit):
     """
     Work out where a unit is downloaded from and where it is stored.

     A unit that carries a tarball path is fetched relative to *base_url*
     and stored next to the unit's storage path under the tarball's file
     name. Any other unit is fetched from the content endpoint on the same
     scheme/host as *base_url* and stored at its storage path.

     NOTE(review): ``pathlib`` here provides ``join``/``quote``/``url_join``,
     so it is presumably a project helper module rather than the stdlib one.

     :param base_url: The base URL.
     :type base_url: str
     :param unit: A content unit.
     :type unit: dict
     :return: (url, destination)
     :rtype: tuple(2)
     """
     storage_path = unit[constants.STORAGE_PATH]
     tar_path = unit.get(constants.TARBALL_PATH)
     if tar_path:
         download_url = pathlib.url_join(base_url, pathlib.quote(tar_path))
         destination = pathlib.join(
             os.path.dirname(storage_path), os.path.basename(tar_path))
         return download_url, destination

     # The pulp/nodes/content endpoint serves all content; it replaced the
     # publishing of an individual link for every unit.
     origin = urlparse(base_url)
     content_path = pathlib.join(
         constants.CONTENT_PATH,
         pathlib.quote(unit[constants.RELATIVE_PATH]))
     content_url = ParseResult(
         origin.scheme, origin.netloc, content_path,
         origin.params, origin.query, origin.fragment)
     return content_url.geturl(), storage_path
コード例 #3
0
def build_url(host, scheme=None, port=None):
    """
    Build a valid URL. IPv6 addresses specified in host will be enclosed in brackets
    automatically.

    >>> build_url('example.com', 'https', 443)
    'https://example.com:443'

    >>> build_url(host='example.com', port=443)
    '//example.com:443'

    >>> build_url('fce:9af7:a667:7286:4917:b8d3:34df:8373', port=80, scheme='http')
    'http://[fce:9af7:a667:7286:4917:b8d3:34df:8373]:80'

    :param scheme: The scheme, e.g. http, https or ftp. ``None`` (the default)
                   yields a scheme-relative URL starting with ``//``.
    :type scheme: str
    :param host: Consisting of either a registered name (including but not limited to
                 a hostname) or an IP address.
    :type host: str
    :param port: Port appended to the host; a falsy value (``None``, ``0``)
                 means no port is added.
    :type port: int
    :rtype: str
    """
    netloc = host if not is_valid_ipv6_address(host) else '[{}]'.format(host)
    if port:
        netloc += ':{}'.format(port)
    # geturl() refuses to mix None with str components on Python 3, so a
    # missing scheme must be passed as the empty string.
    pr = ParseResult(scheme=scheme or '',
                     netloc=netloc,
                     path='',
                     params='',
                     query='',
                     fragment='')
    return pr.geturl()
コード例 #4
0
    def url_for_first_service_with_name(self, name, scheme=None):
        """Return a connection URL for the first service called *name*.

        The credentials are looked up through ``self.credentials_locator``.
        If they already contain a ready-made ``uri`` it is returned as is;
        otherwise the URL is assembled from the ``username``, ``password``,
        ``hostname`` and ``port`` entries.

        :param name: service name passed to the credentials locator.
        :param scheme: URL scheme to use; ``None`` gives a scheme-relative
                       URL (``//host...``).
        :return: the URL string, or ``None`` when no credentials are found.
        """
        locator = self.credentials_locator
        find = locator.find_credentials_for_first_service_with_name
        credentials = find(name)
        if not credentials:
            return None

        if 'uri' in credentials:
            return credentials['uri']

        scheme = scheme or ''
        username = credentials.get('username', None)
        password = credentials.get('password', None)
        # A missing hostname must not be rendered as the literal "None".
        hostname = credentials.get('hostname', None) or ''

        username_password = ''
        if username or password:
            # userinfo part of the authority: "user:password@"
            username_password = '%s:%s@' % (username or '', password or '')

        port = credentials.get('port', None)
        port = '' if not port else ':%s' % port
        netloc = '%s%s%s' % (username_password, hostname, port)
        parse_result = ParseResult(scheme=scheme,
                                   netloc=netloc,
                                   path='',
                                   params='',
                                   query='',
                                   fragment='')
        return parse_result.geturl()
コード例 #5
0
ファイル: users.py プロジェクト: qisanstudio/qstudio-core
def url_build(scheme='http', netloc='', path='',
              params='', query='', fragment=''):
    """Assemble a URL on the application's configured server name.

    The *netloc* argument is accepted but always replaced by
    ``app.config['SERVER_NAME']``, read inside an application context.
    All other components are used as given.
    """
    with app.app_context():
        netloc = app.config['SERVER_NAME']
    return ParseResult(scheme, netloc, path, params, query, fragment).geturl()
コード例 #6
0
 def pre_resolve_request(self, request):
     """Rewrite *request* to target a pre-resolved IP address.

     The host name of ``request.url`` is looked up through
     ``CachingResolver``; the URL is rewritten to ``ip:port`` and the
     original host name is placed in the ``Host`` header.

     :param request: object with a ``url`` string and a ``headers`` mapping.
     :return: the same request object, modified in place.
     """
     uri = urlparse(request.url)
     ip, port = CachingResolver.get(uri.hostname, uri.port or 80)
     # Default to 443 only when the https URL has no explicit port;
     # forcing 443 unconditionally would discard e.g. ":8443".
     if uri.scheme == "https" and uri.port is None:
         port = 443
     request.headers["Host"] = uri.hostname
     pr = ParseResult(uri.scheme, "%s:%s" % (ip, port), uri.path,
                      uri.params, uri.query, uri.fragment)
     request.url = pr.geturl()
     return request
コード例 #7
0
 def __init__(self, delay=5):
     """Expand every listing URL in ``url_list_pagenum.csv`` into its pages.

     The CSV is read from the current working directory. Column 0 holds a
     URL and column 1 the number of pages. For each page 1..N a copy of the
     URL with query parameter ``pg`` set to the page number is appended to
     ``self.crawl_urls`` and mapped back to its source row in
     ``self.queries``. URL params and fragment are dropped.

     :param delay: forwarded unchanged to the parent constructor.
     """
     super(DetailListCrawler, self).__init__(delay=delay)
     listing_path = os.path.realpath(
         os.path.join(os.getcwd(), u'url_list_pagenum.csv'))
     with codecs.open(listing_path, u'rb', u'utf-8') as handle:
         rows = csv.reader(handle, delimiter=',', quotechar='"')
         self.queries = OrderedDict()
         for record in rows:
             base = urlparse(record[0])
             page_query = OrderedDict(parse_qsl(base.query))
             for page_number in xrange(1, int(record[1]) + 1):
                 page_query[u'pg'] = page_number
                 page_url = ParseResult(
                     scheme=base.scheme,
                     netloc=base.netloc,
                     path=base.path,
                     params=u'',
                     query=urlencode(page_query),
                     fragment=u'').geturl()
                 self.queries[page_url] = record
                 self.crawl_urls.append(page_url)
コード例 #8
0
ファイル: GetKeySection.py プロジェクト: mweinelt/geysigning
 def download_key_http(self, address, port):
     """Download the document served at ``http://address:port/``.

     :param address: host name, IPv4 address or IPv6 address of the peer.
     :param port: TCP port (formatted with ``%d``, so it must be an integer).
     :return: the response body as text (``requests`` ``.text``).
     :raises requests.exceptions.RequestException: on connection errors or
         when the peer does not answer within the timeout.
     """
     # Square brackets are only valid around an IPv6 literal; a colon in the
     # address is what distinguishes one from IPv4 addresses and host names.
     host = "[%s]" % address if ":" in address else address
     url = ParseResult(
         scheme='http',
         netloc="%s:%d" % (host, port),
         path='/',
         params='',
         query='',
         fragment='')
     # Without a timeout an unresponsive peer would block the caller forever.
     return requests.get(url.geturl(), timeout=5).text
コード例 #9
0
ファイル: Sections.py プロジェクト: andreimacavei/geysigning
 def download_key_http(self, address, port):
     """Fetch ``http://address:port/`` and return the body as text.

     :param address: host name, IPv4 address or IPv6 address of the peer.
     :param port: TCP port (formatted with ``%d``, so it must be an integer).
     :return: the response body decoded by ``requests`` (``.text``).
     :raises requests.exceptions.RequestException: on connection errors or
         when the peer does not answer within the timeout.
     """
     # Only IPv6 literals may be bracketed in a URL authority; they are the
     # only address form that contains a colon.
     if ":" in address:
         host = "[%s]" % address
     else:
         host = address
     url = ParseResult(
         scheme='http',
         netloc="%s:%d" % (host, port),
         path='/',
         params='',
         query='',
         fragment='')
     # Bound the request so a silent peer cannot hang the caller.
     return requests.get(url.geturl(), timeout=5).text
コード例 #10
0
 def uriWithoutSuffix(self):
     """Return the URI with its params and fragment removed.

     Scheme, netloc, path and query of ``self._parsedUri`` are kept. When
     ``self._doUnquote`` is true the result is passed through
     ``quoteParseResults`` before being rendered.

     :return: the URI as a string.
     """
     # The module was renamed in Python 3; support both locations.
     try:
         from urlparse import ParseResult
     except ImportError:
         from urllib.parse import ParseResult
     uriCopy = ParseResult(scheme=self._parsedUri.scheme,
                           netloc=self._parsedUri.netloc,
                           path=self._parsedUri.path,
                           params='',
                           query=self._parsedUri.query,
                           fragment='')
     uriCopy = quoteParseResults(uriCopy) if self._doUnquote else uriCopy
     return uriCopy.geturl()
コード例 #11
0
ファイル: sso.py プロジェクト: Yellowen/daarmaan
    def authenticate(self, request):
        """
        Redirect the user back to the calling service with the SSO result.

        The target URL is taken from the ``next`` GET parameter, falling
        back to the ``Referer`` header and then to the service's default
        URL. For an authenticated user the session key is appended as
        ``ticket`` together with its signature (``hash``); otherwise a
        plain ``ack`` parameter is appended.

        :param request: the incoming HTTP request.
        :return: ``HttpResponseForbidden`` when the service cannot be
                 determined, otherwise an ``HttpResponseRedirect``.
        """

        next_param = request.GET.get("next", None)

        # Get the service name from request
        service = self._get_service(request)

        if not service:
            return HttpResponseForbidden("Invalid service")

        validator = DefaultValidation(service.key)

        # NOTE(review): "next" and the Referer header are caller-controlled
        # and the ticket is appended to whatever URL they name. Confirm that
        # the target is restricted to the service elsewhere.
        if next_param is not None:
            next_url = urlparse(urllib.unquote(next_param).decode("utf8"))
        elif "HTTP_REFERER" in request.META:
            # The header is stored under the same key that is tested above.
            next_url = urlparse(request.META["HTTP_REFERER"])
        else:
            next_url = urlparse(service.default_url)

        # Retrieve the GET parameters of the target URL and extend them
        params = dict(parse_qsl(next_url[4]))

        # Has the user authenticated before?
        if request.user.is_authenticated():
            logger.debug("User is authenticated.")
            # If the user is authenticated in Daarmaan then a ticket
            # (user session ID) is sent back to the service

            # IMPORTANT: is using session id of daarmaan as ticket ok?
            ticket = request.session.session_key
            logger.debug("[TICKET]: %s" % ticket)
            params.update({'ticket': ticket,
                           "hash": validator.sign(ticket)})

        else:
            # If the user is not authenticated a simple ack answer is returned
            logger.debug("User is NOT authenticated.")
            params.update({"ack": " "})

        next_url = ParseResult(next_url[0],
                               next_url[1],
                               next_url[2],
                               next_url[3],
                               urllib.urlencode(params),
                               next_url[5])

        next_url = next_url.geturl()
        return HttpResponseRedirect(next_url)
コード例 #12
0
ファイル: auth.py プロジェクト: skaaptjop/pymonnit
 def __call__(self, request):
     """Add the token to the end of the request URL's path.

     ``self.token`` is combined with the existing path via ``join``; all
     other URL components are kept. The request is modified in place and
     returned.
     """
     parts = urlparse(request.url)
     path_with_token = join(parts.path, self.token)
     request.url = ParseResult(
         parts.scheme, parts.netloc, path_with_token,
         parts.params, parts.query, parts.fragment).geturl()
     return request
コード例 #13
0
 def should_follow(self, response, spider):
     """Tell whether the response URL is absent from the spider's deny list.

     The URL is compared without its query string and fragment against
     ``spider.disallow_urls``.

     :return: ``True`` when the stripped URL is not in the deny list.
     """
     source = urlparse(response.url)
     stripped = ParseResult(
         scheme=source.scheme,
         netloc=source.netloc,
         path=source.path,
         params=source.params,
         query=None,
         fragment=None)
     return stripped.geturl() not in spider.disallow_urls
コード例 #14
0
 def download_key_http(self, address, port):
     """Download the raw document served at ``http://address:port/``.

     :param address: host name, IPv4 address or IPv6 address of the peer.
     :param port: TCP port (formatted with ``%d``, so it must be an integer).
     :return: the response body as bytes (``requests`` ``.content``).
     :raises requests.exceptions.RequestException: on connection errors or
         when the peer does not answer within 5 seconds.
     """
     # Square brackets are only valid around an IPv6 literal; a colon in the
     # address is what distinguishes one from IPv4 addresses and host names.
     host = "[%s]" % address if ":" in address else address
     url = ParseResult(
         scheme='http',
         netloc="%s:%d" % (host, port),
         path='/',
         params='',
         query='',
         fragment='')
     self.log.debug("Starting HTTP request")
     data = requests.get(url.geturl(), timeout=5).content
     self.log.debug("finished downloading %d bytes", len(data))
     return data
コード例 #15
0
ファイル: GetKeySection.py プロジェクト: muelli/geysigning
 def download_key_http(self, address, port):
     """Fetch ``http://address:port/`` and return the body as bytes.

     :param address: host name, IPv4 address or IPv6 address of the peer.
     :param port: TCP port (formatted with ``%d``, so it must be an integer).
     :return: the undecoded response body (``requests`` ``.content``).
     :raises requests.exceptions.RequestException: on connection errors or
         when the peer does not answer within 5 seconds.
     """
     # Only IPv6 literals may be bracketed in a URL authority; they are the
     # only address form that contains a colon.
     if ":" in address:
         host = "[%s]" % address
     else:
         host = address
     url = ParseResult(
         scheme='http',
         netloc="%s:%d" % (host, port),
         path='/',
         params='',
         query='',
         fragment='')
     self.log.debug("Starting HTTP request")
     data = requests.get(url.geturl(), timeout=5).content
     self.log.debug("finished downloading %d bytes", len(data))
     return data
コード例 #16
0
def handle_authcode(request, client, redirection_uri, state=None):
    """Issue an authorization code and redirect to the client's URI.

    A new ``Oauth2Code`` for the authenticated user is added to the
    database session and flushed. Its ``authcode`` is added to the query
    string of ``redirection_uri.uri`` as ``code``, plus ``state`` when a
    truthy value was supplied. Any fragment on the redirect URI is dropped.

    :return: an ``HTTPFound`` redirect to the resulting URL.
    """
    target = urlparse(redirection_uri.uri)
    query_args = dict(parse_qsl(target.query))

    auth_code = Oauth2Code(
        client, authenticated_userid(request), redirection_uri)
    db.add(auth_code)
    db.flush()

    query_args["code"] = auth_code.authcode
    if state:
        query_args["state"] = state

    location = ParseResult(
        scheme=target.scheme,
        netloc=target.netloc,
        path=target.path,
        params=target.params,
        query=urlencode(query_args),
        fragment="").geturl()
    return HTTPFound(location=location)
コード例 #17
0
ファイル: utils.py プロジェクト: superisaac/status-engine
def url_add_query(url, **kw):
    """
    Return *url* with the keyword arguments added to its query string.

    The new parameters are URL-encoded and placed in front of any query
    the URL already has. All other components are left unchanged. With no
    keyword arguments the URL's query is kept as it is.

    :param url: the URL to extend.
    :param kw: query parameters to add.
    :return: the resulting URL string.
    """
    # urlencode moved between Python 2 and 3; support both locations.
    try:
        from urllib import urlencode
    except ImportError:
        from urllib.parse import urlencode

    u = urlparse(url)
    # Join only the non-empty pieces so that no stray '&' is produced when
    # either the added or the existing query is empty.
    query = '&'.join(part for part in (urlencode(kw), u.query) if part)
    p = ParseResult(u.scheme, u.netloc, u.path,
                    u.params, query, u.fragment)
    return p.geturl()
コード例 #18
0
def get_token_url(code):
    """Build the URL that exchanges an OAuth2 *code* for an access token.

    The URL points at ``/sns/oauth2/access_token`` on ``api.weixin.qq.com``
    and carries the configured app id and secret, the given code and
    ``grant_type=authorization_code`` in its query string.

    :param code: authorization code to exchange.
    :return: the URL string.
    """
    # The URL depends only on the configuration and *code*; nothing from
    # the current request is needed.
    qs = urlencode({
        'appid': config.WECHAT_APP_ID,
        'secret': config.WECHAT_APP_SECRET,
        'code': code,
        'grant_type': 'authorization_code',
    })
    o = ParseResult('https',
                    'api.weixin.qq.com',
                    '/sns/oauth2/access_token',
                    '',
                    query=qs,
                    fragment='wechat_redirect')
    return o.geturl()
コード例 #19
0
def handle_authcode(request, client, redirection_uri, state=None):
    """Issue an authorization code and redirect to the client's URI.

    A new ``Oauth2Code`` for the authenticated user is added to the
    database session and flushed. Its ``authcode`` is added to the query
    string of ``redirection_uri.uri`` as ``code``, plus ``state`` when a
    truthy value was supplied. Any fragment on the redirect URI is dropped.

    :return: an ``HTTPFound`` redirect to the resulting URL.
    """
    target = urlparse(redirection_uri.uri)
    query_args = dict(parse_qsl(target.query))

    auth_code = Oauth2Code(client, authenticated_userid(request))
    db.add(auth_code)
    db.flush()

    query_args['code'] = auth_code.authcode
    if state:
        query_args['state'] = state

    location = ParseResult(
        scheme=target.scheme,
        netloc=target.netloc,
        path=target.path,
        params=target.params,
        query=urlencode(query_args),
        fragment='').geturl()
    return HTTPFound(location=location)
コード例 #20
0
def handle_implicit(request, client, redirection_uri, state=None):
    """Issue an access token and redirect with it in the URL fragment.

    A new ``Oauth2Token`` for the authenticated user is added to the
    database session and flushed. The redirect URL keeps the scheme,
    netloc, path and params of ``redirection_uri.uri``, has an empty query,
    and carries ``access_token``, ``token_type`` and ``expires_in`` in its
    fragment, plus ``state`` when a truthy value was supplied.

    :return: an ``HTTPFound`` redirect to the resulting URL.
    """
    parts = urlparse(redirection_uri.uri)

    user_id = authenticated_userid(request)
    token = Oauth2Token(client, user_id)
    db.add(token)
    db.flush()

    fparams = {
        "access_token": token.access_token,
        "token_type": "bearer",
        "expires_in": token.expires_in,
    }
    # Only echo the state back when the client sent one; a placeholder
    # entry would be encoded as the literal text "state=None".
    if state:
        fparams["state"] = state

    parts = ParseResult(parts.scheme, parts.netloc, parts.path, parts.params, "", urlencode(fparams))
    return HTTPFound(location=parts.geturl())
コード例 #21
0
def get_oauth_url(endpoint, state):
    """Build the WeChat authorize URL that redirects back to *endpoint*.

    The callback URL is the external URL of *endpoint*, generated with the
    current request's query arguments and view arguments. It is sent as
    ``redirect_uri`` together with the configured app id, the
    ``snsapi_userinfo`` scope and *state*.

    :return: the authorize URL string.
    """
    route_args = deepcopy(request.args.to_dict())
    route_args.update(request.view_args)
    callback_url = url_for(endpoint, _external=True, **route_args)
    query_fields = {
        'appid': config.WECHAT_APP_ID,
        'redirect_uri': callback_url,
        'scope': 'snsapi_userinfo',
        'state': state,
    }
    return ParseResult(
        scheme='https',
        netloc='open.weixin.qq.com',
        path='/connect/oauth2/authorize',
        params='',
        query=urlencode(query_fields),
        fragment='wechat_redirect').geturl()
コード例 #22
0
ファイル: repeater.py プロジェクト: wahyuhadi/pappy-proxy
    def geturl(self, include_params=True):
        """Render this object's URL components as a string.

        :param include_params: when false, the params, query and fragment
            are left out and only scheme, netloc and path are rendered.
        :return: the URL string.
        """
        tail = (self.params, self.query, self.fragment)
        if not include_params:
            tail = ("", "", "")
        return ParseResult(
            self.scheme, self.netloc, self.path, *tail).geturl()
コード例 #23
0
ファイル: urltools.py プロジェクト: majackson/recipe-scrapers
def rel_to_abs(start_path, relative_url):
    """Append a relative URL to the path of an absolute URL.

    The path of *start_path* is split on ``/``, empty segments are
    discarded, and *relative_url* is added as the final segment. Scheme,
    netloc, params, query and fragment of *start_path* are kept.

    params:
    start_path - the absolute URL from which the relative
    url is being accessed
    relative_url - the relative url on the page
    """
    parsed_start_url = urlparse(start_path)
    # Build a real list: filter() returns an iterator on Python 3, which
    # cannot be extended in place.
    path_items = [
        item for item in parsed_start_url.path.split('/') if item]
    path_items.append(relative_url)
    new_path = '/'.join(path_items)
    parsed_abs_url = ParseResult(
        scheme=parsed_start_url.scheme,
        netloc=parsed_start_url.netloc,
        path=new_path,
        params=parsed_start_url.params,
        query=parsed_start_url.query,
        fragment=parsed_start_url.fragment)
    return parsed_abs_url.geturl()
コード例 #24
0
ファイル: utils.py プロジェクト: codecwatch/OSCIED
    def replace_netloc(self, netloc):
        u"""
        Swap the host[:port] part of ``self.url`` for *netloc*, in place.

        Every other component of the URL is preserved.

        **Example usage**

        >>> import copy
        >>> from .utils_test import CALLBACK_TEST
        >>> callback = copy.copy(CALLBACK_TEST)
        >>> callback.is_valid(True)
        True
        >>> print(callback.url)
        http://127.0.0.1/media
        >>> callback.replace_netloc(u'129.194.185.47:5003')
        >>> print(callback.url)
        http://129.194.185.47:5003/media
        """
        current = urlparse(self.url)
        relocated = ParseResult(
            scheme=current.scheme,
            netloc=netloc,
            path=current.path,
            params=current.params,
            query=current.query,
            fragment=current.fragment)
        self.url = relocated.geturl()
コード例 #25
0
ファイル: JobBuilder.py プロジェクト: purusharths/fts-rest
    def _populate_files(self, file_dict, f_index, shared_hashed_id):
        """
        From the dictionary file_dict, generate a list of transfers for a job.

        Every source in ``file_dict['sources']`` is paired with every
        destination in ``file_dict['destinations']``, and one transfer
        dictionary per pair is appended to ``self.files``.

        :param file_dict: description of the transfer; the keys read here are
            ``sources``, ``destinations`` and, optionally, ``activity``,
            ``filesize``, ``selection_strategy``, ``checksum`` and ``metadata``.
        :param f_index: value stored as ``file_index`` on every entry.
        :param shared_hashed_id: hashed id to store on every entry, or
            ``None`` to have one generated.
        """
        # Extract matching pairs: each URL is stripped, parsed and validated,
        # and every source is combined with every destination.
        pairs = []
        for source in file_dict['sources']:
            source_url = urlparse(source.strip())
            _validate_url(source_url)
            for destination in file_dict['destinations']:
                dest_url = urlparse(destination.strip())
                _validate_url(dest_url)
                pairs.append((source_url, dest_url))

        # Create one File entry per matching pair
        if self.is_bringonline:
            initial_file_state = 'STAGING'
        else:
            initial_file_state = 'SUBMITTED'

        # Multiple replica job or multihop? Then, the initial state is NOT_USED
        # (this overrides the STAGING/SUBMITTED choice made above).
        if len(file_dict['sources']) > 1 or self.params['multihop']:
            #if self.is_bringonline:
            #set the first as STAGING and the rest as 'NOT_USED'
            #staging_and_multihop = True
            #raise HTTPBadRequest('Staging with multiple replicas is not allowed')
            # On multiple replica job, we mark all files initially with NOT_USED
            initial_file_state = 'NOT_USED'
            # Multiple replicas, all must share the hashed-id
            if shared_hashed_id is None:
                shared_hashed_id = _generate_hashed_id()
        # Only the first VO of the user is consulted for the uuid setting below.
        vo_name = self.user.vos[0]

        for source, destination in pairs:
            # A destination uuid is only computed for single-source transfers
            # whose VO has the feature enabled; it is derived from BASE_ID and
            # the destination URL.
            if len(file_dict['sources']) > 1 or not _is_dest_surl_uuid_enabled(
                    vo_name):
                dest_uuid = None
            else:
                dest_uuid = str(
                    uuid.uuid5(BASE_ID,
                               destination.geturl().encode('utf-8')))
            if self.is_bringonline:
                # add the new query parameter only for root -> EOS-CTA for now
                if source.scheme == "root":
                    # Append the activity to the source URL's query string and
                    # rebuild the source URL with it.
                    query_p = parse_qsl(source.query)
                    query_p.append(
                        ('activity', file_dict.get('activity', 'default')))
                    query_str = urlencode(query_p)
                    source = ParseResult(scheme=source.scheme,
                                         netloc=source.netloc,
                                         path=source.path,
                                         params=source.params,
                                         query=query_str,
                                         fragment=source.fragment)
            # NOTE(review): vo_name is stored as None here even though a VO
            # name was read above; presumably it is filled in later. Confirm.
            # Without a shared hashed id, each entry gets a freshly generated one.
            f = dict(job_id=self.job_id,
                     file_index=f_index,
                     dest_surl_uuid=dest_uuid,
                     file_state=initial_file_state,
                     source_surl=source.geturl(),
                     dest_surl=destination.geturl(),
                     source_se=get_storage_element(source),
                     dest_se=get_storage_element(destination),
                     vo_name=None,
                     priority=self.job['priority'],
                     user_filesize=_safe_filesize(file_dict.get('filesize',
                                                                0)),
                     selection_strategy=file_dict.get('selection_strategy',
                                                      'auto'),
                     checksum=file_dict.get('checksum', None),
                     file_metadata=file_dict.get('metadata', None),
                     activity=file_dict.get('activity', 'default'),
                     hashed_id=shared_hashed_id
                     if shared_hashed_id else _generate_hashed_id())
            self.files.append(f)
コード例 #26
0
ファイル: requests.py プロジェクト: neuaer/python-example
class RedirectURI(Validatable):
    """
    Wrapper around the ``redirect_uri`` parameter of an authorization
    request: keeps the raw and parsed URI, validates it, and lets query
    parameters be added before the final URL is rendered.
    """
    def __init__(self, uri, settings):
        self.raw_uri = uri
        self.parsed_uri = urlparse(uri)
        self.error_message = None
        self.settings = settings
        self.error_responses = settings['error_responses']['redirect_uri']

    # used in super class is_valid
    def validate(self):
        """Set ``error_message`` from the current error code (``None`` if valid)."""
        error_code = self.determine_errors()
        self.error_message = self.error_responses.get(error_code)

    def determine_errors(self):
        """Return ``'invalid'``, ``'not_absolute'`` or ``None`` when there is no error."""
        if not self.is_permitted_site():
            return 'invalid'
        if self.is_absolute():
            return None
        return 'not_absolute'

    def add_params(self, params_dict):
        """Merge *params_dict* into the URI's query string.

        Parameters already present in the URI take precedence over entries
        of *params_dict* with the same name.
        """
        merged = dict(params_dict, **self.parsed_query())
        current = self.parsed_uri
        # ParseResult is immutable, so a new one is built around the
        # re-encoded query.
        self.parsed_uri = ParseResult(
            current.scheme, current.netloc, current.path,
            current.params, urlencode(merged), current.fragment)

    def parsed_query(self):
        """Return the URI's query as a flat ``{name: value}`` dictionary.

        NOTE: a parameter that occurs more than once is collapsed into a
        single value via ``join(values, ",")`` -- generally not a good idea.
        """
        multi = parse_qs(self.parsed_uri.query)
        return dict(
            (key, values if len(values) < 1 else join(values, ","))
            for key, values in multi.iteritems())

    def is_absolute(self):
        """Return whether the URI has both a scheme and a network location."""
        return not (self.parsed_uri.scheme == ""
                    or self.parsed_uri.netloc == "")

    def is_permitted_site(self):
        """Return whether the redirect target is allowed (currently always)."""
        # The OAuth 2 spec recommends validating the redirect URI against a
        # pre-defined URI to prevent an open redirect. For the sake of
        # simplicity that validation has been left out:
        #
        # return self.raw_uri.startswith(self.settings['redirect_site'])
        return True

    def get_url(self):
        """Return the (possibly extended) URI as a string."""
        return self.parsed_uri.geturl()
コード例 #27
0
def main(argv=None):
    # Rewrites resource URLs so that their scheme, server address and path
    # prefix match the download server given in argv.new_dls, and uploads
    # every mediapackage that was modified back to the search index.
    #
    # argv is read as an object with attributes (new_dls, user, password,
    # search_url, add_endpoint), so the default of None cannot actually be
    # used: argv.new_dls is accessed right below.

    c = pycurl.Curl()
    b = StringIO()

    # Filters out the server address
    dls = urlparse(argv.new_dls)

    # NOTE(review): the next statements are corrupted in this copy of the
    # file -- the text between the credential prompts and the URL loop was
    # replaced by "******". The code cannot be parsed as it stands and must
    # be restored from the original source.
    if not argv.user:
        setattr(argv, "user",
                raw_input("Enter the digest authentication user: "******"password",
                getpass.getpass("Enter the digest authentication password: "******"Excluding URL {}\n".format(url.text)
                        continue

                    # Check whether the URL protocol or server address are correct (i.e. match the download server)...
                    if parsed.scheme != dls.scheme or parsed.netloc != dls.netloc:
                        # Change the fields corresponding the server name and the protocol
                        aux[0] = dls.scheme
                        aux[1] = dls.netloc

                    # Check whether the download path is correct
                    if path.commonprefix([parsed.path, dls.path]) != dls.path:
                        # Extract the download server "mountpoint"
                        # Matterhorn resource URLs in distributed mediapackage take the form:
                        #    distribution-channel/mediapackage-id/element-id/filename.extension
                        # , therefore, anything that is beyond these four levels in the hierarchy
                        # is a part of the download server "mountpoint"
                        for i in range(4):
                            aux[2] = path.dirname(aux[2])
                        # Remove the "mountpoint" from the resource's path and add the final "mountpoint" from the server
                        aux[2] = path.join(dls.path,
                                           path.relpath(parsed.path, aux[2]))

                    # aux is presumably a mutable copy of the six components
                    # of `parsed` (its definition is in the lost section).
                    new_url = ParseResult(*aux)
                    if new_url != parsed:
                        url.text = new_url.geturl()
                        print "In:  {}\nOut: {}\n".format(
                            parsed.geturl(), url.text)
                        if not modified:
                            modified = True
                    else:
                        print "URL {} NOT modified".format(url.text)

                # Overwrite the mediapackage in the index
                if modified:
                    # Upload the mediapackage back to the search index (overwriting the old version)
                    curl(etree.tostring(mp, encoding="UTF-8"), argv.search_url,
                         argv.add_endpoint, argv.user, argv.password)
                else:
                    pass

        else:
            print b.getvalue()
    except pycurl.error as err:
        raise RuntimeError(c.errstr())
    except Exception as exc:
        print type(exc), exc
        raise
    finally:
        # Always release the curl handle and the response buffer.
        c.close()
        b.close()
コード例 #28
0
def parts_to_url(scheme, netloc, path, params,
                 query, fragment):
    """Assemble a URL string from its six ``urlparse``-style components."""
    return ParseResult(
        scheme=scheme,
        netloc=netloc,
        path=path,
        params=params,
        query=query,
        fragment=fragment).geturl()