Beispiel #1
0
 def to_url(self):
     """Serialize as a URL for a GET request.

     The query string already present on ``self.url`` is parsed and every
     ``(key, value)`` pair from ``self.items()`` is appended to it; the
     other URL components are carried over unchanged.
     """
     parts = urlparse.urlparse(self.url)

     # Parse results expose named attributes on Python >= 2.5; older
     # versions return a plain tuple, so fall back to positional access.
     try:
         raw_query = parts.query
     except AttributeError:
         raw_query = parts[4]

     merged = parse_qs(raw_query)
     for key, value in self.items():
         merged.setdefault(key, []).append(value)

     try:
         scheme, netloc, path, params, fragment = (
             parts.scheme, parts.netloc, parts.path,
             parts.params, parts.fragment)
     except AttributeError:
         scheme, netloc, path, params, fragment = (
             parts[0], parts[1], parts[2], parts[3], parts[5])

     # Prefer the helpers on ``urllib``; if that name does not provide
     # them, use the ones on ``urlparse`` instead.
     try:
         encoded = urllib.urlencode(merged, True)
         return urllib.urlunparse(
             (scheme, netloc, path, params, encoded, fragment))
     except AttributeError:
         encoded = urlparse.urlencode(merged, True)
         return urlparse.urlunparse(
             (scheme, netloc, path, params, encoded, fragment))
 def get_bookmarks_by_uri(self, uri):
     """Return a tuple of the nodes recorded in the node map for *uri*.

     The lookup key is rebuilt from the first three components returned by
     ``_parse_uri`` with the remaining three blanked out.  An empty tuple
     is returned when the node map has no entry for that key.
     """
     key = urllib.urlunparse(_parse_uri(uri)[:3] + ('', '', ''))
     try:
         matches = self.__node_map[key]
     except KeyError:
         return ()
     return tuple(matches)
Beispiel #3
0
    def url2key(self, url, mode, params):
        """Normalize a URL for use as a caching key.

        - change the scheme and hostname to all lowercase
        - drop a leading ``user[:password]@`` part of the network location
        - remove the port if it is the scheme's default port
          (http: 80, https: 443, ftp: 21)
        - reformat any other numeric port using %d
        - get rid of the fragment identifier

        ``mode`` and ``params`` are accepted for interface compatibility
        but do not influence the returned key.
        """
        default_ports = {'http': 80, 'https': 443, 'ftp': 21}

        # ``url_params`` is the ";params" URL component; it is named apart
        # from the ``params`` argument so the argument is not clobbered.
        scheme, netloc, path, url_params, query, _fragment = urlparse(url)

        at = netloc.find('@')
        if at > 0:
            netloc = netloc[at + 1:]    # delete everything up to the '@'
        scheme = scheme.lower()
        netloc = netloc.lower()

        colon = netloc.find(':')
        port = None
        if colon >= 0:
            try:
                port = int(netloc[colon + 1:])
            except ValueError:
                # Non-numeric port text: leave the netloc as it is.
                port = None

        if port is not None:
            host = netloc[:colon]
            if default_ports.get(scheme) == port:
                netloc = host
            else:
                netloc = "%s:%d" % (host, port)
        return urllib.urlunparse(
            (scheme, netloc, path, url_params, query, ""))
Beispiel #4
0
 def register_id(self, name):
     """Add page number of element start to internal database.

     The entry is keyed by the context's URL with its network location
     lowercased and its fragment replaced by *name*.
     """
     scheme, netloc, path, params, query, _old_fragment = urlparse(
         self.context.get_url())
     target = urllib.urlunparse(
         (scheme, str.lower(netloc), path, params, query, name))
     # The page number is read only after the URL has been rebuilt.
     self._set_docinfo(target, self.formatter.writer.ps.get_pageno(), '')
Beispiel #5
0
    def _make_url(row, band):
        """Return the HTTP URL for the *band* file of the scene in *row*.

        The scheme and host are taken from a fixed reference URL; the path
        is ``row.BASE_URL`` with its ``gs://`` prefix removed, followed by
        ``<row.PRODUCT_ID>_<band>``.
        """
        # Only the scheme and network location of this reference are used.
        reference = urlparse(
            'http://storage.googleapis.com/gcp-public-data-landsat/LC08/01/037/029/'
            'LC08_L1TP_037029_20130101_20190131_01_T1/LC08_L1TP_037029_20130101_20190131_01_T1_B9.TIF'
        )

        bucket_path = row.BASE_URL.replace('gs://', '')
        object_path = f'{bucket_path}/{row.PRODUCT_ID}_{band}'
        return urlunparse(
            [reference.scheme, reference.netloc, object_path, '', '', ''])
Beispiel #6
0
def format_url(template, params={}, **kwparams):
    """Fill in a URL *template* one component at a time.

    The formatting arguments are ``get_formatdata(params)`` overlaid with
    the explicit keyword arguments.  The template is split with
    ``urllib.urlparse``; the query component (index 4) is rendered with a
    ``QuerystringFormatter`` and every other component with ``format``;
    the rendered components are reassembled with ``urllib.urlunparse``.

    NOTE(review): ``format`` is called with keyword arguments, which the
    builtin of that name does not accept -- presumably this module binds
    ``format`` to something else; confirm.
    """
    fmt_args = {**get_formatdata(params), **kwparams}
    query_index = 4

    rendered = []
    for position, component in enumerate(urllib.urlparse(template)):
        if position == query_index:
            rendered.append(
                QuerystringFormatter().format(component, **fmt_args))
        else:
            rendered.append(format(component, **fmt_args))

    return urllib.urlunparse(rendered)
 def start_Bookmark(self, node):
     """Create a new ``nodes.Bookmark`` for *node* and index it by URI key.

     The new node is registered through ``add_node`` and
     ``add_describable``, receives the URI and both timestamps of *node*,
     and is appended to the node-map list for its key (the first three
     components from ``_parse_uri`` with the rest blanked out).
     """
     bookmark = nodes.Bookmark()
     self.add_node(bookmark)
     self.add_describable(bookmark, node)

     uri = node.uri()
     bookmark.set_uri(uri)
     bookmark.set_last_modified(node.last_modified())
     bookmark.set_last_visited(node.last_visited())

     key = urllib.urlunparse(_parse_uri(uri)[:3] + ('', '', ''))
     try:
         bucket = self.__node_map[key]
     except KeyError:
         # First bookmark seen for this key.
         self.__node_map[key] = [bookmark]
     else:
         bucket.append(bookmark)
Beispiel #8
0
 def wrapper(*args, **kwargs):
     """Call ``func`` after adjusting the leading argument or ``path`` keyword.

     With no positional arguments the call is forwarded untouched.
     Otherwise the first positional argument is removed and:

     - if no truthy ``path`` keyword was given, it is re-inserted as
       ``str(first)`` only when it is a ``UniversalPath``;
     - if a ``path`` keyword was given, that keyword is replaced by the
       URL built from ``self._url.replace(path=...)``.

     NOTE(review): a first positional argument that is not a
     ``UniversalPath`` is dropped, as is any first argument when ``path``
     is supplied -- confirm this is intended.
     """
     if not args:
         return func(*args, **kwargs)

     first, *remaining = args
     if kwargs.get("path"):
         new_url = self._url.replace(path=kwargs["path"])
         kwargs["path"] = urllib.urlunparse(new_url)
         return func(*remaining, **kwargs)

     if isinstance(first, UniversalPath):
         remaining.insert(0, str(first))
     return func(*tuple(remaining), **kwargs)
def norm_uri(uri):
    """Return *uri* in a normalized form.

    The network location is lowercased and, for ``http`` URLs only, an
    explicit port 80 is removed.  A port that is not a decimal number is
    left untouched.  All other components are preserved.
    """
    scheme, netloc, path, params, query, fragment = urlparse(uri)
    if scheme == "http" and ':' in netloc:
        # Split on the last colon so only the trailing port text is parsed.
        host, _, port_text = netloc.rpartition(':')
        try:
            port = int(port_text, 10)
        except ValueError:
            port = None
        if port == 80:
            netloc = host
    return urllib.urlunparse(
        (scheme, netloc.lower(), path, params, query, fragment))
 def build_info(self, node):
     """Walk the tree rooted at *node* breadth-first and index its nodes.

     Returns a ``(node_map, id_map, ref_map)`` tuple:

     - ``node_map`` maps a URI key (first three components from
       ``_parse_uri``, the rest blanked) to the list of "Bookmark" nodes
       with that key;
     - ``id_map`` maps each non-empty ID of a "Bookmark" or "Folder" node
       to that node;
     - ``ref_map`` maps each ``idref`` to the list of "Alias" nodes that
       refer to it.

     Raises ``NodeIDError`` when an ID occurs twice or when an alias
     refers to an ID that no node in the tree defines.
     """
     from collections import deque

     node_map = {}
     id_map = {}
     ref_map = {}
     need_ids = []        # alias targets not (yet) seen, without duplicates
     queue = deque([node])
     while queue:
         node = queue.popleft()
         nodetype = node.get_nodetype()
         if nodetype in ("Bookmark", "Folder"):
             node_id = node.id()
             if node_id in id_map:
                 raise NodeIDError(
                     "duplicate ID found: " + repr(node_id))
             if node_id:
                 id_map[node_id] = node
                 if node_id in need_ids:
                     need_ids.remove(node_id)
             if nodetype == "Bookmark":
                 uri = node.uri()
                 key = urllib.urlunparse(
                     _parse_uri(uri)[:3] + ('', '', ''))
                 node_map.setdefault(key, []).append(node)
             else:
                 # add child nodes to the end of the queue
                 queue.extend(node.children())
         elif nodetype == "Alias":
             idref = node.idref()
             # Record each missing target once, so that a single later
             # definition of the ID clears it even with several aliases.
             if idref not in id_map and idref not in need_ids:
                 need_ids.append(idref)
             ref_map.setdefault(idref, []).append(node)
     if need_ids:
         raise NodeIDError("Could not locate IDs", need_ids)
     return node_map, id_map, ref_map
    def restart(self, url):
        """Restart this reader on *url*, using up one of the allowed restarts.

        The viewer and application are taken from the last context, the
        parser is reset, and a new API object is opened for the URL (with
        the fragment stripped and ``self.user_passwd`` re-applied to the
        network location when set) before ``BaseReader.__init__`` is run
        again with it.
        """
        self.maxrestarts -= 1

        self.viewer = self.last_context.viewer
        self.app = self.last_context.app

        self.parser = None

        parts = urlparse(url)
        # The URL sent with a 301 or 302 response may be relative.  If the
        # parse result lacks a scheme or a netloc, join the URL with the
        # previous one and parse again.
        if not (parts[0] and parts[1]):
            url = urllib.urljoin(self.url, url)
            parts = urlparse(url)
        self.url = url

        # Keep the fragment aside; the request URL is built without it.
        self.fragment = parts[-1]
        parts = parts[:-1] + ("",)
        if self.user_passwd:
            host = parts[1]
            at = str.find(host, '@')
            if at >= 0:
                host = host[at + 1:]
            parts = (parts[0], self.user_passwd + '@' + host) + parts[2:]
        realurl = urllib.urlunparse(parts)

        # Check first to see if the previous Context has any protocol handlers
        api = self.last_context.get_local_api(
            realurl, self.method, self.params)
        if not api:
            if self.app:
                api = self.app.open_url(
                    realurl, self.method, self.params, self.reload,
                    data=self.data)
            else:
                import protocols
                api = protocols.protocol_access(
                    realurl, self.method, self.params, data=self.data)

        BaseReader.__init__(self, self.last_context, api)
 def __call__(self, url, attrs):
     """Consider *url* for insertion; always return *url* unchanged.

     The URL is recorded (in ``self.__docs`` and via ``self.insert``)
     only if it has no params or query, matches the stored scheme and
     lowercased netloc, has not been recorded before, has a path that
     starts with the stored path, and the level limit (if any) has not
     been reached.  *attrs* is not used.
     """
     scheme, netloc, path, params, query, frag = urlparse(url)
     if params or query:  # safety restraint
         return url
     netloc = str.lower(netloc)
     same_site = scheme == self.__scheme and netloc == self.__netloc
     if not same_site:
         return url

     # check the paths:
     stored_url = urllib.urlunparse((scheme, netloc, path, '', '', ''))
     # NOTE(review): has_key() implies __docs is not a plain Python 3
     # dict -- confirm its type.
     if self.__docs.has_key(stored_url):
         return url
     prefix = self.__path
     if len(path) < len(prefix) or path[:len(prefix)] != prefix:
         return url

     # A falsy max_levels means "no limit".
     limit = self.__max_levels
     if not limit or self.__level < limit:
         self.__docs[stored_url] = self.__level + 1
         self.insert(stored_url)
     return url
 def __init__(self, master, context, class_="DocumentInfo"):
     """Build the "Document Info" window for the page held by *context*.

     *master* is the parent widget handed to ``tktools.make_toplevel``
     and *class_* the widget class name given to it.  The window shows
     the page title, the URL without params/query/fragment, the fragment
     (if any), the response headers, and -- subject to the ``stretch``
     logic below -- the query and POST fields.
     """
     root = tktools.make_toplevel(master,
                                  class_=class_,
                                  title="Document Info")
     self.root = root
     self.app = context.app
     page_title = context.page.title()
     if page_title:
         root.title("Document Info: " + page_title)
     destroy = self.destroy
     for seq in ("<Alt-W>", "<Alt-w>", "<Return>"):
         # Each key sequence closes the window.
         root.bind(seq, destroy)
     root.protocol("WM_DELETE_WINDOW", destroy)
     frame, self.__topfr, botfr = tktools.make_double_frame(root)
     #
     # Info display
     #
     url = context.page.url()
     scheme, netloc, path, params, query, fragment = urlparse(url)
     url = urllib.urlunparse((scheme, netloc, path, '', '', ''))
     self.add_label_field("Title", page_title or "(unknown)", "title")
     self.add_label_field("URI", url, "uri")
     if fragment:
         self.add_label_field("Fragment", fragment, "fragment")
     headers = context.get_headers()
     # NOTE(review): the ``type(...) is type(self)`` tests below look like
     # a check for a date object rather than a string -- confirm.
     if headers.has_key("date") and type(headers["date"]) is type(self):
         self.add_label_field("", "(Loaded from local cache.)", "cached")
     # sorted() works whether items() returns a list or a view.
     items = sorted(headers.items())
     lines = []
     for k, v in items:
         if k == 'date' and type(v) is type(self):
             from . import ht_time
             v = ht_time.unparse(v.get_secs())
         lines.append("%s:\t%s\n" % (k, v))
     s = "".join(lines)
     stretch = self.add_text_field("Response headers", s, "headers")
     # NOTE(review): because of the ``or``, the query and POST fields are
     # only added while ``stretch`` is still falsy -- confirm intended.
     if query:
         query = str.translate(query, FIELD_BREAKER)
         stretch = stretch or \
                   self.add_text_field("Query fields", query, "query")
     postdata = context.get_postdata()
     if postdata:
         postdata = str.translate(postdata, FIELD_BREAKER)
         stretch = stretch or \
                   self.add_text_field("POST fields", postdata, "postdata")
     #
     # Bottom button
     #
     fr = tkinter.Frame(botfr, borderwidth=1, relief=tkinter.SUNKEN)
     fr.pack()
     btn = tkinter.Button(fr, text="OK", command=destroy)
     # '2m' is the value from the standard Tk 'tk_dialog' command
     btn.pack(padx='2m', pady='2m')
     btn.focus_set()
     #
     del self.__topfr  # lose the reference
     tktools.set_transient(root, master)
     root.update_idletasks()
     reqwidth = root.winfo_reqwidth()
     reqheight = root.winfo_reqheight()
     # The requested size is the minimum; it is also the maximum unless
     # a text field asked to stretch.
     root.minsize(reqwidth, reqheight)
     if not stretch:
         root.maxsize(reqwidth, reqheight)
print(response.read())

from urllib.parse import urlparse
from urllib.parse import urljoin

# Splitting a URL into its components
# urllib.parse.urlparse(urlstring, scheme='', allow_fragments=True)
result = urlparse('http://www.baidu.com/index.html;user?id=5#comment')
# Supply a default scheme; it is only used when the URL itself has none.
# NOTE: this rebinds `result`, so the print below shows this second parse.
result = urlparse('www.baidu.com/index.html;user?id=5#comment', scheme='https')
# Result of the FIRST call above (the URL with an explicit http:// prefix):
#  ParseResult(scheme='http', netloc='www.baidu.com', path='/index.html', params='user', query='id=5', fragment='comment')
print(type(result), result)

# Assembling a URL from its six components
data = ['http', 'www.baidu.com', 'index.html', 'user', 'a=6', 'comment']
# NOTE(review): `urllib` is not imported in the visible part of this
# script -- confirm that the name is bound and exposes urlunparse.
print(urllib.urlunparse(data))

# Joining URLs: the second argument takes precedence; whatever it lacks is
# filled in from the first (base) URL.
# Handy when a link is missing its domain.
print(urljoin('http://www.baidu.com', 'FAQ.html'))
print(urljoin('http://www.baidu.com', 'https://cuiqingcai.com/FAQ.html'))
print(
    urljoin('http://www.baidu.com/about.html',
            'https://cuiqingcai.com/FAQ.html'))
print(
    urljoin('http://www.baidu.com/about.html',
            'https://cuiqingcai.com/FAQ.html?question=2'))
print(
    urljoin('http://www.baidu.com?wd=abc', 'https://cuiqingcai.com/index.php'))
print(urljoin('http://www.baidu.com', '?category=2#comment'))
print(urljoin('www.baidu.com', '?category=2#comment'))
 def __make_node_key(self, node):
     """Return the lookup key for *node*.

     The key is built from the first three components that
     ``_parse_uri`` returns for the node's URI, with the remaining three
     components blanked out.
     """
     leading = _parse_uri(node.uri())[:3]
     return urllib.urlunparse(leading + ('', '', ''))
Beispiel #16
0
 def __init__(self, master, address, data):
     """Build the "Mail Dialog" window and pre-fill its header block.

     *address* is parsed as a URL: its query string may supply header
     values (and a ``body``), and the rest becomes the ``to`` header.
     *data*, when truthy, is inserted as the message body and switches
     the content type to ``application/x-www-form-urlencoded``.
     """
     # Imported locally so the block does not depend on how the module
     # binds these names; ``cgi.parse_qs`` no longer exists on current
     # Python versions.
     from urllib.parse import parse_qs, urlparse, urlunparse

     # query semantics may be used to identify header field values
     scheme, netloc, path, params, query, fragment = urlparse(address)
     address = urlunparse((scheme, netloc, path, '', '', ''))
     headers = parse_qs(query)
     # create widgets
     self.master = master
     self.root = tktools.make_toplevel(self.master, title="Mail Dialog")
     self.root.protocol("WM_DELETE_WINDOW", self.cancel_command)
     self.root.bind("<Alt-w>", self.cancel_command)
     self.root.bind("<Alt-W>", self.cancel_command)
     fr, top, botframe = tktools.make_double_frame(self.root)
     self.text, fr = tktools.make_text_box(top, 80, 24)
     self.text.tag_config('SUSPICIOUS_HEADER', foreground='red')
     self.send_button = Button(botframe,
                               text="Send",
                               command=self.send_command)
     self.send_button.pack(side=LEFT)
     self.cancel_button = Button(botframe,
                                 text="Cancel",
                                 command=self.cancel_command)
     self.cancel_button.pack(side=RIGHT)
     tktools.unify_button_widths(self.send_button, self.cancel_button)
     hinfo = _make_sequence_dict(COMMON_HEADERS)
     variables = {
         'to': address,
         'subject': data and 'Form posted from Grail' or '',
         'mime-version': '1.0',
         'x-mailer': GRAILVERSION,
         'x-url': LAST_CONTEXT and LAST_CONTEXT.get_baseurl() or ''
     }
     if data:
         variables["content-type"] = "application/x-www-form-urlencoded"
     else:
         variables["content-type"] = "text/plain; charset=us-ascii"
         variables["content-transfer-encoding"] = "7bit"
     # move default set of query'd headers into variables
     # (iterate over a snapshot: entries are deleted inside the loop)
     for original_key, vlist in list(headers.items()):
         header = original_key.lower()
         if header != 'body':
             if header not in DISALLOWED_HEADERS:
                 variables[header] = vlist[0]  # toss duplicates
                 if header not in hinfo:
                     hinfo[header] = 15
             # delete by the key as stored, not its lowercased form
             del headers[original_key]
     # insert user-specified extra headers
     variables = self.add_user_headers(variables)
     for header in variables.keys():
         if header not in hinfo:
             hinfo[header] = 19
     # write the headers into the buffer
     variables['date'] = time.ctime(time.time())
     hseq = _make_dict_sequence(hinfo)
     for x, header in hseq:
         if header in variables:
             # Capitalize each '-'-separated word, e.g. "x-url" -> "X-Url".
             label = '-'.join(
                 word.capitalize() for word in header.split('-'))
             self.text.insert(END, "%s: %s\n" % (label, variables[header]))
     # insert newline
     self.text.insert(END, '\n', ())
     # insert data
     if data:
         self.text.insert(END, data)
     elif 'body' in headers:
         self.text.insert(END, headers['body'][0] + '\n')
     else:
         self.add_user_signature()
     self.text.focus_set()
Beispiel #17
0
 def set_url(self, url):
     """Store the cooked form of *url* in ``self._url_cooked``.

     Only the first three parsed components of *url* are kept; the
     remaining three are blanked before the result is passed to ``cook``.
     """
     leading = urlparse(url)[:3]
     stripped = urllib.urlunparse(leading + ('', '', ''))
     self._url_cooked = cook(stripped)