Esempio n. 1
0
def urljoin_rfc(base, ref, encoding='utf-8'):
    r"""
    .. warning::

        This function is deprecated and will be removed in future.
        Please use ``urlparse.urljoin`` instead.

    Same as urlparse.urljoin but supports unicode values in base and ref
    parameters (in which case they will be converted to str using the given
    encoding).

    Both ``base`` and ``ref`` are passed through ``unicode_to_str`` with
    ``encoding`` before being joined. A ``DeprecationWarning`` is emitted on
    every call.

    Always returns a str.

    >>> import w3lib.url
    >>> w3lib.url.urljoin_rfc('http://www.example.com/path/index.html', u'/otherpath/index2.html')
    'http://www.example.com/otherpath/index2.html'
    >>>

    >>> w3lib.url.urljoin_rfc('http://www.example.com/path/index.html', u'fran\u00e7ais/d\u00e9part.htm')
    'http://www.example.com/path/fran\xc3\xa7ais/d\xc3\xa9part.htm'
    >>>


    """

    # stacklevel=2 attributes the warning to the code calling urljoin_rfc, so
    # the reported file/line points at the usage that needs to be migrated.
    warnings.warn("w3lib.url.urljoin_rfc is deprecated, use urlparse.urljoin instead",
        DeprecationWarning, stacklevel=2)

    str_base = unicode_to_str(base, encoding)
    str_ref = unicode_to_str(ref, encoding)
    return moves.urllib.parse.urljoin(str_base, str_ref)
Esempio n. 2
0
def urljoin_rfc(base, ref, encoding='utf-8'):
    """Join ``ref`` onto ``base`` using ``urlparse.urljoin``.

    Unicode values are accepted for both parameters: each one is first passed
    through ``unicode_to_str`` with the given ``encoding``.

    Always returns a str.
    """
    encoded_base = unicode_to_str(base, encoding)
    encoded_ref = unicode_to_str(ref, encoding)
    return urlparse.urljoin(encoded_base, encoded_ref)
Esempio n. 3
0
def urljoin_rfc(base, ref, encoding="utf-8"):
    """Same as urlparse.urljoin but supports unicode values in base and ref
    parameters (in which case they will be converted to str using the given
    encoding).

    Deprecated: a ``DeprecationWarning`` is emitted on every call; use
    ``urlparse.urljoin`` instead.

    Always returns a str.
    """
    # stacklevel=2 makes the warning point at the caller of urljoin_rfc
    # instead of at this line, so the deprecated usage can be located.
    warnings.warn(
        "w3lib.url.urljoin_rfc is deprecated, use urlparse.urljoin instead",
        DeprecationWarning,
        stacklevel=2,
    )
    return urlparse.urljoin(unicode_to_str(base, encoding), unicode_to_str(ref, encoding))
Esempio n. 4
0
def urljoin_rfc(base, ref, encoding='utf-8'):
    """Same as urlparse.urljoin but supports unicode values in base and ref
    parameters (in which case they will be converted to str using the given
    encoding).

    Deprecated: a ``DeprecationWarning`` is emitted on every call; use
    ``urlparse.urljoin`` instead.

    Always returns a str.
    """
    # stacklevel=2 makes the warning point at the caller of urljoin_rfc
    # instead of at this line, so the deprecated usage can be located.
    warnings.warn("w3lib.url.urljoin_rfc is deprecated, use urlparse.urljoin instead",
        DeprecationWarning, stacklevel=2)
    return urlparse.urljoin(unicode_to_str(base, encoding),
        unicode_to_str(ref, encoding))
Esempio n. 5
0
 def __repr__(self):
     """Render the report as text and return it through ``unicode_to_str``.

     The output starts with ``self.date`` (set to ``time.ctime()`` here when
     it is empty) underlined with dashes, followed by one line per entry of
     ``self.indexes`` ordered by ``get_index_order``. When a matching entry
     exists in ``self.indexes_old`` it is appended in square brackets.
     Entries classified as "commodity" or "stock" additionally get their
     value divided by the corresponding gold price.
     """
     self.date = self.date or time.ctime()
     gold_classes = ["commodity", "stock"]
     chunks = [u"%s\n" % self.date, u"-" * len(self.date) + "\n"]
     for entry in sorted(self.indexes,
                         key=lambda x: get_index_order(x["name"])):
         chunks.append(u"%(name)s: %(value)s%(unit)s" % entry)
         if get_index_class(entry["name"]) in gold_classes and self.gold_price:
             chunks.append(u"(%.5fozAu)" % (entry["value"] / self.gold_price))
         previous = self.indexes_old.get(entry["name"])
         if previous:
             stamp = previous["timestamp"]
             when = "%s %s" % (_MONTHS[stamp.month - 1], stamp.day)
             if self.show_year:
                 when += " %s" % stamp.year
             chunks.append(u" [%s: %s%s" % (when, previous["value"],
                                            previous["unit"]))
             if (get_index_class(previous["name"]) in gold_classes
                     and self.gold_price_old):
                 chunks.append(u" (%.5fozAu)" % (previous["value"] /
                                                 self.gold_price_old))
             chunks.append("]")
         chunks.append(u"\n")
     return unicode_to_str(u"".join(chunks))
Esempio n. 6
0
def get_meta_refresh(text, baseurl='', encoding='utf-8'):
    """Extract the meta refresh redirect declared in the given HTML text.

    Returns a tuple ``(interval, url)``: ``interval`` is the matched delay
    converted with ``float()`` and ``url`` is the matched target made safe
    with ``safe_url_string`` and joined onto ``baseurl``.

    If no meta redirect is found, ``(None, None)`` is returned.

    If ``text`` cannot be decoded with ``encoding`` the text is printed and
    the ``UnicodeDecodeError`` is re-raised.

    """

    if six.PY2:
        baseurl = unicode_to_str(baseurl, encoding)
    try:
        text = str_to_unicode(text, encoding)
    except UnicodeDecodeError:
        print(text)
        raise
    # Entities are replaced first, then comments are stripped, before matching.
    match = _meta_refresh_re.search(remove_comments(replace_entities(text)))
    if not match:
        return None, None
    delay = float(match.group('int'))
    target = safe_url_string(match.group('url').strip(' "\''), encoding)
    return delay, moves.urllib.parse.urljoin(baseurl, target)
Esempio n. 7
0
def encode_multipart(data):
    """Encode the given data to be used in a multipart HTTP POST.

    ``data`` is a dictionary where keys are the field names and values are
    either strings, ``(filename, content)`` tuples for file uploads, or a list
    of those when several entries share the same field name.

    String values are converted with ``unicode_to_str``; the ``content`` of a
    file tuple is written to the body unchanged.

    This code is based on distutils.command.upload.

    Return (body, boundary) tuple where ``body`` is binary body value,
    and ``boundary`` is the boundary used (as native string).
    """
    # Build up the MIME payload for the POST data
    boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
    sep_boundary = b'\r\n--' + boundary.encode('ascii')
    end_boundary = sep_boundary + b'--'
    body = BytesIO()
    for key, value in data.items():
        disposition = u'\r\nContent-Disposition: form-data; name="%s"' % key
        # handle multiple entries for the same name
        entries = value if isinstance(value, list) else [value]
        for entry in entries:
            # Every part starts from the bare disposition header; appending to
            # a header shared across the loop would leak one entry's filename
            # into all following parts of the same field.
            title = disposition
            if type(entry) is tuple:
                title += u'; filename="%s"' % entry[0]
                content = entry[1]
            else:
                # in distutils: str(value).encode('utf-8')
                content = unicode_to_str(entry)
            body.write(sep_boundary)
            body.write(title.encode('utf-8'))
            body.write(b"\r\n\r\n")
            body.write(content)
    body.write(end_boundary)
    body.write(b"\r\n")
    return body.getvalue(), boundary
Esempio n. 8
0
File: html.py Progetto: fubuki/w3lib
def get_meta_refresh(text, baseurl='', encoding='utf-8'):
    """Extract the meta refresh redirect declared in the given HTML text.

    Returns a tuple ``(interval, url)``: ``interval`` is the matched delay
    converted with ``float()`` and ``url`` is the matched target made safe
    with ``safe_url_string`` and joined onto ``baseurl``.

    If no meta redirect is found, ``(None, None)`` is returned.

    If ``text`` cannot be decoded with ``encoding`` the text is printed and
    the ``UnicodeDecodeError`` is re-raised.

    """

    if six.PY2:
        baseurl = unicode_to_str(baseurl, encoding)
    try:
        text = str_to_unicode(text, encoding)
    except UnicodeDecodeError:
        print(text)
        raise
    # Entities are removed first, then comments are stripped, before matching.
    match = _meta_refresh_re.search(remove_comments(remove_entities(text)))
    if not match:
        return None, None
    delay = float(match.group('int'))
    target = safe_url_string(match.group('url').strip(' "\''), encoding)
    return delay, moves.urllib.parse.urljoin(baseurl, target)
Esempio n. 9
0
def encode_multipart(data):
    """Encode the given data to be used in a multipart HTTP POST.

    ``data`` is a dictionary where keys are the field names and values are
    either strings, ``(filename, content)`` tuples for file uploads, or a list
    of those when several entries share the same field name.

    String values are converted with ``unicode_to_str``; the ``content`` of a
    file tuple is written to the body unchanged.

    This code is based on distutils.command.upload.

    Return (body, boundary) tuple where ``body`` is binary body value,
    and ``boundary`` is the boundary used (as native string).
    """
    # Build up the MIME payload for the POST data
    boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
    sep_boundary = b'\r\n--' + boundary.encode('ascii')
    end_boundary = sep_boundary + b'--'
    body = BytesIO()
    for key, value in data.items():
        disposition = u'\r\nContent-Disposition: form-data; name="%s"' % key
        # handle multiple entries for the same name
        entries = value if isinstance(value, list) else [value]
        for entry in entries:
            # Every part starts from the bare disposition header; appending to
            # a header shared across the loop would leak one entry's filename
            # into all following parts of the same field.
            title = disposition
            if type(entry) is tuple:
                title += u'; filename="%s"' % entry[0]
                content = entry[1]
            else:
                # in distutils: str(value).encode('utf-8')
                content = unicode_to_str(entry)
            body.write(sep_boundary)
            body.write(title.encode('utf-8'))
            body.write(b"\r\n\r\n")
            body.write(content)
    body.write(end_boundary)
    body.write(b"\r\n")
    return body.getvalue(), boundary
Esempio n. 10
0
def encode_multipart(data):
    r"""

    .. warning::

        This function is deprecated and will be removed in future.
        Please use ``urllib3.filepost.encode_multipart_formdata`` instead.

    Encode the given data to be used in a multipart HTTP POST.

    `data` is a dictionary where keys are the field name, and values are
    either strings or tuples as `(filename, content)` for file uploads.
    A value may also be a list of those, producing one part per entry
    under the same field name.

    This code is based on :class:`distutils.command.upload`.

    Returns a `(body, boundary)` tuple where `body` is binary body value,
    and `boundary` is the boundary used (as native string).

    >>> import w3lib.form
    >>> w3lib.form.encode_multipart({'key': 'value'})
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key"\r\n\r\nvalue\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>> w3lib.form.encode_multipart({'key1': 'value1', 'key2': 'value2'})   # doctest: +SKIP
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key2"\r\n\r\nvalue2\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key1"\r\n\r\nvalue1\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>> w3lib.form.encode_multipart({'somekey': ('path/to/filename', b'\xa1\xa2\xa3\xa4\r\n\r')})
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="somekey"; filename="path/to/filename"\r\n\r\n\xa1\xa2\xa3\xa4\r\n\r\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>>

    """

    # stacklevel=2 attributes the warning to the caller of encode_multipart,
    # so the reported location is the usage that needs to be migrated.
    warnings.warn(
        "`w3lib.form.encode_multipart` function is deprecated and "
        "will be removed in future releases. Please use "
        "`urllib3.filepost.encode_multipart_formdata` instead.",
        DeprecationWarning,
        stacklevel=2
    )

    # Build up the MIME payload for the POST data
    boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
    sep_boundary = b'\r\n--' + boundary.encode('ascii')
    end_boundary = sep_boundary + b'--'
    body = BytesIO()
    for key, value in data.items():
        disposition = u'\r\nContent-Disposition: form-data; name="%s"' % key
        # handle multiple entries for the same name
        entries = value if isinstance(value, list) else [value]
        for entry in entries:
            # Every part starts from the bare disposition header; appending to
            # a header shared across the loop would leak one entry's filename
            # into all following parts of the same field.
            title = disposition
            if type(entry) is tuple:
                title += u'; filename="%s"' % entry[0]
                content = entry[1]
            else:
                # in distutils: str(value).encode('utf-8')
                content = unicode_to_str(entry)
            body.write(sep_boundary)
            body.write(title.encode('utf-8'))
            body.write(b"\r\n\r\n")
            body.write(content)
    body.write(end_boundary)
    body.write(b"\r\n")
    return body.getvalue(), boundary
Esempio n. 11
0
def encode_multipart(data):
    r"""

    .. warning::

        This function is deprecated and will be removed in future.
        Please use ``urllib3.filepost.encode_multipart_formdata`` instead.

    Encode the given data to be used in a multipart HTTP POST.

    `data` is a dictionary where keys are the field name, and values are
    either strings or tuples as `(filename, content)` for file uploads.
    A value may also be a list of those, producing one part per entry
    under the same field name.

    This code is based on :class:`distutils.command.upload`.

    Returns a `(body, boundary)` tuple where `body` is binary body value,
    and `boundary` is the boundary used (as native string).

    >>> import w3lib.form
    >>> w3lib.form.encode_multipart({'key': 'value'})
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key"\r\n\r\nvalue\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>> w3lib.form.encode_multipart({'key1': 'value1', 'key2': 'value2'})   # doctest: +SKIP
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key2"\r\n\r\nvalue2\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key1"\r\n\r\nvalue1\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>> w3lib.form.encode_multipart({'somekey': ('path/to/filename', b'\xa1\xa2\xa3\xa4\r\n\r')})
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="somekey"; filename="path/to/filename"\r\n\r\n\xa1\xa2\xa3\xa4\r\n\r\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>>

    """

    # stacklevel=2 attributes the warning to the caller of encode_multipart,
    # so the reported location is the usage that needs to be migrated.
    warnings.warn(
        "`w3lib.form.encode_multipart` function is deprecated and "
        "will be removed in future releases. Please use "
        "`urllib3.filepost.encode_multipart_formdata` instead.",
        DeprecationWarning, stacklevel=2)

    # Build up the MIME payload for the POST data
    boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
    sep_boundary = b'\r\n--' + boundary.encode('ascii')
    end_boundary = sep_boundary + b'--'
    body = BytesIO()
    for key, value in data.items():
        disposition = u'\r\nContent-Disposition: form-data; name="%s"' % key
        # handle multiple entries for the same name
        entries = value if isinstance(value, list) else [value]
        for entry in entries:
            # Every part starts from the bare disposition header; appending to
            # a header shared across the loop would leak one entry's filename
            # into all following parts of the same field.
            title = disposition
            if type(entry) is tuple:
                title += u'; filename="%s"' % entry[0]
                content = entry[1]
            else:
                # in distutils: str(value).encode('utf-8')
                content = unicode_to_str(entry)
            body.write(sep_boundary)
            body.write(title.encode('utf-8'))
            body.write(b"\r\n\r\n")
            body.write(content)
    body.write(end_boundary)
    body.write(b"\r\n")
    return body.getvalue(), boundary
Esempio n. 12
0
File: html.py Progetto: xacprod/ve1
def get_base_url(text, baseurl='', encoding='utf-8'):
    """Return the base url declared in the given html text.

    A declared base url is encoded with ``encoding`` and joined onto the given
    ``baseurl``; when ``_baseurl_re`` finds no declaration, the given
    ``baseurl`` is used. Either way the result goes through
    ``safe_url_string``.
    """
    html = str_to_unicode(text, encoding)
    resolved = unicode_to_str(baseurl, encoding)
    match = _baseurl_re.search(html)
    if not match:
        return safe_url_string(resolved)
    declared = match.group(1).encode(encoding)
    return safe_url_string(urljoin(resolved, declared))
Esempio n. 13
0
def safe_url_string(url, encoding='utf8'):
    """Convert the given url into a legal URL by escaping unsafe characters
    according to RFC-3986.

    The url is first passed through ``unicode_to_str`` with the given
    encoding (default ``'utf8'``) and then quoted with ``urllib.quote``,
    leaving the characters in ``_safe_chars`` untouched. When passing an
    encoding, use the encoding of the page the url was extracted from.

    Calling this function on an already "safe" url will return the url
    unmodified.

    Always returns a str.
    """
    return urllib.quote(unicode_to_str(url, encoding), _safe_chars)
Esempio n. 14
0
    def __repr__(self):
        """Render the report as text and return it through ``unicode_to_str``.

        The output starts with ``self.date`` (set to ``time.ctime()`` here
        when it is empty) underlined with dashes, followed by one line per
        entry of ``self.items`` sorted by name and formatted with
        ``self.lformat``. When ``self.olformat`` is set and ``self.items_old``
        has an entry of the same name, that entry is appended in square
        brackets, prefixed with its date.
        """
        self.date = self.date or time.ctime()
        chunks = [u"%s\n" % self.date, u"-" * len(self.date) + "\n"]
        for entry in sorted(self.items, key=lambda x: x["name"]):
            chunks.append(self.lformat % entry)
            previous = self.items_old.get(entry["name"]) if self.olformat else None
            if previous:
                stamp = previous["timestamp"]
                when = "%s %s" % (_MONTHS[stamp.month - 1], stamp.day)
                if self.show_year:
                    when += " %s" % stamp.year
                chunks.append(u" [%s: " % when + self.olformat % previous + "]")
            chunks.append("\n")

        return unicode_to_str(u"".join(chunks))
Esempio n. 15
0
    def __repr__(self):
        """Render the report as text and return it through ``unicode_to_str``.

        The output starts with ``self.date`` (set to ``time.ctime()`` here
        when it is empty) underlined with dashes, followed by one line per
        entry of ``self.items`` sorted by name and formatted with
        ``self.lformat``. When ``self.olformat`` is set and ``self.items_old``
        has an entry of the same name, that entry is appended in square
        brackets, prefixed with its date.
        """
        self.date = self.date or time.ctime()
        chunks = [u"%s\n" % self.date, u"-" * len(self.date) + "\n"]
        for entry in sorted(self.items, key=lambda x: x["name"]):
            chunks.append(self.lformat % entry)
            previous = self.items_old.get(entry["name"]) if self.olformat else None
            if previous:
                stamp = previous["timestamp"]
                when = "%s %s" % (_MONTHS[stamp.month - 1], stamp.day)
                if self.show_year:
                    when += " %s" % stamp.year
                chunks.append(u" [%s: " % when + self.olformat % previous + "]")
            chunks.append("\n")

        return unicode_to_str(u"".join(chunks))
Esempio n. 16
0
 def __repr__(self):
     """Render the report as text and return it through ``unicode_to_str``.

     The output starts with ``self.date`` (set to ``time.ctime()`` here when
     it is empty) underlined with dashes, followed by one line per entry of
     ``self.indexes`` ordered by ``get_index_order``. When a matching entry
     exists in ``self.indexes_old`` it is appended in square brackets.
     Entries classified as "commodity" or "stock" additionally get their
     value divided by the corresponding gold price.
     """
     self.date = self.date or time.ctime()
     gold_classes = ["commodity", "stock"]
     chunks = [u"%s\n" % self.date, u"-" * len(self.date) + "\n"]
     for entry in sorted(self.indexes, key=lambda x: get_index_order(x["name"])):
         chunks.append(u"%(name)s: %(value)s%(unit)s" % entry)
         if get_index_class(entry["name"]) in gold_classes and self.gold_price:
             chunks.append(u"(%.5fozAu)" % (entry["value"] / self.gold_price))
         previous = self.indexes_old.get(entry["name"])
         if previous:
             stamp = previous["timestamp"]
             when = "%s %s" % (_MONTHS[stamp.month - 1], stamp.day)
             if self.show_year:
                 when += " %s" % stamp.year
             chunks.append(u" [%s: %s%s" % (when, previous["value"], previous["unit"]))
             if get_index_class(previous["name"]) in gold_classes and self.gold_price_old:
                 chunks.append(u" (%.5fozAu)" % (previous["value"] / self.gold_price_old))
             chunks.append("]")
         chunks.append(u"\n")
     return unicode_to_str(u"".join(chunks))
Esempio n. 17
0
def jsonrpc_client_call(url, method, *args, **kwargs):
    """Execute a JSON-RPC call on the given url.

    Sends a JSON-RPC 2.0 request (with ``id`` fixed to 1) whose ``params``
    are either the positional ``args`` or the keyword ``kwargs``, and decodes
    the response body as UTF-8 JSON.

    Returns the ``result`` member of the response.

    Raises:
        ValueError: if both positional and keyword arguments are passed, or
            if the response contains neither ``result`` nor ``error``.
        JsonRpcError: if the response contains an ``error`` member.
    """
    if args and kwargs:
        raise ValueError(
            "Pass *args or **kwargs but not both to jsonrpc_client_call")
    req = {
        'jsonrpc': '2.0',
        'method': method,
        'params': args or kwargs,
        'id': 1
    }
    data = unicode_to_str(json.dumps(req))
    response = urllib.request.urlopen(url, data)
    try:
        body = response.read()
    finally:
        # Release the connection even when reading the body fails.
        response.close()
    res = json.loads(body.decode('utf-8'))
    if 'result' in res:
        return res['result']
    if 'error' in res:
        er = res['error']
        # The "data" member of a JSON-RPC 2.0 error object is optional, so a
        # plain lookup would turn a valid error response into a KeyError.
        raise JsonRpcError(er['code'], er['message'], er.get('data'))
    msg = "JSON-RPC response must contain 'result' or 'error': %s" % res
    raise ValueError(msg)
Esempio n. 18
0
def get_meta_refresh(text, baseurl="", encoding="utf-8"):
    """Return  the http-equiv parameter of the HTML meta element from the given
    HTML text and return a tuple (interval, url) where interval is the delay
    in seconds (converted with ``float()``) and url is a string with the
    absolute url to redirect.

    The matched url is made safe with ``safe_url_string`` using ``encoding``
    and joined onto ``baseurl``.

    If no meta redirect is found, (None, None) is returned.

    If ``text`` cannot be decoded with ``encoding`` the text is printed and
    the ``UnicodeDecodeError`` is re-raised.
    """
    baseurl = unicode_to_str(baseurl, encoding)
    try:
        text = str_to_unicode(text, encoding)
    except UnicodeDecodeError:
        # Parenthesised form is valid on both Python 2 and Python 3; the bare
        # print statement is a SyntaxError on Python 3.
        print(text)
        raise
    text = remove_comments(remove_entities(text))
    m = _meta_refresh_re.search(text)
    if m:
        interval = float(m.group("int"))
        # Encode the url with the page encoding, the same one used for
        # baseurl, instead of safe_url_string's default.
        url = safe_url_string(m.group("url").strip(" \"'"), encoding)
        url = urljoin(baseurl, url)
        return interval, url
    else:
        return None, None
Esempio n. 19
0
def parse_url(url, encoding=None):
    """Return the parsed form of ``url``.

    A ``urlparse.ParseResult`` is returned unchanged; anything else is passed
    through ``unicode_to_str`` with ``encoding`` and then ``urlparse.urlparse``.
    """
    if isinstance(url, urlparse.ParseResult):
        return url
    return urlparse.urlparse(unicode_to_str(url, encoding))
Esempio n. 20
0
def parse_url(url, encoding=None):
    """Return the parsed form of ``url``.

    A ``urlparse.ParseResult`` is returned unchanged; anything else is passed
    through ``unicode_to_str`` with ``encoding`` and then ``urlparse.urlparse``.
    """
    if isinstance(url, urlparse.ParseResult):
        return url
    return urlparse.urlparse(unicode_to_str(url, encoding))
Esempio n. 21
0
 def test_deprecation(self):
     """Call ``unicode_to_str("")`` inside the ``deprecated_call()`` context."""
     # NOTE(review): deprecated_call is presumably pytest's helper that fails
     # unless a deprecation warning is emitted in the block; confirm the import.
     with deprecated_call():
         unicode_to_str("")