Esempi in Python per unicode_to_str, esempi in Python per w3lib.util.unicode_to_str

Esempio n. 1

0

Mostra file

File: url.py Progetto: 668Jerry/rations

def urljoin_rfc(base, ref, encoding='utf-8'):
    r"""
    .. warning::

        This function is deprecated and will be removed in future.
        Please use ``urlparse.urljoin`` instead.

    Same as urlparse.urljoin but supports unicode values in base and ref
    parameters (in which case they will be converted to str using the given
    encoding).

    Always returns a str.

    Emits a ``DeprecationWarning`` on every call, attributed to the caller.

    >>> import w3lib.url
    >>> w3lib.url.urljoin_rfc('http://www.example.com/path/index.html', u'/otherpath/index2.html')
    'http://www.example.com/otherpath/index2.html'
    >>>

    >>> w3lib.url.urljoin_rfc('http://www.example.com/path/index.html', u'fran\u00e7ais/d\u00e9part.htm')
    'http://www.example.com/path/fran\xc3\xa7ais/d\xc3\xa9part.htm'
    >>>


    """

    # stacklevel=2 makes the warning point at the code calling urljoin_rfc,
    # which is the code that actually has to be migrated.
    warnings.warn(
        "w3lib.url.urljoin_rfc is deprecated, use urlparse.urljoin instead",
        DeprecationWarning,
        stacklevel=2,
    )

    str_base = unicode_to_str(base, encoding)
    str_ref = unicode_to_str(ref, encoding)
    return moves.urllib.parse.urljoin(str_base, str_ref)

Esempio n. 2

0

Mostra file

File: url.py Progetto: hbradlow/ggAPI

def urljoin_rfc(base, ref, encoding='utf-8'):
    """Join ``ref`` onto ``base`` the way ``urlparse.urljoin`` does.

    Both ``base`` and ``ref`` are first passed through ``unicode_to_str``
    with the given ``encoding``, so unicode values are accepted for either
    of them.

    Always returns a str.
    """
    encoded_base = unicode_to_str(base, encoding)
    encoded_ref = unicode_to_str(ref, encoding)
    return urlparse.urljoin(encoded_base, encoded_ref)

Esempio n. 3

0

Mostra file

File: url.py Progetto: nasirsphi/w3lib

def urljoin_rfc(base, ref, encoding="utf-8"):
    """Same as urlparse.urljoin but supports unicode values in base and ref
    parameters (in which case they will be converted to str using the given
    encoding).

    Always returns a str.

    Emits a ``DeprecationWarning`` on every call, attributed to the caller.
    """
    # stacklevel=2 makes the warning point at the code calling urljoin_rfc,
    # which is the code that actually has to be migrated.
    warnings.warn(
        "w3lib.url.urljoin_rfc is deprecated, use urlparse.urljoin instead",
        DeprecationWarning,
        stacklevel=2,
    )
    return urlparse.urljoin(unicode_to_str(base, encoding), unicode_to_str(ref, encoding))

Esempio n. 4

0

Mostra file

def urljoin_rfc(base, ref, encoding='utf-8'):
    """Same as urlparse.urljoin but supports unicode values in base and ref
    parameters (in which case they will be converted to str using the given
    encoding).

    Always returns a str.

    Emits a ``DeprecationWarning`` on every call, attributed to the caller.
    """
    # stacklevel=2 makes the warning point at the code calling urljoin_rfc,
    # which is the code that actually has to be migrated.
    warnings.warn(
        "w3lib.url.urljoin_rfc is deprecated, use urlparse.urljoin instead",
        DeprecationWarning,
        stacklevel=2,
    )
    return urlparse.urljoin(unicode_to_str(base, encoding),
                            unicode_to_str(ref, encoding))

Esempio n. 5

0

Mostra file

 def __repr__(self):
     """Render the indexes as text: a dated header, then one line per index.

     ``self.date`` is filled in with ``time.ctime()`` when it is empty.
     Indexes are ordered by ``get_index_order`` of their name.  Indexes whose
     class is "commodity" or "stock" also get their value divided by the gold
     price, when one is set.  When ``self.indexes_old`` holds an entry for the
     same name, it is appended in square brackets with its date.

     The assembled text is passed through ``unicode_to_str`` before returning.
     """
     self.date = self.date or time.ctime()
     gold_classes = ["commodity", "stock"]
     chunks = [u"%s\n" % self.date, u"-" * len(self.date) + "\n"]
     by_order = sorted(self.indexes,
                       key=lambda entry: get_index_order(entry["name"]))
     for index in by_order:
         chunks.append(u"%(name)s: %(value)s%(unit)s" % index)
         if get_index_class(index["name"]) in gold_classes and self.gold_price:
             chunks.append(u"(%.5fozAu)" % (index["value"] / self.gold_price))
         previous = self.indexes_old.get(index["name"])
         if previous:
             stamp = previous["timestamp"]
             label = "%s %s" % (_MONTHS[stamp.month - 1], stamp.day)
             if self.show_year:
                 label += " %s" % stamp.year
             chunks.append(
                 u" [%s: %s%s" % (label, previous["value"], previous["unit"]))
             if (get_index_class(previous["name"]) in gold_classes
                     and self.gold_price_old):
                 chunks.append(
                     u" (%.5fozAu)" % (previous["value"] / self.gold_price_old))
             chunks.append("]")
         chunks.append(u"\n")
     return unicode_to_str(u"".join(chunks))

Esempio n. 6

0

Mostra file

def get_meta_refresh(text, baseurl='', encoding='utf-8'):
    """Look for a meta refresh declaration in the given HTML text and return
    a tuple ``(interval, url)``.

    ``interval`` is the delay in seconds as a float, parsed from the ``int``
    group of the module-level ``_meta_refresh_re`` pattern.  ``url`` is the
    matched ``url`` group, stripped of surrounding spaces and quotes, passed
    through ``safe_url_string`` and joined onto ``baseurl``.

    If no meta redirect is found, ``(None, None)`` is returned.

    Raises ``UnicodeDecodeError`` if ``text`` cannot be decoded with
    ``encoding``.
    """

    if six.PY2:
        baseurl = unicode_to_str(baseurl, encoding)
    # A decoding failure propagates unchanged.  The exception already carries
    # the offending input and position, so nothing is dumped to stdout here.
    text = str_to_unicode(text, encoding)
    text = remove_comments(replace_entities(text))
    m = _meta_refresh_re.search(text)
    if not m:
        return None, None
    interval = float(m.group('int'))
    url = safe_url_string(m.group('url').strip(' "\''), encoding)
    url = moves.urllib.parse.urljoin(baseurl, url)
    return interval, url

Esempio n. 7

0

Mostra file

File: form.py Progetto: JodeZer/moodle-scraper

def encode_multipart(data):
    """Encode the given data to be used in a multipart HTTP POST.

    ``data`` is a dictionary where keys are the field names, and values are
    either strings, ``(filename, content)`` tuples for file uploads, or a
    list of those when the same field name carries several entries.  The
    ``content`` of a tuple is written to the body unchanged.

    This code is based on distutils.command.upload.

    Return (body, boundary) tuple where ``body`` is binary body value,
    and ``boundary`` is the boundary used (as native string).
    """
    # Build up the MIME payload for the POST data
    boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
    sep_boundary = b'\r\n--' + boundary.encode('ascii')
    end_boundary = sep_boundary + b'--'
    body = BytesIO()
    for key, values in data.items():
        disposition = u'\r\nContent-Disposition: form-data; name="%s"' % key
        # handle multiple entries for the same name
        if not isinstance(values, list):
            values = [values]
        for entry in values:
            # Start every part from the bare disposition header so that the
            # filename of one upload never leaks into the following parts.
            title = disposition
            if isinstance(entry, tuple):
                title += u'; filename="%s"' % entry[0]
                content = entry[1]
            else:
                # in distutils: str(value).encode('utf-8')
                content = unicode_to_str(entry)
            body.write(sep_boundary)
            body.write(title.encode('utf-8'))
            body.write(b"\r\n\r\n")
            body.write(content)
    body.write(end_boundary)
    body.write(b"\r\n")
    return body.getvalue(), boundary

Esempio n. 8

0

Mostra file

File: html.py Progetto: fubuki/w3lib

def get_meta_refresh(text, baseurl='', encoding='utf-8'):
    """Look for a meta refresh declaration in the given HTML text and return
    a tuple ``(interval, url)``.

    ``interval`` is the delay in seconds as a float, parsed from the ``int``
    group of the module-level ``_meta_refresh_re`` pattern.  ``url`` is the
    matched ``url`` group, stripped of surrounding spaces and quotes, passed
    through ``safe_url_string`` and joined onto ``baseurl``.

    If no meta redirect is found, ``(None, None)`` is returned.

    Raises ``UnicodeDecodeError`` if ``text`` cannot be decoded with
    ``encoding``.
    """

    if six.PY2:
        baseurl = unicode_to_str(baseurl, encoding)
    # A decoding failure propagates unchanged.  The exception already carries
    # the offending input and position, so nothing is dumped to stdout here.
    text = str_to_unicode(text, encoding)
    text = remove_comments(remove_entities(text))
    m = _meta_refresh_re.search(text)
    if not m:
        return None, None
    interval = float(m.group('int'))
    url = safe_url_string(m.group('url').strip(' "\''), encoding)
    url = moves.urllib.parse.urljoin(baseurl, url)
    return interval, url

Esempio n. 9

0

Mostra file

File: form.py Progetto: Quebec-Python/web-scraping-101

def encode_multipart(data):
    """Encode the given data to be used in a multipart HTTP POST.

    ``data`` is a dictionary where keys are the field names, and values are
    either strings, ``(filename, content)`` tuples for file uploads, or a
    list of those when the same field name carries several entries.  The
    ``content`` of a tuple is written to the body unchanged.

    This code is based on distutils.command.upload.

    Return (body, boundary) tuple where ``body`` is binary body value,
    and ``boundary`` is the boundary used (as native string).
    """
    # Build up the MIME payload for the POST data
    boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
    sep_boundary = b'\r\n--' + boundary.encode('ascii')
    end_boundary = sep_boundary + b'--'
    body = BytesIO()
    for key, values in data.items():
        disposition = u'\r\nContent-Disposition: form-data; name="%s"' % key
        # handle multiple entries for the same name
        if not isinstance(values, list):
            values = [values]
        for entry in values:
            # Each part gets its own header; extending a shared one would
            # repeat earlier filenames on every later part of this field.
            title = disposition
            if isinstance(entry, tuple):
                title += u'; filename="%s"' % entry[0]
                content = entry[1]
            else:
                content = unicode_to_str(entry)  # in distutils: str(value).encode('utf-8')
            body.write(sep_boundary)
            body.write(title.encode('utf-8'))
            body.write(b"\r\n\r\n")
            body.write(content)
    body.write(end_boundary)
    body.write(b"\r\n")
    return body.getvalue(), boundary

Esempio n. 10

0

Mostra file

File: form.py Progetto: 668Jerry/rations

def encode_multipart(data):
    r"""

    .. warning::

        This function is deprecated and will be removed in future.
        Please use ``urllib3.filepost.encode_multipart_formdata`` instead.

    Encode the given data to be used in a multipart HTTP POST.

    `data` is a dictionary where keys are the field name, and values are
    either strings or tuples as `(filename, content)` for file uploads.
    A value may also be a list of those, producing one part per entry
    under the same field name.

    This code is based on :class:`distutils.command.upload`.

    Returns a `(body, boundary)` tuple where `body` is binary body value,
    and `boundary` is the boundary used (as native string).

    >>> import w3lib.form
    >>> w3lib.form.encode_multipart({'key': 'value'})
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key"\r\n\r\nvalue\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>> w3lib.form.encode_multipart({'key1': 'value1', 'key2': 'value2'})   # doctest: +SKIP
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key2"\r\n\r\nvalue2\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key1"\r\n\r\nvalue1\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>> w3lib.form.encode_multipart({'somekey': ('path/to/filename', b'\xa1\xa2\xa3\xa4\r\n\r')})
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="somekey"; filename="path/to/filename"\r\n\r\n\xa1\xa2\xa3\xa4\r\n\r\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>>

    """

    # stacklevel=2 attributes the warning to the caller, i.e. the code that
    # needs to migrate away from this function.
    warnings.warn(
        "`w3lib.form.encode_multipart` function is deprecated and "
        "will be removed in future releases. Please use "
        "`urllib3.filepost.encode_multipart_formdata` instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    # Build up the MIME payload for the POST data
    boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
    sep_boundary = b'\r\n--' + boundary.encode('ascii')
    end_boundary = sep_boundary + b'--'
    body = BytesIO()
    for key, values in data.items():
        disposition = u'\r\nContent-Disposition: form-data; name="%s"' % key
        # handle multiple entries for the same name
        if not isinstance(values, list):
            values = [values]
        for entry in values:
            # Start every part from the bare disposition header so that the
            # filename of one upload never leaks into the following parts.
            title = disposition
            if isinstance(entry, tuple):
                title += u'; filename="%s"' % entry[0]
                content = entry[1]
            else:
                content = unicode_to_str(entry)  # in distutils: str(value).encode('utf-8')
            body.write(sep_boundary)
            body.write(title.encode('utf-8'))
            body.write(b"\r\n\r\n")
            body.write(content)
    body.write(end_boundary)
    body.write(b"\r\n")
    return body.getvalue(), boundary

Esempio n. 11

0

Mostra file

def encode_multipart(data):
    r"""

    .. warning::

        This function is deprecated and will be removed in future.
        Please use ``urllib3.filepost.encode_multipart_formdata`` instead.

    Encode the given data to be used in a multipart HTTP POST.

    `data` is a dictionary where keys are the field name, and values are
    either strings or tuples as `(filename, content)` for file uploads.
    A value may also be a list of those, producing one part per entry
    under the same field name.

    This code is based on :class:`distutils.command.upload`.

    Returns a `(body, boundary)` tuple where `body` is binary body value,
    and `boundary` is the boundary used (as native string).

    >>> import w3lib.form
    >>> w3lib.form.encode_multipart({'key': 'value'})
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key"\r\n\r\nvalue\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>> w3lib.form.encode_multipart({'key1': 'value1', 'key2': 'value2'})   # doctest: +SKIP
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key2"\r\n\r\nvalue2\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key1"\r\n\r\nvalue1\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>> w3lib.form.encode_multipart({'somekey': ('path/to/filename', b'\xa1\xa2\xa3\xa4\r\n\r')})
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="somekey"; filename="path/to/filename"\r\n\r\n\xa1\xa2\xa3\xa4\r\n\r\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>>

    """

    # stacklevel=2 attributes the warning to the caller, i.e. the code that
    # needs to migrate away from this function.
    warnings.warn(
        "`w3lib.form.encode_multipart` function is deprecated and "
        "will be removed in future releases. Please use "
        "`urllib3.filepost.encode_multipart_formdata` instead.",
        DeprecationWarning,
        stacklevel=2)

    # Build up the MIME payload for the POST data
    boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
    sep_boundary = b'\r\n--' + boundary.encode('ascii')
    end_boundary = sep_boundary + b'--'
    body = BytesIO()
    for key, values in data.items():
        disposition = u'\r\nContent-Disposition: form-data; name="%s"' % key
        # handle multiple entries for the same name
        if not isinstance(values, list):
            values = [values]
        for entry in values:
            # Each part gets its own header; extending a shared one would
            # repeat earlier filenames on every later part of this field.
            title = disposition
            if isinstance(entry, tuple):
                title += u'; filename="%s"' % entry[0]
                content = entry[1]
            else:
                # in distutils: str(value).encode('utf-8')
                content = unicode_to_str(entry)
            body.write(sep_boundary)
            body.write(title.encode('utf-8'))
            body.write(b"\r\n\r\n")
            body.write(content)
    body.write(end_boundary)
    body.write(b"\r\n")
    return body.getvalue(), boundary

Esempio n. 12

0

Mostra file

File: html.py Progetto: xacprod/ve1

def get_base_url(text, baseurl='', encoding='utf-8'):
    """Resolve the base url declared in the given html text.

    The text is searched with the module-level ``_baseurl_re`` pattern.  On a
    match, the captured value is joined onto ``baseurl``; otherwise
    ``baseurl`` itself is used.  Either way the result is passed through
    ``safe_url_string`` before being returned.
    """
    decoded = str_to_unicode(text, encoding)
    base = unicode_to_str(baseurl, encoding)
    match = _baseurl_re.search(decoded)
    if not match:
        return safe_url_string(base)
    declared = match.group(1).encode(encoding)
    return safe_url_string(urljoin(base, declared))

Esempio n. 13

0

Mostra file

def safe_url_string(url, encoding='utf8'):
    """Turn the given url into a legal URL by escaping unsafe characters
    according to RFC-3986.

    The url is first passed through ``unicode_to_str`` with the given
    encoding (the default is 'utf8'), then quoted with ``urllib.quote``
    using the module-level ``_safe_chars`` as the set of characters to leave
    untouched.  When passing an encoding, use the encoding of the page the
    url was extracted from.

    Calling this function on an already "safe" url will return the url
    unmodified.

    Always returns a str.
    """
    encoded_url = unicode_to_str(url, encoding)
    quoted = urllib.quote(encoded_url, _safe_chars)
    return quoted

Esempio n. 14

0

Mostra file

    def __repr__(self):
        """Render the items as text: a dated header, then one line per item.

        ``self.date`` is filled in with ``time.ctime()`` when it is empty.
        Items are sorted by their "name" key and formatted with
        ``self.lformat``.  When ``self.olformat`` is set and
        ``self.items_old`` holds an entry for the same name, that entry is
        appended in square brackets, prefixed by its date.

        The assembled text is passed through ``unicode_to_str`` before
        returning.
        """
        self.date = self.date or time.ctime()
        pieces = [u"%s\n" % self.date, u"-" * len(self.date) + "\n"]
        for item in sorted(self.items, key=lambda entry: entry["name"]):
            pieces.append(self.lformat % item)
            if self.olformat:
                previous = self.items_old.get(item["name"])
                if previous:
                    stamp = previous["timestamp"]
                    label = "%s %s" % (_MONTHS[stamp.month - 1], stamp.day)
                    if self.show_year:
                        label += " %s" % stamp.year
                    pieces.append(
                        u" [%s: " % label + self.olformat % previous + "]")
            pieces.append("\n")

        return unicode_to_str(u"".join(pieces))

Esempio n. 15

0

Mostra file

File: printers.py Progetto: biddyweb/finance

    def __repr__(self):
        """Build the textual report: a date header followed by item lines.

        An empty ``self.date`` is replaced by ``time.ctime()``.  Each item,
        in order of its "name" key, is formatted through ``self.lformat``.
        If ``self.olformat`` is set and a matching entry exists in
        ``self.items_old``, the old entry follows in square brackets together
        with the month and day (and the year when ``self.show_year`` is set)
        of its timestamp.

        The result goes through ``unicode_to_str`` before it is returned.
        """
        self.date = self.date or time.ctime()
        header = u"%s\n" % self.date
        underline = u"-" * len(self.date) + "\n"
        lines = [header, underline]
        for item in sorted(self.items, key=lambda entry: entry["name"]):
            lines.append(self.lformat % item)
            if self.olformat:
                old = self.items_old.get(item["name"])
                if old:
                    when = old["timestamp"]
                    old_date = "%s %s" % (_MONTHS[when.month - 1], when.day)
                    if self.show_year:
                        old_date += " %s" % when.year
                    lines.append(
                        u" [%s: " % old_date + self.olformat % old + "]")
            lines.append("\n")

        return unicode_to_str(u"".join(lines))

Esempio n. 16

0

Mostra file

File: printers.py Progetto: biddyweb/finance

 def __repr__(self):
     """Build the textual report: a date header followed by index lines.

     An empty ``self.date`` is replaced by ``time.ctime()``.  Indexes are
     ordered by ``get_index_order`` of their name and printed as name, value
     and unit.  For the "commodity" and "stock" classes the value divided by
     the gold price is appended when a gold price is set.  A matching entry in
     ``self.indexes_old`` follows in square brackets with its date.

     The result goes through ``unicode_to_str`` before it is returned.
     """
     self.date = self.date or time.ctime()
     priced_in_gold = ["commodity", "stock"]
     parts = [u"%s\n" % self.date, u"-" * len(self.date) + "\n"]
     for index in sorted(self.indexes,
                         key=lambda entry: get_index_order(entry["name"])):
         parts.append(u"%(name)s: %(value)s%(unit)s" % index)
         if get_index_class(index["name"]) in priced_in_gold and self.gold_price:
             parts.append(u"(%.5fozAu)" % (index["value"] / self.gold_price))
         old = self.indexes_old.get(index["name"])
         if old:
             when = old["timestamp"]
             old_date = "%s %s" % (_MONTHS[when.month - 1], when.day)
             if self.show_year:
                 old_date += " %s" % when.year
             parts.append(u" [%s: %s%s" % (old_date, old["value"], old["unit"]))
             if (get_index_class(old["name"]) in priced_in_gold
                     and self.gold_price_old):
                 parts.append(
                     u" (%.5fozAu)" % (old["value"] / self.gold_price_old))
             parts.append("]")
         parts.append(u"\n")
     return unicode_to_str(u"".join(parts))

Esempio n. 17

0

Mostra file

def jsonrpc_client_call(url, method, *args, **kwargs):
    """Execute a JSON-RPC call on the given url.

    ``args`` or ``kwargs`` (never both) are sent as the call's ``params``.
    The request is sent with ``urllib.request.urlopen`` and the response body
    is decoded as UTF-8 JSON.

    Returns the ``result`` member of the response.

    Raises ``ValueError`` if both positional and keyword arguments are given,
    or if the response contains neither ``result`` nor ``error``.  Raises
    ``JsonRpcError`` if the response contains an ``error`` member.
    """
    if args and kwargs:
        raise ValueError(
            "Pass *args or **kwargs but not both to jsonrpc_client_call")
    req = {
        'jsonrpc': '2.0',
        'method': method,
        'params': args or kwargs,
        'id': 1
    }
    data = unicode_to_str(json.dumps(req))
    response = urllib.request.urlopen(url, data)
    # Close the connection even when reading fails.  try/finally is used
    # because not every urlopen implementation returns a context manager.
    try:
        body = response.read()
    finally:
        response.close()
    res = json.loads(body.decode('utf-8'))
    if 'result' in res:
        return res['result']
    if 'error' in res:
        er = res['error']
        # "data" is an optional member of a JSON-RPC 2.0 error object.
        raise JsonRpcError(er['code'], er['message'], er.get('data'))
    msg = "JSON-RPC response must contain 'result' or 'error': %s" % res
    raise ValueError(msg)

Esempio n. 18

0

Mostra file

File: html.py Progetto: AaronMT/spade

def get_meta_refresh(text, baseurl="", encoding="utf-8"):
    """Look for a meta refresh declaration in the given HTML text and return
    a tuple (interval, url).

    interval is the delay in seconds as a float, parsed from the "int" group
    of the module-level _meta_refresh_re pattern.  url is the matched "url"
    group, stripped of surrounding spaces and quotes, passed through
    safe_url_string and joined onto baseurl.

    If no meta redirect is found, (None, None) is returned.

    Raises UnicodeDecodeError if text cannot be decoded with encoding.
    """
    baseurl = unicode_to_str(baseurl, encoding)
    # A decoding failure propagates unchanged.  The exception already carries
    # the offending input and position, so nothing is dumped to stdout here
    # (the former print statement was also a syntax error on Python 3).
    text = str_to_unicode(text, encoding)
    text = remove_comments(remove_entities(text))
    m = _meta_refresh_re.search(text)
    if not m:
        return None, None
    interval = float(m.group("int"))
    url = safe_url_string(m.group("url").strip(" \"'"))
    url = urljoin(baseurl, url)
    return interval, url

Esempio n. 19

0

Mostra file

def parse_url(url, encoding=None):
    """Return the urlparsed form of the given argument.

    A ``urlparse.ParseResult`` is handed back untouched; anything else is
    passed through ``unicode_to_str`` with ``encoding`` and then parsed with
    ``urlparse.urlparse``.
    """
    if isinstance(url, urlparse.ParseResult):
        return url
    return urlparse.urlparse(unicode_to_str(url, encoding))

Esempio n. 20

0

Mostra file

File: url.py Progetto: StardustZhou/crawl-frontier

def parse_url(url, encoding=None):
    """Return a parsed url for the given argument.

    If ``url`` is already a ``urlparse.ParseResult`` it is returned as is.
    Otherwise it is converted with ``unicode_to_str`` using ``encoding`` and
    the result of ``urlparse.urlparse`` on it is returned.
    """
    already_parsed = isinstance(url, urlparse.ParseResult)
    if already_parsed:
        return url
    encoded_url = unicode_to_str(url, encoding)
    return urlparse.urlparse(encoded_url)

Esempio n. 21

0

Mostra file

File: test_util.py Progetto: scrapy/w3lib

 def test_deprecation(self):
     """Call ``unicode_to_str`` on an empty string under ``deprecated_call``."""
     expect_deprecation = deprecated_call()
     with expect_deprecation:
         unicode_to_str("")