Exemple #1
0
def autodiscovery(document, url):
    """Find the feed links that belong to the given ``document``.

    If the given url refers to an actual feed, it returns the given url
    without any change (wrapped in a single :class:`FeedLink`).

    If the given url is a url of an ordinary web page
    (i.e. :mimetype:`text/html`), it finds the urls of the corresponding
    feeds.  It returns feed urls in feed types' lexicographical order.
    Every feed url that is written relative to the page (``/feed``,
    ``feed.xml``, ``../atom.xml``, ``//host/feed``, ...) is resolved
    against ``url``; urls that are already absolute are left untouched.

    If autodiscovery failed, it raises :exc:`FeedUrlNotFoundError`.

    :param document: html, or xml strings
    :type document: :class:`str`
    :param url: the url used to retrieve the ``document``.
                if feed url is in html and represented in relative url,
                it will be rebuilt on top of the ``url``
    :type url: :class:`str`
    :returns: list of :class:`FeedLink` objects
    :rtype: :class:`collections.MutableSequence`
    :raises FeedUrlNotFoundError: when the document is not a feed and
                                  no feed link could be found in it

    """
    document = text(document)
    document_type = get_format(document)
    if document_type is not None:
        # The document itself is a feed, so the url it was fetched from
        # already is the feed url.
        return [FeedLink(TYPE_TABLE[document_type], url)]
    feed_links = AutoDiscovery().find_feed_url(document)
    if not feed_links:
        raise FeedUrlNotFoundError('Cannot find feed url')
    for index, link in enumerate(feed_links):
        # urljoin() returns absolute urls unchanged, so it is safe to run
        # every link through it; this also covers relative forms that do
        # not start with a slash (e.g. ``feed.xml`` or ``../atom.xml``).
        absolute_url = urlparse.urljoin(url, link.url)
        if absolute_url != link.url:
            feed_links[index] = FeedLink(link.type, absolute_url)
    return feed_links
Exemple #2
0
def string_chunks(consume_log, *chunks):
    """Lazily yield the given text ``chunks`` as byte strings while
    recording how far the consumer has read, so that the laziness of
    a parser can be tested.

    An argument that is a :class:`list` is not yielded.  It is an offset
    tag: its first element is appended to ``consume_log`` at the moment
    the chunk right before it is about to be yielded.

    :param consume_log: the list that collects the offset tags consumed
                        so far
    :param chunks: text chunks, optionally interleaved with one-element
                   lists used as offset tags
    :returns: a generator of binary chunks

    """
    total = len(chunks)
    for position, piece in enumerate(chunks):
        if type(piece) is list:
            # Offset tags are only logged, never emitted.
            continue
        piece = text(piece)
        following = position + 1
        if following < total and type(chunks[following]) is list:
            consume_log.append(chunks[following][0])
        if isinstance(piece, binary_type):
            yield piece
        else:
            # In IronPython str.encode() returns str instead of bytes,
            # and bytes(str, encoding) returns bytes.
            yield binary_type(piece, 'utf-8')
Exemple #3
0
def string_chunks(consume_log, *chunks):
    """Generate the given ``chunks`` of a text one at a time, logging
    the consumed offsets along the way; used to check that a parser
    reads its input lazily.

    Arguments of type :class:`list` are treated as offset tags rather
    than text: they are skipped in the output, and the first element of
    a tag is pushed onto ``consume_log`` just before the chunk that
    precedes the tag is yielded.

    :param consume_log: a list that receives the offset tags reached
    :param chunks: text chunks and offset tags (lists holding a string)
    :returns: a generator of binary chunks

    """
    count = len(chunks)
    for idx, part in enumerate(chunks):
        is_tag = type(part) is list
        if is_tag:
            continue
        part = text(part)
        has_successor = count > idx + 1
        if has_successor and type(chunks[idx + 1]) is list:
            tag = chunks[idx + 1]
            consume_log.append(tag[0])
        needs_encoding = not isinstance(part, binary_type)
        if needs_encoding:
            # str.encode() gives str rather than bytes on IronPython,
            # whereas bytes(str, encoding) gives bytes everywhere.
            part = binary_type(part, 'utf-8')
        yield part