Esempio n. 1
0
File: yql.py Progetto: nerevu/riko
def parser(_, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item. Required when `skip` is true, in
            which case it is returned untouched.
        response (obj): An already opened response to parse. When missing or
            falsy, the query is fetched using `objconf`. A supplied response
            is left open for the caller to close.

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from riko.utils import get_abspath
        >>> from meza.fntools import Objectify
        >>>
        >>> feed = 'http://feeds.feedburner.com/TechCrunch/'
        >>> url = 'http://query.yahooapis.com/v1/public/yql'
        >>> query = "select * from feed where url='%s'" % feed
        >>> conf = {'query': query, 'url': url, 'debug': False}
        >>> objconf = Objectify(conf)
        >>> url = get_abspath(get_path('yql.xml'))
        >>>
        >>> with fetch(url) as f:
        ...     kwargs = {'stream': {}, 'response': f}
        ...     result = parser(None, objconf, **kwargs)
        >>>
        >>> next(result)['title']
        'Bring pizza home'
    """
    if skip:
        return kwargs['stream']

    response = kwargs.get('response')

    if response:
        # The caller owns a supplied response, so it is not closed here.
        root = xml2etree(response).getroot()
    else:
        params = {'q': objconf.query, 'diagnostics': objconf.debug}

        if objconf.memoize and not objconf.cache_type:
            objconf.cache_type = 'auto'

        # Parse inside the `with` block so the response opened here is
        # released once the tree has been built instead of being leaked.
        with fetch(params=params, **objconf) as f:
            root = xml2etree(f).getroot()

    # TODO: consider paging for large result sets
    results = root.find('results')
    return map(etree2dict, results)
Esempio n. 2
0
File: yql.py Progetto: sottom/riko
def parser(_, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item. Required when `skip` is true, in
            which case it is returned untouched.
        response (obj): An already opened response to parse. When missing or
            falsy, the query is fetched using `objconf`. A supplied response
            is left open for the caller to close.

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from riko.utils import get_abspath
        >>> from meza.fntools import Objectify
        >>>
        >>> feed = 'http://feeds.feedburner.com/TechCrunch/'
        >>> url = 'http://query.yahooapis.com/v1/public/yql'
        >>> query = "select * from feed where url='%s'" % feed
        >>> conf = {'query': query, 'url': url, 'debug': False}
        >>> objconf = Objectify(conf)
        >>> url = get_abspath(get_path('yql.xml'))
        >>>
        >>> with fetch(url) as f:
        ...     kwargs = {'stream': {}, 'response': f}
        ...     result = parser(None, objconf, **kwargs)
        >>>
        >>> next(result)['title']
        'Bring pizza home'
    """
    if skip:
        return kwargs['stream']

    response = kwargs.get('response')

    if response:
        # A supplied response belongs to the caller; leave it open.
        root = xml2etree(response).getroot()
    else:
        params = {'q': objconf.query, 'diagnostics': objconf.debug}

        if objconf.memoize and not objconf.cache_type:
            objconf.cache_type = 'auto'

        # The response opened here is closed as soon as the tree is built,
        # rather than being left open for the lifetime of the stream.
        with fetch(params=params, **objconf) as f:
            root = xml2etree(f).getroot()

    # TODO: consider paging for large result sets
    results = root.find('results')
    return map(etree2dict, results)
Esempio n. 3
0
def parser(_, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item. Required when `skip` is true, in
            which case it is returned untouched.
        assign (str): Key under which each stringified item is stored.
            Only looked up when `objconf.stringify` is truthy.

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from meza.fntools import Objectify
        >>> from riko import get_path
        >>>
        >>> url = get_path('ouseful.xml')
        >>> objconf = Objectify({'url': url, 'xpath': '/rss/channel/item'})
        >>> result = parser(None, objconf, stream={})
        >>> title = 'Running “Native” Data Wrangling Applications'
        >>> next(result)['title'][:44] == title
        True
    """
    if skip:
        return kwargs['stream']

    # An 'xml' file extension or a truthy `strict` setting decides the `xml`
    # flag handed to xml2etree.
    extension = splitext(get_abspath(objconf.url))[1].lstrip('.')
    use_xml = (extension == 'xml') or objconf.strict

    if objconf.memoize and not objconf.cache_type:
        objconf.cache_type = 'auto'

    with fetch(**objconf) as f:
        tree = xml2etree(f, xml=use_xml, html5=objconf.html5)
        matches = xpath(tree.getroot(), objconf.xpath)

    parsed = map(etree2dict, matches)

    if objconf.stringify:
        # `kwargs['assign']` is read lazily, once per item consumed.
        return ({kwargs['assign']: str(entry)} for entry in parsed)

    return parsed
Esempio n. 4
0
def parser(_, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item. Required when `skip` is true, in
            which case it is returned untouched.
        assign (str): Key under which each stringified item is stored.
            Only looked up when `objconf.stringify` is truthy.

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from meza.fntools import Objectify
        >>> from riko import get_path
        >>>
        >>> url = get_path('ouseful.xml')
        >>> objconf = Objectify({'url': url, 'xpath': '/rss/channel/item'})
        >>> result = parser(None, objconf, stream={})
        >>> title = 'Running “Native” Data Wrangling Applications'
        >>> next(result)['title'][:44] == title
        True
    """
    def as_strings(entries):
        # Wrap each item's string form in a one-key dict; the key is looked
        # up lazily, as each item is consumed.
        for entry in entries:
            yield {kwargs['assign']: str(entry)}

    if skip:
        return kwargs['stream']

    path = get_abspath(objconf.url)
    suffix = splitext(path)[1].lstrip('.')
    # An 'xml' suffix or a truthy `strict` setting decides the `xml` flag
    # handed to xml2etree.
    as_xml = (suffix == 'xml') or objconf.strict

    with fetch(**objconf) as f:
        document = xml2etree(f, xml=as_xml, html5=objconf.html5)
        nodes = xpath(document.getroot(), objconf.xpath)

    records = map(etree2dict, nodes)
    return as_strings(records) if objconf.stringify else records