Example #1
def async_get_rss(url, convert_charrefs=False):
    # convert_charrefs is unused here; it's apparently accepted only for
    # API parity with asyncGetRSS (Example #3)
    try:
        f = yield async_url_open(url, timeout=TIMEOUT)
    except ValueError:
        # async_url_open presumably rejects non-URL input with ValueError;
        # fall back to treating the string itself as the feed content,
        # one non-empty line at a time
        f = filter(None, url.splitlines())

    # lenient=True lets microdom tolerate not-quite-well-formed markup
    document = microdom.parse(f, lenient=True)
    return_value(doc2entries(document))
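This snippet is a Twisted inlineCallbacks-style generator coroutine; the doctests further down this page drive such functions with riko.bado's react helper and a fake reactor. A minimal usage sketch in that style (assuming async_get_rss carries riko.bado's coroutine decorator, which the snippet omits, and with a stand-in URL):

from riko.bado import coroutine, react
from riko.bado.mock import FakeReactor

@coroutine
def run(reactor):
    # doc2entries presumably yields the feed's entries as dicts
    entries = yield async_get_rss('http://example.com/feed.rss')
    print(list(entries))

try:
    react(run, _reactor=FakeReactor())
except SystemExit:
    pass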
Example #2
File: csv.py Project: Fuzzwah/riko
def async_parser(_, objconf, skip, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from riko.lib.utils import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x[0])['mileage'])
        ...     url = get_path('spreadsheet.csv')
        ...     conf = {'url': url, 'sanitize': True, 'skip_rows': 0}
        ...     objconf = Objectify(conf)
        ...     d = async_parser(None, objconf, False, stream={})
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        7213
    """
    if skip:
        stream = kwargs['stream']
    else:
        # TODO: write function to extract encoding from response
        url = utils.get_abspath(objconf.url)
        response = yield io.async_url_open(url)
        first_row, custom_header = objconf.skip_rows, objconf.col_names
        renamed = {'first_row': first_row, 'custom_header': custom_header}
        rkwargs = utils.combine_dicts(objconf, renamed)
        rkwargs['encoding'] = objconf.encoding
        stream = read_csv(response, **rkwargs)

    result = (stream, skip)
    return_value(result)
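Note that this older API threads skip back to the caller: the deferred result is a (stream, skip) tuple rather than the stream itself. The nerevu/riko revision in Example #4 defaults skip to False and returns just the stream.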
Example #3
def asyncGetRSS(url, convert_charrefs=False):
    # TODO: implement via an async parser
    # maybe get twisted.web.microdom.parse working for HTML
    try:
        parser = LinkParser(convert_charrefs=convert_charrefs)
    except TypeError:
        # HTMLParser grew the convert_charrefs argument in Python 3.4;
        # older versions raise TypeError, so retry without it
        parser = LinkParser()

    try:
        f = yield async_url_open(url, timeout=TIMEOUT)
    except ValueError:
        # presumably not a fetchable URL: treat the string itself as the
        # content, one non-empty line at a time
        f = filter(None, url.splitlines())

    return_value(gen_entries(f, parser))
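LinkParser itself isn't defined in any of these snippets. Purely as an illustration, a minimal sketch of what such a parser might look like (only the class name comes from the snippet; everything else here is hypothetical):

from html.parser import HTMLParser

class LinkParser(HTMLParser):
    # collect href values from anchor tags as markup is fed in
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.links = []

    def handle_starttag(self, tag, attrs):
        if tag == 'a':
            self.links.extend(value for name, value in attrs if name == 'href')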
Example #4
File: csv.py Project: nerevu/riko
def async_parser(_, objconf, skip=False, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from meza.fntools import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x)['mileage'])
        ...     url = get_path('spreadsheet.csv')
        ...     conf = {
        ...         'url': url, 'sanitize': True, 'skip_rows': 0,
        ...         'encoding': ENCODING}
        ...     objconf = Objectify(conf)
        ...     d = async_parser(None, objconf, stream={})
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        7213
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = get_abspath(objconf.url)
        r = yield io.async_url_open(url)
        first_row, custom_header = objconf.skip_rows, objconf.col_names
        renamed = {'first_row': first_row, 'custom_header': custom_header}
        rkwargs = merge([objconf, renamed])
        stream = auto_close(read_csv(r, **rkwargs), r)

    return_value(stream)
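Compared with the Fuzzwah/riko version in Example #2, this revision returns the stream alone rather than a (stream, skip) tuple, imports Objectify from meza.fntools instead of riko.lib.utils, and wraps the reader in auto_close, presumably so the HTTP response is closed once the generated stream is exhausted instead of being closed eagerly or leaked.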
Example #5
def async_parser(_, objconf, skip, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from riko.lib.utils import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(x[0][0]['title'])
        ...     url = get_path('gigs.json')
        ...     objconf = Objectify({'url': url, 'path': 'value.items'})
        ...     d = async_parser(None, objconf, False, stream={})
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        Business System Analyst
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)
        ext = splitext(url)[1].lstrip('.')
        f = yield io.async_url_open(url)
        stream = utils.any2dict(f, ext, objconf.html5, path=objconf.path)
        f.close()

    result = (stream, skip)
    return_value(result)
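Here f.close() runs immediately after parsing, which is presumably safe because any2dict consumes the whole response up front; contrast the csv parser in Example #4, which defers closing via auto_close because its stream is consumed lazily.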
Example #6
def async_parser(_, objconf, skip=False, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from meza.fntools import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x)['content'])
        ...     url = get_path('lorem.txt')
        ...     objconf = Objectify({'url': url, 'encoding': ENCODING})
        ...     d = async_parser(None, objconf, assign='content')
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        What is Lorem Ipsum?
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = get_abspath(objconf.url)
        f = yield io.async_url_open(url)
        assign = kwargs['assign']
        encoding = objconf.encoding
        _stream = ({assign: line.strip().decode(encoding)} for line in f)
        stream = auto_close(_stream, f)

    return_value(stream)
Example #7
def async_parser(_, objconf, skip=False, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from meza.fntools import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(x[0]['title'])
        ...     url = get_path('gigs.json')
        ...     objconf = Objectify({'url': url, 'path': 'value.items'})
        ...     d = async_parser(None, objconf, stream={})
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        Business System Analyst
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = get_abspath(objconf.url)
        ext = p.splitext(url)[1].lstrip('.')
        f = yield io.async_url_open(url)
        stream = any2dict(f, ext, objconf.html5, path=objconf.path)
        f.close()

    return_value(stream)
Example #8
def async_parser(_, objconf, skip=False, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (default: content)
        stream (dict): The original item

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from meza.fntools import Objectify
        >>>
        >>> @coroutine
        ... def run(reactor):
        ...     xml_url = get_path('ouseful.xml')
        ...     xml_conf = {'url': xml_url, 'xpath': '/rss/channel/item'}
        ...     xml_objconf = Objectify(xml_conf)
        ...     xml_args = (None, xml_objconf)
        ...     html_url = get_path('sciencedaily.html')
        ...     html_conf = {'url': html_url, 'xpath': '/html/head/title'}
        ...     html_objconf = Objectify(html_conf)
        ...     html_args = (None, html_objconf)
        ...     kwargs = {'stream': {}}
        ...
        ...     try:
        ...         xml_stream = yield async_parser(*xml_args, **kwargs)
        ...         html_stream = yield async_parser(*html_args, **kwargs)
        ...         print(next(xml_stream)['title'][:44])
        ...         print(next(html_stream))
        ...     except Exception as e:
        ...         logger.error(e)
        ...         logger.error(traceback.format_exc())
        ...
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        Running “Native” Data Wrangling Applications
        Help Page -- ScienceDaily
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = get_abspath(objconf.url)
        ext = splitext(url)[1].lstrip('.')
        xml = (ext == 'xml') or objconf.strict

        try:
            f = yield io.async_url_open(url)
            tree = yield util.xml2etree(f, xml=xml)
        except Exception as e:
            # NOTE: this only logs; if the fetch or parse fails, `f` and
            # `tree` are never bound and the lines below raise NameError
            logger.error(e)
            logger.error(traceback.format_exc())

        elements = xpath(tree, objconf.xpath)
        f.close()
        items = map(util.etree2dict, elements)
        stringified = ({kwargs['assign']: encode(i)} for i in items)
        stream = stringified if objconf.stringify else items

    return_value(stream)