コード例 #1
0
ファイル: fetchsitefeed.py プロジェクト: Fuzzwah/riko
def parser(_, objconf, skip, **kwargs):
    """ Parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.lib.utils import Objectify
        >>>
        >>> objconf = Objectify({'url': get_path('bbc.html')})
        >>> result, skip = parser(None, objconf, False, stream={})
        >>> next(result)['title'] == 'Using NFC tags in the car'
        True
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)
        rss = autorss.get_rss(url)
        link = utils.get_abspath(next(rss)['link'])
        parsed = utils.parse_rss(link)
        stream = utils.gen_entries(parsed)

    return stream, skip
コード例 #2
0
ファイル: fetchsitefeed.py プロジェクト: zmyer/riko
def parser(_, objconf, skip, **kwargs):
    """ Parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.lib.utils import Objectify
        >>>
        >>> objconf = Objectify({'url': get_path('bbc.html')})
        >>> result, skip = parser(None, objconf, False, stream={})
        >>> next(result)['title'] == 'Using NFC tags in the car'
        True
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)
        rss = autorss.get_rss(url)
        link = utils.get_abspath(next(rss)['link'])
        parsed = utils.parse_rss(link)
        stream = utils.gen_entries(parsed)

    return stream, skip
コード例 #3
0
ファイル: fetch.py プロジェクト: Fuzzwah/riko
def parser(_, objconf, skip, **kwargs):
    """ Parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item
        conf (dict): The pipe configuration

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.lib.utils import Objectify
        >>>
        >>> objconf = Objectify({'url': get_path('feed.xml'), 'sleep': 0})
        >>> result, skip = parser(None, objconf, False, stream={})
        >>> next(result)['title'] == 'Donations'
        True
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)
        parsed = utils.parse_rss(url, objconf.sleep)
        stream = utils.gen_entries(parsed)

    return stream, skip
コード例 #4
0
def parser(_, objconf, skip, **kwargs):
    """ Parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.lib.utils import Objectify
        >>>
        >>> url = get_path('spreadsheet.csv')
        >>> conf = {'url': url, 'sanitize': True, 'skip_rows': 0}
        >>> objconf = Objectify(conf)
        >>> result, skip = parser(None, objconf, False, stream={})
        >>> next(result)['mileage'] == '7213'
        True
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)
        first_row, custom_header = objconf.skip_rows, objconf.col_names
        renamed = {'first_row': first_row, 'custom_header': custom_header}
        response = urlopen(url)
        encoding = utils.get_response_encoding(response, objconf.encoding)
        rkwargs = utils.combine_dicts(objconf, renamed)
        rkwargs['encoding'] = encoding
        stream = read_csv(response, **rkwargs)

    return stream, skip
コード例 #5
0
ファイル: csv.py プロジェクト: Fuzzwah/riko
def parser(_, objconf, skip, **kwargs):
    """ Parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.lib.utils import Objectify
        >>>
        >>> url = get_path('spreadsheet.csv')
        >>> conf = {'url': url, 'sanitize': True, 'skip_rows': 0}
        >>> objconf = Objectify(conf)
        >>> result, skip = parser(None, objconf, False, stream={})
        >>> next(result)['mileage'] == '7213'
        True
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)
        first_row, custom_header = objconf.skip_rows, objconf.col_names
        renamed = {'first_row': first_row, 'custom_header': custom_header}
        response = urlopen(url)
        encoding = utils.get_response_encoding(response, objconf.encoding)
        rkwargs = utils.combine_dicts(objconf, renamed)
        rkwargs['encoding'] = encoding
        stream = read_csv(response, **rkwargs)

    return stream, skip
コード例 #6
0
ファイル: feedautodiscovery.py プロジェクト: Fuzzwah/riko
def parser(_, objconf, skip, **kwargs):
    """ Parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.lib.utils import Objectify
        >>>
        >>> objconf = Objectify({'url': get_path('bbc.html')})
        >>> result, skip = parser(None, objconf, False, stream={})
        >>> next(result)['link'] == 'file://riko/data/greenhughes.xml'
        True
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)
        stream = autorss.get_rss(url)

    return stream, skip
コード例 #7
0
def parser(_, objconf, skip, **kwargs):
    """ Parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item
        conf (dict): The pipe configuration

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.lib.utils import Objectify
        >>>
        >>> objconf = Objectify({'url': get_path('feed.xml'), 'sleep': 0})
        >>> result, skip = parser(None, objconf, False, stream={})
        >>> next(result)['title'] == 'Donations'
        True
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)
        parsed = utils.parse_rss(url, objconf.sleep)
        stream = utils.gen_entries(parsed)

    return stream, skip
コード例 #8
0
def parser(_, objconf, skip, **kwargs):
    """ Parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.lib.utils import Objectify
        >>>
        >>> url = get_path('gigs.json')
        >>> objconf = Objectify({'url': url, 'path': 'value.items'})
        >>> result, skip = parser(None, objconf, False, stream={})
        >>> result[0]['title'] == 'Business System Analyst'
        True
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)
        ext = splitext(url)[1].lstrip('.')

        with closing(urlopen(url)) as f:
            stream = utils.any2dict(f, ext, objconf.html5, path=objconf.path)

    return stream, skip
コード例 #9
0
ファイル: fetchsitefeed.py プロジェクト: Fuzzwah/riko
def async_parser(_, objconf, skip, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from riko.lib.utils import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x[0])['title'])
        ...     objconf = Objectify({'url': get_path('bbc.html')})
        ...     d = async_parser(None, objconf, False, stream={})
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        Using NFC tags in the car
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)
        rss = yield autorss.asyncGetRSS(url)
        link = utils.get_abspath(next(rss)['link'])
        content = yield io.async_url_read(link)
        parsed = utils.parse_rss(content)
        stream = utils.gen_entries(parsed)

    result = (stream, skip)
    return_value(result)
コード例 #10
0
ファイル: fetchsitefeed.py プロジェクト: zmyer/riko
def async_parser(_, objconf, skip, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from riko.lib.utils import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x[0])['title'])
        ...     objconf = Objectify({'url': get_path('bbc.html')})
        ...     d = async_parser(None, objconf, False, stream={})
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        Using NFC tags in the car
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)
        rss = yield autorss.asyncGetRSS(url)
        link = utils.get_abspath(next(rss)['link'])
        content = yield io.async_url_read(link)
        parsed = utils.parse_rss(content)
        stream = utils.gen_entries(parsed)

    result = (stream, skip)
    return_value(result)
コード例 #11
0
def async_parser(base, objconf, skip, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        base (str): The base currency (exchanging from)
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (default: exchangerate)
        stream (dict): The original item

    Returns:
        Deferred: twisted.internet.defer.Deferred Tuple of (item, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from riko.lib.utils import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(x[0])
        ...     url = get_path('quote.json')
        ...     conf = {
        ...         'url': url, 'currency': 'USD', 'sleep': 0, 'precision': 6}
        ...     item = {'content': 'GBP'}
        ...     objconf = Objectify(conf)
        ...     kwargs = {'stream': item, 'assign': 'content'}
        ...     d = async_parser(item['content'], objconf, False, **kwargs)
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        1.545801
    """
    if skip:
        rate = kwargs['stream']
    elif objconf.url.startswith('http'):
        r = yield treq.get(objconf.url, params=objconf.params)
        json = yield treq.json(r)
    else:
        url = utils.get_abspath(objconf.url)
        content = yield io.async_url_read(url, delay=objconf.sleep)
        json = loads(decode(content))

    if not skip:
        places = Decimal(10) ** -objconf.precision
        rates = parse_response(json)
        rate = calc_rate(base, objconf.currency, rates, places=places)

    result = (rate, skip)
    return_value(result)
コード例 #12
0
def parser(base, objconf, skip, **kwargs):
    """ Parses the pipe content

    Args:
        base (str): The base currency (exchanging from)
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (default: exchangerate)
        stream (dict): The original item

    Returns:
        Tuple(dict, bool): Tuple of (item, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.lib.utils import Objectify
        >>>
        >>> url = get_path('quote.json')
        >>> conf = {'url': url, 'currency': 'USD', 'sleep': 0, 'precision': 6}
        >>> item = {'content': 'GBP'}
        >>> objconf = Objectify(conf)
        >>> kwargs = {'stream': item, 'assign': 'content'}
        >>> result, skip = parser(item['content'], objconf, False, **kwargs)
        >>> result
        Decimal('1.545801')
    """
    if skip:
        rate = kwargs['stream']
    elif objconf.url.startswith('http'):
        get = partial(requests.get, stream=True)
        sget = utils.memoize(utils.HALF_DAY)(get) if objconf.memoize else get
        r = sget(objconf.url, params=objconf.params)
        json = next(items(r.raw, ''))
    else:
        context = utils.SleepyDict(delay=objconf.sleep)
        url = utils.get_abspath(objconf.url)

    try:
        with closing(urlopen(url, context=context)) as f:
            json = next(items(f, ''))
    except TypeError:
        with closing(urlopen(url)) as f:
            json = next(items(f, ''))

    if not skip:
        places = Decimal(10) ** -objconf.precision
        rates = parse_response(json)
        rate = calc_rate(base, objconf.currency, rates, places=places)

    return rate, skip
コード例 #13
0
ファイル: fetchpage.py プロジェクト: Fuzzwah/riko
def async_parser(_, objconf, skip, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (default: content)
        stream (dict): The original item

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from riko.lib.utils import Objectify
        >>> from meza._compat import decode
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(decode(next(x[0])['content'][:32]))
        ...     url = get_path('cnn.html')
        ...     conf = {'url': url, 'start': '<title>', 'end': '</title>'}
        ...     objconf = Objectify(conf)
        ...     kwargs = {'stream': {}, 'assign': 'content'}
        ...     d = async_parser(None, objconf, False, **kwargs)
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        CNN.com International - Breaking
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)
        content = yield io.async_url_read(url)
        parsed = get_string(content, objconf.start, objconf.end)
        detagged = get_text(parsed) if objconf.detag else parsed
        splits = detagged.split(objconf.token) if objconf.token else [detagged]
        stream = ({kwargs['assign']: chunk} for chunk in splits)

    result = (stream, skip)
    return_value(result)
コード例 #14
0
ファイル: fetchpage.py プロジェクト: zmyer/riko
def async_parser(_, objconf, skip, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (default: content)
        stream (dict): The original item

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from riko.lib.utils import Objectify
        >>> from meza._compat import decode
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(decode(next(x[0])['content'][:32]))
        ...     url = get_path('cnn.html')
        ...     conf = {'url': url, 'start': '<title>', 'end': '</title>'}
        ...     objconf = Objectify(conf)
        ...     kwargs = {'stream': {}, 'assign': 'content'}
        ...     d = async_parser(None, objconf, False, **kwargs)
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        CNN.com International - Breaking
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)
        content = yield io.async_url_read(url)
        parsed = get_string(content, objconf.start, objconf.end)
        detagged = get_text(parsed) if objconf.detag else parsed
        splits = detagged.split(objconf.token) if objconf.token else [detagged]
        stream = ({kwargs['assign']: chunk} for chunk in splits)

    result = (stream, skip)
    return_value(result)
コード例 #15
0
def async_parser(_, objconf, skip, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from riko.lib.utils import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x[0])['mileage'])
        ...     url = get_path('spreadsheet.csv')
        ...     conf = {'url': url, 'sanitize': True, 'skip_rows': 0}
        ...     objconf = Objectify(conf)
        ...     d = async_parser(None, objconf, False, stream={})
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        7213
    """
    if skip:
        stream = kwargs['stream']
    else:
        # TODO: write function to extract encoding from response
        url = utils.get_abspath(objconf.url)
        response = yield io.async_url_open(url)
        first_row, custom_header = objconf.skip_rows, objconf.col_names
        renamed = {'first_row': first_row, 'custom_header': custom_header}
        rkwargs = utils.combine_dicts(objconf, renamed)
        rkwargs['encoding'] = objconf.encoding
        stream = read_csv(response, **rkwargs)

    result = (stream, skip)
    return_value(result)
コード例 #16
0
ファイル: csv.py プロジェクト: Fuzzwah/riko
def async_parser(_, objconf, skip, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from riko.lib.utils import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x[0])['mileage'])
        ...     url = get_path('spreadsheet.csv')
        ...     conf = {'url': url, 'sanitize': True, 'skip_rows': 0}
        ...     objconf = Objectify(conf)
        ...     d = async_parser(None, objconf, False, stream={})
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        7213
    """
    if skip:
        stream = kwargs['stream']
    else:
        # TODO: write function to extract encoding from response
        url = utils.get_abspath(objconf.url)
        response = yield io.async_url_open(url)
        first_row, custom_header = objconf.skip_rows, objconf.col_names
        renamed = {'first_row': first_row, 'custom_header': custom_header}
        rkwargs = utils.combine_dicts(objconf, renamed)
        rkwargs['encoding'] = objconf.encoding
        stream = read_csv(response, **rkwargs)

    result = (stream, skip)
    return_value(result)
コード例 #17
0
def async_parser(_, objconf, skip, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from riko.lib.utils import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(x[0][0]['title'])
        ...     url = get_path('gigs.json')
        ...     objconf = Objectify({'url': url, 'path': 'value.items'})
        ...     d = async_parser(None, objconf, False, stream={})
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        Business System Analyst
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)
        ext = splitext(url)[1].lstrip('.')
        f = yield io.async_url_open(url)
        stream = utils.any2dict(f, ext, objconf.html5, path=objconf.path)
        f.close()

    result = (stream, skip)
    return_value(result)
コード例 #18
0
ファイル: fetch.py プロジェクト: Fuzzwah/riko
def async_parser(_, objconf, skip, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item
        conf (dict): The pipe configuration

    Returns:
        Deferred: twisted.internet.defer.Deferred Tuple(Iter[dict], bool)

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from riko.lib.utils import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x[0])['title'])
        ...     objconf = Objectify({'url': get_path('feed.xml'), 'sleep': 0})
        ...     d = async_parser(None, objconf, False, stream={})
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        Donations
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)
        content = yield io.async_url_read(url, delay=objconf.sleep)
        parsed = utils.parse_rss(content)
        stream = utils.gen_entries(parsed)

    result = (stream, skip)
    return_value(result)
コード例 #19
0
def async_parser(_, objconf, skip, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item
        conf (dict): The pipe configuration

    Returns:
        Deferred: twisted.internet.defer.Deferred Tuple(Iter[dict], bool)

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from riko.lib.utils import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x[0])['title'])
        ...     objconf = Objectify({'url': get_path('feed.xml'), 'sleep': 0})
        ...     d = async_parser(None, objconf, False, stream={})
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        Donations
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)
        content = yield io.async_url_read(url, delay=objconf.sleep)
        parsed = utils.parse_rss(content)
        stream = utils.gen_entries(parsed)

    result = (stream, skip)
    return_value(result)
コード例 #20
0
ファイル: fetchpage.py プロジェクト: zmyer/riko
def parser(_, objconf, skip, **kwargs):
    """ Parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko.lib.utils import Objectify
        >>> from riko import get_path
        >>> from meza._compat import decode
        >>>
        >>> url = get_path('cnn.html')
        >>> conf = {'url': url, 'start': '<title>', 'end': '</title>'}
        >>> objconf = Objectify(conf)
        >>> kwargs = {'stream': {}, 'assign': 'content'}
        >>> result, skip = parser(None, objconf, False, **kwargs)
        >>> resp = next(result)['content'][:21]
        >>> decode(resp) == 'CNN.com International'
        True
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)

        with closing(urlopen(url)) as response:
            f = response.fp
            encoding = utils.get_response_encoding(response, 'utf-8')
            decoded = iterdecode(f, encoding)
            sliced = utils.betwix(decoded, objconf.start, objconf.end, True)
            content = '\n'.join(sliced)

        parsed = get_string(content, objconf.start, objconf.end)
        detagged = get_text(parsed) if objconf.detag else parsed
        splits = detagged.split(objconf.token) if objconf.token else [detagged]
        stream = ({kwargs['assign']: chunk} for chunk in splits)

    return stream, skip
コード例 #21
0
ファイル: fetchpage.py プロジェクト: Fuzzwah/riko
def parser(_, objconf, skip, **kwargs):
    """ Parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko.lib.utils import Objectify
        >>> from riko import get_path
        >>> from meza._compat import decode
        >>>
        >>> url = get_path('cnn.html')
        >>> conf = {'url': url, 'start': '<title>', 'end': '</title>'}
        >>> objconf = Objectify(conf)
        >>> kwargs = {'stream': {}, 'assign': 'content'}
        >>> result, skip = parser(None, objconf, False, **kwargs)
        >>> resp = next(result)['content'][:21]
        >>> decode(resp) == 'CNN.com International'
        True
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)

        with closing(urlopen(url)) as response:
            f = response.fp
            encoding = utils.get_response_encoding(response, 'utf-8')
            decoded = iterdecode(f, encoding)
            sliced = utils.betwix(decoded, objconf.start, objconf.end, True)
            content = '\n'.join(sliced)

        parsed = get_string(content, objconf.start, objconf.end)
        detagged = get_text(parsed) if objconf.detag else parsed
        splits = detagged.split(objconf.token) if objconf.token else [detagged]
        stream = ({kwargs['assign']: chunk} for chunk in splits)

    return stream, skip
コード例 #22
0
ファイル: xpathfetchpage.py プロジェクト: zmyer/riko
def parser(_, objconf, skip, **kwargs):
    """ Parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko.lib.utils import Objectify
        >>> from riko import get_path
        >>>
        >>> url = get_path('ouseful.xml')
        >>> objconf = Objectify({'url': url, 'xpath': '/rss/channel/item'})
        >>> result, skip = parser(None, objconf, False, stream={})
        >>> title = 'Running “Native” Data Wrangling Applications'
        >>> next(result)['title'][:44] == title
        True
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)
        ext = splitext(url)[1].lstrip('.')
        xml = (ext == 'xml') or objconf.strict

        with closing(urlopen(url)) as f:
            root = utils.xml2etree(f, xml=xml, html5=objconf.html5).getroot()
            elements = utils.xpath(root, objconf.xpath)

        items = map(utils.etree2dict, elements)
        stringified = ({kwargs['assign']: str(i)} for i in items)
        stream = stringified if objconf.stringify else items

    return stream, skip
コード例 #23
0
ファイル: xpathfetchpage.py プロジェクト: zmyer/riko
def async_parser(_, objconf, skip, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (default: content)
        stream (dict): The original item

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from riko.lib.utils import Objectify
        >>>
        >>> @coroutine
        ... def run(reactor):
        ...     xml_url = get_path('ouseful.xml')
        ...     xml_conf = {'url': xml_url, 'xpath': '/rss/channel/item'}
        ...     xml_objconf = Objectify(xml_conf)
        ...     xml_args = (None, xml_objconf, False)
        ...     html_url = get_path('sciencedaily.html')
        ...     html_conf = {'url': html_url, 'xpath': '/html/head/title'}
        ...     html_objconf = Objectify(html_conf)
        ...     html_args = (None, html_objconf, False)
        ...     kwargs = {'stream': {}}
        ...
        ...     try:
        ...         xml_stream, _ = yield async_parser(*xml_args, **kwargs)
        ...         html_stream, _ = yield async_parser(*html_args, **kwargs)
        ...         print(next(xml_stream)['title'][:44])
        ...         print(next(html_stream))
        ...     except Exception as e:
        ...         logger.error(e)
        ...         logger.error(traceback.format_exc())
        ...
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        Running “Native” Data Wrangling Applications
        Help Page -- ScienceDaily
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)
        ext = splitext(url)[1].lstrip('.')
        xml = (ext == 'xml') or objconf.strict

        try:
            f = yield io.async_url_open(url)
            tree = yield util.xml2etree(f, xml=xml)
        except Exception as e:
            logger.error(e)
            logger.error(traceback.format_exc())

        elements = utils.xpath(tree, objconf.xpath)
        f.close()
        items = map(util.etree2dict, elements)
        stringified = ({kwargs['assign']: encode(i)} for i in items)
        stream = stringified if objconf.stringify else items

    result = (stream, skip)
    return_value(result)