コード例 #1
0
ファイル: fetchtext.py プロジェクト: nerevu/riko
def parser(_, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Fetches the resource described by `objconf` and emits one item per line,
    each holding the stripped line under the `assign` key. When `skip` is
    set, nothing is fetched and the original stream is handed back.

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Key under which each stripped line is stored
        stream (dict): The original item (returned as is when `skip` is set)

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from meza.fntools import Objectify
        >>>
        >>> url = get_path('lorem.txt')
        >>> objconf = Objectify({'url': url, 'encoding': ENCODING})
        >>> result = parser(None, objconf, assign='content')
        >>> next(result)['content'] == 'What is Lorem Ipsum?'
        True
    """
    if skip:
        return kwargs['stream']

    source = fetch(decode=True, **objconf)

    # `assign` is looked up lazily, i.e., only while the stream is consumed
    records = ({kwargs['assign']: text.strip()} for text in source)

    # presumably closes `source` once `records` is exhausted -- TODO confirm
    return auto_close(records, source)
コード例 #2
0
ファイル: csv.py プロジェクト: sottom/riko
def parser(_, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Fetches the csv resource described by `objconf` and reads it with
    `read_csv`. When `skip` is set, nothing is fetched and the original
    stream is handed back.

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item (returned as is when `skip` is set)

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from meza.fntools import Objectify
        >>>
        >>> url = get_path('spreadsheet.csv')
        >>> conf = {
        ...     'url': url, 'sanitize': True, 'skip_rows': 0,
        ...     'encoding': ENCODING}
        >>> objconf = Objectify(conf)
        >>> result = parser(None, objconf, stream={})
        >>> next(result)['mileage'] == '7213'
        True
    """
    if skip:
        return kwargs['stream']

    # Expose `skip_rows` / `col_names` under the keyword names that are
    # forwarded to `read_csv`
    first_row = objconf.skip_rows
    custom_header = objconf.col_names
    overrides = {'first_row': first_row, 'custom_header': custom_header}

    source = fetch(decode=True, **objconf)
    reader_kwargs = merge([objconf, overrides])
    rows = read_csv(source, **reader_kwargs)

    # presumably closes `source` once `rows` is exhausted -- TODO confirm
    return auto_close(rows, source)
コード例 #3
0
ファイル: fetchtext.py プロジェクト: tianhm/riko
def parser(_, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Emits one item per line of the fetched resource; each item stores the
    stripped line under the `assign` key. With `skip` set, the original
    stream is returned and nothing is fetched.

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Key under which each stripped line is stored
        stream (dict): The original item (returned as is when `skip` is set)

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from meza.fntools import Objectify
        >>>
        >>> url = get_path('lorem.txt')
        >>> objconf = Objectify({'url': url, 'encoding': ENCODING})
        >>> result = parser(None, objconf, assign='content')
        >>> next(result)['content'] == 'What is Lorem Ipsum?'
        True
    """
    if skip:
        return kwargs['stream']

    handle = fetch(decode=True, **objconf)

    # The `assign` key is resolved per item, while the stream is consumed
    lines = ({kwargs['assign']: raw.strip()} for raw in handle)

    # presumably closes `handle` once `lines` is exhausted -- TODO confirm
    return auto_close(lines, handle)
コード例 #4
0
ファイル: csv.py プロジェクト: nerevu/riko
def parser(_, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Reads the fetched csv resource with `read_csv`, passing along the pipe
    configuration. With `skip` set, the original stream is returned and
    nothing is fetched.

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item (returned as is when `skip` is set)

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from meza.fntools import Objectify
        >>>
        >>> url = get_path('spreadsheet.csv')
        >>> conf = {
        ...     'url': url, 'sanitize': True, 'skip_rows': 0,
        ...     'encoding': ENCODING}
        >>> objconf = Objectify(conf)
        >>> result = parser(None, objconf, stream={})
        >>> next(result)['mileage'] == '7213'
        True
    """
    if skip:
        return kwargs['stream']

    # `skip_rows` and `col_names` are passed to `read_csv` as `first_row`
    # and `custom_header`
    first_row = objconf.skip_rows
    custom_header = objconf.col_names
    aliases = {'first_row': first_row, 'custom_header': custom_header}

    handle = fetch(decode=True, **objconf)
    csv_kwargs = merge([objconf, aliases])
    records = read_csv(handle, **csv_kwargs)

    # presumably closes `handle` once `records` is exhausted -- TODO confirm
    return auto_close(records, handle)
コード例 #5
0
ファイル: exchangerate.py プロジェクト: nerevu/riko
def parser(base, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Looks up the exchange rate from `base` to `objconf.currency` in the
    rates fetched from `objconf.url`.

    Args:
        base (str): The base currency (exchanging from)
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (not read here)
        stream (dict): The original item (returned as is when `skip` is set)

    Returns:
        The exchange rate: the original stream when `skip` is set,
        `Decimal(1)` when `base` equals the target currency, `0` when the
        fetched response can't be parsed, and the result of `calc_rate`
        otherwise.

    Examples:
        >>> from riko import get_path
        >>> from meza.fntools import Objectify
        >>>
        >>> url = get_path('quote.json')
        >>> conf = {'url': url, 'currency': 'USD', 'delay': 0, 'precision': 6}
        >>> item = {'content': 'GBP'}
        >>> objconf = Objectify(conf)
        >>> kwargs = {'stream': item, 'assign': 'content'}
        >>> parser(item['content'], objconf, **kwargs)
        Decimal('1.275201')
    """
    same_currency = base == objconf.currency

    if skip:
        return kwargs['stream']

    if same_currency:
        return Decimal(1)

    decode = objconf.url.startswith('http')

    with fetch(decode=decode, **objconf) as f:
        try:
            payload = next(items(f, ''))
        except Exception as e:
            # Rewind so the raw response can be written to the debug log
            f.seek(0)
            logger.error('Error parsing {url}'.format(**objconf))
            logger.debug(f.read())
            logger.error(e)
            logger.error(traceback.format_exc())
            return 0

    places = Decimal(10) ** -objconf.precision
    rates = parse_response(payload)
    return calc_rate(base, objconf.currency, rates, places=places)
コード例 #6
0
ファイル: exchangerate.py プロジェクト: m8e/riko
def parser(base, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Determines the rate for exchanging `base` into `objconf.currency` from
    the rates fetched from `objconf.url`.

    Args:
        base (str): The base currency (exchanging from)
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (not read here)
        stream (dict): The original item (returned as is when `skip` is set)

    Returns:
        The exchange rate: the original stream when `skip` is set,
        `Decimal(1)` when `base` equals the target currency, `0` when the
        fetched response can't be parsed, and the result of `calc_rate`
        otherwise.

    Examples:
        >>> from riko import get_path
        >>> from meza.fntools import Objectify
        >>>
        >>> url = get_path('quote.json')
        >>> conf = {'url': url, 'currency': 'USD', 'delay': 0, 'precision': 6}
        >>> item = {'content': 'GBP'}
        >>> objconf = Objectify(conf)
        >>> kwargs = {'stream': item, 'assign': 'content'}
        >>> parser(item['content'], objconf, **kwargs)
        Decimal('1.275201')
    """
    same_currency = base == objconf.currency

    if skip:
        return kwargs['stream']

    if same_currency:
        return Decimal(1)

    decode = objconf.url.startswith('http')

    with fetch(decode=decode, **objconf) as f:
        try:
            response = next(items(f, ''))
        except Exception as e:
            # Go back to the start so the whole response lands in the log
            f.seek(0)
            logger.error('Error parsing {url}'.format(**objconf))
            logger.debug(f.read())
            logger.error(e)
            logger.error(traceback.format_exc())
            return 0

    places = Decimal(10) ** -objconf.precision
    rates = parse_response(response)
    return calc_rate(base, objconf.currency, rates, places=places)
コード例 #7
0
ファイル: yql.py プロジェクト: nerevu/riko
def parser(_, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Parses a YQL xml response and emits one item per child of its `results`
    element. The response is either supplied by the caller or fetched from
    `objconf.url` with the configured query.

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (not read here)
        stream (dict): The original item (returned as is when `skip` is set)
        response (obj): An already opened response to parse instead of
            fetching one. It is left open for the caller to close.

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from riko.utils import get_abspath
        >>> from meza.fntools import Objectify
        >>>
        >>> feed = 'http://feeds.feedburner.com/TechCrunch/'
        >>> url = 'http://query.yahooapis.com/v1/public/yql'
        >>> query = "select * from feed where url='%s'" % feed
        >>> conf = {'query': query, 'url': url, 'debug': False}
        >>> objconf = Objectify(conf)
        >>> url = get_abspath(get_path('yql.xml'))
        >>>
        >>> with fetch(url) as f:
        ...     kwargs = {'stream': {}, 'response': f}
        ...     result = parser(None, objconf, **kwargs)
        >>>
        >>> next(result)['title']
        'Bring pizza home'
    """
    if skip:
        return kwargs['stream']

    f = kwargs.get('response')

    # Only a response opened below is ours to close
    opened_here = not f

    if opened_here:
        params = {'q': objconf.query, 'diagnostics': objconf.debug}

        if objconf.memoize and not objconf.cache_type:
            objconf.cache_type = 'auto'

        f = fetch(params=params, **objconf)

    # TODO: consider paging for large result sets
    try:
        # assumes the response is fully read once the tree is built, as the
        # example above (which consumes the result after its `with` block
        # has exited) suggests
        root = xml2etree(f).getroot()
    finally:
        if opened_here:
            f.close()

    results = root.find('results')
    return map(etree2dict, results)
コード例 #8
0
ファイル: yql.py プロジェクト: sottom/riko
def parser(_, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Parses a YQL xml response and emits one item per child of its `results`
    element. The response is either supplied by the caller or fetched from
    `objconf.url` with the configured query.

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (not read here)
        stream (dict): The original item (returned as is when `skip` is set)
        response (obj): An already opened response to parse instead of
            fetching one. It is left open for the caller to close.

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from riko.utils import get_abspath
        >>> from meza.fntools import Objectify
        >>>
        >>> feed = 'http://feeds.feedburner.com/TechCrunch/'
        >>> url = 'http://query.yahooapis.com/v1/public/yql'
        >>> query = "select * from feed where url='%s'" % feed
        >>> conf = {'query': query, 'url': url, 'debug': False}
        >>> objconf = Objectify(conf)
        >>> url = get_abspath(get_path('yql.xml'))
        >>>
        >>> with fetch(url) as f:
        ...     kwargs = {'stream': {}, 'response': f}
        ...     result = parser(None, objconf, **kwargs)
        >>>
        >>> next(result)['title']
        'Bring pizza home'
    """
    if skip:
        return kwargs['stream']

    f = kwargs.get('response')

    # A caller-supplied response stays open; one fetched here gets closed
    fetched_here = not f

    if fetched_here:
        params = {'q': objconf.query, 'diagnostics': objconf.debug}

        if objconf.memoize and not objconf.cache_type:
            objconf.cache_type = 'auto'

        f = fetch(params=params, **objconf)

    # TODO: consider paging for large result sets
    try:
        # assumes building the tree reads the whole response, as the example
        # above (which consumes the result after its `with` block has
        # exited) suggests
        root = xml2etree(f).getroot()
    finally:
        if fetched_here:
            f.close()

    results = root.find('results')
    return map(etree2dict, results)
コード例 #9
0
ファイル: autorss.py プロジェクト: nerevu/riko
def get_rss(url, convert_charrefs=False):
    """ Extracts entries from a url (or from raw text) using a LinkParser

    Args:
        url (str): The location to fetch. If `fetch` rejects it with a
            ValueError, its non-empty lines are used as the content instead.
        convert_charrefs (bool): Passed to `LinkParser`. If the constructor
            rejects the keyword with a TypeError, the parser is built
            without arguments (default: False).

    Returns:
        The result of `file2entries` applied to the content and the parser
    """
    try:
        link_parser = LinkParser(convert_charrefs=convert_charrefs)
    except TypeError:
        link_parser = LinkParser()

    try:
        source = fetch(url, timeout=TIMEOUT)
    except ValueError:
        source = filter(None, url.splitlines())

    return file2entries(source, link_parser)
コード例 #10
0
ファイル: autorss.py プロジェクト: zixuedanxin/riko
def get_rss(url, convert_charrefs=False):
    """ Collects the entries found at a url (or in raw text)

    Args:
        url (str): The location to fetch. When `fetch` raises a ValueError
            for it, the non-empty lines of `url` itself serve as content.
        convert_charrefs (bool): Forwarded to `LinkParser`. When the
            constructor raises a TypeError for the keyword, it is called
            again without arguments (default: False).

    Returns:
        Whatever `file2entries` produces for the content and the parser
    """
    try:
        html_parser = LinkParser(convert_charrefs=convert_charrefs)
    except TypeError:
        html_parser = LinkParser()

    try:
        content = fetch(url, timeout=TIMEOUT)
    except ValueError:
        content = filter(None, url.splitlines())

    return file2entries(content, html_parser)
コード例 #11
0
ファイル: parsers.py プロジェクト: nerevu/riko
def parse_rss(url=None, **kwargs):
    """ Fetches and parses a feed with `rssparser`

    Args:
        url (str): The feed location. If `fetch` raises a ValueError or
            URLError for it, `url` itself is handed to `rssparser.parse`.
        kwargs (dict): Keyword arguments passed to `fetch`

    Returns:
        The result of `rssparser.parse`
    """
    try:
        f = fetch(decode(url), **kwargs)
    except (ValueError, URLError):
        parsed = rssparser.parse(url)
    else:
        # The read happens inside the `try` so that the handle gets closed
        # even if reading (and not just parsing) fails
        try:
            content = f.read() if speedparser else f
            parsed = rssparser.parse(content)
        finally:
            f.close()

    return parsed
コード例 #12
0
ファイル: parsers.py プロジェクト: mnjstwins/riko
def parse_rss(url=None, **kwargs):
    """ Fetches and parses a feed with `rssparser`

    Args:
        url (str): The feed location. If `fetch` raises a ValueError or
            URLError for it, `url` itself is handed to `rssparser.parse`.
        kwargs (dict): Keyword arguments passed to `fetch`

    Returns:
        The result of `rssparser.parse`
    """
    try:
        f = fetch(decode(url), **kwargs)
    except (ValueError, URLError):
        parsed = rssparser.parse(url)
    else:
        # Read inside the `try`: a failing `f.read()` must not leave the
        # fetched handle open
        try:
            content = f.read() if speedparser else f
            parsed = rssparser.parse(content)
        finally:
            f.close()

    return parsed
コード例 #13
0
def parser(base, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Looks up the exchange rate from `base` to `objconf.currency` in the
    rates fetched from `objconf.url`.

    Args:
        base (str): The base currency (exchanging from)
        objconf (obj): The pipe configuration (an Objectify instance). Its
            `cache_type` is set to 'auto' when `memoize` is on and no cache
            type is configured.
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (not read here)
        stream (dict): The original item (returned as is when `skip` is set)

    Returns:
        The exchange rate: the original stream when `skip` is set,
        `Decimal(1)` when `base` equals the target currency, and the result
        of `calc_rate` otherwise.

    Examples:
        >>> from riko import get_path
        >>> from meza.fntools import Objectify
        >>>
        >>> url = get_path('quote.json')
        >>> conf = {'url': url, 'currency': 'USD', 'delay': 0, 'precision': 6}
        >>> item = {'content': 'GBP'}
        >>> objconf = Objectify(conf)
        >>> kwargs = {'stream': item, 'assign': 'content'}
        >>> parser(item['content'], objconf, **kwargs)
        Decimal('1.545801')
    """
    same_currency = base == objconf.currency

    if skip:
        return kwargs['stream']

    if same_currency:
        return Decimal(1)

    decode = objconf.url.startswith('http')

    if objconf.memoize and not objconf.cache_type:
        objconf.cache_type = 'auto'

    with fetch(decode=decode, **objconf) as f:
        payload = next(items(f, ''))

    places = Decimal(10) ** -objconf.precision
    rates = parse_response(payload)
    return calc_rate(base, objconf.currency, rates, places=places)
コード例 #14
0
ファイル: xpathfetchpage.py プロジェクト: mnjstwins/riko
def parser(_, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Fetches the page at `objconf.url`, selects the elements matching
    `objconf.xpath` and converts each of them with `etree2dict`.

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance). Its
            `cache_type` is set to 'auto' when `memoize` is on and no cache
            type is configured.
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Key holding the stringified item (only read when
            `objconf.stringify` is set)
        stream (dict): The original item (returned as is when `skip` is set)

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from meza.fntools import Objectify
        >>> from riko import get_path
        >>>
        >>> url = get_path('ouseful.xml')
        >>> objconf = Objectify({'url': url, 'xpath': '/rss/channel/item'})
        >>> result = parser(None, objconf, stream={})
        >>> title = 'Running “Native” Data Wrangling Applications'
        >>> next(result)['title'][:44] == title
        True
    """
    if skip:
        return kwargs['stream']

    url = get_abspath(objconf.url)
    _, dotted_ext = splitext(url)
    ext = dotted_ext.lstrip('.')

    # Treat the content as xml if the extension says so or if strict parsing
    # is requested
    xml = (ext == 'xml') or objconf.strict

    if objconf.memoize and not objconf.cache_type:
        objconf.cache_type = 'auto'

    with fetch(**objconf) as f:
        tree = xml2etree(f, xml=xml, html5=objconf.html5)
        elements = xpath(tree.getroot(), objconf.xpath)

    items = map(etree2dict, elements)

    if objconf.stringify:
        return ({kwargs['assign']: str(i)} for i in items)

    return items
コード例 #15
0
ファイル: fetchpage.py プロジェクト: mnjstwins/riko
def parser(_, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Fetches the page at `objconf.url`, keeps the text between the `start`
    and `end` markers, optionally removes tags and splits the text on
    `token`.

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance). Its
            `cache_type` is set to 'auto' when `memoize` is on and no cache
            type is configured.
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Key under which each chunk of text is stored
        stream (dict): The original item (returned as is when `skip` is set)

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from meza.fntools import Objectify
        >>> from riko import get_path
        >>> from meza.compat import decode
        >>>
        >>> url = get_path('cnn.html')
        >>> conf = {'url': url, 'start': '<title>', 'end': '</title>'}
        >>> objconf = Objectify(conf)
        >>> kwargs = {'stream': {}, 'assign': 'content'}
        >>> result = parser(None, objconf, **kwargs)
        >>> resp = next(result)['content'][:21]
        >>> decode(resp) == 'CNN.com International'
        True
    """
    if skip:
        return kwargs['stream']

    if objconf.memoize and not objconf.cache_type:
        objconf.cache_type = 'auto'

    with fetch(decode=True, **objconf) as f:
        sliced = betwix(f, objconf.start, objconf.end, True)
        content = '\n'.join(sliced)

    text = get_string(content, objconf.start, objconf.end)

    if objconf.detag:
        text = get_text(text)

    if objconf.token:
        chunks = text.split(objconf.token)
    else:
        chunks = [text]

    # `assign` is looked up lazily, i.e., only while the stream is consumed
    return ({kwargs['assign']: chunk} for chunk in chunks)
コード例 #16
0
ファイル: fetchpage.py プロジェクト: nerevu/riko
def parser(_, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Fetches the page at `objconf.url`, keeps the text between the `start`
    and `end` markers, optionally removes tags and splits the text on
    `token`.

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Key under which each chunk of text is stored
        stream (dict): The original item (returned as is when `skip` is set)

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from meza.fntools import Objectify
        >>> from riko import get_path
        >>> from meza.compat import decode
        >>>
        >>> url = get_path('cnn.html')
        >>> conf = {'url': url, 'start': '<title>', 'end': '</title>'}
        >>> objconf = Objectify(conf)
        >>> kwargs = {'stream': {}, 'assign': 'content'}
        >>> result = parser(None, objconf, **kwargs)
        >>> resp = next(result)['content'][:21]
        >>> decode(resp) == 'CNN.com International'
        True
    """
    if skip:
        return kwargs['stream']

    with fetch(decode=True, **objconf) as f:
        sliced = betwix(f, objconf.start, objconf.end, True)
        content = '\n'.join(sliced)

    body = get_string(content, objconf.start, objconf.end)

    if objconf.detag:
        body = get_text(body)

    if objconf.token:
        pieces = body.split(objconf.token)
    else:
        pieces = [body]

    # The `assign` key is resolved per item, while the stream is consumed
    return ({kwargs['assign']: chunk} for chunk in pieces)
コード例 #17
0
ファイル: fetchdata.py プロジェクト: mnjstwins/riko
def parser(_, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Fetches the resource at `objconf.url` and converts it with `any2dict`,
    using the url's file extension to describe the content.

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance). Its
            `cache_type` is set to 'auto' when `memoize` is on and no cache
            type is configured.
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item (returned as is when `skip` is set)

    Returns:
        The stream of items as produced by `any2dict` (indexable in the
        example below)

    Examples:
        >>> from riko import get_path
        >>> from meza.fntools import Objectify
        >>>
        >>> url = get_path('gigs.json')
        >>> objconf = Objectify({'url': url, 'path': 'value.items'})
        >>> result = parser(None, objconf, stream={})
        >>> result[0]['title'] == 'Business System Analyst'
        True
    """
    if skip:
        return kwargs['stream']

    url = get_abspath(objconf.url)
    _, dotted_ext = p.splitext(url)
    ext = dotted_ext.lstrip('.')

    if objconf.memoize and not objconf.cache_type:
        objconf.cache_type = 'auto'

    with fetch(**objconf) as f:
        return any2dict(f, ext, objconf.html5, path=objconf.path)
コード例 #18
0
ファイル: xpathfetchpage.py プロジェクト: nerevu/riko
def parser(_, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Fetches the page at `objconf.url`, selects the elements matching
    `objconf.xpath` and converts each of them with `etree2dict`.

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Key holding the stringified item (only read when
            `objconf.stringify` is set)
        stream (dict): The original item (returned as is when `skip` is set)

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from meza.fntools import Objectify
        >>> from riko import get_path
        >>>
        >>> url = get_path('ouseful.xml')
        >>> objconf = Objectify({'url': url, 'xpath': '/rss/channel/item'})
        >>> result = parser(None, objconf, stream={})
        >>> title = 'Running “Native” Data Wrangling Applications'
        >>> next(result)['title'][:44] == title
        True
    """
    if skip:
        return kwargs['stream']

    url = get_abspath(objconf.url)
    _, suffix = splitext(url)
    ext = suffix.lstrip('.')

    # xml parsing applies to `.xml` urls and whenever strict mode is on
    xml = (ext == 'xml') or objconf.strict

    with fetch(**objconf) as f:
        document = xml2etree(f, xml=xml, html5=objconf.html5)
        elements = xpath(document.getroot(), objconf.xpath)

    items = map(etree2dict, elements)

    if objconf.stringify:
        return ({kwargs['assign']: str(i)} for i in items)

    return items
コード例 #19
0
ファイル: fetchdata.py プロジェクト: nerevu/riko
def parser(_, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Fetches the resource at `objconf.url` and converts it with `any2dict`.
    The content type is described by the url's file extension or, if the
    url has none, by the `ext` attribute of the fetched response.

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item (returned as is when `skip` is set)

    Returns:
        The stream of items as produced by `any2dict` (indexable in the
        example below)

    Examples:
        >>> from riko import get_path
        >>> from meza.fntools import Objectify
        >>>
        >>> url = get_path('gigs.json')
        >>> objconf = Objectify({'url': url, 'path': 'value.items'})
        >>> result = parser(None, objconf, stream={})
        >>> result[0]['title'] == 'Business System Analyst'
        True
    """
    if skip:
        return kwargs['stream']

    url = get_abspath(objconf.url)
    _, suffix = p.splitext(url)
    ext = suffix.lstrip('.')

    with fetch(**objconf) as f:
        if not ext:
            # No extension in the url: use the one the response reports
            ext = f.ext

        return any2dict(f, ext, objconf.html5, path=objconf.path)