Exemple #1
0
    def writexml(self, stream, *args, **kwargs):
        """Write this node's value to a stream.

        Raw nodes are written as-is; all other nodes are escaped first, after
        optionally collapsing runs of whitespace into single spaces.

        Args:
            stream: Object with a `write` method that receives the output.
            args (tuple): Unused positional arguments.
            kwargs (dict): Keyword arguments.

        Kwargs:
            strip (bool): Collapse whitespace before escaping (ignored for
                raw nodes).
        """
        is_raw = self.raw
        text = decode(self.nodeValue)

        if not is_raw:
            if kwargs.get('strip'):
                text = ' '.join(text.split())

            text = escape(text)

        stream.write(encode(text))
Exemple #2
0
    def writexml(self, stream, *args, **kwargs):
        """Write this node's value to a stream.

        Raw nodes are written as-is; all other nodes are escaped first, after
        optionally collapsing runs of whitespace into single spaces.

        Args:
            stream: Object with a `write` method that receives the output.
            args (tuple): Unused positional arguments.
            kwargs (dict): Keyword arguments.

        Kwargs:
            strip (bool): Collapse whitespace before escaping (ignored for
                raw nodes).
        """
        is_raw = self.raw
        text = decode(self.nodeValue)

        if not is_raw:
            if kwargs.get('strip'):
                text = ' '.join(text.split())

            text = escape(text)

        stream.write(encode(text))
Exemple #3
0
    def open(self, url, **params):
        """Open a url and return its content.

        Http(s) urls with query parameters are fetched with `requests`; every
        other url goes through `urlopen`. The response object is stored on
        `self.r`.

        Args:
            url (str): The url to open.
            params (dict): Query parameters to send with the request.

        Returns:
            The fully read content when `self.cache_type` is set (decoded
            when `self.decode` is set); otherwise a stream or response
            object to read from.
        """
        use_requests = url.startswith('http') and params

        if use_requests:
            r = requests.get(url, params=params, stream=True)
            r.raw.decode_content = self.decode

            if self.cache_type:
                response = r.text
            else:
                response = r.raw
        else:
            try:
                r = urlopen(url, context=self.context, timeout=self.timeout)
            except TypeError:
                # retry without the `context` argument
                r = urlopen(url, timeout=self.timeout)

            # the body is only read eagerly when caching
            body = r.read() if self.cache_type else None

            if self.decode:
                encoding = get_response_encoding(r, self.def_encoding)

                if self.cache_type:
                    response = decode(body, encoding)
                else:
                    response = reencode(r.fp, encoding, decode=True)
            elif self.cache_type:
                response = body
            else:
                response = r

        self.r = r
        return response
Exemple #4
0
    def open(self, url, **params):
        """Open a url and return its content.

        Http(s) urls with query parameters are fetched with `requests`; every
        other url goes through `urlopen`. The response object is stored on
        `self.r` and an extension derived from the response content type is
        stored on `self.ext`.

        Args:
            url (str): The url to open.
            params (dict): Query parameters to send with the request.

        Returns:
            The fully read content when `self.cache_type` is set (decoded
            when `self.decode` is set); otherwise a stream or response
            object to read from.
        """
        if url.startswith('http') and params:
            r = requests.get(url, params=params, stream=True)
            r.raw.decode_content = self.decode
            response = r.text if self.cache_type else r.raw
        else:
            try:
                r = urlopen(url, context=self.context, timeout=self.timeout)
            except TypeError:
                # retry without the `context` argument
                r = urlopen(url, timeout=self.timeout)

            # the body is only read eagerly when caching
            text = r.read() if self.cache_type else None

            if self.decode:
                encoding = get_response_encoding(r, self.def_encoding)

                # test `cache_type` rather than `text`: an empty body is falsy
                # but has already been read, so it must still be returned as
                # (empty) text instead of a stream over the consumed response
                if self.cache_type:
                    response = decode(text, encoding)
                else:
                    response = reencode(r.fp, encoding, decode=True)
            else:
                response = text if self.cache_type else r

        content_type = get_response_content_type(r)

        if 'xml' in content_type:
            self.ext = 'xml'
        elif 'json' in content_type:
            self.ext = 'json'
        else:
            # use the subtype of `type/subtype; params`; a content type with
            # no slash yields an empty extension instead of an IndexError
            parts = content_type.split('/')
            subtype = parts[1] if len(parts) > 1 else ''
            self.ext = subtype.split(';')[0]

        self.r = r
        return response
Exemple #5
0
def async_parser(base, objconf, skip=False, **kwargs):
    """ Asynchronously looks up the exchange rate for a currency

    Args:
        base (str): The base currency (exchanging from)
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content (the original item is passed
            through instead)
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (default: exchangerate)
        stream (dict): The original item

    Returns:
        Deferred: twisted.internet.defer.Deferred item

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from meza.fntools import Objectify
        >>>
        >>> def run(reactor):
        ...     url = get_path('quote.json')
        ...     conf = {
        ...         'url': url, 'currency': 'USD', 'delay': 0, 'precision': 6}
        ...     item = {'content': 'GBP'}
        ...     objconf = Objectify(conf)
        ...     kwargs = {'stream': item, 'assign': 'content'}
        ...     d = async_parser(item['content'], objconf, **kwargs)
        ...     return d.addCallbacks(print, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        1.275201
    """
    same_currency = base == objconf.currency
    needs_lookup = not (skip or same_currency)

    if needs_lookup:
        # fetch the rates, either over http or from a local resource
        if objconf.url.startswith('http'):
            resp = yield treq.get(objconf.url, params=objconf.params)
            data = yield treq.json(resp)
        else:
            abs_url = get_abspath(objconf.url)
            raw = yield io.async_url_read(abs_url, delay=objconf.delay)
            data = loads(decode(raw))

        places = Decimal(10) ** -objconf.precision
        rates = parse_response(data)
        rate = calc_rate(base, objconf.currency, rates, places=places)
    elif skip:
        rate = kwargs['stream']
    else:
        # exchanging a currency for itself
        rate = Decimal(1)

    return_value(rate)
Exemple #6
0
def async_parser(base, objconf, skip=False, **kwargs):
    """ Asynchronously looks up the exchange rate for a currency

    Args:
        base (str): The base currency (exchanging from)
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content (the original item is passed
            through instead)
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (default: exchangerate)
        stream (dict): The original item

    Returns:
        Deferred: twisted.internet.defer.Deferred item

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from meza.fntools import Objectify
        >>>
        >>> def run(reactor):
        ...     url = get_path('quote.json')
        ...     conf = {
        ...         'url': url, 'currency': 'USD', 'delay': 0, 'precision': 6}
        ...     item = {'content': 'GBP'}
        ...     objconf = Objectify(conf)
        ...     kwargs = {'stream': item, 'assign': 'content'}
        ...     d = async_parser(item['content'], objconf, **kwargs)
        ...     return d.addCallbacks(print, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        1.545801
    """
    same_currency = base == objconf.currency
    needs_lookup = not (skip or same_currency)

    if needs_lookup:
        # fetch the rates, either over http or from a local resource
        if objconf.url.startswith('http'):
            resp = yield treq.get(objconf.url, params=objconf.params)
            data = yield treq.json(resp)
        else:
            abs_url = get_abspath(objconf.url)
            raw = yield io.async_url_read(abs_url, delay=objconf.delay)
            data = loads(decode(raw))

        places = Decimal(10) ** -objconf.precision
        rates = parse_response(data)
        rate = calc_rate(base, objconf.currency, rates, places=places)
    elif skip:
        rate = kwargs['stream']
    else:
        # exchanging a currency for itself
        rate = Decimal(1)

    return_value(rate)
Exemple #7
0
def get_text(html, convert_charrefs=False):
    """Feed markup through a `LinkParser` and return the collected data.

    Args:
        html: The markup to parse.
        convert_charrefs (bool): Passed to `LinkParser` when it accepts that
            keyword (default: False).

    Returns:
        The value of the parser's `data` buffer after feeding.
    """
    try:
        link_parser = LinkParser(convert_charrefs=convert_charrefs)
    except TypeError:
        # this `LinkParser` doesn't take `convert_charrefs`
        link_parser = LinkParser()

    try:
        link_parser.feed(html)
    except TypeError:
        # retry with the decoded markup
        link_parser.feed(decode(html))

    collected = link_parser.data
    return collected.getvalue()
Exemple #8
0
def get_text(html, convert_charrefs=False):
    """Feed markup through a `LinkParser` and return the collected data.

    Args:
        html: The markup to parse.
        convert_charrefs (bool): Passed to `LinkParser` when it accepts that
            keyword (default: False).

    Returns:
        The value of the parser's `data` buffer after feeding.
    """
    try:
        link_parser = LinkParser(convert_charrefs=convert_charrefs)
    except TypeError:
        # this `LinkParser` doesn't take `convert_charrefs`
        link_parser = LinkParser()

    try:
        link_parser.feed(html)
    except TypeError:
        # retry with the decoded markup
        link_parser.feed(decode(html))

    collected = link_parser.data
    return collected.getvalue()
Exemple #9
0
def get_abspath(url):
    """Normalize a url, resolving relative `file://` urls.

    A url without a scheme gets an `http://` prefix. A `file://` url that
    does not start with `file:///` is resolved against the parent of this
    module's directory.

    Args:
        url (str): The url to normalize.

    Returns:
        The decoded, normalized url.
    """
    if url and '://' not in url:
        url = 'http://%s' % url

    # `file:///...` already holds an absolute path and is left untouched
    is_relative_file = (
        url and url.startswith('file://') and not url.startswith('file:///'))

    if is_relative_file:
        base_dir = p.dirname(p.dirname(__file__))
        url = 'file://%s' % p.abspath(p.join(base_dir, url[7:]))

    return decode(url)
Exemple #10
0
def get_abspath(url):
    """Normalize a url, resolving relative `file://` urls.

    A url without a scheme gets an `http://` prefix. A `file://` url that
    does not start with `file:///` is resolved against the parent of this
    module's directory.

    Args:
        url (str): The url to normalize.

    Returns:
        The decoded, normalized url.
    """
    if url and '://' not in url:
        url = 'http://%s' % url

    # `file:///...` already holds an absolute path and is left untouched
    is_relative_file = (
        url and url.startswith('file://') and not url.startswith('file:///'))

    if is_relative_file:
        base_dir = p.dirname(p.dirname(__file__))
        url = 'file://%s' % p.abspath(p.join(base_dir, url[7:]))

    return decode(url)
Exemple #11
0
def parse_rss(url=None, **kwargs):
    """Fetch a feed and parse it with `rssparser`.

    Args:
        url (str): The feed url.
        kwargs (dict): Keyword arguments passed on to `fetch`.

    Returns:
        The result of `rssparser.parse`.
    """
    try:
        f = fetch(decode(url), **kwargs)
    except (ValueError, URLError):
        # the url couldn't be fetched, so hand it to the parser directly
        parsed = rssparser.parse(url)
    else:
        # read inside the `try` so the handle is closed even if reading fails
        try:
            content = f.read() if speedparser else f
            parsed = rssparser.parse(content)
        finally:
            f.close()

    return parsed
Exemple #12
0
def parse_rss(url=None, **kwargs):
    """Fetch a feed and parse it with `rssparser`.

    Args:
        url (str): The feed url.
        kwargs (dict): Keyword arguments passed on to `fetch`.

    Returns:
        The result of `rssparser.parse`.
    """
    try:
        f = fetch(decode(url), **kwargs)
    except (ValueError, URLError):
        # the url couldn't be fetched, so hand it to the parser directly
        parsed = rssparser.parse(url)
    else:
        # read inside the `try` so the handle is closed even if reading fails
        try:
            content = f.read() if speedparser else f
            parsed = rssparser.parse(content)
        finally:
            f.close()

    return parsed
Exemple #13
0
def opener(url,
           memoize=False,
           delay=0,
           encoding=ENCODING,
           params=None,
           **kwargs):
    """Open a url and return its content together with its content type.

    Http(s) urls with query parameters are fetched with `requests`; every
    other url goes through `urlopen` with a default user agent header.

    Args:
        url (str): The url to open.
        memoize (bool): Read the whole body eagerly instead of returning a
            stream (default: False).
        delay (int): When truthy, a `SleepyDict` built with this delay is
            passed to `urlopen` as its context (default: 0).
        encoding (str): Fallback passed to `get_response_encoding`.
        params (dict): Query parameters to send with the request.
        kwargs (dict): Keyword arguments.

    Kwargs:
        timeout: Timeout passed to `urlopen`.
        decode (bool): Decode the content.

    Returns:
        Tuple(response, str): The content (or a stream/response object when
            not memoizing) and the response content type.

    Raises:
        URLError: If `urlopen` fails; the message includes the url.
    """
    params = params or {}
    timeout = kwargs.get('timeout')
    decode = kwargs.get('decode')

    if url.startswith('http') and params:
        r = requests.get(url, params=params, stream=True)
        r.raw.decode_content = decode
        response = r.text if memoize else r.raw
    else:
        req = Request(url, headers={'User-Agent': default_user_agent()})
        context = SleepyDict(delay=delay) if delay else None

        try:
            r = urlopen(req, context=context, timeout=timeout)
        except TypeError:
            # retry without the `context` argument
            r = urlopen(req, timeout=timeout)
        except HTTPError as e:
            raise URLError(f'{url} returned {e.code}: {e.reason}') from e
        except URLError as e:
            raise URLError(f'{url}: {e.reason}') from e

        # the body is only read eagerly when memoizing
        text = r.read() if memoize else None

        if decode:
            encoding = get_response_encoding(r, encoding)

            # test `memoize` rather than `text`: an empty body is falsy but
            # has already been read, so it must still be returned as (empty)
            # text instead of a stream over the consumed response
            if memoize:
                response = compat.decode(text, encoding)
            else:
                response = reencode(r.fp, encoding, decode=True)
                response.r = r
        else:
            response = text if memoize else r

    content_type = get_response_content_type(r)
    return (response, content_type)
Exemple #14
0
    def dataReceived(self, data):
        """Decode a chunk of input and run it through the state machine.

        Every character of the decoded chunk is handed to the current state's
        handler. Whenever the handler returns a different state name, the old
        state's end function and the new state's begin function are called.
        The line/column counters and the current state are stored back on the
        instance, and the next call picks up from the stored state.

        Args:
            data: The chunk to parse; it is decoded with `self.encoding`.
        """
        stateTable = self._build_state_table()
        # only detect the encoding if none has been stored yet
        self.encoding = self.encoding or detect(data)['encoding']
        self.check_encoding(data)
        # start in the 'begin' state unless a state was stored previously
        self.state = self.state or 'begin'
        content = decode(data, self.encoding)

        # bring state, lineno, colno into local scope
        lineno, colno = self.lineno, self.colno
        curState = self.state

        # replace saveMark with a nested scope function
        # (the lambda reads the local counters at call time, so it reports
        # the position reached inside the loop below)
        saveMark = lambda: (lineno, colno)
        self.saveMark, _saveMark = saveMark, self.saveMark

        # fetch functions from the stateTable
        beginFn, doFn, endFn = stateTable[curState]

        try:
            for char in content:
                # do newline stuff
                if char == '\n':
                    lineno += 1
                    colno = 0
                else:
                    colno += 1

                # a truthy return value names the state to move to
                newState = doFn(char)

                if newState and newState != curState:
                    # this is the endFn from the previous state
                    endFn()
                    curState = newState
                    beginFn, doFn, endFn = stateTable[curState]
                    beginFn(char)
        finally:
            # restore the original saveMark and persist the counters even if
            # a state function raised
            self.saveMark = _saveMark
            self.lineno, self.colno = lineno, colno

        # state doesn't make sense if there's an exception..
        # (this line is only reached when the loop finished without raising)
        self.state = curState
Exemple #15
0
    def dataReceived(self, data):
        """Decode a chunk of input and run it through the state machine.

        Every character of the decoded chunk is handed to the current state's
        handler. Whenever the handler returns a different state name, the old
        state's end function and the new state's begin function are called.
        The line/column counters and the current state are stored back on the
        instance, and the next call picks up from the stored state.

        Args:
            data: The chunk to parse; it is decoded with `self.encoding`.
        """
        stateTable = self._build_state_table()
        # only detect the encoding if none has been stored yet
        self.encoding = self.encoding or detect(data)['encoding']
        self.check_encoding(data)
        # start in the 'begin' state unless a state was stored previously
        self.state = self.state or 'begin'
        content = decode(data, self.encoding)

        # bring state, lineno, colno into local scope
        lineno, colno = self.lineno, self.colno
        curState = self.state

        # replace saveMark with a nested scope function
        # (the lambda reads the local counters at call time, so it reports
        # the position reached inside the loop below)
        saveMark = lambda: (lineno, colno)
        self.saveMark, _saveMark = saveMark, self.saveMark

        # fetch functions from the stateTable
        beginFn, doFn, endFn = stateTable[curState]

        try:
            for char in content:
                # do newline stuff
                if char == '\n':
                    lineno += 1
                    colno = 0
                else:
                    colno += 1

                # a truthy return value names the state to move to
                newState = doFn(char)

                if newState and newState != curState:
                    # this is the endFn from the previous state
                    endFn()
                    curState = newState
                    beginFn, doFn, endFn = stateTable[curState]
                    beginFn(char)
        finally:
            # restore the original saveMark and persist the counters even if
            # a state function raised
            self.saveMark = _saveMark
            self.lineno, self.colno = lineno, colno

        # state doesn't make sense if there's an exception..
        # (this line is only reached when the loop finished without raising)
        self.state = curState
Exemple #16
0
    def open(self, url, **params):
        """Open a url and return its content.

        Http(s) urls with query parameters are fetched with `requests`; every
        other url goes through `urlopen` with a default user agent header.
        The response object is stored on `self.r` and an extension derived
        from the response content type is stored on `self.ext`.

        Args:
            url (str): The url to open.
            params (dict): Query parameters to send with the request.

        Returns:
            The fully read content when `self.cache_type` is set (decoded
            when `self.decode` is set); otherwise a stream or response
            object to read from.

        Raises:
            URLError: If `urlopen` fails; the message includes the url.
        """
        if url.startswith('http') and params:
            r = requests.get(url, params=params, stream=True)
            r.raw.decode_content = self.decode
            response = r.text if self.cache_type else r.raw
        else:
            req = Request(url, headers={'User-Agent': default_user_agent()})
            try:
                r = urlopen(req, context=self.context, timeout=self.timeout)
            except TypeError:
                # retry without the `context` argument
                r = urlopen(req, timeout=self.timeout)
            except HTTPError as e:
                msg = '{} returned {}: {}'
                raise URLError(msg.format(url, e.code, e.reason))
            except URLError as e:
                raise URLError('{}: {}'.format(url, e.reason))

            # the body is only read eagerly when caching
            text = r.read() if self.cache_type else None

            if self.decode:
                encoding = get_response_encoding(r, self.def_encoding)

                # test `cache_type` rather than `text`: an empty body is falsy
                # but has already been read, so it must still be returned as
                # (empty) text instead of a stream over the consumed response
                if self.cache_type:
                    response = decode(text, encoding)
                else:
                    response = reencode(r.fp, encoding, decode=True)
            else:
                response = text if self.cache_type else r

        content_type = get_response_content_type(r)

        if 'xml' in content_type:
            self.ext = 'xml'
        elif 'json' in content_type:
            self.ext = 'json'
        else:
            # use the subtype of `type/subtype; params`; a content type with
            # no slash yields an empty extension instead of an IndexError
            parts = content_type.split('/')
            subtype = parts[1] if len(parts) > 1 else ''
            self.ext = subtype.split(';')[0]

        self.r = r
        return response
Exemple #17
0
    def open(self, url, **params):
        """Open a url and return its content.

        Http(s) urls with query parameters are fetched with `requests`; every
        other url goes through `urlopen`. The response object is stored on
        `self.r` and an extension derived from the response content type is
        stored on `self.ext`.

        Args:
            url (str): The url to open.
            params (dict): Query parameters to send with the request.

        Returns:
            The fully read content when `self.cache_type` is set (decoded
            when `self.decode` is set); otherwise a stream or response
            object to read from.

        Raises:
            URLError: If `urlopen` fails; the message includes the url.
        """
        if url.startswith('http') and params:
            r = requests.get(url, params=params, stream=True)
            r.raw.decode_content = self.decode
            response = r.text if self.cache_type else r.raw
        else:
            try:
                r = urlopen(url, context=self.context, timeout=self.timeout)
            except TypeError:
                # retry without the `context` argument
                r = urlopen(url, timeout=self.timeout)
            except HTTPError as e:
                msg = '{} returned {}: {}'
                raise URLError(msg.format(url, e.code, e.reason))
            except URLError as e:
                raise URLError('{}: {}'.format(url, e.reason))

            # the body is only read eagerly when caching
            text = r.read() if self.cache_type else None

            if self.decode:
                encoding = get_response_encoding(r, self.def_encoding)

                # test `cache_type` rather than `text`: an empty body is falsy
                # but has already been read, so it must still be returned as
                # (empty) text instead of a stream over the consumed response
                if self.cache_type:
                    response = decode(text, encoding)
                else:
                    response = reencode(r.fp, encoding, decode=True)
            else:
                response = text if self.cache_type else r

        content_type = get_response_content_type(r)

        if 'xml' in content_type:
            self.ext = 'xml'
        elif 'json' in content_type:
            self.ext = 'json'
        else:
            # use the subtype of `type/subtype; params`; a content type with
            # no slash yields an empty extension instead of an IndexError
            parts = content_type.split('/')
            subtype = parts[1] if len(parts) > 1 else ''
            self.ext = subtype.split(';')[0]

        self.r = r
        return response
Exemple #18
0
def file2entries(f, parser):
    """Yield parser entries while feeding a file to the parser line by line.

    Args:
        f: Iterable of lines.
        parser: Object with a `feed` method and an iterable `entry`
            attribute.

    Yields:
        Each item of `parser.entry`, iterated anew after every line fed.
    """
    for raw_line in f:
        decoded_line = decode(raw_line)
        parser.feed(decoded_line)

        for item in parser.entry:
            yield item
Exemple #19
0
 def handle_data(self, data):
     """Write the decoded data to `self.data`, followed by a newline."""
     text = decode(data)
     self.data.write('%s\n' % text)
Exemple #20
0
def file2entries(f, parser):
    """Yield parser entries while feeding a file to the parser line by line.

    Args:
        f: Iterable of lines.
        parser: Object with a `feed` method and an iterable `entry`
            attribute.

    Yields:
        Each item of `parser.entry`, iterated anew after every line fed.
    """
    for raw_line in f:
        decoded_line = decode(raw_line)
        parser.feed(decoded_line)

        for item in parser.entry:
            yield item
Exemple #21
0
 def handle_data(self, data):
     """Write the decoded data to `self.data`, followed by a newline."""
     text = decode(data)
     self.data.write('%s\n' % text)