Example #1
def _retry_read_url(url, retry_count, pause, name):
    """
    Open url (and retry)
    """
    for _ in range(retry_count):
        time.sleep(pause)

        # kludge to close the socket ASAP
        try:
            with urlopen(url) as resp:
                lines = resp.read()
        except _network_error_classes:
            pass
        else:
            rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
                          parse_dates=True, na_values='-')[::-1]
            # Yahoo! Finance sometimes does this awesome thing where they
            # return 2 rows for the most recent business day
            if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
                rs = rs[:-1]

            # Get rid of unicode characters in index name.
            try:
                rs.index.name = rs.index.name.decode('unicode_escape').encode('ascii', 'ignore')
            except AttributeError:
                # Python 3 string has no decode method.
                rs.index.name = rs.index.name.encode('ascii', 'ignore').decode()

            return rs

    raise IOError("after %d tries, %s did not "
                  "return a 200 for url %r" % (retry_count, name, url))
Example #2
def _retry_read_url(url, retry_count, pause, name):
    for _ in range(retry_count):
        time.sleep(pause)

        # kludge to close the socket ASAP
        try:
            with urlopen(url) as resp:
                lines = resp.read()
        except _network_error_classes:
            pass
        else:
            rs = read_csv(StringIO(bytes_to_str(lines)),
                          index_col=0,
                          parse_dates=True,
                          na_values='-')[::-1]
            # Yahoo! Finance sometimes does this awesome thing where they
            # return 2 rows for the most recent business day
            if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
                rs = rs[:-1]

            # Get rid of unicode characters in index name.
            try:
                rs.index.name = rs.index.name.decode('unicode_escape').encode(
                    'ascii', 'ignore')
            except AttributeError:
                # Python 3 string has no decode method.
                rs.index.name = rs.index.name.encode('ascii',
                                                     'ignore').decode()

            return rs

    raise IOError("after %d tries, %s did not "
                  "return a 200 for url %r" % (retry_count, name, url))
Example #3
def get_dividends_yahoo(sid, start, end):
    # Taken from get_data_yahoo in the pandas library, with a single parameter adjusted to fetch dividends
    from pandas.compat import StringIO, bytes_to_str
    from pandas.io.common import urlopen

    start, end = pd.to_datetime(start), pd.to_datetime(end)
    url = ('http://ichart.finance.yahoo.com/table.csv?' + 's=%s' % sid +
           '&a=%s' % (start.month - 1) +
           '&b=%s' % start.day +
           '&c=%s' % start.year +
           '&d=%s' % (end.month - 1) +
           '&e=%s' % end.day +
           '&f=%s' % end.year +
           '&g=v' +  # THE CHANGE
           '&ignore=.csv')

    with urlopen(url) as resp:
        lines = resp.read()
    rs = pd.read_csv(StringIO(bytes_to_str(lines)), index_col=0,
                     parse_dates=True, na_values='-')[::-1]
    # Yahoo! Finance sometimes does this awesome thing where they
    # return 2 rows for the most recent business day
    if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
        rs = rs[:-1]
    return rs
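To see what this function actually requests, the query string can be rebuilt standalone; note the zero-based month fields ('a' and 'd') and the 'g=v' switch that flips the (since retired) ichart endpoint from prices to dividends. The ticker and dates below are placeholders.

import pandas as pd

start, end = pd.to_datetime('2015-01-01'), pd.to_datetime('2015-12-31')
url = ('http://ichart.finance.yahoo.com/table.csv?' + 's=%s' % 'AAPL' +
       '&a=%s' % (start.month - 1) + '&b=%s' % start.day +
       '&c=%s' % start.year + '&d=%s' % (end.month - 1) +
       '&e=%s' % end.day + '&f=%s' % end.year + '&g=v&ignore=.csv')
print(url)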
Example #4
    def datasets(self):
        if self._datasets is None:
            response = self._requests_get('/ExplorerHandler.ashx?t=marts')
            # The body is JSON, but the `Nodes` key is not double-quoted,
            # so it cannot be parsed with response.json().

            # quote the first occurrence: Nodes -> "Nodes"
            content = bytes_to_str(response.content)
            content = content.replace(str('Nodes'), str('"Nodes"'), 1)
            import json
            result = json.loads(content)
            nodes = result['Nodes']

            # import html.parser
            # import HTMLParser
            # parser = HTMLParser.HTMLParser()
            def delabel(node):
                # print(node['label'])
                # print(parser.feed(node['label']))
                return node

            nodes = [delabel(n) for n in nodes]
            datasets = pd.DataFrame(result['Nodes'])

            self._datasets = []
            # for dataflow in root.iter(sdmx._STRUCTURE + 'Dataflow'):
            #     name = sdmx._get_english_name(dataflow)
            #     id = dataflow.get('id')
            #     resource = EurostatResource(name=name, id=id)
            #     self._datasets.append(resource)
        return self._datasets
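The quoting workaround is easier to see in isolation. A sketch with a made-up payload standing in for the real ExplorerHandler response:

import json

# the server returns JSON whose top-level key is bare (Nodes, not "Nodes")
content = '{Nodes: [{"label": "GDP"}, {"label": "CPI"}]}'
content = content.replace('Nodes', '"Nodes"', 1)
nodes = json.loads(content)['Nodes']
print([n['label'] for n in nodes])  # ['GDP', 'CPI']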
Example #5
def read_clipboard(**kwargs):  # pragma: no cover
    """
    Read text from clipboard and pass to read_table. See read_table for the
    full argument list

    Returns
    -------
    parsed : DataFrame
    """
    if kwargs.get('sep') is None and kwargs.get('delim_whitespace') is None:
        kwargs['sep'] = '\s+'
    from pandas.util.clipboard import clipboard_get
    from pandas.io.parsers import read_table
    text = clipboard_get()

    # try to decode (if needed on PY3)
    if compat.PY3:
        try:
            text = compat.bytes_to_str(
                text,
                encoding=(kwargs.get('encoding')
                          or get_option('display.encoding')))
        except:
            pass
    return read_table(StringIO(text), **kwargs)
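The `sep` fallback above means whitespace-aligned text, the usual result of copying a printed frame, parses without an explicit delimiter. A sketch of that path, with read_csv standing in for the era's read_table:

from io import StringIO

import pandas as pd

text = '   a  b\n0  1  2\n1  3  4\n'
parsed = pd.read_csv(StringIO(text), sep=r'\s+')
print(parsed)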
Example #6
def get_dividends_yahoo(sid, start, end):
    # Taken from get_data_yahoo in the pandas library, with a single parameter adjusted to fetch dividends
    from pandas.compat import StringIO, bytes_to_str
    from pandas.io.common import urlopen

    start, end = pd.to_datetime(start), pd.to_datetime(end)
    url = ('http://ichart.finance.yahoo.com/table.csv?' + 's=%s' % sid +
           '&a=%s' % (start.month - 1) +
           '&b=%s' % start.day +
           '&c=%s' % start.year +
           '&d=%s' % (end.month - 1) +
           '&e=%s' % end.day +
           '&f=%s' % end.year +
           '&g=v' +  # THE CHANGE
           '&ignore=.csv')

    with urlopen(url) as resp:
        lines = resp.read()
    rs = pd.read_csv(StringIO(bytes_to_str(lines)), index_col=0,
                     parse_dates=True, na_values='-')[::-1]
    # Yahoo! Finance sometimes does this awesome thing where they
    # return 2 rows for the most recent business day
    if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
        rs = rs[:-1]
    return rs
Example #7
def read_clipboard(**kwargs):  # pragma: no cover
    """
    Read text from clipboard and pass to read_table. See read_table for the
    full argument list

    If unspecified, `sep` defaults to '\s+'

    Returns
    -------
    parsed : DataFrame
    """
    if kwargs.get('sep') is None and kwargs.get('delim_whitespace') is None:
        kwargs['sep'] = '\s+'
    from pandas.util.clipboard import clipboard_get
    from pandas.io.parsers import read_table
    text = clipboard_get()

    # try to decode (if needed on PY3)
    if compat.PY3:
        try:
            text = compat.bytes_to_str(
                text, encoding=(kwargs.get('encoding') or
                                get_option('display.encoding'))
            )
        except:
            pass
    return read_table(StringIO(text), **kwargs)
Example #8
    def datasets(self):
        if self._datasets is None:
            response = self._requests_get('/ExplorerHandler.ashx?t=marts')
            # The body is JSON, but the `Nodes` key is not double-quoted,
            # so it cannot be parsed with response.json().

            # quote the first occurrence: Nodes -> "Nodes"
            content = bytes_to_str(response.content)
            content = content.replace(str('Nodes'), str('"Nodes"'), 1)
            import json
            result = json.loads(content)
            nodes = result['Nodes']

            # import html.parser
            # import HTMLParser
            # parser = HTMLParser.HTMLParser()
            def delabel(node):
                # print(node['label'])
                # print(parser.feed(node['label']))
                return node

            nodes = [delabel(n) for n in nodes]
            datasets = pd.DataFrame(result['Nodes'])

            self._datasets = []
            # for dataflow in root.iter(sdmx._STRUCTURE + 'Dataflow'):
            #     name = sdmx._get_english_name(dataflow)
            #     id = dataflow.get('id')
            #     resource = EurostatResource(name=name, id=id)
            #     self._datasets.append(resource)
        return self._datasets
Example #9
def read_clipboard(sep=r'\s+', **kwargs):  # pragma: no cover
    r"""
    Read text from clipboard and pass to read_table. See read_table for the
    full argument list

    Parameters
    ----------
    sep : str, default '\s+'.
        A string or regex delimiter. The default of '\s+' denotes
        one or more whitespace characters.

    Returns
    -------
    parsed : DataFrame
    """
    encoding = kwargs.pop('encoding', 'utf-8')

    # only utf-8 is valid for passed value because that's what clipboard
    # supports
    if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
        raise NotImplementedError(
            'reading from clipboard only supports utf-8 encoding')

    from pandas.io.clipboard import clipboard_get
    from pandas.io.parsers import read_table
    text = clipboard_get()

    # try to decode (if needed on PY3)
    # Strange. linux py33 doesn't complain, win py33 does
    if compat.PY3:
        try:
            text = compat.bytes_to_str(
                text, encoding=(kwargs.get('encoding') or
                                get_option('display.encoding'))
            )
        except:
            pass

    # Excel copies into clipboard with \t separation
    # inspect no more than the first 10 lines; if they
    # all contain an equal number (>0) of tabs, infer
    # that this came from excel and set 'sep' accordingly
    lines = text[:10000].split('\n')[:-1][:10]

    # Need to remove leading white space, since read_table
    # accepts:
    #    a  b
    # 0  1  2
    # 1  3  4

    counts = {x.lstrip().count('\t') for x in lines}
    if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
        sep = r'\t'

    if sep is None and kwargs.get('delim_whitespace') is None:
        sep = r'\s+'

    return read_table(StringIO(text), sep=sep, **kwargs)
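The Excel heuristic in isolation: if every inspected line carries the same nonzero tab count, the text is treated as tab-separated. The sample string below is a stand-in for real clipboard contents.

text = 'a\tb\n1\t2\n3\t4\n'
lines = text[:10000].split('\n')[:-1][:10]
counts = {x.lstrip().count('\t') for x in lines}
sep = r'\s+'
if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
    sep = '\t'
print(repr(sep))  # '\t'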
Example #10
def read_clipboard(sep='\s+', **kwargs):  # pragma: no cover
    r"""
    Read text from clipboard and pass to read_table. See read_table for the
    full argument list

    Parameters
    ----------
    sep : str, default '\s+'.
        A string or regex delimiter. The default of '\s+' denotes
        one or more whitespace characters.

    Returns
    -------
    parsed : DataFrame
    """
    encoding = kwargs.pop('encoding', 'utf-8')

    # only utf-8 is valid for passed value because that's what clipboard
    # supports
    if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
        raise NotImplementedError(
            'reading from clipboard only supports utf-8 encoding')

    from pandas.io.clipboard import clipboard_get
    from pandas.io.parsers import read_table
    text = clipboard_get()

    # try to decode (if needed on PY3)
    # Strange. linux py33 doesn't complain, win py33 does
    if compat.PY3:
        try:
            text = compat.bytes_to_str(
                text,
                encoding=(kwargs.get('encoding')
                          or get_option('display.encoding')))
        except:
            pass

    # Excel copies into clipboard with \t separation
    # inspect no more than the first 10 lines; if they
    # all contain an equal number (>0) of tabs, infer
    # that this came from excel and set 'sep' accordingly
    lines = text[:10000].split('\n')[:-1][:10]

    # Need to remove leading white space, since read_table
    # accepts:
    #    a  b
    # 0  1  2
    # 1  3  4

    counts = set([x.lstrip().count('\t') for x in lines])
    if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
        sep = '\t'

    if sep is None and kwargs.get('delim_whitespace') is None:
        sep = '\s+'

    return read_table(StringIO(text), sep=sep, **kwargs)
Example #11
    def _read_url_as_StringIO(self, url, params=None):
        """
        Open url (and retry)
        """
        response = self._get_response(url, params=params)
        out = StringIO()
        if isinstance(response.content, compat.binary_type):
            out.write(bytes_to_str(response.content))
        else:
            out.write(response.content)
        out.seek(0)
        return out
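The binary/text branch in miniature, with a stand-in for pandas.compat on Python 3: whichever type the HTTP layer returns, the buffer ends up holding text.

from io import StringIO

def bytes_to_str(b, encoding='utf-8'):
    # assumption: mirrors pandas.compat.bytes_to_str on Python 3
    return b.decode(encoding) if isinstance(b, bytes) else b

for content in (b'date,value\n2020-01-01,1\n', 'date,value\n2020-01-01,1\n'):
    out = StringIO()
    out.write(bytes_to_str(content))
    out.seek(0)
    print(out.read().splitlines()[0])  # date,value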
Example #12
def test_repr_binary_type():
    import string
    letters = string.ascii_letters
    try:
        raw = bytes(letters, encoding=cf.get_option('display.encoding'))
    except TypeError:
        raw = bytes(letters)
    b = str(compat.bytes_to_str(raw))
    res = printing.pprint_thing(b, quote_strings=True)
    assert res == repr(b)
    res = printing.pprint_thing(b, quote_strings=False)
    assert res == b
Example #13
    def _read_url_as_StringIO(self, url, params=None):
        """
        Open url (and retry)
        """
        response = self._get_response(url, params=params)
        out = StringIO()
        if isinstance(response.content, compat.binary_type):
            out.write(bytes_to_str(response.content))
        else:
            out.write(response.content)
        out.seek(0)
        return out
Example #14
def test_repr_binary_type():
    import string
    letters = string.ascii_letters
    try:
        raw = bytes(letters, encoding=cf.get_option('display.encoding'))
    except TypeError:
        raw = bytes(letters)
    b = str(compat.bytes_to_str(raw))
    res = printing.pprint_thing(b, quote_strings=True)
    assert res == repr(b)
    res = printing.pprint_thing(b, quote_strings=False)
    assert res == b
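A minimal sketch of the compat corner these tests exercise, assuming the Python 2/3 split that pandas.compat papered over; the real module covered far more than this.

import sys

PY3 = sys.version_info[0] >= 3

if PY3:
    binary_type = bytes
    text_type = str

    def bytes_to_str(b, encoding='utf-8'):
        return b.decode(encoding) if isinstance(b, binary_type) else b
else:  # pragma: no cover - on Python 2, str is already bytes
    binary_type = str
    text_type = unicode  # noqa: F821

    def bytes_to_str(b, encoding=None):
        return b

print(bytes_to_str(b'abcXYZ'))  # abcXYZ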
Example #15
def test_repr_binary_type():
    import string
    letters = string.ascii_letters
    btype = compat.binary_type
    try:
        raw = btype(letters, encoding=cf.get_option('display.encoding'))
    except TypeError:
        raw = btype(letters)
    b = compat.text_type(compat.bytes_to_str(raw))
    res = com.pprint_thing(b, quote_strings=True)
    assert_equal(res, repr(b))
    res = com.pprint_thing(b, quote_strings=False)
    assert_equal(res, b)
Example #16
    def __next__(self):
        newline = self.mmap.readline()

        # readline returns bytes, not str, but Python's CSV reader
        # expects str, so convert the output to str before continuing
        newline = compat.bytes_to_str(newline)

        # mmap doesn't raise if reading past the allocated
        # data but instead returns an empty string, so raise
        # if that is returned
        if newline == '':
            raise StopIteration
        return newline
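A self-contained version of the wrapper this __next__ belongs to (pandas' MMapWrapper), sketched under the assumption that the object holds an open mmap in self.mmap and is iterated by csv.reader.

import mmap

class MMapLineReader(object):
    def __init__(self, f):
        self.mmap = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)

    def __iter__(self):
        return self

    def __next__(self):
        newline = self.mmap.readline()
        # readline returns bytes; csv.reader expects str
        newline = newline.decode('utf-8')
        # mmap returns '' past EOF rather than raising
        if newline == '':
            raise StopIteration
        return newline

# hypothetical usage:
# import csv
# with open('data.csv') as f:
#     for row in csv.reader(MMapLineReader(f)):
#         print(row)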
Example #17
def test_repr_binary_type():
    import string
    letters = string.ascii_letters
    btype = compat.binary_type
    try:
        raw = btype(letters, encoding=cf.get_option('display.encoding'))
    except TypeError:
        raw = btype(letters)
    b = compat.text_type(compat.bytes_to_str(raw))
    res = com.pprint_thing(b, quote_strings=True)
    assert_equal(res, repr(b))
    res = com.pprint_thing(b, quote_strings=False)
    assert_equal(res, b)
Example #18
    def __next__(self):
        newline = self.mmap.readline()

        # readline returns bytes, not str, but Python's CSV reader
        # expects str, so convert the output to str before continuing
        newline = compat.bytes_to_str(newline)

        # mmap doesn't raise if reading past the allocated
        # data but instead returns an empty string, so raise
        # if that is returned
        if newline == '':
            raise StopIteration
        return newline
Example #19
    def process_http_error(ex):
        # See `BigQuery Troubleshooting Errors <https://cloud.google.com/bigquery/troubleshooting-errors>`__

        status = json.loads(bytes_to_str(ex.content))['error']
        errors = status.get('errors', None)

        if errors:
            for error in errors:
                reason = error['reason']
                message = error['message']

                raise GenericGBQException("Reason: {0}, Message: {1}".format(reason, message))

        raise GenericGBQException(errors)
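The payload shape the handler assumes, exercised on a hand-built example rather than a live BigQuery error response:

import json

fake_content = json.dumps({
    'error': {
        'errors': [
            {'reason': 'invalidQuery', 'message': 'Syntax error at [1:8]'},
        ]
    }
}).encode('utf-8')

status = json.loads(fake_content.decode('utf-8'))['error']
for error in status.get('errors', []):
    print('Reason: {0}, Message: {1}'.format(error['reason'],
                                             error['message']))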
Example #20
    def process_http_error(ex):
        # See `BigQuery Troubleshooting Errors <https://cloud.google.com/bigquery/troubleshooting-errors>`__

        status = json.loads(bytes_to_str(ex.content))['error']
        errors = status.get('errors', None)

        if errors:
            for error in errors:
                reason = error['reason']
                message = error['message']

                raise GenericGBQException("Reason: {0}, Message: {1}".format(reason, message))

        raise GenericGBQException(errors)
Example #21
def read_clipboard(**kwargs):  # pragma: no cover
    """
    Read text from clipboard and pass to read_table. See read_table for the
    full argument list

    If unspecified, `sep` defaults to '\s+'

    Returns
    -------
    parsed : DataFrame
    """
    from pandas.util.clipboard import clipboard_get
    from pandas.io.parsers import read_table
    text = clipboard_get()

    # try to decode (if needed on PY3)
    # Strange. linux py33 doesn't complain, win py33 does
    if compat.PY3:
        try:
            text = compat.bytes_to_str(
                text,
                encoding=(kwargs.get('encoding')
                          or get_option('display.encoding')))
        except:
            pass

    # Excel copies into clipboard with \t separation
    # inspect no more than the first 10 lines; if they
    # all contain an equal number (>0) of tabs, infer
    # that this came from excel and set 'sep' accordingly
    lines = text[:10000].split('\n')[:-1][:10]

    # Need to remove leading white space, since read_table
    # accepts:
    #    a  b
    # 0  1  2
    # 1  3  4

    counts = set([x.lstrip().count('\t') for x in lines])
    if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
        kwargs['sep'] = '\t'

    if kwargs.get('sep') is None and kwargs.get('delim_whitespace') is None:
        kwargs['sep'] = '\s+'

    return read_table(StringIO(text), **kwargs)
Example #22
def read_clipboard(**kwargs):  # pragma: no cover
    """
    Read text from clipboard and pass to read_table. See read_table for the
    full argument list

    If unspecified, `sep` defaults to '\s+'

    Returns
    -------
    parsed : DataFrame
    """
    from pandas.util.clipboard import clipboard_get
    from pandas.io.parsers import read_table
    text = clipboard_get()

    # try to decode (if needed on PY3)
    # Strange. linux py33 doesn't complain, win py33 does
    if compat.PY3:
        try:
            text = compat.bytes_to_str(
                text, encoding=(kwargs.get('encoding') or
                                get_option('display.encoding'))
            )
        except:
            pass

    # Excel copies into clipboard with \t separation
    # inspect no more than the first 10 lines; if they
    # all contain an equal number (>0) of tabs, infer
    # that this came from excel and set 'sep' accordingly
    lines = text[:10000].split('\n')[:-1][:10]

    # Need to remove leading white space, since read_table
    # accepts:
    #    a  b
    # 0  1  2
    # 1  3  4

    counts = set([x.lstrip().count('\t') for x in lines])
    if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
        kwargs['sep'] = '\t'

    if kwargs.get('sep') is None and kwargs.get('delim_whitespace') is None:
        kwargs['sep'] = '\s+'

    return read_table(StringIO(text), **kwargs)
Example #23
    def _read_url_as_StringIO(self, url, params=None):
        """
        Open url (and retry)
        """
        response = self._get_response(url, params=params)
        text = self._sanitize_response(response)
        out = StringIO()
        if len(text) == 0:
            service = self.__class__.__name__
            raise IOError("{} request returned no data; check URL for invalid "
                          "inputs: {}".format(service, self.url))
        if isinstance(text, compat.binary_type):
            out.write(bytes_to_str(text))
        else:
            out.write(text)
        out.seek(0)
        return out
Example #24
def _retry_read_url(url, retry_count, pause, name):
    for _ in range(retry_count):
        time.sleep(pause)

        # kludge to close the socket ASAP
        try:
            with urlopen(url) as resp:
                lines = resp.read()
        except _network_error_classes:
            pass
        else:
            rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0, parse_dates=True)[::-1]
            # Yahoo! Finance sometimes does this awesome thing where they
            # return 2 rows for the most recent business day
            if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
                rs = rs[:-1]
            return rs

    raise IOError("after %d tries, %s did not " "return a 200 for url %r" % (retry_count, name, url))
Example #25
def _retry_read_url(url, retry_count, pause, name):
    for _ in range(retry_count):
        time.sleep(pause)

        # kludge to close the socket ASAP
        try:
            with urlopen(url) as resp:
                lines = resp.read()
        except _network_error_classes:
            pass
        else:
            rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
                          parse_dates=True)[::-1]
            # Yahoo! Finance sometimes does this awesome thing where they
            # return 2 rows for the most recent business day
            if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
                rs = rs[:-1]
            return rs

    raise IOError("after %d tries, %s did not "
                  "return a 200 for url %r" % (retry_count, name, url))
Example #26
    def _read_url_as_StringIO(self, url, params=None, min=0, errors='ignore'):
        """重写基类同名方法

        根据派生类提供的encoding解析文本
        """
        response = self._get_response(url, params=params)
        text = self._sanitize_response(response)
        out = StringIO()
        if len(text) <= self._read_url_as_StringIO_min_len:
            if self._read_url_as_StringIO_less_min_len:
                service = self.__class__.__name__
                raise IOError("{} request returned no data; check URL for "
                              "invalid inputs: {}".format(service, self.url))
            else:
                return None
        if isinstance(text, compat.binary_type):
            out.write(bytes_to_str(text, encoding=self._encoding))
        else:
            out.write(text)
        out.seek(0)
        return out
Example #27
def csv_to_df(text):
    df = pd.read_csv(StringIO(bytes_to_str(text)), index_col=0,
                     parse_dates=True, infer_datetime_format=True,
                     na_values='-')[::-1]

    # Yahoo! Finance sometimes does this awesome thing where they
    # return 2 rows for the most recent business day
    if len(df) > 2 and df.index[-1] == df.index[-2]:  # pragma: no cover
        df = df[:-1]

    # Get rid of unicode characters in index name.
    try:
        df.index.name = df.index.name.decode('unicode_escape').encode('ascii', 'ignore')
    except AttributeError:
        # Python 3 string has no decode method.
        df.index.name = df.index.name.encode('ascii', 'ignore').decode()

    column_renames = {'Adj. Open': 'Adj Open', 'Adj. High': 'Adj High',
                      'Adj. Low': 'Adj Low', 'Adj. Close': 'Adj Close',
                      'Adj. Volume': 'Adj Volume'}
    df.rename(columns=column_renames, inplace=True)
    return df.tz_localize(pytz.UTC)
Example #28
    def _read_raw(self, **kwargs):
        if self._raw_content is None:
            response = self._requests_get()
            content_length = response.headers.get("content-length")
            out = StringIO()

            try:
                content_length = int(content_length)
                pb = network.ProgressBar(total=content_length)

                for chunk in response.iter_content(self._chunk_size):
                    if chunk:
                        out.write(chunk)
                        pb.update(self._chunk_size)
                self._raw_content = out
            except Exception as e:
                # print(e)
                # no content_length or any errors
                if isinstance(response.content, binary_type):
                    out.write(bytes_to_str(response.content))
                else:
                    out.write(response.content)
                self._raw_content = out
        return self._raw_content
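One caveat with the chunked branch above: on Python 3, requests' iter_content yields bytes by default, and StringIO.write rejects bytes, so the progress-bar path falls through to the except clause. A hedged sketch of a Python 3-safe variant that decodes each chunk before buffering (iter_content(decode_unicode=True) is the library-level alternative); response and chunk_size stand in for the attributes the method reads from self.

from io import StringIO

def buffer_chunks(response, chunk_size=16 * 1024, encoding='utf-8'):
    out = StringIO()
    for chunk in response.iter_content(chunk_size):
        if chunk:
            # decode each bytes chunk so StringIO accepts it
            out.write(chunk.decode(encoding))
    out.seek(0)
    return out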
Example #29
    def _read_raw(self, **kwargs):
        if self._raw_content is None:
            response = self._requests_get()
            content_length = response.headers.get('content-length')
            out = StringIO()

            try:
                content_length = int(content_length)
                pb = network.ProgressBar(total=content_length)

                for chunk in response.iter_content(self._chunk_size):
                    if chunk:
                        out.write(chunk)
                        pb.update(self._chunk_size)
                self._raw_content = out
            except Exception as e:
                # print(e)
                # no content_length or any errors
                if isinstance(response.content, binary_type):
                    out.write(bytes_to_str(response.content))
                else:
                    out.write(response.content)
                self._raw_content = out
        return self._raw_content
Example #30
def _get_data(symbol, start=None, end=None, retry_count=3, pause=0.001):
    """
    Returns DataFrame of historical corporate actions (dividends and stock
    splits) from symbols, over date range, start to end. All dates in the
    resulting DataFrame correspond with dividend and stock split ex-dates.

    Parameters
    ----------
        sym : string with a single stock symbol (ticker).
        start : string, (defaults to '1/1/2010')
                Starting date, timestamp. Parses many different kinds of date
                representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
        end : string, (defaults to today)
                Ending date, timestamp. Same format as starting date.
        retry_count : int, default 3
                Number of times to retry query request.
        pause : float, default 0.001
                Time, in seconds, of the pause between retries.
    """

    start, end = _sanitize_dates(start, end)
    url = (_URL + 's=%s' % symbol +
           '&a=%s' % (start.month - 1) +
           '&b=%s' % start.day +
           '&c=%s' % start.year +
           '&d=%s' % (end.month - 1) +
           '&e=%s' % end.day +
           '&f=%s' % end.year +
           '&g=v')

    for _ in range(retry_count):
        time.sleep(pause)

        try:
            with urlopen(url) as resp:
                lines = resp.read()
        except _network_error_classes:
            pass
        else:
            actions_index = []
            actions_entries = []

            for line in csv.reader(StringIO(bytes_to_str(lines))):
                # Ignore lines that aren't dividends or splits (Yahoo
                # add a bunch of irrelevant fields.)
                if len(line) != 3 or line[0] not in ('DIVIDEND', 'SPLIT'):
                    continue

                action, date, value = line
                if action == 'DIVIDEND':
                    actions_index.append(to_datetime(date))
                    actions_entries.append({
                        'action': action,
                        'value': float(value)
                    })
                elif action == 'SPLIT' and ':' in value:
                    # Convert the split ratio to a fraction. For example a
                    # 4:1 split expressed as a fraction is 1/4 = 0.25.
                    denominator, numerator = value.split(':', 1)
                    split_fraction = float(numerator) / float(denominator)

                    actions_index.append(to_datetime(date))
                    actions_entries.append({
                        'action': action,
                        'value': split_fraction
                    })

            return DataFrame(actions_entries, index=actions_index)

    raise IOError("after %d tries, Yahoo! did not " \
                                "return a 200 for url %r" % (retry_count, url))
Example #31
def read_clipboard(sep=r'\s+', **kwargs):  # pragma: no cover
    r"""
    Read text from clipboard and pass to read_csv. See read_csv for the
    full argument list

    Parameters
    ----------
    sep : str, default '\s+'.
        A string or regex delimiter. The default of '\s+' denotes
        one or more whitespace characters.

    Returns
    -------
    parsed : DataFrame
    """
    encoding = kwargs.pop('encoding', 'utf-8')

    # only utf-8 is valid for passed value because that's what clipboard
    # supports
    if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
        raise NotImplementedError(
            'reading from clipboard only supports utf-8 encoding')

    from pandas.io.clipboard import clipboard_get
    from pandas.io.parsers import read_csv
    text = clipboard_get()

    # try to decode (if needed on PY3)
    # Strange. linux py33 doesn't complain, win py33 does
    if PY3:
        try:
            text = compat.bytes_to_str(
                text, encoding=(kwargs.get('encoding') or
                                get_option('display.encoding'))
            )
        except:
            pass

    # Excel copies into clipboard with \t separation
    # inspect no more than the first 10 lines; if they
    # all contain an equal number (>0) of tabs, infer
    # that this came from excel and set 'sep' accordingly
    lines = text[:10000].split('\n')[:-1][:10]

    # Need to remove leading white space, since read_csv
    # accepts:
    #    a  b
    # 0  1  2
    # 1  3  4

    counts = {x.lstrip().count('\t') for x in lines}
    if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
        sep = '\t'

    # Edge case where sep is specified to be None, return to default
    if sep is None and kwargs.get('delim_whitespace') is None:
        sep = r'\s+'

    # Regex separator currently only works with python engine.
    # Default to python if separator is multi-character (regex)
    if len(sep) > 1 and kwargs.get('engine') is None:
        kwargs['engine'] = 'python'
    elif len(sep) > 1 and kwargs.get('engine') == 'c':
        warnings.warn('read_clipboard with regex separator does not work'
                      ' properly with c engine')

    # In PY2, the c table reader first encodes text with UTF-8 but Python
    # table reader uses the format of the passed string. For consistency,
    # encode strings for python engine so that output from python and c
    # engines produce consistent results
    if kwargs.get('engine') == 'python' and PY2:
        text = text.encode('utf-8')

    return read_csv(StringIO(text), sep=sep, **kwargs)
Example #32
def read_clipboard(sep=r'\s+', **kwargs):  # pragma: no cover
    r"""
    Read text from clipboard and pass to read_csv. See read_csv for the
    full argument list

    Parameters
    ----------
    sep : str, default '\s+'.
        A string or regex delimiter. The default of '\s+' denotes
        one or more whitespace characters.

    Returns
    -------
    parsed : DataFrame
    """
    encoding = kwargs.pop('encoding', 'utf-8')

    # only utf-8 is valid for passed value because that's what clipboard
    # supports
    if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
        raise NotImplementedError(
            'reading from clipboard only supports utf-8 encoding')

    from pandas.io.clipboard import clipboard_get
    from pandas.io.parsers import read_csv
    text = clipboard_get()

    # try to decode (if needed on PY3)
    # Strange. linux py33 doesn't complain, win py33 does
    if PY3:
        try:
            text = compat.bytes_to_str(
                text,
                encoding=(kwargs.get('encoding')
                          or get_option('display.encoding')))
        except AttributeError:
            pass

    # Excel copies into clipboard with \t separation
    # inspect no more than the first 10 lines; if they
    # all contain an equal number (>0) of tabs, infer
    # that this came from excel and set 'sep' accordingly
    lines = text[:10000].split('\n')[:-1][:10]

    # Need to remove leading white space, since read_csv
    # accepts:
    #    a  b
    # 0  1  2
    # 1  3  4

    counts = {x.lstrip().count('\t') for x in lines}
    if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
        sep = '\t'

    # Edge case where sep is specified to be None, return to default
    if sep is None and kwargs.get('delim_whitespace') is None:
        sep = r'\s+'

    # Regex separator currently only works with python engine.
    # Default to python if separator is multi-character (regex)
    if len(sep) > 1 and kwargs.get('engine') is None:
        kwargs['engine'] = 'python'
    elif len(sep) > 1 and kwargs.get('engine') == 'c':
        warnings.warn('read_clipboard with regex separator does not work'
                      ' properly with c engine')

    # In PY2, the c table reader first encodes text with UTF-8 but Python
    # table reader uses the format of the passed string. For consistency,
    # encode strings for python engine so that output from python and c
    # engines produce consistent results
    if kwargs.get('engine') == 'python' and PY2:
        text = text.encode('utf-8')

    return read_csv(StringIO(text), sep=sep, **kwargs)
Example #33
def get_data_yahoo_actions(symbol,
                           start=None,
                           end=None,
                           retry_count=3,
                           pause=0.001):
    """
  Returns DataFrame of historical corporate actions (dividends and stock
  splits) from symbols, over date range, start to end.

  Parameters
  ----------
    sym : string with a single Single stock symbol (ticker).
    start : string, (defaults to '1/1/2010')
        Starting date, timestamp. Parses many different kind of date
        representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
    end : string, (defaults to today)
        Ending date, timestamp. Same format as starting date.
    retry_count : int, default 3
        Number of times to retry query request.
    pause : int, default 0
        Time, in seconds, of the pause between retries.
  """

    start, end = _sanitize_dates(start, end)
    url = (_HISTORICAL_YAHOO_ACTIONS_URL + 's=%s' % symbol + '&a=%s' %
           (start.month - 1) + '&b=%s' % start.day + '&c=%s' % start.year +
           '&d=%s' % (end.month - 1) + '&e=%s' % end.day + '&f=%s' % end.year +
           '&g=v')

    for _ in range(retry_count):
        time.sleep(pause)

        try:
            with urlopen(url) as resp:
                lines = resp.read()
        except _network_error_classes:
            pass
        else:
            actions_index = []
            actions_entries = []

            for line in csv.reader(StringIO(bytes_to_str(lines))):
                # Ignore lines that aren't dividends or splits (Yahoo
                # add a bunch of irrelevant fields.)
                if len(line) != 3 or line[0] not in ('DIVIDEND', 'SPLIT'):
                    continue

                action, date, value = line
                if action == 'DIVIDEND':
                    actions_index.append(to_datetime(date))
                    actions_entries.append({
                        'action': action,
                        'value': float(value)
                    })
                elif action == 'SPLIT' and ':' in value:
                    # Convert the split ratio to a fraction. For example a
                    # 4:1 split expressed as a fraction is 1/4 = 0.25.
                    denominator, numerator = value.split(':', 1)
                    split_fraction = float(numerator) / float(denominator)

                    actions_index.append(to_datetime(date))
                    actions_entries.append({
                        'action': action,
                        'value': split_fraction
                    })

            return DataFrame(actions_entries, index=actions_index)

    raise IOError("after %d tries, Yahoo! did not "
                  "return a 200 for url %r" % (retry_count, url))