Ejemplo n.º 1
0
def _get_hist_google(sym=None, start=None, end=None, retry_count=3,
                    pause=0, **kwargs):
    """
    Get historical data for the given name from google.
    Date format is datetime

    Returns a DataFrame.
    """
    if(sym is None):
        warnings.warn("Need to provide a name.")
        return None

    start, end = _sanitize_dates(start, end)

    google_URL = 'http://www.google.com/finance/historical?'

    # www.google.com/finance/historical?q=GOOG&startdate=Jun+9%2C+2011&enddate=Jun+8%2C+2013&output=csv
    url = google_URL + urllib.urlencode({"q": sym, \
        "startdate": start.strftime('%b %d, %Y'), \
        "enddate": end.strftime('%b %d, %Y'), "output": "csv" })
    for _ in range(retry_count):
        resp = urllib2.urlopen(url)
        if resp.code == 200:
            lines = resp.read()
            rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
                          parse_dates=True)[::-1]

            return rs

        time.sleep(pause)

    raise Exception("after %d tries, Google did not "
                    "return a 200 for url %s" % (pause, url))
Ejemplo n.º 2
0
def get_data_yahoo(name=None, start=None, end=None, retry_count=3, pause=0):
    """
    Get historical data for the given name from yahoo.

    Parameters
    ----------
    name : optional
        Symbol sent as the ``s`` query parameter.  When None, a message is
        printed and None is returned without any request being made.
    start, end : optional
        Date bounds.  Both are passed through ``_sanitize_dates``; the
        ``month``, ``day`` and ``year`` attributes of the results are put
        into the query string.
    retry_count : int
        Maximum number of requests to attempt.
    pause : number
        Seconds to sleep after a response whose code is not 200.

    Returns
    -------
    The DataFrame parsed from the CSV response (first column as a
    date-parsed index) with its row order reversed, or None if ``name`` is
    None.

    Raises
    ------
    Exception
        If none of the ``retry_count`` attempts produced a 200 response.
    """
    # Validate the symbol first so that no date handling is done for a call
    # that cannot succeed.  print(...) with one argument behaves the same
    # as the print statement on Python 2 and is valid on Python 3.
    if name is None:
        print("Need to provide a name")
        return None

    start, end = _sanitize_dates(start, end)

    yahoo_URL = 'http://ichart.yahoo.com/table.csv?'

    # The month fields (a, d) are sent as the month number minus one.
    url = (yahoo_URL + 's=%s' % name +
           '&a=%s' % (start.month - 1) +
           '&b=%s' % start.day +
           '&c=%s' % start.year +
           '&d=%s' % (end.month - 1) +
           '&e=%s' % end.day +
           '&f=%s' % end.year +
           '&g=d' +
           '&ignore=.csv')

    for _ in range(retry_count):
        resp = urllib2.urlopen(url)
        try:
            # Read inside the try so the connection is released on every
            # path, including a non-200 response or a failing read.
            lines = resp.read() if resp.code == 200 else None
        finally:
            resp.close()

        if lines is not None:
            rs = read_csv(StringIO(bytes_to_str(lines)),
                          index_col=0,
                          parse_dates=True)
            return rs[::-1]

        time.sleep(pause)

    # Report the number of attempts made, not the sleep interval.
    raise Exception("after %d tries, Yahoo did not return a 200 for url %s" %
                    (retry_count, url))
Ejemplo n.º 3
0
def _get_hist_google(sym, start=None, end=None, retry_count=3, pause=0.001,
                     **kwargs):
    """
    Fetch historical data for ``sym`` from google as a DataFrame.

    ``start`` and ``end`` are passed through ``_sanitize_dates`` and then
    formatted with ``strftime``.  Up to ``retry_count`` requests are made,
    sleeping ``pause`` seconds after each response whose code is not 200.
    Extra keyword arguments are accepted and ignored.

    Returns the CSV response parsed with the first column as a date-parsed
    index and the row order reversed.  Raises IOError when no attempt
    produced a 200 response.
    """
    start, end = _sanitize_dates(start, end)

    date_format = '%b %d, %Y'

    # www.google.com/finance/historical?q=GOOG&startdate=Jun+9%2C+2011&enddate=Jun+8%2C+2013&output=csv
    query = urllib.urlencode({"q": sym,
                              "startdate": start.strftime(date_format),
                              "enddate": end.strftime(date_format),
                              "output": "csv"})
    url = 'http://www.google.com/finance/historical?' + query

    for _ in xrange(retry_count):
        with closing(urlopen(url)) as resp:
            if resp.code == 200:
                text = bytes_to_str(resp.read())
                frame = read_csv(StringIO(text), index_col=0,
                                 parse_dates=True)
                return frame[::-1]

        time.sleep(pause)

    message = ("after %d tries, Google did not "
               "return a 200 for url %s" % (retry_count, url))
    raise IOError(message)
Ejemplo n.º 4
0
def get_data_yahoo(name=None, start=None, end=None, retry_count=3, pause=0):
    """
    Get historical data for the given name from yahoo.

    Parameters
    ----------
    name : optional
        Symbol sent as the ``s`` query parameter.  When None, a message is
        printed and None is returned without any request being made.
    start, end : optional
        Date bounds.  Both are passed through ``_sanitize_dates``; the
        ``month``, ``day`` and ``year`` attributes of the results are put
        into the query string.
    retry_count : int
        Maximum number of requests to attempt.
    pause : number
        Seconds to sleep after a response whose code is not 200.

    Returns
    -------
    The DataFrame parsed from the CSV response (first column as a
    date-parsed index) with its row order reversed, or None if ``name`` is
    None.

    Raises
    ------
    Exception
        If none of the ``retry_count`` attempts produced a 200 response.
    """
    # Check the symbol before touching the dates: nothing else is needed
    # for a call that is going to bail out anyway.  The parenthesised
    # single-argument print works on both Python 2 and Python 3.
    if name is None:
        print("Need to provide a name")
        return None

    start, end = _sanitize_dates(start, end)

    yahoo_URL = 'http://ichart.yahoo.com/table.csv?'

    # The month fields (a, d) are sent as the month number minus one.
    url = (yahoo_URL + 's=%s' % name +
           '&a=%s' % (start.month - 1) +
           '&b=%s' % start.day +
           '&c=%s' % start.year +
           '&d=%s' % (end.month - 1) +
           '&e=%s' % end.day +
           '&f=%s' % end.year +
           '&g=d' +
           '&ignore=.csv')

    for _ in range(retry_count):
        resp = urllib2.urlopen(url)
        try:
            # Always close the response, whatever its status code.
            lines = resp.read() if resp.code == 200 else None
        finally:
            resp.close()

        if lines is not None:
            rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
                          parse_dates=True)
            return rs[::-1]

        time.sleep(pause)

    # The %d slot is the number of attempts, i.e. retry_count (not pause).
    raise Exception(
        "after %d tries, Yahoo did not return a 200 for url %s"
        % (retry_count, url))
Ejemplo n.º 5
0
def _get_hist_yahoo(sym=None,
                    start=None,
                    end=None,
                    retry_count=3,
                    pause=0,
                    **kwargs):
    """
    Get historical data for the given name from yahoo.
    Date format is datetime

    Returns a DataFrame.
    """
    if (sym is None):
        warnings.warn("Need to provide a name.")
        return None

    start, end = _sanitize_dates(start, end)

    yahoo_URL = 'http://ichart.yahoo.com/table.csv?'

    url = yahoo_URL + 's=%s' % sym + \
        '&a=%s' % (start.month - 1) + \
        '&b=%s' % start.day + \
        '&c=%s' % start.year + \
        '&d=%s' % (end.month - 1) + \
        '&e=%s' % end.day + \
        '&f=%s' % end.year + \
        '&g=d' + \
        '&ignore=.csv'

    for _ in range(retry_count):
        resp = urllib.request.urlopen(url)
        if resp.code == 200:
            lines = resp.read()
            rs = read_csv(StringIO(bytes_to_str(lines)),
                          index_col=0,
                          parse_dates=True)[::-1]

            # Yahoo! Finance sometimes does this awesome thing where they
            # return 2 rows for the most recent business day
            if len(rs
                   ) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
                rs = rs[:-1]

            return rs

        time.sleep(pause)

    raise Exception("after %d tries, Yahoo did not "
                    "return a 200 for url %s" % (pause, url))
Ejemplo n.º 6
0
def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3,
                    pause=0, **kwargs):
    """
    Get historical data for the given name from yahoo.
    Date format is datetime

    Returns a DataFrame.
    """
    if(sym is None):
        warnings.warn("Need to provide a name.")
        return None

    start, end = _sanitize_dates(start, end)

    yahoo_URL = 'http://ichart.yahoo.com/table.csv?'

    url = yahoo_URL + 's=%s' % sym + \
        '&a=%s' % (start.month - 1) + \
        '&b=%s' % start.day + \
        '&c=%s' % start.year + \
        '&d=%s' % (end.month - 1) + \
        '&e=%s' % end.day + \
        '&f=%s' % end.year + \
        '&g=d' + \
        '&ignore=.csv'

    for _ in range(retry_count):
        resp = urllib2.urlopen(url)
        if resp.code == 200:
            lines = resp.read()
            rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
                          parse_dates=True)[::-1]

            # Yahoo! Finance sometimes does this awesome thing where they
            # return 2 rows for the most recent business day
            if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
                rs = rs[:-1]

            return rs

        time.sleep(pause)

    raise Exception("after %d tries, Yahoo did not "
                    "return a 200 for url %s" % (pause, url))
Ejemplo n.º 7
0
def _retry_read_url(url, retry_count, pause, name):
    """
    Read ``url`` as CSV into a DataFrame, trying up to ``retry_count`` times.

    Each attempt first sleeps ``pause`` seconds.  An attempt that raises
    one of ``_network_error_classes`` while opening or reading is skipped.
    The first successful read is parsed with the first column as a
    date-parsed index, its row order is reversed, and a repeated final
    index entry is dropped before the frame is returned.

    ``name`` is only used in the IOError raised when every attempt failed.
    """
    for _ in xrange(retry_count):
        time.sleep(pause)

        # kludge to close the socket ASAP
        try:
            with urlopen(url) as resp:
                payload = resp.read()
        except _network_error_classes:
            # Best effort: move on to the next attempt.
            continue

        frame = read_csv(StringIO(bytes_to_str(payload)), index_col=0,
                         parse_dates=True)
        frame = frame[::-1]

        # Yahoo! Finance sometimes does this awesome thing where they
        # return 2 rows for the most recent business day
        if len(frame) > 2 and frame.index[-1] == frame.index[-2]:  # pragma: no cover
            frame = frame[:-1]
        return frame

    message = ("after %d tries, %s did not "
               "return a 200 for url %r" % (retry_count, name, url))
    raise IOError(message)
Ejemplo n.º 8
0
def _get_hist_yahoo(sym, start=None, end=None, retry_count=3, pause=0.001,
                    **kwargs):
    """
    Fetch historical data for ``sym`` from yahoo as a DataFrame.

    ``start`` and ``end`` are passed through ``_sanitize_dates``; their
    ``month``, ``day`` and ``year`` attributes go into the query string.
    Up to ``retry_count`` requests are made, sleeping ``pause`` seconds
    after each response whose code is not 200.  Extra keyword arguments
    are accepted and ignored.

    Returns the CSV response parsed with the first column as a date-parsed
    index, row order reversed, and a repeated final index entry dropped.
    Raises IOError when no attempt produced a 200 response.
    """
    start, end = _sanitize_dates(start, end)

    # Query pieces in URL order; the month fields (a, d) are sent as the
    # month number minus one.
    query_parts = [
        's=%s' % sym,
        '&a=%s' % (start.month - 1),
        '&b=%s' % start.day,
        '&c=%s' % start.year,
        '&d=%s' % (end.month - 1),
        '&e=%s' % end.day,
        '&f=%s' % end.year,
        '&g=d',
        '&ignore=.csv',
    ]
    url = 'http://ichart.yahoo.com/table.csv?' + ''.join(query_parts)

    for _ in xrange(retry_count):
        with closing(urlopen(url)) as resp:
            if resp.code == 200:
                text = bytes_to_str(resp.read())
                frame = read_csv(StringIO(text), index_col=0,
                                 parse_dates=True)
                frame = frame[::-1]

                # Yahoo! Finance sometimes does this awesome thing where they
                # return 2 rows for the most recent business day
                if len(frame) > 2 and frame.index[-1] == frame.index[-2]:  # pragma: no cover
                    frame = frame[:-1]

                return frame

        time.sleep(pause)

    message = ("after %d tries, Yahoo did not "
               "return a 200 for url %r" % (retry_count, url))
    raise IOError(message)
Ejemplo n.º 9
0
def _retry_read_url(url, retry_count, pause, name):
    """
    Download ``url`` and parse it as CSV, retrying on network errors.

    Makes at most ``retry_count`` attempts, sleeping ``pause`` seconds
    before each one.  Attempts that raise one of
    ``_network_error_classes`` are ignored.  On the first successful read
    the data is parsed (first column as a date-parsed index), reversed,
    trimmed of a repeated final index entry, and returned.

    Raises IOError mentioning ``name`` and ``url`` if no attempt succeeded.
    """
    for _ in xrange(retry_count):
        time.sleep(pause)

        # kludge to close the socket ASAP
        try:
            with urlopen(url) as resp:
                raw = resp.read()
        except _network_error_classes:
            # Deliberately swallowed so the loop can try again.
            continue

        text = bytes_to_str(raw)
        result = read_csv(StringIO(text), index_col=0, parse_dates=True)[::-1]

        # Yahoo! Finance sometimes does this awesome thing where they
        # return 2 rows for the most recent business day
        if len(result) > 2 and result.index[-1] == result.index[-2]:  # pragma: no cover
            result = result[:-1]
        return result

    raise IOError(
        "after %d tries, %s did not "
        "return a 200 for url %r" % (retry_count, name, url))
Ejemplo n.º 10
0
def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3,
                    pause=0, dividends=False, splits=False, **kwargs):
    """
    Get historical data for the given name from yahoo.
    Date format is datetime

    Returns a DataFrame.
    """
    if(sym is None):
        warnings.warn("Need to provide a name.")
        return None

    start, end = _sanitize_dates(start, end)

    # Yahoo! Finance doesn't show splits with 'table.csv' setting
    if splits:
        url_type = 'x'
    else:
        url_type = 'table.csv'

    yahoo_URL = 'http://ichart.yahoo.com/%s?' % url_type

    url = yahoo_URL + 's=%s' % sym + \
        '&a=%s' % (start.month - 1) + \
        '&b=%s' % start.day + \
        '&c=%s' % start.year + \
        '&d=%s' % (end.month - 1) + \
        '&e=%s' % end.day + \
        '&f=%s' % end.year + \
        '&g=%s' % ('v' if dividends or splits else 'd') + \
        '&ignore=.csv'

    for _ in range(retry_count):
        resp = urllib2.urlopen(url)
        if resp.code == 200:
            lines = resp.read()
            rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
                          parse_dates=True)[::-1]

            # Yahoo! Finance sometimes does this awesome thing where they
            # return 2 rows for the most recent business day
            if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
                rs = rs[:-1]

            rs.rename(columns={'Dividends': 'Values'}, inplace=True)
            rs_splits, rs_dividends = DataFrame(), DataFrame()

            # check to see if there is split data
            try:
                has_splits = rs.xs('SPLIT')['Values'].any()
            except AttributeError:
                has_splits = rs.xs('SPLIT')['Values']
            except KeyError:
                # There is no split data
                has_splits = False

            split_format = splits or has_splits

            if (splits and has_splits and hasattr(rs.xs('SPLIT'), 'pivot')):
                # Yahoo! Finance returns additional info like 'STARTDATE' and
                # 'ENDDATE'. This selects only the data we want
                rs_splits = rs.xs('SPLIT').reset_index()

                # If Yahoo! Finance returns one value, the result of '.xs' will
                # be a Series instead of a DataFrame
            elif (splits and has_splits):
                d = {'index': ['SPLIT'],
                     'Date': [rs.xs('SPLIT')['Date']],
                     'Values': [rs.xs('SPLIT')['Values']]}

                rs_splits = DataFrame(d)

            if dividends and split_format:
                # check to see if there is dividend data
                try:
                    has_dividends = rs.xs('DIVIDEND')['Values'].any()
                except AttributeError:
                    has_dividends = rs.xs('DIVIDEND')['Values']
                except KeyError:
                    # There is no dividend data
                    has_dividends = False

                if (has_dividends and hasattr(rs.xs('DIVIDEND'), 'pivot')):
                    rs_dividends = rs.xs('DIVIDEND').reset_index()
                elif has_dividends:
                    d = {'index': ['DIVIDEND'],
                         'Date': [rs.xs('DIVIDEND')['Date']],
                         'Values': [rs.xs('DIVIDEND')['Values']]}

                    rs_dividends = DataFrame(d)

            elif dividends:
                # if there are no splits there won't be a 'DIVIDEND' section
                has_dividends = len(rs) > 0

                if has_dividends:
                    rs_dividends = rs

            # print(rs)
            rs = concat([rs_splits, rs_dividends])
            # print(rs_splits)
            # print(rs_dividends)
            # print(rs)

            if (split_format and not rs.empty):
                # Dates in split format are yyyymmdd so convert to yyyy-mm-dd
                rs.Date = rs.Date.apply(lambda x: str(x))
                rs.Date = rs.Date.apply(
                    lambda x: '%s-%s-%s' % (x[:4], x[4:6], x[6:]))

                # pivot DataFrame to match format of a normal query
                rs = rs.pivot(index='Date', columns='index', values='Values')

            if (splits and has_splits):
                rs.rename(columns={'SPLIT': 'Splits'}, inplace=True)

            if (dividends and has_dividends and split_format):
                rs.rename(columns={'DIVIDEND': 'Dividends'}, inplace=True)
            elif (dividends and has_dividends):
            	rs.rename(columns={'Values': 'Dividends'}, inplace=True)

            return rs

        time.sleep(pause)

    raise Exception("after %d tries, Yahoo did not "
                    "return a 200 for url %s" % (pause, url))