def _get_hist_google(sym=None, start=None, end=None, retry_count=3, pause=0, **kwargs): """ Get historical data for the given name from google. Date format is datetime Returns a DataFrame. """ if(sym is None): warnings.warn("Need to provide a name.") return None start, end = _sanitize_dates(start, end) google_URL = 'http://www.google.com/finance/historical?' # www.google.com/finance/historical?q=GOOG&startdate=Jun+9%2C+2011&enddate=Jun+8%2C+2013&output=csv url = google_URL + urllib.urlencode({"q": sym, \ "startdate": start.strftime('%b %d, %Y'), \ "enddate": end.strftime('%b %d, %Y'), "output": "csv" }) for _ in range(retry_count): resp = urllib2.urlopen(url) if resp.code == 200: lines = resp.read() rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0, parse_dates=True)[::-1] return rs time.sleep(pause) raise Exception("after %d tries, Google did not " "return a 200 for url %s" % (pause, url))
def get_data_yahoo(name=None, start=None, end=None, retry_count=3, pause=0):
    """
    Get historical data for the given name from yahoo.
    Date format is datetime

    Parameters
    ----------
    name : str, optional
        Symbol sent as the ``s`` query parameter. When it is None a message
        is printed and None is returned without contacting the server.
    start, end : optional
        Passed through ``_sanitize_dates``; the results must expose
        ``month``, ``day`` and ``year``.
    retry_count : int, default 3
        Number of times the URL is requested before giving up.
    pause : number, default 0
        Seconds to sleep after a response whose code is not 200.

    Returns
    -------
    DataFrame, or None when ``name`` is None. The rows are in the reverse of
    the order in which they appear in the downloaded CSV.

    Raises
    ------
    Exception
        If no attempt produced a response with code 200.
    """
    # Guard first so that a missing name never triggers date handling.
    if name is None:
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print("Need to provide a name")
        return None

    start, end = _sanitize_dates(start, end)

    yahoo_URL = 'http://ichart.yahoo.com/table.csv?'

    # The query sends the month as month - 1 (parameters a and d).
    url = (yahoo_URL + 's=%s' % name +
           '&a=%s' % (start.month - 1) +
           '&b=%s' % start.day +
           '&c=%s' % start.year +
           '&d=%s' % (end.month - 1) +
           '&e=%s' % end.day +
           '&f=%s' % end.year +
           '&g=d' +
           '&ignore=.csv')

    for _ in range(retry_count):
        resp = urllib2.urlopen(url)
        try:
            if resp.code == 200:
                lines = resp.read()
                rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
                              parse_dates=True)
                return rs[::-1]
        finally:
            # Release the connection on every path.
            resp.close()

        time.sleep(pause)

    # Report the number of attempts; the pause length is not a try count.
    raise Exception("after %d tries, Yahoo did not return a 200 for url %s"
                    % (retry_count, url))
def _get_hist_google(sym, start=None, end=None, retry_count=3, pause=0.001,
                     **kwargs):
    """
    Get historical data for the given name from google.
    Date format is datetime

    The URL is requested up to ``retry_count`` times, sleeping ``pause``
    seconds after each response whose code is not 200. Extra keyword
    arguments are accepted and ignored.

    Returns a DataFrame whose rows are in the reverse of the CSV order.
    Raises IOError when no attempt yields a 200 response.
    """
    start, end = _sanitize_dates(start, end)

    # www.google.com/finance/historical?q=GOOG&startdate=Jun+9%2C+2011&enddate=Jun+8%2C+2013&output=csv
    date_format = '%b %d, %Y'
    query = {"q": sym,
             "startdate": start.strftime(date_format),
             "enddate": end.strftime(date_format),
             "output": "csv"}
    url = 'http://www.google.com/finance/historical?' + urllib.urlencode(query)

    attempts_left = retry_count
    for _ in xrange(retry_count):
        attempts_left -= 1
        with closing(urlopen(url)) as resp:
            payload = resp.read() if resp.code == 200 else None

        if payload is not None:
            frame = read_csv(StringIO(bytes_to_str(payload)), index_col=0,
                             parse_dates=True)
            return frame[::-1]

        time.sleep(pause)

    raise IOError("after %d tries, Google did not "
                  "return a 200 for url %s" % (retry_count, url))
def get_data_yahoo(name=None, start=None, end=None, retry_count=3, pause=0):
    """
    Get historical data for the given name from yahoo.
    Date format is datetime

    Parameters
    ----------
    name : str, optional
        Symbol sent as the ``s`` query parameter. When it is None a message
        is printed and None is returned without contacting the server.
    start, end : optional
        Passed through ``_sanitize_dates``; the results must expose
        ``month``, ``day`` and ``year``.
    retry_count : int, default 3
        Number of times the URL is requested before giving up.
    pause : number, default 0
        Seconds to sleep after a response whose code is not 200.

    Returns
    -------
    DataFrame, or None when ``name`` is None. The rows are in the reverse of
    the order in which they appear in the downloaded CSV.

    Raises
    ------
    Exception
        If no attempt produced a response with code 200.
    """
    # Guard first so that a missing name never triggers date handling.
    if name is None:
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print("Need to provide a name")
        return None

    start, end = _sanitize_dates(start, end)

    yahoo_URL = 'http://ichart.yahoo.com/table.csv?'

    # The query sends the month as month - 1 (parameters a and d).
    url = (yahoo_URL + 's=%s' % name +
           '&a=%s' % (start.month - 1) +
           '&b=%s' % start.day +
           '&c=%s' % start.year +
           '&d=%s' % (end.month - 1) +
           '&e=%s' % end.day +
           '&f=%s' % end.year +
           '&g=d' +
           '&ignore=.csv')

    for _ in range(retry_count):
        resp = urllib2.urlopen(url)
        try:
            if resp.code == 200:
                lines = resp.read()
                rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
                              parse_dates=True)
                return rs[::-1]
        finally:
            # Release the connection on every path.
            resp.close()

        time.sleep(pause)

    # Report the number of attempts; the pause length is not a try count.
    raise Exception(
        "after %d tries, Yahoo did not return a 200 for url %s"
        % (retry_count, url))
def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3, pause=0, **kwargs): """ Get historical data for the given name from yahoo. Date format is datetime Returns a DataFrame. """ if (sym is None): warnings.warn("Need to provide a name.") return None start, end = _sanitize_dates(start, end) yahoo_URL = 'http://ichart.yahoo.com/table.csv?' url = yahoo_URL + 's=%s' % sym + \ '&a=%s' % (start.month - 1) + \ '&b=%s' % start.day + \ '&c=%s' % start.year + \ '&d=%s' % (end.month - 1) + \ '&e=%s' % end.day + \ '&f=%s' % end.year + \ '&g=d' + \ '&ignore=.csv' for _ in range(retry_count): resp = urllib.request.urlopen(url) if resp.code == 200: lines = resp.read() rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0, parse_dates=True)[::-1] # Yahoo! Finance sometimes does this awesome thing where they # return 2 rows for the most recent business day if len(rs ) > 2 and rs.index[-1] == rs.index[-2]: # pragma: no cover rs = rs[:-1] return rs time.sleep(pause) raise Exception("after %d tries, Yahoo did not " "return a 200 for url %s" % (pause, url))
def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3, pause=0, **kwargs): """ Get historical data for the given name from yahoo. Date format is datetime Returns a DataFrame. """ if(sym is None): warnings.warn("Need to provide a name.") return None start, end = _sanitize_dates(start, end) yahoo_URL = 'http://ichart.yahoo.com/table.csv?' url = yahoo_URL + 's=%s' % sym + \ '&a=%s' % (start.month - 1) + \ '&b=%s' % start.day + \ '&c=%s' % start.year + \ '&d=%s' % (end.month - 1) + \ '&e=%s' % end.day + \ '&f=%s' % end.year + \ '&g=d' + \ '&ignore=.csv' for _ in range(retry_count): resp = urllib2.urlopen(url) if resp.code == 200: lines = resp.read() rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0, parse_dates=True)[::-1] # Yahoo! Finance sometimes does this awesome thing where they # return 2 rows for the most recent business day if len(rs) > 2 and rs.index[-1] == rs.index[-2]: # pragma: no cover rs = rs[:-1] return rs time.sleep(pause) raise Exception("after %d tries, Yahoo did not " "return a 200 for url %s" % (pause, url))
def _retry_read_url(url, retry_count, pause, name): for _ in xrange(retry_count): time.sleep(pause) # kludge to close the socket ASAP try: with urlopen(url) as resp: lines = resp.read() except _network_error_classes: pass else: rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0, parse_dates=True)[::-1] # Yahoo! Finance sometimes does this awesome thing where they # return 2 rows for the most recent business day if len(rs) > 2 and rs.index[-1] == rs.index[-2]: # pragma: no cover rs = rs[:-1] return rs raise IOError("after %d tries, %s did not " "return a 200 for url %r" % (retry_count, name, url))
def _get_hist_yahoo(sym, start=None, end=None, retry_count=3, pause=0.001,
                    **kwargs):
    """
    Get historical data for the given name from yahoo.
    Date format is datetime

    The URL is requested up to ``retry_count`` times, sleeping ``pause``
    seconds after each response whose code is not 200. Extra keyword
    arguments are accepted and ignored.

    Returns a DataFrame whose rows are in the reverse of the CSV order.
    Raises IOError when no attempt yields a 200 response.
    """
    start, end = _sanitize_dates(start, end)

    # The query sends the month as month - 1 (parameters a and d).
    pieces = ['http://ichart.yahoo.com/table.csv?',
              's=%s' % sym,
              '&a=%s' % (start.month - 1),
              '&b=%s' % start.day,
              '&c=%s' % start.year,
              '&d=%s' % (end.month - 1),
              '&e=%s' % end.day,
              '&f=%s' % end.year,
              '&g=d',
              '&ignore=.csv']
    url = ''.join(pieces)

    for _ in xrange(retry_count):
        with closing(urlopen(url)) as resp:
            payload = resp.read() if resp.code == 200 else None

        if payload is not None:
            frame = read_csv(StringIO(bytes_to_str(payload)), index_col=0,
                             parse_dates=True)[::-1]

            # Yahoo! Finance sometimes does this awesome thing where they
            # return 2 rows for the most recent business day
            if len(frame) > 2 and frame.index[-1] == frame.index[-2]:  # pragma: no cover
                frame = frame[:-1]
            return frame

        time.sleep(pause)

    raise IOError("after %d tries, Yahoo did not "
                  "return a 200 for url %r" % (retry_count, url))
def _retry_read_url(url, retry_count, pause, name): for _ in xrange(retry_count): time.sleep(pause) # kludge to close the socket ASAP try: with urlopen(url) as resp: lines = resp.read() except _network_error_classes: pass else: rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0, parse_dates=True)[::-1] # Yahoo! Finance sometimes does this awesome thing where they # return 2 rows for the most recent business day if len(rs ) > 2 and rs.index[-1] == rs.index[-2]: # pragma: no cover rs = rs[:-1] return rs raise IOError("after %d tries, %s did not " "return a 200 for url %r" % (retry_count, name, url))
def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3, pause=0, dividends=False, splits=False, **kwargs): """ Get historical data for the given name from yahoo. Date format is datetime Returns a DataFrame. """ if(sym is None): warnings.warn("Need to provide a name.") return None start, end = _sanitize_dates(start, end) # Yahoo! Finance doesn't show splits with 'table.csv' setting if splits: url_type = 'x' else: url_type = 'table.csv' yahoo_URL = 'http://ichart.yahoo.com/%s?' % url_type url = yahoo_URL + 's=%s' % sym + \ '&a=%s' % (start.month - 1) + \ '&b=%s' % start.day + \ '&c=%s' % start.year + \ '&d=%s' % (end.month - 1) + \ '&e=%s' % end.day + \ '&f=%s' % end.year + \ '&g=%s' % ('v' if dividends or splits else 'd') + \ '&ignore=.csv' for _ in range(retry_count): resp = urllib2.urlopen(url) if resp.code == 200: lines = resp.read() rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0, parse_dates=True)[::-1] # Yahoo! Finance sometimes does this awesome thing where they # return 2 rows for the most recent business day if len(rs) > 2 and rs.index[-1] == rs.index[-2]: # pragma: no cover rs = rs[:-1] rs.rename(columns={'Dividends': 'Values'}, inplace=True) rs_splits, rs_dividends = DataFrame(), DataFrame() # check to see if there is split data try: has_splits = rs.xs('SPLIT')['Values'].any() except AttributeError: has_splits = rs.xs('SPLIT')['Values'] except KeyError: # There is no split data has_splits = False split_format = splits or has_splits if (splits and has_splits and hasattr(rs.xs('SPLIT'), 'pivot')): # Yahoo! Finance returns additional info like 'STARTDATE' and # 'ENDDATE'. This selects only the data we want rs_splits = rs.xs('SPLIT').reset_index() # If Yahoo! 
Finance returns one value, the result of '.xs' will # be a Series instead of a DataFrame elif (splits and has_splits): d = {'index': ['SPLIT'], 'Date': [rs.xs('SPLIT')['Date']], 'Values': [rs.xs('SPLIT')['Values']]} rs_splits = DataFrame(d) if dividends and split_format: # check to see if there is dividend data try: has_dividends = rs.xs('DIVIDEND')['Values'].any() except AttributeError: has_dividends = rs.xs('DIVIDEND')['Values'] except KeyError: # There is no dividend data has_dividends = False if (has_dividends and hasattr(rs.xs('DIVIDEND'), 'pivot')): rs_dividends = rs.xs('DIVIDEND').reset_index() elif has_dividends: d = {'index': ['DIVIDEND'], 'Date': [rs.xs('DIVIDEND')['Date']], 'Values': [rs.xs('DIVIDEND')['Values']]} rs_dividends = DataFrame(d) elif dividends: # if there are no splits there won't be a 'DIVIDEND' section has_dividends = len(rs) > 0 if has_dividends: rs_dividends = rs # print(rs) rs = concat([rs_splits, rs_dividends]) # print(rs_splits) # print(rs_dividends) # print(rs) if (split_format and not rs.empty): # Dates in split format are yyyymmdd so convert to yyyy-mm-dd rs.Date = rs.Date.apply(lambda x: str(x)) rs.Date = rs.Date.apply( lambda x: '%s-%s-%s' % (x[:4], x[4:6], x[6:])) # pivot DataFrame to match format of a normal query rs = rs.pivot(index='Date', columns='index', values='Values') if (splits and has_splits): rs.rename(columns={'SPLIT': 'Splits'}, inplace=True) if (dividends and has_dividends and split_format): rs.rename(columns={'DIVIDEND': 'Dividends'}, inplace=True) elif (dividends and has_dividends): rs.rename(columns={'Values': 'Dividends'}, inplace=True) return rs time.sleep(pause) raise Exception("after %d tries, Yahoo did not " "return a 200 for url %s" % (pause, url))