def get_quote_yahoojp(code, start=None, end=None, interval='d'):
    """Scrape historical quotes for one stock from Yahoo! Finance Japan.

    History pages are requested one after another (``p=1, 2, ...``) until a
    page no longer contains a second, non-empty table.

    Args:
        code: Ticker code; ``.T`` is appended to it in the request URL.
        start: Start of the date range, normalised by ``web._sanitize_dates``.
        end: End of the date range, normalised by ``web._sanitize_dates``.
        interval: One of ``'d'``, ``'w'``, ``'m'`` or ``'v'``; sent as the
            ``tm`` query parameter.  ``'m'`` rows are parsed as year/month,
            every other interval as year/month/day.

    Returns:
        A DataFrame with columns ``Open, High, Low, Close, Volume,
        Adj Close``, indexed by ``Date`` in ascending order.

    Raises:
        ValueError: If ``interval`` is not an accepted value, or if no quote
            rows were found for the request.
    """
    # Validate first so a bad argument fails before any date or network work.
    if interval not in ('d', 'w', 'm', 'v'):
        raise ValueError("Invalid interval: valid values are 'd', 'w', 'm' and 'v'")

    base = 'http://info.finance.yahoo.co.jp/history/?code={0}.T&{1}&{2}&tm={3}&p={4}'
    start, end = web._sanitize_dates(start, end)
    start = 'sy={0}&sm={1}&sd={2}'.format(start.year, start.month, start.day)
    end = 'ey={0}&em={1}&ed={2}'.format(end.year, end.month, end.day)

    results = []
    p = 1
    while True:
        url = base.format(code, start, end, interval, p)
        tables = pd.read_html(url, header=0)
        # The quotes are taken from the second table; stop once it is
        # missing or has no rows.
        if len(tables) < 2 or len(tables[1]) == 0:
            break
        results.append(tables[1])
        p += 1

    if not results:
        # pd.concat([]) would raise an opaque ValueError; keep the exception
        # type but say what actually went wrong.
        raise ValueError(
            'No quote data found for code {0!r} in the requested range'.format(code))

    result = pd.concat(results, ignore_index=True)
    result.columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']
    # Monthly rows carry no day component.
    if interval == 'm':
        result['Date'] = pd.to_datetime(result['Date'], format='%Y年%m月')
    else:
        result['Date'] = pd.to_datetime(result['Date'], format='%Y年%m月%d日')
    result = result.set_index('Date')
    result = result.sort_index()
    return result
def get(self, code, start=None, end=None, interval='d'):
    """Scrape paginated price history and return it at business-day frequency.

    The URL template comes from ``self.base_url()`` and is filled with the
    code, the start/end query fragments, the interval and the page number.
    Pages are requested until one no longer contains a second, non-empty
    table.

    Args:
        code: Identifier substituted into the URL template.
        start: Start of the date range, normalised by ``web._sanitize_dates``.
        end: End of the date range, normalised by ``web._sanitize_dates``.
        interval: One of ``'d'``, ``'w'``, ``'m'`` or ``'v'``.

    Returns:
        A DataFrame with columns ``Open, High, Low, Close``, indexed by
        ``Date``, sorted ascending and re-sampled with ``asfreq('B')``.

    Raises:
        ValueError: If ``interval`` is not an accepted value, or if no rows
            were found for the request.
    """
    # Validate first so a bad argument fails before any date or network work.
    if interval not in ('d', 'w', 'm', 'v'):
        raise ValueError(
            "Invalid interval: valid values are 'd', 'w', 'm' and 'v'")

    base = self.base_url()
    start, end = web._sanitize_dates(start, end)
    start = 'sy={0}&sm={1}&sd={2}'.format(start.year, start.month, start.day)
    end = 'ey={0}&em={1}&ed={2}'.format(end.year, end.month, end.day)

    results = []
    p = 1
    while True:
        url = base.format(code, start, end, interval, p)
        tables = pd.read_html(url, header=0)
        # Rows are taken from the second table; stop once it is missing or
        # has no rows.
        if len(tables) < 2 or len(tables[1]) == 0:
            break
        results.append(tables[1])
        p += 1

    if not results:
        # pd.concat([]) would raise an opaque ValueError; keep the exception
        # type but make the cause explicit.
        raise ValueError(
            'No data found for code {0!r} in the requested range'.format(code))

    result = pd.concat(results, ignore_index=True)
    result.columns = ['Date', 'Open', 'High', 'Low', 'Close']
    # Monthly rows have no day component, so they need their own format;
    # the day-level format would fail to parse them.
    if interval == 'm':
        result['Date'] = pd.to_datetime(result['Date'], format='%Y年%m月')
    else:
        result['Date'] = pd.to_datetime(result['Date'], format='%Y年%m月%d日')
    result = result.set_index('Date')
    result = result.sort_index()
    return result.asfreq('B')
def get(self, code, start=None, end=None, interval='d'):
    """Scrape paginated quote history and return it at business-day frequency.

    ``self.base_url()`` supplies a format template that receives, in order,
    the code, the start fragment, the end fragment, the interval and the
    page number.  Scraping stops at the first page whose second table is
    missing or empty.

    Args:
        code: Identifier substituted into the URL template.
        start: Start of the date range, normalised by ``web._sanitize_dates``.
        end: End of the date range, normalised by ``web._sanitize_dates``.
        interval: One of ``'d'``, ``'w'``, ``'m'`` or ``'v'``.

    Returns:
        A DataFrame with columns ``Open, High, Low, Close, Volume,
        Adj Close``, indexed by ``Date``, sorted ascending and re-sampled
        with ``asfreq('B')``.

    Raises:
        ValueError: If ``interval`` is not an accepted value, or if no rows
            were found for the request.
    """
    # Reject a bad interval before touching dates or the network.
    if interval not in ('d', 'w', 'm', 'v'):
        raise ValueError(
            "Invalid interval: valid values are 'd', 'w', 'm' and 'v'")

    base = self.base_url()
    start, end = web._sanitize_dates(start, end)
    start = 'sy={0}&sm={1}&sd={2}'.format(start.year, start.month, start.day)
    end = 'ey={0}&em={1}&ed={2}'.format(end.year, end.month, end.day)

    pages = []
    page_no = 1
    while True:
        url = base.format(code, start, end, interval, page_no)
        tables = pd.read_html(url, header=0)
        if len(tables) < 2 or len(tables[1]) == 0:
            break
        pages.append(tables[1])
        page_no += 1

    if not pages:
        # Same exception type pd.concat([]) would raise, with a clear cause.
        raise ValueError(
            'No data found for code {0!r} in the requested range'.format(code))

    result = pd.concat(pages, ignore_index=True)
    result.columns = [
        'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']
    # Monthly rows carry only year and month; the day-level format would
    # fail to parse them.
    date_format = '%Y年%m月' if interval == 'm' else '%Y年%m月%d日'
    result['Date'] = pd.to_datetime(result['Date'], format=date_format)
    result = result.set_index('Date').sort_index()
    return result.asfreq('B')
def get_quote_yahoojp(code, start=None, end=None, interval='d'):
    """Scrape quotes for one stock and return them as a flat, date-sorted table.

    Args:
        code: Ticker code; ``.T`` is appended to it in the request URL and
            the value is repeated in the ``code`` column of the result.
        start: Start of the date range, normalised by ``web._sanitize_dates``.
        end: End of the date range, normalised by ``web._sanitize_dates``.
        interval: One of ``'d'``, ``'w'``, ``'m'`` or ``'v'``; sent as the
            ``tm`` query parameter.

    Returns:
        A DataFrame with columns ``Date, Open, High, Low, Close, Volume,
        Adj Close, code``, sorted by ``Date`` with a fresh 0..n-1 index.
        An empty list is returned instead when ``pd.read_html`` raises
        ``ValueError`` on any page or when no rows were collected.

    Raises:
        ValueError: If ``interval`` is not an accepted value.
    """
    # Validate first so a bad argument fails before any date or network work.
    if interval not in ('d', 'w', 'm', 'v'):
        raise ValueError(
            "Invalid interval: valid values are 'd', 'w', 'm' and 'v'")

    base = 'http://info.finance.yahoo.co.jp/history/?code={0}.T&{1}&{2}&tm={3}&p={4}'
    start, end = web._sanitize_dates(start, end)
    start = 'sy={0}&sm={1}&sd={2}'.format(start.year, start.month, start.day)
    end = 'ey={0}&em={1}&ed={2}'.format(end.year, end.month, end.day)

    results = []
    p = 1  # page number
    while True:
        url = base.format(code, start, end, interval, p)
        try:
            tables = pd.read_html(url, header=0)
        except ValueError:
            # NOTE(review): this also discards rows already collected from
            # earlier pages; kept as-is because callers may rely on [] as the
            # failure signal — confirm whether partial data would be better.
            return []
        if len(tables) < 2 or len(tables[1]) == 0:
            break
        results.append(tables[1])
        p += 1

    if not results:
        return []

    result = pd.concat(results, ignore_index=True)
    result.columns = [
        'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']
    # Monthly rows carry no day component.
    if interval == 'm':
        result['Date'] = pd.to_datetime(result['Date'], format='%Y年%m月')
    else:
        result['Date'] = pd.to_datetime(result['Date'], format='%Y年%m月%d日')
    # Tag every row with the requested code (appended as the last column).
    result['code'] = code
    # Sort by date, then move Date back to the first column with a new
    # 0..n-1 index.
    return result.set_index('Date').sort_index().reset_index()
def get(self, code, start=None, end=None, interval='d'):
    """Scrape paginated quote history and return it at business-day frequency.

    ``self.base_url(code)`` supplies a format template that receives the
    code, the start/end query fragments, the interval and the page number.
    Pages are requested until one no longer contains a second, non-empty
    table.

    Args:
        code: Identifier of the instrument.  ``998407`` (Nikkei average) is
            labelled with five columns, everything else with seven.
        start: Start of the date range, normalised by ``web._sanitize_dates``.
        end: End of the date range, normalised by ``web._sanitize_dates``.
        interval: One of ``'d'``, ``'w'``, ``'m'`` or ``'v'``.  ``'m'`` rows
            are parsed as year/month, every other interval as
            year/month/day.

    Returns:
        A DataFrame indexed by ``Date``, sorted ascending and re-sampled
        with ``asfreq('B')``.  Columns are ``Open, High, Low, Close`` for
        the Nikkei average, plus ``Volume, Adj Close`` otherwise.

    Raises:
        ValueError: If ``interval`` is not an accepted value, or if no rows
            were found for the request.
    """
    # Validate first so a bad argument fails before any date or network work.
    if interval not in ('d', 'w', 'm', 'v'):
        raise ValueError(
            "Invalid interval: valid values are 'd', 'w', 'm' and 'v'")

    base = self.base_url(code)
    start, end = web._sanitize_dates(start, end)
    start = 'sy={0}&sm={1}&sd={2}'.format(start.year, start.month, start.day)
    end = 'ey={0}&em={1}&ed={2}'.format(end.year, end.month, end.day)

    results = []
    p = 1
    # Scrape every page of the history.
    while True:
        url = base.format(code, start, end, interval, p)
        tables = pd.read_html(url, header=0)
        if len(tables) < 2 or len(tables[1]) == 0:
            break
        results.append(tables[1])
        p += 1

    if not results:
        # Same exception type pd.concat([]) would raise, with a clear cause.
        raise ValueError(
            'No data found for code {0!r} in the requested range'.format(code))

    # ignore_index=True renumbers the rows from zero across all pages.
    result = pd.concat(results, ignore_index=True)
    # Drop rows with missing data.
    result = result.dropna()

    # The Nikkei average is labelled with five columns.  Compare as text so
    # the string code '998407' is recognised as well as the integer.
    if str(code) == '998407':
        result.columns = ['Date', 'Open', 'High', 'Low', 'Close']
    else:
        result.columns = ['Date', 'Open', 'High', 'Low', 'Close',
                          'Volume', 'Adj Close']

    # The u'' prefix keeps the format strings unicode on Python 2.
    if interval == "m":
        result['Date'] = pd.to_datetime(result['Date'], format=u'%Y年%m月')
    else:
        result['Date'] = pd.to_datetime(result['Date'], format=u'%Y年%m月%d日')

    # 'Date' becomes the index and therefore leaves the columns.
    result = result.set_index('Date')
    result = result.sort_index()
    return result.asfreq('B')
def get_data_bbg_historical(symbol, flds, start=None, end=None):
    """
    Get historical data from bloomberg.

    Retrieve the flds for the specified start to end date for the given
    symbol through the ``Bloomberg.Data.1`` COM object.

    symbol: Bloomberg identifier
    flds: list of bloomberg fields to retrieve
    start, end: date range, normalised by ``_sanitize_dates``

    Returns a DataFrame indexed by ``Date`` with one column per entry of
    ``flds``.
    """
    bbg = Dispatch('Bloomberg.Data.1')
    # NOTE(review): pandas.io.data is not available in every pandas
    # release — confirm against the pinned pandas version.
    from pandas.io.data import _sanitize_dates
    start, end = _sanitize_dates(start, end)
    data = bbg.BLPGetHistoricalData(symbol, flds,
                                    pywintypes.Time(start.timetuple()),
                                    pywintypes.Time(end.timetuple()))

    # Convert to datetime64 and ensure nan's for strings.  Each returned
    # row keeps its values in r[0].
    rows = [[_convert_value(value) for value in r[0]] for r in data]
    # Transpose rows into columns.  zip() must be materialised because the
    # columns are indexed below and Python 3's zip is a one-shot iterator.
    cdata = list(zip(*rows))

    # The first returned column is labelled 'Date'; the rest follow flds.
    flds = ['Date'] + list(flds)
    frame = pandas.DataFrame(
        {cname: cdata[i] for i, cname in enumerate(flds)}, columns=flds)
    return frame.set_index('Date')
def get_data_bbg_historical(symbol, flds, start=None, end=None):
    """
    Get historical data from bloomberg.

    Queries the ``Bloomberg.Data.1`` COM object for ``flds`` of ``symbol``
    between ``start`` and ``end`` and returns the values as a DataFrame
    indexed by ``Date``.

    symbol: Bloomberg identifier
    flds: list of bloomberg fields to retrieve
    start, end: date range, normalised by ``_sanitize_dates``
    """
    bbg = Dispatch('Bloomberg.Data.1')
    # NOTE(review): pandas.io.data may be missing from newer pandas
    # releases — verify for the supported pandas version.
    from pandas.io.data import _sanitize_dates
    start, end = _sanitize_dates(start, end)

    first_day = pywintypes.Time(start.timetuple())
    last_day = pywintypes.Time(end.timetuple())
    data = bbg.BLPGetHistoricalData(symbol, flds, first_day, last_day)

    # Convert to datetime64 and ensure nan's for strings; the values of
    # each returned row live in its first element.
    converted = [[_convert_value(item) for item in r[0]] for r in data]
    # Transposed into a list, not left as a bare zip(): the columns are
    # looked up by position and Python 3's zip object cannot be indexed.
    cdata = list(zip(*converted))

    # Column 0 is labelled 'Date'; the requested fields follow in order.
    flds = ['Date'] + list(flds)
    by_name = {}
    for i, cname in enumerate(flds):
        by_name[cname] = cdata[i]
    frame = pandas.DataFrame(by_name, columns=flds)
    return frame.set_index('Date')
def get_quote_yahoojp(code, start=None, end=None, interval='d'):
    """Scrape quotes for one stock together with the history page title.

    Args:
        code: Ticker code; ``.T`` is appended to it in the request URL.
        start: Start of the date range, normalised by ``web._sanitize_dates``.
        end: End of the date range, normalised by ``web._sanitize_dates``.
        interval: One of ``'d'``, ``'w'``, ``'m'`` or ``'v'``; sent as the
            ``tm`` query parameter.

    Returns:
        A ``(title, result)`` tuple.  ``title`` is the text of the
        ``<title>`` element of the last page requested; ``result`` is a
        DataFrame with columns ``Open, High, Low, Close, Volume,
        Adj Close``, indexed by ``Date`` in ascending order.

    Raises:
        ValueError: If ``interval`` is not an accepted value, or if no quote
            rows were found for the request.
    """
    # Validate first so a bad argument fails before any date or network work.
    if interval not in ('d', 'w', 'm', 'v'):
        raise ValueError(
            "Invalid interval: valid values are 'd', 'w', 'm' and 'v'")

    base = 'http://info.finance.yahoo.co.jp/history/?code={0}.T&{1}&{2}&tm={3}&p={4}'
    start, end = web._sanitize_dates(start, end)
    start = 'sy={0}&sm={1}&sd={2}'.format(start.year, start.month, start.day)
    end = 'ey={0}&em={1}&ed={2}'.format(end.year, end.month, end.day)

    results = []
    p = 1
    while True:
        url = base.format(code, start, end, interval, p)
        # NOTE(review): every page is downloaded twice, once for the title
        # and once for the tables.  If the title is the same on all pages it
        # could be fetched once — confirm before changing.
        title = lxml.html.parse(url).find(".//title").text
        tables = pd.read_html(url, header=0)
        if len(tables) < 2 or len(tables[1]) == 0:
            break
        results.append(tables[1])
        p += 1

    if not results:
        # Same exception type pd.concat([]) would raise, with a clear cause.
        raise ValueError(
            'No quote data found for code {0!r} in the requested range'.format(code))

    result = pd.concat(results, ignore_index=True)
    result.columns = [
        'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']
    # Dates are written with Japanese year/month/day markers; monthly rows
    # have no day component.
    if interval == 'm':
        result['Date'] = pd.to_datetime(result['Date'], format='%Y年%m月')
    else:
        result['Date'] = pd.to_datetime(result['Date'], format='%Y年%m月%d日')
    result = result.set_index('Date')
    result = result.sort_index()
    return title, result