def importData(self):
    """ Import (New) Data from Yahoo. """
    start = self._getLatestDate()
    end = self._getTodaysDate()
    Logger.log(logging.INFO, "Loading Data",
               {"scope": __name__, "tickerCode": self._tickerCode, "start": str(start), "end": str(end)})
    self._data = DataReader(self._tickerCode, "yahoo", start, end)
    self._data['Code'] = self._tickerCode
    for item in ['Open', 'High', 'Low']:
        self._data[item] = self._data[item] * self._data['Adj Close'] / self._data['Close']
    self._data.drop('Close', axis=1, inplace=True)
    self._data.rename(columns={'Adj Close': 'Close'}, inplace=True)
    self._data['Volume'] = self._data['Volume'].astype(float)
    connection = sqlite3.connect(pyswing.database.pySwingDatabase)
    query = "insert or replace into Equities (Date, Open, High, Low, Volume, Close, Code) values (?,?,?,?,?,?,?)"
    connection.executemany(query, self._data.to_records(index=True))
    connection.commit()
    connection.close()
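# A minimal, self-contained sketch (not the pipeline above) of the back-adjustment
# used in importData(): Open/High/Low are rescaled by Adj Close / Close so every
# price column sits on the dividend/split-adjusted scale. The numbers are made up.
import pandas as pd

raw = pd.DataFrame({'Open': [100.0, 102.0], 'High': [103.0, 104.0],
                    'Low': [99.0, 101.0], 'Close': [102.0, 103.0],
                    'Adj Close': [51.0, 51.5]})
for item in ['Open', 'High', 'Low']:
    raw[item] = raw[item] * raw['Adj Close'] / raw['Close']
raw.drop('Close', axis=1, inplace=True)
raw.rename(columns={'Adj Close': 'Close'}, inplace=True)
print(raw)  # all four price columns are now on the adjusted scale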
def stockhistorynobackfilltodataframeusingcache(symbol, fromdate, todate):
    print('--------------------------')
    print('Initialized pullprices.stockhistorynobackfilltodataframeusingcache')
    import pandas as pd
    from pandas.io.data import DataReader
    import config
    mycachefolder = config.mycachefolder
    import mytools
    mytools.general().make_sure_path_exists(mycachefolder)
    cachedfilepathname = mycachefolder + '\\stockhistorynobackfill ' + symbol + ' ' + fromdate + ' ' + todate + '.csv'
    import os
    if os.path.isfile(cachedfilepathname):
        print(' Found cached file: ' + cachedfilepathname)
        df_hist = pd.read_csv(cachedfilepathname, index_col=0)
    else:
        print(' Getting new file: ' + cachedfilepathname)
        df_hist = DataReader(symbol, "yahoo", fromdate, todate)
        df_hist.to_csv(cachedfilepathname, columns=('Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'))
    return df_hist
def readData(self, lookupTicker, source, start, end):
    '''Read the data - assumes start and end are datetime.date objects'''
    try:
        lookupTicker = str(lookupTicker)
        if source == 'Quandl':
            # Use the Quandl reader
            start = str(start)
            end = str(end)
            data = Quandl.get(lookupTicker, authtoken=self.quandlAuthToken,
                              trim_start=start, trim_end=end)
        else:
            # Use the pandas.io DataReader
            data = DataReader(lookupTicker, source, start, end)
            data = data.reset_index()
        logging.info("Read ticker {}".format(lookupTicker))
    except:
        logging.error("importData: Can't read ticker {}".format(lookupTicker))
        raise
    else:
        return data
def main():
    # db = psql.get_db()
    # product_codes = db.prepare('''SELECT p.code AS code, p.id as id
    #                               FROM products p LEFT JOIN companies c ON p.company_id = c.id
    #                               WHERE c.sector IS NOT NULL and p.id < 9354
    #                               ORDER BY p.id''')()
    product_codes = get_components_yahoo('^DJI').index
    total = len(product_codes)
    cur = 0
    products = []
    logger.info('Start downloading of %d products.', total)
    for code in product_codes:
        cur += 1
        try:
            product = DataReader(code, 'yahoo', start=date_from, end=date_to)
        except:
            logger.warn('(%d/%d) Failed to download %s.', cur, total, code)
            continue
        product.code = code
        products.append(product)
        logger.info('(%d/%d) Downloaded %s.', cur, total, code)
    logger.info('Download complete.')
    return opt(products)
def save_data():
    start = '1/1/1990'
    # Get S&P 500 data from yahoo
    sp500 = get_data_yahoo('^GSPC', start=start)['Adj Close']
    sp500.name = 'SP500'
    vix = get_data_yahoo('^VIX', start=start)['Adj Close']
    vix.name = 'VIX'
    # Get ten year and 3 month t-bill rates
    ten_yr = DataReader('DGS10', 'fred', start=start)
    three_mon = DataReader('DGS3MO', 'fred', start=start)
    # FRED reports missing values as '.'; drop those rows before casting to float
    ten_yr = ten_yr.ix[ten_yr.DGS10.str.count(r'^\.') != 1].astype(float)
    three_mon = three_mon.ix[three_mon.DGS3MO.str.count(r'^\.') != 1].astype(float)
    data = ten_yr.join(three_mon)
    data = data.join(sp500)
    data = data.join(vix)
    # Drop non-like observations (obs on different days)
    data = data.dropna()
    data.save('SP_YC.db')
    data.to_csv('the_data.csv')
def getTimewindowStockPrice(ticker):
    timewindowStockPrice = {}  # key: year; value: {week: mean weekly price}
    stockDF = DataReader(ticker, "google", "2009-01-01", datetime.today().date())
    for idx, row in stockDF.iterrows():
        dt = dateutil.parser.parse(str(idx)).date()
        year = dt.isocalendar()[0]
        week = dt.isocalendar()[1]
        price = row['Close']
        print year, week, price
        if year not in timewindowStockPrice:
            timewindowStockPrice[year] = {}
        if week not in timewindowStockPrice[year]:
            timewindowStockPrice[year][week] = []
        timewindowStockPrice[year][week].append(price)
    # Collapse each week's prices to the mean weekly price
    for year in timewindowStockPrice.keys():
        for week in timewindowStockPrice[year].keys():
            timewindowStockPrice[year][week] = scipy.mean(timewindowStockPrice[year][week])
    return timewindowStockPrice
def pull_stocks_data(retries=2, start_date=None, end_date=None):
    """
    Pulling stocks raw data, of the stocks in the symbol list.
    :param retries: number of retries for getting each stock's data
    :param start_date: the first day of the data (datetime format), default value is 2 years before end_date.
    :param end_date: the last day of the data (datetime format), default value is today
    """
    symbols = get_stocks_symbols(write_to_files=False)
    log.notice("Starting to pull stocks data")
    end_date = datetime.today() if end_date is None else end_date
    # take as a default 2 years backwards (original always overwrote start_date; honor the argument)
    start_date = end_date - timedelta(365 * 2) if start_date is None else start_date
    for retry in range(retries):
        # Iterate over a copy so removing fetched symbols doesn't skip entries
        for symbol in list(symbols):
            filepath = make_filepath(DATA_PATH + "symbols", symbol, 'csv')
            try:
                data = DataReader(symbol, 'yahoo', start_date, end_date, retry_count=1)
            except IOError as e:
                log.error("IOError for data query of symbol: {}\n\tError msg: {}".format(symbol, e))
                continue
            data.to_csv(filepath)
            symbols.remove(symbol)
        log.warning("Unable to get {} symbols on try #{}".format(len(symbols), retry + 1))
    log.error("Unable to get {} symbols after {} retries:\n{}".format(len(symbols), retries, symbols))
def refreshYahooDatabase(db):
    rs = yahoo.yahooRecordset(db)
    ts = timeseries.timeseriesRecordset(db)
    # step 1: get the yahoo index end date
    idx = rs.select('*')
    for i in idx:
        try:
            r = ts.getLastDate(i['id'])
            start = r[0]['max_date']
            data = pd.DataFrame()
            if start is not None:
                start = start + dt.timedelta(days=1)
                data = DataReader(str(i['key']), 'yahoo', start)
            else:
                data = DataReader(str(i['key']), 'yahoo')
            # insert new data
            add = []
            for index, row in data.iterrows():
                add.append((i['id'], index, row[i['field']]))
            ts.insert(add)
        except Exception as e:
            print('error processing index ' + str(i['key']) + '/' + str(i['field']) + ': ' + str(e))
def get_riskfree_rate(self, startdate, enddate, freq="M", maturity='1M'):
    """ Rates from FRED http://research.stlouisfed.org/fred2/categories/116 """
    rfcache = self.__class__._cacherfrate
    grabdata = False
    if rfcache is None:
        grabdata = True
    elif rfcache[0] > startdate or rfcache[1] < enddate:
        # cached range does not cover the requested range; refetch
        grabdata = True
    if grabdata:
        dt = DataReader('DTB4WK', "fred", startdate, enddate)
        dt.columns = ['RFRate']
        dt.fillna(method='backfill', inplace=True)
        rfcache = (startdate, enddate, dt)
        self.__class__._cacherfrate = rfcache
    else:
        dt = rfcache[2]
    dsm = dt[startdate:enddate].resample('M')
    return dsm
def fetch_timeseries(symbol, dir_name='data', use_cache=True):
    """ Read time series data. Use cached version if it exists and
        use_cache is True, otherwise retrieve, cache, then read. """
    base_dir = ''
    try:
        conf = pf.read_config()
        base_dir = conf['base_dir']
    except:
        pass
    finally:
        dir_name = os.path.join(base_dir, dir_name)
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    timeseries_cache = os.path.join(dir_name, symbol + '.csv')
    if not (os.path.isfile(timeseries_cache) and use_cache):
        ts = DataReader(symbol, 'yahoo', start=datetime.datetime(1900, 1, 1))
        ts.to_csv(timeseries_cache, encoding='utf-8')
    ts = pd.read_csv(timeseries_cache, index_col='Date', parse_dates=True)
    ts = _adj_column_names(ts)
    return ts
def get_data_from_yahoo(ticker):
    # No older computations OK
    start_date = date(2010, 01, 01)
    end_date = date.today()
    print "Getting data for " + ticker + " from yahoo"
    df = DataReader(ticker, 'yahoo', start_date, end_date)
    df.to_pickle(TMP_dir + ticker)
    return df
def Econ_env(YYYY, m, dd):
    start_date = datetime.datetime(YYYY, m, dd)
    GDP = DataReader('GDP', "fred", start=start_date)  # fetched but currently unused
    sp500 = DataReader('^GSPC', "yahoo", start=start_date)
    Array = DataFrame({'S&P': sp500["Adj Close"]})
    return Array
def getPayOff(year, month):
    nifty = DataReader("^NSEI", "yahoo", datetime(year, month, 1), datetime(year, month, 25))
    bn = DataReader("^NSEBANK", "yahoo", datetime(year, month, 1), datetime(year, month, 25))
    nifty_daily = nifty.diff()['Close']
    bn_daily = bn.diff()['Close']
    nifty_daily_return = 50 * nifty_daily  # contract multiplier
    bn_daily_return = 25 * bn_daily        # contract multiplier
    # Short NIFTY leg, long Bank NIFTY leg
    return -nifty_daily_return.sum() + bn_daily_return.sum()
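# Toy check (made-up numbers) of the payoff arithmetic in getPayOff(): summing
# the daily diffs of Close telescopes to last minus first, so each leg's payoff
# is just the contract multiplier times the net point move over the window.
import pandas as pd

close = pd.Series([100.0, 102.0, 101.0, 105.0])
daily = close.diff()
print(daily.sum(), close.iloc[-1] - close.iloc[0])  # both print 5.0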
def hist_vol(sym, days=10):
    try:
        quotes = DataReader(sym, 'yahoo')['Close'][-days:]
    except Exception:
        print "Problem getting historical volatility!"
        raise SystemExit(1)
    logreturns = np.log(quotes / quotes.shift(1))
    # 252 trading days in a year (annualized volatility)
    vol = np.sqrt(252 * logreturns.var())
    return float(vol)
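# A self-contained sketch of the annualization used in hist_vol(): volatility
# is sqrt(252 * variance of daily log returns). Synthetic prices stand in for
# the Yahoo quotes.
import numpy as np
import pandas as pd

quotes = pd.Series([100.0, 101.0, 99.5, 100.5, 102.0, 101.2])
logreturns = np.log(quotes / quotes.shift(1))
print(np.sqrt(252 * logreturns.var()))  # annualized volatility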
def main(symbol):
    t1 = datetime.now()
    t2 = t1 - timedelta(days=PAST_DAYS)
    df = DataReader(symbol, FIN_SERVICE_PROVIDER, t2, t1)
    print df.head()
    print '...' * 20
    print df.tail()
    return df
def ts(self, symbol):
    parse = self.parsesymbol(symbol)
    df = DataReader(parse['eq'], parse['proto'], start=datetime.datetime(1950, 1, 1))
    # Normalize column names, e.g. 'Adj Close' -> 'adj_close'
    df = df.rename(columns=lambda x: '_'.join(x.split()).lower())
    ts = df[parse['hlocv']]
    ts.index = map(lambda x: x.date(), ts.index)
    ts.name = parse['eq']
    if '@' in symbol:
        ts.name += '@%s' % (parse['hlocv'])
    return ts
def getLongShortPayOff(year, month, longSymbol, longQty, shortSymbol, shortQty):
    shortStk = DataReader(shortSymbol, "yahoo", datetime(year, month, 1), datetime(year, month, 25))
    longStk = DataReader(longSymbol, "yahoo", datetime(year, month, 1), datetime(year, month, 25))
    short_daily = shortStk.diff()['Close']
    long_daily = longStk.diff()['Close']
    short_daily_return = shortQty * short_daily
    long_daily_return = longQty * long_daily
    return abs(long_daily_return.sum()) - abs(short_daily_return.sum())
def _load_raw_yahoo_data(indexes=None, stocks=None, start=None, end=None):
    """Load closing prices from yahoo finance.

    :Optional:
        indexes : dict (Default: {'SPX': '^GSPC'})
            Financial indexes to load.
        stocks : list (Default: ['AAPL', 'GE', 'IBM', 'MSFT', 'XOM', 'AA', 'JNJ', 'PEP', 'KO'])
            Stock closing prices to load.
        start : datetime (Default: datetime(1990, 1, 1, 0, 0, 0, 0, pytz.utc))
            Retrieve prices from start date on.
        end : datetime (Default: datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc))
            Retrieve prices until end date.

    :Note:
        This is based on code presented in a talk by Wes McKinney:
        http://wesmckinney.com/files/20111017/notebook_output.pdf
    """
    assert indexes is not None or stocks is not None, """
must specify stocks or indexes"""

    if start is None:
        start = pd.datetime(1990, 1, 1, 0, 0, 0, 0, pytz.utc)

    if start is not None and end is not None:
        assert start < end, "start date is later than end date."

    data = OrderedDict()

    if stocks is not None:
        for stock in stocks:
            print(stock)
            stock_pathsafe = stock.replace(os.path.sep, '--')
            cache_filename = "{stock}-{start}-{end}.csv".format(
                stock=stock_pathsafe, start=start, end=end).replace(':', '-')
            cache_filepath = get_cache_filepath(cache_filename)
            if os.path.exists(cache_filepath):
                stkd = pd.DataFrame.from_csv(cache_filepath)
            else:
                stkd = DataReader(stock, 'yahoo', start, end).sort_index()
                stkd.to_csv(cache_filepath)
            data[stock] = stkd

    if indexes is not None:
        for name, ticker in iteritems(indexes):
            print(name)
            stkd = DataReader(ticker, 'yahoo', start, end).sort_index()
            data[name] = stkd

    return data
def get_prices_yahoo(self):
    """ It gets prices data from yahoo """
    try:
        self.df_prices = DataReader(self.symbol, "yahoo", self.from_date, self.to_date)
        self.df_prices['pct Adj Close'] = self.df_prices.pct_change()['Adj Close']
        self.data = True
    except Exception, e:
        print e
        sleep(20)
def importData():
    # Start Time
    start = datetime(2010, 1, 1)
    end = datetime.date(datetime.now())

    problems = []
    dataImportProblems = []
    for series in sp500constituents[485:len(sp500constituents)]:
        print series
        try:
            data = DataReader(series, "yahoo", start, end)
            data = data.reset_index()
        except:
            print "Can't read {}".format(series)
            dataImportProblems.append(series)
            continue
        con = sqlite3.connect("/home/phcostello/Documents/Data/FinanceData.sqlite")
        try:
            psql.write_frame(data, series, con)
            con.commit()
        except:
            print "Problems with {}".format(series)
            problems.append(series)
        finally:
            con.close()

    # Rewrite tables so dates have formats the RODBC driver recognizes.
    # Should check that this is occurring above.
    con = sqlite3.connect("/home/phcostello/Documents/Data/FinanceData.sqlite")
    for tb in sp500constituents:
        if psql.has_table(tb, con):
            sqltxt = "SELECT * FROM {}".format(tb)
            data = psql.read_frame(sqltxt, con)
            sqlDropTxt = 'DROP TABLE "main"."{}"'.format(tb)
            psql.execute(sqlDropTxt, con)
            con.commit()
            psql.write_frame(data, tb, con)
            con.commit()
    con.close()
def get_prices_yahoo(self):
    """ It gets prices data from yahoo (last year) """
    try:
        self.df_prices = DataReader(self.symbol, "yahoo", self.from_date, self.to_date)
        self.df_prices['pct Adj Close'] = self.df_prices.pct_change()['Adj Close']
    except Exception, e:
        print e
        raise
def run(self):
    """Collects data from Yahoo finance and preprocesses it."""
    # Get reference dates from SP500
    df_ref = DataReader('SPY', 'yahoo', self.start, self.end)['Adj Close']
    df_ref.sort_index(inplace=True)
    if 'SPY' in self.assets:
        self.df = df_ref.to_frame(name='SPY')
    else:
        self.df = pd.DataFrame(index=df_ref.index)
    # Retrieve Adj Close prices for the remaining assets
    for asset in self.assets:
        if asset == 'SPY':
            continue  # already loaded as the reference series
        df_asset = DataReader(asset, 'yahoo', self.start, self.end)['Adj Close']
        df_asset.rename(asset, inplace=True)
        self.df = self.df.join(df_asset, how='left')
    # Fill NaN
    self.df.fillna(method='ffill', inplace=True)
    self.df.fillna(method='bfill', inplace=True)
    # Compute assets' simple returns
    self.df = (self.df / self.df.shift(1) - 1.0).ix[1:, :]
    # Reset indices
    self.df.reset_index(drop=True, inplace=True)
def download_ohlc(sector_tickers, start, end):
    sector_ohlc = {}
    for sector, tickers in sector_tickers.iteritems():
        print 'Downloading data from Yahoo for %s sector' % sector
        data = DataReader(tickers, 'yahoo', start, end)
        # Back-adjust Open/High/Low by the Adj Close / Close ratio
        for item in ['Open', 'High', 'Low']:
            data[item] = data[item] * data['Adj Close'] / data['Close']
        data.rename(items={'Open': 'open', 'High': 'high', 'Low': 'low',
                           'Adj Close': 'close', 'Volume': 'volume'}, inplace=True)
        data.drop(['Close'], inplace=True)
        sector_ohlc[sector] = data
    print 'Finished downloading data'
    return sector_ohlc
def getHistoricalQuotes(self, symbol, index, market=None):
    assert isinstance(index, pd.Index)
    source = 'yahoo'
    try:
        quotes = DataReader(symbol, source, index[0], index[-1])
    except:
        log.error('** Could not get {} quotes'.format(symbol))
        return pd.DataFrame()
    # Reindex only when the frequency is neither business-daily nor daily
    # (the original `or` made this condition always true)
    if index.freq != pd.datetools.BDay() and index.freq != pd.datetools.Day():
        # NOTE reIndexDF has a column arg but it is not provided here
        quotes = utils.reIndexDF(quotes, delta=index.freq, reset_hour=False)
    if not quotes.index.tzinfo:
        quotes.index = quotes.index.tz_localize(self.tz)
    quotes.columns = utils.Fields.QUOTES
    return quotes
def ADF(ticker, start, end):
    print('ADF')
    stock = DataReader(ticker, "yahoo", start, end)
    result = ts.adfuller(stock['Adj Close'], 1)
    print(result)
    print('')
    test = result[0]
    crit = result[4]
    one = crit['1%']
    five = crit['5%']
    ten = crit['10%']
    if test < one:
        print('Less than 1%')
    elif test < five:
        print('Less than 5%')
    elif test < ten:
        print('Less than 10%')
    else:
        print('Cannot reject Null Hypothesis')
    print('-----------------------------------------')
    return stock
def peak_begin_dates(start="01/01/1972", end=datetime.now()):
    """
    Use the fred dataset `USRECQ` to determine the beginning of the
    peaks before all recessions between dates start and end

    Parameters
    ----------
    start : string or datetime.datetime, optional(default='01/01/1972')
        A string or other acceptable pandas date identifier that marks
        the beginning of the window for which we will search for starts
        of peaks
    end : string or datetime.datetime, optional(default=datetime.now())
        The ending date of the search window

    Returns
    -------
    rec_startind : pd.DatetimeIndex
        A pandas DatetimeIndex representing the starting points of each
        "peak" from start to end
    """
    # Get quarterly recession dates from FRED
    rec_dates = DataReader("USRECQ", "fred", start=start)
    one_vals = np.where(rec_dates == 1)[0]
    rec_start = [one_vals[0]]

    # Find the beginning of the recession dates (don't include ones that
    # begin within three years of a previous one -- hence the `+12` quarters)
    for d in one_vals:
        if d > max(rec_start) + 12:
            rec_start.append(d)

    rec_startind = rec_dates.index[rec_start]
    return rec_startind
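# Sketch of the peak-detection rule in peak_begin_dates() on a synthetic
# quarterly 0/1 recession flag (not FRED data): keep the first 1, then any 1
# starting more than 12 quarters (three years) after the previous kept start.
import numpy as np

flags = np.array([0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1])
one_vals = np.where(flags == 1)[0]
rec_start = [one_vals[0]]
for d in one_vals:
    if d > max(rec_start) + 12:
        rec_start.append(d)
print(rec_start)  # -> [2, 16]; positions 3, 6 and 17 are absorbed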
def create_lagged_series(symbol, start_date, end_date, lags=5):
    """
    Creates a pandas DataFrame that stores the percentage returns of the
    adjusted closing price of a stock from Yahoo Finance, along with a
    number of lagged returns, the trading volume, and the direction of
    each day's move.
    """
    ts = DataReader(symbol, "yahoo",
                    start_date - datetime.timedelta(days=365), end_date)
    tslag = pd.DataFrame(index=ts.index)
    tslag["Today"] = ts["Adj Close"]
    tslag["Volume"] = ts["Volume"]
    for i in range(0, lags):
        tslag["Lag%s" % str(i + 1)] = ts["Adj Close"].shift(i + 1)
    tsret = pd.DataFrame(index=tslag.index)
    tsret["Volume"] = tslag["Volume"]
    tsret["Today"] = tslag["Today"].pct_change() * 100.0
    # Replace (near-)zero returns so the sign/direction is well defined
    for i, x in enumerate(tsret["Today"]):
        if abs(x) < 0.0001:
            tsret["Today"][i] = 0.0001
    for i in range(0, lags):
        tsret["Lag%s" % str(i + 1)] = tslag["Lag%s" % str(i + 1)].pct_change() * 100.0
    tsret["Direction"] = np.sign(tsret["Today"])
    tsret = tsret[tsret.index >= start_date]
    return tsret
def stockhistory(symbol, fromdate, todate):
    from pandas.io.data import DataReader
    hist = DataReader(symbol, "yahoo", fromdate, todate)
    return hist
def historical_volatility(sym, days):
    "Return the annualized stddev of daily log returns of `sym`."
    try:
        quotes = DataReader(sym, 'yahoo')['Close'][-days:]
    except Exception, e:
        print "Error getting data for symbol '{}'.\n".format(sym), e
        return None, None
    logreturns = np.log(quotes / quotes.shift(1))
    # Annualize using 252 trading days per year (matches hist_vol above)
    return np.sqrt(252 * logreturns.var())
class Individual_screener():
    """ This class will store the relevant information and functions for
        the signal detection of entry points of one Strategy in one Security """

    def __init__(self, symbol, to_date=dt.datetime.today()):
        self.data = False
        self.symbol = symbol
        self.to_date = to_date
        self.from_date = self.to_date.replace(to_date.year - 1)
        self.signal = False
        self.get_prices_yahoo()

    def get_prices_yahoo(self):
        """ It gets prices data from yahoo (last year) """
        try:
            self.df_prices = DataReader(self.symbol, "yahoo", self.from_date, self.to_date)
            self.df_prices['pct Adj Close'] = self.df_prices.pct_change()['Adj Close']
            self.data = True
        except Exception, e:
            print e
            sleep(5)
def get_prices_df(self, ticker, date_start, date_end):
    try:
        cotation_data = DataReader(ticker, "yahoo", date_start, date_end)
        # Skip days with zero volume, e.g. holidays such as 01/01
        cotation_data = cotation_data[cotation_data.Volume != 0]
    except Exception as e:
        raise ErrorInternetConnexion('yahoo DataReader', e)
    return cotation_data
def main():
    '''
    1. The data from Yahoo! Finance is not grabbed by calling URL APIs;
       it is fetched using the pandas DataReader API.
    2. This program gets TWSE data only; to get OTC data, the code needs
       to be modified.
    '''
    # Set up the figure
    stock_fig = plt.figure()
    stock_plt = plt.subplot2grid((1, 1), (0, 0), colspan=1)
    stock_title = "{} day price".format(stock_num)
    plt.suptitle(stock_title)
    startday = dtime.date(2000, 1, 1)
    # Append ".TW" to tell Yahoo! Finance to query TWSE stock data.
    # To query OTC, append ".TWO" instead.
    stock_str = "{}.TW".format(stock_num)
    # For how DataReader() works, refer to data.py in pandas.
    try:
        stock_data = DataReader(stock_str, 'yahoo', startday)
        # Clear the current axes
        stock_plt.cla()
        # Turn the axes grids on
        stock_plt.grid(True)
        # Plot date and price
        stock_plt.plot(stock_data.index, stock_data['Close'])
        # Show the plot
        plt.show()
    except:
        exit("Error happened!!\nTry: python TwanStkEx1.py 2330")
def test_read_famafrench(self):
    for name in ("F-F_Research_Data_Factors",
                 "F-F_Research_Data_Factors_weekly",
                 "6_Portfolios_2x3",
                 "F-F_ST_Reversal_Factor"):
        ff = DataReader(name, "famafrench")
        assert ff
        assert isinstance(ff, dict)
def set_source(self, source, tickers, start, end):
    prices = pd.DataFrame()
    counter = 0.
    for ticker in tickers:
        try:
            # Log the ticker and the fraction of tickers processed so far
            self._logger.info('Loading ticker %s (%.0f%% done)' % (ticker, 100 * counter / len(tickers)))
            prices[ticker] = DataReader(ticker, source, start, end).loc[:, 'Close']
        except Exception as e:
            self._logger.error(e)
        counter += 1
    events = []
    for row in prices.iterrows():
        timestamp = row[0]
        series = row[1]
        vals = series.values
        indx = series.index
        # Shuffle the per-timestamp values
        for k in np.random.choice(len(vals), replace=False, size=len(vals)):
            if np.isfinite(vals[k]):
                events.append((timestamp, indx[k], vals[k]))
    self._source = events
    self._logger.info('Loaded data!')
def get_data(stock, starttime, endtime):
    ibm = DataReader(stock, 'yahoo', starttime, endtime)
    daily_returns = deque(maxlen=c.normalize_std_len)
    daily_ret_arr = []
    return_array = []
    lastAc = ibm['Adj Close'][0]
    for ac in ibm['Adj Close']:
        return_array.append(ac)
        try:
            daily_return = (ac - lastAc) / lastAc
            daily_returns.append(daily_return * scale)
            daily_ret_arr.append(daily_return * scale)
            lastAc = ac
        except KeyError:
            # leftover guard from the original date-keyed lookup
            pass
    print "Records found: " + str(len(daily_ret_arr))
    return daily_ret_arr, return_array
def stockhistorybackfilledtodictionary(symbol, fromdate, todate):
    from pandas.io.data import DataReader
    from datetime import datetime, timedelta
    hist = DataReader(symbol, "yahoo", fromdate, todate)
    date_format = "%Y-%m-%d"
    d = datetime.strptime(fromdate, date_format)
    delta = timedelta(days=1)
    last_adjclose = 'NaN'
    dictAdjClose = {}
    # Walk every calendar day, carrying the last Adj Close across gaps
    while d <= datetime.strptime(todate, date_format):
        d_string = d.strftime(date_format)
        if d_string in hist.index:
            last_adjclose = hist.ix[d_string]['Adj Close']
            print(d_string, last_adjclose)
        else:
            print(d_string, 'nothing', last_adjclose)
        dictAdjClose[d_string] = [('AdjClose', last_adjclose)]
        d += delta
    return dictAdjClose
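# Standalone sketch of the calendar forward-fill loop above: walk every
# calendar day between two dates and carry the last seen value across gaps
# (weekends/holidays). The quotes dict is made up.
from datetime import datetime, timedelta

hist = {'2020-01-02': 10.0, '2020-01-03': 11.0, '2020-01-06': 12.0}
d, last, filled = datetime(2020, 1, 2), float('nan'), {}
while d <= datetime(2020, 1, 6):
    key = d.strftime('%Y-%m-%d')
    last = hist.get(key, last)
    filled[key] = last
    d += timedelta(days=1)
print(filled)  # the weekend (01-04, 01-05) carries 11.0 forward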
def test_read_famafrench(self):
    for name in ("F-F_Research_Data_Factors",
                 "F-F_Research_Data_Factors_weekly",
                 "6_Portfolios_2x3",
                 "F-F_ST_Reversal_Factor",
                 "F-F_Momentum_Factor"):
        ff = DataReader(name, "famafrench")
        self.assertTrue(ff is not None)
        self.assertIsInstance(ff, dict)
def historical_pandas_yahoo(symbol, source='yahoo', start=None, end=None):
    ''' Fetch historical quotes from Yahoo! Finance '''
    # NOTE Panel for multiple symbols?
    # NOTE the 'Adj Close' column name is awkward (it contains a space)
    return DataReader(symbol, source, start=start, end=end)
def DownloadStocks(self, startingDate, endDate):
    for stock in self._names:
        print("Getting data from {0}...".format(stock))
        stockData = DataReader(stock, "google", startingDate, endDate)
        self._columns = stockData.columns
        print(" Number of lines: {0}".format(stockData.shape[0]))
        self._data.append(stockData)
    return self._data
def downloadStock(ticker, dataSource, start, end):
    gtemp = pd.DataFrame()
    try:
        gtemp = DataReader(ticker, dataSource, start, end)
        print ticker
    except:
        pass
    return gtemp
def load_from_yahoo(indexes=None, stocks=None, start=None, end=None):
    """Load closing prices from yahoo finance.

    :Optional:
        indexes : dict (Default: {'SPX': '^GSPC'})
            Financial indexes to load.
        stocks : list (Default: ['AAPL', 'GE', 'IBM', 'MSFT', 'XOM', 'AA', 'JNJ', 'PEP', 'KO'])
            Stock closing prices to load.
        start : datetime (Default: datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc))
            Retrieve prices from start date on.
        end : datetime (Default: datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc))
            Retrieve prices until end date.

    :Note:
        This is based on code presented in a talk by Wes McKinney:
        http://wesmckinney.com/files/20111017/notebook_output.pdf
    """
    if indexes is None:
        indexes = {'SPX': '^GSPC'}
    if stocks is None:
        stocks = ['AAPL', 'GE', 'IBM', 'MSFT', 'XOM', 'AA', 'JNJ', 'PEP', 'KO']
    if start is None:
        start = pd.datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc)
    if end is None:
        end = pd.datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)

    assert start < end, "start date is later than end date."

    data = OrderedDict()

    for stock in stocks:
        print stock
        stkd = DataReader(stock, 'yahoo', start, end).sort_index()
        data[stock] = stkd

    for name, ticker in indexes.iteritems():
        print name
        stkd = DataReader(ticker, 'yahoo', start, end).sort_index()
        data[name] = stkd

    df = pd.DataFrame({key: d['Close'] for key, d in data.iteritems()})
    df.index = df.index.tz_localize(pytz.utc)

    return df
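# Sketch of the final assembly step in load_from_yahoo(): a dict of per-symbol
# frames is reduced to a single DataFrame of Close prices keyed by symbol, then
# tz-localized to UTC. The two frames here are hand-made stand-ins.
import pandas as pd
import pytz

idx = pd.date_range('2001-01-02', periods=3)
data = {'AAPL': pd.DataFrame({'Close': [1.0, 1.1, 1.2]}, index=idx),
        'SPX': pd.DataFrame({'Close': [10.0, 10.5, 10.2]}, index=idx)}
df = pd.DataFrame({key: d['Close'] for key, d in data.items()})
df.index = df.index.tz_localize(pytz.utc)
print(df)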
def get_stock_history(stock):
    prices = None
    try:
        start_date = date.today() - timedelta(days=365)
        prices = DataReader(stock, "yahoo", start=start_date)
    except (HTTPError, BadStatusLine):
        pass
    return prices
def test_read_famafrench(self):
    raise nose.SkipTest('buggy as of 2/14/16; maybe a data revision?')
    for name in ("F-F_Research_Data_Factors",
                 "F-F_Research_Data_Factors_weekly",
                 "6_Portfolios_2x3",
                 "F-F_ST_Reversal_Factor",
                 "F-F_Momentum_Factor"):
        ff = DataReader(name, "famafrench")
        self.assertTrue(ff is not None)
        self.assertIsInstance(ff, dict)
def get_history(self, stock):
    print "Retrieving data for %s" % stock
    prices = None
    try:
        start_date = datetime.today() - timedelta(days=365)
        prices = DataReader(stock, "yahoo", start=start_date)
    except (HTTPError, BadStatusLine):
        pass
    return prices
def data_StockView_import(self):
    self.data = DataReader("GOOGL", "google", self.start_date, self.end_date)
    source = urllib2.urlopen(
        'http://hopey.netfonds.no/posdump.php?date=20140530&paper=AAPL.O&csv_format=txt'
    )
    data = pandas.read_table(source)
    print data
def stocks():
    tickers = ['AAPL', 'GOOG', 'MSFT', 'AMZN']
    end = datetime.now()
    start = datetime(end.year - 1, end.month, end.day)
    for ticker in tickers:
        # Bind each frame to a module-level name (AAPL, GOOG, ...)
        globals()[ticker] = DataReader(ticker, 'yahoo', start, end)
    build_stock_analyses()
    return render_template('stocks.html', AAPL=AAPL, GOOG=GOOG, MSFT=MSFT, AMZN=AMZN)
def fetch_timeseries(symbol, dir_name='data', use_cache=True):
    """ Read time series data. Use cached version if it exists and
        use_cache is True, otherwise retrieve, cache, then read. """
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    timeseries_cache = os.path.join(dir_name, symbol + '.csv')
    if not (os.path.isfile(timeseries_cache) and use_cache):
        ts = DataReader(symbol, 'yahoo', start=datetime.datetime(1900, 1, 1))
        ts.to_csv(timeseries_cache, encoding='utf-8')
    ts = pd.read_csv(timeseries_cache, index_col='Date', parse_dates=True)
    ts = _adj_column_names(ts)
    return ts
def import_data_yahoo_to_files(list_symbols, path, startdate):
    list_error = []
    logger.info("importing from " + str(startdate))
    for symbol in list_symbols:
        try:
            prices_df = DataReader(symbol, "yahoo", startdate)
            count_newdata = len(prices_df)
            print symbol, " ", count_newdata
            if count_newdata <= 0:
                raise Exception("NO DATA for Dates for %s" % symbol)
            prices_df = prices_df.rename(columns={'Date': 'date',
                                                  'Open': 'open',
                                                  'High': 'high',
                                                  'Low': 'low',
                                                  'Close': 'actualclose',
                                                  'Adj Close': 'close',
                                                  'Volume': 'volume',
                                                  'Symbol': 'symbol'})
            prices_df['symbol'] = symbol
            prices_df['symbol'] = prices_df.apply(lambda x: x['symbol'].replace('\r', '').upper(), axis=1)
            prices_df.to_csv(path + "/" + symbol + '.csv')
        except Exception as ex:
            logger.error(ex)
            list_error.append(symbol)
            logger.error(traceback.format_exc())
def calculateTrend(sym, edate):
    DD = datetime.timedelta(days=365 * 5)
    sdate = (datetime.datetime.now() - DD).strftime("%Y-%m-%d")
    df = DataReader(sym, 'yahoo', sdate, edate)
    dfb = DataReader('^GSPC', 'yahoo', sdate, edate)

    # create a time-series of monthly data points
    rts = df.resample('M', how='last')
    rbts = dfb.resample('M', how='last')
    dfsm = pd.DataFrame({'s_adjclose': rts['Adj Close'],
                         'b_adjclose': rbts['Adj Close']}, index=rts.index)

    # compute returns
    dfsm[['s_returns', 'b_returns']] = dfsm[['s_adjclose', 'b_adjclose']] / \
        dfsm[['s_adjclose', 'b_adjclose']].shift(1) - 1
    dfsm = dfsm.dropna()
    covmat = np.cov(dfsm["s_returns"], dfsm["b_returns"])

    # calculate the measures now
    beta = covmat[0, 1] / covmat[1, 1]
    alpha = np.mean(dfsm["s_returns"]) - beta * np.mean(dfsm["b_returns"])

    # r_squared = 1 - SS_res/SS_tot
    ypred = alpha + beta * dfsm["b_returns"]
    SS_res = np.sum(np.power(ypred - dfsm["s_returns"], 2))
    SS_tot = covmat[0, 0] * (len(dfsm) - 1)  # SS_tot is sample_variance * (n-1)
    r_squared = 1. - SS_res / SS_tot

    # 5-year volatility and 1-year momentum
    volatility = np.sqrt(covmat[0, 0])
    momentum = np.prod(1 + dfsm["s_returns"].tail(12).values) - 1

    # annualize the numbers
    prd = 12.  # used monthly returns; 12 periods to annualize
    alpha = alpha * prd
    volatility = volatility * np.sqrt(prd)

    return beta, alpha, r_squared, volatility, momentum
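# Numeric sketch (made-up monthly returns, not Yahoo data) of the beta/alpha/R^2
# arithmetic in calculateTrend(): beta from the covariance matrix, alpha from
# mean returns, R^2 as 1 - SS_res/SS_tot.
import numpy as np

s = np.array([0.02, -0.01, 0.03, 0.01, -0.02, 0.04])  # stock returns
b = np.array([0.01, -0.02, 0.02, 0.01, -0.01, 0.03])  # benchmark returns
covmat = np.cov(s, b)
beta = covmat[0, 1] / covmat[1, 1]
alpha = np.mean(s) - beta * np.mean(b)
ypred = alpha + beta * b
r_squared = 1.0 - np.sum((ypred - s) ** 2) / (covmat[0, 0] * (len(s) - 1))
print(beta, alpha * 12.0, r_squared)  # alpha annualized over 12 monthly periods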
def determine_trend(symbol, trade_date=datetime.datetime.now(), trend_length=20, trend_end_days_ago=1):
    """
    Returns a "trend score" derived from performing a linear regression on
    the daily closing price of the stock identified by symbol. This score
    is on the following scale:

        "Negative Trend" ----- "Positive Trend"
        -1.0 -----------0-----------1.0

    The score considers both the slope of the linear model and the "fit"
    (based on the r^2 output of the ols function).

    symbol -- the stock symbol to determine the trend for
    trade_date -- date used to determine the trend from
    trend_length -- the number of days to derive the trend over
    trend_end_days_ago -- the number of days prior to trade_date at which
        to end the trend analysis
    """
    end_date = datetime.date.today() - datetime.timedelta(days=trend_end_days_ago)
    start_date = end_date - datetime.timedelta(days=trend_length)
    stock_df = DataReader(symbol, "yahoo", start=start_date, end=end_date)
    stock_df = stock_df.reset_index()
    result = ols(y=stock_df['Adj Close'], x=Series(stock_df.index))
    # This is the formula for the score without adjusting to fit within the
    # -1.0 to 1.0 scale. Slope over starting price gives the % change per day;
    # this is divided by the (somewhat arbitrary) LARGE_DAILY_GAIN and
    # weighted by the fit (r^2).
    score = (result.beta['x'] / result.beta['intercept']) / LARGE_DAILY_GAIN * result.r2
    # Now clamp the score to keep it in our trend range:
    if score > 1.0:
        return 1.0
    elif score < -1.0:
        return -1.0
    else:
        return score
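# Sketch of the scoring formula in determine_trend(), with the regression
# outputs and LARGE_DAILY_GAIN replaced by stand-in values (the original
# constant is defined elsewhere and not shown): slope/intercept approximates
# % change per day, scaled by the constant, weighted by r^2, then clipped.
LARGE_DAILY_GAIN = 0.05  # assumed value for illustration only

def clamp_score(slope, intercept, r2):
    score = (slope / intercept) / LARGE_DAILY_GAIN * r2
    return max(-1.0, min(1.0, score))

print(clamp_score(slope=0.8, intercept=100.0, r2=0.9))  # -> 0.144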
class Indicator1():
    def __init__(self, indicator, stock_name, start_date, end_date, buy_value, sell_value):
        if not isinstance(stock_name, str):
            raise TypeError("Sorry. 'Stock Name' must be string")
        self.stock = DataReader(stock_name, "yahoo", start_date, end_date)
        self.stock['returnValue'] = 'NaN'

        # Collect High/Low/Close into numpy arrays for talib
        high_array = []
        for index, row in self.stock.iterrows():
            high_array.append(row['High'])
        high_nparray = numpy.asarray(high_array)

        low_array = []
        for index, row in self.stock.iterrows():
            low_array.append(row['Low'])
        low_nparray = numpy.asarray(low_array)

        close_array = []
        for index, row in self.stock.iterrows():
            close_array.append(row['Close'])
        close_nparray = numpy.asarray(close_array)

        real = getattr(talib, indicator)(high_nparray, low_nparray, close_nparray, timeperiod=14)
        self.stock['indicator'] = real
        self.stock['indicator_trigger'] = "NaN"

        # Alternate Buy/Sell triggers as the indicator crosses the thresholds
        count = 0
        flag = False
        for index, row in self.stock.iterrows():
            if count < len(self.stock.index):
                if float(row['indicator']) < buy_value and not flag:
                    self.stock['indicator_trigger'][count] = "Buy"
                    flag = True
                elif float(row['indicator']) > sell_value and flag:
                    self.stock['indicator_trigger'][count] = "Sell"
                    flag = False
            count += 1
def load(symbol, startDate="19910428", forceDownload=False):
    symbol = symbol.lower()
    # snap the start date to a business day
    startDate = datetools.to_datetime(startDate) + datetools.bday - datetools.bday
    dataPath = "data_from_yhoo"

    # first check if the data directory even exists
    if not os.path.exists(dataPath):
        print "data path doesn't exist. creating."
        os.makedirs(dataPath)

    # get all cached stock symbols by reading the .df file names in the data folder
    symbols = []
    os.chdir(dataPath + "/")
    for afile in glob.glob("*.df"):
        symbols.append(afile[:-3])  # slice off the .df extension
    os.chdir("../")  # reset dir

    if symbol in symbols and not forceDownload:
        # read cached data
        df = DataFrame.load(dataPath + "/" + symbol + ".df")
        print "read " + symbol + " data from binary file"
        df = df[df.index >= startDate]
        return df
    else:
        # otherwise, (re)download the data from yahoo
        print symbol + " data not downloaded. downloading from yhoo now..."
        df = DataReader(symbol, "yahoo", startDate)
        # save locally, then return the data
        df.save(dataPath + "/" + symbol + ".df")
        return df
class Simulation():
    """ Class that will store all data and functions related to a simulation """

    def __init__(self, symbol, from_date=None, to_date=None):
        """ If dates are not entered, a default will be determined ######### TODO """
        self.symbol = symbol
        self.from_date = from_date
        self.to_date = to_date
        self.df_prices = pd.DataFrame()
        self.open = None        # Price of current open trade
        self.close = None       # Price of current closed trade
        self.status = 'out'     # 'out' not invested, 'in' invested
        self.signal = False     # It will get records in which the signal activates
        self.max_open = 0.0     # Max individual investment (for % profit calculation)

        ### Measures
        self.nperiods = 0
        self.ntrades = 0
        self.abs_profit = 0.0   # Accumulated abs_profit ($ value gained/lost)
        # self.pct_simple_profit = 0.0  # Profit over max investment (without reinvestment)
        self.pct_compound_profit = 1.0  # Profit over max investment (with reinvestment)
        # self.pct_annual_simple_profit = 0.0  # Annualized simple profit
        # self.pct_annual_compound_profit = 0.0  # Annualized compound profit
        self.volatility = 0.0   # Volatility of returns (annualized)
        self.sharpe = 0.0       # Sharpe ratio (Rf = 0)
        self.drawdown = 0.0     # It will store the worst abs_profit

        ### Years calculation
        d_from_date = date(int(from_date[0:4]), int(from_date[4:6]), int(from_date[6:8]))
        d_to_date = date(int(to_date[0:4]), int(to_date[4:6]), int(to_date[6:8]))
        self.years = (d_to_date - d_from_date).days / 365.0
        self.profit_trades = []

    def get_prices_yahoo(self):
        """ It gets prices data from yahoo """
        try:
            self.df_prices = DataReader(self.symbol, "yahoo", self.from_date, self.to_date)
            self.df_prices['pct Adj Close'] = self.df_prices.pct_change()['Adj Close']
        except Exception, e:
            print e
            raise
def get_historical(self):
    """
    If mode = "online", download Yahoo quotes from start to end date into a
    pandas dataframe. If mode = "disk", the data is extracted from the hard drive.
    """
    if self._mode == "online":
        self._historical = DataReader(self._symbol, 'yahoo', self._start, self._end)
    else:
        # check the dictionary of symbols and open the file at the correct location
        data = stock_database.extract_series(self._symbol)
        df = pd.DataFrame(data, columns=['date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'])
        self._historical = df[pd.to_datetime(df['date']) > self._start]
        self._historical = self._historical[pd.to_datetime(df['date']) < self._end]
        self._historical = self._historical.set_index('date')
    return self._historical
# Parameters---------------------------------------------------------
start = dt.datetime(2007, 01, 01)
end = dt.date.today()
LI3 = pd.date_range(start, end, freq='D')
curr = ['USD', 'GBP', 'EUR']

# 1-month LIBOR series from FRED, e.g. USD1MTD156N
libor_1m = []
for i in curr:
    tick = i + '1MTD156N'
    libor_1m.append(tick)

df_libor_1m = pd.DataFrame(index=LI3)
for i in libor_1m:
    df2 = DataReader(i, "fred", start, end)
    df2 = df2.applymap(f)  # `f` is defined elsewhere in the script
    df2 = df2.ffill()
    df_libor_1m[i] = df2
df_libor_1m = df_libor_1m.ffill()
df_libor_1m.columns = curr
print df_libor_1m.head(8)

# 1-week LIBOR series from FRED, e.g. USD1WKD156N
libor_1w = []
for i in curr:
    tick = i + '1WKD156N'
    libor_1w.append(tick)

df_libor_1w = pd.DataFrame(index=LI3)
for i in libor_1w:
    # body mirrors the 1-month loop above (the original snippet was cut off here)
    df2 = DataReader(i, "fred", start, end)
    df2 = df2.applymap(f)
    df2 = df2.ffill()
    df_libor_1w[i] = df2