def loadBSC():
    global date_index_internal, date_index_external
    data_sp500 = commons.read_dataframe(commons.data_path + 'WIKI_SP500.h5')
    data_sp500_marketcap = commons.read_dataframe(commons.data_path + 'MARKETCAP.h5')
    # manually collected price and market-cap points for BSC (presumably Bear Stearns),
    # which is missing from the WIKI feed; dix holds the internal date indices
    dix = [11628, 11586, 11522, 11459, 11395, 11334, 11271, 11208, 11145, 11083, 11020]
    price = [2, 80, 120, 145, 150, 162, 150, 142, 140, 115, 110]
    cap = [240000000, 9830000000, 11880000000, 13970000000, 17790000000, 19050000000,
           18060000000, 16250000000, 16580000000, 15520000000, 12510000000]
    for i in range(0, len(dix)):
        data_sp500.ix[date_index_external[dix[i]], 'BSC_Open'] = price[i]
        data_sp500.ix[date_index_external[dix[i]], 'BSC_Close'] = price[i]
        data_sp500.ix[date_index_external[dix[i]], 'BSC_Low'] = price[i]
        data_sp500.ix[date_index_external[dix[i]], 'BSC_High'] = price[i]
        data_sp500_marketcap.ix[date_index_external[dix[i]], 'BSC'] = cap[i]
    data_sp500 = data_sp500.fillna(method='backfill')
    # fillna returns a new frame, so the results have to be reassigned
    data_sp500_marketcap = data_sp500_marketcap.fillna(method='ffill')
    data_sp500_marketcap = data_sp500_marketcap.fillna(method='backfill')
    data_sp500.to_hdf(commons.data_path + 'WIKI_SP500.h5', 'table', mode='w')
    data_sp500_marketcap.to_hdf(commons.data_path + 'MARKETCAP.h5', 'table', mode='w')
def __init__(self):
    self.data_sp500_anb = commons.read_dataframe(commons.data_path + 'anb.h5')
    self.data_sp500_sector = commons.read_dataframe(commons.data_path + 'SECTOR_SP500.h5')
    self.data_sp500 = commons.read_dataframe(commons.data_path + 'WIKI_SP500.h5')
    self.data_sp500_prices = pd.DataFrame()
    # keep only the price columns; the 4-character suffixes cover Open, Close ('lose'),
    # High and Low
    column_selection = list([])
    for c in self.data_sp500.columns:
        if c[-4:] in ['Open', 'lose', 'High', '_Low']:
            column_selection.append(c)
    self.data_sp500_prices = self.data_sp500.ix[:, column_selection]
    self.calcSectorBetas()
def __init__(self, refresh_data=False, reload_baseline=False, demo_scenario=True, quandlkey=''):
    Quandl.ApiConfig.api_key = quandlkey
    self.demo_scenario = demo_scenario
    self.refresh_data = refresh_data
    self.df1st_date = commons.data_sp500_1st_date
    self.dfWikiSp500 = commons.read_dataframe(commons.data_path + 'WIKI_SP500.h5')
    self.dfSector = commons.read_dataframe(commons.data_path + 'SECTOR_SP500.h5')
    self.dfSentiment = commons.read_dataframe(commons.data_path + 'SENT_SP500.h5')
    self.dfFundamentals = commons.read_dataframe(commons.data_path + 'FUND_SP500.h5')
    self.dfShortSell = commons.read_dataframe(commons.data_path + 'SHORT_SP500.h5')
    self.dfMarketcap = commons.read_dataframe(commons.data_path + 'MARKETCAP.h5')
    self.dfanb = commons.read_dataframe(commons.data_path + 'anb.h5')
    self.dfLastCalloff = commons.read_dataframe(commons.data_path + 'last_calloff.h5')
    # select the columns with low, high, open and close prices
    if commons.demo_scenario:
        column_selection = list()
        for ticker in commons.getHistSp500TickerList(1, 1, False):
            column_selection.append(ticker + '_Open')
            column_selection.append(ticker + '_Close')
            column_selection.append(ticker + '_Low')
            column_selection.append(ticker + '_High')
    else:
        column_selection = list([])
        for c in self.dfWikiSp500.columns:
            if c[-4:] in ['Open', 'lose', 'High', '_Low']:
                column_selection.append(c)
    self.dfPrices = self.dfWikiSp500.ix[:, column_selection]
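
# Illustrative alternative, not part of the original code: the suffix check above keys
# on the last four characters of each column name ('lose' standing in for 'Close').
# The same selection can be written more explicitly with str.endswith; the helper name
# below is hypothetical.
def price_columns_sketch(columns):
    """Return the OHLC price columns from an iterable of column names."""
    return [c for c in columns if c.endswith(('_Open', '_Close', '_Low', '_High'))]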
def get_forecast_state(self, ticker, dix):
    # print 'Forecasting for: ', ticker, commons.date_index_external[dix]
    Xy_all = commons.read_dataframe(commons.data_path + 'Xy_all_' + str(ticker))
    select_columns = commons.Xy_columns(Xy_all, 'Close')
    Xy = Xy_all.ix[commons.date_index_external[dix], select_columns]
    X = Xy[:-9]
    X_t = X
    del Xy_all
    state = dict()
    for y in commons.y_labels:
        model = self.get_best_model(ticker, y, dix)
        # print model[3]
        if len(model[0]) != 0 or len(model[1]) != 0 or len(model[2]) != 0:
            # try the preferred model first and fall back to the alternatives
            # if the pickled classifier is missing on disk
            model_key = model[0]
            X_t = self.transform_X(ticker, X, model[3])
            try:
                clf = joblib.load(commons.model_path + model_key + '.pkl')
            except IOError:
                # print 'using model w/o pca no'
                model_key = model[1]
                try:
                    clf = joblib.load(commons.model_path + model_key + '.pkl')
                except IOError:
                    model_key = model[2]
                    X_t = self.transform_X(ticker, X, model[4])
                    clf = joblib.load(commons.model_path + model_key + '.pkl')
            state[y] = clf.predict(X_t.reshape(1, -1))
            state[y] = float(state[y])
        else:
            state[y] = 0
        if int(state[y]) == state[y]:
            state[y] = int(state[y])
    return state
def updateSp500Matrix():
    global date_index_internal, date_index_external
    global regularItemsClean
    global changeItemsClean
    global sp500CompMatrix
    global data_sp500
    items = dict(regularItemsClean, **changeItemsClean)
    sp500CompMatrix = commons.read_dataframe(commons.data_path + 'SP500_COMP.h5')
    maxIndex = max(sp500CompMatrix.index)
    # flag index membership per ticker between its start and end date
    for ticker, dates in items.items():
        if dates['startdate'] > maxIndex:
            dates['startdate'] = maxIndex
        if dates['enddate'] > max(data_sp500.index):
            dates['enddate'] = max(data_sp500.index)
        for dix in range(date_index_internal[dates['startdate']],
                         date_index_internal[dates['enddate']] + 1):
            sp500CompMatrix.ix[date_index_external[dix], ticker] = 1
    sp500CompMatrix = sp500CompMatrix.fillna(0)
    sp500CompMatrix.to_hdf(commons.data_path + 'SP500_COMP.h5', 'table', mode='w')
    # repeat the same membership flagging for each sector index
    for sector, index in commons.sp500_index.items():
        index_t = index[-8:]
        sp500IndexMatrix = commons.read_dataframe(commons.data_path + 'HIST_' + index_t + '.h5')
        maxIndex = max(sp500IndexMatrix.index)
        for ticker, dates in items.items():
            if dates['sector'] == index_t:
                if dates['startdate'] > maxIndex:
                    dates['startdate'] = maxIndex
                if dates['enddate'] > max(data_sp500.index):
                    dates['enddate'] = max(data_sp500.index)
                for dix in range(date_index_internal[dates['startdate']],
                                 date_index_internal[dates['enddate']] + 1):
                    sp500IndexMatrix.ix[date_index_external[dix], ticker] = 1
        sp500IndexMatrix = sp500IndexMatrix.fillna(0)
        sp500IndexMatrix.to_hdf(commons.data_path + 'HIST_' + index_t + '.h5', 'table', mode='w')
def getIndexComposition(self):
    for k, v in commons.sp500_index.items():
        index_t = v[-8:]
        indexComp = commons.read_dataframe(commons.data_path + 'HIST_' + index_t + '.h5')
        relMarketCap = self.dfMarketcap.ix[indexComp.index, indexComp.columns]
        dailySum = (indexComp * relMarketCap).sum(axis=1)
        marketCap = indexComp * relMarketCap
        # convert each member's market cap into its share of the daily index total
        for c in indexComp.columns:
            marketCap.ix[:, c] = marketCap.ix[:, c] / dailySum
        marketCap.to_hdf(commons.data_path + 'PCT_' + index_t + '.h5', 'table', mode='w')
    print 'Index composition stored. Update finished.'
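
# Illustrative sketch, not part of the original module: the per-column division in
# getIndexComposition can also be expressed with DataFrame.div along the row axis,
# which avoids the Python-level loop. 'indexComp' and 'relMarketCap' mirror the local
# variables above (membership flags and market caps on the same index/columns); the
# function name is hypothetical.
def index_weights_sketch(indexComp, relMarketCap):
    marketCap = indexComp * relMarketCap    # market cap of index members only
    dailySum = marketCap.sum(axis=1)        # total index market cap per day
    return marketCap.div(dailySum, axis=0)  # each member's share of the daily total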
def getActionUntrained(self, p, sector, ticker, dix):
    pct = commons.read_dataframe(commons.data_path + 'PCT_' + sector + '.h5')
    try:
        actualVol = p.portfolio[ticker]
    except KeyError:
        actualVol = 0
    # size the position so the holding matches the ticker's weight in the sector index
    targetVol = int((p.get_portfolio_value(sector, dix) + p.cash[sector]) *
                    pct.ix[commons.date_index_external[dix], ticker] /
                    self.m.get_closing_price(ticker, dix))
    if actualVol >= targetVol:
        return commons.action_code['sell'], (actualVol - targetVol)
    else:
        return commons.action_code['buy'], (targetVol - actualVol)
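
# Hedged numeric example of the sizing rule in getActionUntrained; the figures are
# made up purely for illustration.
budget = 90000 + 10000                     # sector portfolio value + sector cash
weight = 0.02                              # ticker weight taken from PCT_<sector>.h5
price = 50.0                               # closing price
target_vol = int(budget * weight / price)  # -> 40 shares
# holding 25 shares, so the returned action is 'buy' with quantity 40 - 25 = 15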
def loadHistoricalSp500(self):
    global date_index_internal, date_index_external
    data_sp500 = commons.read_dataframe(commons.data_path + 'WIKI_SP500.h5')
    dropped = list()
    with open(commons.local_path + 'backup/SP500TickerDropped.csv', 'r') as csvfile:
        csvreader = csv.reader(csvfile, delimiter=',')
        for row in csvreader:
            dropped.append(row[0])
    columns = list()
    max_date = max(data_sp500.index)
    l_i = 0
    for t in dropped:
        items = list([])
        exampleColumn = t + '_Open'
        if exampleColumn not in data_sp500.columns:
            items.append('WIKI/' + t)
        if any(items):
            df = pd.DataFrame([])
            df = Quandl.get(items, start_date=commons.min_date, end_date=dt.date.today())
            # normalize Quandl's 'WIKI/TICKER - Field' column names to 'TICKER_Field'
            columns = list([])
            for x in df.columns:
                x = str(x).replace(' - ', '_').strip('WIKI').strip('/')
                columns.append(x)
            df.columns = columns
            for i in df.index:
                for c in df.columns:
                    if 'Adj.' in c:
                        target_c = c.replace('Adj. ', '')
                        data_sp500.ix[i, target_c] = df.ix[i, c]
            print items, 'retrieved'
            # checkpoint the HDF store every 50 retrieved tickers
            if l_i == 50:
                data_sp500 = data_sp500.sort_index()
                data_sp500.to_hdf(commons.data_path + 'WIKI_SP500.h5', 'table', mode='w')
                l_i = 0
            else:
                l_i += 1
    # persist whatever was retrieved since the last checkpoint
    data_sp500 = data_sp500.sort_index()
    data_sp500.to_hdf(commons.data_path + 'WIKI_SP500.h5', 'table', mode='w')
    print 'sp500 data refreshed'
def getXy(self, mode, modes, ticker, dates, lPca):
    # select the relevant columns from Xy_all
    startdix = self.cutoffdix - self.offset
    Xy_all = commons.read_dataframe(commons.data_path + 'Xy_all_' + ticker).ix[
        commons.date_index_external[startdix]:commons.date_index_external[self.cutoffdix + 1], :]
    # Xy_all = commons.read_dataframe(commons.data_path+'Xy_all_'+ticker).ix[:commons.date_index_external[self.cutoffdix+1], :]
    select_columns = list([])
    for c in Xy_all.columns:
        if mode in str(c):
            select_columns.append(c)
        else:
            m_found = False
            for m in modes:
                if m in str(c):
                    m_found = True
            if m_found == False:
                select_columns.append(c)
    Xy_all1 = pd.DataFrame()
    for date in dates:
        Xy_all2 = pd.DataFrame()
        Xy_all2 = Xy_all.ix[date[0]:date[1], select_columns]
        Xy_all1 = pd.concat([Xy_all1, Xy_all2])
    Xy_all = Xy_all1
    X_all = Xy_all.ix[:, :-9]
    # reduce the dimension space?
    if len(X_all.index) > self.minTraining:
        if lPca != 0:
            pca = PCA(n_components=lPca)
            pca = pca.fit(X_all)
            X_all = pd.DataFrame(data=pca.transform(X_all), index=X_all.index)
            joblib.dump(pca, commons.model_path + str(lPca) + '_PCA_' + ticker + '.pkl', compress=3)
            del pca
    # get the labels and drop the % forecasts, as they are not relevant for the investment decision
    y_all = Xy_all.ix[:, -9:]
    y_all = y_all.drop(['1dr_Close', '5dr_Close', '20dr_Close'], 1)
    return X_all, y_all
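
# Illustrative sketch, not the original transform_X: reload the PCA persisted by getXy
# and project a single feature row before handing it to a classifier. The file-name
# pattern follows getXy above; the function name is hypothetical and X_row is assumed
# to be a pandas Series of features for one date.
def apply_saved_pca_sketch(ticker, lPca, X_row):
    pca = joblib.load(commons.model_path + str(lPca) + '_PCA_' + ticker + '.pkl')
    # PCA.transform expects a 2-D array, hence the reshape of the single row
    return pca.transform(X_row.values.reshape(1, -1))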
def getCloseFromSP1():
    global data_sp500
    for c in commons.read_dataframe(commons.data_path + 'SP500_COMP.h5').columns:
        cClose = c + '_Close'
        if cClose not in data_sp500.columns:
            print c
            df = pd.DataFrame()
            df = Quandl.get('SF1/' + commons.getSP1Ticker(c) + '_PRICE',
                            start_date=commons.min_date,
                            end_date=commons.max_date['WIKI_SP500'])
            # SF1 delivers a single 'Value' series, so use it for open, close, low and high
            for i in df.index:
                data_sp500.ix[i, c + '_Close'] = df.ix[i, 'Value']
                data_sp500.ix[i, c + '_Open'] = df.ix[i, 'Value']
                data_sp500.ix[i, c + '_Low'] = df.ix[i, 'Value']
                data_sp500.ix[i, c + '_High'] = df.ix[i, 'Value']
    data_sp500 = data_sp500.fillna(method='ffill')
    data_sp500 = data_sp500.fillna(method='backfill')
    data_sp500.to_hdf(commons.data_path + 'WIKI_SP500.h5', 'table', mode='w')
def getIndexData(self):
    self.dfSector = commons.read_dataframe(commons.data_path + 'SECTOR_SP500.h5')
    if self.refresh_data == True:
        enddate = commons.max_date['WIKI_SP500']
        startdate = commons.max_date['SECTOR_SP500']
        for k, v in commons.sp500_index.items():
            df = pd.DataFrame()
            df = Quandl.get(v, start_date=startdate, end_date=enddate)
            df.columns = [str(v[-8:]) + '_Open', str(v[-8:]) + '_High', str(v[-8:]) + '_Low',
                          str(v[-8:]) + '_Close', str(v[-8:]) + '_Volume']
            for c in df.columns:
                if 'Volume' not in c:
                    if c not in self.dfSector.columns:
                        self.dfSector = self.dfSector.join(getattr(df, c), how='outer')
                    else:
                        for i in df.index:
                            self.dfSector.ix[i, c] = df.ix[i, c]
        self.dfSector = self.processResults(df, self.dfSector, 'SECTOR_SP500')
        print 'Index prices retrieved and stored'
    else:
        print 'Local index data loaded.'
for row in csvreader:
    if i > 1:
        sp500_dropped[row[0]] = row[2]
        sp500_ticker[row[0]] = row[1]
    i += 1
csvfile.close()

# store the ticker list
with open(commons.local_path + 'data/sp500_ticker.csv', 'w') as f:  # just use 'w' mode in 3.x
    w = csv.DictWriter(f, sp500_ticker.keys())
    w.writeheader()
    w.writerow(sp500_ticker)

sp500_comp = commons.read_dataframe(commons.local_path + 'data/sp500_composition.h5')
sp500 = commons.read_dataframe(commons.local_path + 'data/wiki_sp500.h5')

# set 1 for all tickers that are in the index today
np1 = np.ones((len(pd.date_range(dt.date(2000, 1, 1), dt.date.today())), len(sp500_ticker)))
columns = list()
for k, i in sp500_ticker.items():
    columns.append(k)
sp500_comp = pd.DataFrame(np1,
                          index=pd.date_range(dt.date(2000, 1, 1), dt.date.today()),
                          columns=columns)

# cut off at the introduction date
for k, i in sp500_new.items():
    for d in pd.date_range(dt.date(2000, 1, 1), i):
        # assumed completion of the truncated loop body: no membership before the introduction date
        sp500_comp.ix[d, k] = 0
def loadSp500Data():
    global data_sp500
    global maxDixPreWikiRefresh
    data_sp500 = commons.read_dataframe(commons.data_path + 'WIKI_SP500.h5')
    maxDixPreWikiRefresh = dt.datetime.strptime('01/03/2006', '%m/%d/%Y')
def initializeIndex(self):
    for k, v in commons.getIndexCodes().items():
        index_t = v[-8:]
        self.indexComposition[index_t] = commons.read_dataframe(commons.data_path + 'PCT_' + index_t + '.h5')
def __init__(self, dba):
    self.dba = dba
    self.setLastTrainUuid()
    self.initializeIndex()
    self.data_sp500 = commons.read_dataframe(commons.data_path + 'WIKI_SP500.h5')