Exemple #1
0
def loadBSC():
    """Patch hard-coded 'BSC' prices and market caps into the stored frames.

    Reads WIKI_SP500.h5 and MARKETCAP.h5, writes one flat price (the same
    value for Open/Close/Low/High) and one market-cap value for each of the
    hard-coded date indices, fills the remaining gaps and overwrites both
    HDF5 files.

    The date indices are translated to row labels through the module-level
    ``date_index_external`` mapping, which must be populated beforehand.
    """
    global date_index_internal, date_index_external
    data_sp500 = commons.read_dataframe(commons.data_path + 'WIKI_SP500.h5')
    data_sp500_marketcap = commons.read_dataframe(commons.data_path +
                                                  'MARKETCAP.h5')
    # Parallel lists: date index, flat price and market cap per sample point.
    dix = [
        11628, 11586, 11522, 11459, 11395, 11334, 11271, 11208, 11145, 11083,
        11020
    ]
    price = [2, 80, 120, 145, 150, 162, 150, 142, 140, 115, 110]
    cap = [
        240000000, 9830000000, 11880000000, 13970000000, 17790000000,
        19050000000, 18060000000, 16250000000, 16580000000, 15520000000,
        12510000000
    ]
    for day_index, day_price, day_cap in zip(dix, price, cap):
        row_label = date_index_external[day_index]
        for column in ('BSC_Open', 'BSC_Close', 'BSC_Low', 'BSC_High'):
            data_sp500.ix[row_label, column] = day_price
        data_sp500_marketcap.ix[row_label, 'BSC'] = day_cap
    data_sp500 = data_sp500.fillna(method='backfill')
    # fillna() returns a new frame unless inplace=True; the original code
    # discarded both results, so the market-cap file was saved unfilled.
    data_sp500_marketcap = data_sp500_marketcap.fillna(method='ffill')
    data_sp500_marketcap = data_sp500_marketcap.fillna(method='backfill')
    data_sp500.to_hdf(commons.data_path + 'WIKI_SP500.h5', 'table', mode='w')
    data_sp500_marketcap.to_hdf(commons.data_path + 'MARKETCAP.h5',
                                'table',
                                mode='w')
Exemple #2
0
 def __init__(self):
     """Load the stored frames, extract the price columns and compute betas.

     Reads anb.h5, SECTOR_SP500.h5 and WIKI_SP500.h5 from
     ``commons.data_path``, keeps the WIKI columns whose last four
     characters are 'Open', 'lose', 'High' or '_Low' as
     ``self.data_sp500_prices``, then calls ``self.calcSectorBetas()``.
     """
     base = commons.data_path
     self.data_sp500_anb = commons.read_dataframe(base + 'anb.h5')
     self.data_sp500_sector = commons.read_dataframe(base + 'SECTOR_SP500.h5')
     self.data_sp500 = commons.read_dataframe(base + 'WIKI_SP500.h5')
     self.data_sp500_prices = pd.DataFrame()
     # 'lose' is the four-character tail of '..._Close'.
     price_suffixes = ['Open', 'lose', 'High', '_Low']
     price_columns = [
         name for name in self.data_sp500.columns
         if name[-4:] in price_suffixes
     ]
     self.data_sp500_prices = self.data_sp500.ix[:, price_columns]
     self.calcSectorBetas()
Exemple #3
0
 def __init__(self,refresh_data=False,reload_baseline=False,demo_scenario=True,quandlkey=''):
     """Configure the Quandl key and load all locally stored frames.

     Args:
         refresh_data: stored on the instance as ``self.refresh_data``.
         reload_baseline: accepted but not used in this method.
         demo_scenario: stored on the instance as ``self.demo_scenario``.
         quandlkey: assigned to ``Quandl.ApiConfig.api_key``.

     ``self.dfPrices`` is the subset of the WIKI frame holding the
     Open/Close/Low/High columns.
     """
     Quandl.ApiConfig.api_key=quandlkey
     self.demo_scenario=demo_scenario
     self.refresh_data=refresh_data
     self.df1st_date=commons.data_sp500_1st_date
     base=commons.data_path
     self.dfWikiSp500=commons.read_dataframe(base+'WIKI_SP500.h5')
     self.dfSector=commons.read_dataframe(base+'SECTOR_SP500.h5')
     self.dfSentiment=commons.read_dataframe(base+'SENT_SP500.h5')
     self.dfFundamentals=commons.read_dataframe(base+'FUND_SP500.h5')
     self.dfShortSell=commons.read_dataframe(base+'SHORT_SP500.h5')
     self.dfMarketcap=commons.read_dataframe(base+'MARKETCAP.h5')
     self.dfanb=commons.read_dataframe(base+'anb.h5')
     self.dfLastCalloff=commons.read_dataframe(base+'last_calloff.h5')
     # Select the columns with low, high, open and close.
     # NOTE(review): this branches on commons.demo_scenario, not on the
     # demo_scenario argument stored above -- confirm that is intended.
     if commons.demo_scenario:
         price_columns=[
             ticker+suffix
             for ticker in commons.getHistSp500TickerList(1,1,False)
             for suffix in ('_Open','_Close','_Low','_High')
         ]
     else:
         # 'lose' is the four-character tail of '..._Close'.
         price_columns=[
             name for name in self.dfWikiSp500.columns
             if name[-4:] in ['Open', 'lose', 'High', '_Low']
         ]
     self.dfPrices=self.dfWikiSp500.ix[:,price_columns]
Exemple #4
0
    def get_forecast_state(self,ticker,dix):
        """Predict every label in ``commons.y_labels`` for one ticker and date.

        Loads the stored ``Xy_all_<ticker>`` frame, takes the row for
        ``commons.date_index_external[dix]`` and, for each label, loads a
        pickled model and calls its ``predict`` on that single sample.

        Args:
            ticker: ticker symbol; converted with ``str()`` to build the path.
            dix: key into ``commons.date_index_external``.

        Returns:
            dict mapping each label to its prediction. A label gets 0 when
            ``get_best_model`` returns three empty model keys. Predictions
            are floats, converted to int when they are whole numbers.

        Raises:
            IOError: if none of the three candidate model files can be loaded.
        """
        Xy_all=commons.read_dataframe(commons.data_path+'Xy_all_'+str(ticker))
        select_columns=commons.Xy_columns(Xy_all,'Close')
        # One row (the requested date), restricted to the selected columns.
        Xy=Xy_all.ix[commons.date_index_external[dix],select_columns]
        # Drop the last nine entries.
        # NOTE(review): presumably these are the label columns -- confirm.
        X=Xy[:-9]
        X_t=X
        del Xy_all
        state=dict()
        for y in commons.y_labels:
            # model[0..2] are candidate model keys tried in that order;
            # model[3] and model[4] are passed to transform_X.
            model=self.get_best_model(ticker,y,dix)
            if len(model[0])!=0 or len(model[1])!=0 or len(model[2])!=0:
                model_key=model[0]
                X_t=self.transform_X(ticker,X,model[3])
                try:
                    clf=joblib.load(commons.model_path+model_key+'.pkl')
                except IOError:
                    # First fallback: second key, X_t still from model[3].
                    model_key=model[1]
                    try:
                        clf=joblib.load(commons.model_path+model_key+'.pkl')
                    except IOError:
                        # Last fallback: third key with the model[4] transform.
                        # An IOError here propagates to the caller.
                        model_key=model[2]
                        X_t=self.transform_X(ticker,X,model[4])
                        clf=joblib.load(commons.model_path+model_key+'.pkl')

                # predict() expects a 2-D input, so reshape to one sample.
                state[y]=clf.predict(X_t.reshape(1,-1))
                state[y]=float(state[y])
            else:
                state[y]=0
            # Report whole-number predictions as ints.
            if int(state[y])==state[y]:
                state[y]=int(state[y])
        return state
Exemple #5
0
def updateSp500Matrix():
    """Flag index membership per day in the composition matrices.

    Merges ``regularItemsClean`` and ``changeItemsClean`` (the latter wins
    on duplicate tickers). For every ticker it sets 1 in SP500_COMP.h5 for
    each date between its start and end date, fills the rest with 0 and
    overwrites the file. The same is then done for each HIST_<code>.h5
    file, restricted to tickers whose 'sector' equals that code.

    Start dates are clamped to the newest row of the matrix being updated
    and end dates to the newest row of ``data_sp500``. The clamped values
    are written back into the shared per-ticker dicts.
    """
    global date_index_internal, date_index_external
    global regularItemsClean
    global changeItemsClean
    global sp500CompMatrix
    global data_sp500

    def flag_membership(matrix, ticker, dates, latest_start):
        # Clamp the date range, then mark every covered day with 1.
        if dates['startdate'] > latest_start:
            dates['startdate'] = latest_start
        if dates['enddate'] > max(data_sp500.index):
            dates['enddate'] = max(data_sp500.index)
        first = date_index_internal[dates['startdate']]
        last = date_index_internal[dates['enddate']]
        for position in range(first, last + 1):
            matrix.ix[date_index_external[position], ticker] = 1

    items = dict(regularItemsClean, **changeItemsClean)

    # Overall S&P 500 composition.
    sp500CompMatrix = commons.read_dataframe(commons.data_path +
                                             'SP500_COMP.h5')
    newest_row = max(sp500CompMatrix.index)
    for ticker, dates in items.items():
        flag_membership(sp500CompMatrix, ticker, dates, newest_row)
    sp500CompMatrix = sp500CompMatrix.fillna(0)
    sp500CompMatrix.to_hdf(commons.data_path + 'SP500_COMP.h5',
                           'table',
                           mode='w')

    # One composition matrix per sector index; the code is the last eight
    # characters of the index identifier.
    for index_name in commons.sp500_index.values():
        sector_code = index_name[-8:]
        sector_matrix = commons.read_dataframe(commons.data_path + 'HIST_' +
                                               sector_code + '.h5')
        newest_row = max(sector_matrix.index)
        for ticker, dates in items.items():
            if dates['sector'] == sector_code:
                flag_membership(sector_matrix, ticker, dates, newest_row)
        sector_matrix = sector_matrix.fillna(0)
        sector_matrix.to_hdf(commons.data_path + 'HIST_' + sector_code + '.h5',
                             'table',
                             mode='w')
0
 def getIndexComposition(self):
     """Store each index member's daily share of the index market cap.

     For every entry in ``commons.sp500_index`` this reads the membership
     matrix HIST_<code>.h5, multiplies it element-wise by the matching
     slice of ``self.dfMarketcap``, divides each row by that row's total
     and writes the result to PCT_<code>.h5. <code> is the last eight
     characters of the index identifier.
     """
     for index_name in commons.sp500_index.values():
         index_t=index_name[-8:]
         indexComp=commons.read_dataframe(commons.data_path+'HIST_'+index_t+'.h5')
         relMarketCap=self.dfMarketcap.ix[indexComp.index,indexComp.columns]
         # Compute the member market caps once and reuse them for both the
         # daily total and the normalisation.
         memberCap=indexComp*relMarketCap
         dailySum=memberCap.sum(axis=1)
         # Row-wise division (aligned on the date index) replaces the
         # former per-column Python loop.
         marketCap=memberCap.div(dailySum,axis=0)
         marketCap.to_hdf(commons.data_path+'PCT_'+index_t+'.h5','table',mode='w')

     print('Index composition stored. Update finished.')
Exemple #7
0
 def getActionUntrained(self, p, sector, ticker, dix):
     """Return the trade that moves a holding to its index-weighted target.

     The target volume is (portfolio value + cash of the sector) times the
     ticker's entry in PCT_<sector>.h5 for the date, divided by the closing
     price and truncated with ``int()``.

     Args:
         p: portfolio object providing ``portfolio``, ``cash`` and
             ``get_portfolio_value(sector, dix)``.
         sector: sector code used to locate the PCT file and the cash pot.
         ticker: ticker symbol.
         dix: key into ``commons.date_index_external``.

     Returns:
         Tuple ``(action code, volume)``: 'sell' when the held volume is at
         least the target (the volume may be 0), otherwise 'buy'.
     """
     weights = commons.read_dataframe(commons.data_path + 'PCT_' + sector +
                                      '.h5')
     # A ticker that is not held yet counts as zero volume.
     try:
         held = p.portfolio[ticker]
     except KeyError:
         held = 0
     budget = p.get_portfolio_value(sector, dix) + p.cash[sector]
     weight = weights.ix[commons.date_index_external[dix], ticker]
     price = self.m.get_closing_price(ticker, dix)
     target = int(budget * weight / price)
     if held >= target:
         return commons.action_code['sell'], (held - target)
     return commons.action_code['buy'], (target - held)
Exemple #8
0
def loadHistoricalSp500(self):
    """Backfill adjusted WIKI prices for dropped tickers into WIKI_SP500.h5.

    Reads the ticker symbols from the first column of
    backup/SP500TickerDropped.csv. For every ticker that has no
    ``<ticker>_Open`` column in the stored frame yet, the WIKI dataset is
    fetched from Quandl and its 'Adj. *' columns are copied into the frame
    under the un-prefixed names (e.g. 'X_Adj. Open' -> 'X_Open').

    The frame is written back after every 51st retrieved ticker and once
    more at the end, so the trailing batch is not lost.
    """
    global date_index_internal, date_index_external
    store_path = commons.data_path + 'WIKI_SP500.h5'
    data_sp500 = commons.read_dataframe(store_path)

    # The with-block closes the file; no explicit close() is needed.
    with open(commons.local_path + 'backup/SP500TickerDropped.csv',
              'r') as csvfile:
        dropped = [
            row[0] for row in csv.reader(csvfile, delimiter=',') if row
        ]

    dataset_prefix = 'WIKI/'
    unsaved = 0
    for t in dropped:
        if t + '_Open' in data_sp500.columns:
            continue

        items = [dataset_prefix + t]
        df = Quandl.get(items,
                        start_date=commons.min_date,
                        end_date=dt.date.today())

        # 'WIKI/X - Open' -> 'X_Open'. Remove the prefix explicitly:
        # str.strip('WIKI') strips a *set of characters* from both ends and
        # would also eat a trailing W, I or K of a column name.
        columns = []
        for x in df.columns:
            x = str(x).replace(' - ', '_')
            if x.startswith(dataset_prefix):
                x = x[len(dataset_prefix):]
            columns.append(x)
        df.columns = columns

        # Only the adjusted series are copied, under their plain names.
        adjusted = [(c, c.replace('Adj. ', '')) for c in df.columns
                    if 'Adj.' in c]
        for i in df.index:
            for c, target_c in adjusted:
                data_sp500.ix[i, target_c] = df.ix[i, c]
        print('%s retrieved' % (items,))

        # Checkpoint the store after every 51st retrieved ticker.
        unsaved += 1
        if unsaved > 50:
            data_sp500 = data_sp500.sort_index()
            data_sp500.to_hdf(store_path, 'table', mode='w')
            unsaved = 0

    # Persist whatever was retrieved since the last checkpoint; previously
    # this data was dropped when the function returned.
    if unsaved:
        data_sp500 = data_sp500.sort_index()
        data_sp500.to_hdf(store_path, 'table', mode='w')
    print('sp500 data refreshed')
Exemple #9
0
    def getXy(self, mode, modes, ticker, dates, lPca):
        """Build the feature and label frames for one ticker and mode.

        Args:
            mode: substring identifying the columns to keep.
            modes: all mode substrings; a column mentioning any of them is
                dropped unless it also mentions ``mode``.
            ticker: ticker symbol, appended to the 'Xy_all_' file name.
            dates: iterable of (start, end) label pairs; the row ranges are
                concatenated in the given order.
            lPca: number of PCA components; 0 disables the reduction.

        Returns:
            Tuple ``(X_all, y_all)``: all but the last nine selected columns
            as features, and the last nine columns minus '1dr_Close',
            '5dr_Close' and '20dr_Close' as labels.
        """
        # Restrict the stored frame to the window that ends one step after
        # the cutoff and starts `offset` steps before it.
        first_dix = self.cutoffdix - self.offset
        stored = commons.read_dataframe(commons.data_path + 'Xy_all_' + ticker)
        window_start = commons.date_index_external[first_dix]
        window_end = commons.date_index_external[self.cutoffdix + 1]
        Xy_all = stored.ix[window_start:window_end, :]

        # Keep columns of the requested mode plus all mode-independent ones.
        select_columns = [
            col for col in Xy_all.columns
            if mode in str(col) or not any(m in str(col) for m in modes)
        ]

        collected = pd.DataFrame()
        for date in dates:
            chunk = Xy_all.ix[date[0]:date[1], select_columns]
            collected = pd.concat([collected, chunk])
        Xy_all = collected
        X_all = Xy_all.ix[:, :-9]

        # Reduce the dimension space only when there are enough samples;
        # the fitted PCA is pickled next to the models.
        if len(X_all.index) > self.minTraining and lPca != 0:
            reducer = PCA(n_components=lPca).fit(X_all)
            X_all = pd.DataFrame(data=reducer.transform(X_all),
                                 index=X_all.index)
            pca_path = (commons.model_path + str(lPca) + '_PCA_' + ticker +
                        '.pkl')
            joblib.dump(reducer, pca_path, compress=3)

        # Labels: drop the % forecasts, as they are not relevant for the
        # investment decision.
        y_all = Xy_all.ix[:, -9:].drop(
            ['1dr_Close', '5dr_Close', '20dr_Close'], 1)

        return X_all, y_all
Exemple #10
0
def getCloseFromSP1():
    """Fill in prices for index members that have no WIKI price columns.

    For every column of SP500_COMP.h5 without a ``<ticker>_Close`` column
    in the global ``data_sp500``, the SF1 '<ticker>_PRICE' series is
    fetched from Quandl and its 'Value' is written to the Close, Open, Low
    and High columns alike. The frame is then forward- and back-filled and
    written to WIKI_SP500.h5.
    """
    global data_sp500
    members = commons.read_dataframe(commons.data_path +
                                     'SP500_COMP.h5').columns
    for c in members:
        if c + '_Close' in data_sp500.columns:
            continue
        print(c)
        quotes = Quandl.get('SF1/' + commons.getSP1Ticker(c) + '_PRICE',
                            start_date=commons.min_date,
                            end_date=commons.max_date['WIKI_SP500'])
        # Only one price per day is available, so it stands in for all four.
        for day in quotes.index:
            value = quotes.ix[day, 'Value']
            for suffix in ('_Close', '_Open', '_Low', '_High'):
                data_sp500.ix[day, c + suffix] = value
    data_sp500 = data_sp500.fillna(method='ffill').fillna(method='backfill')
    data_sp500.to_hdf(commons.data_path + 'WIKI_SP500.h5', 'table', mode='w')
Exemple #11
0
    def getIndexData(self):
        """Load the stored sector index frame and optionally refresh it.

        ``self.dfSector`` is always reloaded from SECTOR_SP500.h5. When
        ``self.refresh_data`` equals True, every index in
        ``commons.sp500_index`` is fetched from Quandl for the range
        ``commons.max_date['SECTOR_SP500']`` .. ``commons.max_date['WIKI_SP500']``
        and its Open/High/Low/Close columns are merged into the frame; the
        result is then passed through ``self.processResults``.
        """
        self.dfSector=commons.read_dataframe(commons.data_path+'SECTOR_SP500.h5')

        if not (self.refresh_data==True):
            print('Local index data loaded.')
            return

        enddate=commons.max_date['WIKI_SP500']
        startdate=commons.max_date['SECTOR_SP500']

        for v in commons.sp500_index.values():
            df=Quandl.get(v,start_date=startdate,end_date=enddate)
            # Columns are named after the last eight characters of the code.
            prefix=str(v[-8:])
            df.columns=[prefix+suffix for suffix in ('_Open','_High','_Low','_Close','_Volume')]
            for c in df.columns:
                if 'Volume' in c:
                    continue
                if c not in self.dfSector.columns:
                    # New series: attach it as an additional column.
                    self.dfSector=self.dfSector.join(getattr(df,c),how='outer')
                else:
                    # Known series: overwrite or extend cell by cell.
                    for i in df.index:
                        self.dfSector.ix[i,c]=df.ix[i,c]

        # NOTE(review): df is the frame of the last index fetched and is
        # undefined if commons.sp500_index is empty -- confirm intent.
        self.dfSector=self.processResults(df,self.dfSector,'SECTOR_SP500')
        print('Index prices retrieved and stored')
Exemple #12
0
    for row in csvreader:
        if i > 1:
            sp500_dropped[row[0]] = row[2]
            sp500_ticker[row[0]] = row[1]
        i += 1
    csvfile.close()

#store ticker
# Written as a CSV with one header row (the dict keys) and a single data
# row (the dict values).
with open(commons.local_path + 'data/sp500_ticker.csv',
          'w') as f:  # Just use 'w' mode in 3.x
    w = csv.DictWriter(f, sp500_ticker.keys())
    w.writeheader()
    w.writerow(sp500_ticker)
    # Redundant: the with-block closes the file on exit anyway.
    f.close()

# NOTE(review): the sp500_comp frame loaded here is replaced by the freshly
# built frame below without being used in between -- confirm it is needed.
sp500_comp = commons.read_dataframe(commons.local_path +
                                    'data/sp500_composition.h5')
sp500 = commons.read_dataframe(commons.local_path + 'data/wiki_sp500.h5')
#get 1 for all that are in the index today
# One row per calendar day from 2000-01-01 through today, one column per
# ticker, initialised to 1.
np1 = np.ones((len(pd.date_range(dt.date(2000, 1, 1),
                                 dt.date.today())), len(sp500_ticker)))
# Column labels are the ticker keys in dict iteration order.
columns = list()
for k, i in sp500_ticker.items():
    columns.append(k)
sp500_comp = pd.DataFrame(np1,
                          index=pd.date_range(dt.date(2000, 1, 1),
                                              dt.date.today()),
                          columns=columns)

#cut off at the introduction
for k, i in sp500_new.items():
    for d in pd.date_range(dt.date(2000, 1, 1), i):
Exemple #13
0
def loadSp500Data():
    """Load the stored WIKI frame and set the pre-refresh reference date.

    Sets the module-level ``data_sp500`` to the contents of WIKI_SP500.h5
    and ``maxDixPreWikiRefresh`` to the datetime 2006-01-03.
    """
    global data_sp500, maxDixPreWikiRefresh
    wiki_store = commons.data_path + 'WIKI_SP500.h5'
    data_sp500 = commons.read_dataframe(wiki_store)
    reference_day = dt.datetime.strptime('01/03/2006', '%m/%d/%Y')
    maxDixPreWikiRefresh = reference_day
Exemple #14
0
 def initializeIndex(self):
     """Load the PCT_<code>.h5 frame of every index into indexComposition.

     <code> is the last eight characters of each value returned by
     ``commons.getIndexCodes()``; it is also the key under which the frame
     is stored in ``self.indexComposition``.
     """
     for index_name in commons.getIndexCodes().values():
         index_t=index_name[-8:]
         pct_store=commons.data_path+'PCT_'+index_t+'.h5'
         self.indexComposition[index_t]=commons.read_dataframe(pct_store)
Exemple #15
0
 def __init__(self,dba):
     """Store the database handle, initialise state and load WIKI prices.

     Args:
         dba: object kept as ``self.dba``.

     Calls ``self.setLastTrainUuid()`` and ``self.initializeIndex()``
     before reading WIKI_SP500.h5 into ``self.data_sp500``.
     """
     self.dba=dba
     self.setLastTrainUuid()
     self.initializeIndex()
     wiki_store=commons.data_path+'WIKI_SP500.h5'
     self.data_sp500=commons.read_dataframe(wiki_store)