def __init__(
        self,
        symbols='MSFT,BA,BX,CRM',
        startdate_string='2014-12-31',
        period='monthly',  # or 'daily'
        source='Yahoo'):
    """Pull returns per symbol, cache them as CSV and hand them to the watcher.

    For every ticker in ``symbols`` the returns frame is obtained from
    ``pullreturns.perform``, written to ``config.mycachefolder``, copied to
    ``config.mywatcherfolder``, and afterwards the watcher's
    ``$execute-run-sql-script.bat`` is launched through ``os.system``.

    Args:
        symbols: Comma-separated ticker symbols, e.g. ``'MSFT,BA'``.
            Surrounding blanks and empty entries are ignored.
        startdate_string: Start date handed to ``pullreturns.perform`` and
            embedded in the CSV file name.
        period: ``'monthly'`` or ``'daily'``; handed to
            ``pullreturns.perform``.
        source: Data source name handed to ``pullreturns.perform``.
    """
    self.Symbols = symbols
    self.StartDateString = startdate_string
    self.Period = period
    self.Source = source

    # 'MSFT, BA,' must yield ['MSFT', 'BA'], not ['MSFT', ' BA', ''].
    list_of_symbols = [s.strip() for s in symbols.split(',') if s.strip()]

    mycachefolder = config.mycachefolder
    mytools.general().make_sure_path_exists(mycachefolder)

    csv_columns = ['a_symbol', 'b_monthend', 'd_end', 'e_pctchange']
    for symbol in list_of_symbols:
        o = pullreturns.perform(symbol, startdate_string, period, source)
        df = o.ReturnsDataframe()
        print('Length of returns dataframe %s' % len(df))
        if len(df) == 0:
            # Without rows there is no last month-end to name the file after.
            print(' No returns rows for %s; nothing to cache' % (symbol,))
            continue

        # The last row's month-end is used as the end date in the file name.
        enddate = df.tail(1)['b_monthend'].values[0]
        print('enddate %s' % (enddate,))

        # Same file name in the cache folder and in the watch folder.
        filename = 'procoutput returnsyahoo %s %s %s.csv' % (
            symbol, startdate_string, enddate)
        cachedfilepathname = os.path.join(mycachefolder, filename)
        watchfolderpathname = os.path.join(config.mywatcherfolder, filename)

        df.to_csv(cachedfilepathname, columns=csv_columns)
        if os.path.isfile(cachedfilepathname):
            print(' Found cached file: ' + cachedfilepathname)
            print('------ copying file to watchfolder ------')
            shutil.copy(cachedfilepathname, config.mywatcherfolder)
            if os.path.isfile(watchfolderpathname):
                print(' Found watch file: ' + watchfolderpathname)
            else:
                print(' Did not find watch file: ' + watchfolderpathname)
        else:
            print(' Getting new file:' + cachedfilepathname)

    # NOTE(review): the batch script is run once after all symbols were
    # processed; the original layout was ambiguous about whether this
    # belonged inside the loop -- confirm.
    batchcommand = config.mywatchbatchfolder + '\\$execute-run-sql-script.bat '
    print('RUNNING BATCH FILE: ' + batchcommand)
    os.system(batchcommand)
def __init__(self, symbol, startdate_string='2004-12-31',
             period='monthly',  # or 'daily'
             source='Yahoo'):
    """Fetch the returns data for one symbol and keep it on the instance.

    Args:
        symbol: Ticker symbol handed to ``pullreturns.perform``.
        startdate_string: Start date handed to ``pullreturns.perform``.
        period: ``'monthly'`` or ``'daily'``; handed to
            ``pullreturns.perform``.
        source: Data source name handed to ``pullreturns.perform``.
    """
    # Remember the request parameters.
    self.Symbol = symbol
    self.StartDateString = startdate_string
    self.Period = period
    self.Source = source

    returns_puller = pullreturns.perform(
        symbol, startdate_string, period, source)

    # Both attributes are stored exactly as exposed by the puller object;
    # nothing is called here.
    self.ReturnsDataframe = returns_puller.ReturnsDataframe
    self.StockHistoryDataframe = returns_puller.StockHistoryDataframe
def _compilehistoricaldataframes(self, showresults=0): mysymbolslist = self.SymbolsList ser_annual = pd.Series() import pullreturns as pr o = pr.perform(symbols=mysymbolslist, startdate=self.StartDateString, enddate=self.EndDateString) df_alignedadjclosepricehistory = o.HistoryOfAdjClosePricesDataframe df_totalreturns = o.TotalReturnsDataframe df_aggregatedtotalreturns = o.AggregatedTotalReturnsDataframe dict_of_df_totalreturns = {} df_alignedclosepricehistory = o.HistoryOfClosePricesDataframe df_pricechangereturns = o.PriceChangeReturnsDataframe df_aggregatedpricechangereturns = o.AggregatedPriceChangeReturnsDataframe dict_of_df_pricechangereturns = {} for symbol in mysymbolslist: df_totalreturns_ticker = df_totalreturns[( df_totalreturns.ticker == symbol)] df_totalreturns_ticker.set_index(['curr_date'], inplace=True) dict_of_df_totalreturns[symbol] = df_totalreturns_ticker df_pricechangereturns_ticker = df_pricechangereturns[( df_pricechangereturns.ticker == symbol)] df_pricechangereturns_ticker.set_index(['curr_date'], inplace=True) dict_of_df_pricechangereturns[ symbol] = df_pricechangereturns_ticker index = ['X'] columns = ['A', 'B', 'C'] df_largestofreturns_total = pd.DataFrame(index=index, columns=columns) df_largestofreturns_total = df_largestofreturns_total.fillna( 0) # with 0s rather than NaNs keyoflargestreturnsdf_total = '' for k, v in dict_of_df_totalreturns.items(): if len(v) > len(df_largestofreturns_total): df_largestofreturns_total = v keyoflargestreturnsdf_total = k df_alignedtotalreturns = df_largestofreturns_total[['change_pct']] df_alignedtotalreturns.columns = [keyoflargestreturnsdf_total] df_alignedtotalreturns.sort_index for k, v in dict_of_df_totalreturns.items(): if not k == keyoflargestreturnsdf_total: df_new = v[['change_pct']] df_new.columns = [k] df_new.sort_index result = pd.concat([df_alignedtotalreturns, df_new], axis=1, join='inner') df_alignedtotalreturns = result df_largestofreturns_pricechange = pd.DataFrame(index=index, 
columns=columns) df_largestofreturns_pricechange = df_largestofreturns_pricechange.fillna( 0) # with 0s rather than NaNs keyoflargestreturnsdf_pricechange = '' for k, v in dict_of_df_totalreturns.items(): if len(v) > len(df_largestofreturns_pricechange): df_largestofreturns_pricechange = v keyoflargestreturnsdf_pricechange = k df_alignedpricechangereturns = df_largestofreturns_pricechange[[ 'change_pct' ]] df_alignedpricechangereturns.columns = [ keyoflargestreturnsdf_pricechange ] df_alignedpricechangereturns.sort_index for k, v in dict_of_df_pricechangereturns.items(): if not k == keyoflargestreturnsdf_pricechange: df_new = v[['change_pct']] df_new.columns = [k] df_new.sort_index result = pd.concat([df_alignedpricechangereturns, df_new], axis=1, join='inner') df_alignedpricechangereturns = result self.AlignedTotalReturnsDataframe = df_alignedtotalreturns self.AlignedAdjClosePriceHistoryDataframe = df_alignedadjclosepricehistory self.AlignedPriceChangeReturnsDataframe = df_alignedpricechangereturns self.AlignedClosePriceHistoryDataframe = df_alignedclosepricehistory self.ReturnsClass = o self.SymbolsList = o.SymbolsList print 'After processing _compilehistoricaldataframes: len(self.SymbolsList)', len( self.SymbolsList)
def _buildmonthlyreturnsdataframe(self, showresults=0): #'^GSmr ^OEX ^VIX ^OEX ^MID ^RUT ^DJI import pullreturns as pr dict_of_dfs = {} mysymbolslist = self.SymbolsList # list_of_symbols #['^GSPC','^DJI','^MID','^OEX','AAPL','LEO'] ser_annual = pd.Series() for symbol in mysymbolslist: o = pr.perform(symbol, self.StartDateString) df = o.MonthlyReturnsDataframe dict_of_dfs[symbol] = df annualizedreturn = o.annualizedreturns() ser_annual = ser_annual.set_value(symbol, annualizedreturn) if not showresults == 0: print '----- ser_annual -----' print ser_annual self.AnnualizedReturnsSeries = ser_annual #df = pr.monthlyreturnsusingyahoosymbol('^GSPC','2005-01-01') #dict_of_dfs['^GSPC'] = df # #df = pr.monthlyreturnsusingyahoosymbol('^DJI','2005-01-01') #dict_of_dfs['^DJI'] = df # #df = pr.monthlyreturnsusingyahoosymbol('^MID','2005-01-01') #dict_of_dfs['^MID'] = df # #df = pr.monthlyreturnsusingyahoosymbol('^VIX','2005-01-01') #dict_of_dfs['^VIX'] = df #passed = 0 #import datetime #import pandas as pd #import numpy as np #todays_date = datetime.datetime.now().date() #index = pd.date_range(todays_date-datetime.timedelta(10), periods=10, freq='D') index = ['X'] columns = ['A', 'B', 'C'] df_largest = pd.DataFrame(index=index, columns=columns) df_largest = df_largest.fillna(0) # with 0s rather than NaNs #print df_largest #while len(dict_of_dfs_bysize) < len(dict_of_dfs): keyoflargestdf = '' for k, v in dict_of_dfs.items(): if len(v) > len(df_largest): df_largest = v keyoflargestdf = k #break df_align = df_largest[['b_monthend', 'e_pctchange']] df_align = df_align.set_index('b_monthend') df_align.columns = [keyoflargestdf] df_align.sort_index #print df_align #print df_largest # if passed == 0: # df_align = v[['b_monthend','e_pctchange']] # df_align = df_align.set_index('b_monthend') # df_align.columns = [k] # df_align.sort_index # #sLength = len(df_align[k]) # #originalid = k # # else: for k, v in dict_of_dfs.items(): if not k == keyoflargestdf: df_new = v[['b_monthend', 
'e_pctchange']] df_new = df_new.set_index('b_monthend') df_new.columns = [k] df_new.sort_index #print df_new #df_align[k] = df_new.loc[k].shape[0] #print df_new #df_align[k] = pd.Series(df_new, index=df_align.index) #df_align[k] = df_align[originalid].map(lambda x: df_new[k]) df_align[k] = df_new[k] if showresults == 1: print '----------------------------------------------------' print ' monthly returns' print '----------------------------------------------------' print df_align #self.MonthlyReturnsDataframe = df_align return df_align
def execute_old(self, symbols, startdate_string='2004-12-31',
                period='monthly',  # or 'daily'
                source='Yahoo'):
    """Return the correlation matrix of the symbols' percentage changes.

    Every symbol is fetched through ``pullreturns.perform``; symbols whose
    fetch raises are reported and skipped. The first successfully fetched
    symbol provides the ``b_monthend`` index the others are aligned to, rows
    with any missing value are dropped, and the correlation is computed with
    ``numpy.corrcoef``.

    Args:
        symbols: Iterable of ticker symbols.
        startdate_string: Start date handed to ``pullreturns.perform``.
        period: ``'monthly'`` or ``'daily'``; handed to
            ``pullreturns.perform``.
        source: Data source name handed to ``pullreturns.perform``.

    Returns:
        DataFrame with the symbols as both index and columns.

    Raises:
        ValueError: If no symbol could be retrieved.
    """
    self.Symbols = symbols
    self.StartDateString = startdate_string
    self.Period = period
    self.Source = source

    dict_of_dfs = {}
    ordered_symbols = []  # keeps the column order independent of dict order
    for symbol in self.Symbols:
        print(symbol)
        try:
            o = pullreturns.perform(symbol, self.StartDateString,
                                    self.Period, self.Source)
            df_symbol = o.ReturnsDataframe()
        except Exception as exc:
            # Best effort: one bad symbol must not abort the whole run, but
            # Ctrl-C / SystemExit are no longer swallowed.
            print('skipped %s because of error: %s' % (symbol, exc))
            continue
        if symbol not in dict_of_dfs:
            ordered_symbols.append(symbol)
        dict_of_dfs[symbol] = df_symbol

    if not ordered_symbols:
        raise ValueError(
            'no returns data could be retrieved for the requested symbols')

    df_align = None
    for symbol in ordered_symbols:
        df_new = dict_of_dfs[symbol][['b_monthend', 'e_pctchange']]
        df_new = df_new.set_index('b_monthend')
        df_new.columns = [symbol]
        if df_align is None:
            df_align = df_new
            print(df_align)
        else:
            # Column assignment aligns on the b_monthend index.
            df_align[symbol] = df_new[symbol]
            print('-----+++++++++')

    print('----------------------------------------------------')
    print(' monthly returns')
    print('----------------------------------------------------')
    df_align = df_align.dropna()
    print(df_align)

    print('----------------------------------------------------')
    print(' correlation matrix')
    print('----------------------------------------------------')
    labels = list(df_align)
    # nan_to_num is applied to the value array only: applied to the frame it
    # returns a bare ndarray, which has neither column labels nor `.values`.
    values = np.nan_to_num(df_align.astype(float).values)
    # corrcoef expects one row per variable; for a single symbol it returns
    # a scalar, hence atleast_2d.
    corrmatrix_array = np.atleast_2d(np.corrcoef(values.T))
    df_corr = pd.DataFrame(corrmatrix_array, index=labels, columns=labels)
    return df_corr
def execute(self, symbols, startdate_string='2004-12-31',
            period='monthly',  # or 'daily'
            source='Yahoo'):
    """Compute correlation and covariance of the symbols' percentage changes.

    Every symbol is fetched through ``pullreturns.perform``; symbols whose
    fetch raises are reported and skipped. The first successfully fetched
    symbol provides the ``b_monthend`` index the others are aligned to.
    Missing values are left in place; ``DataFrame.corr`` / ``DataFrame.cov``
    are applied to the aligned frame cast to float.

    The rounded matrices are stored as ``self.CorrelationMatrix`` (2
    decimals) and ``self.CovarianceMatrix`` (6 decimals).

    Args:
        symbols: Iterable of ticker symbols.
        startdate_string: Start date handed to ``pullreturns.perform``.
        period: ``'monthly'`` or ``'daily'``; handed to
            ``pullreturns.perform``.
        source: Data source name handed to ``pullreturns.perform``.

    Returns:
        Tuple ``(correlation, covariance)``: the correlation rounded to 2
        decimals and the covariance *unrounded*.

    Raises:
        ValueError: If no symbol could be retrieved.
    """
    self.Symbols = symbols
    self.StartDateString = startdate_string
    self.Period = period
    self.Source = source

    dict_of_dfs = {}
    ordered_symbols = []  # keeps the column order independent of dict order
    for symbol in self.Symbols:
        print(symbol)
        try:
            o = pullreturns.perform(symbol, self.StartDateString,
                                    self.Period, self.Source)
            df_symbol = o.ReturnsDataframe()
        except Exception as exc:
            # Best effort: one bad symbol must not abort the whole run, but
            # Ctrl-C / SystemExit are no longer swallowed.
            print('skipped %s because of error: %s' % (symbol, exc))
            continue
        if symbol not in dict_of_dfs:
            ordered_symbols.append(symbol)
        dict_of_dfs[symbol] = df_symbol
        print('i =  %s' % len(dict_of_dfs))

    if not ordered_symbols:
        raise ValueError(
            'no returns data could be retrieved for the requested symbols')

    df_align = None
    for iterations, symbol in enumerate(ordered_symbols, 1):
        print('iterations %s' % iterations)
        df_new = dict_of_dfs[symbol][['b_monthend', 'e_pctchange']]
        df_new = df_new.set_index('b_monthend')
        df_new.columns = [symbol]
        if df_align is None:
            df_align = df_new
        else:
            # Column assignment aligns on the b_monthend index.
            df_align[symbol] = df_new[symbol]
            print('-----++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')

    print('----------------------------------------------------')
    print(' monthly returns')
    print('----------------------------------------------------')
    print(df_align)

    print('----------------------------------------------------')
    print(' correlation matrix')
    print('----------------------------------------------------')
    df = df_align.astype(float)
    # corr()/cov() are already labelled by symbol on both axes.
    df_corr1 = np.round(df.corr(), decimals=2)
    df_cov = df.cov()
    df_cov1 = np.round(df_cov, decimals=6)

    self.CorrelationMatrix = df_corr1
    self.CovarianceMatrix = df_cov1
    # NOTE(review): the stored covariance is rounded while the returned one
    # is not. Kept as-is because callers may rely on the full precision --
    # confirm whether df_cov1 was intended here.
    return df_corr1, df_cov