Example #1
0
    def __init__(self,
                 symbols='MSFT,BA,BX,CRM',
                 startdate_string='2014-12-31',
                 period='monthly',  # or daily
                 source='Yahoo'):
        """Export the returns of each symbol to CSV and launch the SQL batch.

        For every symbol the returns dataframe is obtained from
        ``pullreturns.perform``, written to the cache folder
        (``config.mycachefolder``), copied into the watcher folder
        (``config.mywatcherfolder``), and after all symbols are processed the
        batch file in ``config.mywatchbatchfolder`` is started with
        ``os.system``.

        Args:
            symbols: Comma-separated symbols. Whitespace around a symbol and
                empty entries (for example from a trailing comma) are ignored.
            startdate_string: Passed to ``pullreturns.perform`` and embedded
                in the CSV file name.
            period: Passed to ``pullreturns.perform``.
            source: Passed to ``pullreturns.perform``.
        """
        self.Symbols = symbols
        self.StartDateString = startdate_string
        self.Period = period
        self.Source = source

        # Strip each entry so that 'MSFT, BA' does not produce the symbol
        # ' BA' (which would end up in the lookup and in the file names), and
        # drop blanks left behind by stray commas.
        list_of_symbols = [s.strip() for s in symbols.split(',') if s.strip()]

        mycachefolder = config.mycachefolder
        mytools.general().make_sure_path_exists(mycachefolder)

        for symbol in list_of_symbols:
            o = pullreturns.perform(symbol, startdate_string, period, source)
            df = o.ReturnsDataframe()
            print('Length of returns dataframe %d' % len(df))
            # The 'b_monthend' value of the last row becomes part of the
            # file name.
            enddate = df.tail(1)['b_monthend'].values[0]
            print('enddate %s' % (enddate,))

            # Same file name in both folders; only the folder differs.
            filename = ' '.join(['procoutput returnsyahoo', symbol,
                                 startdate_string, enddate]) + '.csv'
            cachedfilepathname = mycachefolder + '\\' + filename
            watchfolderpathname = config.mywatcherfolder + '\\' + filename
            df.to_csv(cachedfilepathname,
                      columns=('a_symbol', 'b_monthend', 'd_end',
                               'e_pctchange'))

            if not os.path.isfile(cachedfilepathname):
                print('   Getting new file:' + cachedfilepathname)
                continue

            print('   Found cached file:  ' + cachedfilepathname)
            print('------ copying file to watchfolder ------')
            shutil.copy(cachedfilepathname, config.mywatcherfolder)
            if os.path.isfile(watchfolderpathname):
                print('   Found watch file:  ' + watchfolderpathname)
            else:
                print('   Did not find watch file:  ' + watchfolderpathname)

        batch_command = (config.mywatchbatchfolder +
                         '\\$execute-run-sql-script.bat     ')
        print('RUNNING BATCH FILE:  ' + batch_command)
        os.system(batch_command)
 def __init__(self,
              symbol,
              startdate_string='2004-12-31',
              period='monthly',  # or daily
              source='Yahoo'):
     """Record the request parameters and pull the data for one symbol.

     The arguments are stored on the instance and forwarded unchanged to
     ``pullreturns.perform``; the ``ReturnsDataframe`` and
     ``StockHistoryDataframe`` attributes of its result are then exposed
     under the same names on this instance.
     """
     self.Symbol, self.StartDateString = symbol, startdate_string
     self.Period, self.Source = period, source

     pulled = pullreturns.perform(symbol, startdate_string, period, source)
     self.ReturnsDataframe = pulled.ReturnsDataframe
     self.StockHistoryDataframe = pulled.StockHistoryDataframe
Example #3
0
    def _compilehistoricaldataframes(self, showresults=0):
        """Build the aligned return tables and store them on the instance.

        Data for ``self.SymbolsList`` between ``self.StartDateString`` and
        ``self.EndDateString`` is requested from ``pullreturns.perform``. For
        both the total-return and the price-change-return tables, the
        ``change_pct`` column of every symbol is placed side by side, indexed
        by ``curr_date`` and restricted to index values present for every
        symbol (inner join). The symbol with the most rows is the first
        column.

        Sets ``AlignedTotalReturnsDataframe``,
        ``AlignedAdjClosePriceHistoryDataframe``,
        ``AlignedPriceChangeReturnsDataframe``,
        ``AlignedClosePriceHistoryDataframe``, ``ReturnsClass`` and
        ``SymbolsList``.

        Args:
            showresults: Not used by this method; kept so existing callers
                continue to work.
        """
        import pullreturns as pr

        def _split_by_symbol(df_returns, symbols):
            """Map each symbol to its rows of df_returns, indexed by curr_date."""
            return dict(
                (symbol,
                 df_returns[df_returns.ticker == symbol].set_index(
                     ['curr_date']))
                for symbol in symbols)

        def _align_change_pct(frames_by_symbol):
            """Inner-join the change_pct columns, longest frame first."""
            if not frames_by_symbol:
                # Nothing to align; previously this path ended in a KeyError
                # raised from a placeholder frame.
                return pd.DataFrame()

            def _column(symbol):
                column = frames_by_symbol[symbol][['change_pct']].copy()
                column.columns = [symbol]
                # sort_index() returns a new frame; the original referenced
                # the method without calling it, so nothing was ever sorted.
                return column.sort_index()

            # First symbol with the maximum row count, matching the original
            # strict ">" comparison while iterating the dict.
            longest = max(frames_by_symbol,
                          key=lambda symbol: len(frames_by_symbol[symbol]))
            aligned = _column(longest)
            for symbol in frames_by_symbol:
                if symbol != longest:
                    aligned = pd.concat([aligned, _column(symbol)],
                                        axis=1,
                                        join='inner')
            return aligned

        mysymbolslist = self.SymbolsList
        o = pr.perform(symbols=mysymbolslist,
                       startdate=self.StartDateString,
                       enddate=self.EndDateString)

        df_alignedadjclosepricehistory = o.HistoryOfAdjClosePricesDataframe
        df_totalreturns = o.TotalReturnsDataframe
        df_alignedclosepricehistory = o.HistoryOfClosePricesDataframe
        df_pricechangereturns = o.PriceChangeReturnsDataframe

        self.AlignedTotalReturnsDataframe = _align_change_pct(
            _split_by_symbol(df_totalreturns, mysymbolslist))
        self.AlignedAdjClosePriceHistoryDataframe = df_alignedadjclosepricehistory

        # The price-change table is built purely from price-change frames.
        # The original selected its base frame from the total-return frames,
        # so its first column contained total-return values.
        self.AlignedPriceChangeReturnsDataframe = _align_change_pct(
            _split_by_symbol(df_pricechangereturns, mysymbolslist))
        self.AlignedClosePriceHistoryDataframe = df_alignedclosepricehistory

        self.ReturnsClass = o
        self.SymbolsList = o.SymbolsList
        print('After processing _compilehistoricaldataframes:  '
              'len(self.SymbolsList) %d' % len(self.SymbolsList))
    def _buildmonthlyreturnsdataframe(self, showresults=0):
        """Return the ``e_pctchange`` values of all symbols in one table.

        For each symbol in ``self.SymbolsList`` the monthly returns frame and
        the annualized return are obtained from ``pullreturns.perform``. The
        annualized returns are stored as ``self.AnnualizedReturnsSeries``
        (indexed by symbol).

        The returned frame is indexed by ``b_monthend`` and has one column per
        symbol. Its rows are those of the symbol with the most rows, sorted by
        index; the other symbols are aligned to that index, so entries they
        lack become NaN.

        Args:
            showresults: Any non-zero value prints the annualized returns;
                the value 1 additionally prints the resulting table.

        Returns:
            The aligned dataframe (empty if there are no symbols).
        """
        import pullreturns as pr

        dict_of_dfs = {}
        ser_annual = pd.Series(dtype='float64')
        for symbol in self.SymbolsList:
            o = pr.perform(symbol, self.StartDateString)
            dict_of_dfs[symbol] = o.MonthlyReturnsDataframe
            # .loc assignment appends the entry; Series.set_value is
            # deprecated and no longer exists in current pandas.
            ser_annual.loc[symbol] = o.annualizedreturns()
        if showresults != 0:
            print('----- ser_annual -----')
            print(ser_annual)
        self.AnnualizedReturnsSeries = ser_annual

        def _monthly_column(symbol):
            """One symbol's e_pctchange as a column indexed by b_monthend."""
            column = dict_of_dfs[symbol][['b_monthend', 'e_pctchange']]
            column = column.set_index('b_monthend')
            column.columns = [symbol]
            return column

        if dict_of_dfs:
            # First symbol with the maximum row count, matching the original
            # strict ">" comparison while iterating the dict.
            keyoflargestdf = max(dict_of_dfs,
                                 key=lambda symbol: len(dict_of_dfs[symbol]))
            # sort_index() returns a new frame; the original referenced the
            # method without calling it, so the table was never sorted.
            df_align = _monthly_column(keyoflargestdf).sort_index()
            for symbol in dict_of_dfs:
                if symbol != keyoflargestdf:
                    # Column assignment aligns on df_align's index.
                    df_align[symbol] = _monthly_column(symbol)[symbol]
        else:
            # No symbols: previously this ended in a KeyError raised from a
            # placeholder frame.
            df_align = pd.DataFrame()

        if showresults == 1:
            print('----------------------------------------------------')
            print('                 monthly returns')
            print('----------------------------------------------------')
            print(df_align)
        return df_align
Example #5
0
    def execute_old(self,
                    symbols,
                    startdate_string='2004-12-31',
                    period='monthly',  # or daily
                    source='Yahoo'):
        """Compute the correlation matrix of the symbols' ``e_pctchange``.

        Each symbol's returns frame is obtained through
        ``pullreturns.perform``; symbols whose retrieval raises are reported
        and skipped. The ``e_pctchange`` columns are aligned on
        ``b_monthend`` using the index of the first frame, rows containing NaN
        are dropped, and the correlation of the remaining columns is computed
        with ``numpy.corrcoef``.

        Args:
            symbols: Iterable of symbols.
            startdate_string: Passed to ``pullreturns.perform``.
            period: Passed to ``pullreturns.perform``.
            source: Passed to ``pullreturns.perform``.

        Returns:
            A square dataframe of correlations whose rows and columns are
            labelled with the symbols.

        Raises:
            ValueError: If no returns could be obtained for any symbol.
        """
        self.Symbols = symbols
        self.StartDateString = startdate_string
        self.Period = period
        self.Source = source

        dict_of_dfs = {}
        for symbol in self.Symbols:
            print(symbol)
            try:
                o = pullreturns.perform(symbol, self.StartDateString,
                                        self.Period, self.Source)
                dict_of_dfs[symbol] = o.ReturnsDataframe()
            except Exception as exc:
                # Still best-effort per symbol, but no longer swallows
                # KeyboardInterrupt/SystemExit, and reports the cause.
                print('skipped %s because of error: %s' % (symbol, exc))

        if not dict_of_dfs:
            # Previously this fell through to an UnboundLocalError.
            raise ValueError('no returns data could be pulled for any symbol')

        df_align = None
        for k, v in dict_of_dfs.items():
            df_new = v[['b_monthend', 'e_pctchange']].set_index('b_monthend')
            df_new.columns = [k]
            if df_align is None:
                # The first frame defines the index the others align to.
                df_align = df_new
                print(df_align)
            else:
                df_align[k] = df_new[k]
            print('-----+++++++++')

        print('----------------------------------------------------')
        print('                 monthly returns')
        print('----------------------------------------------------')
        df_align = df_align.dropna()
        print(df_align)

        labels = list(df_align)

        print('----------------------------------------------------')
        print('                 correlation matrix')
        print('----------------------------------------------------')
        # Work on a plain float array. The original replaced df_align with
        # the ndarray returned by np.nan_to_num and then used DataFrame-only
        # attributes (.values, column labels) on it.
        values = np.nan_to_num(df_align.values.astype(float))
        # corrcoef treats each row as one variable, hence the transpose;
        # atleast_2d covers the single-symbol case, which yields a scalar.
        corrmatrix_array = np.atleast_2d(np.corrcoef(values.T))
        df_corr = pd.DataFrame(corrmatrix_array, index=labels, columns=labels)
        return df_corr
Example #6
0
    def execute(self,
                symbols,
                startdate_string='2004-12-31',
                period='monthly',  # or daily
                source='Yahoo'):
        """Compute correlation and covariance of the symbols' ``e_pctchange``.

        Each symbol's returns frame is obtained through
        ``pullreturns.perform``; symbols whose retrieval raises are reported
        and skipped. The ``e_pctchange`` columns are aligned on
        ``b_monthend`` using the index of the first frame, and the pairwise
        correlation and covariance are computed with ``DataFrame.corr`` and
        ``DataFrame.cov``.

        Sets ``self.CorrelationMatrix`` (rounded to 2 decimals) and
        ``self.CovarianceMatrix`` (rounded to 6 decimals).

        Args:
            symbols: Iterable of symbols.
            startdate_string: Passed to ``pullreturns.perform``.
            period: Passed to ``pullreturns.perform``.
            source: Passed to ``pullreturns.perform``.

        Returns:
            Tuple ``(correlation, covariance)``. The correlation is the
            rounded matrix; the covariance is returned unrounded, unlike
            ``self.CovarianceMatrix`` (kept as-is for existing callers).

        Raises:
            ValueError: If no returns could be obtained for any symbol.
        """
        self.Symbols = symbols
        self.StartDateString = startdate_string
        self.Period = period
        self.Source = source

        dict_of_dfs = {}
        for symbol in self.Symbols:
            print(symbol)
            try:
                o = pullreturns.perform(symbol, self.StartDateString,
                                        self.Period, self.Source)
                dict_of_dfs[symbol] = o.ReturnsDataframe()
                print('i =  %d' % len(dict_of_dfs))
            except Exception as exc:
                # Still best-effort per symbol, but no longer swallows
                # KeyboardInterrupt/SystemExit, and reports the cause.
                print('skipped %s because of error: %s' % (symbol, exc))

        if not dict_of_dfs:
            # Previously this fell through to an UnboundLocalError.
            raise ValueError('no returns data could be pulled for any symbol')

        df_align = None
        for iterations, (k, v) in enumerate(dict_of_dfs.items(), 1):
            print('iterations %d' % iterations)
            df_new = v[['b_monthend', 'e_pctchange']].set_index('b_monthend')
            df_new.columns = [k]
            if df_align is None:
                # The first frame defines the index the others align to.
                df_align = df_new
            else:
                df_align[k] = df_new[k]
            print('-----++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')

        print('----------------------------------------------------')
        print('                 monthly returns')
        print('----------------------------------------------------')
        print(df_align)

        print('----------------------------------------------------')
        print('                 correlation matrix')
        print('----------------------------------------------------')
        df = df_align.astype(float)

        # corr()/cov() already return frames labelled by symbol on both axes.
        df_corr = df.corr()
        df_cov = df.cov()
        df_corr1 = np.round(df_corr, decimals=2)
        df_cov1 = np.round(df_cov, decimals=6)

        self.CorrelationMatrix = df_corr1
        self.CovarianceMatrix = df_cov1

        return df_corr1, df_cov