def get_vol( self, com1, com2, date1, date2 ):
    """Volatility of the spread between two tickers' normalized price paths.

    Both daily price series (read from the cleaned S&P file) are rescaled to
    start at 1.0, and the standard deviation of their difference is returned.

    :param com1: first ticker symbol
    :param com2: second ticker symbol
    :param date1: start date as a ``'%Y-%m-%d'`` string
    :param date2: end date as a ``'%Y-%m-%d'`` string
    """
    start = datetime.strptime( date1, '%Y-%m-%d' )
    stop = datetime.strptime( date2, '%Y-%m-%d' )
    prices = matrices.getEmpiricalDataFrame( [ com1, com2 ], start, stop,
                                             csvPath = retrieve.adjustedClosesFilepath( filename = 'cleanSP.csv' ) )
    series1 = prices[ com1 ]
    series2 = prices[ com2 ]
    # Normalize each series by its first observation so the two paths are comparable.
    spread = series1 / series1.iat[ 0 ] - series2 / series2.iat[ 0 ]
    return numpy.std( spread, axis = 0 )
def getEmpiricalDataFrame( tickerList, fromDate, toDate, csvPath = None ):
    """Returns a :py:class:`pandas.DataFrame` of adjusted closes for selected stocks and dates

    :param tickerList: A list of the tickers to be kept in the table
    :param fromDate: Time from which to begin the table (inclusive)
    :type fromDate: :py:class:`datetime.date`
    :param toDate: Time at which to end the table (exclusive when present in the index)
    :type toDate: :py:class:`datetime.date`
    :param csvPath: Path of the file within the Empirical file store;
                    defaults to ``retrieve.adjustedClosesFilepath()``
    """
    # Resolve the default lazily.  The original default ``csvPath =
    # retrieve.adjustedClosesFilepath()`` was evaluated once at import time,
    # which froze the path and required the file store to be configured before
    # this module could even be imported.
    if csvPath is None:
        csvPath = retrieve.adjustedClosesFilepath()
    df = pd.read_csv( csvPath, index_col = 0, parse_dates = True, na_values = 'NA' )
    wanted = set( tickerList )
    extraColumns = [ column for column in df.columns if column not in wanted ]
    # searchsorted yields a half-open [start, end) row window, so a row equal
    # to ``toDate`` is excluded -- preserves the original slicing behavior.
    start = df.index.searchsorted( fromDate )
    end = df.index.searchsorted( toDate )
    # ``drop( extraColumns, 1 )`` relied on the positional ``axis`` argument,
    # which was removed in pandas 2.0; the ``columns=`` keyword is equivalent.
    return df[ start:end ].drop( columns = extraColumns )
# NOTE(review): the next line is the tail of a call whose opening parenthesis
# is above this chunk (presumably a count-matrix / DataFrame constructor taking
# ``columns`` and ``default_fill_value`` keywords) -- cannot verify from here.
columns = columns, default_fill_value = 0 )

def _getCountRows( args ):
    """Count keyword occurrences in one day's articles.

    :param args: a ``(timestamp, aggregator, wordCounter)`` tuple (packed so the
                 function can be mapped over a single iterable, e.g. by a pool)
    :returns: ``(date, aggregator(counts))`` when ``aggregator`` is callable,
              otherwise ``(date, counts)``
    """
    timestamp, aggregator, wordCounter = args
    try:
        # pandas Timestamps carry a .date(); plain datetime.date objects do not.
        date = timestamp.date()
    except AttributeError:
        date = timestamp
    counts = wordCounter( retrieve.getDailyArticles( date ) )
    try:
        return ( date, aggregator( counts ) )
    except TypeError:
        # aggregator is None (or otherwise not callable): return raw counts.
        return ( date, counts )

if __name__ == "__main__":
    begin = datetime.date( 2011, 1, 3 )
    end = datetime.date( 2013, 11, 27 )
    keywordsFile = join( settings.KEYWORDS_DIR, 'splist.csv' )
    tickerList = keywords.getTickerList( keywordsFile )
    keywordsMap = keywords.getKeywordToIndexMap( keywordsFile )
    empiricalDf = getEmpiricalDataFrame( tickerList, begin, end, retrieve.adjustedClosesFilepath( filename = 'cleanSP.csv' ) )
    countDf = getCountDataFrame( tickerList, count.WordCounter( keywordsMap ), empiricalDf.index )
    tfidf = normalize.TfIdf()( countDf )
    # Align prices with the rows that survived TF-IDF weighting.
    # NOTE(review): .ix was removed in modern pandas; fine on the vintage used here.
    empiricalDf = empiricalDf.ix[ tfidf.index ]
    tfidf.corr().to_csv( join( settings.RESULTS_DIR, 'hft_CountCorr.csv' ) )
    empiricalDf.corr().to_csv( join( settings.RESULTS_DIR, 'hft_EmpCorr.csv' ) )
    # corr.to_csv( join( settings.RESULTS_DIR, 'corrtest_withSent_all.csv' ) )
import pandas as pd
import datetime
import os

# NOTE(review): this script references keywords, count, sentiment, matrices,
# strats, retrieve and np, none of which are imported in the visible text --
# presumably imported above this chunk; confirm against the full file.
if __name__ == '__main__':
    begin = datetime.date( 2011, 1, 3 )
    end = datetime.date( 2013, 11, 27 )
    tickerList = keywords.getTickerList()
    keywordsMap = keywords.getKeywordToIndexMap()
    # Two counters over the same keyword map: sentiment-weighted vs raw mentions.
    sentCounter = count.SentimentWordCounter( keywordsMap, sentiment.classifier() )
    mentionCounter = count.WordCounter( keywordsMap )
    empiricalDf = matrices.getEmpiricalDataFrame( tickerList, begin, end )
    constrained = False
    # Benchmark file per strategy, keyed by whether the optimization was constrained.
    minVarBenchmark = { True : 'minvarConstrained.csv', False : 'minvarAnalytical.csv' }
    maxDivBenchmark = { True : 'maxDivConstrained.csv', False : 'maxDivAnalytical.csv' }
    minvarBenchmarkDf = matrices.getEmpiricalDataFrame( [ strats.MinimumVariance().getName() ], begin, end, retrieve.adjustedClosesFilepath( filename = minVarBenchmark[ constrained ] ) )
    maxDivBenchmarkDf = matrices.getEmpiricalDataFrame( [ strats.MaximumDiversification().getName() ], begin, end, retrieve.adjustedClosesFilepath( filename = maxDivBenchmark[ constrained ] ) )
    riskParityDf = matrices.getEmpiricalDataFrame( [ strats.RiskParity().getName() ], begin, end, retrieve.adjustedClosesFilepath( filename = 'riskParity.csv' ) )
    benchmarkDf = matrices.getEmpiricalDataFrame( [ 'OEF', 'SPY' ], begin, end, retrieve.benchmarkFilepath() )
    # Count matrices: summed* aggregate counts per day with np.sum; article*
    # variants pass no aggregator (per-article granularity -- TODO confirm).
    summedSentDf = matrices.getCountDataFrame( tickerList, sentCounter, empiricalDf.index, aggregator = np.sum )
    articleSentDf = matrices.getCountDataFrame( tickerList, sentCounter, empiricalDf.index )
    summedMentionDf = matrices.getCountDataFrame( tickerList, mentionCounter, empiricalDf.index, aggregator = np.sum )
    articleMentionDf = matrices.getCountDataFrame( tickerList, mentionCounter, empiricalDf.index )
    # Restrict prices to tickers that actually appear in the mention counts.
    # NOTE(review): .ix was removed in modern pandas; fine on the vintage used here.
    empiricalDf = empiricalDf.ix[:, summedMentionDf.columns ]
    empiricalCov = strats.EmpiricalCovariance( empiricalDf )
    saveBenchmarks = False
    if saveBenchmarks:
        # Start the backtest 20 calendar days in, presumably to warm up the
        # covariance estimate -- TODO confirm.
        beginBench = begin + datetime.timedelta( 20 )
        # NOTE(review): this loop rebinds ``constrained`` used above, and its
        # body appears truncated here (``minvar`` is assigned but never used in
        # the visible text) -- check the remainder of the file.
        for constrained in [ True, False ]:
            minvar = strats.Backtest( empiricalDf, empiricalCov, strats.MinimumVariance( constrained = constrained ), beginBench, end ).run().portfolioValues()
"""
Created on Mar 13, 2014

@author: curly
"""
import afp.keywords as keywords
import afp.matrices as matrices
import afp.settings as settings
import cleaner.retrieve as retrieve
import pandas as pd
import datetime
import os

if __name__ == "__main__":
    begin = datetime.date(2011, 1, 3)
    end = datetime.date(2013, 11, 27)
    tickerList = keywords.getTickerList(os.path.join(settings.KEYWORDS_DIR, "splist.csv"))
    filepath = retrieve.adjustedClosesFilepath(filename="cleanSP.csv")
    dailyPrices = matrices.getEmpiricalDataFrame(tickerList, begin, end, csvPath=filepath)
    # Rescale every series to start at 1.0 so price paths are comparable.
    # .iloc[0] replaces .ix[0]: the .ix indexer was deprecated in pandas 0.20
    # and removed in 1.0, so this script crashed on any modern pandas.
    normalizedPrices = dailyPrices / dailyPrices.iloc[0]
    # Sum of squared differences between normalized paths for every unordered
    # ticker pair; ``first < second`` visits each pair exactly once.
    pairs = dict(
        (first + "|" + second, sum((normalizedPrices[first] - normalizedPrices[second]) ** 2))
        for first in normalizedPrices.columns
        for second in normalizedPrices.columns
        if first < second
    )
    pairDf = pd.DataFrame(pairs, index=["Pairs"]).T
    pairDf.to_csv(os.path.join(settings.RESULTS_DIR, "leastSqPairs.csv"))
'''
Created on Mar 12, 2014

@author: curly
'''
import datetime
import afp.matrices as matrices
import afp.keywords as keywords
import afp.settings as settings
import cleaner.retrieve as retrieve
import os

if __name__ == '__main__':
    begin = datetime.date( 2011, 1, 3 )
    end = datetime.date( 2013, 11, 27 )
    tickerList = keywords.getTickerList( os.path.join( settings.KEYWORDS_DIR, 'splist.csv' ) )
    filepath = retrieve.adjustedClosesFilepath( filename = 'hftDaily.csv' )
    dailyPrices = matrices.getEmpiricalDataFrame( tickerList, begin, end, csvPath = filepath )
    n = dailyPrices.shape[ 0 ]
    threshold = .01
    # Keep only columns with fewer than 1% missing observations.
    # ``isnull()`` replaces the opaque ``df != df`` NaN self-inequality test,
    # and ``.loc`` replaces the ``.ix`` indexer, which was deprecated in pandas
    # 0.20 and removed in 1.0 (the old name ``removedCols`` was also backwards:
    # these are the columns that are *kept*).
    keptCols = dailyPrices.loc[ :, dailyPrices.isnull().sum() < threshold * n ]
    # The original wrapped os.path.join redundantly around itself.
    keptCols.to_csv( os.path.join( settings.RESULTS_DIR, 'cleanSP.csv' ) )