Example #1
File: match.py Project: ccphillippi/AFP
def get_vol( self, com1, com2, date1, date2 ):
    # Parse the date range and load adjusted closes for the two tickers.
    begin = datetime.strptime( date1, '%Y-%m-%d' )
    end = datetime.strptime( date2, '%Y-%m-%d' )
    filepath = retrieve.adjustedClosesFilepath( filename = 'cleanSP.csv' )
    dailyPrices = matrices.getEmpiricalDataFrame( [ com1, com2 ], begin, end, csvPath = filepath )

    # Rebase each series to 1.0 at its first observation and return the
    # volatility of the spread between the two rebased series.
    p1 = dailyPrices[ com1 ]  # ystockquote.get_historical_prices( [com1], '20' + date1, '20' + date2 )
    p2 = dailyPrices[ com2 ]  # ystockquote.get_historical_prices( [com2], '20' + date1, '20' + date2 )
    n1 = p1 / p1.iat[ 0 ]
    n2 = p2 / p2.iat[ 0 ]
    diff = n1 - n2
    return numpy.std( diff, axis = 0 )
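
What get_vol computes can be shown standalone: each price series is rebased to 1.0 at its first observation, and the function returns the standard deviation of the spread between the two rebased series. A toy sketch with made-up prices (not part of the project source):

import numpy
import pandas as pd

# Two made-up daily price series.
p1 = pd.Series( [ 100.0, 101.0, 99.5, 102.0 ] )
p2 = pd.Series( [ 50.0, 50.5, 50.2, 51.5 ] )

# Rebase both to 1.0 at the first observation and take the volatility of the
# spread, mirroring the normalization and return value of get_vol above.
n1 = p1 / p1.iat[ 0 ]
n2 = p2 / p2.iat[ 0 ]
print( numpy.std( n1 - n2, axis = 0 ) )
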
Example #2
File: matrices.py Project: ccphillippi/AFP
def getEmpiricalDataFrame( tickerList,
                           fromDate,
                           toDate,
                           csvPath = retrieve.adjustedClosesFilepath() ):
    """Returns a :py:class:`pandas.DataFrame` according to selected stocks and dates
    
    :param tickerList: A list of the tickers to be added into the table
    :param fromDate: Time from which to begin the table
    :type fromDate: :py:class:`datetime.date`
    :param toDate: TIme from which to end the table
    :type toDate: :py:class:`datetime.date`
    :param csvPath: The name of the file within the Empirical file store
    
    """ 
    df = pd.read_csv( csvPath, index_col = 0, parse_dates = True, na_values = 'NA' )
    tickers = set( tickerList )
    extraColumns = [ column for column in df.columns if column not in tickers ]
    # Slice the date index to [fromDate, toDate) and drop columns not in tickerList.
    start = df.index.searchsorted( fromDate )
    end = df.index.searchsorted( toDate )
    return df[ start:end ].drop( extraColumns, axis = 1 )
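
A minimal call sketch (not part of the project source), assuming the afp/cleaner package layout and the 'cleanSP.csv' store used in the other examples; the two tickers are hypothetical:

import datetime
import afp.matrices as matrices
import cleaner.retrieve as retrieve

begin = datetime.date( 2011, 1, 3 )
end = datetime.date( 2013, 11, 27 )
# Keep only the requested tickers between the two dates; all other columns are dropped.
prices = matrices.getEmpiricalDataFrame( [ 'AAPL', 'MSFT' ],  # hypothetical tickers
                                         begin, end,
                                         csvPath = retrieve.adjustedClosesFilepath( filename = 'cleanSP.csv' ) )
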
Example #3
File: matrices.py Project: ccphillippi/AFP
                               columns = columns,
                               default_fill_value = 0 )

def _getCountRows( args ):
    timestamp, aggregator, wordCounter = args
    # Index entries may be datetime.datetime (which has .date()) or plain datetime.date.
    try:
        date = timestamp.date()
    except AttributeError:
        date = timestamp
    counts = wordCounter( retrieve.getDailyArticles( date ) )
    # When no aggregator is supplied (None is not callable), return the raw counts.
    try:
        return ( date, aggregator( counts ) )
    except TypeError:
        return ( date, counts )
    
if __name__ == "__main__":    
    begin = datetime.date( 2011, 1, 3 )
    end = datetime.date( 2013, 11, 27 )
    keywordsFile = join( settings.KEYWORDS_DIR, 'splist.csv' )
    tickerList = keywords.getTickerList( keywordsFile )
    keywordsMap = keywords.getKeywordToIndexMap( keywordsFile )
    empiricalDf = getEmpiricalDataFrame( tickerList, begin, end, retrieve.adjustedClosesFilepath( filename = 'cleanSP.csv' ) )
    countDf = getCountDataFrame( tickerList,
                                 count.WordCounter( keywordsMap ),
                                 empiricalDf.index )
    tfidf = normalize.TfIdf()( countDf )
    empiricalDf = empiricalDf.ix[ tfidf.index ]
    tfidf.corr().to_csv( join( settings.RESULTS_DIR, 'hft_CountCorr.csv' ) )
    empiricalDf.corr().to_csv( join( settings.RESULTS_DIR, 'hft_EmpCorr.csv' ) )
    # corr.to_csv( join( settings.RESULTS_DIR, 'corrtest_withSent_all.csv' ) )
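
The error handling in _getCountRows above is two duck-typed fallbacks: the timestamp may be a datetime.datetime or a plain datetime.date, and the aggregator may be a callable or None. A self-contained sketch of the same pattern with toy inputs (the helper name and values are only for illustration):

import datetime
import numpy as np

def _getRow( timestamp, aggregator, counts ):
    # Accept either datetime.datetime (has .date()) or datetime.date.
    try:
        date = timestamp.date()
    except AttributeError:
        date = timestamp
    # If aggregator is None, calling it raises TypeError; fall back to the raw counts.
    try:
        return ( date, aggregator( counts ) )
    except TypeError:
        return ( date, counts )

print( _getRow( datetime.datetime( 2011, 1, 3, 9, 30 ), np.sum, [ 1, 2, 3 ] ) )  # (date, 6)
print( _getRow( datetime.date( 2011, 1, 3 ), None, [ 1, 2, 3 ] ) )               # (date, [1, 2, 3])
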
Example #4
import pandas as pd
import datetime
import os
# The project imports are missing from this excerpt; the module paths below are
# assumed, following the afp/cleaner layout used in the other examples.
import numpy as np
import afp.keywords as keywords
import afp.count as count
import afp.sentiment as sentiment
import afp.matrices as matrices
import afp.strats as strats
import cleaner.retrieve as retrieve

if __name__ == '__main__':
    begin = datetime.date( 2011, 1, 3 )
    end = datetime.date( 2013, 11, 27 )
    tickerList = keywords.getTickerList()
    keywordsMap = keywords.getKeywordToIndexMap()
    sentCounter = count.SentimentWordCounter( keywordsMap, sentiment.classifier() )
    mentionCounter = count.WordCounter( keywordsMap )
    empiricalDf = matrices.getEmpiricalDataFrame( tickerList, begin, end )
    constrained = False
    minVarBenchmark = { True : 'minvarConstrained.csv', False : 'minvarAnalytical.csv' }
    maxDivBenchmark = { True : 'maxDivConstrained.csv', False : 'maxDivAnalytical.csv' }
    minvarBenchmarkDf = matrices.getEmpiricalDataFrame( [ strats.MinimumVariance().getName() ], begin, end, retrieve.adjustedClosesFilepath( filename = minVarBenchmark[ constrained ] ) )  
    maxDivBenchmarkDf = matrices.getEmpiricalDataFrame( [ strats.MaximumDiversification().getName() ], begin, end, retrieve.adjustedClosesFilepath( filename = maxDivBenchmark[ constrained ] ) )
    riskParityDf = matrices.getEmpiricalDataFrame( [ strats.RiskParity().getName() ], begin, end, retrieve.adjustedClosesFilepath( filename = 'riskParity.csv' ) )   
    benchmarkDf = matrices.getEmpiricalDataFrame( [ 'OEF', 'SPY' ], begin, end, retrieve.benchmarkFilepath() )
    summedSentDf = matrices.getCountDataFrame( tickerList, sentCounter, empiricalDf.index, aggregator = np.sum )
    articleSentDf = matrices.getCountDataFrame( tickerList, sentCounter, empiricalDf.index )
    summedMentionDf = matrices.getCountDataFrame( tickerList, mentionCounter, empiricalDf.index, aggregator = np.sum )
    articleMentionDf = matrices.getCountDataFrame( tickerList, mentionCounter, empiricalDf.index )
    empiricalDf = empiricalDf.ix[:, summedMentionDf.columns ]
    empiricalCov = strats.EmpiricalCovariance( empiricalDf )
    
    saveBenchmarks = False
    if saveBenchmarks:
        beginBench = begin + datetime.timedelta( 20 ) 
        for constrained in [ True, False ]:
            minvar = strats.Backtest( empiricalDf, empiricalCov, strats.MinimumVariance( constrained = constrained ), beginBench, end ).run().portfolioValues()
Example #5
"""
Created on Mar 13, 2014

@author: curly
"""

import afp.keywords as keywords
import afp.matrices as matrices
import afp.settings as settings
import cleaner.retrieve as retrieve
import pandas as pd
import datetime
import os

if __name__ == "__main__":
    begin = datetime.date(2011, 1, 3)
    end = datetime.date(2013, 11, 27)
    tickerList = keywords.getTickerList(os.path.join(settings.KEYWORDS_DIR, "splist.csv"))
    filepath = retrieve.adjustedClosesFilepath(filename="cleanSP.csv")
    dailyPrices = matrices.getEmpiricalDataFrame(tickerList, begin, end, csvPath=filepath)
    # Rebase every price series to 1.0 at the first row, then compute, for each
    # unordered ticker pair, the sum of squared differences between the two
    # rebased paths (a least-squares pair-selection distance).
    normalizedPrices = dailyPrices / dailyPrices.ix[0]
    pairs = dict(
        (first + "|" + second, sum((normalizedPrices[first] - normalizedPrices[second]) ** 2))
        for first in normalizedPrices.columns
        for second in normalizedPrices.columns
        if first < second
    )
    pairDf = pd.DataFrame(pairs, index=["Pairs"]).T
    pairDf.to_csv(os.path.join(settings.RESULTS_DIR, "leastSqPairs.csv"))
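
A possible follow-up, not in the original: since the distance is the sum of squared differences between rebased price paths, sorting the written column ascending ranks the most closely co-moving pairs first (assumes a pandas version with sort_values).

import os
import pandas as pd
import afp.settings as settings

# Rank the pairs whose rebased price paths stayed closest together.
pairDf = pd.read_csv(os.path.join(settings.RESULTS_DIR, "leastSqPairs.csv"), index_col=0)
print(pairDf.sort_values("Pairs").head(10))
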
Example #6
'''
Created on Mar 12, 2014

@author: curly
'''

import datetime
import afp.matrices as matrices
import afp.keywords as keywords
import afp.settings as settings
import cleaner.retrieve as retrieve
import os

if __name__ == '__main__':
    begin = datetime.date( 2011, 1, 3 )
    end = datetime.date( 2013, 11, 27 )
    tickerList = keywords.getTickerList( os.path.join( settings.KEYWORDS_DIR, 'splist.csv' ) )
    filepath = retrieve.adjustedClosesFilepath( filename = 'hftDaily.csv' )
    dailyPrices = matrices.getEmpiricalDataFrame( tickerList, begin, end, csvPath = filepath )
    n = dailyPrices.shape[ 0 ]
    threshold = .01
    # NaN is the only value unequal to itself, so ( dailyPrices != dailyPrices ).sum()
    # counts missing cells per column; despite its name, removedCols holds the columns
    # that are kept (fewer than 1% missing observations).
    removedCols = dailyPrices.ix[ :, ( dailyPrices != dailyPrices ).sum() < threshold * n ]
    removedCols.to_csv( os.path.join( settings.RESULTS_DIR, 'cleanSP.csv' ) )
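
A toy sketch of the same missing-data filter (the frame and threshold are made up): a cell compares unequal to itself only when it is NaN, so the boolean sum counts missing values per column and the mask keeps the columns below the threshold.

import numpy as np
import pandas as pd

# Column 'B' is half missing, 'A' is complete.
df = pd.DataFrame( { 'A': [ 1.0, 2.0, 3.0, 4.0 ],
                     'B': [ 1.0, np.nan, np.nan, 4.0 ] } )
n = df.shape[ 0 ]
threshold = .25

# Count NaNs per column via self-inequality, then keep columns under the threshold.
nanCounts = ( df != df ).sum()
kept = df.loc[ :, nanCounts < threshold * n ]
print( kept.columns.tolist() )  # ['A']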