Esempio n. 1
0
def findEvents(symbols, startday, endday, marketSymbol):
    """
    @summary: Scans the close data of each symbol and, whenever the value
              drops below 6.0 after being at or above 6.0 on the previous
              timestamp, writes a Buy order for that timestamp and a Sell
              order five timestamps later to 'order.csv'.
    @param symbols: symbols to scan; each one is used as a column key into
                    the data returned by DataAccess.get_data
    @param startday: start of the range passed to du.getNYSEdays
    @param endday: end of the range passed to du.getNYSEdays
    @param marketSymbol: unused; kept so existing callers keep working
    @return: None. Orders are written to 'order.csv' in the current working
             directory, one "year,month,day,symbol,action,100" line each.
    """
    threshold = 6.
    holding_period = 5

    # Reading the data for the list of symbols ('closefield' is a module
    # level name defined outside this function).
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday, endday, timeofday)
    dataobj = da.DataAccess('Yahoo')
    close = dataobj.get_data(timestamps, symbols, closefield)

    totaldays = len(timestamps)
    # 'with' guarantees the order file is closed even if a lookup fails
    # half way through the scan.
    with open('order.csv', 'w') as f:
        for symbol in symbols:
            for i in range(1, totaldays):
                if close[symbol][i - 1] >= threshold and close[symbol][i] < threshold:
                    buy_day = timestamps[i]
                    f.write('%s,%s,%s,%s,Buy,100\n'
                            % (buy_day.year, buy_day.month, buy_day.day, symbol))
                    # Sell after the holding period, clamped to the last
                    # available timestamp.
                    sell_day = timestamps[min(i + holding_period, totaldays - 1)]
                    f.write('%s,%s,%s,%s,Sell,100\n'
                            % (sell_day.year, sell_day.month, sell_day.day, symbol))
def marketsim(cash, orders_file, data_item):
    """
    Replay the orders in a CSV file against a Portfolio and return its value
    for every timestamp in the order date range.

    @param cash: starting cash, passed to Portfolio(cash)
    @param orders_file: path of a CSV file whose rows are
                        year, month, day, symbol, action, quantity
    @param data_item: data field requested from DataAccess.get_data
                      (for example a close field name)
    @return: list of (year, month, day, portfolio_value) tuples, one per
             timestamp returned by getNYSEdays; an empty list when the
             order file contains no rows
    """
    # Read orders, grouped by calendar date.
    orders = defaultdict(list)
    symbols = set()
    with open(orders_file, "rU") as orders_fh:
        for year, month, day, sym, action, num in csv.reader(orders_fh):
            orders[date(int(year), int(month), int(day))].append(
                (sym, action, int(num)))
            symbols.add(sym)

    if not orders:
        # Nothing to simulate: there is no date range to evaluate.
        return []

    days = sorted(orders)
    day, end = days[0], days[-1]

    # Reading the data for the list of symbols. The end of the range is the
    # day after the last order; timedelta arithmetic is used because
    # 'end.day + 1' is not a valid day on the last day of a month.
    timestamps = getNYSEdays(datetime(day.year, day.month, day.day),
                             datetime(end.year, end.month, end.day)
                             + timedelta(days=1),
                             timedelta(hours=16))

    dataobj = DataAccess('Yahoo')
    close = dataobj.get_data(timestamps, symbols, data_item)

    values = []
    portfolio = Portfolio(cash)
    for i, t in enumerate(timestamps):
        # .get() avoids growing the defaultdict with empty entries for
        # days that have no orders.
        for sym, action, num in orders.get(date(t.year, t.month, t.day), ()):
            # Sells are applied as negative quantities; the comparison is
            # case-insensitive so 'Sell', 'SELL' and 'sell' all count.
            if action.strip().lower() == 'sell':
                num = -num
            portfolio.update(sym, num, close[sym][i])

        values.append((t.year, t.month, t.day, portfolio.value(close, i)))

    return values
Esempio n. 3
0
 def load_from_csv(self, tickers, index, fields=Fields.QUOTES, **kwargs):
     ''' Return a quote panel

     Loads ``fields`` for ``tickers`` from the 'Yahoo' DataAccess store,
     groups every series with ``index.freq.rollforward``, averages each
     group and forward-fills the gaps.

     tickers: iterable of tickers to load
     index: date index; its first and last entries bound the loaded range
            and its ``freq.rollforward`` is the grouping function
     fields: fields to request, in the order get_data returns them
     kwargs:
         reverse -- when true, build the panel with orient='minor'
         verbose -- forwarded to DataAccess.get_data
         symbols -- optional ticker -> symbol mapping, used only when the
                    database is not connected

     Returns the Panel, or None when neither the database nor a
     ``symbols`` mapping is available.
     '''
     #TODO Replace adj_close with actual_close
     #TODO Add reindex methods, and start, end, delta parameters
     reverse = kwargs.get('reverse', False)
     verbose = kwargs.get('verbose', False)
     # Bind 'symbols' up front: it used to be referenced in the 'elif' below
     # before any assignment, raising UnboundLocalError whenever the
     # database was offline instead of logging the intended error.
     symbols = kwargs.get('symbols')
     if self.connected['database']:
         symbols, markets = self.db.getTickersCodes(tickers)
     elif not symbols:
         self._logger.error('** No database neither informations provided')
         return None
     timestamps = du.getNYSEdays(index[0], index[-1],
                                 dt.timedelta(hours=16))
     data_access = da.DataAccess('Yahoo')
     df = data_access.get_data(timestamps,
                               symbols.values(),
                               fields,
                               verbose=verbose)
     quotes_dict = dict()
     for ticker in tickers:
         quotes_dict[ticker] = dict()
         # df holds one frame per requested field, in the same order.
         for j, field in enumerate(fields):
             serie = df[j][symbols[ticker]].groupby(
                 index.freq.rollforward).aggregate(np.mean)
             #TODO add a function parameter to decide what to do about it
             quotes_dict[ticker][field] = serie.fillna(method='pad')
     if reverse:
         return Panel.from_dict(quotes_dict, intersect=True, orient='minor')
     return Panel.from_dict(quotes_dict, intersect=True)
Esempio n. 4
0
    def _generate_data(self):
        """
        Populate self.df_close, self.df_alloc and self.i_open_result.

        df_close holds the "close" data of '$SPX' from the 'Norgate' store
        for 2008-12-01 through 2010-01-31. df_alloc has one row for the
        first day of each month of 2009 with a value of 1 for '$SPX' and
        0.0 for '_CASH'.
        """
        year = 2009
        l_symbols = ['$SPX']

        # Timestamps covering one month before and one month after the year.
        range_start = dt.datetime(year - 1, 12, 1)
        range_end = dt.datetime(year + 1, 1, 31)
        ldt_timestamps = du.getNYSEdays(range_start, range_end,
                                        dt.timedelta(hours=16))

        self.df_close = da.DataAccess('Norgate').get_data(
            ldt_timestamps, l_symbols, "close", verbose=True)

        # January row first, then one appended row per remaining month.
        self.df_alloc = pand.DataFrame(index=[dt.datetime(year, 1, 1)],
                                       data=[1], columns=l_symbols)
        for month in range(2, 13):
            month_row = pand.DataFrame(index=[dt.datetime(year, month, 1)],
                                       data=[1], columns=l_symbols)
            self.df_alloc = self.df_alloc.append(month_row)

        self.df_alloc['_CASH'] = 0.0

        # Based on hand calculation using the transaction costs and slippage.
        self.i_open_result = 1.15921341122
def findEvents(symbols, startday, endday, marketSymbol, verbose=False):
    """
    @summary: Builds an event matrix marking each position where a symbol's
              close falls below 7.0 after being at or above 7.0 at the
              previous position.
    @param symbols: symbols to scan, used as column keys of the close data
    @param startday: start of the range passed to du.getNYSEdays
    @param endday: end of the range passed to du.getNYSEdays
    @param marketSymbol: not used by this function
    @param verbose: when true, progress messages are printed
    @return: deep copy of the filled close data in which every
             (symbol, timestamp) cell is NaN except event cells, which are 1.0
    """
    timestamps = du.getNYSEdays(startday, endday, dt.timedelta(hours=16))

    if verbose:
        print(__name__ + " reading data")

    # 'dataobj' and 'closefield' are module-level names defined elsewhere.
    raw_close = dataobj.get_data(timestamps, symbols, closefield)
    forward_filled = raw_close.fillna(method="ffill")
    close = forward_filled.fillna(method="backfill")

    # Same layout as the close data, blanked out for the requested symbols.
    np_eventmat = copy.deepcopy(close)
    for sym in symbols:
        for stamp in timestamps:
            np_eventmat[sym][stamp] = np.NAN

    if verbose:
        print(__name__ + " finding events")

    price = 7.0
    for symbol in symbols:
        n_rows = len(close[symbol])
        for today in range(1, n_rows):
            yesterday = today - 1
            crossed_down = (close[symbol][yesterday] >= price
                            and close[symbol][today] < price)
            if crossed_down:
                np_eventmat[symbol][today] = 1.0

    return np_eventmat
Esempio n. 6
0
def alloc_backtest(alloc, start):
    """
    @summary: Back tests an allocation from a pickle file. Uses a starting
              portfolio value of start.
    @param alloc: Name of allocation pickle file. Pickle file contains a
                  DataMatrix with timestamps as indexes and stock symbols as
                  columns, with the last column being the _CASH symbol,
                  indicating how much of the allocation is in cash.
    @param start: starting value of the portfolio (converted with int())
    @return: list [fund, leverage, commissions, slippage] exactly as
             returned by qs.tradesim
    """

    # Read in the alloc table. Pickles are binary data, so the file is
    # opened in 'rb' mode, and 'with' closes the handle once loading is done.
    # NOTE: unpickling can execute arbitrary code; only load allocation
    # files that come from a trusted source.
    with open(alloc, "rb") as alloc_input_file:
        alloc = pickle.load(alloc_input_file)

    # Get the data from the data store, starting ten days before the first
    # allocation timestamp.
    dataobj = da.DataAccess('Norgate')
    startday = alloc.index[0] - dt.timedelta(days=10)
    endday = alloc.index[-1]

    # Get desired timestamps
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday, endday, timeofday)
    # Every column except the last one (_CASH) is a symbol to load.
    historic = dataobj.get_data(timestamps, list(alloc.columns[0:-1]), "close")

    # Run the back test
    [fund, leverage, commissions,
     slippage] = qs.tradesim(alloc, historic, int(start), 1, True, 0.02, 5,
                             0.02)

    return [fund, leverage, commissions, slippage]
Esempio n. 7
0
def print_industry_coer(fund_ts, ostream):
    """
    @summary prints, for each industry index in a fixed list, the
             correlation coefficient between the fund and the index and the
             slope of a degree-1 fit of the fund against the index, both
             computed on the output of tsu.daily (presumably daily returns)
    @param fund_ts: pandas fund time series
    @param ostream: stream to print to
    """
    industries = [['$DJUSBM', 'Materials'], ['$DJUSNC', 'Goods'],
                  ['$DJUSCY', 'Services'], ['$DJUSFN', 'Financials'],
                  ['$DJUSHC', 'Health'], ['$DJUSIN', 'Industrial'],
                  ['$DJUSEN', 'Oil & Gas'], ['$DJUSTC', 'Technology'],
                  ['$DJUSTL', 'TeleComm'], ['$DJUSUT', 'Utilities']]

    # The data source and the timestamp range do not depend on the index,
    # so they are built once instead of once per industry.
    norObj = de.DataAccess('mysql')
    ldtTimestamps = du.getNYSEdays(fund_ts.index[0], fund_ts.index[-1],
                                   dt.timedelta(hours=16))

    for i, (sSymbol, sLabel) in enumerate(industries):
        # Two entries per output line.
        if i % 2 == 0:
            ostream.write("\n")
        #load data
        ldfData = norObj.get_data(ldtTimestamps, [sSymbol], ['close'])
        # Fill gaps forward first, then backward for leading gaps.
        ldfData[0] = ldfData[0].fillna(method='pad')
        ldfData[0] = ldfData[0].fillna(method='bfill')
        #get correlation
        a = np.corrcoef(np.ravel(tsu.daily(ldfData[0][sSymbol])),
                        np.ravel(tsu.daily(fund_ts.values)))
        b = np.ravel(tsu.daily(ldfData[0][sSymbol]))
        f = np.ravel(tsu.daily(fund_ts))
        # Slope of the least-squares line f = fBeta * b + intercept.
        fBeta, unused = np.polyfit(b, f, 1)
        ostream.write("%10s(%s):%+6.2f,   %+6.2f   " %
                      (sLabel, sSymbol, a[0, 1], fBeta))
def findEvents(symbols, startday, endday, marketSymbol, verbose=False):
    """
    @summary: Builds an event matrix marking each position where a symbol's
              close drops below 7.0 after being at or above 7.0 at the
              previous position.
    @param symbols: symbols to scan, used as column keys of the close data
    @param startday: start of the range passed to du.getNYSEdays
    @param endday: end of the range passed to du.getNYSEdays
    @param marketSymbol: not used by this function
    @param verbose: when true, progress messages are printed
    @return: deep copy of the close data in which every
             (symbol, timestamp) cell is NaN except event cells, which are 1.0
    """
    event_price = 7.0

    # Reading the data for the list of symbols ('storename' and
    # 'closefield' are module-level names defined outside this function).
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday, endday, timeofday)
    dataobj = da.DataAccess(storename)
    if verbose:
        print(__name__ + " reading data")
    close = dataobj.get_data(timestamps, symbols, closefield)

    # Start from a copy of the close data with the scanned columns blanked
    # out, so the result keeps the same index and columns.
    np_eventmat = copy.deepcopy(close)
    for sym in symbols:
        for time in timestamps:
            np_eventmat[sym][time] = np.nan

    if verbose:
        print(__name__ + " finding events")

    # Generating the event matrix.
    # Event: the close crosses from >= event_price down to < event_price.
    for symbol in symbols:
        # NOTE(review): the scan starts at position 2, so a crossing between
        # the first two rows is never flagged -- confirm this is intended.
        for i in range(2, len(close[symbol])):
            if close[symbol][i - 1] >= event_price and close[symbol][i] < event_price:
                np_eventmat[symbol][i] = 1.0  # marking the event

    return np_eventmat
Esempio n. 9
0
def speedTest(lfcFeature, ldArgs):
    '''
    @Author: Tingyu Zhu
    @summary: Times applyFeatures for each feature in a list, prints the
              timings and returns them sorted from fastest to slowest
    @param lfcFeature: list of feature functions to time
    @param ldArgs: list of argument objects, one per feature, passed along
                   with the matching feature to applyFeatures
    @return: sorted list of tuples (elapsed time, "function name : args")
    '''
    # Pull two years of data to run the test on.
    daData = de.DataAccess('mysql')
    lsSym = ['AAPL', 'GOOG', 'XOM', 'AMZN', 'BA', 'GILD', '$SPX']
    lsKeys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldtTimestamps = du.getNYSEdays(dt.datetime(2010, 1, 1),
                                   dt.datetime(2011, 12, 31),
                                   dt.timedelta(hours=16))

    # Map every requested field name to the frame returned for it.
    dData = dict(zip(lsKeys, daData.get_data(ldtTimestamps, lsSym, lsKeys)))

    # Time each feature on its own.
    ltTimings = []
    for iFeature, fcFeature in enumerate(lfcFeature):
        dtFuncStart = dt.datetime.now()
        applyFeatures(dData, [fcFeature], [ldArgs[iFeature]],
                      sMarketRel='$SPX')
        tdElapsed = dt.datetime.now() - dtFuncStart
        sLabel = fcFeature.__name__ + ' : ' + str(ldArgs[iFeature])
        ltTimings.append((tdElapsed, sLabel))

    ltResults = sorted(ltTimings)

    # Print out the result, fastest first.
    for tdElapsed, sLabel in ltResults:
        print('%s : %s' % (sLabel, tdElapsed))

    return ltResults
Esempio n. 10
0
def print_other_coer(fund_ts, ostream):
    """
    @summary prints, for each index in a fixed list, the correlation
             coefficient between the fund and the index and the slope of a
             degree-1 fit of the fund against the index, both computed on
             the output of tsu.daily (presumably daily returns)
    @param fund_ts: pandas fund time series
    @param ostream: stream to print to
    """
    industries = [['$SPX', '    S&P Index'], ['$DJI', '    Dow Jones'],
                  ['$DJUSEN', 'Oil & Gas'], ['$DJGSP', '     Metals']]

    # The data source and the timestamp range do not depend on the index,
    # so they are built once instead of once per entry.
    norObj = de.DataAccess('mysql')
    ldtTimestamps = du.getNYSEdays(fund_ts.index[0], fund_ts.index[-1],
                                   dt.timedelta(hours=16))

    for i, (sSymbol, sLabel) in enumerate(industries):
        # Two entries per output line.
        if i % 2 == 0:
            ostream.write("\n")
        #load data
        ldfData = norObj.get_data(ldtTimestamps, [sSymbol], ['close'])
        # Fill gaps forward first, then backward for leading gaps.
        ldfData[0] = ldfData[0].fillna(method='pad')
        ldfData[0] = ldfData[0].fillna(method='bfill')
        #get correlation
        a = np.corrcoef(np.ravel(tsu.daily(ldfData[0][sSymbol])),
                        np.ravel(tsu.daily(fund_ts.values)))
        b = np.ravel(tsu.daily(ldfData[0][sSymbol]))
        f = np.ravel(tsu.daily(fund_ts))
        # Slope of the least-squares line f = fBeta * b + intercept.
        fBeta, unused = np.polyfit(b, f, 1)
        ostream.write("%10s(%s):%+6.2f,   %+6.2f   " %
                      (sLabel, sSymbol, a[0, 1], fBeta))
Esempio n. 11
0
 def load_from_csv(self, tickers, index, fields=Fields.QUOTES, **kwargs):
     ''' Return a quote panel

     Loads ``fields`` for ``tickers`` from the 'Yahoo' DataAccess store,
     groups every series with ``index.freq.rollforward``, averages each
     group and forward-fills the gaps.

     tickers: iterable of tickers to load
     index: date index; its first and last entries bound the loaded range
            and its ``freq.rollforward`` is the grouping function
     fields: fields to request, in the order get_data returns them
     kwargs:
         reverse -- when true, build the panel with orient='minor'
         verbose -- forwarded to DataAccess.get_data
         symbols -- optional ticker -> symbol mapping, used only when the
                    database is not connected

     Returns the Panel, or None when neither the database nor a
     ``symbols`` mapping is available.
     '''
     #TODO Replace adj_close with actual_close
     #TODO Add reindex methods, and start, end, delta parameters
     reverse = kwargs.get('reverse', False)
     verbose = kwargs.get('verbose', False)
     # Bind 'symbols' up front: it used to be referenced in the 'elif' below
     # before any assignment, raising UnboundLocalError whenever the
     # database was offline instead of logging the intended error.
     symbols = kwargs.get('symbols')
     if self.connected['database']:
         symbols, markets = self.db.getTickersCodes(tickers)
     elif not symbols:
         self._logger.error('** No database neither informations provided')
         return None
     timestamps = du.getNYSEdays(index[0], index[-1], dt.timedelta(hours=16))
     data_access = da.DataAccess('Yahoo')
     df = data_access.get_data(timestamps, symbols.values(), fields, verbose=verbose)
     quotes_dict = dict()
     for ticker in tickers:
         quotes_dict[ticker] = dict()
         # df holds one frame per requested field, in the same order.
         for j, field in enumerate(fields):
             serie = df[j][symbols[ticker]].groupby(index.freq.rollforward).aggregate(np.mean)
             #TODO add a function parameter to decide what to do about it
             quotes_dict[ticker][field] = serie.fillna(method='pad')
     if reverse:
         return Panel.from_dict(quotes_dict, intersect=True, orient='minor')
     return Panel.from_dict(quotes_dict, intersect=True)
Esempio n. 12
0
def main():
    """
    Generate four synthetic series (fast and slow sine waves, each with and
    without Gaussian noise), hand them to write() and save a plot of all
    four to 'test.png'.
    """
    print("Creating Stock data from Sine Waves")
    ldt_timestamps = du.getNYSEdays(dt.datetime(2000, 1, 1),
                                    dt.datetime(2012, 10, 31),
                                    dt.timedelta(hours=16))

    # One sample per timestamp.
    x = np.array(range(len(ldt_timestamps)))

    ls_symbols = ['SINE_FAST', 'SINE_SLOW', 'SINE_FAST_NOISE', 'SINE_SLOW_NOISE']

    # Clean waves oscillate between 90 and 110.
    sine_fast = 10*np.sin(x/10.) + 100
    sine_slow = 10*np.sin(x/30.) + 100

    # Noisy variants; the fast series draws its noise before the slow one.
    fast_noise = np.random.randn(x.size)
    sine_fast_noise = 10*(np.sin(x/10.) + fast_noise) + 100
    slow_noise = np.random.randn(x.size)
    sine_slow_noise = 10*(np.sin(x/30.) + slow_noise) + 100

    l_series = [sine_fast, sine_slow, sine_fast_noise, sine_slow_noise]
    d_data = dict(zip(ls_symbols, l_series))

    write(ls_symbols, d_data, ldt_timestamps)

    # Plot the series in the same order as the legend entries.
    plt.clf()
    for na_series in l_series:
        plt.plot(ldt_timestamps, na_series)
    plt.ylim(50,150)
    plt.xticks(size='xx-small')
    plt.legend(ls_symbols, loc='best')
    plt.savefig('test.png',format='png')
Esempio n. 13
0
def findEvents(symbols, startday, endday, marketSymbol, verbose=False):
    """
    @summary: Builds an event matrix marking each position where a symbol's
              close is below 25.0 after being at or above 30.0 at the
              previous position.
    @param symbols: symbols to scan, used as column keys of the close data
    @param startday: start of the range passed to du.getNYSEdays
    @param endday: end of the range passed to du.getNYSEdays
    @param marketSymbol: not used by this function
    @param verbose: when true, progress messages are printed
    @return: deep copy of the close data in which every
             (symbol, timestamp) cell is NaN except event cells, which are 1.0
    """
    previous_floor = 30.0
    current_ceiling = 25.0

    # Reading the data for the list of symbols ('closefield' is a module
    # level name defined outside this function).
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday, endday, timeofday)
    dataobj = da.DataAccess('Yahoo')
    if verbose:
        print(__name__ + " reading data")
    close = dataobj.get_data(timestamps, symbols, closefield)

    # Start from a copy of the close data with the scanned columns blanked
    # out, so the result keeps the same index and columns.
    np_eventmat = copy.deepcopy(close)
    for sym in symbols:
        for time in timestamps:
            np_eventmat[sym][time] = np.nan

    if verbose:
        print(__name__ + " finding events")

    # Generating the event matrix.
    # Event: the close falls from >= 30.0 to < 25.0 between two consecutive
    # positions. The market symbol plays no part in this test.
    for symbol in symbols:
        for i in range(1, len(close[symbol])):
            if close[symbol][i] < current_ceiling and close[symbol][i - 1] >= previous_floor:
                np_eventmat[symbol][i] = 1.0  # marking the event

    return np_eventmat
Esempio n. 14
0
def strat_backtest2(strat, start, end, diff, dur, startval):
    """
    @summary: Back tests a strategy defined in a python script that takes in a
             start and end date along with a starting value over a given
             period.
    @param strat: filename of python script strategy
    @param start: starting date in a datetime object
    @param end: ending date in a datetime object
    @param diff: offset between the start of consecutive tests, counted in
                 entries of the du.getNYSEdays result
    @param dur: length of a test, counted in entries of the same result
    @param startval: starting value of fund during back tests
    @return fundsmatrix: list with one alloc_backtest result per test
    @raise RuntimeError: when the strategy script exits with a non-zero
                         status
    """
    # Local import so this function does not depend on a file-level import.
    import subprocess

    fundsmatrix = []
    startdates = du.getNYSEdays(start, end, dt.timedelta(hours=16))
    last = len(startdates) - 1
    for i in range(0, len(startdates), diff):
        # Windows that would run past the available dates end on the last one.
        enddate = startdates[min(i + dur, last)]
        # An argument list (no shell) keeps file names containing spaces or
        # shell metacharacters intact and cannot be used to inject commands.
        status = subprocess.call([
            'python',
            strat,
            startdates[i].strftime("%m-%d-%Y"),
            enddate.strftime("%m-%d-%Y"),
            'temp_alloc.pkl',
        ])
        if status != 0:
            # Otherwise a stale temp_alloc.pkl from an earlier window would
            # be back tested as if it belonged to this one.
            raise RuntimeError("strategy script %r exited with status %d"
                               % (strat, status))
        funds = alloc_backtest('temp_alloc.pkl', startval)
        fundsmatrix.append(funds)
    return fundsmatrix
Esempio n. 15
0
def log500(sLog):
    '''
    @summary: Pulls close and volume data for a hard-coded symbol list plus
              '$SPX' over a window around the last six months and passes it
              to applyFeatures together with the feature functions.
    @param sLog: forwarded to applyFeatures as its sLog argument
    @return: Nothing
    '''
    lsSym = ['A', 'AA', 'AAPL', 'ABC', 'ABT', 'ACE', 'ACN', 'ADBE', 'ADI', 'ADM', 'ADP', 'ADSK', 'AEE', 'AEP', 'AES', 'AET', 'AFL', 'AGN', 'AIG', 'AIV', 'AIZ', 'AKAM', 'AKS', 'ALL', 'ALTR', 'AMAT', 'AMD', 'AMGN', 'AMP', 'AMT', 'AMZN', 'AN', 'ANF', 'ANR', 'AON', 'APA', 'APC', 'APD', 'APH', 'APOL', 'ARG', 'ATI', 'AVB', 'AVP', 'AVY', 'AXP', 'AZO', 'BA', 'BAC', 'BAX', 'BBBY', 'BBT', 'BBY', 'BCR', 'BDX', 'BEN', 'BF.B', 'BHI', 'BIG', 'BIIB', 'BK', 'BLK', 'BLL', 'BMC', 'BMS', 'BMY', 'BRCM', 'BRK.B', 'BSX', 'BTU', 'BXP', 'C', 'CA', 'CAG', 'CAH', 'CAM', 'CAT', 'CB', 'CBG', 'CBS', 'CCE', 'CCL', 'CEG', 'CELG', 'CERN', 'CF', 'CFN', 'CHK', 'CHRW', 'CI', 'CINF', 'CL', 'CLF', 'CLX', 'CMA', 'CMCSA', 'CME', 'CMG', 'CMI', 'CMS', 'CNP', 'CNX', 'COF', 'COG', 'COH', 'COL', 'COP', 'COST', 'COV', 'CPB', 'CPWR', 'CRM', 'CSC', 'CSCO', 'CSX', 'CTAS', 'CTL', 'CTSH', 'CTXS', 'CVC', 'CVH', 'CVS', 'CVX', 'D', 'DD', 'DE', 'DELL', 'DF', 'DFS', 'DGX', 'DHI', 'DHR', 'DIS', 'DISCA', 'DNB', 'DNR', 'DO', 'DOV', 'DOW', 'DPS', 'DRI', 'DTE', 'DTV', 'DUK', 'DV', 'DVA', 'DVN', 'EBAY', 'ECL', 'ED', 'EFX', 'EIX', 'EL', 'EMC', 'EMN', 'EMR', 'EOG', 'EP', 'EQR', 'EQT', 'ERTS', 'ESRX', 'ETFC', 'ETN', 'ETR', 'EW', 'EXC', 'EXPD', 'EXPE', 'F', 'FAST', 'FCX', 'FDO', 'FDX', 'FE', 'FFIV', 'FHN', 'FII', 'FIS', 'FISV', 'FITB', 'FLIR', 'FLR', 'FLS', 'FMC', 'FO', 'FRX', 'FSLR', 'FTI', 'FTR', 'GAS', 'GCI', 'GD', 'GE', 'GILD', 'GIS', 'GLW', 'GME', 'GNW', 'GOOG', 'GPC', 'GPS', 'GR', 'GS', 'GT', 'GWW', 'HAL', 'HAR', 'HAS', 'HBAN', 'HCBK', 'HCN', 'HCP', 'HD', 'HES', 'HIG', 'HNZ', 'HOG', 'HON', 'HOT', 'HP', 'HPQ', 'HRB', 'HRL', 'HRS', 'HSP', 'HST', 'HSY', 'HUM', 'IBM', 'ICE', 'IFF', 'IGT', 'INTC', 'INTU', 'IP', 'IPG', 'IR', 'IRM', 'ISRG', 'ITT', 'ITW', 'IVZ', 'JBL', 'JCI', 'JCP', 'JDSU', 'JEC', 'JNJ', 'JNPR', 'JNS', 'JOYG', 'JPM', 'JWN', 'K', 'KEY', 'KFT', 'KIM', 'KLAC', 'KMB', 'KMX', 'KO', 'KR', 'KSS', 'L', 'LEG', 'LEN', 'LH', 'LIFE', 'LLL', 'LLTC', 'LLY', 'LM', 'LMT', 'LNC', 'LO', 'LOW', 'LSI', 'LTD', 'LUK', 'LUV', 'LXK',
             'M', 'MA', 'MAR', 'MAS', 'MAT', 'MCD', 'MCHP', 'MCK', 'MCO', 'MDT', 'MET', 'MHP', 'MHS', 'MJN', 'MKC', 'MMC', 'MMI', 'MMM', 'MO', 'MOLX', 'MON', 'MOS', 'MPC', 'MRK', 'MRO', 'MS', 'MSFT', 'MSI', 'MTB', 'MU', 'MUR', 'MWV', 'MWW', 'MYL', 'NBL', 'NBR', 'NDAQ', 'NE', 'NEE', 'NEM', 'NFLX', 'NFX', 'NI', 'NKE', 'NOC', 'NOV', 'NRG', 'NSC', 'NTAP', 'NTRS', 'NU', 'NUE', 'NVDA', 'NVLS', 'NWL', 'NWSA', 'NYX', 'OI', 'OKE', 'OMC', 'ORCL', 'ORLY', 'OXY', 'PAYX', 'PBCT', 'PBI', 'PCAR', 'PCG', 'PCL', 'PCLN', 'PCP', 'PCS', 'PDCO', 'PEG', 'PEP', 'PFE', 'PFG', 'PG', 'PGN', 'PGR', 'PH', 'PHM', 'PKI', 'PLD', 'PLL', 'PM', 'PNC', 'PNW', 'POM', 'PPG', 'PPL', 'PRU', 'PSA', 'PWR', 'PX', 'PXD', 'QCOM', 'QEP', 'R', 'RAI', 'RDC', 'RF', 'RHI', 'RHT', 'RL', 'ROK', 'ROP', 'ROST', 'RRC', 'RRD', 'RSG', 'RTN', 'S', 'SAI', 'SBUX', 'SCG', 'SCHW', 'SE', 'SEE', 'SHLD', 'SHW', 'SIAL', 'SJM', 'SLB', 'SLE', 'SLM', 'SNA', 'SNDK', 'SNI', 'SO', 'SPG', 'SPLS', 'SRCL', 'SRE', 'STI', 'STJ', 'STT', 'STZ', 'SUN', 'SVU', 'SWK', 'SWN', 'SWY', 'SYK', 'SYMC', 'SYY', 'T', 'TAP', 'TDC', 'TE', 'TEG', 'TEL', 'TER', 'TGT', 'THC', 'TIE', 'TIF', 'TJX', 'TLAB', 'TMK', 'TMO', 'TROW', 'TRV', 'TSN', 'TSO', 'TSS', 'TWC', 'TWX', 'TXN', 'TXT', 'TYC', 'UNH', 'UNM', 'UNP', 'UPS', 'URBN', 'USB', 'UTX', 'V', 'VAR', 'VFC', 'VIA.B', 'VLO', 'VMC', 'VNO', 'VRSN', 'VTR', 'VZ', 'WAG', 'WAT', 'WDC', 'WEC', 'WFC', 'WFM', 'WFR', 'WHR', 'WIN', 'WLP', 'WM', 'WMB', 'WMT', 'WPI', 'WPO', 'WU', 'WY', 'WYN', 'WYNN', 'X', 'XEL', 'XL', 'XLNX', 'XOM', 'XRAY', 'XRX', 'YHOO', 'YUM', 'ZION', 'ZMH']
    # Add the market index and keep the symbols in sorted order.
    lsSym = sorted(lsSym + ['$SPX'])

    # Max lookback is 6 months, ending today at 16:00.
    dtEnd = dt.datetime.now().replace(hour=16, minute=0, second=0,
                                      microsecond=0)
    dtStart = dtEnd - relativedelta(months=6)

    # Pull in current data.
    norObj = da.DataAccess('Norgate')

    # Get 2 extra months on each side for moving averages and future returns.
    dtWindowStart = dtStart - relativedelta(months=2)
    dtWindowEnd = dtEnd + relativedelta(months=2)
    ldtTimestamps = du.getNYSEdays(dtWindowStart, dtWindowEnd,
                                   dt.timedelta(hours=16))

    dfPrice = norObj.get_data(ldtTimestamps, lsSym, 'close')
    dfVolume = norObj.get_data(ldtTimestamps, lsSym, 'volume')

    # Imported functions from qstkfeat.features, NOTE: last function is
    # classification.
    lfcFeatures, ldArgs, lsNames = getFeatureFuncs()

    # Generate a list of DataFrames, one for each feature, with the same
    # index/column structure as price data.
    applyFeatures(dfPrice, dfVolume, lfcFeatures, ldArgs, sLog=sLog)
Esempio n. 16
0
def readdata(valuefile, closefield='close', stores='Yahoo'):
    """
    @summary: Reads a fund value file and builds a benchmark series that
              starts at the same value as the fund.
    @param valuefile: comma separated file with rows of
                      year, month, day, fund value
    @param closefield: data field requested from DataAccess.get_data
    @param stores: name of the data store passed to da.DataAccess
    @return: tuple (timestamps, fundvalue, benchmark_value) where
             timestamps comes from du.getNYSEdays, fundvalue is the list of
             values read from the file and benchmark_value is the benchmark
             price at each timestamp scaled so that its first entry equals
             the first fund value
    """
    # loadtxt returns a 0-d array for a file with a single row, which cannot
    # be iterated; atleast_1d turns it into a one-record array.
    funddata = nu.atleast_1d(
        nu.loadtxt(valuefile, delimiter=',', dtype='i4,i4,i4,f8'))
    datelist = []
    fundvalue = []
    for record in funddata:
        fundvalue.append(record[3])
        datelist.append(dt.datetime(record[0], record[1], record[2]))

    # Read in the benchmark data ('bench_symbol' is a module-level name
    # defined outside this function).
    timeofday = dt.timedelta(hours=16)
    startdate = datelist[0]
    enddate = datelist[-1] + dt.timedelta(days=1)  # fix the off-by-1 error
    timestamps = du.getNYSEdays(startdate, enddate, timeofday)

    # Get the value for the benchmark.
    dataobj = da.DataAccess(stores)
    symbols = [bench_symbol]
    close = dataobj.get_data(timestamps, symbols, closefield)

    benchmark_price = [close[bench_symbol][stamp] for stamp in timestamps]
    # Number of benchmark shares the initial fund value would have bought.
    bench_shares = fundvalue[0] / benchmark_price[0]
    benchmark_value = [bench_shares * price for price in benchmark_price]

    return timestamps, fundvalue, benchmark_value
Esempio n. 17
0
def alloc_backtest(alloc, start):
    """
    @summary: Back tests an allocation from a pickle file. Uses a starting
              portfolio value of start.
    @param alloc: Name of allocation pickle file. Pickle file contains a
                  DataMatrix with timestamps as indexes and stock symbols as
                  columns, with the last column being the _CASH symbol,
                  indicating how much of the allocation is in cash.
    @param start: starting value of the portfolio (converted with int())
    @return: list [fund, leverage, commissions, slippage] exactly as
             returned by qs.tradesim
    """

    # Read in the alloc table. Pickles are binary data, so the file is
    # opened in 'rb' mode, and 'with' closes the handle once loading is done.
    # NOTE: unpickling can execute arbitrary code; only load allocation
    # files that come from a trusted source.
    with open(alloc, "rb") as alloc_input_file:
        alloc = cPickle.load(alloc_input_file)

    # Get the data from the data store, starting ten days before the first
    # allocation timestamp.
    dataobj = da.DataAccess('Norgate')
    startday = alloc.index[0] - dt.timedelta(days=10)
    endday = alloc.index[-1]

    # Get desired timestamps
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday, endday, timeofday)
    # Every column except the last one (_CASH) is a symbol to load.
    historic = dataobj.get_data(timestamps, list(alloc.columns[0:-1]), "close")

    # Run the back test
    [fund, leverage, commissions, slippage] = qs.tradesim(
        alloc, historic, int(start), 1, True, 0.02, 5, 0.02)

    return [fund, leverage, commissions, slippage]
Esempio n. 18
0
def print_other_coer(fund_ts, ostream):
    """
    @summary prints, for each index in a fixed list, the correlation
             coefficient between the fund and the index and the slope of a
             degree-1 fit of the fund against the index, both computed on
             the output of tsu.daily (presumably daily returns)
    @param fund_ts: pandas fund time series
    @param ostream: stream to print to
    """
    industries = [['$SPX', '    S&P Index'],
                  ['$DJI', '    Dow Jones'],
                  ['$DJUSEN', 'Oil & Gas'],
                  ['$DJGSP', '     Metals']]

    # The data source and the timestamp range do not depend on the index,
    # so they are built once instead of once per entry.
    norObj = de.DataAccess('mysql')
    ldtTimestamps = du.getNYSEdays(fund_ts.index[0], fund_ts.index[-1],
                                   dt.timedelta(hours=16))

    for i, (sSymbol, sLabel) in enumerate(industries):
        # Two entries per output line.
        if i % 2 == 0:
            ostream.write("\n")
        #load data
        ldfData = norObj.get_data(ldtTimestamps, [sSymbol], ['close'])
        # Fill gaps forward first, then backward for leading gaps.
        ldfData[0] = ldfData[0].fillna(method='pad')
        ldfData[0] = ldfData[0].fillna(method='bfill')
        #get correlation
        a = np.corrcoef(np.ravel(tsu.daily(ldfData[0][sSymbol])),
                        np.ravel(tsu.daily(fund_ts.values)))
        b = np.ravel(tsu.daily(ldfData[0][sSymbol]))
        f = np.ravel(tsu.daily(fund_ts))
        # Slope of the least-squares line f = fBeta * b + intercept.
        fBeta, unused = np.polyfit(b, f, 1)
        ostream.write("%10s(%s):%+6.2f,   %+6.2f   " %
                      (sLabel, sSymbol, a[0, 1], fBeta))
Esempio n. 19
0
def readdata(valuefile, closefield='close', stores='Yahoo'):
    """
    @summary: Reads a fund value file and builds a benchmark series that
              starts at the same value as the fund.
    @param valuefile: comma separated file with rows of
                      year, month, day, fund value
    @param closefield: data field requested from DataAccess.get_data
    @param stores: name of the data store passed to da.DataAccess
    @return: tuple (timestamps, fundvalue, benchmark_value) where
             timestamps comes from du.getNYSEdays, fundvalue is the list of
             values read from the file and benchmark_value is the benchmark
             price at each timestamp scaled so that its first entry equals
             the first fund value
    """
    # loadtxt returns a 0-d array for a file with a single row, which cannot
    # be iterated; atleast_1d turns it into a one-record array.
    funddata = nu.atleast_1d(
        nu.loadtxt(valuefile, delimiter=',', dtype='i4,i4,i4,f8'))
    datelist = []
    fundvalue = []
    for record in funddata:
        fundvalue.append(record[3])
        datelist.append(dt.datetime(record[0], record[1], record[2]))

    # Read in the benchmark data ('bench_symbol' is a module-level name
    # defined outside this function).
    timeofday = dt.timedelta(hours=16)
    startdate = datelist[0]
    enddate = datelist[-1] + dt.timedelta(days=1)  # fix the off-by-1 error
    timestamps = du.getNYSEdays(startdate, enddate, timeofday)

    # Get the value for the benchmark.
    dataobj = da.DataAccess(stores)
    symbols = [bench_symbol]
    close = dataobj.get_data(timestamps, symbols, closefield)

    benchmark_price = [close[bench_symbol][stamp] for stamp in timestamps]
    # Number of benchmark shares the initial fund value would have bought.
    bench_shares = fundvalue[0] / benchmark_price[0]
    benchmark_value = [bench_shares * price for price in benchmark_price]

    return timestamps, fundvalue, benchmark_value
def findEvents(symbols, startday, endday, marketSymbol, verbose=False):
    """
    @summary: Scans the close data of each symbol and, whenever the value
              drops below 5.0 after being at or above 5.0 at the previous
              position, writes a BUY order for that date and a SELL order
              five positions later to 'orders.csv'.
    @param symbols: symbols to scan, used as column keys of the close data
    @param startday: start of the range passed to du.getNYSEdays
    @param endday: end of the range passed to du.getNYSEdays
    @param marketSymbol: not used by this function
    @param verbose: when true, progress messages are printed
    @return: None. Orders are written to 'orders.csv' in the current working
             directory as rows of year, month, day, symbol, action, 100.
    """
    event_price = 5.0
    holding_period = 5

    # Reading the data for the list of symbols ('storename' and
    # 'closefield' are module-level names defined outside this function).
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday, endday, timeofday)
    dataobj = da.DataAccess(storename)
    if verbose:
        print(__name__ + " reading data")
    close = dataobj.get_data(timestamps, symbols, closefield)

    if verbose:
        print(__name__ + " finding events")

    # Generating the orders.
    # Event: the close crosses from >= event_price down to < event_price.
    # 'with' guarantees the order file is closed even if the scan fails.
    with open('orders.csv', 'wt') as f:
        writer = csv.writer(f)

        for symbol in symbols:
            n_rows = len(close[symbol])
            # NOTE(review): the scan starts at position 2, so a crossing
            # between the first two rows is never traded -- confirm this is
            # intended.
            for i in range(2, n_rows):
                if close[symbol][i - 1] >= event_price and close[symbol][i] < event_price:
                    buy_day = close.index[i]
                    writer.writerow((buy_day.year, buy_day.month,
                                     buy_day.day, symbol, 'BUY', 100))

                    # Sell after the holding period, clamped to the last
                    # valid position. The previous clamp misspelled the
                    # variable name and, even spelled correctly, used the
                    # length itself, which is one past the end of the index.
                    j = min(i + holding_period, n_rows - 1)
                    sell_day = close.index[j]
                    writer.writerow((sell_day.year, sell_day.month,
                                     sell_day.day, symbol, 'SELL', 100))
Esempio n. 21
0
def strat_backtest2(strat, start, end, diff, dur, startval):
    """
    @summary: Back tests a strategy defined in a python script that takes in a
             start and end date along with a starting value over a given
             period.
    @param strat: filename of python script strategy
    @param start: starting date in a datetime object
    @param end: ending date in a datetime object
    @param diff: offset between the start of consecutive tests, counted in
                 entries of the du.getNYSEdays result
    @param dur: length of a test, counted in entries of the same result
    @param startval: starting value of fund during back tests
    @return fundsmatrix: list with one alloc_backtest result per test
    @raise RuntimeError: when the strategy script exits with a non-zero
                         status
    """
    # Local import so this function does not depend on a file-level import.
    import subprocess

    fundsmatrix = []
    startdates = du.getNYSEdays(start, end, dt.timedelta(hours=16))
    last = len(startdates) - 1
    for i in range(0, len(startdates), diff):
        # Windows that would run past the available dates end on the last one.
        enddate = startdates[min(i + dur, last)]
        # An argument list (no shell) keeps file names containing spaces or
        # shell metacharacters intact and cannot be used to inject commands.
        status = subprocess.call([
            'python',
            strat,
            startdates[i].strftime("%m-%d-%Y"),
            enddate.strftime("%m-%d-%Y"),
            'temp_alloc.pkl',
        ])
        if status != 0:
            # Otherwise a stale temp_alloc.pkl from an earlier window would
            # be back tested as if it belonged to this one.
            raise RuntimeError("strategy script %r exited with status %d"
                               % (strat, status))
        funds = alloc_backtest('temp_alloc.pkl', startval)
        fundsmatrix.append(funds)
    return fundsmatrix
Esempio n. 22
0
    def _generate_data(self):
        """
        Populate self.df_close, self.df_alloc and self.i_open_result.

        df_close holds the "close" data of '$SPX' from the 'Norgate' store
        for 2008-12-01 through 2010-01-31. df_alloc has one row for the
        first day of each month of 2009 with a value of -1 for '$SPX' and
        0.0 for '_CASH'.
        """
        year = 2009
        l_symbols = ['$SPX']

        # Timestamps covering one month before and one month after the year.
        range_start = dt.datetime(year - 1, 12, 1)
        range_end = dt.datetime(year + 1, 1, 31)
        ldt_timestamps = du.getNYSEdays(range_start, range_end,
                                        dt.timedelta(hours=16))

        self.df_close = da.DataAccess('Norgate').get_data(
            ldt_timestamps, l_symbols, "close", verbose=True)

        # January row first, then one appended row per remaining month.
        self.df_alloc = pand.DataFrame(index=[dt.datetime(year, 1, 1)],
                                       data=[-1], columns=l_symbols)
        for month in range(2, 13):
            month_row = pand.DataFrame(index=[dt.datetime(year, month, 1)],
                                       data=[-1], columns=l_symbols)
            self.df_alloc = self.df_alloc.append(month_row)

        self.df_alloc['_CASH'] = 0.0

        # Based on hand calculation using the transaction costs and slippage.
        self.i_open_result = 0.7541428779600005
def findEvents(symbols, startday, endday, marketSymbol, verbose=False):
    """
    @summary: Builds an event matrix marking each position where a symbol's
              close falls below 7.0 after being at or above 7.0 at the
              previous position.
    @param symbols: symbols to scan, used as column keys of the close data
    @param startday: start of the range passed to du.getNYSEdays
    @param endday: end of the range passed to du.getNYSEdays
    @param marketSymbol: not used by this function
    @param verbose: when true, progress messages are printed
    @return: deep copy of the filled close data in which every
             (symbol, timestamp) cell is NaN except event cells, which are 1.0
    """
    timestamps = du.getNYSEdays(startday, endday, dt.timedelta(hours=16))

    if verbose:
        print(__name__ + " reading data")

    # 'dataobj' and 'closefield' are module-level names defined elsewhere.
    raw_close = dataobj.get_data(timestamps, symbols, closefield)
    forward_filled = raw_close.fillna(method="ffill")
    close = forward_filled.fillna(method="backfill")

    # Same layout as the close data, blanked out for the requested symbols.
    np_eventmat = copy.deepcopy(close)
    for sym in symbols:
        for stamp in timestamps:
            np_eventmat[sym][stamp] = np.NAN

    if verbose:
        print(__name__ + " finding events")

    price = 7.0
    for symbol in symbols:
        n_rows = len(close[symbol])
        for today in range(1, n_rows):
            yesterday = today - 1
            crossed_down = (close[symbol][yesterday] >= price
                            and close[symbol][today] < price)
            if crossed_down:
                np_eventmat[symbol][today] = 1.0

    return np_eventmat
Esempio n. 24
0
def marketsim(cash, orders_file, data_item):
    """
    Replay the orders in a CSV file against a Portfolio and return its value
    for every timestamp in the order date range.

    @param cash: starting cash, passed to Portfolio(cash)
    @param orders_file: path of a CSV file whose rows are
                        year, month, day, symbol, action, quantity
    @param data_item: data field requested from DataAccess.get_data
                      (for example a close field name)
    @return: list of (year, month, day, portfolio_value) tuples, one per
             timestamp returned by getNYSEdays; an empty list when the
             order file contains no rows
    """
    # Read orders, grouped by calendar date.
    orders = defaultdict(list)
    symbols = set()
    with open(orders_file, "rU") as orders_fh:
        for year, month, day, sym, action, num in csv.reader(orders_fh):
            orders[date(int(year), int(month), int(day))].append(
                (sym, action, int(num)))
            symbols.add(sym)

    if not orders:
        # Nothing to simulate: there is no date range to evaluate.
        return []

    days = sorted(orders)
    day, end = days[0], days[-1]

    # Reading the data for the list of symbols. The end of the range is the
    # day after the last order; timedelta arithmetic is used because
    # 'end.day + 1' is not a valid day on the last day of a month.
    timestamps = getNYSEdays(datetime(day.year, day.month, day.day),
                             datetime(end.year, end.month, end.day)
                             + timedelta(days=1),
                             timedelta(hours=16))

    dataobj = DataAccess('Yahoo')
    close = dataobj.get_data(timestamps, symbols, data_item)

    values = []
    portfolio = Portfolio(cash)
    for i, t in enumerate(timestamps):
        # .get() avoids growing the defaultdict with empty entries for
        # days that have no orders.
        for sym, action, num in orders.get(date(t.year, t.month, t.day), ()):
            # Sells are applied as negative quantities; the comparison is
            # case-insensitive so 'Sell', 'SELL' and 'sell' all count.
            if action.strip().lower() == 'sell':
                num = -num
            portfolio.update(sym, num, close[sym][i])

        values.append((t.year, t.month, t.day, portfolio.value(close, i)))

    return values
Esempio n. 25
0
def log500(sLog):
    '''
    @summary: Pulls close and volume data for a hard-coded list of ticker
              symbols (plus the '$SPX' index) and hands it to applyFeatures
              together with the feature functions from getFeatureFuncs.
    @param sLog: value forwarded to applyFeatures as its sLog argument
    @return: Nothing
    '''

    tickers = ['A', 'AA', 'AAPL', 'ABC', 'ABT', 'ACE', 'ACN', 'ADBE', 'ADI', 'ADM', 'ADP', 'ADSK', 'AEE', 'AEP', 'AES', 'AET', 'AFL', 'AGN', 'AIG', 'AIV', 'AIZ', 'AKAM', 'AKS', 'ALL', 'ALTR', 'AMAT', 'AMD', 'AMGN', 'AMP', 'AMT', 'AMZN', 'AN', 'ANF', 'ANR', 'AON', 'APA', 'APC', 'APD', 'APH', 'APOL', 'ARG', 'ATI', 'AVB', 'AVP', 'AVY', 'AXP', 'AZO', 'BA', 'BAC', 'BAX', 'BBBY', 'BBT', 'BBY', 'BCR', 'BDX', 'BEN', 'BF.B', 'BHI', 'BIG', 'BIIB', 'BK', 'BLK', 'BLL', 'BMC', 'BMS', 'BMY', 'BRCM', 'BRK.B', 'BSX', 'BTU', 'BXP', 'C', 'CA', 'CAG', 'CAH', 'CAM', 'CAT', 'CB', 'CBG', 'CBS', 'CCE', 'CCL', 'CEG', 'CELG', 'CERN', 'CF', 'CFN', 'CHK', 'CHRW', 'CI', 'CINF', 'CL', 'CLF', 'CLX', 'CMA', 'CMCSA', 'CME', 'CMG', 'CMI', 'CMS', 'CNP', 'CNX', 'COF', 'COG', 'COH', 'COL', 'COP', 'COST', 'COV', 'CPB', 'CPWR', 'CRM', 'CSC', 'CSCO', 'CSX', 'CTAS', 'CTL', 'CTSH', 'CTXS', 'CVC', 'CVH', 'CVS', 'CVX', 'D', 'DD', 'DE', 'DELL', 'DF', 'DFS', 'DGX', 'DHI', 'DHR', 'DIS', 'DISCA', 'DNB', 'DNR', 'DO', 'DOV', 'DOW', 'DPS', 'DRI', 'DTE', 'DTV', 'DUK', 'DV', 'DVA', 'DVN', 'EBAY', 'ECL', 'ED', 'EFX', 'EIX', 'EL', 'EMC', 'EMN', 'EMR', 'EOG', 'EP', 'EQR', 'EQT', 'ERTS', 'ESRX', 'ETFC', 'ETN', 'ETR', 'EW', 'EXC', 'EXPD', 'EXPE', 'F', 'FAST', 'FCX', 'FDO', 'FDX', 'FE', 'FFIV', 'FHN', 'FII', 'FIS', 'FISV', 'FITB', 'FLIR', 'FLR', 'FLS', 'FMC', 'FO', 'FRX', 'FSLR', 'FTI', 'FTR', 'GAS', 'GCI', 'GD', 'GE', 'GILD', 'GIS', 'GLW', 'GME', 'GNW', 'GOOG', 'GPC', 'GPS', 'GR', 'GS', 'GT', 'GWW', 'HAL', 'HAR', 'HAS', 'HBAN', 'HCBK', 'HCN', 'HCP', 'HD', 'HES', 'HIG', 'HNZ', 'HOG', 'HON', 'HOT', 'HP', 'HPQ', 'HRB', 'HRL', 'HRS', 'HSP', 'HST', 'HSY', 'HUM', 'IBM', 'ICE', 'IFF', 'IGT', 'INTC', 'INTU', 'IP', 'IPG', 'IR', 'IRM', 'ISRG', 'ITT', 'ITW', 'IVZ', 'JBL', 'JCI', 'JCP', 'JDSU', 'JEC', 'JNJ', 'JNPR', 'JNS', 'JOYG', 'JPM', 'JWN', 'K', 'KEY', 'KFT', 'KIM', 'KLAC', 'KMB', 'KMX', 'KO', 'KR', 'KSS', 'L', 'LEG', 'LEN', 'LH', 'LIFE', 'LLL', 'LLTC', 'LLY', 'LM', 'LMT', 'LNC', 'LO', 'LOW', 'LSI', 'LTD', 'LUK', 'LUV', 'LXK', 
'M', 'MA', 'MAR', 'MAS', 'MAT', 'MCD', 'MCHP', 'MCK', 'MCO', 'MDT', 'MET', 'MHP', 'MHS', 'MJN', 'MKC', 'MMC', 'MMI', 'MMM', 'MO', 'MOLX', 'MON', 'MOS', 'MPC', 'MRK', 'MRO', 'MS', 'MSFT', 'MSI', 'MTB', 'MU', 'MUR', 'MWV', 'MWW', 'MYL', 'NBL', 'NBR', 'NDAQ', 'NE', 'NEE', 'NEM', 'NFLX', 'NFX', 'NI', 'NKE', 'NOC', 'NOV', 'NRG', 'NSC', 'NTAP', 'NTRS', 'NU', 'NUE', 'NVDA', 'NVLS', 'NWL', 'NWSA', 'NYX', 'OI', 'OKE', 'OMC', 'ORCL', 'ORLY', 'OXY', 'PAYX', 'PBCT', 'PBI', 'PCAR', 'PCG', 'PCL', 'PCLN', 'PCP', 'PCS', 'PDCO', 'PEG', 'PEP', 'PFE', 'PFG', 'PG', 'PGN', 'PGR', 'PH', 'PHM', 'PKI', 'PLD', 'PLL', 'PM', 'PNC', 'PNW', 'POM', 'PPG', 'PPL', 'PRU', 'PSA', 'PWR', 'PX', 'PXD', 'QCOM', 'QEP', 'R', 'RAI', 'RDC', 'RF', 'RHI', 'RHT', 'RL', 'ROK', 'ROP', 'ROST', 'RRC', 'RRD', 'RSG', 'RTN', 'S', 'SAI', 'SBUX', 'SCG', 'SCHW', 'SE', 'SEE', 'SHLD', 'SHW', 'SIAL', 'SJM', 'SLB', 'SLE', 'SLM', 'SNA', 'SNDK', 'SNI', 'SO', 'SPG', 'SPLS', 'SRCL', 'SRE', 'STI', 'STJ', 'STT', 'STZ', 'SUN', 'SVU', 'SWK', 'SWN', 'SWY', 'SYK', 'SYMC', 'SYY', 'T', 'TAP', 'TDC', 'TE', 'TEG', 'TEL', 'TER', 'TGT', 'THC', 'TIE', 'TIF', 'TJX', 'TLAB', 'TMK', 'TMO', 'TROW', 'TRV', 'TSN', 'TSO', 'TSS', 'TWC', 'TWX', 'TXN', 'TXT', 'TYC', 'UNH', 'UNM', 'UNP', 'UPS', 'URBN', 'USB', 'UTX', 'V', 'VAR', 'VFC', 'VIA.B', 'VLO', 'VMC', 'VNO', 'VRSN', 'VTR', 'VZ', 'WAG', 'WAT', 'WDC', 'WEC', 'WFC', 'WFM', 'WFR', 'WHR', 'WIN', 'WLP', 'WM', 'WMB', 'WMT', 'WPI', 'WPO', 'WU', 'WY', 'WYN', 'WYNN', 'X', 'XEL', 'XL', 'XLNX', 'XOM', 'XRAY', 'XRX', 'YHOO', 'YUM', 'ZION', 'ZMH']
    # The index symbol is added to the list and everything is sorted.
    tickers = sorted(tickers + ['$SPX'])

    # Max lookback is 6 months, ending today at 16:00.
    window_end = dt.datetime.now().replace(hour=16, minute=0, second=0,
                                           microsecond=0)
    window_start = window_end - relativedelta(months=6)

    # Pull in current data.
    norgate = da.DataAccess('Norgate')
    # Two extra months on each side for moving averages and future returns.
    padding = relativedelta(months=2)
    trading_days = du.getNYSEdays(window_start - padding,
                                  window_end + padding,
                                  dt.timedelta(hours=16))

    price_frame = norgate.get_data(trading_days, tickers, 'close')
    volume_frame = norgate.get_data(trading_days, tickers, 'volume')

    # Feature functions and their arguments; the names are not used here.
    # Original note: the last function is the classification.
    feature_funcs, feature_args, _feature_names = getFeatureFuncs()

    # Apply every feature to the price/volume data, passing sLog through.
    applyFeatures(price_frame, volume_frame, feature_funcs, feature_args,
                  sLog=sLog)
Esempio n. 26
0
def simulate(symbols, allocations, startday, endday):
    """
    @summary: Evaluates a fixed-weight portfolio over a date range.
    @param symbols: list of symbols
    @param allocations: list of weights, one per symbol; rescaled so that
                        they sum to 1
    @param startday: start of the range handed to du.getNYSEdays
    @param endday: end of the range handed to du.getNYSEdays
    @return: dict with keys "sharpe", "cumulative_return", "average" and
             "stddev"
    """
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday, endday, timeofday)

    dataobj = da.DataAccess('Yahoo')
    close = dataobj.get_data(timestamps, symbols, "close", verbose=False)
    close = close.values
    # Normalise every column by its first row.
    norm_close = close / close[0, :]

    # Force floating point before rescaling: with integer weights, Python 2
    # integer division would otherwise truncate every weight to zero.
    weights = np.asarray(allocations, dtype=float)
    weights = weights / weights.sum()

    portfolio_value = np.dot(norm_close, weights)
    # returnize0 works in place, so it is given a copy of the value series.
    portfolio_return = portfolio_value.copy()
    tsu.returnize0(portfolio_return)

    sharpe = tsu.get_sharpe_ratio(portfolio_return)
    accum = portfolio_value[-1] / portfolio_value[0]
    average = np.mean(portfolio_return)
    stddev = np.std(portfolio_return)

    return {"sharpe": sharpe, "cumulative_return": accum,
            "average": average, "stddev": stddev}
Esempio n. 27
0
def print_industry_coer(fund_ts, ostream):
    """
    @summary for each of ten Dow Jones industry indexes, writes the
             correlation coefficient and the fitted slope between the
             tsu.daily output of the index close and that of the fund
             (presumably daily returns - confirm against tsu.daily)
    @param fund_ts: pandas fund time series
    @param ostream: stream to print to
    """
    industries = (('$DJUSBM', 'Materials'),
                  ('$DJUSNC', 'Goods'),
                  ('$DJUSCY', 'Services'),
                  ('$DJUSFN', 'Financials'),
                  ('$DJUSHC', 'Health'),
                  ('$DJUSIN', 'Industrial'),
                  ('$DJUSEN', 'Oil & Gas'),
                  ('$DJUSTC', 'Technology'),
                  ('$DJUSTL', 'TeleComm'),
                  ('$DJUSUT', 'Utilities'))
    for position, (index_symbol, label) in enumerate(industries):
        # Two entries per output line.
        if position % 2 == 0:
            ostream.write("\n")
        # Load the index close over the fund's date range.
        store = de.DataAccess('mysql')
        trading_days = du.getNYSEdays(fund_ts.index[0], fund_ts.index[-1],
                                      dt.timedelta(hours=16))
        frames = store.get_data(trading_days, [index_symbol], ['close'])
        # Fill gaps forward, then backward.
        frames[0] = frames[0].fillna(method='pad')
        frames[0] = frames[0].fillna(method='bfill')
        # Correlation between the index series and the fund series.
        correlation = np.corrcoef(
            np.ravel(tsu.daily(frames[0][index_symbol])),
            np.ravel(tsu.daily(fund_ts.values)))
        # Degree-1 fit of fund against index; the slope is reported as beta.
        index_moves = np.ravel(tsu.daily(frames[0][index_symbol]))
        fund_moves = np.ravel(tsu.daily(fund_ts))
        beta, _intercept = np.polyfit(index_moves, fund_moves, 1)
        ostream.write("%10s(%s):%+6.2f,   %+6.2f   "
                      % (label, index_symbol, correlation[0, 1], beta))
Esempio n. 28
0
def get_data(syms, startday, endday):
    """
    @summary: Fetches Yahoo close data for the given symbols, with gaps
              filled forward and then backward.
    @param syms: symbols to load
    @param startday: start of the range handed to du.getNYSEdays
    @param endday: end of the range; one day is subtracted before use
    @return: the filled close data returned by DataAccess.get_data
    """
    last_day = endday - dt.timedelta(days=1)
    trading_days = du.getNYSEdays(startday, last_day, dt.timedelta(hours=16))
    closes = da.DataAccess('Yahoo').get_data(trading_days, syms, 'close')
    forward_filled = closes.fillna(method='ffill')
    return forward_filled.fillna(method='backfill')
Esempio n. 29
0
def time_price(startdate,enddate,portsyms):
    '''
    @summary: Returns the trading timestamps for a date range together with
              the close-field data for the given symbols.
    @param startdate: start of the range handed to du.getNYSEdays
    @param enddate: end of the range handed to du.getNYSEdays
    @param portsyms: symbols to load
    @return: tuple (timestamps, close data)
    '''
    # timeofday, storename and closefield are resolved from module scope.
    trading_days = du.getNYSEdays(startdate, enddate, timeofday)
    store = da.DataAccess(storename)
    # Original note: close is not the same as 'actual close'.
    prices = store.get_data(trading_days, portsyms, closefield)
    return (trading_days, prices)
Esempio n. 30
0
def genData(startday, endday, datadirectory, symbols):
    '''
    @summary: Writes the visualizer data set for a date range: text files
              listing the timestamps, symbols and feature names, plus a
              pickle holding one value array per feature.
    @param startday: datetime at the start of the range
    @param endday: datetime at the end of the range
    @param datadirectory: prefix of the output directory name, created under
                          $QS/Tools/Visualizer/Data/
    @param symbols: requested symbols; those unknown to the Norgate store
                    are reported and dropped
    @return: Nothing
    '''
    coredirectory = os.environ['QS']+'Tools/Visualizer/Data/'

    directorylocation = (coredirectory + datadirectory
                         + '_' + startday.date().isoformat()
                         + '_' + endday.date().isoformat())

    if not os.path.exists(directorylocation):
        os.mkdir(directorylocation)

    directorylocation = directorylocation + '/'

    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday, endday, timeofday)

    # Creating a txt file of timestamps, one ISO date per line.
    # "with" closes each handle even if a later step raises.
    with open(directorylocation + 'TimeStamps.txt', 'w') as stamp_file:
        for onedate in timestamps:
            stamp_file.write(dt.date.isoformat(onedate) + '\n')

    # Reading the Stock Price Data
    dataobj = da.DataAccess('Norgate')
    all_symbols = dataobj.get_all_symbols()
    badsymbols = set(symbols) - set(all_symbols)
    if badsymbols:
        print("Some Symbols are not valid" + str(badsymbols))
    symbols = list(set(symbols) - badsymbols)

    lsKeys = ['open', 'high', 'low', 'close', 'volume']

    ldfData = dataobj.get_data(timestamps, symbols, lsKeys)
    dData = dict(zip(lsKeys, ldfData))

    # Creating the 3D Matrix
    (lfcFeatures, ldArgs, lsNames) = feat.getFeatureFuncs22()

    FinalData = feat.applyFeatures(dData, lfcFeatures, ldArgs,
                                   sMarketRel='SPY')

    # Creating a txt file of symbols
    with open(directorylocation + 'Symbols.txt', 'w') as symbol_file:
        for sym in symbols:
            symbol_file.write(str(sym) + '\n')

    # Creating a txt file of Features
    with open(directorylocation + 'Features.txt', 'w') as feature_file:
        for feature_name in lsNames:
            feature_file.write(feature_name + '\n')

    # One raw value array per feature, pickled with the highest protocol.
    Numpyarray = [IndicatorData.values for IndicatorData in FinalData]

    with open(directorylocation + 'ALLDATA.pkl', 'wb') as dump_file:
        pickle.dump(Numpyarray, dump_file, -1)
Esempio n. 31
0
def genData(startday, endday, datadirectory, symbols):
    '''
    @summary: Writes the visualizer data set for a date range: text files
              listing the timestamps, symbols and feature names, plus a
              pickle holding one value array per feature.
    @param startday: datetime at the start of the range
    @param endday: datetime at the end of the range
    @param datadirectory: prefix of the output directory name, created under
                          $QS/Tools/Visualizer/Data/
    @param symbols: requested symbols; those unknown to the Norgate store
                    are reported and dropped
    @return: Nothing
    '''
    coredirectory = os.environ['QS']+'Tools/Visualizer/Data/'

    directorylocation = (coredirectory + datadirectory
                         + '_' + startday.date().isoformat()
                         + '_' + endday.date().isoformat())

    if not os.path.exists(directorylocation):
        os.mkdir(directorylocation)

    directorylocation = directorylocation + '/'

    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday, endday, timeofday)

    # Creating a txt file of timestamps, one ISO date per line.
    # "with" closes each handle even if a later step raises.
    with open(directorylocation + 'TimeStamps.txt', 'w') as stamp_file:
        for onedate in timestamps:
            stamp_file.write(dt.date.isoformat(onedate) + '\n')

    # Reading the Stock Price Data
    dataobj = da.DataAccess('Norgate')
    all_symbols = dataobj.get_all_symbols()
    badsymbols = set(symbols) - set(all_symbols)
    if badsymbols:
        print("Some Symbols are not valid" + str(badsymbols))
    symbols = list(set(symbols) - badsymbols)

    lsKeys = ['open', 'high', 'low', 'close', 'volume']

    ldfData = dataobj.get_data(timestamps, symbols, lsKeys)
    dData = dict(zip(lsKeys, ldfData))

    # Creating the 3D Matrix
    (lfcFeatures, ldArgs, lsNames) = feat.getFeatureFuncs22()

    FinalData = feat.applyFeatures(dData, lfcFeatures, ldArgs,
                                   sMarketRel='SPY')

    # Creating a txt file of symbols
    with open(directorylocation + 'Symbols.txt', 'w') as symbol_file:
        for sym in symbols:
            symbol_file.write(str(sym) + '\n')

    # Creating a txt file of Features
    with open(directorylocation + 'Features.txt', 'w') as feature_file:
        for feature_name in lsNames:
            feature_file.write(feature_name + '\n')

    # One raw value array per feature, pickled with the highest protocol.
    Numpyarray = [IndicatorData.values for IndicatorData in FinalData]

    with open(directorylocation + 'ALLDATA.pkl', 'wb') as dump_file:
        pickle.dump(Numpyarray, dump_file, -1)
def main():
    '''Main Function'''

    # List of symbols
    ls_symbols = ["AAPL", "GOOG"]

    # Start and End date of the charts
    dt_start = dt.datetime(2008, 1, 1)
    dt_end = dt.datetime(2010, 12, 31)

    # We need closing prices so the timestamp should be hours=16.
    dt_timeofday = dt.timedelta(hours=16)

    # Get a list of trading days between the start and the end.
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)

    # Creating an object of the dataaccess class with Yahoo as the source.
    c_dataobj = da.DataAccess('Yahoo')

    # Reading just the close prices
    df_close = c_dataobj.get_data(ldt_timestamps, ls_symbols, "close")

    # Creating the allocation dataframe
    # We offset the time for the simulator to have atleast one
    # datavalue before the allocation.
    df_alloc = pd.DataFrame(np.array([[0.5, 0.5]]),
                index=[ldt_timestamps[0] + dt.timedelta(hours=5)],
                columns=ls_symbols)

    dt_last_date = ldt_timestamps[0]
    # Looping through all dates and creating monthly allocations
    for dt_date in ldt_timestamps[1:]:
        if dt_last_date.month != dt_date.month:
            # Create allocation
            na_vals = np.random.randint(0, 1000, len(ls_symbols))
            na_vals = na_vals / float(sum(na_vals))
            na_vals = na_vals.reshape(1, -1)
            # Append to the dataframe
            df_new_row = pd.DataFrame(na_vals, index=[dt_date],
                                        columns=ls_symbols)
            df_alloc = df_alloc.append(df_new_row)
        dt_last_date = dt_date

    # Adding cash to the allocation matrix
    df_alloc['_CASH'] = 0.0

    # Running the simulator on the allocation frame
    (ts_funds, ts_leverage, f_commission, f_slippage, f_borrow_cost) = qstksim.tradesim(df_alloc,
                    df_close, f_start_cash=10000.0, i_leastcount=1, b_followleastcount=True,
                    f_slippage=0.0005, f_minimumcommision=5.0, f_commision_share=0.0035,
                    i_target_leverage=1, f_rate_borrow=3.5, log="transaction.csv")

    print "Simulated Fund Time Series : "
    print ts_funds
    print "Transaction Costs : "
    print "Commissions : ", f_commission
    print "Slippage : ", f_slippage
    print "Borrowing Cost : ", f_borrow_cost
Esempio n. 33
0
def getData(symbols, startday,endday):
    '''
    @summary: Reads the close-field data for a list of symbols from Yahoo.
    @param symbols: symbols to load
    @param startday: start of the range handed to du.getNYSEdays
    @param endday: end of the range handed to du.getNYSEdays
    @return: whatever DataAccess.get_data returns for the close field
    '''
    closing_time = dt.timedelta(hours=16)
    trading_days = du.getNYSEdays(startday, endday, closing_time)
    # closefield is resolved from module scope.
    return da.DataAccess("Yahoo").get_data(trading_days, symbols, closefield)
Esempio n. 34
0
def getData(symbols, startday, endday):
    '''
    @summary: Reads the close-field data for a list of symbols from Yahoo.
    @param symbols: symbols to load
    @param startday: start of the range handed to du.getNYSEdays
    @param endday: end of the range handed to du.getNYSEdays
    @return: whatever DataAccess.get_data returns for the close field
    '''
    closing_time = dt.timedelta(hours=16)
    trading_days = du.getNYSEdays(startday, endday, closing_time)
    # closefield is resolved from module scope.
    return da.DataAccess("Yahoo").get_data(trading_days, symbols, closefield)
Esempio n. 35
0
def getTradingDays(orders):
    '''(pandas) -> list of timestamp
    Return list of timestamps for all trading dates between first and
    last day of order.

    orders is indexed by column name ('year', 'month', 'day') and then by
    row position. The end bound passed to du.getNYSEdays is one day after
    the last order.
    '''
    last = len(orders) - 1
    startday = dt.datetime(orders['year'][0], orders['month'][0],
                           orders['day'][0])
    # Add a timedelta instead of day + 1: day + 1 raises ValueError when
    # the last order falls on the final day of a month.
    endday = dt.datetime(orders['year'][last], orders['month'][last],
                         orders['day'][last]) + dt.timedelta(days=1)
    timeofday = dt.timedelta(hours=16)
    return du.getNYSEdays(startday, endday, timeofday)
Esempio n. 36
0
 def previous_nyse_day(self, date):
     '''
     Return the NYSE timestamp immediately before the given date.

     The date must be a trading day, otherwise an Exception is raised.
     A timestamp whose hour is 0 is first shifted to 16:00.
     '''
     close_offset = dt.timedelta(hours=16)
     if date.hour == 0:
         date = date + close_offset
     # Look back ten calendar days so the window reaches an earlier session.
     window = du.getNYSEdays(date - dt.timedelta(days=10), date, close_offset)
     if window[-1] != date:
         raise Exception('not a trading day')
     return window[-2]
Esempio n. 37
0
def time_price(startdate, enddate, portsyms):
    '''
    @summary: Returns the trading timestamps for a date range together with
              the close-field data for the given symbols.
    @param startdate: start of the range handed to du.getNYSEdays
    @param enddate: end of the range handed to du.getNYSEdays
    @param portsyms: symbols to load
    @return: tuple (timestamps, close data)
    '''
    # timeofday, storename and closefield are resolved from module scope.
    trading_days = du.getNYSEdays(startdate, enddate, timeofday)
    store = da.DataAccess(storename)
    # Original note: close is not the same as 'actual close'.
    prices = store.get_data(trading_days, portsyms, closefield)
    return (trading_days, prices)
Esempio n. 38
0
def findEvents(symbols, startday, endday, verbose=False, generateOrders=False,targetPrice=target_price):
    
    # Reading data
    timeofday=dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday,endday,timeofday)
    dataobj = da.DataAccess(storename)
    if verbose:
        print __name__ + " reading data from " + storename
    
    # Read 'actual close' field data value
    actual_close = dataobj.get_data(timestamps, symbols, actual_close_field)
    
    # trim the data - removing the NaN values from the matrix
    #actual_close = (actual_close.fillna(method='ffill')).fillna(method='backfill')
       
    np_eventmat = copy.deepcopy(actual_close)  
    
    # create event matrix with np value
    for sym in symbols:
        for t in timestamps:
            np_eventmat[sym][t]=np.NAN
            
    # Create Trading Orders File based on Event
    if generateOrders:
        orderCSV = open(order_filename, "wb")
        write = csv.writer(orderCSV, delimiter=',')
    
    
    # fill in event
    event_count=0
    for symbol in symbols:
        for t in range(1, len(actual_close[symbol])):
            if(actual_close[symbol][t-1]>= targetPrice and actual_close[symbol][t]< targetPrice):
                if generateOrders:
                    # generate a order Buy and Sell after 5 trading days
                    write.writerow([timestamps[t].year, timestamps[t].month, timestamps[t].day, symbol, buy, str(shares)])
                    if t + 5 < len(timestamps):
                        fiveDaysLater = timestamps[t+5]
                    else:
                        fiveDaysLater = timestamps[len(timestamps)-1]
                    write.writerow([fiveDaysLater.year, fiveDaysLater.month, fiveDaysLater.day, symbol, sell, str(shares)])
                if verbose:
                    print __name__ + " found event for symbol: " + symbol, actual_close[symbol][t-1] , actual_close[symbol][t]
                np_eventmat[symbol][t] = 1.0
                event_count = event_count +1
    # print out event matrix for debug
    
    print __name__ + "Event Matrix"
    for sym in symbols:
        for t in range(1, len(actual_close[sym])):
            if(np_eventmat[sym][t] == 1.0):
                print timestamps[t] , sym , " has event"
    print "####### Found ", event_count, " in total #######"
    return np_eventmat
Esempio n. 39
0
 def timestamps(cls, start_time, end_time):
     '''
     Return 5-minute timestamps between start_time and end_time, inclusive.

     For every NYSE day, 79 stamps are generated from 09:30 in 300-second
     steps (the last one lands on 16:00); only those inside
     [start_time, end_time] are kept.
     '''
     first_midnight = dt.datetime(start_time.year, start_time.month, start_time.day)
     last_midnight = dt.datetime(end_time.year, end_time.month, end_time.day)
     session_opens = du.getNYSEdays(first_midnight,
                                    last_midnight + dt.timedelta(days=1),
                                    dt.timedelta(hours=9, minutes=30))
     bar_seconds = 300
     stamps = []
     for session_open in session_opens:
         bars = (session_open + dt.timedelta(seconds=slot * bar_seconds)
                 for slot in range(79))
         stamps.extend(bar for bar in bars if start_time <= bar <= end_time)
     return stamps
Esempio n. 40
0
def getTradingDays(orders):
    '''(pandas) -> list of timestamp
    Return list of timestamps for all trading dates between first and
    last day of order.

    orders is indexed by column name ('year', 'month', 'day') and then by
    row position. The end bound passed to du.getNYSEdays is one day after
    the last order.
    '''
    last = len(orders) - 1
    startday = dt.datetime(orders['year'][0], orders['month'][0],
                           orders['day'][0])
    # Add a timedelta instead of day + 1: day + 1 raises ValueError when
    # the last order falls on the final day of a month.
    endday = dt.datetime(orders['year'][last], orders['month'][last],
                         orders['day'][last]) + dt.timedelta(days=1)
    timeofday = dt.timedelta(hours=16)
    return du.getNYSEdays(startday, endday, timeofday)
Esempio n. 41
0
def share_table2fund(share_table):
    """
    @summary converts data frame of shares into fund values
    @param share_table: data frame containing shares on days transactions
                        occurred; must have a '_CASH' column
    @return [fund, leverage] : a two-element list. fund is the time series of
            total fund value over time; leverage is the series built up by
            _calculate_leverage (defined elsewhere in this file)
    """
    # Get the data from the data store
    dataobj = de.DataAccess('mysql')
    startday = share_table.index[0]
    endday = share_table.index[-1]

    # Every column except '_CASH' is treated as a symbol to price.
    symbols = list(share_table.columns)
    symbols.remove('_CASH')

    # Get desired timestamps: from five days before the first share row to
    # one day after the last one.
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday - dt.timedelta(days=5),
                                endday + dt.timedelta(days=1), timeofday)
    historic = dataobj.get_data(timestamps, symbols, ["close"])[0]
    # Cash is priced at 1 so that shares * price also covers the cash column.
    historic["_CASH"] = 1
    # Price rows dated at or before the first share row.
    closest = historic[historic.index <= share_table.index[0]].ix[:]
    ts_leverage = pandas.Series(0, index=[closest.index[-1]])

    # Seed the fund series: first share row valued at the latest price row
    # at or before it.
    first_val = closest.ix[-1] * share_table.ix[0]
    fund_ts = pandas.Series([first_val.sum(axis=1)], index=[closest.index[-1]])
    prev_row = share_table.ix[0]
    for row_index, row in share_table.iterrows():
        # First price row from row_index onward gives the trade date.
        # NOTE(review): relies on legacy .ix label slicing - confirm.
        trade_price = historic.ix[row_index:].ix[0:1]
        trade_date = trade_price.index[0]

        # get stock prices on all the days up until this trade that are not
        # yet part of the fund history
        to_calculate = historic[(historic.index <= trade_date)
                                & (historic.index > fund_ts.index[-1])]
        # multiply prices by the shares held before this row takes effect
        values_by_stock = to_calculate * prev_row

        prev_row = row
        #update leverage
        ts_leverage = _calculate_leverage(values_by_stock, ts_leverage)

        # calculate total value and append to our fund history
        fund_ts = fund_ts.append([values_by_stock.sum(axis=1)])
    return [fund_ts, ts_leverage]
Esempio n. 42
0
def generate_report(funds_list, graph_names, out_file, i_start_cash=10000):
    """
    @summary generates a report given a list of fund time series: writes
             report.html, plots every fund and the $SPX benchmark rescaled
             to i_start_cash, and saves the plot as funds.png
    @param funds_list: fund time series; an entry may also be a list whose
                       first element is the time series
    @param graph_names: one name per fund; its basename labels the plot line
    @param out_file: passed through to print_header
    @param i_start_cash: value every plotted series is rescaled to start at
    """
    # "with" guarantees the HTML file is flushed and closed, even when the
    # plotting or data access fails.
    with open("report.html", "w") as html_file:
        print_header(html_file, out_file)
        html_file.write("<IMG SRC = \'./funds.png\' width = 400/>\n")
        html_file.write("<BR/>\n\n")
        pyplot.clf()
        #load spx for time frame
        symbol = ["$SPX"]

        # Unwrap list entries once so both shapes share the same code path.
        series_list = [fund[0] if isinstance(fund, list) else fund
                       for fund in funds_list]

        # 0 means "not set yet"; track the overall date span of all funds.
        start_date = 0
        end_date = 0
        for i, series in enumerate(series_list):
            if start_date == 0 or start_date > series.index[0]:
                start_date = series.index[0]
            if end_date == 0 or end_date < series.index[-1]:
                end_date = series.index[-1]
            mult = i_start_cash / series.values[0]
            pyplot.plot(series.index, series.values * mult,
                        label=path.basename(graph_names[i]))

        timeofday = dt.timedelta(hours=16)
        timestamps = du.getNYSEdays(start_date, end_date, timeofday)
        dataobj = de.DataAccess('mysql')
        benchmark_close = dataobj.get_data(timestamps, symbol, ["close"],
                                           verbose=False)[0]
        mult = i_start_cash / benchmark_close.values[0]

        for i, series in enumerate(series_list):
            print_stats(series, ["$SPX"], graph_names[i])

        pyplot.plot(benchmark_close.index,
                    benchmark_close.values * mult, label="SSPX")
        pyplot.ylabel('Fund Value')
        pyplot.xlabel('Date')
        pyplot.legend()
        savefig('funds.png', format='png')
        print_footer(html_file)
Esempio n. 43
0
def testFeature( fcFeature, dArgs ):
    '''
    @summary: Quick function to run a feature on some data and plot it to see if it works.
    @param fcFeature: Feature function to test
    @param dArgs: Arguments to pass into feature function 
    @return: Void
    '''
    
    ''' Get Train data for 2009-2010 '''
    dtStart = dt.datetime(2009, 1, 1)
    dtEnd = dt.datetime(2009, 5, 1)
         
    ''' Pull in current training data and test data '''
    norObj = da.DataAccess('Norgate')
    ''' Get 2 extra months for moving averages and future returns '''
    ldtTimestamps = du.getNYSEdays( dtStart, dtEnd, dt.timedelta(hours=16) )
    
    lsSym = ['GOOG']
    lsSym.append('WMT')
    lsSym.append('$SPX')
    lsSym.append('$VIX')
    lsSym.sort()
    
    lsKeys = ['open', 'high', 'low', 'close', 'volume']
    ldfData = norObj.get_data( ldtTimestamps, lsSym, lsKeys )
    dData = dict(zip(lsKeys, ldfData))
    dfPrice = dData['close']


    #print dfPrice.values
    
    ''' Generate a list of DataFrames, one for each feature, with the same index/column structure as price data '''
    dtStart = dt.datetime.now()
    ldfFeatures = applyFeatures( dData, [fcFeature], [dArgs], sMarketRel='$SPX' )
    print 'Runtime:', dt.datetime.now() - dtStart
    
    ''' Use last 3 months of index, to avoid lookback nans '''

    dfPrint = ldfFeatures[0]['GOOG']
    print 'GOOG values:', dfPrint.values
    print 'GOOG Sum:', dfPrint.ix[dfPrint.notnull()].sum()
    
    for sSym in lsSym:
        plt.subplot( 211 )
        plt.plot( ldfFeatures[0].index[-60:], dfPrice[sSym].values[-60:] )
        plt.plot( ldfFeatures[0].index[-60:], dfPrice['$SPX'].values[-60:] * dfPrice[sSym].values[-60] / dfPrice['$SPX'].values[-60] )
        plt.legend((sSym, '$SPX'))
        plt.title(sSym)
        plt.subplot( 212 )
        plt.plot( ldfFeatures[0].index[-60:], ldfFeatures[0][sSym].values[-60:] )
        plt.title( '%s-%s'%(fcFeature.__name__, str(dArgs)) )
        plt.show()
Esempio n. 44
0
def share_table2fund(share_table):
    """
    @summary converts data frame of shares into fund values
    @param share_table: data frame containing shares on days transactions
                        occurred; must have a '_CASH' column
    @return [fund, leverage] : a two-element list. fund is the time series of
            total fund value over time; leverage is the series built up by
            _calculate_leverage (defined elsewhere in this file)
    """
    # Get the data from the data store
    dataobj = de.DataAccess('mysql')
    startday = share_table.index[0]
    endday = share_table.index[-1]

    # Every column except '_CASH' is treated as a symbol to price.
    symbols = list(share_table.columns)
    symbols.remove('_CASH')

    # Get desired timestamps: from five days before the first share row to
    # one day after the last one.
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday - dt.timedelta(days=5), endday + dt.timedelta(days=1), timeofday)
    historic = dataobj.get_data(timestamps, symbols, ["close"])[0]
    # Carry the last known price forward over missing values.
    historic.fillna(method='ffill', inplace=True)
    # Cash is priced at 1 so that shares * price also covers the cash column.
    historic["_CASH"] = 1
    # Price rows dated at or before the first share row.
    closest = historic[historic.index <= share_table.index[0]].ix[:]
    ts_leverage = pandas.Series(0, index=[closest.index[-1]])

    # Seed the fund series: first share row valued at the latest price row
    # at or before it.
    first_val = closest.ix[-1] * share_table.ix[0]
    fund_ts = pandas.Series([first_val.sum(axis=1)], index=[closest.index[-1]])
    prev_row = share_table.ix[0]
    for row_index, row in share_table.iterrows():
        # First price row from row_index onward gives the trade date.
        # NOTE(review): relies on legacy .ix label slicing - confirm.
        trade_price = historic.ix[row_index:].ix[0:1]
        trade_date = trade_price.index[0]

        # get stock prices on all the days up until this trade that are not
        # yet part of the fund history
        to_calculate = historic[(historic.index <= trade_date) & (historic.index > fund_ts.index[-1])]
        # multiply prices by the shares held before this row takes effect
        values_by_stock = to_calculate * prev_row

        prev_row = row
        #update leverage
        ts_leverage = _calculate_leverage(values_by_stock, ts_leverage)

        # calculate total value and append to our fund history
        fund_ts = fund_ts.append([values_by_stock.sum(axis=1)])
    return [fund_ts, ts_leverage]
Esempio n. 45
0
def testFeature( fcFeature, dArgs ):
    '''
    @summary: Quick function to run a feature on some data and plot it to see if it works.
    @param fcFeature: Feature function to test
    @param dArgs: Arguments to pass into feature function 
    @return: Void
    '''
    
    ''' Get Train data for 2009-2010 '''
    dtStart = dt.datetime(2009, 1, 1)
    dtEnd = dt.datetime(2009, 5, 1)
         
    ''' Pull in current training data and test data '''
    norObj = da.DataAccess('Norgate')
    ''' Get 2 extra months for moving averages and future returns '''
    ldtTimestamps = du.getNYSEdays( dtStart, dtEnd, dt.timedelta(hours=16) )
    
    lsSym = ['GOOG']
    lsSym.append('WMT')
    lsSym.append('$SPX')
    lsSym.append('$VIX')
    lsSym.sort()
    
    lsKeys = ['open', 'high', 'low', 'close', 'volume']
    ldfData = norObj.get_data( ldtTimestamps, lsSym, lsKeys )
    dData = dict(zip(lsKeys, ldfData))
    dfPrice = dData['close']


    #print dfPrice.values
    
    ''' Generate a list of DataFrames, one for each feature, with the same index/column structure as price data '''
    dtStart = dt.datetime.now()
    ldfFeatures = applyFeatures( dData, [fcFeature], [dArgs], sMarketRel='$SPX' )
    print 'Runtime:', dt.datetime.now() - dtStart
    
    ''' Use last 3 months of index, to avoid lookback nans '''

    dfPrint = ldfFeatures[0]['GOOG']
    print 'GOOG values:', dfPrint.values
    print 'GOOG Sum:', dfPrint.ix[dfPrint.notnull()].sum()
    
    for sSym in lsSym:
        plt.subplot( 211 )
        plt.plot( ldfFeatures[0].index[-60:], dfPrice[sSym].values[-60:] )
        plt.plot( ldfFeatures[0].index[-60:], dfPrice['$SPX'].values[-60:] * dfPrice[sSym].values[-60] / dfPrice['$SPX'].values[-60] )
        plt.legend((sSym, '$SPX'))
        plt.title(sSym)
        plt.subplot( 212 )
        plt.plot( ldfFeatures[0].index[-60:], ldfFeatures[0][sSym].values[-60:] )
        plt.title( '%s-%s'%(fcFeature.__name__, str(dArgs)) )
        plt.show()
Esempio n. 46
0
def generate_report(funds_list, graph_names, out_file, i_start_cash = 10000):
    """
    @summary generates a report given a list of fund time series: writes
             report.html, plots every fund and the $SPX benchmark rescaled
             to i_start_cash, and saves the plot as funds.png
    @param funds_list: fund time series; an entry may also be a list whose
                       first element is the time series
    @param graph_names: one name per fund; its basename labels the plot line
    @param out_file: passed through to print_header
    @param i_start_cash: value every plotted series is rescaled to start at
    """
    # "with" guarantees the HTML file is flushed and closed, even when the
    # plotting or data access fails.
    with open("report.html", "w") as html_file:
        print_header(html_file, out_file)
        html_file.write("<IMG SRC = \'./funds.png\' width = 400/>\n")
        html_file.write("<BR/>\n\n")
        pyplot.clf()
        #load spx for time frame
        symbol = ["$SPX"]

        # Unwrap list entries once so both shapes share the same code path.
        series_list = [fund[0] if isinstance(fund, list) else fund
                       for fund in funds_list]

        # 0 means "not set yet"; track the overall date span of all funds.
        start_date = 0
        end_date = 0
        for i, series in enumerate(series_list):
            if start_date == 0 or start_date > series.index[0]:
                start_date = series.index[0]
            if end_date == 0 or end_date < series.index[-1]:
                end_date = series.index[-1]
            mult = i_start_cash / series.values[0]
            pyplot.plot(series.index, series.values * mult,
                        label=path.basename(graph_names[i]))

        timeofday = dt.timedelta(hours=16)
        timestamps = du.getNYSEdays(start_date, end_date, timeofday)
        dataobj = de.DataAccess('mysql')
        benchmark_close = dataobj.get_data(timestamps, symbol, ["close"],
                                           verbose=False)[0]
        mult = i_start_cash / benchmark_close.values[0]

        for i, series in enumerate(series_list):
            print_stats(series, ["$SPX"], graph_names[i])

        pyplot.plot(benchmark_close.index,
                    benchmark_close.values * mult, label="SSPX")
        pyplot.ylabel('Fund Value')
        pyplot.xlabel('Date')
        pyplot.legend()
        savefig('funds.png', format='png')
        print_footer(html_file)
def runOther(funds,symbols):
    """
    @summary Runs qs.quickSim on a fixed, equally weighted allocation over the
             date range spanned by a fund series.
    @param funds: series whose first and last index entries bound the period
    @param symbols: symbols to read "close" data for (source 'Norgate') and
                    to allocate to
    @return whatever qs.quickSim returns for the allocation, the price data
            and the constant 1000 (presumably the starting value -- confirm
            against quickSim)
    """
    first_stamp = funds.index[0]
    last_stamp = funds.index[-1]

    # Rebuild the bounds from year/month/day only, dropping any time of day.
    range_start = dt.datetime(first_stamp.year, first_stamp.month, first_stamp.day)
    range_end = dt.datetime(last_stamp.year, last_stamp.month, last_stamp.day)
    nyse_days = du.getNYSEdays(range_start, range_end, dt.timedelta(hours=16))

    source = da.DataAccess('Norgate')
    historic = source.get_data(nyse_days, symbols, "close")

    # Every symbol, and the cash column, receives 0.1 / (number of symbols + 1).
    symbol_count = len(symbols)
    alloc_val = float(0.1 / (float(symbol_count) + 1))
    row = alloc_val * ones(symbol_count)

    # Two identical allocation rows: one at the first and one at the last
    # timestamp of the price data.
    opening = DataMatrix(index=[historic.index[0]], data=[row], columns=symbols)
    closing = DataMatrix(index=[historic.index[-1]], data=[row], columns=symbols)
    alloc = opening.append(closing)
    alloc['_CASH'] = alloc_val

    return qs.quickSim(alloc, historic, 1000)
    def __init__(self, dataAccess, listOfStocks, startTime, endTime):
        """
        Remember the data source and load "actual_close" data for the
        requested symbols and period.

        dataAccess   : object providing get_data(timestamps, symbols, field)
        listOfStocks : symbols to load; stored as self.symbolIndex
        startTime    : start of the range passed to du.getNYSEdays
        endTime      : end of the range passed to du.getNYSEdays
        """
        self.dataAccess = dataAccess

        # Timestamps are requested with a 16-hour offset into each day.
        sixteen_hours = dt.timedelta(hours=16)
        self.timestampIndex = du.getNYSEdays(startTime, endTime, sixteen_hours)
        self.symbolIndex = listOfStocks

        print(__name__ + " reading data")

        # Load the data once, up front, for every timestamp and symbol.
        self.priceArray = dataAccess.get_data(
            self.timestampIndex, self.symbolIndex, "actual_close")

        # Starts at 0; not otherwise used inside this constructor.
        self.prevTsIdx = 0
Esempio n. 49
0
def findEvents(symbols, startday,endday, marketSymbol,verbose=False):
    """
    Find the days on which a symbol's price drops below eventThreachold and
    write a Buy/Sell order pair for each such event to "hw4.csv".

    An event occurs for a symbol at timestamp i when its price at timestamp
    i-1 is at or above eventThreachold and its price at timestamp i is below
    it. For every event, a Buy line dated timestamp i and a Sell line dated
    five timestamps later (clamped to the last scanned timestamp) are
    written. The price field (closefield) and the threshold (eventThreachold)
    are read from names defined outside this function.

    @param symbols: symbols to read prices for and to scan
    @param startday: start of the range passed to du.getNYSEdays
    @param endday: end of the range passed to du.getNYSEdays
    @param marketSymbol: column of the price data whose length determines how
                         many timestamps are scanned
    @param verbose: when true, print progress messages
    @return a deep copy of the price data in which every symbol/timestamp
            entry is reset to NaN and each event is then marked with 1.0
    """
    # Reading the data for the list of symbols.
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday, endday, timeofday)
    dataobj = da.DataAccess('Yahoo')
    if verbose:
        print(__name__ + " reading data")
    close = dataobj.get_data(timestamps, symbols, closefield)

    # The event matrix shares the shape of the price data; blank it first so
    # only detected events end up carrying a value.
    np_eventmat = copy.deepcopy(close)
    for sym in symbols:
        for time in timestamps:
            np_eventmat[sym][time] = np.nan

    if verbose:
        print(__name__ + " finding events")

    # Resolve the scan length before touching the output file so that a bad
    # marketSymbol does not leave a truncated hw4.csv behind.
    totalDays = len(close[marketSymbol])

    # The context manager guarantees the file is closed even if a lookup or
    # write fails part-way through the scan.
    with open("hw4.csv", "w") as output:
        # Days are the outer loop, so orders are written in date order.
        for i in range(1, totalDays):
            for symbol in symbols:
                if close[symbol][timestamps[i-1]] >= eventThreachold and close[symbol][timestamps[i]] < eventThreachold:
                    moment = timestamps[i]
                    output.write('%(year)04d,%(month)02d,%(day)02d,%(symbol)s,Buy, 100 \n' % {"year": moment.year, "month": moment.month, "day": moment.day, "symbol": symbol})
                    # Sell five timestamps later, or on the last one available.
                    sellMomentIndex = min(i + 5, totalDays - 1)
                    moment = timestamps[sellMomentIndex]
                    output.write('%(year)04d,%(month)02d,%(day)02d,%(symbol)s,Sell,100 \n' % {"year": moment.year, "month": moment.month, "day": moment.day, "symbol": symbol})
                    np_eventmat[symbol][i] = 1.0  # marking the event

    return np_eventmat
Esempio n. 50
0
def get_price(symbols,
              start_day,
              end_day,
              close_field='close',
              verbose=False):
    '''
    Read price data for the given symbols from the 'Yahoo' data source.

    symbols     : symbols passed to DataAccess.get_data
    start_day   : start of the range passed to qsdateutil.getNYSEdays
    end_day     : end of the range passed to qsdateutil.getNYSEdays
    close_field : data field to request (default 'close')
    verbose     : when true, print a progress message before reading.
                  The default is the boolean False; the previous default was
                  the string 'False', which is truthy and made the message
                  print on every call.

    Returns the result of get_data unchanged (described by the original
    author as a pandas dataframe).
    '''
    time_of_day = datetime.timedelta(hours=16)
    time_stamps = qsdateutil.getNYSEdays(start_day, end_day, time_of_day)
    data_obj = DataAccess.DataAccess('Yahoo')
    if verbose:
        # A space separates the module name from the message text.
        print(__name__ + " reading data ...")
    price = data_obj.get_data(time_stamps, symbols, close_field)
    return price
Esempio n. 51
0
def daily(lfFunds):
    """
    @summary Copies the fund values into a numpy array and passes that array
             to returnize0 (per the original documentation, this yields daily
             returns centered around 0).
    @param lfFunds: fund values; when the type is exactly that of
                    pd.Series(), it is first reindexed onto the NYSE days
                    between its first and last index entries, forward-filling
    @return the array handed to returnize0; a 1-D input is given a second
            axis of length 1 first
    """
    series_type = type(pd.Series())
    if type(lfFunds) == series_type:
        first_day = lfFunds.index[0]
        last_day = lfFunds.index[-1]
        nyse_days = du.getNYSEdays(first_day, last_day, dt.timedelta(hours=16))
        lfFunds = lfFunds.reindex(index=nyse_days, method='ffill')

    # Work on a copy so the caller's data is not handed to returnize0.
    na_values = np.asarray(deepcopy(lfFunds))
    if na_values.ndim == 1:
        na_values = np.expand_dims(na_values, 1)

    returnize0(na_values)
    return na_values
Esempio n. 52
0
def findEvents(symbols, startday,endday, marketSymbol,verbose=False):
    """
    Build an event matrix marking where a symbol's price drops below 7.0.

    An event occurs for a symbol at timestamp i when its price at timestamp
    i-1 is >= 7.0 and its price at timestamp i is < 7.0. The price field is
    read from the name closefield, defined outside this function.

    symbols      : symbols to read prices for and to scan
    startday     : start of the range passed to du.getNYSEdays
    endday       : end of the range passed to du.getNYSEdays
    marketSymbol : accepted but not used by this function
    verbose      : when true, print progress messages

    Returns a deep copy of the price data in which every symbol/timestamp
    entry is reset to NaN and each event is then marked with 1.0.
    """
    nyse_days = du.getNYSEdays(startday, endday, dt.timedelta(hours=16))
    source = da.DataAccess('Yahoo')
    if verbose:
        print(__name__ + " reading data")
    close = source.get_data(nyse_days, symbols, closefield)

    # Same layout as the price data, with every entry blanked out.
    np_eventmat = copy.deepcopy(close)
    for name in symbols:
        for stamp in nyse_days:
            np_eventmat[name][stamp] = np.nan

    if verbose:
        print(__name__ + " finding events")

    for symbol in symbols:
        day_count = len(close[symbol])
        for day in range(1, day_count):
            # Comparisons are kept in their original direction so that NaN
            # prices never qualify as an event.
            was_at_or_above = close[symbol][nyse_days[day - 1]] >= 7.0
            if was_at_or_above and close[symbol][nyse_days[day]] < 7.0:
                np_eventmat[symbol][day] = 1.0  # mark the event

    return np_eventmat
Esempio n. 53
0
def get_price(symbols,
              start_day,
              end_day,
              close_field='close',
              data_source='Yahoo',
              verbose=False):
    '''
    Read price data for the given symbols from a named data source.

    symbols     : symbols passed to DataAccess.get_data
    start_day   : start of the range passed to qsdateutil.getNYSEdays
    end_day     : end of the range passed to qsdateutil.getNYSEdays
    close_field : data field to request (default 'close')
    data_source : name handed to DataAccess.DataAccess (default 'Yahoo')
    verbose     : when true, print progress messages before and after the
                  read. The default is the boolean False; the previous
                  default was the string 'False', which is truthy and made
                  the messages print on every call.

    Returns the result of get_data unchanged (described by the original
    author as a pandas dataframe).
    '''
    time_of_day = datetime.timedelta(hours=16)
    time_stamps = qsdateutil.getNYSEdays(start_day, end_day, time_of_day)
    data_obj = DataAccess.DataAccess(data_source)
    if verbose:
        print(__name__ + " is reading data from %s..." % data_source)
    market_data = data_obj.get_data(time_stamps, symbols, close_field)
    if verbose:
        # Leading space added so the module name and message do not run
        # together, matching the message printed before the read.
        print(__name__ + " finished reading data.")
    return market_data
Esempio n. 54
0
def findEvents(symbols, startday, endday, verbose=False):
    """
    Turn the price data into an event matrix marking drops below 1.0.

    The price data is modified in place, one symbol at a time, in three
    passes:
      1. every price >= 1.0 is replaced by NaN;
      2. each entry that is < 1.0 and whose predecessor is NaN is set to 1.0;
      3. every entry still < 1.0 is replaced by NaN.

    symbols  : symbols to read prices for and to scan
    startday : start of the range passed to du.getNYSEdays
    endday   : end of the range passed to du.getNYSEdays
    verbose  : when true, print progress messages

    Returns the modified price data. The price field is read from the name
    closefield, defined outside this function.
    """
    nyse_days = du.getNYSEdays(startday, endday, dt.timedelta(hours=16))
    source = da.DataAccess('Yahoo')
    if verbose:
        print(__name__ + " reading data")
    close = source.get_data(nyse_days, symbols, closefield)

    # NOTE(review): fillna() is called without arguments and so relies on the
    # installed pandas' default fill behaviour -- confirm it forward-fills.
    close = close.fillna()
    close = close.fillna(method='backfill')

    if verbose:
        print(__name__ + " finding events")

    for symbol in symbols:
        # Pass 1: blank out everything at or above the threshold.
        close[symbol][close[symbol] >= 1.0] = np.nan

        # Pass 2: a sub-threshold entry right after a blanked one is an event.
        # Marked entries are no longer NaN, so the entry after an event is
        # not itself marked.
        for day in range(1, len(close[symbol])):
            previous_blank = np.isnan(close[symbol][day - 1])
            if previous_blank and close[symbol][day] < 1.0:
                close[symbol][day] = 1.0  # overwrite the price with the event bit

        # Pass 3: blank out the remaining sub-threshold prices.
        close[symbol][close[symbol] < 1.0] = np.nan

    return close
Esempio n. 55
0
def runOther(funds, symbols):
    """
    Run qs.quickSim on a fixed, equally weighted allocation covering the
    date range of a fund series.

    funds   : series whose first and last index entries bound the period
    symbols : symbols to read "close" data for (source 'Norgate') and to
              allocate to

    Returns whatever qs.quickSim returns for the allocation, the price data
    and the constant 1000 (presumably the starting value -- confirm against
    quickSim).
    """
    begin, finish = funds.index[0], funds.index[-1]

    # Only the calendar date of each bound is kept.
    tsstart = dt.datetime(begin.year, begin.month, begin.day)
    tsend = dt.datetime(finish.year, finish.month, finish.day)
    timestamps = du.getNYSEdays(tsstart, tsend, dt.timedelta(hours=16))

    historic = da.DataAccess('Norgate').get_data(timestamps, symbols, "close")

    # Each symbol and the cash column get 0.1 / (number of symbols + 1).
    count = len(symbols)
    alloc_val = float(0.1 / (float(count) + 1))
    weights = alloc_val * ones(count)

    # The same weights are placed at the first and at the last timestamp.
    first_row = DataMatrix(index=[historic.index[0]],
                           data=[weights],
                           columns=symbols)
    last_row = DataMatrix(index=[historic.index[-1]],
                          data=[weights],
                          columns=symbols)
    alloc = first_row.append(last_row)
    alloc['_CASH'] = alloc_val

    return qs.quickSim(alloc, historic, 1000)
Esempio n. 56
0
    def __init__(self, eventMatrix, startday, endday,
                 lookback_days=20, lookforward_days=20,
                 verbose=False):
        """ Event Profiler class constructor

        Parameters : eventMatrix
                   : startday
                   : endday
        (optional) : lookback_days (default = 20)
        (optional) : lookforward_days (default = 20)
        (optional) : verbose (default = False)

        eventMatrix is expected to be a pandas DataMatrix with one column
        per symbol and one row per date, structured like this:

                |IBM |GOOG|XOM |MSFT| GS | JP |
            (d1)|nan |nan | 1  |nan |nan | 1  |
            (d2)|nan | 1  |nan |nan |nan |nan |
            (d3)| 1  |nan | 1  |nan | 1  |nan |
            (d4)|nan | 1  |nan | 1  |nan |nan |
            ...................................

        d1  = start date
        nan = no information about any event
        1   = status bit (positively confirms the event occurrence)

        The constructor also reads "close" data from the 'Yahoo' data source
        for the matrix's columns over the NYSE days between startday and
        endday, and fills gaps in it.
        """
        # Caller-supplied configuration.
        self.eventMatrix = eventMatrix
        self.startday, self.endday = startday, endday
        self.symbols = eventMatrix.columns
        self.lookback_days, self.lookforward_days = lookback_days, lookforward_days
        # Window length: the days before, the days after, plus one more.
        self.total_days = lookback_days + lookforward_days + 1

        # Data source and the timestamps to read.
        self.dataobj = da.DataAccess('Yahoo')
        self.timeofday = dt.timedelta(hours=16)
        self.timestamps = du.getNYSEdays(startday, endday, self.timeofday)

        self.verbose = verbose
        if verbose:
            print(__name__ + " reading historical data")

        self.close = self.dataobj.get_data(
            self.timestamps, self.symbols, "close", verbose=self.verbose)

        # NOTE(review): the first fillna() has no arguments and relies on the
        # installed pandas' default fill behaviour -- confirm it forward-fills.
        forward_filled = self.close.fillna()
        self.close = forward_filled.fillna(method='backfill')
Esempio n. 57
0
def calculate_efficiency(dt_start_date, dt_end_date, s_stock):
    """
    @summary calculates the (exit - entry) / (high - low) trade efficiency of
             a stock from its "close" data in the 'mysql' data store
    @param dt_start_date: entry point for the trade
    @param dt_end_date: exit point for the trade; one day is added to it
                        before it is passed to du.getNYSEdays
    @param s_stock: stock to compute efficiency for
    @return: first element of the computed ratio (per the original
             documentation, a float representing efficiency)
    """
    # Get the data from the data store
    dataobj = de.DataAccess('mysql')

    # Get desired timestamps
    range_end = dt_end_date + dt.timedelta(days=1)
    timestamps = du.getNYSEdays(dt_start_date, range_end, dt.timedelta(hours=16))
    historic = dataobj.get_data(timestamps, [s_stock], ["close"])[0]

    prices = historic.values
    # Price movement from the first to the last row ...
    captured_move = prices[-1] - prices[0]
    # ... relative to the full high-low range seen over the period.
    full_range = numpy.max(prices) - numpy.min(prices)
    return (captured_move / full_range)[0]
Esempio n. 58
0
def findEvents(symbols, startday,endday, marketSymbol,verbose=False):
    """
    Find the days on which a symbol's price drops below 7.0 and write a
    Buy/Sell order pair for each such event to "orders.csv" via writeOrder.

    An event occurs for a symbol at row i when its price at row i-1 is
    >= 7.0 and its price at row i is < 7.0. For every event, a Buy order for
    100 dated row i and a Sell order for 100 dated five rows later (clamped
    to the last row) are written. The price field is read from the name
    closefield, defined outside this function.

    @param symbols: symbols to read prices for and to scan
    @param startday: start of the range passed to du.getNYSEdays
    @param endday: end of the range passed to du.getNYSEdays
    @param marketSymbol: must be a column of the price data; its values are
                         not otherwise used
    @param verbose: when true, print progress messages
    @return a deep copy of the gap-filled price data in which every
            symbol/timestamp entry is reset to NaN and each event is then
            marked with 1.0
    """
    # Reading the data for the list of symbols.
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday, endday, timeofday)
    dataobj = da.DataAccess('Yahoo')
    if verbose:
        print(__name__ + " reading data")
    close = dataobj.get_data(timestamps, symbols, closefield)

    # Completing the data: fill gaps forward first, then backward.
    close = (close.fillna(method='ffill')).fillna(method='backfill')

    # NOTE(review): the market series is not used below; the lookup is kept
    # only so that a marketSymbol missing from the data still raises here,
    # as it did before.
    close[marketSymbol]

    # The event matrix shares the shape of the price data; blank it first so
    # only detected events end up carrying a value.
    np_eventmat = copy.deepcopy(close)
    for sym in symbols:
        for time in timestamps:
            np_eventmat[sym][time] = np.nan

    if verbose:
        print(__name__ + " finding events")

    last_row = len(close) - 1

    # The context manager guarantees orders.csv is closed even if a lookup or
    # writeOrder fails part-way through the scan.
    with open('orders.csv', 'w') as orders:
        for symbol in symbols:
            # NOTE(review): the scan starts at row 2, so a drop between
            # rows 0 and 1 is never reported -- confirm this is intended.
            for i in range(2, len(close[symbol])):
                # True when the price drops below 7.0 between rows i-1 and i.
                if close[symbol][i-1] >= 7.0 and close[symbol][i] < 7.0:
                    np_eventmat[symbol][i] = 1.0  # marking the event
                    j = min(i + 5, last_row)  # order reversing day
                    writeOrder(close.index[i], symbol, 'Buy', 100, orders)
                    writeOrder(close.index[j], symbol, 'Sell', 100, orders)

    return np_eventmat