def fit_tvars(data):
    """Fit a TVAR model per channel and collect its decomposition.

    Parameters
    ----------
    data : mapping of channel name -> series (iterated via .iteritems())

    Returns
    -------
    (freqs, mods) : pair of pn.DataMatrix
        freqs: first row of each channel's 'frequency' decomposition.
        mods: row-wise max (axis 1) of each channel's 'modulus'.

    NOTE(review): p, m0, C0, n0, s0, state_discount and var_discount are
    free variables here -- presumably module-level configuration; confirm
    they are defined before this is called.
    """
    models = {}
    decomps = {}
    for chan, series in data.iteritems():
        print chan  # progress indicator
        model = tvar.TVAR(series, p=p, m0=m0, C0=C0, n0=n0, s0=s0,
                          state_discount=state_discount,
                          var_discount=var_discount)
        models[chan] = model  # collected but not returned
        decomps[chan] = decomp = model.decomp()

    freqs = {}
    mods = {}
    for chan, decomp in decomps.iteritems():
        freqs[chan] = decomp['frequency'][0]
        mods[chan] = decomp['modulus'].max(1)

    freqs = pn.DataMatrix(freqs)
    mods = pn.DataMatrix(mods)

    return freqs, mods
def strat1OverN(dtStart, dtEnd, dFuncArgs): """ @summary Evenly distributed strategy. @param dtStart: Start date for portfolio @param dtEnd: End date for portfolio @param dFuncArgs: Dict of function args passed to the function @return DataFrame corresponding to the portfolio allocations """ if not dFuncArgs.has_key('dmPrice'): print 'Error: Strategy requires dmPrice information' return dmPrice = dFuncArgs['dmPrice'] lNumSym = len(dmPrice.columns) ''' Generate two allocations, one for the start day, one for the end ''' naAlloc = (np.array(np.ones(lNumSym)) * (1.0 / lNumSym)).reshape(1, -1) dfAlloc = pand.DataMatrix(index=[dtStart], data=naAlloc, columns=(dmPrice.columns)) dfAlloc = dfAlloc.append( pand.DataMatrix(index=[dtEnd], data=naAlloc, columns=dmPrice.columns)) dfAlloc['_CASH'] = 0.0 return dfAlloc
def stratGiven(dtStart, dtEnd, dFuncArgs): """ @summary Simplest strategy, weights are provided through args. @param dtStart: Start date for portfolio @param dtEnd: End date for portfolio @param dFuncArgs: Dict of function args passed to the function @return DataFrame corresponding to the portfolio allocations """ if not dFuncArgs.has_key('dmPrice'): print 'Error: Strategy requires dmPrice information' return if not dFuncArgs.has_key('lfWeights'): print 'Error: Strategy requires weight information' return dmPrice = dFuncArgs['dmPrice'] lfWeights = dFuncArgs['lfWeights'] ''' Generate two allocations, one for the start day, one for the end ''' naAlloc = np.array(lfWeights).reshape(1, -1) dfAlloc = pand.DataFrame(index=[dtStart], data=naAlloc, columns=(dmPrice.columns)) dfAlloc = dfAlloc.append( pand.DataMatrix(index=[dtEnd], data=naAlloc, columns=dmPrice.columns)) dfAlloc['_CASH'] = 0.0 return dfAlloc
def getDataMatrixFromData(dataname, partname, symbols, tsstart, tsend):
    """Load *partname* values for *symbols* between two timestamps from the
    QSDATA store and return them as a pandas.DataMatrix indexed by date.

    Raises Exception for any dataname other than "Norgate".
    """
    pathpre = os.environ.get('QSDATA') + "/Processed"
    if dataname == "Norgate":
        pathsub = "/Norgate/Equities"
        exchanges = ["/US_NASDAQ/", "/US_NYSE/", "/US_NYSE Arca/", "/OTC/",
                     "/US_AMEX/", "/Delisted_US_Recent/", "/US_Delisted/"]
        paths = [pathpre + pathsub + suffix for suffix in exchanges]
        datastr1 = "/StrategyData"
        datastr2 = "StrategyData"
    else:
        raise Exception("unknown dataname " + str(dataname))

    data = da.DataAccess(True, paths, datastr1, datastr2, False, symbols,
                         tsstart, tsend)
    tss = list(data.getTimestampArray())
    start_time = tss[0]
    end_time = tss[-1]

    # Convert epoch timestamps to date objects for the row index
    dates = [tu.epoch2date(ts) for ts in tss]

    vals = data.getMatrixBetweenTS(symbols, partname, start_time, end_time)
    syms = list(data.getListOfSymbols())
    del data  # release the data-access handle before building the result

    return pandas.DataMatrix(vals, dates, syms)
def shortingQuickSim(alloc, historic, start_cash, leverage):
    '''
    shortingQuickSim designed to handle shorts, keeps track of leverage
    keeping it within paramaterized value, ignore alloc cash column

    NOTE(review): mutates the caller's *alloc* frame (deletes its '_CASH'
    column) rather than working on a copy.
    '''
    del alloc['_CASH']
    #fix invalid days
    historic = historic.fillna(method='backfill')
    #compute first trade
    closest = historic[historic.index <= alloc.index[0]]
    fund_ts = pand.Series([start_cash], index=[closest.index[-1]])
    # share count from the first allocation row at the last price <= start date
    shares = alloc.values[0, :] * fund_ts.values[-1] / closest.values[-1, :]
    cash_values = pand.DataMatrix([shares * closest.values[-1, :]],
                                  index=[closest.index[-1]])
    #compute all trades
    for i in range(1, len(alloc.values[:, 0])):
        #check leverage
        #TODO Find out what to use for fundvall below...
        # NOTE(review): uses row 0 of alloc and the initial cash on every
        # iteration (see TODO) -- the leverage check never reflects the
        # current rebalance; confirm intended.
        this_leverage = _compute_leverage(alloc.values[0, :], start_cash)
        if this_leverage > leverage:
            # NOTE(review): Python 2 print statement with a parenthesized
            # comma list -- this prints a tuple repr; confirm intended.
            print('Warning, leverage of ', this_leverage, \
                ' reached, exceeds leverage limit of ', leverage, '\n')
        #get closest date(previous date)
        closest = historic[historic.index <= alloc.index[i]]
        #for loop to calculate fund daily (without rebalancing)
        for date in closest[closest.index > fund_ts.index[-1]].index:
            #compute and record total fund value (Sum(closest close * stocks))
            fund_ts = fund_ts.append(
                pand.Series([(closest.xs(date) * shares).sum()],
                            index=[date]))
            cash_values = cash_values.append(
                pand.DataMatrix([shares * closest.xs(date)], index=[date]))
        #distribute fund in accordance with alloc
        shares = alloc.values[i, :] * (fund_ts.values[-1]
                                       / closest.xs(closest.index[-1]))
    #compute fund value for rest of historic data with final share distribution
    for date in historic[historic.index > alloc.index[-1]].index:
        if date in closest.index:
            fund_ts = fund_ts.append(
                pand.Series([(closest.xs(date) * shares).sum()],
                            index=[date]))
    #return fund record
    return fund_ts
def make_subsampled_dataset():
    """Load the raw EEG recording, subsample it, and save the resulting
    DataMatrix to the module-level eeg_path."""
    names = np.loadtxt('/home/wesm/research/mike/eegdata/channames',
                       dtype=object)
    raw = np.loadtxt('/home/wesm/research/mike/eegdata/eeg19_data.dat')

    # subsample to match P&W datasets: skip the first 1999 rows, keep every
    # 5th row after that, truncate to 3600 rows
    sampled = raw[1999:][::5][:3600]

    frame = pn.DataMatrix(sampled, columns=names)
    frame.save(eeg_path)
def _make_lag_matrix(x, lags):
    """Return a DataMatrix of lagged copies of *x* for lags 1..*lags*.

    Columns of lag i are prefixed 'L<i>.'.  Relies on the frame's internal
    _series mapping to gather the shifted columns.
    """
    series_map = {}
    column_order = []
    for lag in range(1, lags + 1):
        prefix = 'L%d.' % lag
        shifted = x.shift(lag).rename(columns=lambda c: prefix + c)
        series_map.update(shifted._series)
        column_order.extend(shifted.columns)
    return pn.DataMatrix(series_map, columns=column_order)
def _convert_Matrix(mat):
    """Convert a matrix object exposing colnames/rownames (presumably an R
    matrix via rpy2 -- confirm) into a pandas.DataMatrix."""
    colnames = mat.colnames
    rownames = mat.rownames

    # Null labels become None so pandas falls back to default labelling
    if _is_null(colnames):
        columns = None
    else:
        columns = list(colnames)

    if _is_null(rownames):
        index = None
    else:
        index = list(rownames)

    return pandas.DataMatrix(np.array(mat),
                             index=_check_int(index),
                             columns=columns)
def getquotes(symbol, start, end):
    """Fetch historical Yahoo quotes for *symbol* between *start* and *end*
    and return a DataMatrix of open/close/high/low/volume indexed by date."""
    quotes = fin.quotes_historical_yahoo(symbol, start, end)

    # Renamed from open/close/... so the builtins open() et al. are not
    # shadowed inside this function.
    dates, opens, closes, highs, lows, volumes = zip(*quotes)

    data = {
        'open': opens,
        'close': closes,
        'high': highs,
        'low': lows,
        'volume': volumes
    }

    # Quote dates arrive as ordinals; convert to datetime for the index
    dates = pa.Index([dt.datetime.fromordinal(int(d)) for d in dates])
    return pa.DataMatrix(data, index=dates)
def rountrip_archive(N, iterations=10): # Create data arr = np.random.randn(N, N) lar = la.larry(arr) dma = pandas.DataMatrix(arr, range(N), range(N)) # filenames filename_numpy = 'c:/temp/numpy.npz' filename_larry = 'c:/temp/archive.hdf5' filename_pandas = 'c:/temp/pandas_tmp' # Delete old files try: os.unlink(filename_numpy) except: pass try: os.unlink(filename_larry) except: pass try: os.unlink(filename_pandas) except: pass # Time a round trip save and load numpy_f = lambda: numpy_roundtrip(filename_numpy, arr, arr) numpy_time = timeit(numpy_f, iterations) / iterations larry_f = lambda: larry_roundtrip(filename_larry, lar, lar) larry_time = timeit(larry_f, iterations) / iterations pandas_f = lambda: pandas_roundtrip(filename_pandas, dma, dma) pandas_time = timeit(pandas_f, iterations) / iterations print 'Numpy (npz) %7.4f seconds' % numpy_time print 'larry (HDF5) %7.4f seconds' % larry_time print 'pandas (HDF5) %7.4f seconds' % pandas_time
def forecast(self, steps=1):
    """
    Produce dynamic forecast

    Parameters
    ----------
    steps : int, default 1
        Forecast horizon; each output row is the steps-ahead value of a
        forecast launched from data ending *steps* observations earlier.

    Returns
    -------
    forecasts : pandas.DataMatrix
        Indexed by self.result_index[steps:], one column per equation.
    """
    output = np.empty((self.T - steps, self.neqs))

    y_values = self.y.values
    y_index_map = self.y.index.indexMap
    result_index_map = self.result_index.indexMap

    coefs = self._coefs_raw
    intercepts = self._intercepts_raw

    # can only produce this many forecasts
    forc_index = self.result_index[steps:]
    for i, date in enumerate(forc_index):
        # TODO: check that this does the right thing in weird cases...
        # positions of the forecast origin, *steps* back from `date`
        idx = y_index_map[date] - steps
        result_idx = result_index_map[date] - steps

        # use only observations strictly before the forecast origin
        y_slice = y_values[:idx]

        forcs = _model.forecast(y_slice, coefs[result_idx],
                                intercepts[result_idx], steps)

        # keep only the final (steps-ahead) value of the forecast path
        output[i] = forcs[-1]

    return pn.DataMatrix(output, index=forc_index, columns=self.names)
def r2(self):
    """Returns the r-squared values."""
    values = {}
    for eq, result in self.equations.iteritems():
        values[eq] = result.r2
    return pn.DataMatrix(values)
def resid(self):
    """Per-equation residuals collected into one DataMatrix."""
    data = dict((eq, result.resid)
                for eq, result in self.equations.iteritems())
    return pn.DataMatrix(data)
def nobs(self):
    # Stub, do I need this?
    counts = {}
    for eq, result in self.equations.iteritems():
        counts[eq] = result.nobs
    return pn.DataMatrix(counts)
print '\nUsing la' import la dta = la.larry(s.data, label=[range(len(s.data))]) dat = la.larry(s.dates.tolist(), label=[range(len(s.data))]) s2 = ts.time_series(dta.group_mean(dat).x, dates=ts.date_array(dat.x, freq="M")) s2u = ts.remove_duplicated_dates(s2) print repr(s) print dat print repr(s2) print repr(s2u) print '\nUsing pandas' import pandas pdta = pandas.DataMatrix(s.data, np.arange(len(s.data)), [1]) pa = pdta.groupby(dict(zip(np.arange(len(s.data)), s.dates.tolist()))).aggregate(np.mean) s3 = ts.time_series(pa.values.ravel(), dates=ts.date_array(pa.index.tolist(), freq="M")) print pa print repr(s3) print '\nUsing tabular' import tabular as tb X = tb.tabarray(array=s.torecords(), dtype=s.torecords().dtype) tabx = X.aggregate(On=['_dates'], AggFuncDict={ '_data': np.mean, '_mask': np.all
def fx_rates_returns():
    """Load the FX-rate returns data file and return it as a DataMatrix on
    a generated date range (module globals data_path, _start_date and
    _rates_cols supply the location, start and column labels)."""
    filename = os.path.join(data_path, 'returns_exrates.dat')
    values = np.loadtxt(filename)
    dates = pn.DateRange(_start_date, periods=len(values))
    return pn.DataMatrix(values, index=dates, columns=_rates_cols)
def stratMark(dtStart, dtEnd, dFuncArgs):
    """
    @summary Markovitz strategy, generates a curve and then chooses a point on it.
    @param dtStart: Start date for portfolio
    @param dtEnd: End date for portfolio
    @param dFuncArgs: Dict of function args passed to the function
    @return DataFrame corresponding to the portfolio allocations
    """
    if not dFuncArgs.has_key('dmPrice'):
        print 'Error:', stratMark.__name__, 'requires dmPrice information'
        return
    if not dFuncArgs.has_key('sPeriod'):
        print 'Error:', stratMark.__name__, 'requires rebalancing period'
        return
    if not dFuncArgs.has_key('lLookback'):
        print 'Error:', stratMark.__name__, 'requires lookback'
        return
    if not dFuncArgs.has_key('sMarkPoint'):
        print 'Error:', stratMark.__name__, 'requires markowitz point to choose'
        return

    ''' Optional variables '''
    if not dFuncArgs.has_key('bAddAlpha'):
        bAddAlpha = False
    else:
        bAddAlpha = dFuncArgs['bAddAlpha']

    dmPrice = dFuncArgs['dmPrice']
    sPeriod = dFuncArgs['sPeriod']
    lLookback = dFuncArgs['lLookback']
    sMarkPoint = dFuncArgs['sMarkPoint']

    ''' Select rebalancing dates '''
    drNewRange = pand.DateRange(dtStart, dtEnd,
                                timeRule=sPeriod) + pand.DateOffset(hours=16)
    dfAlloc = pand.DataMatrix()

    ''' Go through each rebalance date and calculate an efficient frontier for each '''
    # NOTE(review): the loop variable i is reused by several inner loops
    # below; it is re-bound by enumerate() each iteration so no value leaks
    # across iterations, but the shadowing is fragile.
    for i, dtDate in enumerate(drNewRange):
        # lookback window ending at the rebalance date
        dtStart = dtDate - pand.DateOffset(days=lLookback)
        if (dtStart < dmPrice.index[0]):
            print 'Error, not enough data to rebalance'
            continue

        naRets = dmPrice.ix[dtStart:dtDate].values.copy()
        tsu.returnize1(naRets)
        tsu.fillforward(naRets)
        tsu.fillbackward(naRets)

        ''' Add alpha to returns '''
        if bAddAlpha:
            if i < len(drNewRange) - 1:
                # peek at returns of the upcoming period (look-ahead alpha)
                naFutureRets = dmPrice.ix[dtDate:drNewRange[i + 1]].values.copy()
                tsu.returnize1(naFutureRets)
                tsu.fillforward(naFutureRets)
                tsu.fillbackward(naFutureRets)
                naAvg = np.mean(naFutureRets, axis=0)

                ''' make a mix of past/future rets '''
                for i in range(naRets.shape[0]):
                    naRets[i, :] = (naRets[i, :] + (naAvg * 0.05)) / 1.05

        ''' Generate the efficient frontier '''
        (lfReturn, lfStd, lnaPortfolios) = getFrontier(naRets, fUpper=0.2,
                                                       fLower=0.01)
        lInd = 0
        ''' plt.clf() plt.plot( lfStd, lfReturn)'''
        if (sMarkPoint == 'Sharpe'):
            ''' Find portfolio with max sharpe '''
            fMax = -1E300
            for i in range(len(lfReturn)):
                fShrp = (lfReturn[i] - 1) / (lfStd[i])
                if fShrp > fMax:
                    fMax = fShrp
                    lInd = i
            ''' plt.plot( [lfStd[lInd]], [lfReturn[lInd]], 'ro')
            plt.draw()
            time.sleep(2)
            plt.show()'''
        elif (sMarkPoint == 'MinVar'):
            ''' use portfolio with minimum variance '''
            fMin = 1E300
            for i in range(len(lfReturn)):
                if lfStd[i] < fMin:
                    fMin = lfStd[i]
                    lInd = i
        elif (sMarkPoint == 'MaxRet'):
            ''' use Portfolio with max returns (not really markovitz) '''
            lInd = len(lfReturn) - 1
        elif (sMarkPoint == 'MinRet'):
            ''' use Portfolio with min returns (not really markovitz) '''
            lInd = 0
        else:
            print 'Warning: invalid sMarkPoint' ''
            return

        ''' Generate allocation based on selected portfolio '''
        naAlloc = (np.array(lnaPortfolios[lInd]).reshape(1, -1))
        dmNew = pand.DataMatrix(index=[dtDate], data=naAlloc,
                                columns=(dmPrice.columns))
        dfAlloc = dfAlloc.append(dmNew)

    # Strategy never holds cash
    dfAlloc['_CASH'] = 0.0
    return dfAlloc
# Setup import numpy as np import pandas import la N = 1000 K = 50 arr1 = np.random.randn(N, K) arr2 = np.random.randn(N, K) idx1 = range(N) idx2 = range(K) # pandas dma1 = pandas.DataMatrix(arr1, idx1, idx2) dma2 = pandas.DataMatrix(arr2, idx1[::-1], idx2[::-1]) # larry lar1 = la.larry(arr1, [idx1, idx2]) lar2 = la.larry(arr2, [idx1[::-1], idx2[::-1]]) for i in range(100): result = lar1 + lar2