def __init__(self, models):
    self.models = models
    self.names = sorted(models.keys())

    mod = self.models.values()[0]

    self.pred_like = DataMatrix(
        dict((k, v.pred_like) for k, v in models.iteritems()),
        index=mod.dates)
def _read_matrix(group):
    index = _read_index(group, 'index')
    columns = _read_index(group, 'columns')
    values = group.values[:]

    objects = None
    if hasattr(group, 'obj_columns'):
        # the object-dtype block is stored under its own 'obj_columns' node
        obj_columns = _read_index(group, 'obj_columns')
        obj_values = group.obj_values[:]
        objects = DataMatrix(obj_values, index=index, columns=obj_columns)

    return DataMatrix(values, index=index, columns=columns, objects=objects)
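# A hedged usage sketch (PyTables 2.x-era API, not from the original source):
# _read_matrix expects a PyTables group holding 'index', 'columns' and
# 'values' nodes; _read_index is assumed to be defined alongside it, and the
# file name and group path below are hypothetical.
import tables

h5file = tables.openFile('store.h5', mode='r')
frame = _read_matrix(h5file.getNode('/df'))
h5file.close()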
def getQuotes(symbol, start, end):
    quotes = fin.quotes_historical_yahoo(symbol, start, end)
    dates, open, close, high, low, volume = zip(*quotes)

    data = {
        'open': open,
        'close': close,
        'high': high,
        'low': low,
        'volume': volume
    }

    dates = Index([datetime.fromordinal(int(d)) for d in dates])
    return DataMatrix(data, index=dates)
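# The function above presumably relies on imports along these lines; this is
# a hedged sketch for the pandas / matplotlib APIs of that era, not code from
# the original source, and the ticker and date range below are hypothetical.
from datetime import datetime

import matplotlib.finance as fin        # provides quotes_historical_yahoo
from pandas import DataMatrix, Index

startDate = datetime(2009, 1, 1)
endDate = datetime(2009, 12, 31)
spy = getQuotes('SPY', startDate, endDate)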
def __init__(self, phi):
    self.phi = phi
    self.p = len(phi)

    (self.modulus, self.frequency,
     self.wavelength, self.H) = self._compute_decomp()

    self.result = DataMatrix(
        {'wavelength': self.wavelength,
         'modulus': self.modulus,
         'frequency': self.frequency},
        columns=['modulus', 'wavelength', 'frequency'])
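# A minimal sketch of what _compute_decomp presumably does (assumption: the
# usual AR eigen-decomposition, cf. West & Harrison): the reciprocal roots of
# the AR characteristic polynomial give, for each complex-conjugate pair, a
# modulus r and frequency omega, hence a wavelength 2*pi/omega.
import numpy as np

phi = np.array([1.5, -0.75])            # hypothetical AR(2) coefficients
roots = np.roots(np.r_[1, -phi])        # reciprocal characteristic roots
modulus = np.abs(roots)                 # ~0.866 for both conjugate roots
frequency = np.abs(np.angle(roots))     # ~pi/6 radians
wavelength = 2 * np.pi / frequency      # ~12 periods per cycle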
class DLMMixture(object):
    """
    Mixture of DLMs (Class I type model)

    Parameters
    ----------
    models : dict

    Notes
    -----
    cf. W&H Section 12.2
    """
    def __init__(self, models):
        self.models = models
        self.names = sorted(models.keys())

        mod = self.models.values()[0]

        self.pred_like = DataMatrix(
            dict((k, v.pred_like) for k, v in models.iteritems()),
            index=mod.dates)

    @property
    def post_model_prob(self):
        cumprod = self.pred_like.cumprod()
        return cumprod / cumprod.sum(1)

    def plot_post_prob(self):
        ratio = self.post_model_prob
        ratio.plot(subplots=True, sharey=True)
        ax = plt.gca()
        ax.set_ylim([0, 1])

    def get_weights(self, t):
        weights = self.post_model_prob
        return weights.xs(weights.index[t])

    def plot_mu_density(self, t, index=0, support_thresh=0.1):
        """
        Plot posterior densities for a single model parameter over the set of
        mixture components

        Parameters
        ----------
        t : int
            time index, relative to response variable
        index : int
            parameter index to plot

        Notes
        -----
        cf. West & Harrison Figure 12.3. Automatically annotating individual
        component curves would probably be difficult.
        """
        ix = index
        dists = {}
        for name in self.names:
            model = self.models[name]
            df = model.df[t]
            mode = model.mu_mode[t + 1, ix]
            scale = np.sqrt(model.mu_scale[t + 1, ix, ix])
            dists[name] = stats.t(df, loc=mode, scale=scale)

        plot_mixture(dists, self.get_weights(t),
                     support_thresh=support_thresh)

    def plot_forc_density(self, t, support_thresh=0.1):
        """
        Plot posterior densities for 1-step forecasts

        Parameters
        ----------
        t : int
            time index, relative to response variable

        Notes
        -----
        cf. West & Harrison Figure 12.4.
        """
        dists = {}
        for name in self.names:
            model = self.models[name]
            df = model.df[t]
            mode = model.forecast[t]
            scale = np.sqrt(model.forc_var[t])
            dists[name] = stats.t(df, loc=mode, scale=scale)

        plot_mixture(dists, self.get_weights(t),
                     support_thresh=support_thresh)
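# A minimal usage sketch, not from the original source: dlm_a and dlm_b stand
# in for two already-fitted DLM objects exposing the attributes the class
# reads (pred_like, dates, df, mu_mode, mu_scale, forecast, forc_var).
mixture = DLMMixture({'model A': dlm_a, 'model B': dlm_b})
mixture.plot_post_prob()                 # posterior model probabilities over time
mixture.plot_mu_density(t=20, index=0)   # component densities for one parameter
mixture.plot_forc_density(t=20)          # one-step-ahead forecast densities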
# Set start and end dates
t = map(int, sys.argv[1].split('-'))
startday = dt.datetime(t[2], t[0], t[1])
t = map(int, sys.argv[2].split('-'))
endday = dt.datetime(t[2], t[0], t[1])

# Get desired timestamps (NYSE close, 4pm)
timeofday = dt.timedelta(hours=16)
timestamps = du.getNYSEdays(startday, endday, timeofday)

# Get the data from the data store
# (`symbols` is assumed to have been defined earlier as a list of tickers)
dataobj = da.DataAccess('Norgate')
historic = dataobj.get_data(timestamps, symbols, "close")

# Setup the allocation table
alloc_val = random.random()
alloc = DataMatrix(index=[historic.index[0]], data=[alloc_val],
                   columns=symbols)
for date in range(1, len(historic.index)):
    alloc_val = 1  # random.random()
    alloc = alloc.append(
        DataMatrix(index=[historic.index[date]], data=[alloc_val],
                   columns=[symbols[0]]))
alloc['_CASH'] = 1 - alloc[symbols[0]]

# Dump to pkl file
output = open(sys.argv[3], "wb")
cPickle.dump(alloc, output)
output.close()
def getQuotes(symbol, start, end):
    quotes = fin.quotes_historical_yahoo(symbol, start, end)
    dates, open, close, high, low, volume = zip(*quotes)

    data = {"open": open, "close": close, "high": high,
            "low": low, "volume": volume}

    dates = Index([datetime.fromordinal(int(d)) for d in dates])
    return DataMatrix(data, index=dates)

# startDate and endDate are assumed to be datetime objects defined earlier
msft = getQuotes("MSFT", startDate, endDate)
aapl = getQuotes("AAPL", startDate, endDate)
goog = getQuotes("GOOG", startDate, endDate)
ibm = getQuotes("IBM", startDate, endDate)

px = DataMatrix({"MSFT": msft["close"],
                 "IBM": ibm["close"],
                 "GOOG": goog["close"],
                 "AAPL": aapl["close"]})
returns = px / px.shift(1) - 1

# Select dates
subIndex = ibm.index[(ibm["close"] > 95) & (ibm["close"] < 100)]
msftOnSameDates = msft.reindex(subIndex)

# Insert columns
msft["hi-lo spread"] = msft["high"] - msft["low"]
ibm["hi-lo spread"] = ibm["high"] - ibm["low"]

# Aggregate monthly
print "Running One Stock strategy from "+sys.argv[1] +" to "+sys.argv[2] # Use google symbol symbols = list(['SPY']) # Set start and end dates t = map(int,sys.argv[1].split('-')) startday = dt.datetime(t[2],t[0],t[1]) t = map(int,sys.argv[2].split('-')) endday = dt.datetime(t[2],t[0],t[1]) # Get desired timestamps timeofday=dt.timedelta(hours=16) timestamps = du.getNYSEdays(startday,endday,timeofday) # Get the data from the data store dataobj = da.DataAccess('Norgate') historic = dataobj.get_data(timestamps, symbols, "close") # Setup the allocation table alloc_val= random.random() alloc=DataMatrix(index=[historic.index[0]], data=[alloc_val], columns=symbols) for date in range(1, len(historic.index)): alloc_val=1 #random.random() alloc=alloc.append(DataMatrix(index=[historic.index[date]], data=[alloc_val], columns=[symbols[0]])) alloc['_CASH']=1-alloc[symbols[0]] # Dump to pkl file output=open(sys.argv[3],"wb") cPickle.dump(alloc, output)
ibm['hi-lo spread'] = ibm['high'] - ibm['low']

# Aggregate monthly
def toMonthly(frame, how):
    offset = BMonthEnd()
    return frame.groupby(offset.rollforward).aggregate(how)

msftMonthly = toMonthly(msft, np.mean)
ibmMonthly = toMonthly(ibm, np.mean)

# Statistics
stdev = DataMatrix({'MSFT': msft.std(),
                    'IBM': ibm.std()})

# Arithmetic
ratios = ibm / msft

# Works with different indices
ratio = ibm / ibmMonthly
monthlyRatio = ratio.reindex(ibmMonthly.index)

# Ratio relative to past month average
filledRatio = ibm / ibmMonthly.reindex(ibm.index, fillMethod='pad')
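# A small follow-on sketch reusing only objects defined above: within-month
# dispersion via the same toMonthly() helper, and month-over-month returns
# using the shift() idiom from the daily-returns example.
msftMonthlyVol = toMonthly(msft, np.std)
ibmMonthlyVol = toMonthly(ibm, np.std)
ibmMonthlyReturns = ibmMonthly / ibmMonthly.shift(1) - 1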