def black_scholes(current, strike, maturity, rate, volatility):
  d1 = 1.0 / (volatility * sqrt(maturity)) * (
      log(current / strike) + (rate + volatility ** 2 / 2) * maturity)
  # In the standard Black-Scholes formula, d2 = d1 - sigma * sqrt(T).
  d2 = d1 - volatility * sqrt(maturity)

  call = norm_cdf(d1) * current - \
      norm_cdf(d2) * strike * exp(-rate * maturity)
  put = norm_cdf(-d2) * strike * exp(-rate * maturity) - \
      norm_cdf(-d1) * current
  return put, call
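# A minimal single-machine sanity check of the same closed-form prices using
# plain NumPy/SciPy. This is not part of the distributed example; the name
# black_scholes_numpy is illustrative.
import numpy as np
from scipy.stats import norm


def black_scholes_numpy(current, strike, maturity, rate, volatility):
  d1 = (np.log(current / strike) + (rate + volatility ** 2 / 2) * maturity) / (
      volatility * np.sqrt(maturity))
  d2 = d1 - volatility * np.sqrt(maturity)
  call = norm.cdf(d1) * current - norm.cdf(d2) * strike * np.exp(-rate * maturity)
  put = norm.cdf(-d2) * strike * np.exp(-rate * maturity) - norm.cdf(-d1) * current
  return put, call


# Example: put, call = black_scholes_numpy(100.0, 100.0, 1.0, 0.05, 0.2)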
def kneighbors(self, X, n_neighbors=None):
  """Find the K-neighbors of a point.

  Returns the distances to and indices of the neighbors of each point.

  Parameters
  ----------
  X : array-like, last dimension same as that of fit data
      The new point.
  n_neighbors : int
      Number of neighbors to get (default is the value passed to the
      constructor).

  Returns
  -------
  dist : array
      Array representing the distances to each neighbor.
  ind : array
      Indices of the nearest points in the population matrix.
  """
  if n_neighbors is None:
    # Fall back to the value passed to the constructor, as documented.
    n_neighbors = self.n_neighbors
  else:
    self.n_neighbors = n_neighbors

  if isinstance(X, np.ndarray):
    X = expr.from_numpy(X)

  if self.algorithm in ('auto', 'brute'):
    X_broadcast = expr.reshape(X, (X.shape[0], 1, X.shape[1]))
    fit_X_broadcast = expr.reshape(self.X, (1, self.X.shape[0], self.X.shape[1]))
    distances = expr.sum((X_broadcast - fit_X_broadcast) ** 2, axis=2)
    neigh_ind = expr.argsort(distances, axis=1)
    neigh_ind = neigh_ind[:, :n_neighbors].optimized().glom()
    neigh_dist = expr.sort(distances, axis=1)
    neigh_dist = expr.sqrt(neigh_dist[:, :n_neighbors]).optimized().glom()
    return neigh_dist, neigh_ind
  else:
    results = self.X.foreach_tile(mapper_fn=_knn_mapper,
                                  kw={'X': self.X, 'Q': X,
                                      'n_neighbors': self.n_neighbors,
                                      'algorithm': self.algorithm})
    dist = None
    ind = None
    # Gather the KNN candidates from each tile of X, then find the real KNN.
    for k, v in results.iteritems():
      if dist is None:
        dist = v[0]
        ind = v[1]
      else:
        dist = np.concatenate((dist, v[0]), axis=1)
        ind = np.concatenate((ind, v[1]), axis=1)

    mask = np.argsort(dist, axis=1)[:, :self.n_neighbors]
    new_dist = np.array([dist[i][mask[i]] for i, r in enumerate(dist)])
    new_ind = np.array([ind[i][mask[i]] for i, r in enumerate(ind)])
    return new_dist, new_ind
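# A minimal NumPy sketch of what the brute-force branch above computes, on a
# single machine. The helper name and signature are illustrative, not part of
# the class.
import numpy as np


def knn_brute_numpy(X_fit, X_query, n_neighbors):
  # Pairwise squared Euclidean distances via broadcasting: (n_query, n_fit).
  diff = X_query[:, np.newaxis, :] - X_fit[np.newaxis, :, :]
  distances = np.sum(diff ** 2, axis=2)
  # The n_neighbors closest fit points for each query point.
  ind = np.argsort(distances, axis=1)[:, :n_neighbors]
  dist = np.sqrt(np.sort(distances, axis=1)[:, :n_neighbors])
  return dist, ind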
def fit(data, labels, label_size, alpha=1.0):
  '''
  Train standard naive bayes model.

  Args:
    data(Expr): documents to be trained.
    labels(Expr): the correct labels of the training data.
    label_size(int): the number of different labels.
    alpha(float): alpha parameter of naive bayes model.
  '''
  labels = expr.force(labels)

  # calc document freq
  df = expr.reduce(data,
                   axis=0,
                   dtype_fn=lambda input: input.dtype,
                   local_reduce_fn=lambda ex, data, axis: (data > 0).sum(axis),
                   accumulate_fn=np.add,
                   tile_hint=(data.shape[1],))

  idf = expr.log(data.shape[0] * 1.0 / (df + 1)) + 1

  # The normalized frequency of a feature in a document is the feature
  # frequency divided by the root mean square of the feature frequencies in
  # that document.
  square_sum = expr.reduce(data,
                           axis=1,
                           dtype_fn=lambda input: input.dtype,
                           local_reduce_fn=lambda ex, data, axis: np.square(data).sum(axis),
                           accumulate_fn=np.add,
                           tile_hint=(data.shape[0],))

  rms = expr.sqrt(square_sum * 1.0 / data.shape[1])

  # calculate weight normalized Tf-Idf
  data = data / rms.reshape((data.shape[0], 1)) * idf.reshape((1, data.shape[1]))

  # add up all the feature vectors with the same labels
  sum_instance_by_label = expr.ndarray((label_size, data.shape[1]),
                                       dtype=np.float64,
                                       reduce_fn=np.add,
                                       tile_hint=(label_size / len(labels.tiles), data.shape[1]))
  sum_instance_by_label = expr.shuffle(data,
                                       _sum_instance_by_label_mapper,
                                       target=sum_instance_by_label,
                                       kw={'labels': labels, 'label_size': label_size})

  # sum up all the weights for each label from the previous step
  weights_per_label = expr.sum(sum_instance_by_label, axis=1, tile_hint=(label_size,))

  # generate naive bayes per_label_and_feature weights
  weights_per_label_and_feature = expr.shuffle(sum_instance_by_label,
                                               _naive_bayes_mapper,
                                               kw={'weights_per_label': weights_per_label,
                                                   'alpha': alpha})

  return {'scores_per_label_and_feature': weights_per_label_and_feature.force(),
          'scores_per_label': weights_per_label.force(),
          }
def fit(data, labels, label_size, alpha=1.0):
  '''
  Train standard naive bayes model.

  Args:
    data(Expr): documents to be trained.
    labels(Expr): the correct labels of the training data.
    label_size(int): the number of different labels.
    alpha(float): alpha parameter of naive bayes model.
  '''
  # calc document freq
  df = expr.reduce(data,
                   axis=0,
                   dtype_fn=lambda input: input.dtype,
                   local_reduce_fn=lambda ex, data, axis: (data > 0).sum(axis),
                   accumulate_fn=np.add)

  idf = expr.log(data.shape[0] * 1.0 / (df + 1)) + 1

  # The normalized frequency of a feature in a document is the feature
  # frequency divided by the root mean square of the feature frequencies in
  # that document.
  square_sum = expr.reduce(data,
                           axis=1,
                           dtype_fn=lambda input: input.dtype,
                           local_reduce_fn=lambda ex, data, axis: np.square(data).sum(axis),
                           accumulate_fn=np.add)

  rms = expr.sqrt(square_sum * 1.0 / data.shape[1])

  # calculate weight normalized Tf-Idf
  data = data / rms.reshape((data.shape[0], 1)) * idf.reshape((1, data.shape[1]))

  # add up all the feature vectors with the same labels
  #weights_per_label_and_feature = expr.ndarray((label_size, data.shape[1]), dtype=np.float64)
  #for i in range(label_size):
  #  i_mask = (labels == i)
  #  weights_per_label_and_feature = expr.assign(weights_per_label_and_feature, np.s_[i, :],
  #                                              expr.sum(data[i_mask, :], axis=0))
  weights_per_label_and_feature = expr.shuffle(
      expr.retile(data, tile_hint=util.calc_tile_hint(data, axis=0)),
      _sum_instance_by_label_mapper,
      target=expr.ndarray((label_size, data.shape[1]), dtype=np.float64, reduce_fn=np.add),
      kw={'labels': labels, 'label_size': label_size},
      cost_hint={hash(labels): {'00': 0, '01': np.prod(labels.shape)}})

  # sum up all the weights for each label from the previous step
  weights_per_label = expr.sum(weights_per_label_and_feature, axis=1)

  # generate naive bayes per_label_and_feature weights
  weights_per_label_and_feature = expr.log(
      (weights_per_label_and_feature + alpha) /
      (weights_per_label.reshape((weights_per_label.shape[0], 1)) +
       alpha * weights_per_label_and_feature.shape[1]))

  return {'scores_per_label_and_feature': weights_per_label_and_feature.optimized().force(),
          'scores_per_label': weights_per_label.optimized().force(),
          }
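# The same TF-IDF weighting and naive Bayes weight computation, sketched with
# dense NumPy arrays on a single machine. Illustrative only; fit_numpy is not
# part of the Spartan API, and labels is assumed to be an integer vector.
import numpy as np


def fit_numpy(data, labels, label_size, alpha=1.0):
  df = (data > 0).sum(axis=0)
  idf = np.log(data.shape[0] * 1.0 / (df + 1)) + 1
  rms = np.sqrt(np.square(data).sum(axis=1) * 1.0 / data.shape[1])
  data = data / rms.reshape((data.shape[0], 1)) * idf.reshape((1, data.shape[1]))
  # Sum the weighted feature vectors of all documents sharing each label.
  sums = np.zeros((label_size, data.shape[1]))
  for i in range(label_size):
    sums[i] = data[labels == i].sum(axis=0)
  per_label = sums.sum(axis=1)
  scores = np.log((sums + alpha) /
                  (per_label.reshape((label_size, 1)) + alpha * sums.shape[1]))
  return {'scores_per_label_and_feature': scores, 'scores_per_label': per_label}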
def _get_norm_of_each_item(self, rating_table):
  """Get the norm of each item vector.

  For each item, calculate the norm of the item vector.

  Parameters
  ----------
  rating_table : Spartan matrix of shape (M, N).
                 Each column represents the ratings of one item.

  Returns
  -------
  item_norm : Spartan matrix of shape (N,).
              item_norm[i] equals || rating_table[:, i] ||
  """
  return expr.sqrt(expr.sum(expr.multiply(rating_table, rating_table), axis=0))
def move(galaxy, dt):
  '''Move the bodies.

  First find forces and change velocity and then move positions.
  '''
  # `.reshape(add_tuple(a, 1))` is the spartan way of doing
  # `ndarray[:, np.newaxis]` in numpy. While syntactically different, both
  # add a dimension of length 1 after the other dimensions.
  # e.g. (5, 5) becomes (5, 5, 1)

  # Calculate all distances component wise (with sign).
  dx_new = galaxy['x'].reshape(add_tuple(galaxy['x'].shape, [1]))
  dy_new = galaxy['y'].reshape(add_tuple(galaxy['y'].shape, [1]))
  dz_new = galaxy['z'].reshape(add_tuple(galaxy['z'].shape, [1]))
  dx = (galaxy['x'] - dx_new) * -1
  dy = (galaxy['y'] - dy_new) * -1
  dz = (galaxy['z'] - dz_new) * -1

  # Euclidean distances (all bodies).
  r = sqrt(dx**2 + dy**2 + dz**2)
  r = set_diagonal(r, 1.0)

  # Prevent collision.
  mask = r < 1.0
  #r = r * ~mask + 1.0 * mask
  r = spartan.map((r, mask), lambda x, m: x * ~m + 1.0 * m)

  m = galaxy['m'].reshape(add_tuple(galaxy['m'].shape, [1]))

  # Calculate the acceleration component wise.
  fx = G * m * dx / r**3
  fy = G * m * dy / r**3
  fz = G * m * dz / r**3

  # Set the force (acceleration) a body exerts on itself to zero.
  fx = set_diagonal(fx, 0.0)
  fy = set_diagonal(fy, 0.0)
  fz = set_diagonal(fz, 0.0)

  galaxy['vx'] += dt * expr.sum(fx, axis=0)
  galaxy['vy'] += dt * expr.sum(fy, axis=0)
  galaxy['vz'] += dt * expr.sum(fz, axis=0)

  galaxy['x'] += dt * galaxy['vx']
  galaxy['y'] += dt * galaxy['vy']
  galaxy['z'] += dt * galaxy['vz']
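# The same pairwise-force update sketched in plain NumPy for a handful of
# bodies. galaxy is assumed to be a dict of 1-D float arrays ('x', 'y', 'z',
# 'vx', 'vy', 'vz', 'm'); the default G is the gravitational constant, and the
# helper name and signature are illustrative.
import numpy as np


def move_numpy(galaxy, dt, G=6.674e-11):
  x, y, z, m = galaxy['x'], galaxy['y'], galaxy['z'], galaxy['m']
  # Signed component-wise distances between every pair of bodies.
  dx = x[:, np.newaxis] - x[np.newaxis, :]
  dy = y[:, np.newaxis] - y[np.newaxis, :]
  dz = z[:, np.newaxis] - z[np.newaxis, :]
  r = np.sqrt(dx ** 2 + dy ** 2 + dz ** 2)
  np.fill_diagonal(r, 1.0)
  r = np.maximum(r, 1.0)  # prevent collisions, as in the distributed version
  fx = G * m[:, np.newaxis] * dx / r ** 3
  fy = G * m[:, np.newaxis] * dy / r ** 3
  fz = G * m[:, np.newaxis] * dz / r ** 3
  # A body exerts no force on itself.
  np.fill_diagonal(fx, 0.0)
  np.fill_diagonal(fy, 0.0)
  np.fill_diagonal(fz, 0.0)
  galaxy['vx'] += dt * fx.sum(axis=0)
  galaxy['vy'] += dt * fy.sum(axis=0)
  galaxy['vz'] += dt * fz.sum(axis=0)
  galaxy['x'] += dt * galaxy['vx']
  galaxy['y'] += dt * galaxy['vy']
  galaxy['z'] += dt * galaxy['vz']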
def _get_norm_of_each_item(self, rating_table):
  """Get the norm of each item vector.

  For each item, calculate the norm of the item vector.

  Parameters
  ----------
  rating_table : Spartan matrix of shape (M, N).
                 Each column represents the ratings of one item.

  Returns
  -------
  item_norm : Spartan matrix of shape (N,).
              item_norm[i] equals || rating_table[:, i] ||
  """
  ctx = blob_ctx.get()
  if isinstance(rating_table, array.distarray.DistArray):
    rating_table = expr.lazify(rating_table)

  res = expr.sqrt(expr.sum(expr.multiply(rating_table, rating_table),
                           axis=0,
                           tile_hint=(rating_table.shape[1] / ctx.num_workers, )))
  return res.force()
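# For reference, the same per-item (column) norms with plain NumPy; the helper
# name is illustrative, and the result is equivalent to
# np.linalg.norm(rating_table, axis=0).
import numpy as np


def item_norms_numpy(rating_table):
  # L2 norm of each column of an (M, N) ratings matrix.
  return np.sqrt(np.sum(rating_table * rating_table, axis=0))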
def center_data(X, y, fit_intercept, normalize=False):
  """
  Centers data to have mean zero along axis 0. This is here because
  nearly all linear models will want their data to be centered.
  """
  if fit_intercept:
    X_mean = X.mean(axis=0)
    X_mean = expr.reshape(X_mean, (1, X_mean.shape[0]))
    X -= X_mean

    if normalize:
      X_std = expr.sqrt(expr.sum(X ** 2, axis=0)).force()
      X_std[X_std == 0] = 1
      X /= X_std
    else:
      X_std = expr.ones(X.shape[1])
    y_mean = y.mean(axis=0)
    y -= y_mean
  else:
    X_mean = expr.zeros(X.shape[1])
    X_std = expr.ones(X.shape[1])
    y_mean = 0. if y.ndim == 1 else expr.zeros(y.shape[1], dtype=X.dtype)
  return X, y, X_mean, y_mean, X_std
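# A single-machine NumPy sketch of the same centering/scaling behaviour.
# Illustrative only; it mirrors the distributed version above rather than any
# particular library API.
import numpy as np


def center_data_numpy(X, y, fit_intercept, normalize=False):
  if fit_intercept:
    X_mean = X.mean(axis=0)
    X = X - X_mean
    if normalize:
      X_std = np.sqrt(np.sum(X ** 2, axis=0))
      X_std[X_std == 0] = 1
      X = X / X_std
    else:
      X_std = np.ones(X.shape[1])
    y_mean = y.mean(axis=0)
    y = y - y_mean
  else:
    X_mean = np.zeros(X.shape[1])
    X_std = np.ones(X.shape[1])
    y_mean = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
  return X, y, X_mean, y_mean, X_std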
def simulate(ts_all, te_all, lamb_all, num_paths):
  '''Range over a number of independent products.

  :param ts_all: DistArray
    Start dates for a series of swaptions.
  :param te_all: DistArray
    End dates for a series of swaptions.
  :param lamb_all: DistArray
    Parameter values for a series of swaptions.
  :param num_paths: Int
    Number of paths used in random walk.
  :rtype: DistArray
  '''
  swaptions = []
  i = 0
  for ts_a, te, lamb in zip(ts_all, te_all, lamb_all):
    for ts in ts_a:
      #start = time()
      print i
      time_structure = arange(None, 0, ts + DELTA, DELTA)
      maturity_structure = arange(None, 0, te, DELTA)

      ############# MODEL ###############
      # Variance reduction technique - Antithetic Variates.
      eps_tmp = randn(time_structure.shape[0] - 1, num_paths)
      eps = concatenate(eps_tmp, -eps_tmp, 1)

      # Forward LIBOR rates for the construction of the spot measure.
      f_kk = zeros((time_structure.shape[0], 2*num_paths))
      f_kk = assign(f_kk, np.s_[0, :], F_0)

      # Plane kxN of simulated LIBOR rates.
      f_kn = ones((maturity_structure.shape[0], 2*num_paths))*F_0

      # Simulations of the plane f_kn for each time step.
      for t in xrange(1, time_structure.shape[0]):
        f_kn_new = f_kn[1:, :]*exp(lamb*mu(f_kn, lamb)*DELTA - 0.5*lamb*lamb *
                                   DELTA + lamb*eps[t - 1, :]*sqrt(DELTA))
        f_kk = assign(f_kk, np.s_[t, :], f_kn_new[0])
        f_kn = f_kn_new

      ############## PRODUCT ###############
      # Value of zero coupon bonds.
      zcb = ones((int((te-ts)/DELTA)+1, 2*num_paths))
      f_kn_modified = 1 + DELTA*f_kn
      for j in xrange(zcb.shape[0] - 1):
        zcb = assign(zcb, np.s_[j + 1], zcb[j] / f_kn_modified[j])

      # Swaption price at maturity.
      last_row = zcb[zcb.shape[0] - 1, :].reshape((20, ))
      swap_ts = maximum(1 - last_row - THETA*DELTA*expr.sum(zcb[1:], 0), 0)

      # Spot measure used for discounting.
      b_ts = ones((2*num_paths, ))
      tmp = 1 + DELTA * f_kk
      for j in xrange(int(ts/DELTA)):
        b_ts *= tmp[j].reshape((20, ))

      # Swaption price at time 0.
      swaption = swap_ts/b_ts

      # Save expected value in bps and std.
      me = mean((swaption[0:num_paths] + swaption[num_paths:])/2) * 10000
      st = std((swaption[0:num_paths] + swaption[num_paths:])/2)/sqrt(num_paths)*10000

      swaptions.append([me.optimized().force(), st.optimized().force()])

      #print time() - start
      i += 1

  return swaptions
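# A tiny NumPy illustration of the antithetic-variates trick used above: each
# normal draw is paired with its negation, doubling the number of paths while
# reducing the variance of the Monte Carlo estimate. The shapes here are
# illustrative.
import numpy as np

num_steps, num_paths = 4, 3
eps_tmp = np.random.randn(num_steps, num_paths)
eps = np.concatenate((eps_tmp, -eps_tmp), axis=1)  # shape (num_steps, 2 * num_paths)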