def hessian_inverse_vector_product(self, vec, hessian_scaling,
                                   S1=None, S2=None, method='stochastic'):
    '''
    From Agarwal et al., "Second-order stochastic optimization for machine
    learning in linear time," 2017. Not clear that this provides good
    accuracy in a reasonable amount of time.
    '''
    N = self.training_data.X.shape[0]
    D = vec.shape[0]
    if S1 is None and S2 is None:
        S1 = int(np.ceil(np.sqrt(N) / 10))
        S2 = int(np.ceil(10 * np.sqrt(N)))

    hivpEsts = np.zeros((S1, D))
    for ii in range(S1):
        hivpEsts[ii] = vec
        for n in range(1, S2):
            idx = np.random.choice(N)
            #H_n_prod_prev = self.get_single_datapoint_hvp(idx, hivpEsts[ii]) * N
            #H_n_prod_prev /= hessian_scaling
            H_n_prod_prev = self.get_all_data_hvp(hivpEsts[ii]) / hessian_scaling
            hivpEsts[ii] = vec + hivpEsts[ii] - H_n_prod_prev
    return np.mean(hivpEsts, axis=0) / hessian_scaling
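# Illustrative note (not part of the original class): the inner loop above is the
# LiSSA / Neumann-series recurrence v_j = vec + (I - H) v_{j-1} (with H scaled by
# hessian_scaling), which converges to H^{-1} vec when the scaled Hessian has
# eigenvalues in (0, 2). A tiny dense sketch with made-up numbers:
import numpy as np

H_demo = np.array([[1.5, 0.2],
                   [0.2, 0.8]])
vec_demo = np.array([1.0, -2.0])
v = vec_demo.copy()
for _ in range(200):
    v = vec_demo + v - H_demo @ v
print(np.allclose(v, np.linalg.solve(H_demo, vec_demo)))  # True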
def image_bbox(params, img):
    img_ymax, img_xmax = img.nelec.shape
    px, py = img.equa2pixel(params.u)
    xlim = (np.max([0, int(np.floor(px - pixel_radius))]),
            np.min([img_xmax, int(np.ceil(px + pixel_radius))]))
    ylim = (np.max([0, int(np.floor(py - pixel_radius))]),
            np.min([img_ymax, int(np.ceil(py + pixel_radius))]))
    return xlim, ylim
def MovingWinFeats(x, xLen, fs, winLen, winDisp, featFn):
    # number of full analysis windows (xLen in samples; winLen, winDisp in seconds)
    num_wins = int(np.floor((xLen - winLen * fs) / (winDisp * fs)) + 1)
    y = np.zeros(num_wins)
    for i in range(num_wins):
        start = int(np.ceil(winDisp * fs * i))
        stop = int(np.ceil(winDisp * fs * i + winLen * fs))
        y[i] = featFn(x[start:stop])
    return y
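# Hedged usage sketch for MovingWinFeats: a synthetic 10 s signal at a made-up
# sampling rate, featurized with a simple line-length function. Only numpy is assumed.
import numpy as np

fs_demo = 250  # Hz
t_demo = np.arange(0, 10, 1.0 / fs_demo)
sig_demo = np.sin(2 * np.pi * 5 * t_demo)
line_length = lambda win: np.sum(np.abs(np.diff(win)))
feats = MovingWinFeats(sig_demo, len(sig_demo), fs_demo, 1.0, 0.5, line_length)
print(feats.shape)  # one feature per 1 s window stepped by 0.5 s -> (19,)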
def get_bounding_box(params, img):
    if params.is_star():
        bound = img.R
    elif params.is_galaxy():
        bound = gal_funs.gen_galaxy_psf_image_bound(params, img)
    else:
        raise ValueError("source type unknown")
    px, py = img.equa2pixel(params.u)
    xlim = (np.max([0, np.floor(px - bound)]),
            np.min([img.nelec.shape[1], np.ceil(px + bound)]))
    ylim = (np.max([0, np.floor(py - bound)]),
            np.min([img.nelec.shape[0], np.ceil(py + bound)]))
    return xlim, ylim
def rasterize_triangles(self, vertices):
    '''
    Args:
        vertices: [nver, 3]
        triangles: [ntri, 3]
        h: height
        w: width
    Returns:
        depth_buffer: [h, w] saves the depth; here, the bigger the z, the closer the point.
        triangle_buffer: [h, w] saves the tri id (-1 for no triangle).
        barycentric_weight: [h, w, 3] saves corresponding barycentric weight.

    # Each triangle has 3 vertices & each vertex has 3 coordinates x, y, z.
    # h, w is the size of rendering
    '''
    # initial
    depth_buffer = {}  #np.zeros([h, w]) - 999999. #+ np.min(vertices[2,:]) - 999999. # set the initial z to the farthest position
    triangle_buffer = np.zeros([self.h, self.w], dtype=np.int32) - 1  # if tri id = -1, the pixel has no triangle correspondence
    barycentric_weight = {}  #np.zeros([h, w, 3], dtype = np.float32)
    for i in range(self.h):
        for j in range(self.w):
            depth_buffer[(i, j)] = -math.inf
            barycentric_weight[(i, j)] = np.array([0, 0, 0])

    for i in range(self.tri_mesh_data.shape[0]):
        # print('Rasterizing: ', i + 1)
        tri = self.tri_mesh_data[i, :]  # 3 vertex indices

        # the inner bounding box
        umin = max(int(np.ceil(np.min(vertices[tri, 0]))), 0)
        umax = min(int(np.floor(np.max(vertices[tri, 0]))), self.w - 1)
        vmin = max(int(np.ceil(np.min(vertices[tri, 1]))), 0)
        vmax = min(int(np.floor(np.max(vertices[tri, 1]))), self.h - 1)
        if umax < umin or vmax < vmin:
            continue

        for u in range(umin, umax + 1):
            for v in range(vmin, vmax + 1):
                if not self.isPointInTri([u, v], vertices[tri, :2]):
                    continue
                w0, w1, w2 = self.get_point_weight([u, v], vertices[tri, :2])  # barycentric weight
                point_depth = w0 * vertices[tri[0], 2] + w1 * vertices[tri[1], 2] + w2 * vertices[tri[2], 2]
                if point_depth > depth_buffer[(v, u)]:
                    depth_buffer[(v, u)] = point_depth
                    triangle_buffer[v, u] = i
                    barycentric_weight[(v, u)] = np.array([w0, w1, w2])

    return depth_buffer, triangle_buffer, barycentric_weight
def plotDistrs(ds, mus_, sigmas_):
    Nrows = int(np.ceil(np.log2(ds.N))) + 1
    fig, ax = plt.subplots(nrows=Nrows, ncols=ds.N, figsize=(5 * ds.N, 5 * ds.N))
    for row in range(Nrows):
        for col in range(2**row):
            idx = col
            if row > 0:
                idx += 2**(row) - 1
            ax[row, col].hist(ds.treeAlphaHats[idx], density=True)
            leafIndices = getChildren(idx, ds.N - 1).astype(int) - (ds.N - 1)
            ln = ds.numU[leafIndices]

            # Final
            mu = np.dot(ds.mu[leafIndices], ln) / np.sum(ln)
            sigma = ds.sigma[idx]
            pdf = ss.norm.pdf(np.arange(0, ds.treeAlphaHats[idx].max(), .01), loc=mu, scale=sigma)
            ax[row, col].plot(np.arange(0, ds.treeAlphaHats[idx].max(), .01), pdf,
                              color="green", alpha=.5, label="final")
            ax[row, col].vlines(mu, 0, 1, color="green", label="alpha hat")

            # Original
            mu = np.dot(mus_[0][leafIndices], ln) / np.sum(ln)
            sigma = sigmas_[0][idx]
            pdf = ss.norm.pdf(np.arange(0, ds.treeAlphaHats[idx].max(), .01), loc=mu, scale=sigma)
            ax[row, col].plot(np.arange(0, ds.treeAlphaHats[idx].max(), .01), pdf,
                              color="red", alpha=.5, label="og")

            truth = np.dot(ds.trueAlphas[leafIndices].flatten(), ln) / np.sum(ln)
            ax[row, col].vlines(truth, 0, 1, color="black", label="alpha")
            ax[row, col].legend()
    return fig
def __init__(self, dimension, inputs, obs, mini_batch=False):
    """Standard multi-layer perceptron, vectorized over both training
    examples and weight samples."""
    self.dimension = dimension
    self.prior = FiniteDimensionalPrior(self.dimension)
    self.inputs = inputs
    self.inputs_size = len(inputs)
    self.obs = obs
    self.mini_batch = mini_batch
    if self.mini_batch:
        self.it = 0
        self.mini_batch_size = 32
        self.number_batchs = int(np.ceil(self.inputs_size / self.mini_batch_size))
        self.inputs_all = np.copy(inputs)
        self.obs_all = np.copy(obs)
        self.inputs = inputs[:self.mini_batch_size]
        self.obs = obs[:self.mini_batch_size]
    self.gx = grad(self.cost)
    self.J = jacobian(self.forward)
    self.hx = hessian_vector_product(self.cost)
    self.hvp = hvp(self.hx)
def plot_images(images, ax, ims_per_row=5, padding=5, digit_dimensions=(28, 28),
                cmap=matplotlib.cm.binary, vmin=None):
    """Images should be a (N_images x pixels) matrix."""
    N_images = images.shape[0]
    N_rows = int(np.ceil(float(N_images) / ims_per_row))
    pad_value = np.min(images.ravel())
    concat_images = np.full(((digit_dimensions[0] + padding) * N_rows + padding,
                             (digit_dimensions[1] + padding) * ims_per_row + padding),
                            pad_value)
    for i in range(N_images):
        cur_image = np.reshape(images[i, :], digit_dimensions)
        row_ix = i // ims_per_row  # Integer division.
        col_ix = i % ims_per_row
        row_start = padding + (padding + digit_dimensions[0]) * row_ix
        col_start = padding + (padding + digit_dimensions[1]) * col_ix
        concat_images[row_start: row_start + digit_dimensions[0],
                      col_start: col_start + digit_dimensions[1]] = cur_image
    cax = ax.matshow(concat_images, cmap=cmap, vmin=vmin)
    plt.xticks(np.array([]))
    plt.yticks(np.array([]))
    return cax
def get_dataset():
    """ gets the dataset """
    num_samples = 50  # number of batches
    split = 0.5
    seed = 0

    # params for each specific trajectory
    t_span = [0, 3]
    timescale = 15  # discretization per second; the trajectory has 45 points in this case
    noise_std = 1

    # randomly sample inputs
    np.random.seed(seed)
    x = np.zeros([num_samples, states, timescale * (t_span[1] - t_span[0])])
    for s in range(num_samples):
        # get batch of samples
        x_sample = get_trajectory(t_span, timescale, noise_std)
        x[s, :, :] = x_sample

    test_size = int(np.ceil(num_samples / 2))
    test_data = x[:test_size, :, :]
    train_data = x[test_size:, :, :]
    return test_data, train_data
def gradient_descent(g, alpha, max_its, w, num_pts, batch_size, **kwargs):
    # flatten the input function, create gradient based on flat function
    g_flat, unflatten, w = flatten_func(g, w)
    grad = value_and_grad(g_flat)

    # record history
    w_hist = []
    w_hist.append(unflatten(w))

    # how many mini-batches equal the entire dataset?
    num_batches = int(np.ceil(np.divide(num_pts, batch_size)))

    # over the line
    for k in range(max_its):
        # loop over each minibatch
        for b in range(num_batches):
            # collect indices of current mini-batch
            batch_inds = np.arange(b * batch_size, min((b + 1) * batch_size, num_pts))

            # plug in value into func and derivative
            cost_eval, grad_eval = grad(w, batch_inds)
            grad_eval.shape = np.shape(w)

            # take descent step
            w = w - alpha * grad_eval

        # record weight update
        w_hist.append(unflatten(w))

    return w_hist
def fit(self, target, input, nb_epochs=500, batch_size=16, lr=1e-3, verbose=True):
    nb_batches = int(np.ceil(len(input) / batch_size))

    def batch_indices(iter):
        idx = iter % nb_batches
        return slice(idx * batch_size, (idx + 1) * batch_size)

    def _objective(params, iter):
        self.params = params
        idx = batch_indices(iter)
        return self.cost(target[idx], input[idx])

    def _callback(params, iter, grad):
        if iter % (nb_batches * 10) == 0:
            self.params = params
            if verbose:
                print('Epoch: {}/{}.............'.format(iter // nb_batches, nb_epochs), end=' ')
                print("Loss: {:.4f}".format(self.cost(target, input)))

    _gradient = grad(_objective)

    self.params = adam(_gradient, self.params, step_size=lr,
                       num_iters=nb_epochs * nb_batches, callback=_callback)
def newtons_method(g, max_its, w, num_pts, batch_size, **kwargs):
    # flatten input function, in case it takes in matrices of weights
    g_flat, unflatten, w = flatten_func(g, w)

    # compute the gradient / hessian functions of our input function
    gradient = value_and_grad(g_flat)
    hess = hessian(g_flat)

    # set numerical stability parameter / regularization parameter
    epsilon = 10**(-7)
    if 'epsilon' in kwargs:
        epsilon = kwargs['epsilon']

    # record history
    w_hist = []
    w_hist.append(unflatten(w))
    cost_hist = [g_flat(w, np.arange(num_pts))]

    # how many mini-batches equal the entire dataset?
    num_batches = int(np.ceil(np.divide(num_pts, batch_size)))

    # over the line
    for k in range(max_its):
        # loop over each minibatch
        for b in range(num_batches):
            # collect indices of current mini-batch
            batch_inds = np.arange(b * batch_size, min((b + 1) * batch_size, num_pts))

            # evaluate the gradient, store current weights and cost function value
            cost_eval, grad_eval = gradient(w, batch_inds)

            # evaluate the hessian
            hess_eval = hess(w, batch_inds)

            # reshape for numpy linalg functionality
            hess_eval.shape = (int((np.size(hess_eval))**(0.5)), int((np.size(hess_eval))**(0.5)))

            '''
            # compute minimum eigenvalue of hessian matrix
            eigs, vecs = np.linalg.eig(hess_eval)
            smallest_eig = np.min(eigs)
            adjust = 0
            if smallest_eig < 0:
                adjust = np.abs(smallest_eig)
            '''

            # solve the regularized second-order system for the weight update
            A = hess_eval + (epsilon) * np.eye(np.size(w))
            b = grad_eval
            w = np.linalg.lstsq(A, np.dot(A, w) - b)[0]
            #w = w - np.dot(np.linalg.pinv(hess_eval + epsilon*np.eye(np.size(w))),grad_eval)

        # record weights after each epoch
        w_hist.append(unflatten(w))
        cost_hist.append(g_flat(w, np.arange(num_pts)))

    return w_hist, cost_hist
def gradient_descent(g, w, x_train, x_val, alpha, max_its, batch_size, **kwargs):
    verbose = True
    if 'verbose' in kwargs:
        verbose = kwargs['verbose']

    # flatten the input function, create gradient based on flat function
    g_flat, unflatten, w = flatten_func(g, w)
    grad = value_and_grad(g_flat)

    # record history
    num_train = x_train.shape[1]
    num_val = x_val.shape[1]
    w_hist = [unflatten(w)]
    train_hist = [g_flat(w, x_train, np.arange(num_train))]
    val_hist = [g_flat(w, x_val, np.arange(num_val))]

    # how many mini-batches equal the entire dataset?
    num_batches = int(np.ceil(np.divide(num_train, batch_size)))

    # over the line
    for k in range(max_its):
        # loop over each minibatch
        start = timer()
        train_cost = 0
        for b in range(num_batches):
            # collect indices of current mini-batch
            batch_inds = np.arange(b * batch_size, min((b + 1) * batch_size, num_train))

            # plug in value into func and derivative
            cost_eval, grad_eval = grad(w, x_train, batch_inds)
            grad_eval.shape = np.shape(w)

            # take descent step
            w = w - alpha * grad_eval
        end = timer()

        # update training and validation cost
        train_cost = g_flat(w, x_train, np.arange(num_train))
        val_cost = g_flat(w, x_val, np.arange(num_val))

        # record weight update, train and val costs
        w_hist.append(unflatten(w))
        train_hist.append(train_cost)
        val_hist.append(val_cost)

        if verbose == True:
            print('step ' + str(k + 1) + ' done in ' + str(np.round(end - start, 1)) +
                  ' secs, train cost = ' + str(np.round(train_hist[-1][0], 4)) +
                  ', val cost = ' + str(np.round(val_hist[-1][0], 4)))

    if verbose == True:
        print('finished all ' + str(max_its) + ' steps')
        #time.sleep(1.5)
        #clear_output()
    return w_hist, train_hist, val_hist
def __init__(self, sigma, integrate=True, boxsize=None):
    sigma = self.prepare_param(sigma, "sigma")
    if boxsize is None:
        boxsize = int(np.ceil(10 * np.max(sigma)))
    super().__init__(sigma, integrate=integrate, boxsize=boxsize)
def objective(train_images, train_labels, params, mask_param, c_list, iter):
    num_batches = int(np.ceil(len(train_images) / batch_size))

    def batch_indices(iter):
        idx = iter % num_batches
        return slice(idx * batch_size, (idx + 1) * batch_size)

    idx = batch_indices(iter)
    #if DEBUG: print('In train objective')
    return -log_posterior_binary(params, mask_param, train_images[idx],
                                 train_labels[idx], L2_reg, c_list)
def generate_batch(X, batch_size=32):
    num_batches = int(np.ceil(X.shape[0] / batch_size))

    def batch_indices(iter):
        idx = iter % num_batches
        return slice(idx * batch_size, (idx + 1) * batch_size)

    return batch_indices
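# Hedged sketch of how generate_batch might be used in a training loop
# (X_demo and the loop count are placeholders, not from the original source).
import numpy as np

X_demo = np.random.rand(100, 4)
batch_idx = generate_batch(X_demo, batch_size=32)
for it in range(8):
    batch = X_demo[batch_idx(it)]  # cycles through the 4 mini-batches
    # ... compute a stochastic gradient on `batch` here ...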
def test_objective(params, mask_param, test_images, test_labels, c_list, iter):
    num_batches = int(np.ceil(len(test_images) / test_batch_size))

    def batch_indices(iter):
        idx = iter % num_batches
        return slice(idx * test_batch_size, (idx + 1) * test_batch_size)

    idx = batch_indices(iter)
    if DEBUG:
        print('Test BCE loss')
    return [-x for x in log_posterior_binary_test(params, mask_param, test_images[idx],
                                                  test_labels[idx], L2_reg, c_list)]
def gen_n_point_in_polygon(n_point, polygon, tol=0.1):
    """
    -----------
    Description
    -----------
    Generate n regularly spaced points within a shapely Polygon geometry
    (function from stackoverflow).
    -----------
    Parameters
    -----------
    - n_point (int) : number of points required
    - polygon (shapely.geometry.polygon.Polygon) : Polygon geometry
    - tol (float) : spacing tolerance (Default is 0.1)
    -----------
    Returns
    -----------
    - points (list) : generated point geometries
    -----------
    Examples
    -----------
    >>> geom_pts = gen_n_point_in_polygon(200, polygon)
    >>> points_gs = gpd.GeoSeries(geom_pts)
    >>> points_gs.plot()
    """
    # Get the bounds of the polygon
    minx, miny, maxx, maxy = polygon.bounds
    # ---- Initialize spacing and point counter
    spacing = polygon.area / n_point
    point_counter = 0
    # Start while loop to find the better spacing according to tolerance increments
    while point_counter <= n_point:
        # --- Generate grid point coordinates
        x = np.arange(np.floor(minx), int(np.ceil(maxx)), spacing)
        y = np.arange(np.floor(miny), int(np.ceil(maxy)), spacing)
        xx, yy = np.meshgrid(x, y)
        # ----
        pts = [Point(X, Y) for X, Y in zip(xx.ravel(), yy.ravel())]
        # ---- Keep only points in polygons
        points = [pt for pt in pts if pt.within(polygon)]
        # ---- Verify number of point generated
        point_counter = len(points)
        spacing -= tol
    # ---- Return
    return points
def newtons_method(g, max_its, w, num_pts, batch_size, **kwargs):
    # flatten input function, in case it takes in matrices of weights
    flat_g, unflatten, w = flatten_func(g, w)

    # compute the gradient / hessian functions of our input function -
    # note these are themselves functions. In particular the gradient -
    # when evaluated - returns both the gradient and function evaluations (remember,
    # as discussed in Chapter 3, we always get the function evaluation 'for free' when we use
    # an Automatic Differentiator to evaluate the gradient)
    gradient = value_and_grad(flat_g)
    hess = hessian(flat_g)

    # set numerical stability parameter / regularization parameter
    epsilon = 10**(-7)
    if 'epsilon' in kwargs:
        epsilon = kwargs['epsilon']

    # record history
    w_hist = []
    w_hist.append(unflatten(w))

    # how many mini-batches equal the entire dataset?
    num_batches = int(np.ceil(np.divide(num_pts, batch_size)))

    # over the line
    for k in range(max_its):
        # loop over each minibatch
        for b in range(num_batches):
            # collect indices of current mini-batch
            batch_inds = np.arange(b * batch_size, min((b + 1) * batch_size, num_pts))

            # evaluate the gradient, store current weights and cost function value
            cost_eval, grad_eval = gradient(w, batch_inds)

            # evaluate the hessian
            hess_eval = hess(w, batch_inds)

            # reshape for numpy linalg functionality
            hess_eval.shape = (int((np.size(hess_eval))**(0.5)), int((np.size(hess_eval))**(0.5)))

            # solve the regularized second-order system for the weight update
            A = hess_eval + epsilon * np.eye(np.size(w))
            b = grad_eval
            w = np.linalg.lstsq(A, np.dot(A, w) - b)[0]
            #w = w - np.dot(np.linalg.pinv(hess_eval + epsilon*np.eye(np.size(w))),grad_eval)

        # record weights after each epoch
        w_hist.append(unflatten(w))

    # collect final weights
    w_hist.append(unflatten(w))
    return w_hist
def broadcast1024(*args):
    """Extend numpy.broadcast to accept 1024 inputs, rather than the default 32."""
    ngroups = int(np.ceil(len(args) / 32))
    if ngroups == 1:
        return np.broadcast(*args)
    else:
        return np.broadcast(*[
            np.empty(np.broadcast(*args[n * 32:(n + 1) * 32]).shape)
            for n in range(ngroups)
        ])
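# Quick hedged check of broadcast1024 with more inputs than numpy's 32-argument
# limit (array shapes below are arbitrary illustrations).
import numpy as np

many = [np.empty((1, 3))] * 40 + [np.empty((5, 1))]
print(broadcast1024(*many).shape)  # (5, 3)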
def __init__(self, n, wavelengths, amplitudes=None):
    self.wavelengths = np.array(wavelengths)
    # number of repetitions needed to cover n samples (ceil of a true division)
    self.repetitions = (np.ceil(n / self.wavelengths)).astype(np.int8)
    self.n = n
    if (amplitudes is None):
        self.amplitudes = np.ones(len(wavelengths))
    else:
        self.amplitudes = amplitudes
def mkcov_ASDfactored(prs, nx, nxcirc=None, condthresh=1e8, compfftbasis=None):
    # % Factored representation of ASD covariance matrix in Fourier domain
    # %
    # % [Cdiag,U,wvec] = mkcov_ASDfactored(prs,nx,opts)
    # %
    # % Covariance represented as C = U*sdiag*U'
    # % where U is unitary (in some larger basis) and sdiag is diagonal
    # %
    # % C_ij = rho*exp(-(i-j)^2/(2*l^2))
    # %
    # % INPUT:
    # % ------
    # %  prs [2 x 1] - ASD parameters [len_sc = length scale; rho = maximal variance]
    # %  nx  [1 x 1] - number of regression coeffs
    # %
    # %  Note: nxcirc = nx gives circular boundary
    # %
    # % OUTPUT:
    # % -------
    # %  cdiag [ni x 1]      - vector with thresholded eigenvalues of C
    # %  U     [ni x nxcirc] - column vectors define orthogonal basis for C (on Reals)
    # %  wvec  [nxcirc x 1]  - vector of Fourier frequencies

    len_sc = prs[0]
    rho = prs[1]

    # % Parse inputs
    if nxcirc is None:
        nxcirc = nx + np.ceil(4 * len_sc)  # extends support by 4 stdevs of ASD kernel width

    # % Check that nxcirc isn't smaller than nx
    if nxcirc < nx:
        warnings.warn('mkcov_ASDfactored: nxcirc < nx. Some columns of x will be ignored')

    # % compute vector of Fourier frequencies
    # maxfreq = np.floor(nxcirc/(np.pi*len_sc)*np.sqrt(.5*np.log(condthresh)))  # max
    # if maxfreq < nxcirc/2:
    #     wvec = np.concatenate(([np.arange(int(maxfreq))],[np.arange(-int(maxfreq),0)]),axis = 1)
    # else:  # % in case cutoff is above max number of frequencies
    wvec = rffb.comp_wvec(nxcirc, len_sc, condthresh)

    # % Compute diagonal in Fourier domain
    cdiag = mkcovdiag_ASD(len_sc, rho, nxcirc, wvec=wvec)  # compute diagonal and Fourier freqs

    # % Compute real-valued discrete Fourier basis U
    if compfftbasis is not None:
        U = rffb.realfftbasis(nx, nxcirc, wvec)[0]
        return cdiag, U, wvec
    else:
        return cdiag
def optimize(self, n_iters, objective, init_param):
    """
    Parameters
    ----------
    n_iters : `int`
        Number of iterations of the optimization
    objective : `function`
        Function for constructing the objective and gradient function
    init_param : `numpy.ndarray`, shape(var_param_dim,)
        Initial values of the variational parameters

    Returns
    ----------
    Dictionary
        smoothed_opt_param : `numpy.ndarray`, shape(var_param_dim,)
            Iterate-averaged estimated variational parameters
        variational_param_history : `numpy.ndarray`, shape(n_iters, var_param_dim)
            Estimated variational parameters over all iterations
        value_history : `numpy.ndarray`, shape(n_iters,)
            Estimated loss (ELBO) over all iterations
    """
    t0 = 0
    history = None
    learning_rate = self._sgo._learning_rate
    variational_param = init_param.copy()
    variational_param_mean = init_param.copy()
    value_history = []
    Delta_history = []
    variational_param_history = []

    for t in tqdm.trange(n_iters):
        object_val, object_grad = objective(variational_param)
        value_history.append(object_val)
        descent_dir, history = self._sgo.descent_direction(object_grad, history)
        variational_param -= learning_rate * descent_dir
        variational_param_history.append(variational_param)
        Delta = np.dot(variational_param, descent_dir) - 0.5 * learning_rate * np.sum(descent_dir**2)
        Delta_history.append(Delta)
        W = np.max([np.min([t - t0, self._W0]), np.ceil(self._theta * (t - t0)).astype(int)])
        if (W >= self._W0) and (t % self._t_check == 0):
            convg = self.convergence_check(W, Delta_history)
            if convg == True:
                m = b = np.floor(np.sqrt(W)).astype(int)
                learning_rate = self._rho * learning_rate
                variational_param_mean_prev = variational_param_mean
                variational_param_mean = np.mean(np.array(variational_param_history[-m * b:]), axis=0)
                t0 = t
                SKL = (MFGaussian(self._dim)._kl(variational_param_mean_prev, variational_param_mean)
                       + MFGaussian(self._dim)._kl(variational_param_mean, variational_param_mean_prev))
                if (SKL / self._rho < self._eps):
                    print('Stopping rule reached at', t + 1, 'th iteration')
                    break

    return dict(smoothed_opt_param=variational_param_mean,
                variational_param_history=variational_param_history,
                value_history=np.array(value_history))
def train_lstm(inputs, outputs, state_size, batch_size=256, param_scale=0.001,
               num_epochs=5, step_size=0.001):
    # split data (again) into a training and a validation set
    (tr_inputs, va_inputs), (tr_outputs, va_outputs) = util.split_data(
        inputs, out_data=outputs, frac=0.80)
    input_size = tr_inputs.shape[2]
    output_size = tr_outputs.shape[2]

    init_params = init_lstm_params(input_size, state_size, output_size,
                                   param_scale=param_scale, rs=npr.RandomState(0))

    num_batches = int(np.ceil(tr_inputs.shape[1] / batch_size))

    def batch_indices(iter):
        idx = iter % num_batches
        return slice(idx * batch_size, (idx + 1) * batch_size)

    # Define training objective
    def objective(params, iter):
        idx = batch_indices(iter)
        return -lstm_log_likelihood(params, tr_inputs[:, idx, :], tr_outputs[:, idx, :])

    # Get gradient of objective using autograd.
    objective_grad = grad(objective)

    print("     Epoch     |    Train accuracy  |    Train log-like  |  Holdout accuracy  |  Holdout log-like  ")

    def print_perf(params, iter, gradient):
        train_acc = accuracy(params, tr_inputs, tr_outputs)
        train_ll = -lstm_log_likelihood(params, tr_inputs, tr_outputs)
        valid_acc = accuracy(params, va_inputs, va_outputs)
        valid_ll = -lstm_log_likelihood(params, va_inputs, va_outputs)
        print("{:15}|{:20}|{:20}|{:20}|{:20}".format(
            iter // num_batches, train_acc, train_ll, valid_acc, valid_ll))

    # The optimizers provided can optimize lists, tuples, or dicts of parameters.
    optimized_params = adam(objective_grad, init_params, step_size=step_size,
                            num_iters=num_epochs, callback=print_perf)
    return optimized_params
def fun_mpi(dof, fun, N, output='sum'):
    '''MPI parallelization for fun(dof, ctrl), where ctrl is the numbering of
    ctrl's frequency calculation; N calculations in total.
    Returns the sum: sum_{ctrl=1 to N} fun(dof, ctrl)
    '''
    dof = comm.bcast(dof)

    Nloop = int(np.ceil(1.0 * N / size))  # number of calculations for each node
    val_i = []
    g_i = []
    val = []
    g = []

    for i in range(0, Nloop):
        ctrl = i * size + rank
        if ctrl < N:
            funi = lambda dof: fun(dof, ctrl)
            grad_fun = grad(funi)

            val = funi(dof)
            gval = grad_fun(dof)

            # include indexing for now, in case one is interested
            val_i.append([ctrl, val])
            g_i.append([ctrl, gval])

    # gather the solution
    val_i = comm.gather(val_i)
    g_i = comm.gather(g_i)

    # summation
    if rank == 0:
        val_i = [x for x in val_i if x]
        g_i = [x for x in g_i if x]
        val_i = npf.concatenate(npf.array(val_i))
        g_i = npf.concatenate(npf.array(g_i))

        # sindex = val_i[:,0].argsort()
        # val_i = val_i[sindex,1]
        # g_i = g_i[sindex,1]

        if output == 'sum':
            val = np.sum(val_i[:, 1])
            g = np.sum(g_i[:, 1])
        elif output == 'logsumexp':
            val = logsumexp(val_i[:, 1])
            g = np.zeros_like(g_i[0, 1])
            for i in range(N):
                g += g_i[i, 1] * np.exp(val_i[i, 1] - val)

    val = comm.bcast(val)
    g = comm.bcast(g)
    return val, g
def make_batch_iter(X, batch_size, max_iter):
    N, D = X.shape
    n_batches = int(np.ceil(N / batch_size))
    n_epochs = int(np.ceil(max_iter / n_batches))

    idx = np.arange(N)
    batch_sched = []
    for i in range(n_epochs):
        idx_shuffled = np.random.permutation(idx)
        batches = np.array_split(idx_shuffled, n_batches)
        batch_sched.append(batches)

    def get_batch(t):
        epoch = int(np.floor(t / n_batches))
        batch = t % n_batches
        epoch_idx = batch_sched[epoch]
        idx_batch = epoch_idx[batch]
        return X[idx_batch].reshape(len(idx_batch), D)

    return get_batch
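# Minimal usage sketch for make_batch_iter (sizes are illustrative only).
import numpy as np

X_demo = np.random.rand(50, 3)
get_batch = make_batch_iter(X_demo, batch_size=16, max_iter=20)
for t in range(20):
    X_t = get_batch(t)  # mini-batch for iteration t, reshuffled every epoch
    # ... one stochastic update using X_t ...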
def __init__(self, alpha=4.7, beta=1.5, integrate=False, boxsize=None):
    alpha = self.prepare_param(alpha, "alpha")
    beta = self.prepare_param(beta, "beta")
    assert len(alpha) == len(beta)
    assert integrate is False, "In-pixel integration not implemented (yet)!"
    if boxsize is None:
        boxsize = int(np.ceil(5 * np.max(alpha)))
    super().__init__(alpha, beta, integrate=integrate, boxsize=boxsize)
def get_batch_samples(iter_no, args, mdl):
    """Return inputs and outputs belonging to batch given iteration number."""
    if args.batch_size == 0:
        return None, None
    num_batches = int(np.ceil(len(mdl.inputs) / args.batch_size))
    mod_iter_no = iter_no % num_batches
    start = mod_iter_no * args.batch_size
    end = (mod_iter_no + 1) * args.batch_size
    inputs = mdl.inputs[start:end]
    targets = mdl.targets[start:end]
    return inputs, targets
def gen_galaxy_psf_image(th, u_s, img, xlim=None, ylim=None,
                         check_overlap=True, unconstrained=True, return_patch=True):
    """ generates the profile of a combination of exp/dev images.
        Calls the above function twice - once for each profile, and adds them together
    """
    # unpack shape params
    theta_s, sig_s, phi_s, rho_s = th[0:4]

    # generate unit flux model patch
    px, py = img.equa2pixel(u_s)
    galmix = MixtureOfGaussians.convex_combine(galaxy_profs, [theta_s, 1. - theta_s])
    Tinv = gen_galaxy_transformation(sig_s, rho_s, phi_s, img.cd_at_pixel(px, py))
    amix = galmix.apply_affine(Tinv, np.array([px, py]))
    cmix = amix.convolve(img.psf)

    # compute bounding box
    if xlim is None and ylim is None:
        bound = calc_bounding_radius(cmix.pis, cmix.means, cmix.covs,
                                     error=1e-5, center=np.array([px, py]))
        xlim = (np.max([0, np.floor(px - bound)]),
                np.min([img.nelec.shape[1], np.ceil(px + bound)]))
        ylim = (np.max([0, np.floor(py - bound)]),
                np.min([img.nelec.shape[0], np.ceil(py + bound)]))

    # compute values on grid
    return cmix.evaluate_grid(xlim, ylim), ylim, xlim
def RMSprop(self, g, w, x_train, y_train, lam, alpha, max_its, batch_size, **kwargs):
    # rmsprop params
    gamma = 0.9
    eps = 10**-8
    if 'gamma' in kwargs:
        gamma = kwargs['gamma']
    if 'eps' in kwargs:
        eps = kwargs['eps']

    # flatten the input function, create gradient based on flat function
    g_flat, unflatten, w = flatten_func(g, w)
    grad = value_and_grad(g_flat)

    # initialize average squared gradient
    avg_sq_grad = np.ones(np.size(w))

    # record history
    num_train = y_train.size
    w_hist = [unflatten(w)]
    train_hist = [g_flat(w, x_train, y_train, lam, np.arange(num_train))]

    # how many mini-batches equal the entire dataset?
    num_batches = int(np.ceil(np.divide(num_train, batch_size)))

    # over the line
    for k in range(max_its):
        # loop over each minibatch
        for b in range(num_batches):
            # collect indices of current mini-batch
            batch_inds = np.arange(b * batch_size, min((b + 1) * batch_size, num_train))

            # plug in value into func and derivative
            cost_eval, grad_eval = grad(w, x_train, y_train, lam, batch_inds)
            grad_eval.shape = np.shape(w)

            # update exponential average of past squared gradients
            avg_sq_grad = gamma * avg_sq_grad + (1 - gamma) * grad_eval**2

            # take descent step
            w = w - alpha * grad_eval / (avg_sq_grad**(0.5) + eps)

        # update training cost
        train_cost = g_flat(w, x_train, y_train, lam, np.arange(num_train))

        # record weight update and train cost
        w_hist.append(unflatten(w))
        train_hist.append(train_cost)

    return w_hist, train_hist
def augment_times(t, h):
    """
    Pads the vector t so that the maximum spacing is h.

    Parameters
    ----------
    t : one dimensional array like
    h : float
    """
    inds = [0]
    res = [t[0]]
    for ta, tb in zip(t[:-1], t[1:]):
        N = np.ceil((tb - ta) / h + 1)
        N = int(N)
        _tt = np.linspace(ta, tb, N)
        res = np.concatenate((res, _tt[1:]))
        inds.append(inds[-1] + N - 1)
    return res, np.array(inds)
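# Minimal sketch of augment_times on a coarse grid (illustrative values only).
import numpy as np

t_demo = np.array([0.0, 1.0, 3.5])
t_aug, inds = augment_times(t_demo, h=0.5)
print(np.max(np.diff(t_aug)))  # spacing never exceeds 0.5
print(t_aug[inds])             # recovers the original grid [0.  1.  3.5]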
def plot_images(images, ax, ims_per_row=5, padding=5, digit_dimensions=(28, 28),
                cmap=matplotlib.cm.binary, vmin=None, vmax=None):
    """Images should be a (N_images x pixels) matrix."""
    N_images = images.shape[0]
    N_rows = int(np.ceil(float(N_images) / ims_per_row))
    pad_value = np.min(images.ravel())
    concat_images = np.full(((digit_dimensions[0] + padding) * N_rows + padding,
                             (digit_dimensions[1] + padding) * ims_per_row + padding),
                            pad_value)
    for i in range(N_images):
        cur_image = np.reshape(images[i, :], digit_dimensions)
        row_ix = i // ims_per_row
        col_ix = i % ims_per_row
        row_start = padding + (padding + digit_dimensions[0]) * row_ix
        col_start = padding + (padding + digit_dimensions[1]) * col_ix
        concat_images[row_start: row_start + digit_dimensions[0],
                      col_start: col_start + digit_dimensions[1]] = cur_image
    cax = ax.matshow(concat_images, cmap=cmap, vmin=vmin, vmax=vmax)
    plt.xticks(np.array([]))
    plt.yticks(np.array([]))
    return cax
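# Hedged usage sketch for plot_images with synthetic data (not from the original source).
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

fake_digits = np.random.rand(12, 28 * 28)  # 12 random 28x28 "images"
fig, ax = plt.subplots()
plot_images(fake_digits, ax, ims_per_row=4)
plt.show()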
def run_expt(config, loss_opt=0):
    ttl = config_to_str(config)
    print '\nstarting experiment {}'.format(ttl)
    print config

    Xtrain, Ytrain, params_true, true_fun, fun_name = \
        demo.make_data_linreg_1d(config['N'], config['fun_type'])
    data_dim = Xtrain.shape[1]
    N = Xtrain.shape[0]
    Xtrain, Ytrain = opt.shuffle_data(Xtrain, Ytrain)

    model_type = config['model_type']
    if model_type == 'linear':
        model = LinregModel(data_dim, add_ones=True)
        params, loss = model.ols_fit(Xtrain, Ytrain)
    elif model_type[0:3] == 'mlp':
        _, layer_sizes = model_type.split(':')
        layer_sizes = [int(n) for n in layer_sizes.split('-')]
        model = MLP(layer_sizes, 'regression', L2_reg=0.001)
    else:
        raise ValueError('unknown model type {}'.format(model_type))

    initial_params = model.init_params()
    param_dim = len(initial_params)

    plot_data = (data_dim == 1)
    plot_params = (param_dim == 2)
    nplots = 1
    if plot_data:
        nplots += 1
    if plot_params:
        nplots += 1
    plot_rows, plot_cols = util.nsubplots(nplots)

    if config['optimizer'] == 'BFGS':
        obj_fun = lambda params: model.PNLL(params, Xtrain, Ytrain)
        grad_fun = autograd.grad(obj_fun)
        logger = opt.OptimLogger(lambda params: obj_fun(params), eval_freq=1,
                                 store_freq=1, print_freq=1)
        params, obj = opt.bfgs(obj_fun, grad_fun, initial_params,
                               config['num_epochs'], logger.callback)

    if config['optimizer'] == 'SGD':
        B = config['batch_size']
        M = N / B  # num_minibatches_per_epoch (num iter per epoch)
        max_iters = config['num_epochs'] * M
        grad_fun_with_iter = opt.build_batched_grad(model.gradient, config['batch_size'],
                                                    Xtrain, Ytrain)
        #obj_fun = opt.build_batched_grad(model.PNLL, config['batch_size'], Xtrain, Ytrain)
        obj_fun = lambda params: model.PNLL(params, Xtrain, Ytrain)
        sf = config.get('store_freq', M)
        logger = opt.OptimLogger(obj_fun, eval_freq=sf, store_freq=sf, print_freq=0)
        sgd_fun = config['sgd_fun']

        if config['lr_tune'] == True:
            eval_fun = lambda params: model.PNLL(params, Xtrain, Ytrain)
            lr, lrs, scores = opt.lr_tuner(eval_fun, 'grid', sgd_fun, grad_fun_with_iter,
                                           initial_params, int(np.ceil(max_iters * 0.1)))
            print 'lr tuner chose lr {:0.3f}'.format(lr)
            print lrs
            print scores
            config['lr_init'] = lr

        lr_fun = lambda iter: opt.lr_exp_decay(iter, config['lr_init'],
                                               config['lr_decay'], config['lr_step'])
        params, obj = sgd_fun(obj_fun, grad_fun_with_iter, initial_params,
                              max_iters, logger.callback, lr_fun)

    training_loss = model.PNLL(params, Xtrain, Ytrain)
    print 'finished fitting, training loss {:0.3g}, {} obj calls, {} grad calls'.\
        format(training_loss, model.num_obj_fun_calls, model.num_grad_fun_calls)

    fig = plt.figure()
    ax = fig.add_subplot(plot_rows, plot_cols, 1)
    opt.plot_loss_trace(logger.eval_trace, loss_opt, ax)
    ax.set_title('final objective {:0.3g}'.format(training_loss))
    ax.set_xlabel('epochs')

    if plot_data:
        ax = fig.add_subplot(plot_rows, plot_cols, 2)
        predict_fun = lambda X: model.predictions(params, X)
        demo.plot_data_and_predictions_1d(Xtrain, Ytrain, true_fun, predict_fun, ax)

    if plot_params:
        ax = fig.add_subplot(plot_rows, plot_cols, 3)
        loss_fun = lambda w0, w1: model.PNLL(np.array([w0, w1]), Xtrain, Ytrain)
        demo.plot_error_surface_2d(loss_fun, params, params_true, config['fun_type'], ax)
        demo.plot_param_trace_2d(logger.param_trace, ax)

    ttl = config_to_str(config)  # recompute in case lr has been estimated
    fig.suptitle(ttl)
    folder = 'figures/linreg-sgd'
    fname = os.path.join(folder, 'linreg_1d_sgd_{}.png'.format(ttl))
    plt.savefig(fname)
    return training_loss
axarr[1].plot(emcee_sampler.lnprobability[0][w])
plt.show()

plt.hist(emcee_sampler.flatchain[0][::10, 0], 50);
plt.show()

##########################################################################
############ DEBUG #######################################################
##########################################################################
if False:
    names = np.concatenate([ ["z"], ["w_%d"%i for i in range(B.shape[0])], ["mu"] ])
    w_samps = np.exp(samps[:, 1:(B.shape[0]+1)])
    w_samps /= np.sum(w_samps, axis=1, keepdims=True)
    fig, axarr = plt.subplots(2, int(np.ceil(samps.shape[1]/2.)), figsize=(8, 6))
    for i, ax in enumerate(axarr.flatten()):
        ## histogram red shift, show
        if i >= 1 and i < B.shape[0]+1:
            pltsamps = w_samps[:, i-1]
        else:
            pltsamps = samps[Nsamps//2:, i]
        cnts, bins, patches = ax.hist(pltsamps, 15, normed=True, alpha=.35)
        ax.vlines(pltsamps.mean(), 0, cnts.max(), linewidth=4, color="green",
                  label='$E[z_{photo}]$')
        if names[i] == "z":
            ax.vlines(z_n, 0, cnts.max(), linewidth=4, color="black", label='$z_{spec}$')
        ax.legend(fontsize=15)
        ax.set_title("%s"%names[i], fontsize=18)
    #plt.savefig("z_compare_idx_%d.pdf"%n, bbox_inches='tight')
ax1.set_xlabel('video age (day)', fontsize=24)
ax1.set_xlim([0, 125])
ax1.set_ylim(ymin=max(0, ax1.get_ylim()[0]))
ax2.set_ylim([0, 1])
ax1.tick_params('y', colors='b')
ax2.tick_params('y', colors='k')

annotated_str = r'ID: {0}'.format(vid)
annotated_str += '\n'
annotated_str += r'$C$: {0:.4f}, $\lambda$: {1:.4f}'.format(*optimizer1.x)
ax2.text(120, 0.77, annotated_str, horizontalalignment='right', fontsize=24)

ax2.set_xticks([0, 40, 80, 120])
display_min = int(np.floor(min(daily_view) / 100) * 100)
display_max = int(np.ceil(max(daily_view) / 100) * 100)
ax1.set_yticks([display_min, (display_min + display_max) / 2, display_max])
ax2.set_yticks([0.0, 0.5, 1.0])
for ax in [ax1, ax2]:
    plt.setp(ax.yaxis.get_majorticklabels(), rotation=90)
    ax.tick_params(axis='both', which='major', labelsize=24)

plt.legend([plt.Line2D((0, 1), (0, 0), color='k', linestyle='--'),
            plt.Line2D((0, 1), (0, 0), color='b'),
            plt.Line2D((0, 1), (0, 0), color='r')],
           ['Observed relative engagement', 'Observed view series', 'Fitted relative engagement'],
           fontsize=18, frameon=False, handlelength=1,
           loc='lower center', bbox_to_anchor=(0.5, -1.75), ncol=2)

plt.title('(a)', fontsize=24)
plt.tight_layout(rect=[0, 0.08, 1, 1], h_pad=0)
plt.show()
dsc_layer_sizes = [784, 200, 1]

# Training parameters
param_scale = 0.001
batch_size = 100
num_epochs = 50
step_size_max = 0.01
step_size_min = 0.01

print("Loading training data...")
N, train_images, _, test_images, _ = load_mnist()

init_gen_params = init_random_params(param_scale, gen_layer_sizes)
init_dsc_params = init_random_params(param_scale, dsc_layer_sizes)

num_batches = int(np.ceil(len(train_images) / batch_size))

def batch_indices(iter):
    idx = iter % num_batches
    return slice(idx * batch_size, (idx + 1) * batch_size)

# Define training objective
seed = npr.RandomState(0)

def objective(gen_params, dsc_params, iter):
    idx = batch_indices(iter)
    return gan_objective(gen_params, dsc_params, train_images[idx],
                         batch_size, noise_dim, seed)

# Get gradients of objective using autograd.
both_objective_grad = multigrad(objective, argnums=[0, 1])

print(" Epoch | Objective | Fake probability | Real Probability ")