def run_coordinate_descent(self):
    w = copy.deepcopy(self.w_init)
    self.w_hist = []
    self.w_hist.append(copy.deepcopy(w))
    for j in range(int(self.max_its)):
        # plug in value into func and derivative
        grad_eval = self.grad(w)

        # loop over coordinates
        for k in range(len(w)):
            # strip gradient of k^th coordinate
            coord_grad = copy.deepcopy(grad_eval[k])

            # normalize direction or not?
            if self.version == 'normalized':
                # normalize direction; if it is exactly zero, move in a random direction
                grad_norm = np.linalg.norm(coord_grad)
                if grad_norm == 0:
                    coord_grad = np.sign(2 * np.random.rand(1) - 1)[0]
                else:
                    coord_grad = np.sign(coord_grad)

            ### check what sort of steplength rule to employ ###
            grad_temp = copy.deepcopy(grad_eval)
            grad_temp[k] = coord_grad
            alpha = self.exact(w, grad_temp)

            # take coordinate descent step - update single weight
            w[k] -= alpha * coord_grad

            # record each coordinate descent step for visualization
            self.w_hist.append(copy.deepcopy(w))
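# Illustrative standalone sketch (hypothetical, not part of the class above):
# the same coordinate-descent idea on a quadratic g(w) = 0.5 * w @ Q @ w,
# where the exact steplength along coordinate k is (Q @ w)[k] / Q[k, k].
import numpy as np

Q = np.array([[3.0, 1.0],
              [1.0, 2.0]])              # positive definite, so the minimizer is w = 0
w = np.array([1.0, -2.0])
for _ in range(25):
    for k in range(len(w)):
        w[k] -= (Q[k] @ w) / Q[k, k]    # exact minimization along coordinate k
print(w)  # converges toward [0., 0.]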
def run(backend=SUPPORTED_BACKENDS[0], quiet=True):
    n = 128
    matrix = rnd.randn(n, n)
    matrix = 0.5 * (matrix + matrix.T)

    cost, egrad = create_cost_egrad(backend, matrix)
    manifold = Sphere(n)
    problem = pymanopt.Problem(manifold, cost=cost, egrad=egrad)
    if quiet:
        problem.verbosity = 0

    solver = SteepestDescent()
    estimated_dominant_eigenvector = solver.solve(problem)

    if quiet:
        return

    # Calculate the actual solution by a conventional eigenvalue decomposition.
    eigenvalues, eigenvectors = la.eig(matrix)
    dominant_eigenvector = eigenvectors[:, np.argmax(eigenvalues)]

    # Make sure both vectors have the same direction. Both are valid
    # eigenvectors, but for comparison we need to get rid of the sign
    # ambiguity.
    if np.sign(dominant_eigenvector[0]) != np.sign(
            estimated_dominant_eigenvector[0]):
        estimated_dominant_eigenvector = -estimated_dominant_eigenvector

    # Print information about the solution.
    print("l2-norm of x: %f" % la.norm(dominant_eigenvector))
    print("l2-norm of xopt: %f" % la.norm(estimated_dominant_eigenvector))
    print("Solution found: %s" % np.allclose(
        dominant_eigenvector, estimated_dominant_eigenvector, rtol=1e-3))
    error_norm = la.norm(dominant_eigenvector - estimated_dominant_eigenvector)
    print("l2-error: %f" % error_norm)
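# Illustrative cross-check sketch (plain NumPy, no pymanopt): power iteration
# on a shifted matrix recovers the same dominant (largest-eigenvalue)
# eigenvector; the shift makes the largest eigenvalue also largest in magnitude.
import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((128, 128))
A = 0.5 * (A + A.T)
shift = np.abs(A).sum(axis=1).max()   # upper bound on the spectral radius
v = rng.standard_normal(128)
for _ in range(500):
    v = (A + shift * np.eye(128)) @ v
    v /= np.linalg.norm(v)
# v now approximates eigenvectors[:, np.argmax(eigenvalues)] up to sign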
def test_beta_rules(self, beta_rule):
    optimizer = ConjugateGradient(beta_rule=beta_rule, verbosity=0)
    result = optimizer.run(self.problem)
    estimated_dominant_eigenvector = result.point
    if np.sign(self.dominant_eigenvector[0]) != np.sign(
            estimated_dominant_eigenvector[0]):
        estimated_dominant_eigenvector = -estimated_dominant_eigenvector
    np_testing.assert_allclose(
        self.dominant_eigenvector,
        estimated_dominant_eigenvector,
        atol=1e-6,
    )
def maneuverPoses(self, pose_s, r_1, alpha_1, r_2, alpha_2):
    '''
    Calculates the middle and final poses of the turn-around maneuver given
    the starting pose and the desired turning radius and arc angle of the two
    segments.

    Args:
    -----
    pose_s: Pose2D object containing (x,y) position and yaw angle, theta
    r_1: turning radius of the first section (positive means steering wheel
        is turned to the left, negative is turned to the right)
    alpha_1: arc angle of the first section (positive means traveling in the
        forward direction, negative is backwards)
    r_2: turning radius of the second section (should be positive, will be
        made the opposite sign of r_1)
    alpha_2: arc angle of the second section (should be positive, will be
        made the opposite sign of alpha_1)

    Returns:
    --------
    [pose_m, pose_f]: the middle and final pose values as Pose2D objects
    '''
    # Unpack pose values
    x_s = pose_s.x
    y_s = pose_s.y
    theta_s = pose_s.theta

    # Calculate parameters for the middle pose
    theta_m = theta_s + np.sign(r_1) * alpha_1
    pos_s = np.array([x_s, y_s])
    R1 = rotZ2D(theta_s)
    pos_m = pos_s + np.dot(
        R1,
        np.array([np.abs(r_1) * np.sin(alpha_1),
                  r_1 * (1 - np.cos(alpha_1))]))

    # Make second parameters opposite signs of the first
    r_2 = -np.sign(r_1) * r_2
    alpha_2 = -np.sign(alpha_1) * alpha_2

    # Calculate parameters for the final pose
    theta_f = theta_m + np.sign(r_2) * alpha_2
    R2 = rotZ2D(theta_m)
    pos_f = pos_m + np.dot(
        R2,
        np.array([np.abs(r_2) * np.sin(alpha_2),
                  r_2 * (1 - np.cos(alpha_2))]))

    # Save middle and final pose as Pose2D objects
    pose_m = Pose2D(pos_m[0], pos_m[1], theta_m)
    pose_f = Pose2D(pos_f[0], pos_f[1], theta_f)

    return [pose_m, pose_f]
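# Helper sketch: rotZ2D is not shown above; it is assumed to be the standard
# counter-clockwise 2D rotation matrix, under which the local arc displacement
# [|r| sin(alpha), r (1 - cos(alpha))] maps correctly into the world frame.
import numpy as np

def rotZ2D(theta):
    c, s = np.cos(theta), np.sin(theta)
    return np.array([[c, -s],
                     [s, c]])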
def gen_amp_mod_At(T, D_roi):
    # baseline: weak self-connections on the diagonal at every time step
    At = np.array([0.4 * np.eye(D_roi) for _ in range(T)])
    # smoothly amplitude-modulated off-diagonal couplings with random scale and sign
    f01 = np.sin(np.linspace(0., 2 * np.pi, num=T))
    f10 = -4. * np.sin(np.linspace(0., 2 * np.pi, num=T) + 1.2) * f01
    At[:, 0, 1] = f01 * np.random.rand() * np.sign(np.random.randn())
    At[:, 1, 0] = f10 * np.random.rand() * np.sign(np.random.randn())
    At[-1] = np.zeros((D_roi, D_roi))
    return At
def inds_to_effect_change(leverage, desired_delta):
    # Argsort sorts low to high.
    # We are removing points, so multiply by -1.
    sort_inds = np.argsort(leverage * np.sign(desired_delta))
    deltas = -1 * np.cumsum(leverage[sort_inds])
    change_sign_inds = np.argwhere(
        np.sign(desired_delta) * (desired_delta - deltas) <= 0.)
    if len(change_sign_inds) > 0:
        first_ind_change_sign = np.min(change_sign_inds)
        remove_inds = sort_inds[:(first_ind_change_sign + 1)]
        return remove_inds
    else:
        return None
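# Usage sketch (synthetic numbers, inds_to_effect_change assumed in scope):
# find the smallest prefix of influential points whose removal shifts the
# effect by at least the desired amount.
import numpy as np

leverage = np.array([0.3, -0.1, 0.4, 0.05, -0.2])
print(inds_to_effect_change(leverage, desired_delta=0.25))  # -> [4 1]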
def run_gradient_descent(self):
    w = self.w_init
    self.w_hist = []
    self.w_hist.append(w)
    for j in range(int(self.max_its)):
        # plug in value into func and derivative
        grad_eval = self.grad(w)

        ### L1, L2, or Linf? L2 by default ###
        # L1 steepest descent
        if self.version == 'L1':
            # take absolute value of each entry in grad vector
            grad_abs = np.abs(grad_eval)
            best_val = np.max(grad_abs)
            ind_best = np.argwhere(grad_abs == best_val)
            new_grad = np.zeros((len(grad_eval)))
            new_grad[ind_best] = np.sign(grad_eval[ind_best])
            grad_eval = new_grad

        # Linf steepest descent
        elif self.version == 'Linf':
            grad_eval = np.sign(grad_eval)

        # normalize direction; if it is exactly zero, move in a random direction
        grad_norm = np.linalg.norm(grad_eval)
        if grad_norm == 0:
            grad_eval = 2 * np.random.rand(len(w)) - 1
            grad_norm = np.linalg.norm(grad_eval)
        grad_eval /= grad_norm

        ### check what sort of steplength rule to employ ###
        if self.steplength == 'diminishing':
            alpha = 1 / (1 + j)
        elif self.steplength == 'backtracking':
            alpha = self.backtracking(w, grad_eval)
        elif self.steplength == 'exact':
            alpha = self.exact(w, grad_eval)
        else:
            alpha = float(self.steplength)

        # take gradient descent step
        w = w - alpha * grad_eval

        # record
        self.w_hist.append(w)
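# Illustration: the three steepest-descent flavors above pick different
# directions from the same gradient (L1 keeps a single coordinate, L2 rescales
# to unit length, Linf keeps only the sign pattern).
import numpy as np

g = np.array([3.0, -1.0, 0.5])
k = np.argmax(np.abs(g))
d_L1 = np.zeros_like(g)
d_L1[k] = np.sign(g[k])         # [1., 0., 0.]
d_L2 = g / np.linalg.norm(g)    # unit-length gradient
d_Linf = np.sign(g)             # [1., -1., 1.]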
def corr_spline_grad(D, theta):
    # spline correlation values (same pieces as the corresponding corr function)
    ss = np.zeros(D.shape)
    xi = np.abs(D) * theta
    I = np.where(xi <= 0.2)
    ss[I] = 1 - xi[I]**2 * (15 - 30 * xi[I])
    I = np.where(np.logical_and(xi > 0.2, xi < 1.0))
    ss[I] = 1.25 * (1 - xi[I])**3

    # piecewise derivative with respect to D
    dr = np.zeros(D.shape)
    m, n = D.shape
    u = np.sign(D) * theta
    I = np.where(xi <= 0.2)  # pieces are defined by xi, matching ss above
    dr[I] = u[I] * ((90 * xi[I] - 30) * xi[I])
    I = np.where(np.logical_and(xi > 0.2, xi < 1.0))
    dr[I] = -3.75 * u[I] * (1 - xi[I])**2  # d/dD of 1.25 * (1 - xi)**3

    # product rule across dimensions
    for j in range(n):
        _ss = np.copy(ss)
        _ss[:, j] = dr[:, j]
        dr[:, j] = np.prod(_ss, axis=1)
    return dr
def gradient_descent_beta(g, w, alpha, max_its, beta, version):
    # flatten the input function, create gradient based on flat function
    g_flat, unflatten, w = flatten_func(g, w)
    grad = compute_grad(g_flat)

    # record history
    w_hist = []
    w_hist.append(unflatten(w))

    # start gradient descent loop
    z = np.zeros((np.shape(w)))  # momentum term

    for k in range(max_its):
        # plug in value into func and derivative
        grad_eval = grad(w)
        grad_eval.shape = np.shape(w)

        ### normalized or unnormalized descent step? ###
        if version == 'normalized':
            grad_norm = np.linalg.norm(grad_eval)
            if grad_norm == 0:
                grad_norm += 10**-6 * np.sign(2 * np.random.rand(1) - 1)
            grad_eval /= grad_norm

        # take descent step with momentum
        z = beta * z + grad_eval
        w = w - alpha * z

        # record weight update
        w_hist.append(unflatten(w))

    return w_hist
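# Standalone sketch of the same momentum update on a toy quadratic
# g(w) = w1**2 + 10 * w2**2 (hypothetical example; no flatten_func /
# compute_grad machinery needed).
import numpy as np

w = np.array([1.0, 1.0])
z = np.zeros_like(w)
beta, alpha = 0.8, 0.02
for _ in range(300):
    grad_eval = np.array([2.0 * w[0], 20.0 * w[1]])
    z = beta * z + grad_eval        # exponentially-averaged descent direction
    w = w - alpha * z
print(w)  # approaches the minimizer [0., 0.]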
def _pHfromTAVX(TA, VX, totals, k_constants, initialfunc, deltafunc):
    """Calculate pH from total alkalinity and DIC or one of its components
    using a Newton-Raphson iterative method.

    Although it is coded for H on the total pH scale, for the pH values
    occurring in seawater (pH > 6) it will be equally valid on any pH scale
    (H terms negligible) as long as the K constants are on that scale.

    Based on the CalculatepHfromTA* functions, version 04.01, Oct 96, by
    Ernie Lewis.
    """
    # First guess inspired by M13/OE15, added v1.3.0:
    pH = initialfunc(TA, VX, totals["TB"], k_constants["K1"],
                     k_constants["K2"], k_constants["KB"])
    deltapH = 1.0 + pHTol
    while np.any(np.abs(deltapH) >= pHTol):
        pHdone = np.abs(deltapH) < pHTol  # check which rows don't need updating
        deltapH = deltafunc(pH, TA, VX, totals, k_constants)  # the pH jump
        # To keep the jump from being too big:
        abs_deltapH = np.abs(deltapH)
        sign_deltapH = np.sign(deltapH)
        # Jump by 1 instead if |deltapH| > 5
        deltapH = np.where(abs_deltapH > 5.0, sign_deltapH, deltapH)
        # Jump by 0.5 instead if 0.5 < |deltapH| <= 5
        deltapH = np.where(
            (abs_deltapH > 0.5) & (abs_deltapH <= 5.0),
            0.5 * sign_deltapH,
            deltapH,
        )
        # assumes that once we're within 1 of the correct pH, we will converge
        pH = np.where(pHdone, pH, pH + deltapH)  # only update rows that need it
    return pH
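# Minimal sketch (not the PyCO2SYS API) of the same safeguarded-Newton idea on
# a scalar function: clip oversized Newton jumps so the iterate cannot overshoot.
import numpy as np

def newton_clipped(f, fprime, x, tol=1e-10, max_step=0.5):
    dx = 2 * tol
    while np.abs(dx) >= tol:
        dx = -f(x) / fprime(x)                  # raw Newton jump
        dx = np.clip(dx, -max_step, max_step)   # keep the jump from being too big
        x += dx
    return x

print(newton_clipped(lambda x: x**3 - 2.0, lambda x: 3 * x**2, x=1.0))  # ~2**(1/3)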
def draw_decision_boundary(self, ax, **kwargs):
    # control viewing limits
    minx = min(self.x[0, :])
    maxx = max(self.x[0, :])
    gapx = (maxx - minx) * 0.1
    minx -= gapx
    maxx += gapx

    miny = min(self.x[1, :])
    maxy = max(self.x[1, :])
    gapy = (maxy - miny) * 0.1
    miny -= gapy
    maxy += gapy

    r = np.linspace(minx, maxx, 200)
    s = np.linspace(miny, maxy, 200)
    w1_vals, w2_vals = np.meshgrid(r, s)
    w1_vals.shape = (len(r)**2, 1)
    w2_vals.shape = (len(s)**2, 1)
    h = np.concatenate([w1_vals, w2_vals], axis=1)
    g_vals = self.model(h.T)
    g_vals = np.asarray(g_vals)

    # vals for cost surface
    w1_vals.shape = (len(r), len(s))
    w2_vals.shape = (len(r), len(s))
    g_vals.shape = (len(r), len(s))

    # plot separator curve in right plot
    ax.contour(w1_vals, w2_vals, g_vals, colors='k', levels=[0],
               linewidths=3, zorder=1)

    # plot color filled contour based on separator
    g_vals = np.sign(g_vals) + 1
    ax.contourf(w1_vals, w2_vals, g_vals, colors=self.color_opts[:],
                alpha=0.1, levels=range(0, 2 + 1))
def test_sign():
    fun = lambda x: 3.0 * np.sign(x)
    d_fun = grad(fun)
    check_grads(fun, 1.1)
    check_grads(fun, -1.1)
    check_grads(d_fun, 1.1)
    check_grads(d_fun, -1.1)
def soft_thr(x, lambdaPar, lower=None, upper=None):
    # soft thresholding: shrink each entry toward zero by lambdaPar
    out = np.sign(x) * np.fmax(np.abs(x) - lambdaPar, 0)
    if lower is not None:
        out[out < lower] = 0.0
    if upper is not None:
        out[out > upper] = 0.0
    return out
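# Usage sketch (soft_thr above assumed in scope): soft thresholding is the
# proximal operator of the L1 norm, zeroing anything within lambdaPar of zero.
import numpy as np

x = np.array([-3.0, -0.5, 0.0, 0.4, 2.0])
print(soft_thr(x, lambdaPar=1.0))  # [-2. -0.  0.  0.  1.]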
def __call__(self, state):
    x, theta, x_dot, theta_dot = state
    if theta > np.pi:
        alpha = -(2. * np.pi - theta)
    else:
        alpha = theta

    dyna = self.dynamics
    Mp = self.dynamics.mp
    pl = self.dynamics.pl
    Jp = self.dynamics.Jp

    Ek = Jp / 2. * theta_dot**2
    Ep = Mp * dyna.g * pl * (1. - np.cos(theta + np.pi))  # E(pi) = 0, E(0) = E(2pi) = 2 mgl
    Er = 2 * Mp * dyna.g * pl  # = 2 mgl

    if np.abs(alpha) < 0.1745:  # within ~10 degrees of upright
        # linear PD balancing controller
        u = np.matmul(self.Kpd, np.array([x, alpha, x_dot, theta_dot]))
    else:
        # energy-based swing-up controller
        self.u_max = 180
        u = np.clip(
            self.ke * ((Ep + Ek) - Er)
            * np.sign(theta_dot * np.cos(theta + np.pi))
            + self.kp * (0.0 - x),
            -self.u_max, self.u_max)

    Vm = (dyna.Jeq * dyna.Rm * dyna.r_mp * u) \
        / (dyna.eta_g * dyna.Kg * dyna.eta_m * dyna.Kt) \
        + dyna.Kg * dyna.Km * x_dot / dyna.r_mp
    Vm = np.clip(Vm, -self.v_max, self.v_max)
    return np.array([Vm])
def run_gradient_descent(self):
    w = self.w_init
    self.w_hist = []
    self.w_hist.append(w)
    for j in range(int(self.max_its)):
        # plug in value into func and derivative
        grad_eval = self.grad(w)

        # normalized or unnormalized?
        if self.version == 'normalized':
            grad_norm = np.linalg.norm(grad_eval)
            if grad_norm == 0:
                grad_norm += 10**-6 * np.sign(2 * np.random.rand(1) - 1)
            grad_eval /= grad_norm

        # check if diminishing steplength rule used
        if self.steplength == 'diminishing':
            alpha = 1 / (1 + j)
        else:
            alpha = float(self.steplength)

        # take gradient descent step
        w = w - alpha * grad_eval

        # record
        self.w_hist.append(w)
def erf(x):
    # closed-form approximation to the error function (Winitzki-style)
    cst_erf = 8.0 / (3.0 * np.pi) * (np.pi - 3.0) / (4.0 - np.pi)
    return \
        np.sign(x) * \
        np.sqrt(1 - np.exp(-x * x * (4 / np.pi + cst_erf * x * x) /
                           (1 + cst_erf * x * x)))
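# Illustrative accuracy check (erf above assumed in scope): against the
# standard library's erf, the approximation agrees to roughly 1e-4.
import math

for x in [-2.0, -0.5, 0.3, 1.0]:
    print(x, erf(x) - math.erf(x))  # absolute error on the order of 1e-4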
def _center_cart(self, verbose=False):
    t_max = 10.0
    if verbose:
        print("\tCentering the Cart:\t\t", end="")

    # Center the cart:
    t0 = time.time()
    state = self._zero_sim_step()
    while (time.time() - t0) < t_max:
        a = -np.sign(state[0]) * 1.5 * np.ones(1)
        state = self._sim_step(a)
        if np.abs(state[0]) <= self.c_lim / 10.:
            break

    # Stop the cart:
    state = self._zero_sim_step()
    time.sleep(0.5)

    if np.abs(state[0]) > self.c_lim:
        if verbose:
            print("\u274C")  # cross mark
        time.sleep(0.1)
        raise RuntimeError(
            "Centering of the cart failed. |x| = {0:.2f} > {1:.2f}".format(
                np.abs(state[0]), self.c_lim))
    elif verbose:
        print("\u2713")  # check mark
def run_gradient_descent(self, alpha):
    w = self.w_init
    self.w_hist = []
    self.w_hist.append(w)
    for j in range(int(self.max_its)):
        # plug in value into func and derivative
        grad_eval = float(self.grad(w))

        # normalized or unnormalized?
        if self.version == 'normalized':
            grad_norm = abs(grad_eval)
            if grad_norm == 0:
                grad_norm += 10**-6 * np.sign(2 * np.random.rand(1) - 1)
            grad_eval /= grad_norm

        # take gradient descent step
        w = w - alpha * grad_eval

        # record
        self.w_hist.append(w)
def multiclass_counting_cost(self, w):
    all_evals = self.model(self.x, w)
    y_predict = (np.argmax(all_evals, axis=1))[:, np.newaxis]
    count = np.sum(np.abs(np.sign(self.y - y_predict)))
    return count
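# Illustration: |sign(y - y_predict)| is 1 exactly when two labels differ, so
# the sum above counts misclassified points for any integer class labels.
import numpy as np

y = np.array([0, 1, 2, 1])
y_predict = np.array([0, 2, 2, 1])
print(np.sum(np.abs(np.sign(y - y_predict))))  # 1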
def plot_fit(self, weights, **kwargs):
    # construct figure
    fig = plt.figure(figsize=(9, 4))

    # create subplot with 3 panels (outer two are empty spacers)
    gs = gridspec.GridSpec(1, 3, width_ratios=[1, 5, 1])
    ax1 = plt.subplot(gs[0])
    ax1.axis('off')
    ax = plt.subplot(gs[1])
    ax3 = plt.subplot(gs[2])
    ax3.axis('off')

    # set plotting limits
    xmax = copy.deepcopy(max(self.x))
    xmin = copy.deepcopy(min(self.x))
    xgap = (xmax - xmin) * 0.25
    xmin -= xgap
    xmax += xgap

    ymax = max(self.y)
    ymin = min(self.y)
    ygap = (ymax - ymin) * 0.25
    ymin -= ygap
    ymax += ygap

    # initialize points
    ax.scatter(self.x, self.y, color='k', edgecolor='w', linewidth=0.9, s=80)

    # clean up panel
    ax.set_xlim([xmin, xmax])
    ax.set_ylim([ymin, ymax])

    # label axes
    ax.set_xlabel(r'$x$', fontsize=12)
    ax.set_ylabel(r'$y$', rotation=0, fontsize=12)

    # create fit
    s = np.linspace(xmin, xmax, 300)
    colors = ['k', 'magenta']
    if 'colors' in kwargs:
        colors = kwargs['colors']
    c = 0
    transformer = lambda a: a
    if 'transformer' in kwargs:
        transformer = kwargs['transformer']

    # plot approximation
    l = weights[0] + weights[1] * transformer(s)
    t = np.tanh(l).flatten()
    ax.plot(s, t, linewidth=2, color='r', zorder=3)

    # plot counting cost
    t = np.sign(l).flatten()
    ax.plot(s, t, linewidth=4, color='b', zorder=2)
def full_relaxed_fit(self):
    self.c = 0
    step = 0
    for epoch in tqdm(range(self.n_epoch)):
        random.shuffle(self.i_['train'])
        for i_batch in range(self.n_batches):
            step += 1
            if step % 100 == 0:
                self.compute_metrics(step)
                self.loss(self.mu, self.w, self.V, self.item_bias,
                          self.item_embed, self.item_slopes, display=True)
            if self.fair and step > 0 and step % 50 == 0:
                auc_1 = self.relaxed_auc('valid', '_1', self.mu, self.w,
                                         self.V, self.item_bias,
                                         self.item_embed, self.item_slopes,
                                         10000)
                auc_0 = self.relaxed_auc('valid', '_0', self.mu, self.w,
                                         self.V, self.item_bias,
                                         self.item_embed, self.item_slopes,
                                         10000)
                # nudge the fairness trade-off toward the disadvantaged group
                self.c += np.sign(auc_1 - auc_0) * 0.01
                self.c = np.clip(self.c, -1, 1)
            self.prepare_batch(i_batch)

            gradient = grad(lambda w: self.auc_loss(
                self.c, self.mu, w, self.V, self.item_bias,
                self.item_embed, self.item_slopes))(self.w)
            self.w -= self.GAMMA * gradient
            self.item_bias -= self.GAMMA * grad(
                lambda item_bias: self.auc_loss(
                    self.c, self.mu, self.w, self.V, item_bias,
                    self.item_embed, self.item_slopes))(self.item_bias)
            self.item_slopes -= self.GAMMA * grad(
                lambda item_slopes: self.auc_loss(
                    self.c, self.mu, self.w, self.V, self.item_bias,
                    self.item_embed, item_slopes))(self.item_slopes)
            if self.GAMMA_V:
                self.V -= self.GAMMA_V * grad(lambda V: self.auc_loss(
                    self.c, self.mu, self.w, V, self.item_bias,
                    self.item_embed, self.item_slopes))(self.V)
                self.item_embed -= self.GAMMA_V * grad(
                    lambda item_embed: self.auc_loss(
                        self.c, self.mu, self.w, self.V, self.item_bias,
                        item_embed, self.item_slopes))(self.item_embed)
def multiclass_counter(self, W):
    # make predictions
    y_predict = self.fusion_rule(W)

    # compare to actual labels
    misclassifications = int(sum(
        [abs(np.sign(a - b)) for a, b in zip(self.y, y_predict)]))
    return misclassifications
def counting_cost(self, w):
    cost = 0
    for p in range(0, len(self.y)):
        x_p = self.x[p, :]
        y_p = self.y[p]
        a_p = w[0] + np.sum([u * v for (u, v) in zip(x_p, w[1:])])
        cost += (np.sign(a_p) - y_p)**2
    return cost
def counting_cost(self, w):
    cost = 0
    for p in range(0, len(self.y)):
        x_p = copy.deepcopy(self.x[p, :])
        x_p.shape = (len(x_p), 1)
        y_p = self.y[p]
        cost += (np.sign(w[0] + np.dot(w[1:].T, x_p)) - y_p)**2
    return cost
def counting_cost(self, w):
    cost = 0
    for p in range(0, len(self.y)):
        x_p = self.x[p]
        y_p = self.y[p]
        a_p = w[0] + sum([a * b for a, b in zip(w[1:], x_p)])
        cost += (np.sign(a_p) - y_p)**2
    # with labels y_p in {-1, +1}, each mistake contributes 4, so scale by 1/4
    return 0.25 * cost
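# Equivalent vectorized sketch (assuming X holds one point per row and labels
# in {-1, +1}): (sign(a) - y)**2 is either 0 or 4, hence the 0.25 factor.
import numpy as np

def counting_cost_vec(X, y, w):
    a = X @ w[1:] + w[0]
    return 0.25 * np.sum((np.sign(a) - y)**2)  # number of misclassifications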
def PhotometricError(iref, inew, R, T, points, D):
    # points is a tuple ([y], [x]); convert to homogeneous
    siz = iref.shape
    npoints = len(points[0])
    f = siz[1]  # focal length, FIXME
    Xref = np.vstack((
        (points[1] - siz[1] * 0.5) / f,   # x
        (siz[0] * 0.5 - points[0]) / f,   # y (left->right hand)
        np.ones(npoints)))                # z = 1
    # this is confusingly written -- i am broadcasting the translation T to
    # every column, but numpy broadcasting only works if it's rows, hence all
    # the transposes
    Xnew = (np.dot(so3.exp(R), (D * Xref)).T + T).T
    # right -> left hand projection
    proj = Xnew[0:2] / Xnew[2]
    p = (-proj[1] * f + siz[0] * 0.5, proj[0] * f + siz[1] * 0.5)
    margin = 10  # int(siz[0] / 5)
    inwindow_mask = ((p[0] >= margin) & (p[0] < siz[0] - margin - 1) &
                     (p[1] >= margin) & (p[1] < siz[1] - margin - 1))
    npts_inw = sum(inwindow_mask)
    if npts_inw < 10:
        return 1e6, np.zeros(6 + npoints)
    # todo: filter points which are now out of the window
    oldpointidxs = (points[0][inwindow_mask], points[1][inwindow_mask])
    newpointidxs = (p[0][inwindow_mask], p[1][inwindow_mask])
    origpointidxs = np.nonzero(inwindow_mask)[0]
    E = InterpolatedValues(inew, newpointidxs) - iref[oldpointidxs]
    # dE/dk ->
    # d/dk r_p^2 = d/dk (Inew(w(r, T, D, p)) - Iref(p))^2
    #            = -2r_p dInew/dp dp/dw dw/dX dX/dk
    #            = -2r_p * g(w(r, T, D, p)) * dw(r, T, D, p)
    # intensity gradients for each point
    Ig = InterpolatedGradients(inew, newpointidxs)
    # TODO: use tensors for this
    # gradients for R, T, and D
    gradient = np.zeros(6 + npoints)
    for i in range(npts_inw):
        # Jacobian of w
        oi = origpointidxs[i]
        Jw = dw(Xref[0][oi], Xref[1][oi], D[oi], R, T)
        # scale back up into pixel space, right->left hand coords to get
        # Jacobian of p
        Jp = f * np.vstack((-Jw[1], Jw[0]))
        # Jacobian of the L1 objective: sign of the residual times Ig * Jp
        J = np.sign(E[i]) * np.dot(Ig[:, i], Jp)
        gradient[:6] += J[:6]
        gradient[6 + origpointidxs[i]] += J[6]
    print(R, T, np.sum(np.abs(E)), npts_inw)
    return np.sum(np.abs(E)) / npts_inw, gradient / npts_inw
def sample_prior(kld_sampler, n_layers, n_hid_units, is_ResNet,
                 n_inv_steps=20, alpha=.00005):
    ### DEFINE FUNCTIONS

    # define E[KLD] function
    def expected_KLD(log_tau, prev_log_taus, n_layers, mc_samples=3,
                     sigma2_y=1.):
        tau = softplus(log_tau)
        prev_taus = softplus(prev_log_taus)
        kld_accum = 0.
        for s_idx in range(mc_samples):
            if is_ResNet:
                f0 = fprop(0., prev_taus, n_layers, n_hid_units, is_ResNet)
            else:
                f0 = fprop(-1, prev_taus, n_layers, n_hid_units, is_ResNet)
            f1 = fprop(tau, prev_taus, n_layers, n_hid_units, is_ResNet)
            kld_accum += np.mean((f0 - f1)**2 / (2 * sigma2_y))
        return kld_accum / mc_samples

    # define grad
    dEKLD_dTau = grad(expected_KLD)

    ### RUN ITERATIVE SAMPLING
    log_tau_samples = np.random.uniform(low=-2, high=-1, size=(n_layers,))
    for layer_idx in range(n_layers):
        k_hat = kld_sampler()
        for t_idx in range(n_inv_steps):
            ekld = expected_KLD(log_tau=log_tau_samples[layer_idx],
                                prev_log_taus=log_tau_samples[:layer_idx],
                                n_layers=n_layers)
            if not np.isfinite(ekld):
                continue
            ekld_prime = dEKLD_dTau(log_tau_samples[layer_idx],
                                    log_tau_samples[:layer_idx], n_layers)
            if not np.isfinite(ekld_prime):
                continue
            # guard against tiny derivatives blowing up the inverse step
            if np.abs(ekld_prime) < .1:
                ekld_prime = np.sign(ekld_prime) * .1
            log_tau_samples[layer_idx] = log_tau_samples[layer_idx] \
                - alpha / ekld_prime * (ekld - k_hat)

    return softplus(log_tau_samples)
def build_checker_dataset(n_data=6, noise_std=0.1):
    rs = npr.RandomState(0)
    inputs = np.array([np.array([x, y])
                       for x in np.linspace(-1, 1, n_data)
                       for y in np.linspace(-1, 1, n_data)])
    targets = (np.sign([np.prod(point) for point in inputs])
               + rs.randn(n_data**2) * noise_std)
    return inputs, targets
def draw_fit(self, ax, run, ind):
    # viewing ranges
    xmin1 = min(copy.deepcopy(self.x[0, :]))
    xmax1 = max(copy.deepcopy(self.x[0, :]))
    xgap1 = (xmax1 - xmin1) * 0.05
    xmin1 -= xgap1
    xmax1 += xgap1

    xmin2 = min(copy.deepcopy(self.x[1, :]))
    xmax2 = max(copy.deepcopy(self.x[1, :]))
    xgap2 = (xmax2 - xmin2) * 0.05
    xmin2 -= xgap2
    xmax2 += xgap2

    ymin = min(copy.deepcopy(self.y))
    ymax = max(copy.deepcopy(self.y))
    ygap = (ymax - ymin) * 0.05
    ymin -= ygap
    ymax += ygap

    # plot boundary for 2d plot
    r1 = np.linspace(xmin1, xmax1, 300)
    r2 = np.linspace(xmin2, xmax2, 300)
    s, t = np.meshgrid(r1, r2)
    s = np.reshape(s, (np.size(s), 1))
    t = np.reshape(t, (np.size(t), 1))
    h = np.concatenate((s, t), axis=1).T

    # plot total fit
    cost = run.cost
    model = run.model
    feat = run.feature_transforms
    normalizer = run.normalizer
    cost_history = run.train_cost_histories[0]
    weight_history = run.weight_histories[0]

    # get best weights
    win = np.argmin(cost_history)
    w = weight_history[win]

    model = lambda b: run.model(normalizer(b), w)
    z = model(h)
    z = np.sign(z)

    # reshape it
    s.shape = (np.size(r1), np.size(r2))
    t.shape = (np.size(r1), np.size(r2))
    z.shape = (np.size(r1), np.size(r2))

    #### plot contour, color regions ####
    ax.contour(s, t, z, colors='k', linewidths=2.5, levels=[0], zorder=2)
    ax.contourf(s, t, z, colors=[self.colors[1], self.colors[0]],
                alpha=0.15, levels=range(-1, 2))

    ### cleanup left plots, create max view ranges ###
    ax.set_xlim([xmin1, xmax1])
    ax.set_ylim([xmin2, xmax2])
    ax.set_title(str(ind + 1) + ' units fit to data', fontsize=14)
def linear_prediction(x, w, b, neg=0, binary=True):
    guesses = np.matmul(x, w.transpose()) + b
    if binary:
        prediction = np.array(np.sign(guesses), dtype=int)
        if neg == 0:
            # report class labels as {0, 1} rather than {-1, +1}
            prediction[prediction == -1] = 0
    else:
        prediction = guesses
    return prediction
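# Usage sketch for the predictor above (toy numbers):
import numpy as np

X = np.array([[1.0, 2.0],
              [-2.0, -1.0]])
w = np.array([[0.5, -0.25]])
print(linear_prediction(X, w, b=0.1))           # [[1] [0]]  -- labels in {0, 1}
print(linear_prediction(X, w, b=0.1, neg=-1))   # [[1] [-1]] -- labels in {-1, +1}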
def outputs(weights, input_set, fence_set, output_set=None,
            return_pred_set=False):
    update_x_weights = parser.get(weights, 'update_x_weights')
    update_h_weights = parser.get(weights, 'update_h_weights')
    reset_x_weights = parser.get(weights, 'reset_x_weights')
    reset_h_weights = parser.get(weights, 'reset_h_weights')
    thidden_x_weights = parser.get(weights, 'thidden_x_weights')
    thidden_h_weights = parser.get(weights, 'thidden_h_weights')
    output_h_weights = parser.get(weights, 'output_h_weights')

    data_count = len(fence_set) - 1
    feat_count = input_set.shape[0]

    ll = 0.0
    n_i_track = 0
    fence_base = fence_set[0]

    pred_set = None
    if return_pred_set:
        # `output_count` is assumed to come from the enclosing module scope
        pred_set = np.zeros((output_count, input_set.shape[1]))

    # loop through sequences and time steps
    for data_iter in range(data_count):
        hiddens = copy(parser.get(weights, 'init_hiddens'))
        fence_post_1 = fence_set[data_iter] - fence_base
        fence_post_2 = fence_set[data_iter + 1] - fence_base
        time_count = fence_post_2 - fence_post_1
        curr_input = input_set[:, fence_post_1:fence_post_2]

        for time_iter in range(time_count):
            hiddens = update(
                np.expand_dims(np.hstack((curr_input[:, time_iter], 1)),
                               axis=0),
                hiddens, update_x_weights, update_h_weights,
                reset_x_weights, reset_h_weights,
                thidden_x_weights, thidden_h_weights)

            if output_set is not None:
                # subtract a small number so a label of 0 maps to sign -1
                out_proba = sigmoid(
                    np.sign(output_set[:, n_i_track] - 1e-3) *
                    np.dot(hiddens, output_h_weights))
                out_lproba = safe_log(out_proba)
                ll += np.sum(out_lproba)
            else:
                out_proba = sigmoid(np.dot(hiddens, output_h_weights))
                out_lproba = safe_log(out_proba)

            if return_pred_set:
                pred_set[:, n_i_track] = out_lproba[0]
            n_i_track += 1

    return ll, pred_set
def test_sign():
    fun = lambda x: 3.0 * np.sign(x)
    check_grads(fun)(1.1)
    check_grads(fun)(-1.1)
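# Illustrative note as code: away from the origin np.sign is locally constant,
# so autograd's derivative of it is zero, which is what the checks above verify.
from autograd import grad
import autograd.numpy as np

print(grad(lambda x: 3.0 * np.sign(x))(1.1))  # 0.0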
defvjp(j1, lambda ans, x: lambda g: g * (j0(x) - jn(2, x)) / 2.0)
defvjp(y1, lambda ans, x: lambda g: g * (y0(x) - yn(2, x)) / 2.0)
defvjp(jn, None,
       lambda ans, n, x: lambda g: g * (jn(n - 1, x) - jn(n + 1, x)) / 2.0)
defvjp(yn, None,
       lambda ans, n, x: lambda g: g * (yn(n - 1, x) - yn(n + 1, x)) / 2.0)

### Faster versions of common Bessel functions ###
i0 = primitive(scipy.special.i0)
i1 = primitive(scipy.special.i1)
iv = primitive(scipy.special.iv)
ive = primitive(scipy.special.ive)
defvjp(i0, lambda ans, x: lambda g: g * i1(x))
defvjp(i1, lambda ans, x: lambda g: g * (i0(x) + iv(2, x)) / 2.0)
defvjp(iv, None,
       lambda ans, n, x: lambda g: g * (iv(n - 1, x) + iv(n + 1, x)) / 2.0)
defvjp(ive, None,
       lambda ans, n, x: lambda g: g * (ans * (n / x - np.sign(x)) +
                                        ive(n + 1, x)))

### Error Function ###
inv_root_pi = 0.56418958354775627928
erf = primitive(scipy.special.erf)
erfc = primitive(scipy.special.erfc)
defvjp(erf, lambda ans, x: lambda g: 2. * g * inv_root_pi * np.exp(-x**2))
defvjp(erfc, lambda ans, x: lambda g: -2. * g * inv_root_pi * np.exp(-x**2))

### Inverse error function ###
root_pi = 1.7724538509055159
erfinv = primitive(scipy.special.erfinv)
erfcinv = primitive(scipy.special.erfcinv)
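# Illustrative numerical check of the i0 rule above, d/dx I0(x) = I1(x),
# using a central finite difference (scipy assumed available, as above):
import scipy.special

x, h = 1.3, 1e-6
fd = (scipy.special.i0(x + h) - scipy.special.i0(x - h)) / (2 * h)
print(fd - scipy.special.i1(x))  # ~0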
def build_step_function_dataset(D=1, n_data=40, noise_std=0.1): rs = npr.RandomState(0) inputs = np.linspace(-2, 2, num=n_data) targets = np.sign(inputs) + rs.randn(n_data) * noise_std inputs = inputs.reshape((len(inputs), D)) return inputs, targets
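# Usage sketch for the dataset builders above (npr is assumed to be
# numpy.random, matching the surrounding code):
inputs, targets = build_step_function_dataset(D=1, n_data=5)
print(inputs.shape, targets.shape)  # (5, 1) (5,)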
def predict(self, X=None):
    predictions = self._predict(X)
    return np.sign(predictions)