def forward_pass(self, inputs, param_vector):
    # Max pooling via reshape: split each spatial axis into
    # (n_windows, pool_width) and take the max over the window axes.
    # Assumes the pool shape tiles the input exactly.
    new_shape = inputs.shape[:2]
    for i in [0, 1]:
        pool_width = self.pool_shape[i]
        img_width = inputs.shape[i + 2]
        new_shape += (img_width // pool_width, pool_width)  # integer division
    result = inputs.reshape(new_shape)
    return np.max(np.max(result, axis=3), axis=4)
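# A self-contained sketch (not part of the layer above) of the same
# reshape-then-max pooling trick on a hypothetical (N, C, H, W) array with a
# 2x2 pool; shapes and values are invented for illustration:
#   imgs = np.arange(2 * 3 * 4 * 4, dtype=float).reshape(2, 3, 4, 4)
#   pooled = imgs.reshape(2, 3, 2, 2, 2, 2).max(axis=(3, 5))  # shape (2, 3, 2, 2)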
def image_bbox(params, img):
    img_ymax, img_xmax = img.nelec.shape
    px, py = img.equa2pixel(params.u)
    xlim = (np.max([0, int(np.floor(px - pixel_radius))]),
            np.min([img_xmax, int(np.ceil(px + pixel_radius))]))
    ylim = (np.max([0, int(np.floor(py - pixel_radius))]),
            np.min([img_ymax, int(np.ceil(py + pixel_radius))]))
    return xlim, ylim
def get_bounding_box(params, img):
    if params.is_star():
        bound = img.R
    elif params.is_galaxy():
        bound = gal_funs.gen_galaxy_psf_image_bound(params, img)
    else:
        raise ValueError("source type unknown")
    px, py = img.equa2pixel(params.u)
    xlim = (np.max([0, np.floor(px - bound)]),
            np.min([img.nelec.shape[1], np.ceil(px + bound)]))
    ylim = (np.max([0, np.floor(py - bound)]),
            np.min([img.nelec.shape[0], np.ceil(py + bound)]))
    return xlim, ylim
def compare_deltas(baseline=None, candidate=None, abs_tol=1e-5, rel_tol=0.01):
    # Tensors match if either the absolute or the relative error is within tolerance.
    epsilon = 1e-25  # guards against division by zero
    if baseline.shape != candidate.shape:
        return False
    diff_tensor = np.abs(baseline - candidate)
    rel_tensor1 = diff_tensor / (np.abs(baseline) + epsilon)
    rel_tensor2 = diff_tensor / (np.abs(candidate) + epsilon)
    max_error = np.max(diff_tensor)
    max_rel = max(np.max(rel_tensor1), np.max(rel_tensor2))
    return max_error <= abs_tol or max_rel <= rel_tol
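# Illustrative check for compare_deltas (arrays invented for the example):
#   a = np.array([1.0, 2.0])
#   b = np.array([1.0, 2.0000001])
#   compare_deltas(baseline=a, candidate=b)   # True: within both tolerances
#   compare_deltas(baseline=a, candidate=-a)  # False: large absolute and relative error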
def callback(weights, iter):
    if iter % 10 == 0:
        print("max of weights", np.max(np.abs(weights)))
        train_preds = undo_norm(pred_fun(weights, train_smiles))
        cur_loss = loss_fun(weights, train_smiles, train_targets)
        training_curve.append(cur_loss)
        print("Iteration", iter, "loss", cur_loss,
              "train RMSE", rmse(train_preds, train_raw_targets))
        if validation_smiles is not None:
            validation_preds = undo_norm(pred_fun(weights, validation_smiles))
            print("Validation RMSE", iter, ":",
                  rmse(validation_preds, validation_raw_targets))
def plot_data_and_pred(x, y, model, draw_verticals=True):
    x_range = np.linspace(np.min(x), np.max(x), 100)
    yhat_range = model.predict(x_range)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(x, y, 'o', label='observed')
    ax.plot(x_range, yhat_range, 'r-', label='predicted')
    if draw_verticals:
        # draw a vertical segment from each observed value to its prediction
        yhat_sparse = model.predict(x)
        for x0, y0, yhat0 in zip(x, y, yhat_sparse):
            ax.plot([x0, x0], [y0, yhat0], 'k-')
    plt.legend()
def callback(weights, iter):
    if iter % 10 == 0:
        print("max of weights", np.max(np.abs(weights)))
        train_preds = undo_norm(pred_fun(weights, train_smiles[:num_print_examples]))
        cur_loss = loss_fun(weights, train_smiles[:num_print_examples],
                            train_targets[:num_print_examples])  # loss_fun defined in build_vanilla_net.py
        training_curve.append(cur_loss)
        print("Iteration", iter, "loss", cur_loss,
              "train RMSE", rmse(train_preds, train_raw_targets[:num_print_examples]))
        if validation_smiles is not None:
            validation_preds = undo_norm(pred_fun(weights, validation_smiles))
            print("Validation RMSE", iter, ":",
                  rmse(validation_preds, validation_raw_targets))
def logistic(x):
    """Maps N x D real-valued rows to N x (D+1) rows on the probability simplex.

    Input:
        x: N x D real-valued matrix
    Output:
        p: N x (D+1) non-negative matrix whose rows sum to 1, obtained by
           appending a zero column to x and applying a softmax to each row
    Note: this is the inverse of the logit transformation.
    """
    x = np.concatenate([x, np.zeros((x.shape[0], 1))], axis=1)
    x -= np.max(x, axis=1)[:, np.newaxis]  # subtract off the max to prevent overflow
    p = np.exp(x)
    return p / np.sum(p, axis=1)[:, np.newaxis]
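# Quick sanity check (values invented): each output row lies on the simplex.
#   z = np.array([[0.0, 1.0], [2.0, -1.0]])
#   p = logistic(z)                       # shape (2, 3)
#   assert np.allclose(p.sum(axis=1), 1.0)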
def forward_pass(self, X):
    self.last_input = X
    out_height, out_width = pooling_shape(self.pool_shape, X.shape, self.stride)
    n_images, n_channels, _, _ = X.shape
    col = image_to_column(X, self.pool_shape, self.stride, self.padding)
    col = col.reshape(-1, self.pool_shape[0] * self.pool_shape[1])
    arg_max = np.argmax(col, axis=1)
    out = np.max(col, axis=1)
    self.arg_max = arg_max  # cached for the backward pass
    return out.reshape(n_images, out_height, out_width, n_channels).transpose(0, 3, 1, 2)
def _decode_map(self, data):
    # adapted from hmmlearn
    framelogprob = self._compute_log_likelihood(data)
    logprob, fwdlattice = self._do_forward_pass(framelogprob)
    bwdlattice = self._do_backward_pass(framelogprob)
    gamma = fwdlattice + bwdlattice
    # gamma is guaranteed to be correctly normalized by logprob at all
    # frames, unless we do approximate inference using pruning. So, we
    # normalize each frame explicitly in case we pruned too aggressively.
    posteriors = np.exp(gamma.T - logsumexp(gamma, axis=1)).T
    posteriors += np.finfo(np.float64).eps
    posteriors /= np.sum(posteriors, axis=1).reshape((-1, 1))
    state_sequence = np.argmax(posteriors, axis=1)
    map_logprob = np.max(posteriors, axis=1).sum()
    return map_logprob, state_sequence
def logprob(weights, inputs, targets):
    log_prior = -L2_reg * np.sum(weights**2, axis=1)
    preds = predictions(weights, inputs)
    # Stable softmax over the class axis, then a categorical log-likelihood
    # summed over data points and classes for each weight sample.
    exp_pred = np.exp(preds - np.max(preds, axis=2)[:, :, np.newaxis])
    normalized_pred = exp_pred / np.sum(exp_pred, axis=2)[:, :, np.newaxis]
    log_lik = np.sum(targets * np.log(normalized_pred + 1e-6), axis=(1, 2))
    return log_prior + log_lik
def plot_runtime(ex, fname, func_xvalues, xlabel, func_title=None):
    results = glo.ex_load_result(ex, fname)
    value_accessor = lambda job_results: job_results['time_secs']
    vf_pval = np.vectorize(value_accessor)
    # results['test_results'] is an array of dictionaries of the form
    # {'test_result': (dict from running perform_test(te)), ...}
    times = vf_pval(results['test_results'])
    repeats, _, n_methods = results['test_results'].shape
    time_avg = np.mean(times, axis=0)
    time_std = np.std(times, axis=0)
    xvalues = func_xvalues(results)

    line_styles = exglo.func_plot_fmt_map()
    method_labels = exglo.get_func2label_map()
    func_names = [f.__name__ for f in results['method_job_funcs']]
    for i in range(n_methods):
        fmt = line_styles[func_names[i]]
        method_label = method_labels[func_names[i]]
        plt.errorbar(xvalues, time_avg[:, i], yerr=time_std[:, i],
                     fmt=fmt, label=method_label)

    ylabel = 'Time (s)'
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.gca().set_yscale('log')
    plt.xlim([np.min(xvalues), np.max(xvalues)])
    plt.xticks(xvalues, xvalues)
    plt.legend(loc='best')
    title = '%s. %d trials.' % (results['prob_label'], repeats) \
        if func_title is None else func_title(results)
    plt.title(title)
    return results
def plotResults(dsi, res):
    # plot results
    fig, ax = plt.subplots(1, 2, figsize=(12, 4))

    # Plot MAEs
    ax[0].plot(res["maes"], label="gradient")
    # add global baseline
    globalMAE = np.mean(np.abs(dsi.trueAlphas - dsi.globalAlphaHats.mean()))
    ax[0].hlines(globalMAE, 0, len(res["maes"]), color="black", label="global")
    ax[0].legend()
    ax[0].set_title("Gradient Method MAE")

    # Plot final alphaHat
    N = len(dsi.numU)
    K = len(dsi.globalAlphaHats)
    for i in range(N):
        ax[1].fill_between(
            np.array([res["alphaHats"][i].min(), res["alphaHats"][i].max()]),
            y1=0, y2=dsi.numU[i] + .25, alpha=.25, color="red")
        ax[1].vlines(res["alphaHats"][i].mean(), 0, dsi.numU[i] + 1.5, color="red")
    ax[1].vlines(dsi.globalAlphaHats.mean(), 0, max(dsi.numU),
                 color="black", label=r"$\hat{\alpha_{c_i}}$")
    ax[1].fill_between(
        np.array([dsi.globalAlphaHats.min(), dsi.globalAlphaHats.max()]),
        y1=0, y2=np.max(dsi.numU), color="black", alpha=.25)
    for i in range(N):
        ax[1].fill_between(
            np.array([dsi.alphaHats[i].min(), dsi.alphaHats[i].max()]),
            y1=0, y2=dsi.numU[i], color="blue", alpha=.25)
    ax[1].vlines(dsi.alphaHats.mean(1), 0, dsi.numU - .15,
                 color="blue", label=r"$\hat{\alpha}_0$")
    ax[1].vlines(dsi.trueAlphas, 0, dsi.numU - .25, color="green", label=r"$\alpha$")
    ax[1].vlines(dsi.trueGlobalClassPrior, 0, dsi.numU.max(),
                 color="orange", label=r"$\alpha_c$")
    ax[1].set_title("Alphas")
    ax[1].legend(loc="upper right", bbox_to_anchor=(1.25, 1))
    plt.show()
def fun(x):
    return to_scalar(np.max(x, axis=1))

d_fun = lambda x: to_scalar(grad(fun)(x))
def log_likelihood(all_params):
    # implement mini-batches later?
    n_samples = 1
    samples = [sample_mean_cov_from_deep_gp(all_params, X, True)
               for i in range(n_samples)]
    return logsumexp(np.array([
        mvn.logpdf(y, mean, var + 1e-6 * np.eye(len(var)) * np.max(np.diag(var)))
        for mean, var in samples])) - np.log(n_samples) \
        + evaluate_prior(all_params)
def hinge(actual, predicted):
    # np.maximum (elementwise), not np.max: the second argument of np.max is `axis`.
    return np.mean(np.maximum(1. - actual * predicted, 0.))
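# Illustrative check with labels in {-1, +1} (margins invented):
#   y = np.array([1.0, -1.0, 1.0])
#   f = np.array([0.8, -0.5, -0.2])
#   hinge(y, f)  # mean of [0.2, 0.5, 1.2] = 0.6333...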
def tf_nn(nx, nt, num_hidden_neurons, activations, num_iter=100000, eta=0.01):
    tf.reset_default_graph()

    # Set a seed to ensure getting the same results from every run
    tf.set_random_seed(4155)

    x_np = np.linspace(0, 1, nx)
    t_np = np.linspace(0, 1, nt)

    X, T = np.meshgrid(x_np, t_np)

    x = X.ravel()
    t = T.ravel()

    ## The construction phase
    zeros = tf.reshape(tf.convert_to_tensor(np.zeros(x.shape)), shape=(-1, 1))
    x = tf.reshape(tf.convert_to_tensor(x), shape=(-1, 1))
    t = tf.reshape(tf.convert_to_tensor(t), shape=(-1, 1))

    pts = tf.concat([x, t], 1)  # input layer

    X = tf.convert_to_tensor(X)
    T = tf.convert_to_tensor(T)

    # Define layer structure
    with tf.name_scope('dnn'):
        num_hidden_layers = np.size(num_hidden_neurons)
        previous_layer = pts
        for l in range(num_hidden_layers):
            current_layer = tf.layers.dense(previous_layer,
                                            num_hidden_neurons[l],
                                            name=('hidden%d' % (l + 1)),
                                            activation=activations[l])
            previous_layer = current_layer
        dnn_output = tf.layers.dense(previous_layer, 1, name='output', activation=None)

    # Define loss function: the trial function satisfies the boundary
    # conditions and the initial condition u(x) by construction
    with tf.name_scope('loss'):
        g_t = (1 - t) * u(x) + x * (1 - x) * t * dnn_output
        g_t_d2x = tf.gradients(tf.gradients(g_t, x), x)
        g_t_dt = tf.gradients(g_t, t)
        loss = tf.losses.mean_squared_error(zeros, g_t_dt[0] - g_t_d2x[0])

    # Define optimizer
    with tf.name_scope('train'):
        optimizer = tf.train.AdamOptimizer(eta)
        training_op = optimizer.minimize(loss)

    init = tf.global_variables_initializer()

    g_e = u_e(x, t)  # analytical solution (tensor)

    with tf.Session() as sess:
        init.run()
        for i in range(num_iter):
            sess.run(training_op)
            if i % 1000 == 0:
                print(loss.eval())

        g_e = g_e.eval()    # analytical solution
        g_dnn = g_t.eval()  # NN solution

    diff = np.abs(g_e - g_dnn)
    print('Max absolute difference between analytical solution and TensorFlow DNN ',
          np.max(diff))

    G_e = g_e.reshape((nt, nx))
    G_dnn = g_dnn.reshape((nt, nx))
    diff = diff.reshape((nt, nx))

    # Plot the results
    X, T = np.meshgrid(x_np, t_np)

    fig = plt.figure(figsize=(10, 10))
    ax = fig.gca(projection='3d')
    ax.set_title('Solution from the deep neural network w/ %d layers'
                 % len(num_hidden_neurons))
    s = ax.plot_surface(X, T, G_dnn, linewidth=0, antialiased=False, cmap=cm.viridis)
    ax.set_ylabel('Time $t$')
    ax.set_xlabel('Position $x$')

    fig = plt.figure(figsize=(10, 10))
    ax = fig.gca(projection='3d')
    ax.set_title('Analytical solution')
    s = ax.plot_surface(X, T, G_e, linewidth=0, antialiased=False, cmap=cm.viridis)
    ax.set_ylabel('Time $t$')
    ax.set_xlabel('Position $x$')

    fig = plt.figure(figsize=(10, 10))
    ax = fig.gca(projection='3d')
    ax.set_title('Difference')
    s = ax.plot_surface(X, T, diff, linewidth=0, antialiased=False, cmap=cm.viridis)
    ax.set_ylabel('Time $t$')
    ax.set_xlabel('Position $x$')

    ## Take some 3D slices
    indx1 = 0
    indx2 = int(nt / 2)
    indx3 = nt - 1

    t1 = t_np[indx1]
    t2 = t_np[indx2]
    t3 = t_np[indx3]

    # Slice the results from the DNN
    res1 = G_dnn[indx1, :]
    res2 = G_dnn[indx2, :]
    res3 = G_dnn[indx3, :]

    # Slice the analytical results
    res_analytical1 = G_e[indx1, :]
    res_analytical2 = G_e[indx2, :]
    res_analytical3 = G_e[indx3, :]

    # Plot the slices
    plt.figure(figsize=(10, 10))
    plt.title("Computed solutions at time = %g" % t1)
    plt.plot(x_np, res1)
    plt.plot(x_np, res_analytical1)
    plt.legend(['dnn', 'analytical'])

    plt.figure(figsize=(10, 10))
    plt.title("Computed solutions at time = %g" % t2)
    plt.plot(x_np, res2)
    plt.plot(x_np, res_analytical2)
    plt.legend(['dnn', 'analytical'])

    plt.figure(figsize=(10, 10))
    plt.title("Computed solutions at time = %g" % t3)
    plt.plot(x_np, res3)
    plt.plot(x_np, res_analytical3)
    plt.legend(['dnn', 'analytical'])

    plt.show()

    return diff
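# Note on the trial solution used above: g(x, t) = (1 - t) * u(x) + x * (1 - x) * t * N(x, t)
# reduces to the initial condition u(x) at t = 0 and vanishes at x = 0 and x = 1
# (provided u does), so the boundary and initial conditions hold by construction
# and the loss only has to penalize the PDE residual dg/dt - d^2g/dx^2.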
def hinge(actual, predicted):
    # np.maximum (elementwise), not np.max: the second argument of np.max is `axis`.
    return np.mean(np.maximum(1.0 - actual * predicted, 0.0))
def fit(self, X, y):
    times = []
    start_time = time.time()

    # store the x min and max for normalization
    self.x_min = np.min(X, axis=0, keepdims=True)
    self.x_max = np.max(X, axis=0, keepdims=True)
    # normalize X
    X = self.normalize_X(X)

    # store the y min and max for normalization
    self.y_min = np.min(y, axis=0, keepdims=True)
    self.y_max = np.max(y, axis=0, keepdims=True)
    # normalize y
    y = self.normalize_Y(y)

    # store x and y for use in loss calculation
    self.x = X
    self.y = y

    # initialize variables
    d = X.shape[1]  # dimension
    # initialize the weight vector: d weights plus a bias term
    w = np.random.uniform(0, 1, d)
    w_0 = random.uniform(0, 1)
    w = np.append(w, w_0)

    diff = 1
    gradient = grad(self.sto_loss_function)
    t = 0

    while diff > 1e-4:
        w_prev = w.copy()
        for i in range(self.steps):
            t += 1
            # decaying step size
            w -= (1 / (10000 * (1 + t))) * gradient(w)
        diff = (1 / (1 + d)) * np.sum(np.abs(w - w_prev))
        print(diff)

    # store weights
    self.w = w
def logsumexp(X):
    max_X = np.max(X, axis=-1)[..., np.newaxis]
    return max_X + np.log(np.sum(np.exp(X - max_X), axis=-1)[..., np.newaxis])
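# The trailing singleton axis kept by this variant makes broadcasting easy,
# e.g. a log-softmax sketch (array invented for illustration):
#   X = np.array([[1.0, 2.0, 3.0]])
#   log_softmax = X - logsumexp(X)  # rows of np.exp(log_softmax) sum to 1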
def static_N2_simple(self, ax, w_best, runner, train_valid):
    cost = runner.cost
    predict = runner.model
    feat = runner.feature_transforms
    normalizer = runner.normalizer
    inverse_normalizer = runner.inverse_normalizer

    # or just take last weights
    self.w = w_best

    ### create boundary data ###
    xmin1 = np.min(self.x[0, :])
    xmax1 = np.max(self.x[0, :])
    xgap1 = (xmax1 - xmin1) * 0.05
    xmin1 -= xgap1
    xmax1 += xgap1

    xmin2 = np.min(self.x[1, :])
    xmax2 = np.max(self.x[1, :])
    xgap2 = (xmax2 - xmin2) * 0.05
    xmin2 -= xgap2
    xmax2 += xgap2

    ### loop over two panels plotting each ###
    # plot training points
    if train_valid == 'train':
        # reverse normalize data
        x_train = inverse_normalizer(runner.x_train).T
        y_train = runner.y_train

        # plot data
        ind0 = np.argwhere(y_train == +1)
        ind0 = [v[1] for v in ind0]
        ax.scatter(x_train[ind0, 0], x_train[ind0, 1], s=45,
                   color=self.colors[0], edgecolor=[0, 0.7, 1],
                   linewidth=1, zorder=3)

        ind1 = np.argwhere(y_train == -1)
        ind1 = [v[1] for v in ind1]
        ax.scatter(x_train[ind1, 0], x_train[ind1, 1], s=45,
                   color=self.colors[1], edgecolor=[0, 0.7, 1],
                   linewidth=1, zorder=3)
        ax.set_title('training data', fontsize=15)

    if train_valid == 'validate':
        # reverse normalize data
        x_valid = inverse_normalizer(runner.x_valid).T
        y_valid = runner.y_valid

        # plot validation points
        ind0 = np.argwhere(y_valid == +1)
        ind0 = [v[1] for v in ind0]
        ax.scatter(x_valid[ind0, 0], x_valid[ind0, 1], s=45,
                   color=self.colors[0], edgecolor=[1, 0.8, 0.5],
                   linewidth=1, zorder=3)

        ind1 = np.argwhere(y_valid == -1)
        ind1 = [v[1] for v in ind1]
        ax.scatter(x_valid[ind1, 0], x_valid[ind1, 1], s=45,
                   color=self.colors[1], edgecolor=[1, 0.8, 0.5],
                   linewidth=1, zorder=3)
        ax.set_title('validation data', fontsize=15)

    if train_valid == 'original':
        # plot all points
        ind0 = np.argwhere(self.y == +1)
        ind0 = [v[1] for v in ind0]
        ax.scatter(self.x[0, ind0], self.x[1, ind0], s=55,
                   color=self.colors[0], edgecolor='k', linewidth=1, zorder=3)

        ind1 = np.argwhere(self.y == -1)
        ind1 = [v[1] for v in ind1]
        ax.scatter(self.x[0, ind1], self.x[1, ind1], s=55,
                   color=self.colors[1], edgecolor='k', linewidth=1, zorder=3)
        ax.set_title('original data', fontsize=15)

    # cleanup panel
    ax.set_xlabel(r'$x_1$', fontsize=15)
    ax.set_ylabel(r'$x_2$', fontsize=15, rotation=0, labelpad=20)
    ax.xaxis.set_major_formatter(FormatStrFormatter('%.1f'))
    ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
def cross_validation(odom_1, aligned_1, odom_2, aligned_2, type_1, type_2, K=10):
    """Run K-fold cross-validation of nonlinear optimization for optimal pose
    estimation and evaluation.

    Performs cross-validation K times, splitting the dataset into K
    (approximately) even splits to be used for in-sample training and
    out-of-sample evaluation. This function estimates a relative
    transformation between two lidar frames using nonlinear optimization, and
    evaluates the robustness of this estimate through K-fold cross-validation
    performance of our framework.

    Though this function does not return any values, it saves all results in
    the 'results' relative path.

    Parameters:
        odom_1 (pd.DataFrame): DataFrame corresponding to odometry data for
            the pose we wish to transform into the odom_2 frame of reference.
            See data/main_odometry.csv for an example of the
            headers/columns/data types this function expects this DataFrame
            to have.
        aligned_1 (pd.DataFrame): DataFrame corresponding to aligned odometry
            data given the 3 sets of odometry data for the 3 lidar sensors.
            This data corresponds to the odom_1 sensor frame.
        odom_2 (pd.DataFrame): DataFrame corresponding to odometry data for
            the pose we wish to transform the odom_1 frame of reference into.
            See data/main_odometry.csv for an example of the
            headers/columns/data types this function expects this DataFrame
            to have.
        aligned_2 (pd.DataFrame): DataFrame corresponding to aligned odometry
            data given the 3 sets of odometry data for the 3 lidar sensors.
            This data corresponds to the odom_2 sensor frame.
        type_1 (str): String denoting the lidar type. Should be in the set
            {'main', 'front', 'rear'}. This type corresponds to the data
            type for the odom_1 frame.
        type_2 (str): String denoting the lidar type. Should be in the set
            {'main', 'front', 'rear'}. This type corresponds to the data
            type for the odom_2 frame.
        K (int): The number of folds to be used for cross-validation.
            Defaults to 10.
""" # Get ICP covariance matrices # Odom 1 lidar odometry odom1_icp, odom1_trans_cov, odom1_trans_cov_max, \ odom1_trans_cov_avg, odom1_rot_cov, odom1_rot_cov_max, \ odom1_rot_cov_avg, odom1_reject = parse_icp_cov(odom_1, type=type_1, reject_thr=REJECT_THR) # Odom 2 lidar odometry odom2_icp, odom2_trans_cov, odom2_trans_cov_max, \ odom2_trans_cov_avg, odom2_rot_cov, odom2_rot_cov_max, \ odom2_rot_cov_avg, odom2_reject = parse_icp_cov(odom_2, type=type_2, reject_thr=REJECT_THR) # Calculate relative poses (odom1_aligned, odom1_rel_poses) = relative_pose_processing.calc_rel_poses(aligned_1) (odom2_aligned, odom2_rel_poses) = relative_pose_processing.calc_rel_poses(aligned_2) # Compute weights for weighted estimate cov_t_odom1, cov_R_odom1 = compute_weights_euler(odom1_aligned) cov_t_odom2, cov_R_odom2 = compute_weights_euler(odom2_aligned) # Extract a single scalar using the average value from rotation and translation var_t_odom1 = extract_variance(cov_t_odom1, mode="max") var_R_odom1 = extract_variance(cov_R_odom1, mode="max") var_t_odom2 = extract_variance(cov_t_odom2, mode="max") var_R_odom2 = extract_variance(cov_R_odom2, mode="max") # Optimization (1) Instantiate a manifold translation_manifold = Euclidean(3) # Translation vector so3 = Rotations(3) # Rotation matrix manifold = Product((so3, translation_manifold)) # Instantiate manifold # Get initial guesses for our estimations if os.path.exists(PKL_POSES_PATH): # Check to make sure path exists transforms_dict = load_transforms( PKL_POSES_PATH) # Relative transforms # Map types to sensor names to access initial estimate relative transforms types2sensors = {"main": "velodyne", "front": "front", "rear": "rear"} # Now get initial guesses from the relative poses initial_guess_odom1_odom2 = transforms_dict["{}_{}".format( types2sensors[type_1], types2sensors[type_2])] # Print out all the initial estimates as poses print("INITIAL GUESS {} {}: \n {} \n".format(types2sensors[type_1], types2sensors[type_2], initial_guess_odom1_odom2)) # Get rotation matrices for initial guesses R0_odom1_odom2, t0_odom1_odom2 = initial_guess_odom1_odom2[:3, :3], \ initial_guess_odom1_odom2[:3, 3] X0_odom1_odom2 = (R0_odom1_odom2, t0_odom1_odom2) # Pymanopt estimate print("INITIAL GUESS {} {}: \n R0: \n {} \n\n t0: \n {} \n".format( types2sensors[type_1], types2sensors[type_2], R0_odom1_odom2, t0_odom1_odom2)) # Create KFold xval object to get training/validation indices kf = KFold(n_splits=K, random_state=None, shuffle=False) k = 0 # Set fold counter to 0 # Dataset A = np.array(odom2_rel_poses) # First set of poses B = np.array(odom1_rel_poses) # Second set of poses N = len(A) assert len(A) == len(B) # Sanity check to ensure odometry data matches r = np.logical_or(np.array(odom1_reject)[:N], np.array(odom2_reject)[:N]) # Outlier rejection print("NUMBER OF CROSS-VALIDATION FOLDS: {}".format(K)) # Iterate over 30 second intervals of the poses for train_index, test_index in kf.split( A): # Perform K-fold cross-validation # Path for results from manifold optimization analysis_results_path = os.path.join(ANALYSIS_RESULTS_PATH, "k={}".format(k)) final_estimates_path = os.path.join(FINAL_ESTIMATES_PATH, "k={}".format(k)) odometry_plots_path = os.path.join(ODOMETRY_PLOTS_PATH, "k={}".format(k)) # Make sure all paths exist - if they don't create them for path in [ analysis_results_path, final_estimates_path, odometry_plots_path ]: check_dir(path) # Get training data A_train = A[train_index] B_train = B[train_index] N_train = min(A_train.shape[0], B_train.shape[0]) r_train = 
r[train_index] print("FOLD NUMBER: {}, NUMBER OF TRAINING SAMPLES: {}".format( k, N_train)) omega = np.max([var_R_odom1, var_R_odom2 ]) # Take average across different odometries rho = np.max([var_t_odom1, var_t_odom2]) # Take average across different odometries cost_lambda = lambda x: cost(x, A_train, B_train, r_train, rho, omega, WEIGHTED) # Create cost function problem = Problem(manifold=manifold, cost=cost_lambda) # Create problem solver = CustomSteepestDescent() # Create custom solver X_opt = solver.solve(problem, x=X0_odom1_odom2) # Solve problem print("Initial Guess for Main-Front Transformation: \n {}".format( initial_guess_odom1_odom2)) print("Optimal solution between {} and {} " "reference frames: \n {}".format(types2sensors[type_1], types2sensors[type_2], X_opt)) # Take intermediate values for plotting estimates_x = solver.estimates errors = solver.errors iters = solver.iterations # Metrics dictionary estimates_dict = {i: T for i, T in zip(iters, estimates_x)} error_dict = {i: e for i, e in zip(iters, errors)} # Save intermediate results to a pkl file estimates_fname = os.path.join( analysis_results_path, "estimates_{}_{}.pkl".format(types2sensors[type_1], types2sensors[type_2], X_opt)) error_fname = os.path.join( analysis_results_path, "error_{}_{}.pkl".format(types2sensors[type_1], types2sensors[type_2], X_opt)) # Save estimates to pickle file with open(estimates_fname, "wb") as pkl_estimates: pickle.dump(estimates_dict, pkl_estimates) pkl_estimates.close() # Save error to pickle file with open(error_fname, "wb") as pkl_error: pickle.dump(error_dict, pkl_error) pkl_error.close() # Calculate difference between initial guess and final X_opt_T = construct_pose(X_opt[0], X_opt[1].reshape((3, 1))) print("DIFFERENCE IN MATRICES: \n {}".format( np.subtract(X_opt_T, initial_guess_odom1_odom2))) # Compute the weighted RMSE (training/in-sample) train_rmse_init_weighted, train_rmse_final_weighted, train_rmse_init_R_weighted, \ train_rmse_init_t_weighted, train_rmse_final_R_weighted, \ train_rmse_final_t_weighted = compute_rmse_weighted( initial_guess_odom1_odom2, X_opt_T, A_train, B_train, rho, omega) # Compute the unweighted RMSE (training/in-sample) train_rmse_init_unweighted, train_rmse_final_unweighted, train_rmse_init_R_unweighted, \ train_rmse_init_t_unweighted, train_rmse_final_R_unweighted, \ train_rmse_final_t_unweighted = compute_rmse_unweighted( initial_guess_odom1_odom2, X_opt_T, A_train, B_train) # Concatenate all RMSE values for training/in-sample train_rmses = [ train_rmse_init_unweighted, train_rmse_final_unweighted, train_rmse_init_weighted, train_rmse_final_weighted, train_rmse_init_R_unweighted, train_rmse_init_t_unweighted, train_rmse_final_R_unweighted, train_rmse_final_t_unweighted, train_rmse_init_R_weighted, train_rmse_init_t_weighted, train_rmse_final_R_weighted, train_rmse_final_t_weighted ] # Display and save RMSEs outpath = os.path.join( analysis_results_path, "train_rmse_{}_{}.txt".format(types2sensors[type_1], types2sensors[type_2])) display_and_save_rmse(train_rmses, outpath) # Get test data A_test = A[test_index] B_test = B[test_index] N_test = min(A_test.shape[0], B_test.shape[0]) print("NUMBER OF TEST SAMPLES: {}".format(N_test)) # Compute the weighted RMSE (testing/out-of-sample) test_rmse_init_weighted, test_rmse_final_weighted, test_rmse_init_R_weighted, \ test_rmse_init_t_weighted, test_rmse_final_R_weighted, \ test_rmse_final_t_weighted = compute_rmse_weighted(initial_guess_odom1_odom2, X_opt_T, A_test, B_test, rho, omega) # Compute the unweighted 
RMSE (testing/out-of-sample) test_rmse_init_unweighted, test_rmse_final_unweighted, test_rmse_init_R_unweighted, \ test_rmse_init_t_unweighted, test_rmse_final_R_unweighted, \ test_rmse_final_t_unweighted = compute_rmse_unweighted(initial_guess_odom1_odom2, X_opt_T, A_test, B_test) # Concatenate all RMSE values for testing/out-of-sample test_rmses = [ test_rmse_init_unweighted, test_rmse_final_unweighted, test_rmse_init_weighted, test_rmse_final_weighted, test_rmse_init_R_unweighted, test_rmse_init_t_unweighted, test_rmse_final_R_unweighted, test_rmse_final_t_unweighted, test_rmse_init_R_weighted, test_rmse_init_t_weighted, test_rmse_final_R_weighted, test_rmse_final_t_weighted ] # Display and save RMSEs outpath = os.path.join( analysis_results_path, "test_rmse_{}_{}.txt".format(types2sensors[type_1], types2sensors[type_2])) display_and_save_rmse(test_rmses, outpath) # Save final estimates final_estimate_outpath = os.path.join( final_estimates_path, "{}_{}.txt".format(types2sensors[type_1], types2sensors[type_2])) np.savetxt(final_estimate_outpath, X_opt_T) # Finally, increment k k += 1
def main():
    """Main function to run nonlinear manifold optimization on SE(3) to
    estimate an optimal relative pose transformation between coordinate
    frames given by the different lidar sensors."""
    # Extract and process the CSVs
    main_odometry = relative_pose_processing.process_df(MAIN_ODOM_CSV)
    front_odometry = relative_pose_processing.process_df(FRONT_ODOM_CSV)
    rear_odometry = relative_pose_processing.process_df(REAR_ODOM_CSV)

    # Process poses
    (main_aligned, front_aligned, rear_aligned) = relative_pose_processing.align_df(
        [main_odometry, front_odometry, rear_odometry])

    # Get ICP covariance matrices
    # Main lidar odometry
    main_icp, main_trans_cov, main_trans_cov_max, \
        main_trans_cov_avg, main_rot_cov, main_rot_cov_max, \
        main_rot_cov_avg, main_reject = parse_icp_cov(
            main_odometry, type="main", reject_thr=REJECT_THR)

    # Front lidar odometry
    front_icp, front_trans_cov, front_trans_cov_max, \
        front_trans_cov_avg, front_rot_cov, front_rot_cov_max, \
        front_rot_cov_avg, front_reject = parse_icp_cov(
            front_odometry, type="front", reject_thr=REJECT_THR)

    # Rear lidar odometry
    rear_icp, rear_trans_cov, rear_trans_cov_max, \
        rear_trans_cov_avg, rear_rot_cov, rear_rot_cov_max, \
        rear_rot_cov_avg, rear_reject = parse_icp_cov(
            rear_odometry, type="rear", reject_thr=REJECT_THR)

    # Calculate relative poses
    (main_aligned, main_rel_poses) = relative_pose_processing.calc_rel_poses(main_aligned)
    (front_aligned, front_rel_poses) = relative_pose_processing.calc_rel_poses(front_aligned)
    (rear_aligned, rear_rel_poses) = relative_pose_processing.calc_rel_poses(rear_aligned)

    # Compute weights for the weighted estimate
    cov_t_main, cov_R_main = compute_weights_euler(main_aligned)
    cov_t_front, cov_R_front = compute_weights_euler(front_aligned)
    cov_t_rear, cov_R_rear = compute_weights_euler(rear_aligned)

    # Extract a single scalar using the maximum value from rotation and translation
    var_t_main = extract_variance(cov_t_main, mode="max")
    var_R_main = extract_variance(cov_R_main, mode="max")
    var_t_front = extract_variance(cov_t_front, mode="max")
    var_R_front = extract_variance(cov_R_front, mode="max")
    var_t_rear = extract_variance(cov_t_rear, mode="max")
    var_R_rear = extract_variance(cov_R_rear, mode="max")

    # Optimization (1): Instantiate a manifold
    translation_manifold = Euclidean(3)  # Translation vector
    so3 = Rotations(3)  # Rotation matrix
    manifold = Product((so3, translation_manifold))  # Instantiate manifold

    # Get initial guesses for our estimations
    if os.path.exists(PKL_POSES_PATH):  # Check to make sure path exists
        transforms_dict = load_transforms(PKL_POSES_PATH)  # Loads relative transforms

    # Now get initial guesses from the relative poses
    initial_guess_main_front = transforms_dict["velodyne_front"]  # Main to front (T^{V}_{F})
    initial_guess_main_rear = transforms_dict["velodyne_rear"]    # Main to rear (T^{V}_{B})
    initial_guess_front_rear = np.linalg.inv(initial_guess_main_front) @ \
        initial_guess_main_rear  # Relative transform from front to rear
    direct_initial_guess_front_rear = transforms_dict["direct_front_rear"]  # Computed directly

    # Print out all the initial estimates as poses
    print("INITIAL GUESS MAIN FRONT: \n {} \n".format(initial_guess_main_front))
    print("INITIAL GUESS MAIN REAR: \n {} \n".format(initial_guess_main_rear))
    print("INITIAL GUESS FRONT REAR: \n {} \n".format(initial_guess_front_rear))
    print("INITIAL GUESS DIRECT FRONT REAR: \n {} \n".format(
        direct_initial_guess_front_rear))

    # Get rotation matrices and translation vectors for the initial guesses
    R0_main_front, t0_main_front = initial_guess_main_front[:3, :3], \
        initial_guess_main_front[:3, 3]
    X0_main_front = (R0_main_front, t0_main_front)
    print("INITIAL GUESS MAIN FRONT: \n R0: \n {} \n\n t0: \n {} \n".format(
        R0_main_front, t0_main_front))

    R0_main_rear, t0_main_rear = initial_guess_main_rear[:3, :3], \
        initial_guess_main_rear[:3, 3]
    X0_main_rear = (R0_main_rear, t0_main_rear)
    print("INITIAL GUESS MAIN REAR: \n R0: \n {} \n\n t0: \n {} \n".format(
        R0_main_rear, t0_main_rear))

    R0_front_rear, t0_front_rear = initial_guess_front_rear[:3, :3], \
        initial_guess_front_rear[:3, 3]
    X0_front_rear = (R0_front_rear, t0_front_rear)
    print("INITIAL GUESS FRONT REAR: \n R0: \n {} \n\n t0: \n {} \n".format(
        R0_front_rear, t0_front_rear))

    ######################## MAIN FRONT CALIBRATION ################################
    # Carry out optimization for main-front homogeneous transformations

    ### PARAMETERS ###
    A = np.array(front_rel_poses)  # First set of poses
    B = np.array(main_rel_poses)   # Second set of poses
    N = min(A.shape[0], B.shape[0])
    r = np.logical_or(np.array(main_reject[:N]),
                      np.array(front_reject[:N]))  # If either has high variance, reject the sample
    omega = np.max([var_R_main, var_R_front])  # Max rotational variance across odometries
    rho = np.max([var_t_main, var_t_front])    # Max translational variance across odometries
    ### PARAMETERS ###

    cost_main_front = lambda x: cost(x, A, B, r, rho, omega, WEIGHTED)
    # (2a) Set up the optimization between main and front
    problem_main_front = Problem(manifold=manifold, cost=cost_main_front)
    solver_main_front = CustomSteepestDescent()  # (3) Instantiate a Pymanopt solver
    Xopt_main_front = solver_main_front.solve(problem_main_front, x=X0_main_front)
    print("Initial Guess for Main-Front Transformation: \n {}".format(
        initial_guess_main_front))
    print("Optimal solution between main and front reference frames: \n {}".format(
        Xopt_main_front))

    # Take intermediate values for plotting
    estimates_x_main_front = solver_main_front.estimates
    errors_main_front = solver_main_front.errors
    iters_main_front = solver_main_front.iterations

    # Metrics dictionaries
    estimates_dict_main_front = {
        i: T for i, T in zip(iters_main_front, estimates_x_main_front)}
    error_dict_main_front = {
        i: e for i, e in zip(iters_main_front, errors_main_front)}

    # Save intermediate results to pkl files
    estimates_fname_main_front = os.path.join(ANALYSIS_RESULTS_PATH,
                                              "estimates_main_front.pkl")
    error_fname_main_front = os.path.join(ANALYSIS_RESULTS_PATH,
                                          "error_main_front.pkl")

    # Save estimates to pickle file
    with open(estimates_fname_main_front, "wb") as pkl_estimates:
        pickle.dump(estimates_dict_main_front, pkl_estimates)

    # Save error to pickle file
    with open(error_fname_main_front, "wb") as pkl_error:
        pickle.dump(error_dict_main_front, pkl_error)

    # Calculate difference between initial guess and final estimate
    XOpt_T_main_front = construct_pose(Xopt_main_front[0],
                                       Xopt_main_front[1].reshape((3, 1)))
    print("DIFFERENCE IN MATRICES: \n {}".format(
        np.subtract(XOpt_T_main_front, initial_guess_main_front)))

    # Compute the weighted and unweighted RMSE
    rmse_init_weighted, rmse_final_weighted, rmse_init_R_weighted, \
        rmse_init_t_weighted, rmse_final_R_weighted, \
        rmse_final_t_weighted = compute_rmse_weighted(
            initial_guess_main_front, XOpt_T_main_front, A, B, rho, omega)
    rmse_init_unweighted, rmse_final_unweighted, rmse_init_R_unweighted, \
        rmse_init_t_unweighted, rmse_final_R_unweighted, \
        rmse_final_t_unweighted = compute_rmse_unweighted(
            initial_guess_main_front, XOpt_T_main_front, A, B)
    rmses = [rmse_init_unweighted, rmse_final_unweighted, rmse_init_weighted,
             rmse_final_weighted, rmse_init_R_unweighted, rmse_init_t_unweighted,
             rmse_final_R_unweighted, rmse_final_t_unweighted,
             rmse_init_R_weighted, rmse_init_t_weighted,
             rmse_final_R_weighted, rmse_final_t_weighted]

    # Display and save RMSEs
    outpath = os.path.join(ANALYSIS_RESULTS_PATH, "main_front_rmse.txt")
    display_and_save_rmse(rmses, outpath)

    # Save final estimates
    final_estimate_outpath = os.path.join(FINAL_ESTIMATES_PATH, "main_front_final.txt")
    np.savetxt(final_estimate_outpath, XOpt_T_main_front)
    ################################################################################

    ######################## MAIN REAR CALIBRATION #################################
    ### PARAMETERS ###
    A = np.array(rear_rel_poses)  # First set of poses
    B = np.array(main_rel_poses)  # Second set of poses
    N = min(A.shape[0], B.shape[0])
    r = np.logical_or(np.array(main_reject[:N]),
                      np.array(rear_reject[:N]))  # If either has high variance, reject the sample
    omega = np.max([var_R_main, var_R_rear])  # Max rotational variance across odometries
    rho = np.max([var_t_main, var_t_rear])    # Max translational variance across odometries
    ### PARAMETERS ###

    cost_main_rear = lambda x: cost(x, A, B, r, rho, omega, WEIGHTED)
    # (2a) Set up the optimization between main and rear
    problem_main_rear = Problem(manifold=manifold, cost=cost_main_rear)
    solver_main_rear = CustomSteepestDescent()  # (3) Instantiate a Pymanopt solver
    Xopt_main_rear = solver_main_rear.solve(problem_main_rear, x=X0_main_rear)
    print("Initial Guess for Main-Rear Transformation: \n {}".format(
        initial_guess_main_rear))
    print("Optimal solution between main and rear reference frames: \n {}".format(
        Xopt_main_rear))

    # Take intermediate values for plotting
    estimates_x_main_rear = solver_main_rear.estimates
    errors_main_rear = solver_main_rear.errors
    iters_main_rear = solver_main_rear.iterations

    # Metrics dictionaries
    estimates_dict_main_rear = {
        i: T for i, T in zip(iters_main_rear, estimates_x_main_rear)}
    error_dict_main_rear = {
        i: e for i, e in zip(iters_main_rear, errors_main_rear)}

    # Save intermediate results to pkl files
    estimates_fname_main_rear = os.path.join(ANALYSIS_RESULTS_PATH,
                                             "estimates_main_rear.pkl")
    error_fname_main_rear = os.path.join(ANALYSIS_RESULTS_PATH,
                                         "error_main_rear.pkl")

    # Save estimates to pickle file
    with open(estimates_fname_main_rear, "wb") as pkl_estimates:
        pickle.dump(estimates_dict_main_rear, pkl_estimates)

    # Save error to pickle file
    with open(error_fname_main_rear, "wb") as pkl_error:
        pickle.dump(error_dict_main_rear, pkl_error)

    # Calculate difference between initial guess and final estimate
    XOpt_T_main_rear = construct_pose(Xopt_main_rear[0],
                                      Xopt_main_rear[1].reshape((3, 1)))
    print("DIFFERENCE IN MATRICES: \n {}".format(
        np.subtract(XOpt_T_main_rear, initial_guess_main_rear)))

    # Compute the weighted and unweighted RMSE
    rmse_init_weighted, rmse_final_weighted, rmse_init_R_weighted, \
        rmse_init_t_weighted, rmse_final_R_weighted, \
        rmse_final_t_weighted = compute_rmse_weighted(
            initial_guess_main_rear, XOpt_T_main_rear, A, B, rho, omega)
    rmse_init_unweighted, rmse_final_unweighted, rmse_init_R_unweighted, \
        rmse_init_t_unweighted, rmse_final_R_unweighted, \
        rmse_final_t_unweighted = compute_rmse_unweighted(
            initial_guess_main_rear, XOpt_T_main_rear, A, B)
    rmses = [rmse_init_unweighted, rmse_final_unweighted, rmse_init_weighted,
             rmse_final_weighted, rmse_init_R_unweighted, rmse_init_t_unweighted,
             rmse_final_R_unweighted, rmse_final_t_unweighted,
             rmse_init_R_weighted, rmse_init_t_weighted,
             rmse_final_R_weighted, rmse_final_t_weighted]

    # Display and save RMSEs
    outpath = os.path.join(ANALYSIS_RESULTS_PATH, "main_rear_rmse.txt")
    display_and_save_rmse(rmses, outpath)

    # Save final estimates
    final_estimate_outpath = os.path.join(FINAL_ESTIMATES_PATH, "main_rear_final.txt")
    np.savetxt(final_estimate_outpath, XOpt_T_main_rear)
    ################################################################################

    ######################## FRONT REAR CALIBRATION ################################
    ### PARAMETERS ###
    A = np.array(rear_rel_poses)   # First set of poses
    B = np.array(front_rel_poses)  # Second set of poses
    N = min(A.shape[0], B.shape[0])
    r = np.logical_or(np.array(front_reject[:N]),
                      np.array(rear_reject[:N]))  # If either has high variance, reject the sample
    omega = np.max([var_R_front, var_R_rear])  # Max rotational variance across odometries
    rho = np.max([var_t_front, var_t_rear])    # Max translational variance across odometries
    ### PARAMETERS ###

    cost_front_rear = lambda x: cost(x, A, B, r, rho, omega, WEIGHTED)
    # (2a) Set up the optimization between front and rear
    problem_front_rear = Problem(manifold=manifold, cost=cost_front_rear)
    solver_front_rear = CustomSteepestDescent()  # (3) Instantiate a Pymanopt solver
    Xopt_front_rear = solver_front_rear.solve(problem_front_rear, x=X0_front_rear)
    print("Initial Guess for Front-Rear Transformation: \n {}".format(
        initial_guess_front_rear))
    print("Optimal solution between front and rear reference frames: \n {}".format(
        Xopt_front_rear))

    # Take intermediate values for plotting
    estimates_x_front_rear = solver_front_rear.estimates
    errors_front_rear = solver_front_rear.errors
    iters_front_rear = solver_front_rear.iterations

    # Metrics dictionaries
    estimates_dict_front_rear = {
        i: T for i, T in zip(iters_front_rear, estimates_x_front_rear)}
    error_dict_front_rear = {
        i: e for i, e in zip(iters_front_rear, errors_front_rear)}

    # Save intermediate results to pkl files
    estimates_fname_front_rear = os.path.join(ANALYSIS_RESULTS_PATH,
                                              "estimates_front_rear.pkl")
    error_fname_front_rear = os.path.join(ANALYSIS_RESULTS_PATH,
                                          "error_front_rear.pkl")

    # Save estimates to pickle file
    with open(estimates_fname_front_rear, "wb") as pkl_estimates:
        pickle.dump(estimates_dict_front_rear, pkl_estimates)

    # Save error to pickle file
    with open(error_fname_front_rear, "wb") as pkl_error:
        pickle.dump(error_dict_front_rear, pkl_error)

    # Calculate difference between initial guess and final estimate
    XOpt_T_front_rear = construct_pose(Xopt_front_rear[0],
                                       Xopt_front_rear[1].reshape((3, 1)))
    print("DIFFERENCE IN MATRICES: \n {}".format(
        np.subtract(XOpt_T_front_rear, initial_guess_front_rear)))

    # Compute the weighted and unweighted RMSE
    rmse_init_weighted, rmse_final_weighted, rmse_init_R_weighted, \
        rmse_init_t_weighted, rmse_final_R_weighted, \
        rmse_final_t_weighted = compute_rmse_weighted(
            initial_guess_front_rear, XOpt_T_front_rear, A, B, rho, omega)
    rmse_init_unweighted, rmse_final_unweighted, rmse_init_R_unweighted, \
        rmse_init_t_unweighted, rmse_final_R_unweighted, \
        rmse_final_t_unweighted = compute_rmse_unweighted(
            initial_guess_front_rear, XOpt_T_front_rear, A, B)
    rmses = [rmse_init_unweighted, rmse_final_unweighted, rmse_init_weighted,
             rmse_final_weighted, rmse_init_R_unweighted, rmse_init_t_unweighted,
             rmse_final_R_unweighted, rmse_final_t_unweighted,
             rmse_init_R_weighted, rmse_init_t_weighted,
             rmse_final_R_weighted, rmse_final_t_weighted]

    # Display and save RMSEs
    outpath = os.path.join(ANALYSIS_RESULTS_PATH, "front_rear_rmse.txt")
    display_and_save_rmse(rmses, outpath)

    # Save final estimates
    final_estimate_outpath = os.path.join(FINAL_ESTIMATES_PATH, "front_rear_final.txt")
    np.savetxt(final_estimate_outpath, XOpt_T_front_rear)
    ################################################################################

    # Display all results
    print("_________________________________________________________")
    print("_____________________ALL RESULTS_________________________")
    print("_________________________________________________________")
    print("Initial Guess for Main-Front Transformation: \n {}".format(
        initial_guess_main_front))
    print("Optimal solution between main and front reference frames: \n {}".format(
        Xopt_main_front))
    print("_________________________________________________________")
    print("Initial Guess for Main-Rear Transformation: \n {}".format(
        initial_guess_main_rear))
    print("Optimal solution between main and rear reference frames: \n {}".format(
        Xopt_main_rear))
    print("_________________________________________________________")
    print("Initial Guess for Front-Rear Transformation: \n {}".format(
        initial_guess_front_rear))
    print("Optimal solution between front and rear reference frames: \n {}".format(
        Xopt_front_rear))
    print("_________________________________________________________")
def compute_dsmi(fval):
    fpc = fval[:, ipc].reshape(topo_shape)
    smi = fpc[-1, :] / np.max(fpc, 0)
    dsmi = smi[1] - smi[5]
    return dsmi
def polyinterp(points, doPlot=None, xminBound=None, xmaxBound=None):
    """Polynomial interpolation.

    Parameters
    ----------
    points: shape (pointNum, 3); the three columns represent x, f, g
    doPlot: set to 1 to plot, default 0
    xminBound: min value that brackets the minimum (default: min of points)
    xmaxBound: max value that brackets the maximum (default: max of points)

    Set f or g to sqrt(-1) = 1j if they are not known. The order of the
    polynomial is the number of known f and g values minus 1.

    Returns
    -------
    minPos:
    fmin:
    """
    if doPlot is None:
        doPlot = 0

    nPoints = points.shape[0]
    order = np.sum(np.imag(points[:, 1:3]) == 0) - 1

    # code for most common case: cubic interpolation of 2 points
    if nPoints == 2 and order == 3 and doPlot == 0:
        minVal, minPos = np.min(points[:, 0]), np.argmin(points[:, 0])
        notMinPos = 1 - minPos
        d1 = points[minPos, 2] + points[notMinPos, 2] - \
            3 * (points[minPos, 1] - points[notMinPos, 1]) / \
            (points[minPos, 0] - points[notMinPos, 0])
        t_d2 = d1**2 - points[minPos, 2] * points[notMinPos, 2]
        if t_d2 > 0:
            d2 = np.sqrt(t_d2)
        else:
            d2 = np.sqrt(-t_d2) * 1j
        if np.isreal(d2):
            t = points[notMinPos, 0] - (points[notMinPos, 0] - points[minPos, 0]) * \
                ((points[notMinPos, 2] + d2 - d1) /
                 (points[notMinPos, 2] - points[minPos, 2] + 2 * d2))
            minPos = np.min([np.max([t, points[minPos, 0]]), points[notMinPos, 0]])
        else:
            minPos = np.mean(points[:, 0])
        fmin = minVal
        return (minPos, fmin)

    xmin = np.min(points[:, 0])
    xmax = np.max(points[:, 0])

    # compute bounds of interpolation area
    if xminBound is None:
        xminBound = xmin
    if xmaxBound is None:
        xmaxBound = xmax

    # constraints based on available function values
    A = np.zeros((0, order + 1))
    b = np.zeros((0, 1))
    for i in range(nPoints):
        if np.imag(points[i, 1]) == 0:
            constraint = np.zeros(order + 1)
            for j in np.arange(order, -1, -1):
                constraint[order - j] = points[i, 0]**j
            A = np.vstack((A, constraint))
            b = np.append(b, points[i, 1])

    # constraints based on available derivatives
    for i in range(nPoints):
        if np.isreal(points[i, 2]):
            constraint = np.zeros(order + 1)
            for j in range(1, order + 1):
                constraint[j - 1] = (order - j + 1) * points[i, 0]**(order - j)
            A = np.vstack((A, constraint))
            b = np.append(b, points[i, 2])

    # find interpolating polynomial
    params = np.linalg.solve(A, b)

    # compute critical points
    dParams = np.zeros(order)
    for i in range(params.size - 1):
        dParams[i] = params[i] * (order - i)

    if np.any(np.isinf(dParams)):
        cp = np.concatenate((np.array([xminBound, xmaxBound]), points[:, 0]))
    else:
        cp = np.concatenate((np.array([xminBound, xmaxBound]), points[:, 0],
                             np.roots(dParams)))

    # test critical points
    fmin = np.inf
    minPos = (xminBound + xmaxBound) / 2.
    for xCP in cp:
        if np.imag(xCP) == 0 and xCP >= xminBound and xCP <= xmaxBound:
            fCP = np.polyval(params, xCP)
            if np.imag(fCP) == 0 and fCP < fmin:
                minPos = np.double(np.real(xCP))
                fmin = np.double(np.real(fCP))

    # plotting omitted since it is not used
    return (minPos, fmin)
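# Hedged usage sketch for polyinterp: cubic interpolation between two
# (x, f, g) triples; the numbers below are invented for illustration only.
#   pts = np.array([[0.0, 1.0, -1.0],
#                   [1.0, 0.9, 0.8]])
#   minPos, fmin = polyinterp(pts)  # takes the 2-point cubic fast path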
def logsumexp(x):
    """Numerically stable log(sum(exp(x))), also defined in scipy.special"""
    max_x = np.max(x)
    return max_x + np.log(np.sum(np.exp(x - max_x)))
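# Why the max shift matters (illustrative): the naive form overflows where
# the shifted form stays finite.
#   x = np.array([1000.0, 1000.0])
#   logsumexp(x)               # ~1000.6931 (= 1000 + log 2)
#   np.log(np.sum(np.exp(x)))  # inf, after an overflow warning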
def defaultmax(x, default=-np.inf):
    if x.size == 0:
        return default
    return np.max(x)
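# Why the guard: np.max raises on empty input, so defaultmax supplies a safe
# identity element for reductions over possibly-empty arrays.
#   defaultmax(np.array([]))          # -inf
#   defaultmax(np.array([3.0, 1.0]))  # 3.0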
def optimize(self, w_theta, Waa, Wsa, wa, varphis, Kt, prec, is_valid_eta_omega,
             old_entropy, eta=None):
    # wa = w_beta * \grad_beta \varphi_beta(s) * K^T * Prec

    if eta is not None:
        param_eta = eta
    else:
        param_eta = self.param_eta

    if self.beta == 0:
        beta = 0
    else:
        beta = old_entropy - self.beta

    x0 = [param_eta, self.param_omega]

    # Create dual function
    def eval_dual(input):
        param_eta = input[0]
        param_omega = input[1]

        # ha(s): eta * (\varphi(s)^T * K^T * \Sigma^{-1} + W_{sa}) + wa(s)
        ha = np.dot(varphis, param_eta * np.dot(Kt, prec) + Wsa) + wa

        # hss(s): eta * (\varphi(s)^T * K^T * \Sigma^{-1} * K * \varphi(s))
        varphisKt = np.dot(varphis, Kt)
        hss = param_eta * np.sum(np.dot(varphisKt, prec) * varphisKt, axis=1)

        Haa = param_eta * prec + Waa
        HaaInv = np.linalg.inv(Haa)

        # The two terms 'term1' and 'term2' come from the normalizers of
        # 1. the original policy distribution
        # 2. the distribution after completing the square
        sigma = np.linalg.inv(prec)
        term1 = -0.5 * param_eta * np.linalg.slogdet(2 * np.pi * sigma)[1]
        if self.beta == 0:
            term2 = 0.5 * param_eta * np.linalg.slogdet(
                2 * np.pi * param_eta * HaaInv)[1]
        else:
            term2 = 0.5 * (param_eta + param_omega) * np.linalg.slogdet(
                2 * np.pi * (param_eta + param_omega) * HaaInv)[1]

        dual = param_eta * self.epsilon - param_omega * beta + \
            term1 + term2 + np.mean(
                0.5 * (np.sum(np.dot(ha, HaaInv) * ha, axis=1) - hss))
        return dual

    # Automatic gradient of the dual
    eval_dual_grad = grad(eval_dual)

    def fx(x):
        # eta: Lagrange variable of the KL constraint,
        # omega: of the entropy constraint
        eta, omega = x
        error_return_val = 1e6, np.array([0., 0.])
        if eta + omega < 0:
            return error_return_val
        if not is_valid_eta_omega(eta, omega, w_theta):
            return error_return_val
        return eval_dual(x), eval_dual_grad(x)

    logger.log('optimizing dual')

    # Make sure the initial covariance matrices are valid
    while not is_valid_eta_omega(x0[0], x0[1], w_theta):
        x0[0] *= 2
        logger.log("Eta increased: " + str(x0[0]))

    if eta is None:
        omega_lower = -100
        # (an L-BFGS-B call with the same bounds is a drop-in alternative)
        res = scipy.optimize.minimize(
            fx, x0, method='SLSQP', jac=True,
            bounds=((1e-12, None), (omega_lower, None)),
            options={'ftol': 1e-12})
        # Make sure that eta > omega
        if res.x[1] < 0 and -res.x[1] > res.x[0]:
            res.x[1] = -res.x[0] + 1e-6
    else:
        # Fixed eta: make sure that eta > omega
        omega_lower = np.max([-(eta - 1e-3) + 1e-6, -100])
        res = scipy.optimize.minimize(
            fx, x0, method='SLSQP', jac=True,
            bounds=((eta - 1e-3, eta + 1e-3), (omega_lower, None)),
            options={'ftol': 1e-16})

    if self.beta == 0:
        res.x[1] = 0

    logger.log("dual optimized, eta: " + str(res.x[0]) +
               ", omega: " + str(res.x[1]))
    return res.x[0], res.x[1]
def logsumexp(X, axis=1):
    max_X = np.max(X)
    return max_X + np.log(np.sum(np.exp(X - max_X), axis=axis, keepdims=True))
def show_encode_decode(x, wrapper, **kwargs):
    # strip instruments off autoencoder wrapper
    cost_history = wrapper.cost_history
    weight_history = wrapper.weight_history
    encoder = wrapper.encoder
    decoder = wrapper.decoder
    normalizer = wrapper.normalizer
    inverse_normalizer = wrapper.inverse_normalizer

    # show projection map or not
    projmap = False
    if 'projmap' in kwargs:
        projmap = kwargs['projmap']

    # for projection map drawing - arrow size
    scale = 14
    if 'scale' in kwargs:
        scale = kwargs['scale']

    # pluck out best weights from run
    ind = np.argmin(cost_history)
    w_best = weight_history[ind]

    ###### figure 1 - original data, encoded data, decoded data ######
    fig = plt.figure(figsize=(10, 3))
    gs = gridspec.GridSpec(1, 3)
    ax1 = plt.subplot(gs[0], aspect='equal')
    ax2 = plt.subplot(gs[1], aspect='equal')
    ax3 = plt.subplot(gs[2], aspect='equal')

    # scatter original data
    ax1.scatter(x[0, :], x[1, :], c='k', s=60, linewidth=0.75, edgecolor='w')

    ### plot encoded and decoded data ###
    # create encoded vectors, then decode onto basis
    v = encoder(normalizer(x), w_best[0])
    p = inverse_normalizer(decoder(v, w_best[1]))

    # plot decoded data
    ax3.scatter(p[0, :], p[1, :], c='k', s=60, linewidth=0.75, edgecolor='r')

    # define range for manifold
    xmin1 = np.min(x[0, :])
    xmax1 = np.max(x[0, :])
    xmin2 = np.min(x[1, :])
    xmax2 = np.max(x[1, :])
    xgap1 = (xmax1 - xmin1) * 0.2
    xgap2 = (xmax2 - xmin2) * 0.2
    xmin1 -= xgap1
    xmax1 += xgap1
    xmin2 -= xgap2
    xmax2 += xgap2

    # plot learned manifold
    a = np.linspace(xmin1, xmax1, 200)
    b = np.linspace(xmin2, xmax2, 200)
    s, t = np.meshgrid(a, b)
    s.shape = (1, len(a)**2)
    t.shape = (1, len(b)**2)
    z = np.vstack((s, t))

    # create encoded vectors, then decode onto basis
    v = encoder(normalizer(z), w_best[0])
    p = inverse_normalizer(decoder(v, w_best[1]))

    # scatter
    ax2.scatter(p[0, :], p[1, :], c='k', s=1.5, edgecolor='r', linewidth=1, zorder=0)
    ax3.scatter(p[0, :], p[1, :], c='k', s=1.5, edgecolor='r', linewidth=1, zorder=0)

    for ax in [ax1, ax2, ax3]:
        ax.set_xlim([xmin1, xmax1])
        ax.set_ylim([xmin2, xmax2])
        ax.set_xlabel(r'$x_1$', fontsize=16)
        ax.set_ylabel(r'$x_2$', fontsize=16, rotation=0, labelpad=10)
        ax.axvline(linewidth=0.5, color='k', zorder=0)
        ax.axhline(linewidth=0.5, color='k', zorder=0)

    ax1.set_title('original data', fontsize=18)
    ax2.set_title('learned manifold', fontsize=18)
    ax3.set_title('decoded data', fontsize=18)

    ##### bottom panel - plot subspace and quiver plot of projections ####
    if projmap == True:
        fig = plt.figure(figsize=(10, 4))
        gs = gridspec.GridSpec(1, 1)
        ax1 = plt.subplot(gs[0], aspect='equal')
        ax1.scatter(p[0, :], p[1, :], c='r', s=9.5)
        ax1.scatter(p[0, :], p[1, :], c='k', s=1.5)

        ### create quiver plot of how data is projected ###
        new_scale = 0.75
        a = np.linspace(xmin1 - xgap1 * new_scale, xmax1 + xgap1 * new_scale, 20)
        b = np.linspace(xmin2 - xgap2 * new_scale, xmax2 + xgap2 * new_scale, 20)
        s, t = np.meshgrid(a, b)
        s.shape = (1, len(a)**2)
        t.shape = (1, len(b)**2)
        z = np.vstack((s, t))

        # create encoded vectors, then decode onto basis
        v = encoder(normalizer(z), w_best[0])
        p = inverse_normalizer(decoder(v, w_best[1]))

        # get directions
        d = []
        for i in range(p.shape[1]):
            dr = (p[:, i] - z[:, i])[:, np.newaxis]
            d.append(dr)
        d = 2 * np.array(d)
        d = d[:, :, 0].T

        M = np.hypot(d[0, :], d[1, :])
        ax1.quiver(z[0, :], z[1, :], d[0, :], d[1, :], M, alpha=0.5,
                   width=0.01, scale=scale, cmap='autumn')
        ax1.quiver(z[0, :], z[1, :], d[0, :], d[1, :], edgecolor='k',
                   linewidth=0.25, facecolor='None', width=0.01, scale=scale)

        #### clean up and label panel ####
        ax1.set_xlim([xmin1, xmax1])
        ax1.set_ylim([xmin2, xmax2])
        ax1.set_xlabel(r'$x_1$', fontsize=16)
        ax1.set_ylabel(r'$x_2$', fontsize=16, rotation=0, labelpad=10)
        ax1.set_title('projection map', fontsize=18)

        # set the spacing between axes
        gs.update(wspace=0.01, hspace=0.5)
def pool_function(self, tensor_window):
    # max over the spatial dimensions of each window
    t = np.max(tensor_window, axis=(1, 2))
    return t
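# Expected window layout (shapes assumed for illustration): a batch of
# windows with shape (n_windows, win_h, win_w) reduces to one max per window.
#   windows = np.arange(8).reshape(2, 2, 2)
#   np.max(windows, axis=(1, 2))  # array([3, 7])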
def single_input_plot(self, g, weight_histories, cost_histories, **kwargs):
    # adjust viewing range
    wmin = -3.1
    wmax = 3.1
    if 'wmin' in kwargs:
        wmin = kwargs['wmin']
    if 'wmax' in kwargs:
        wmax = kwargs['wmax']

    onerun_perplot = False
    if 'onerun_perplot' in kwargs:
        onerun_perplot = kwargs['onerun_perplot']

    ### initialize figure
    fig = plt.figure(figsize=(9, 4))
    artist = fig

    # create subplot with 2 panels
    gs = gridspec.GridSpec(1, 2, width_ratios=[1, 1])
    ax1 = plt.subplot(gs[0])
    ax2 = plt.subplot(gs[1])

    ### plot function in both panels
    w_plot = np.linspace(wmin, wmax, 500)
    g_plot = g(w_plot)
    gmin = np.min(g_plot)
    gmax = np.max(g_plot)
    g_range = gmax - gmin
    ggap = g_range * 0.1
    gmin -= ggap
    gmax += ggap

    # plot function and axes lines in each panel
    for ax in [ax1, ax2]:
        ax.plot(w_plot, g_plot, color='k', zorder=2)  # plot function
        ax.axhline(y=0, color='k', zorder=1, linewidth=0.25)
        ax.axvline(x=0, color='k', zorder=1, linewidth=0.25)
        ax.set_xlabel(r'$w$', fontsize=13)
        ax.set_ylabel(r'$g(w)$', fontsize=13, rotation=0, labelpad=25)
        ax.set_xlim(wmin, wmax)
        ax.set_ylim(gmin, gmax)

    #### loop over histories and plot each
    for j in range(len(weight_histories)):
        w_hist = weight_histories[j]
        c_hist = cost_histories[j]

        # colors for points --> green as the algorithm begins, yellow as it
        # converges, red at the final point
        s = np.linspace(0, 1, len(w_hist[:round(len(w_hist) / 2)]))
        s.shape = (len(s), 1)
        t = np.ones(len(w_hist[round(len(w_hist) / 2):]))
        t.shape = (len(t), 1)
        s = np.vstack((s, t))
        self.colorspec = np.concatenate((s, np.flipud(s)), 1)
        self.colorspec = np.concatenate((self.colorspec, np.zeros((len(s), 1))), 1)

        ### plot all history points
        ax = ax2
        if onerun_perplot == True:
            if j == 0:
                ax = ax1
            if j == 1:
                ax = ax2

        for k in range(len(w_hist)):
            # pick out current weight and function value from history, then plot
            w_val = w_hist[k]
            g_val = c_hist[k]

            # evaluation on the function, and its mark on the w axis
            ax.scatter(w_val, g_val, s=90, c=self.colorspec[k], edgecolor='k',
                       linewidth=0.5 * ((1 / (float(k) + 1)))**(0.4),
                       zorder=3, marker='X')
            ax.scatter(w_val, 0, s=90, facecolor=self.colorspec[k], edgecolor='k',
                       linewidth=0.5 * ((1 / (float(k) + 1)))**(0.4), zorder=3)
def get_route(tree):
    # walks a tree dict and extracts its routing structure
    tree_dict = {}
    boundary_dict = {}
    leaf_dict = {}

    def recurse(sub_tree, child_split=None, parent=None):
        route_path = {}
        if "threshold" in sub_tree:
            boundary_dict[sub_tree["split_index"]] = {
                "column": sub_tree["split_feature"],
                "value": sub_tree["threshold"],
            }
            if "split_index" in sub_tree["left_child"]:
                boundary_dict[sub_tree["split_index"]]["left"] = sub_tree["left_child"]["split_index"]
            if "split_index" in sub_tree["right_child"]:
                boundary_dict[sub_tree["split_index"]]["right"] = sub_tree["right_child"]["split_index"]
        else:
            # we're a leaf!
            leaf_dict[parent] = leaf_dict.get(parent, {})
            leaf_dict[parent][child_split] = sub_tree

        if "left_child" in sub_tree and "split_index" in sub_tree["left_child"]:
            route_path["left"] = sub_tree["left_child"]["split_index"]
        if "right_child" in sub_tree and "split_index" in sub_tree["right_child"]:
            route_path["right"] = sub_tree["right_child"]["split_index"]

        if len(route_path) > 0:
            tree_dict[sub_tree["split_index"]] = route_path.copy()
        if "left_child" in sub_tree:
            recurse(sub_tree["left_child"], "left", sub_tree["split_index"])
        if "right_child" in sub_tree:
            recurse(sub_tree["right_child"], "right", sub_tree["split_index"])

    recurse(tree)

    # combine leaf_dict and boundary_dict, assigning fresh indices to leaves
    max_index = np.max(list(boundary_dict.keys()))
    for k in leaf_dict.keys():
        for side in ("left", "right"):
            if side in leaf_dict[k]:
                max_index += 1
                boundary_dict[k][side] = max_index
                tree_dict[k] = tree_dict.get(k, {})
                tree_dict[k][side] = max_index
                tree_dict[max_index] = {}
                pred_val = expit(leaf_dict[k][side]["leaf_value"])
                boundary_dict[max_index] = {
                    "predict": np.array([1 - pred_val, pred_val]),
                    "leaf_value": leaf_dict[k][side]["leaf_value"],
                }
    return tree_dict, boundary_dict, leaf_dict
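# A hedged usage sketch for get_route above: toy_tree mimics the nested dict
# produced by LightGBM's dump_model() (field names follow that format; the
# values are made up). numpy as np and scipy.special.expit are assumed to be
# imported, as in the function itself.
toy_tree = {
    "split_index": 0,
    "split_feature": 2,
    "threshold": 0.5,
    "left_child": {"leaf_value": -1.2},
    "right_child": {"leaf_value": 0.8},
}
tree_dict, boundary_dict, leaf_dict = get_route(toy_tree)
# boundary_dict[0] holds the split; indices 1 and 2 hold the two leaf predictions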
def result(self, t = None, anim = False, interval = 50, every_n_iter = 1):
    '''
    Plot the training process as an animation.

    Parameters
    ----------
    t : array, optional
        evaluate at these points
    anim : bool, optional
        whether to plot an animation; default False
    interval : integer, optional
        time between frames, in ms; default 50 ms
    every_n_iter : integer, optional
        plot every n-th iteration of the training process; if the number of
        iterations is large, increase every_n_iter to save computation.
        Default 1, i.e. plot every iteration.
    '''
    if t is None:
        t = self.t
    if anim:
        # training animation
        y_train = np.array([self.predict(t = t.reshape(-1,1), params_list = x) for x in self.x])[::-every_n_iter][::-1]
        n = y_train.shape[1]
        fig, ax = plt.subplots(1, 2, figsize = (16,6))

        # ground truth using scipy
        sol_cont = solve_ivp(self.f, [t.min(), t.max()], self.y0_list, method='Radau', rtol=1e-5)
        sol_dis = solve_ivp(self.f, t_span = [t.min(), t.max()], t_eval = t, y0 = self.y0_list, method='Radau', rtol=1e-5)
        y_diff = np.array([np.array(sol_dis.y[i]) for i in range(n)])

        # set x/y limits using min and max of the last iteration
        ax[0].set_xlim((t[0], t[-1]))
        ax[0].set_ylim((np.min(y_train[-1,:,:]), np.max(y_train[-1,:,:])))
        ax[1].set_xlim((t[0], t[-1]))
        ax[1].set_ylim((np.min(y_train[-1,:,:] - y_diff), np.max(y_train[-1,:,:] - y_diff)))

        # plot ground truth
        for i in range(n):
            ax[0].plot(sol_cont.t, sol_cont.y[i], label='y{}'.format(i+1))

        # scatter plots of the NN predictions
        scatters_pred = []
        scatters_diff = []
        for i in range(n):
            scatters_pred.append(ax[0].scatter([], [], label = 'y_pred{}'.format(i+1)))
            scatters_diff.append(ax[1].scatter([], [], label = 'y{}'.format(i+1)))
        ax[0].legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), fancybox=True, ncol=4)
        ax[1].legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), fancybox=True, ncol=4)
        ax[0].set_title("NN Prediction and Truth", fontsize = 18)
        ax[1].set_title("NN Prediction - Truth", fontsize = 18)

        # initialization function: plot the background of each frame
        def init():
            for k in range(n):
                # empty Nx2 offsets; np.hstack(([], [])) breaks on newer matplotlib
                scatters_pred[k].set_offsets(np.empty((0, 2)))
                scatters_diff[k].set_offsets(np.empty((0, 2)))
            return scatters_pred + scatters_diff

        # animation function, called sequentially
        def animate(i):
            for k in range(n):
                scatters_pred[k].set_offsets(np.c_[t, y_train[i,k,:]])
                scatters_diff[k].set_offsets(np.c_[t, y_train[i,k,:] - y_diff[k]])
            return scatters_pred + scatters_diff

        # call the animator; blit=True means only re-draw the parts that have changed
        anim_pred = animation.FuncAnimation(fig, animate, init_func=init,
                                            frames=y_train.shape[0], interval=interval, blit=True)
        return anim_pred
def get_xm(x_var, x_base):
    return np.matrix([splinify(np.min(x_var), np.max(x_base), 1.0, x) for x in x_var])
def logmeanexp(x):
    # numerically stable log(mean(exp(x))): shift by max(x) before exponentiating
    max_x = np.max(x)
    e_x = np.exp(x - max_x)
    return np.log(np.mean(e_x)) + max_x
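# A minimal check for logmeanexp above (plain numpy): it should agree with the
# naive computation where that does not overflow, and stay finite where it does.
import numpy as np

x = np.array([1.0, 2.0, 3.0])
assert np.isclose(logmeanexp(x), np.log(np.mean(np.exp(x))))

x_big = np.array([1000.0, 1001.0])
print(logmeanexp(x_big))  # ~1000.62, while np.log(np.mean(np.exp(x_big))) overflows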
def forward_pass(self, data, params):
    # 2x2 max pooling via reshape: split each spatial axis into (size//2, 2)
    # and take the max over the two pooling axes
    assert len(data.shape) == 4
    w, h = self.input_shape[2:]
    assert w % 2 == 0 and h % 2 == 0
    data = data.reshape(self.input_shape[:2] + (w // 2, 2, h // 2, 2))
    return np.max(np.max(data, axis=3), axis=4)
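# A small demonstration of the reshape-based 2x2 max-pooling trick used in
# forward_pass above (plain numpy; the shapes are illustrative only).
import numpy as np

data = np.arange(16.0).reshape(1, 1, 4, 4)        # (batch, channel, w, h)
windows = data.reshape(1, 1, 2, 2, 2, 2)          # split each spatial axis into (2, 2)
pooled = np.max(np.max(windows, axis=3), axis=4)  # max over both pooling axes
print(pooled[0, 0])  # [[ 5.  7.] [13. 15.]]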
def fun(x):
    return to_scalar(np.max(np.array([[x, x], [x, 0.5]]), axis=1))
d_fun = lambda x: to_scalar(grad(fun)(x))
def conv_layer(self, tensor, kernels):
    # square up tensor into a tensor of patches
    side = int((np.shape(tensor)[1])**(0.5))
    tensor = np.reshape(tensor, (np.shape(tensor)[0], side, side), order='F')

    # pad tensor
    kernel = kernels[0]
    kernel_size = kernel.shape
    padded_tensor = self.pad_tensor(tensor, kernel_size)

    # window tensor
    wind_tensor = self.sliding_window_tensor(padded_tensor, kernel_size, stride=1)

    #### compute convolution feature maps / downsample via pooling, one map at a time over the entire tensor ####
    kernel2 = np.ones((6, 6))
    stride = 3
    new_tensors = []
    for kernel in kernels:
        # make convolution feature map via matrix multiplication over the windowed tensor
        feature_map = np.dot(wind_tensor, kernel.flatten()[:, np.newaxis])

        # reshape convolution feature map into array
        feature_map = np.reshape(feature_map, np.shape(tensor))

        # pass result through nonlinear activation
        feature_map = self.activation(feature_map)

        # pool / downsample feature map: window first, then max-pool each window
        wind_featmap = self.sliding_window_tensor(feature_map, kernel2.shape, stride=stride)
        max_pool = np.max(wind_featmap, axis=1)

        # reshape into new downsampled pooled feature map
        pool_side = int((np.shape(max_pool)[0] / float(np.shape(tensor)[0]))**(0.5))
        max_pool = np.reshape(max_pool, (np.shape(tensor)[0], pool_side, pool_side))
        new_tensors.append(max_pool)

    # reshape into final feature vector to feed the fully connected layer(s)
    new_tensors = np.array(new_tensors)
    new_tensors = new_tensors.swapaxes(0, 1)
    new_tensors = np.reshape(new_tensors, (np.shape(new_tensors)[0], np.shape(new_tensors)[1],
                                           np.shape(new_tensors)[2] * np.shape(new_tensors)[3]))
    new_tensors = np.reshape(new_tensors, (np.shape(new_tensors)[0],
                                           np.shape(new_tensors)[1] * np.shape(new_tensors)[2]), order='F')
    return new_tensors
def cost(R):
    Z = npy.dot(X, R)
    M = npy.max(Z, axis=1, keepdims=True)
    return npy.sum((Z / M)**2)
def _read_kurucz_spec(f):
    """
    Read Kurucz spectra that have been precomputed

    Args:
        f (string) : path to the file to be read

    Returns:
        new_vel (real array) : velocity axis in km/s
        wavelength (real array) : wavelength axis
        stokes (real array) : spectrum for each velocity bin
    """
    f = open(f, "rb")
    res = f.read()
    f.close()

    n_chunk = struct.unpack('i', res[0:4])

    freq = []
    stokes = []
    cont = []
    left = 4

    for i in range(n_chunk[0]):
        right = left + 4
        n = struct.unpack('i', res[left:right])
        left = right

        right = left + 4
        nmus = struct.unpack('i', res[left:right])
        left = right

        right = left + 8*n[0]
        t1 = np.asarray(struct.unpack('d'*n[0], res[left:right]))
        freq.append(t1)
        left = right

        right = left + 8*n[0]*nmus[0]
        t2 = np.asarray(struct.unpack('d'*n[0]*nmus[0], res[left:right])).reshape((n[0], nmus[0]))
        stokes.append(t2)
        left = right

        right = left + 8*n[0]*nmus[0]
        t2 = np.asarray(struct.unpack('d'*n[0]*nmus[0], res[left:right])).reshape((n[0], nmus[0]))
        cont.append(t2)
        left = right

    freq = np.concatenate(freq)
    stokes = np.concatenate(stokes)
    cont = np.concatenate(cont)

    ind = np.argsort(freq)
    freq = freq[ind]
    stokes = stokes[ind]
    cont = cont[ind]

    wavelength = const.c.to('cm/s').value / freq
    mean_wavelength = np.mean(wavelength)

    vel = (wavelength - mean_wavelength) / mean_wavelength * const.c.to('km/s').value

    nl, nmus = stokes.shape

    # Reinterpolate onto an equidistant velocity axis
    new_vel = np.linspace(np.min(vel), np.max(vel), nl)
    for i in range(nmus):
        interpolator = scipy.interpolate.interp1d(vel, stokes[:,i], kind='linear')
        stokes[:,i] = interpolator(new_vel)

    return new_vel, wavelength, stokes
def animate_it_2d_fit_only(self, savepath, w_hist, **kwargs):
    self.w_hist = w_hist

    ##### setup figure to plot #####
    fig = plt.figure(figsize = (4,4))
    artist = fig
    gs = gridspec.GridSpec(1, 1)
    ax1 = plt.subplot(gs[0])

    # produce color scheme
    s = np.linspace(0, 1, len(self.w_hist[:round(len(self.w_hist)/2)]))
    s.shape = (len(s), 1)
    t = np.ones(len(self.w_hist[round(len(self.w_hist)/2):]))
    t.shape = (len(t), 1)
    s = np.vstack((s, t))
    self.colorspec = np.concatenate((s, np.flipud(s)), 1)
    self.colorspec = np.concatenate((self.colorspec, np.zeros((len(s), 1))), 1)

    # seed left panel plotting range
    xmin = np.min(copy.deepcopy(self.x))
    xmax = np.max(copy.deepcopy(self.x))
    xgap = (xmax - xmin)*0.1
    xmin -= xgap
    xmax += xgap
    x_fit = np.linspace(xmin, xmax, 300)

    # seed right panel contour plot
    viewmax = 3
    if 'viewmax' in kwargs:
        viewmax = kwargs['viewmax']
    view = [20, 100]
    if 'view' in kwargs:
        view = kwargs['view']

    # start animation
    num_frames = len(self.w_hist)
    print('starting animation rendering...')

    def animate(k):
        # clear panel
        ax1.cla()

        # current color
        color = self.colorspec[k]

        # print rendering update
        if np.mod(k+1, 25) == 0:
            print('rendering animation frame ' + str(k+1) + ' of ' + str(num_frames))
        if k == num_frames - 1:
            print('animation rendering complete!')
            time.sleep(1.5)
            clear_output()

        ###### make panel - plot data and fit ######
        # initialize fit
        w = self.w_hist[k]
        y_fit = w[0] + x_fit*w[1]

        # scatter data
        self.scatter_pts(ax1)

        # plot fit to data
        ax1.plot(x_fit, y_fit, color = color, linewidth = 3)
        return artist,

    anim = animation.FuncAnimation(fig, animate, frames=num_frames, interval=num_frames, blit=True)

    # produce animation and save
    fps = 50
    if 'fps' in kwargs:
        fps = kwargs['fps']
    anim.save(savepath, fps=fps, extra_args=['-vcodec', 'libx264'])
    clear_output()
def log_softmax(self, batch):
    # subtract the row max for numerical stability, then normalize in log space
    batch = batch - np.max(batch, axis=1, keepdims=True)
    return batch - logsumexp(batch, axis=1).reshape((batch.shape[0], -1))
def scatter_pts(self, ax):
    if np.shape(self.x)[1] == 1:
        # set plotting limits
        xmin = np.min(copy.deepcopy(self.x))
        xmax = np.max(copy.deepcopy(self.x))
        xgap = (xmax - xmin)*0.2
        xmin -= xgap
        xmax += xgap

        ymin = np.min(copy.deepcopy(self.y))
        ymax = np.max(copy.deepcopy(self.y))
        ygap = (ymax - ymin)*0.2
        ymin -= ygap
        ymax += ygap

        # initialize points
        ax.scatter(self.x, self.y, color = 'k', edgecolor = 'w', linewidth = 0.9, s = 40)

        # clean up panel
        ax.set_xlim([xmin, xmax])
        ax.set_ylim([ymin, ymax])

        # label axes
        ax.set_xlabel(r'$x$', fontsize = 16)
        ax.set_ylabel(r'$y$', rotation = 0, fontsize = 16, labelpad = 15)

    if np.shape(self.x)[1] == 2:
        # set plotting limits
        xmin1 = np.min(copy.deepcopy(self.x[:,0]))
        xmax1 = np.max(copy.deepcopy(self.x[:,0]))
        xgap1 = (xmax1 - xmin1)*0.35
        xmin1 -= xgap1
        xmax1 += xgap1

        xmin2 = np.min(copy.deepcopy(self.x[:,1]))
        xmax2 = np.max(copy.deepcopy(self.x[:,1]))
        xgap2 = (xmax2 - xmin2)*0.35
        xmin2 -= xgap2
        xmax2 += xgap2

        ymin = np.min(copy.deepcopy(self.y))
        ymax = np.max(copy.deepcopy(self.y))
        ygap = (ymax - ymin)*0.2
        ymin -= ygap
        ymax += ygap

        # initialize points
        ax.scatter(self.x[:,0], self.x[:,1], self.y, s = 40, color = 'k', edgecolor = 'w', linewidth = 0.9)

        # clean up panel
        ax.set_xlim([xmin1, xmax1])
        ax.set_ylim([xmin2, xmax2])
        ax.set_zlim([ymin, ymax])
        ax.set_xticks(np.arange(round(xmin1) + 1, round(xmax1), 1.0))
        ax.set_yticks(np.arange(round(xmin2) + 1, round(xmax2), 1.0))

        # label axes
        ax.set_xlabel(r'$x_1$', fontsize = 12, labelpad = 5)
        ax.set_ylabel(r'$x_2$', rotation = 0, fontsize = 12, labelpad = 5)
        ax.set_zlabel(r'$y$', rotation = 0, fontsize = 12, labelpad = -3)

        # clean up panel
        ax.xaxis.pane.fill = False
        ax.yaxis.pane.fill = False
        ax.zaxis.pane.fill = False
        ax.xaxis.pane.set_edgecolor('white')
        ax.yaxis.pane.set_edgecolor('white')
        ax.zaxis.pane.set_edgecolor('white')
        ax.xaxis._axinfo["grid"]['color'] = (1, 1, 1, 0)
        ax.yaxis._axinfo["grid"]['color'] = (1, 1, 1, 0)
        ax.zaxis._axinfo["grid"]['color'] = (1, 1, 1, 0)
def fun(x):
    return to_scalar(np.max(np.array([x, x])))
d_fun = lambda x: to_scalar(grad(fun)(x))
perf = train_performances.to_numpy()
order = "asc"
if use_max_inverse_transform == "max_cutoff":
    perf = perf.clip(0, cutoff)
    perf = cutoff - perf
    order = "desc"
elif use_max_inverse_transform == "max_par10":
    perf = par10 - perf
    order = "desc"

perf_max = 1
if scale_target_to_unit_interval:
    perf_max = np.max(perf)
    perf = perf / perf_max
train_performances = pd.DataFrame(data=perf, index=train_performances.index,
                                  columns=train_performances.columns)
print(order)
print("perf", perf)

skip_value = None
if skip_censored:
    if use_max_inverse_transform == "none":
        skip_value = train_performances.to_numpy().max()
    else:
        skip_value = train_performances.to_numpy().min()

inst, perf, rank, sample_weights = util.construct_numpy_representation_with_ordered_pairs_of_rankings_and_features_and_weights(
    train_features,
def logsumexp(x):
    """Numerically stable log(sum(exp(x))), also defined in scipy.misc"""
    max_x = np.max(x)
    return max_x + np.log(np.sum(np.exp(x - max_x)))
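# A quick check for the logsumexp above (assumes scipy is available): it should
# match scipy's implementation and stay finite where the naive form underflows.
import numpy as np
from scipy.special import logsumexp as scipy_logsumexp

x = np.array([-1000.0, -1000.5, -999.0])
assert np.isclose(logsumexp(x), scipy_logsumexp(x))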
def logsumexp(x):
    """Numerically stable log(sum(exp(x)))"""
    max_x = npa.max(x)
    return max_x + npa.log(npa.sum(npa.exp(x - max_x)))
def sample_from_mvn(mu, sigma):
    rs = npr.RandomState(0)
    # jitter the covariance for numerical stability before the Cholesky factorization
    jittered = sigma + 1e-6*np.eye(len(sigma))*np.max(np.diag(sigma))
    return np.dot(np.linalg.cholesky(jittered), rs.randn(len(sigma))) + mu if random == 1 else mu
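# A minimal sketch of the jittered-Cholesky MVN sampling used in
# sample_from_mvn above (plain numpy; mu and sigma are illustrative values).
import numpy as np

rng = np.random.RandomState(0)
mu = np.zeros(2)
sigma = np.array([[2.0, 0.5], [0.5, 1.0]])
jitter = 1e-6 * np.eye(2) * np.max(np.diag(sigma))  # stabilize the factorization
L = np.linalg.cholesky(sigma + jitter)
sample = np.dot(L, rng.randn(2)) + mu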
def compare_2d3d(func1, func2, **kwargs):
    # input arguments
    view = [20, -65]
    if 'view' in kwargs:
        view = kwargs['view']

    # define input space
    w = np.linspace(-3, 3, 200)  # input range for original function
    if 'w' in kwargs:
        w = kwargs['w']

    # define pts
    pt1 = 0
    if 'pt1' in kwargs:
        pt1 = kwargs['pt1']
    pt2 = [0, 0]
    if 'pt2' in kwargs:
        pt2 = kwargs['pt2']

    # construct figure and remove whitespace
    fig = plt.figure(figsize = (6,3))
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
    fig.subplots_adjust(wspace=0.01, hspace=0.01)

    # create subplot with 2 panels
    gs = gridspec.GridSpec(1, 2, width_ratios=[1,2])

    ### draw 2d version ###
    ax1 = plt.subplot(gs[0])
    grad = compute_grad(func1)

    # generate a range of values over which to plot input function and derivatives
    g_plot = func1(w)
    g_range = max(g_plot) - min(g_plot)  # used for cleaning up final plot
    ggap = g_range*0.2

    # grab the input/output tangency pair, the center of the approximation
    pt1 = float(pt1)
    g_val = func1(pt1)

    # plot original function and the point of tangency
    ax1.plot(w, g_plot, color = 'k', zorder = 1, linewidth = 2)
    ax1.scatter(pt1, g_val, s = 60, c = 'lime', edgecolor = 'k', linewidth = 2, zorder = 3)

    #### plot first order approximation ####
    # plug input into the first derivative
    g_grad_val = grad(pt1)

    # compute and plot the first order approximation
    wrange = np.linspace(pt1 - 3, pt1 + 3, 100)
    h = g_val + g_grad_val*(wrange - pt1)
    ax1.plot(wrange, h, color = 'lime', alpha = 0.5, linewidth = 3, zorder = 2)

    # make new x-axis
    ax1.plot(w, g_plot*0, linewidth = 3, color = 'k')

    #### clean up panel ####
    ax1.set_xlim([min(w), max(w)])
    ax1.set_ylim([min(min(g_plot) - ggap, -4), max(max(g_plot) + ggap, 0.5)])
    ax1.set_xlabel('$w$', fontsize = 12, labelpad = -50)
    ax1.set_ylabel('$g(w)$', fontsize = 25, rotation = 0, labelpad = 50)
    ax1.grid(False)
    ax1.yaxis.set_visible(False)
    ax1.spines['right'].set_visible(False)
    ax1.spines['top'].set_visible(False)
    ax1.spines['left'].set_visible(False)

    ### draw 3d version ###
    ax2 = plt.subplot(gs[1], projection='3d')
    grad = compute_grad(func2)

    # define input space
    w1_vals, w2_vals = np.meshgrid(w, w)
    w1_vals.shape = (len(w)**2, 1)
    w2_vals.shape = (len(w)**2, 1)
    w_vals = np.concatenate((w1_vals, w2_vals), axis=1).T
    g_vals = func2(w_vals)

    # evaluation point
    w_val = np.array([float(pt2[0]), float(pt2[1])])
    w_val.shape = (2, 1)
    g_val = func2(w_val)
    grad_val = grad(w_val)
    grad_val.shape = (2, 1)

    # create and evaluate tangent hyperplane
    w1tan_vals, w2tan_vals = np.meshgrid(w, w)
    w1tan_vals.shape = (len(w)**2, 1)
    w2tan_vals.shape = (len(w)**2, 1)
    wtan_vals = np.concatenate((w1tan_vals, w2tan_vals), axis=1).T
    h = lambda weh: g_val + (weh[0] - w_val[0])*grad_val[0] + (weh[1] - w_val[1])*grad_val[1]
    h_vals = h(wtan_vals + w_val)

    # vals for cost surface, reshaped for plot_surface
    w1_vals.shape = (len(w), len(w))
    w2_vals.shape = (len(w), len(w))
    g_vals.shape = (len(w), len(w))
    w1tan_vals += w_val[0]
    w2tan_vals += w_val[1]
    w1tan_vals.shape = (len(w), len(w))
    w2tan_vals.shape = (len(w), len(w))
    h_vals.shape = (len(w), len(w))

    ### plot function, z=0 plane, and tangent plane ###
    ax2.plot_surface(w1_vals, w2_vals, g_vals, alpha = 0.5, color = 'w',
                     rstride = 25, cstride = 25, linewidth = 1, edgecolor = 'k', zorder = 2)
    ax2.plot_surface(w1_vals, w2_vals, g_vals*0, alpha = 0.1, color = 'w', zorder = 1,
                     rstride = 25, cstride = 25, linewidth = 0.3, edgecolor = 'k')
    ax2.plot_surface(w1tan_vals, w2tan_vals, h_vals, alpha = 0.4, color = 'lime', zorder = 1,
                     rstride = 50, cstride = 50, linewidth = 1, edgecolor = 'k')

    # scatter tangency
    ax2.scatter(w_val[0], w_val[1], g_val, s = 70, c = 'lime', edgecolor = 'k', linewidth = 2)

    ### clean up plot ###
    ax2.xaxis.pane.fill = False
    ax2.yaxis.pane.fill = False
    ax2.zaxis.pane.fill = False
    ax2.yaxis.pane.set_edgecolor('white')
    ax2.zaxis.pane.set_edgecolor('white')

    # remove axes lines and tickmarks
    ax2.w_zaxis.line.set_lw(0.)
    ax2.set_zticks([])
    ax2.w_xaxis.line.set_lw(0.)
    ax2.set_xticks([])
    ax2.w_yaxis.line.set_lw(0.)
    ax2.set_yticks([])

    # set viewing angle and limits
    ax2.view_init(view[0], view[1])
    wgap = (max(w) - min(w))*0.4
    y = max(w) + wgap
    ax2.set_xlim([-y, y])
    ax2.set_ylim([-y, y])
    zmin = min(np.min(g_vals), -0.5)
    zmax = max(np.max(g_vals), +0.5)
    ax2.set_zlim([zmin, zmax])

    # label plot
    fontsize = 12
    ax2.set_xlabel(r'$w_1$', fontsize = fontsize, labelpad = -30)
    ax2.set_ylabel(r'$w_2$', fontsize = fontsize, rotation = 0, labelpad = -30)
    plt.show()
def minConf_SPG(funObj, x, funProj, options=None):
    """
    This function implements Mark Schmidt's MATLAB implementation of
    spectral projected gradient (SPG) to solve for the projected
    quasi-Newton direction

        min funObj(x) s.t. x in C

    Parameters
    ----------
    funObj: function that returns objective function value and the gradient
    x: initial parameter value
    funProj: function that returns projection of x onto C
    options:
        verbose: level of verbosity
            (0: no output, 1: final, 2: iter (default), 3: debug)
        optTol: tolerance used to check for optimality (default: 1e-5)
        progTol: tolerance used to check for lack of progress (default: 1e-9)
        maxIter: maximum number of calls to funObj (default: 500)
        numDiff: compute derivatives numerically
            (0: use user-supplied derivatives (default),
             1: use finite differences, 2: use complex differentials)
        suffDec: sufficient decrease parameter in Armijo condition (default: 1e-4)
        interp: type of interpolation (0: step-size halving, 1: quadratic, 2: cubic)
        memory: number of steps to look back in non-monotone Armijo condition
        useSpectral: use spectral scaling of gradient direction (default: 1)
        curvilinear: backtrack along projection arc (default: 0)
        testOpt: test optimality condition (default: 1)
        feasibleInit: if 1, then the initial point is assumed to be feasible
        bbType: type of Barzilai-Borwein step (default: 1)

    Notes:
        - if the projection is expensive to compute, you can reduce the
          number of projections by setting testOpt to 0
    """
    nVars = x.shape[0]
    options_default = {'verbose': 2, 'numDiff': 0, 'optTol': 1e-5, 'progTol': 1e-9,
                       'maxIter': 500, 'suffDec': 1e-4, 'interp': 2, 'memory': 10,
                       'useSpectral': 1, 'curvilinear': 0, 'feasibleInit': 0,
                       'testOpt': 1, 'bbType': 1}
    options = setDefaultOptions(options, options_default)

    if options['verbose'] >= 2:
        header = '{:10s}'.format('Iteration') + \
                 '{:10s}'.format('FunEvals') + \
                 '{:10s}'.format('Projections') + \
                 '{:15s}'.format('StepLength') + \
                 '{:15s}'.format('FunctionVal')
        if options['testOpt'] == 1:
            header += '{:15s}'.format('OptCond')
        print(header)

    funEvalMultiplier = 1

    # evaluate initial point
    if options['feasibleInit'] == 0:
        x = funProj(x)
    [f, g] = funObj(x)
    projects = 1
    funEvals = 1

    # optionally check optimality
    if options['testOpt'] == 1:
        projects = projects + 1
        if np.max(np.abs(funProj(x - g) - x)) < options['optTol']:
            if options['verbose'] >= 1:
                print("First-order optimality conditions below optTol at initial point")
            return (x, f, funEvals, projects)

    i = 1
    while funEvals <= options['maxIter']:
        # compute step direction
        if i == 1 or options['useSpectral'] == 0:
            alpha = 1.
        else:
            y = g - g_old
            s = x - x_old
            if options['bbType'] == 1:
                alpha = np.dot(s, s)/np.dot(s, y)
            else:
                alpha = np.dot(s, y)/np.dot(y, y)
            if alpha <= 1e-10 or alpha >= 1e10:
                alpha = 1.
        d = -alpha * g
        f_old = f
        x_old = x
        g_old = g

        # compute projected step
        if options['curvilinear'] == 0:
            d = funProj(x + d) - x
            projects = projects + 1

        # check that progress can be made along the direction
        gtd = np.dot(g, d)
        if gtd > -options['progTol']:
            if options['verbose'] >= 1:
                print("Directional derivative below progTol")
            break

        # select initial guess for the step length
        if i == 1:
            t = np.minimum(1., 1./np.sum(np.abs(g)))
        else:
            t = 1.

        # compute reference function value for the non-monotone condition
        if options['memory'] == 1:
            funRef = f
        else:
            if i == 1:
                old_fvals = np.ones(options['memory'])*(-1)*np.infty
            if i <= options['memory']:
                old_fvals[i-1] = f
            else:
                old_fvals = np.append(old_fvals[1:], f)
            funRef = np.max(old_fvals)

        # evaluate the objective and gradient at the initial step
        if options['curvilinear'] == 1:
            x_new = funProj(x + t*d)
            projects = projects + 1
        else:
            x_new = x + t*d
        [f_new, g_new] = funObj(x_new)
        funEvals = funEvals + 1

        # backtracking line search
        lineSearchIters = 1
        while f_new > funRef + options['suffDec']*np.dot(g, x_new - x) or \
                isLegal(f_new) == False:
            temp = t
            if options['interp'] == 0 or isLegal(f_new) == False:
                if options['verbose'] == 3:
                    print('Halving step size')
                t = t/2.
            elif options['interp'] == 2 and isLegal(g_new):
                if options['verbose'] == 3:
                    print("Cubic Backtracking")
                t = polyinterp(np.array([[0, f, gtd],
                                         [t, f_new, np.dot(g_new, d)]]))[0]
            elif lineSearchIters < 2 or isLegal(f_prev):
                if options['verbose'] == 3:
                    print("Quadratic Backtracking")
                t = polyinterp(np.array([[0, f, gtd],
                                         [t, f_new, complex(0, 1)]]))[0]
            else:
                if options['verbose'] == 3:
                    print("Cubic Backtracking on Function Values")
                t = polyinterp(np.array([[0., f, gtd],
                                         [t, f_new, complex(0, 1)],
                                         [t_prev, f_prev, complex(0, 1)]]))[0]

            # adjust if change is too small or too large
            if t < temp*1e-3:
                if options['verbose'] == 3:
                    print("Interpolated value too small, Adjusting")
                t = temp * 1e-3
            elif t > temp * 0.6:
                if options['verbose'] == 3:
                    print("Interpolated value too large, Adjusting")
                t = temp * 0.6

            # check whether step has become too small
            if np.max(np.abs(t*d)) < options['progTol'] or t == 0:
                if options['verbose'] == 3:
                    print("Line Search failed")
                t = 0.
                f_new = f
                g_new = g
                break

            # evaluate new point
            f_prev = f_new
            t_prev = temp
            if options['curvilinear'] == True:
                x_new = funProj(x + t*d)
                projects = projects + 1
            else:
                x_new = x + t*d
            [f_new, g_new] = funObj(x_new)
            funEvals = funEvals + 1
            lineSearchIters = lineSearchIters + 1
        # done with line search

        # take step
        x = x_new
        f = f_new
        g = g_new

        if options['testOpt'] == True:
            optCond = np.max(np.abs(funProj(x - g) - x))
            projects = projects + 1

        # output log
        if options['verbose'] >= 2:
            row = '{:10d}'.format(i) + \
                  '{:10d}'.format(funEvals*funEvalMultiplier) + \
                  '{:10d}'.format(projects) + \
                  '{:15.5e}'.format(t) + \
                  '{:15.5e}'.format(f)
            if options['testOpt'] == True:
                row += '{:15.5e}'.format(optCond)
            print(row)

        # check optimality
        if options['testOpt'] == True:
            if optCond < options['optTol']:
                if options['verbose'] >= 1:
                    print("First-order optimality conditions below optTol")
                break
        if np.max(np.abs(t*d)) < options['progTol']:
            if options['verbose'] >= 1:
                print("Step size below progTol")
            break
        if np.abs(f - f_old) < options['progTol']:
            if options['verbose'] >= 1:
                print("Function value changing by less than progTol")
            break
        if funEvals*funEvalMultiplier > options['maxIter']:
            if options['verbose'] >= 1:
                print("Function evaluation exceeds maxIter")
            break

        i = i + 1
    return (x, f, funEvals, projects)
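# A hedged usage sketch for minConf_SPG above: minimize ||x - c||^2 subject to
# x >= 0, with projection by clipping at zero. setDefaultOptions, isLegal and
# polyinterp are helper routines assumed to be defined alongside the solver.
import numpy as np

c = np.array([1.0, -2.0, 3.0])

def funObj(x):
    return np.sum((x - c)**2), 2*(x - c)

def funProj(x):
    return np.maximum(x, 0)

x_opt, f_opt, funEvals, projects = minConf_SPG(funObj, np.zeros(3), funProj, {'verbose': 0})
# expected solution: x_opt close to [1, 0, 3]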
def rl1_selection(y_bin, y_ord, y_categ, zl1_ys, w_s):
    '''
    Selects the number of factors on the first latent discrete layer

    y_bin (n x p_bin ndarray): The binary and count data matrix
    y_ord (n x p_ord ndarray): The ordinal data matrix
    y_categ (n x p_categ ndarray): The categorical data matrix
    zl1_ys (k_1D x r_1D ndarray): The first layer latent variables
    w_s (list): The path probabilities starting from the first layer
    ------------------------------------------------------------------
    return (list of int): The dimensions to keep for the GLLVM layer
    '''
    M0 = zl1_ys.shape[0]
    numobs = zl1_ys.shape[1]
    r0 = zl1_ys.shape[2]
    S0 = zl1_ys.shape[3]

    nb_bin = y_bin.shape[1]
    nb_ord = y_ord.shape[1]
    nb_categ = y_categ.shape[1]

    PROP_ZERO_THRESHOLD = 0.25
    PVALUE_THRESHOLD = 0.10

    # Determine the dimensions that are weakest for Binomial variables
    zero_coef_mask = np.zeros(r0)
    for j in range(nb_bin):
        for s in range(S0):
            Nj = np.max(y_bin[:, j])  # The support of the jth binomial is [1, Nj]
            # assign z before branching so the Binomial case has a defined input
            z = zl1_ys[:, :, :, s]

            if Nj == 1:  # If the variable is Bernoulli rather than Binomial
                yj = y_bin[:, j]
            else:  # If not, convert the Binomial output to Bernoulli output
                yj, z = bin_to_bern(Nj, y_bin[:, j], z[0])

            # Put all the M0 points in a series
            X = z.flatten(order='C').reshape((M0 * numobs, r0), order='C')
            y_repeat = np.repeat(yj, M0).astype(int)  # Repeat rather than tile

            lr = LogisticRegression(penalty='l1', solver='saga')
            lr.fit(X, y_repeat)
            zero_coef_mask += (lr.coef_[0] == 0) * w_s[s]

    # Determine the dimensions that are weakest for Ordinal variables
    for j in range(nb_ord):
        for s in range(S0):
            ol = OrderedLogit()
            X = zl1_ys[:, :, :, s].flatten(order='C').reshape((M0 * numobs, r0), order='C')
            y_repeat = np.repeat(y_ord[:, j], M0).astype(int)  # Repeat rather than tile

            ol.fit(X, y_repeat)
            zero_coef_mask += np.array(ol.summary['p'] > PVALUE_THRESHOLD) * w_s[s]

    # Determine the dimensions that are weakest for Categorical variables
    for j in range(nb_categ):
        for s in range(S0):
            z = zl1_ys[:, :, :, s]

            # Put all the M0 points in a series
            X = z.flatten(order='C').reshape((M0 * numobs, r0), order='C')
            y_repeat = np.repeat(y_categ[:, j], M0).astype(int)  # Repeat rather than tile

            lr = LogisticRegression(penalty='l1', solver='saga', multi_class='multinomial')
            lr.fit(X, y_repeat)
            zero_coef_mask += (lr.coef_[0] == 0) * w_s[s]

    # Voting: delete the dimensions which have been zeroed a majority of times
    zeroed_coeff_prop = zero_coef_mask / (nb_ord + nb_bin + nb_categ)

    # Need at least r1 = 2 dimensions for the algorithm to work
    new_rl = np.sum(zeroed_coeff_prop <= PROP_ZERO_THRESHOLD)
    if new_rl < 2:
        dims_to_keep = np.argsort(zeroed_coeff_prop)[:2]
    else:
        dims_to_keep = list(set(range(r0)) -
                            set(np.where(zeroed_coeff_prop > PROP_ZERO_THRESHOLD)[0].tolist()))
    dims_to_keep = np.sort(dims_to_keep)

    return dims_to_keep
def sample_from_mvn(mu, sigma):
    # make sure we return 2d, also make sure data is 2d
    rs = npr.RandomState(0)
    jittered = sigma + 1e-6*np.eye(len(sigma))*np.max(np.diag(sigma))
    sample = np.dot(np.linalg.cholesky(jittered), rs.randn(len(sigma))) + mu if random == 1 else mu
    return np.atleast_2d(sample).T
def logsumexp(X, axis, keepdims=False):
    max_X = np.max(X)
    return max_X + np.log(np.sum(np.exp(X - max_X), axis=axis, keepdims=keepdims))
def logsumexp(X, axis):
    max_X = np.max(X)
    return max_X + np.log(np.sum(np.exp(X - max_X), axis=axis, keepdims=True))
def optimize(self, n_iters, objective, init_param):
    dim = init_param.size
    diagnostics = self._sgo._diagnostics
    k_conv = None     # iteration number when convergence reached
    k_stopped = None  # iteration number when MCSE/ESS conditions met
    k_Rhat = None     # iteration number when R hat convergence criterion met
    learning_rate = self._sgo._learning_rate
    variational_param = init_param.copy()
    variational_param_history = []
    value_history = []
    descent_dir_history = []
    ess_and_mcse_k_history = []
    ess_history = []
    mcse_history = []
    iterate_average_k_history = []
    iterate_average_history = []
    iterate_average = variational_param.copy()
    if diagnostics:
        iterate_average_k_history.append(0)
        iterate_average_history.append(iterate_average)
    total_opt_time = 0  # total time spent on optimization
    stopped = False
    with tqdm.trange(n_iters) as progress:
        try:
            for k in progress:
                # take step in descent direction
                with Timer() as opt_timer:
                    object_val, object_grad = objective(variational_param)
                    value_history.append(object_val)
                    descent_dir = self._sgo.descent_direction(object_grad)
                    variational_param -= learning_rate * descent_dir
                    variational_param_history.append(variational_param.copy())
                    if diagnostics:
                        descent_dir_history.append(descent_dir)
                total_opt_time += opt_timer.interval

                # if convergence has not been reached, check for it using R hat
                if k_conv is None and k % self._k_check == 0:
                    W_upper = int(0.95*k)
                    if W_upper > self._W_min:
                        windows = np.linspace(self._W_min, W_upper, num=5, dtype=int)
                        R_hat_success, best_W = R_hat_convergence_check(
                            variational_param_history, windows)
                        iterate_average = np.mean(variational_param_history[-best_W:], axis=0)
                        if diagnostics:
                            iterate_average_k_history.append(k)
                            iterate_average_history.append(iterate_average)
                        if R_hat_success:
                            k_Rhat = k
                            k_conv = k - best_W
                            W_check = best_W  # immediately check MCSE

                # once convergence has been reached, compute the MCSE
                if k_conv is not None and k - k_conv == W_check:
                    W = W_check
                    converged_iterates = np.array(variational_param_history[-W:])
                    iterate_average = np.mean(converged_iterates, axis=0)
                    if diagnostics and k not in iterate_average_k_history:
                        iterate_average_k_history.append(k)
                        iterate_average_history.append(iterate_average)
                    # compute MCSE
                    with Timer() as mcse_timer:
                        if isinstance(objective.approx, MFGaussian):
                            # for MF Gaussian, use MCSE(mu/sigma, log_sigma)
                            iterate_diff = converged_iterates[W-2,:] - converged_iterates[W-1,:]
                            iterate_diff_zero = iterate_diff == 0
                            # ignore constant variational parameters
                            if np.any(iterate_diff_zero):
                                indices = np.argwhere(iterate_diff_zero)
                                converged_iterates = np.delete(converged_iterates, indices, 1)
                            converged_log_sdevs = converged_iterates[:,-dim:]
                            mean_log_stdev = np.mean(converged_log_sdevs, axis=0)
                            ess, mcse = MCSE(converged_iterates)
                            mcse_mean = mcse[:dim]/np.exp(mean_log_stdev)
                            mcse_stdev = mcse[-dim:]
                            mcse = np.concatenate((mcse_mean, mcse_stdev))
                        else:
                            ess, mcse = MCSE(converged_iterates)
                    if diagnostics:
                        ess_and_mcse_k_history.append(k)
                        ess_history.append(ess)
                        mcse_history.append(mcse)
                    if (np.max(mcse) < self._mcse_threshold and
                            np.min(ess) > self._ESS_min):
                        k_stopped = k
                        break
                    else:
                        relative_mcse_time = mcse_timer.interval / W
                        relative_opt_time = total_opt_time / k
                        relative_time_ratio = relative_opt_time / relative_mcse_time
                        recheck_scale = max(1.05, 1 + 1/np.sqrt(1 + relative_time_ratio))
                        W_check = int(recheck_scale*W_check + 1)

                if k % self._k_check == 0:
                    avg_loss = np.mean(value_history[max(0, k-1000):k+1])
                    R_conv = 'converged' if k_conv is not None else 'not converged'
                    progress.set_description(
                        'average loss = {:,.5g} | R hat {}|'.format(avg_loss, R_conv))
        except (KeyboardInterrupt, StopIteration) as e:  # pragma: no cover
            # do not print log on the same line
            progress.close()
        finally:
            progress.close()

    if k_stopped is None:
        if k_conv is None:
            print('WARNING: stationarity not reached after maximum number of iterations')
            print('WARNING: try increasing the learning rate or the maximum number of iterations')
        else:
            print('WARNING: stationarity reached but MCSE too large and/or ESS too small')
            print('WARNING: maximum MCSE = {:.3g}'.format(np.max(mcse)))
            print('WARNING: minimum ESS = {:.1f}'.format(np.min(ess)))
            print(ess)
    else:
        print('Convergence reached at iteration', k_stopped)

    return dict(opt_param = iterate_average,
                k_conv = k_conv,
                k_Rhat = k_Rhat,
                k_stopped = k_stopped,
                variational_param_history = np.array(variational_param_history),
                value_history = np.array(value_history),
                iterate_average_k_history = np.array(iterate_average_k_history),
                iterate_average_history = np.array(iterate_average_history),
                descent_dir_history = np.array(descent_dir_history),
                ess_and_mcse_k_history = np.array(ess_and_mcse_k_history),
                ess_history = np.array(ess_history),
                mcse_history = np.array(mcse_history))
def fun(x):
    return to_scalar(np.max(x, axis=1, keepdims=True))
d_fun = lambda x: to_scalar(grad(fun)(x))
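# A hedged sketch of how the fun/d_fun checks above can be exercised with
# autograd (to_scalar is assumed to reduce an array to a float, e.g. via a sum).
import autograd.numpy as np
from autograd import grad

def to_scalar(x):
    return np.sum(x)

def fun(x):
    return to_scalar(np.max(x, axis=1, keepdims=True))

x = np.array([[1.0, 3.0], [2.0, 0.5]])
print(grad(fun)(x))  # ones at each row's argmax, zeros elsewhere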
def softmax(x):
    # shift by the max for numerical stability before exponentiating
    m = np.max(x)
    y = np.exp(x - m)
    return y / y.sum()
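# A small check of the softmax above: the max-shift keeps huge inputs from
# overflowing, and the output is a proper probability vector.
import numpy as np

p = softmax(np.array([1000.0, 1001.0, 1002.0]))
print(p, p.sum())  # finite values summing to 1.0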
vars = {"b": b}

# Data
tau = np.genfromtxt(dataPath + 'tau.csv', delimiter=',')
Xcif = np.genfromtxt(dataPath + 'Xcif.csv', delimiter=',')
Y = np.genfromtxt(dataPath + 'Y.csv', delimiter=',')
Eq = np.genfromtxt(dataPath + 'Eq.csv', delimiter=',')
Ex = np.genfromtxt(dataPath + 'Ex.csv', delimiter=',')
r = np.genfromtxt(dataPath + 'r.csv', delimiter=',')
D = np.genfromtxt(dataPath + 'D.csv', delimiter=',')
ccodes = np.genfromtxt(dataPath + 'ccodes.csv', delimiter=',', dtype="str")
dists = np.genfromtxt(dataPath + 'cDists.csv', delimiter=',')
M = np.genfromtxt(dataPath + "milex.csv", delimiter=",")
M = M / np.max(M)  # normalize milex

W = np.log(dists + 1)
N = len(Y)
E = Eq + Ex

data = {
    "tau": tau,
    "Xcif": Xcif,
    "Y": Y,
    "E": E,
    "r": r,
    "D": D,
    "W": W,
    "M": M