def m_step(self, expectations, datas, inputs, masks, tags, **kwargs):
    """Closed-form M-step that refreshes ``self.mus`` and ``self.log_kappas``.

    Each angle is embedded as a (sin, cos) pair, the pairs are accumulated
    per state using the expected state weights, and the direction/length of
    the accumulated vectors give the new mean angle and (via a rational
    approximation) the new concentration.

    Args:
        expectations: iterable of 3-tuples; only the first element of each
            tuple (the per-sample state weights) is used.
        datas: list of angle arrays, concatenated along axis 0.
        inputs, masks, tags, **kwargs: accepted for interface
            compatibility; not read here.
    """
    angles = np.concatenate(datas)
    resp = np.concatenate([Ez for Ez, _, _ in expectations])
    assert angles.shape[0] == resp.shape[0]

    # Represent every angle as a point on the unit circle.
    unit_vecs = np.stack((np.sin(angles), np.cos(angles)), axis=1)  # T x 2 x D

    # Weighted resultant vector per state and its length.
    resultant = np.tensordot(resp.T, unit_vecs, axes=1)  # K x 2 x D
    resultant_len = np.sqrt(np.sum(np.power(resultant, 2), axis=1))  # K x D

    mean_dirs = np.divide(resultant, resultant_len[:, None])  # K x 2 x D
    mean_len = np.divide(resultant_len, np.sum(resp, 0)[:, None])  # K x D

    # States whose resultant vanished get zeroed instead of keeping 0/0 values.
    degenerate = (resultant_len.sum(1) == 0)
    mean_dirs[degenerate] = 0
    mean_len[degenerate] = 0

    # Approximate concentration from the mean resultant length.
    mean_len_sq = np.power(mean_len, 2)
    kappa0 = mean_len * (self.D + 1 - mean_len_sq) / (1 - mean_len_sq)  # K x D
    kappa0[kappa0 == 0] += 1e-6  # keep the logarithm below finite

    for k in range(self.K):
        self.mus[k] = np.arctan2(*mean_dirs[k])
        self.log_kappas[k] = np.log(kappa0[k])
def stf_4dim_time_day(tensor, r, random_seed=0, num_iter=100, eps=1e-8, lr=1):
    """Fit four non-negative rank-``r`` factors to a 4-way tensor with Adagrad.

    The model is ``einsum('Hr, Ar, ADr, ATr ->HADT')``; the loss is the RMSE
    over the entries of ``tensor`` that are not NaN.

    Args:
        tensor: 4-way array; NaN entries are excluded from the loss.
        r: number of latent components.
        random_seed: seed passed to ``np.random.seed`` (global RNG state).
        num_iter: number of gradient steps.
        eps: constant added to every squared-gradient accumulator each step.
        lr: base learning rate.

    Returns:
        Tuple ``(home, appliance, day, hour)`` of factor arrays.
    """
    np.random.seed(random_seed)

    def cost(tensor, home, appliance, day, hour):
        # RMSE restricted to the observed (non-NaN) entries.
        pred = np.einsum('Hr, Ar, ADr, ATr ->HADT', home, appliance, day, hour)
        observed = ~np.isnan(tensor)
        residual = (pred - tensor)[observed].flatten()
        return np.sqrt((residual**2).mean())

    mg = multigrad(cost, argnums=[1, 2, 3, 4])

    # Factor shapes: (H, r), (A, r), (A, D, r), (A, T, r).
    n_appliance = tensor.shape[1]
    home = np.random.rand(tensor.shape[0], r)
    appliance = np.random.rand(n_appliance, r)
    day = np.random.rand(n_appliance, tensor.shape[-2], r)
    hour = np.random.rand(n_appliance, tensor.shape[-1], r)

    factors = (home, appliance, day, hour)
    grad_sq_sums = tuple(np.zeros_like(factor) for factor in factors)

    for i in range(num_iter):
        grads = mg(tensor, *factors)

        # Adagrad step, applied in place so the tuple keeps the live arrays.
        for factor, accum, g in zip(factors, grad_sq_sums, grads):
            accum += eps + np.square(g)
            factor -= np.divide(lr, np.sqrt(accum)) * g

        # Projection to non-negative space.
        for factor in factors:
            factor[factor < 0] = 1e-8

        if i % 50 == 0:
            print(cost(tensor, *factors), i)
            sys.stdout.flush()

    return home, appliance, day, hour
def mat_cosine_dist(X, Y):
    """Return the cosine of the angle between matching rows of ``X`` and ``Y``.

    Despite the name, the value is a cosine *similarity* (1 for parallel
    rows, 0 for orthogonal rows), not a distance.

    The row-wise dot products and norms are computed directly instead of
    forming full ``X @ Y.T`` style Gram matrices and reading their diagonals,
    so time and memory are linear in the number of rows.

    Args:
        X: 2-D array with one vector per row.
        Y: 2-D array of the same shape as ``X``.

    Returns:
        1-D array whose i-th entry is ``X[i]·Y[i] / (|X[i]| |Y[i]|)``.
        Rows of zero norm yield non-finite entries (division by zero).
    """
    prod = np.sum(X * Y, axis=-1)
    len1 = np.sqrt(np.sum(X * X, axis=-1))
    len2 = np.sqrt(np.sum(Y * Y, axis=-1))
    return np.divide(np.divide(prod, len1), len2)
def b_fwd_1d(r, R):
    """
    Computes forward model weight function b.

    With x = r / R the value is sqrt(x**2 + 1) - |x|.  It is evaluated in the
    algebraically identical form 1 / (sqrt(x**2 + 1) + |x|), which avoids the
    catastrophic cancellation of the direct difference for large |x| (where
    the subtraction would collapse to exactly 0).

    :param r: vector of differences between spatial coords
    :param R: cylinder radius parameter R
    :return: values of b_fwd for elements in r (same shape as r)
    """
    ratio = np.divide(r, R)
    b = np.divide(1.0, np.sqrt(np.square(ratio) + 1) + np.abs(ratio))
    return b
def factorization(tensor, num_latent, num_iter=2000, lr=1, dis=False,
                  random_seed=0, eps=1e-8, T_known=None):
    """Factor a 3-way tensor into non-negative factors ``H``, ``A``, ``T``.

    Gradients of ``cost_abs`` with respect to the three factors drive an
    Adagrad update; negative entries are reset to ``1e-8`` after every step.

    Args:
        tensor: 3-way array of shape ``(M, N, K)``.
        num_latent: number of latent components per factor.
        num_iter: number of gradient steps.
        lr: base learning rate.
        dis: when true, print the cost every 500 iterations.
        random_seed: seed passed to ``np.random.seed`` (global RNG state).
        eps: constant added to each squared-gradient accumulator every step.
        T_known: optional value forwarded to ``set_known`` to pin parts of
            ``T`` before the loop and after each update.

    Returns:
        Tuple ``(H, A, T)``.
    """
    np.random.seed(random_seed)
    cost = cost_abs
    mg = autograd.multigrad(cost, argnums=[0, 1, 2])

    M, N, K = tensor.shape
    H = np.random.rand(M, num_latent)
    A = np.random.rand(N, num_latent)
    T = np.random.rand(K, num_latent)

    accum_A = np.zeros_like(A)
    accum_H = np.zeros_like(H)
    accum_T = np.zeros_like(T)

    def adagrad_step(param, accum, g):
        # In-place Adagrad update of ``param`` and its accumulator.
        accum += eps + np.square(g)
        param -= np.divide(lr, np.sqrt(accum)) * g

    pin_T = T_known is not None
    if pin_T:
        T = set_known(T, T_known)

    for i in range(num_iter):
        del_h, del_a, del_t = mg(H, A, T, tensor)

        adagrad_step(A, accum_A, del_a)
        adagrad_step(H, accum_H, del_h)
        adagrad_step(T, accum_T, del_t)

        if pin_T:
            T = set_known(T, T_known)

        # Projection to non-negative space.
        for factor in (H, A, T):
            factor[factor < 0] = 1e-8

        if dis and i % 500 == 0:
            print(cost(H, A, T, tensor))

    return H, A, T
def rsq(nn_params, nn2_params, inp, obs, obs2, del_lens, num_samples, rs):
    """Return per-example squared Pearson correlations for two predictions.

    For every example ``idx`` two normalized frequency vectors are built from
    ``nn_match_score_function`` scores and compared with ``obs[idx]`` and
    ``obs2[idx]`` respectively.

    Args:
        nn_params: parameters passed to ``nn_match_score_function`` together
            with ``inp[idx]``.
        nn2_params: parameters passed to ``nn_match_score_function`` together
            with deletion lengths.
        inp: per-example feature arrays; column 0 (``inp[idx].T[0]``) is
            compared against the deletion lengths.
        obs: per-example observed frequencies for the first prediction.
        obs2: per-example observed frequencies for the second prediction
            (compared against a length-28 vector).
        del_lens: per-example deletion lengths.
        num_samples: not used in this function.
        rs: not used in this function.

    Returns:
        Tuple ``(rsqs1, rsqs2)`` of lists with one squared correlation per
        example.
    """
    rsqs1, rsqs2 = [], []
    for idx in range(len(inp)):
        ##
        # MH-based deletion frequencies
        ##
        mh_scores = nn_match_score_function(nn_params, inp[idx])
        Js = np.array(del_lens[idx])
        # Score minus a 0.25-per-length penalty, exponentiated.
        unnormalized_fq = np.exp(mh_scores - 0.25*Js)

        # Add MH-less contribution at full MH deletion lengths
        mh_vector = inp[idx].T[0]
        mhfull_contribution = np.zeros(mh_vector.shape)
        for jdx in range(len(mh_vector)):
            # Only entries whose deletion length equals the first feature.
            if del_lens[idx][jdx] == mh_vector[jdx]:
                dl = del_lens[idx][jdx]
                mhless_score = nn_match_score_function(nn2_params, np.array(dl))
                mhless_score = np.exp(mhless_score - 0.25*dl)
                # One-hot vector carrying mhless_score at position jdx.
                mask = np.concatenate([np.zeros(jdx,), np.ones(1,) * mhless_score, np.zeros(len(mh_vector) - jdx - 1,)])
                mhfull_contribution = mhfull_contribution + mask
        unnormalized_fq = unnormalized_fq + mhfull_contribution
        normalized_fq = np.divide(unnormalized_fq, np.sum(unnormalized_fq))
        rsq1 = pearsonr(normalized_fq, obs[idx])[0]**2
        rsqs1.append(rsq1)

        ##
        # Deletion length frequencies, only up to 28
        #   (Restricts training to library data, else 27 bp.)
        ##
        dls = np.arange(1, 28+1)
        dls = dls.reshape(28, 1)
        nn2_scores = nn_match_score_function(nn2_params, dls)
        unnormalized_nn2 = np.exp(nn2_scores - 0.25*np.arange(1, 28+1))

        # iterate through del_lens vector, adding mh_scores (already computed above) to the correct index
        mh_contribution = np.zeros(28,)
        for jdx in range(len(Js)):
            dl = Js[jdx]
            # NOTE(review): `break` (not `continue`) stops at the first length
            # above 28 -- presumably Js is sorted ascending; confirm with caller.
            if dl > 28:
                break
            mhs = np.exp(mh_scores[jdx] - 0.25*dl)
            # One-hot vector carrying mhs at index dl - 1.
            mask = np.concatenate([np.zeros(dl - 1,), np.ones(1, ) * mhs, np.zeros(28 - (dl - 1) - 1,)])
            mh_contribution = mh_contribution + mask
        unnormalized_nn2 = unnormalized_nn2 + mh_contribution
        normalized_fq = np.divide(unnormalized_nn2, np.sum(unnormalized_nn2))
        rsq2 = pearsonr(normalized_fq, obs2[idx])[0]**2
        rsqs2.append(rsq2)
    return rsqs1, rsqs2
def adam_optimizer(cost, past_time_step, beta1=0.9, beta2=0.999, epsilon=10e-8):
    """Perform one Adam step (Kingma & Ba, https://arxiv.org/pdf/1412.6980.pdf).

    The *uncorrected* first/second moment estimates are what is carried from
    one step to the next; the bias-corrected versions are used only to form
    the current update.  (Feeding the corrected values back in as the running
    averages, as an earlier revision did, distorts every later step.)

    Args:
        cost: scalar function of the parameter structure; differentiated
            with ``grad``.
        past_time_step: sequence ``[params, m_prev, v_prev, t_prev]`` where
            ``params``, ``m_prev`` and ``v_prev`` are two-level dicts
            (group -> weight name -> array) with matching keys.
        beta1: decay rate of the first-moment estimate.
        beta2: decay rate of the second-moment estimate.
        epsilon: stabilizer added to the denominator.  NOTE: the default
            ``10e-8`` equals ``1e-7``; it is kept for backward compatibility.

    Returns:
        ``[params, m_curr, v_curr, t]`` in the same layout as
        ``past_time_step``, ready to be passed to the next call.  ``params``
        is updated in place, one group at a time, via ``update``.
    """
    costgrad = grad(cost)
    params = past_time_step[0]
    m_prev = past_time_step[1]
    v_prev = past_time_step[2]
    t = past_time_step[3] + 1
    grad_dict = costgrad(params)

    # Mirror whatever groups ``params`` has rather than hard-coding names.
    m_curr = {key: {} for key in params}
    v_curr = {key: {} for key in params}
    update_rates = {key: {} for key in params}

    m_correction = 1 - (beta1**t)
    v_correction = 1 - (beta2**t)

    for key in params:
        for weights in params[key]:
            g = grad_dict[key][weights]

            # Raw exponential moving averages: these are stored and returned.
            m_curr[key][weights] = (beta1 * m_prev[key][weights]) + ((1 - beta1) * g)
            v_curr[key][weights] = (beta2 * v_prev[key][weights]) + ((1 - beta2) * np.square(g))

            # Bias-corrected estimates: used for this step only.
            m_hat = m_curr[key][weights] / m_correction
            v_hat = v_curr[key][weights] / v_correction

            update_rates[key][weights] = np.divide(m_hat, np.sqrt(v_hat) + epsilon)
        params[key] = update(params[key], update_rates[key], LEARNING_RATE)

    return [params, m_curr, v_curr, t]
def data_snr_maximized_extrinsic(frequencies, data, detector, chirpm,
                                 symmratio, spin1, spin2, Luminosity_Distance,
                                 theta, phi, iota, alpha_squared, bppe, NSflag,
                                 cosmology=cosmology.Planck15):
    """Return the peak noise-weighted overlap of ``data`` with a template, divided by the template norm.

    The template comes from ``detector_response_dCS`` evaluated with the
    given source parameters.  The overlap is formed in the frequency domain,
    inverse-FFT'd, and its largest magnitude is taken.

    Args:
        frequencies: frequency grid; the spacing is read from its first two
            entries (presumably uniform -- verify against caller).
        data: frequency-domain data on the same grid.
        detector: detector identifier forwarded to
            ``IMRPhenomD.populate_noise``.
        chirpm, symmratio, spin1, spin2, Luminosity_Distance, theta, phi,
            iota, alpha_squared, bppe, NSflag, cosmology: forwarded unchanged
            to ``detector_response_dCS``.

    Returns:
        ``max|ifft(4 * conj(data) * template / noise)| * N * df`` divided by
        ``sqrt(4 * integral(|template|^2 / noise))``.
    """
    _, noisefunc, _ = IMRPhenomD.populate_noise(detector, int_scheme='quad')
    noise = noisefunc(frequencies)**2

    template = detector_response_dCS(
        frequencies, chirpm, symmratio, spin1, spin2, Luminosity_Distance,
        theta, phi, iota, alpha_squared, bppe, NSflag, cosmology)

    # Noise-weighted norm of the template.
    template_power = (np.conjugate(template) * template).real
    snr_template = np.sqrt(4 * simps(template_power / noise, frequencies))

    # Noise-weighted cross term, moved out of the frequency domain.
    g_tilde = 4 * np.divide(np.multiply(np.conjugate(data), template), noise)
    gmag = np.abs(np.fft.ifft(g_tilde))

    deltaf = frequencies[1] - frequencies[0]
    # ifft carries a 1/N factor; N * df undoes it and applies the grid spacing.
    maxg = np.amax(gmag).real * (len(frequencies)) * (deltaf)
    return maxg / snr_template
def cost(self, controls, states, system_step):
    """
    Penalize overlap between each evolved state and its forbidden states.

    Args:
    controls :: ndarray - the control parameters for all time steps (unused)
    states :: ndarray - an array of the initial states evolved to
        the current time step
    system_step :: int - the system time step (unused)

    Returns:
    cost :: float - the penalty
    """
    total = 0
    for i, daggers in enumerate(self.forbidden_states_dagger):
        state = states[i]
        # Sum of |<forbidden|state>|^2 over this state's forbidden states.
        overlap = sum(
            anp.square(anp.abs(anp.matmul(dagger, state)[0, 0]))
            for dagger in daggers
        )
        total = total + anp.divide(overlap,
                                   self.state_normalization_constants[i])

    # Normalize by the shared constant, then apply the multiplier.
    normalized = total / self.normalization_constant
    return self.cost_multiplier * normalized
def adagrad_gd(param_init, cost, n, lr, eps):
    """Run ``n`` Adagrad steps on a list of parameter arrays and record history.

    Args:
        param_init: list of parameter arrays; deep-copied, never mutated.
        cost: scalar function of the parameter list, differentiated with ``grad``.
        n: number of iterations.
        lr: base learning rate.
        eps: constant added to each squared-gradient accumulator every step.

    Returns:
        Tuple ``(params, param_array, grad_array, lr_array, cost_array)``:
        the final parameters followed by the per-iteration histories of
        parameters, gradients, adapted learning rates and costs.  The
        parameter, learning-rate and cost histories also contain the initial
        state as their first entry.
    """
    from copy import deepcopy

    grad_cost = grad(cost)
    params = deepcopy(param_init)

    param_array = [params]
    grad_array = []
    lr_array = [[lr for _ in params]]
    cost_array = [cost(params)]

    accumulators = [np.zeros_like(param) for param in params]

    for _ in range(n):
        gradients = grad_cost(params)

        next_accumulators = []
        lrs = []
        next_params = []
        for param, accum, g in zip(params, accumulators, gradients):
            # Accumulate squared gradients, derive the per-entry step size,
            # then move the parameter.
            accum = eps + accum + np.square(g)
            step_size = np.divide(lr, np.sqrt(accum))
            next_accumulators.append(accum)
            lrs.append(step_size)
            next_params.append(param - (step_size * g))

        accumulators = next_accumulators
        params = next_params

        param_array.append(params)
        lr_array.append(lrs)
        grad_array.append(gradients)
        cost_array.append(cost(params))

    return params, param_array, grad_array, lr_array, cost_array
def log_prior(self):
    """Return the (unnormalized) log prior of the variances.

    The summand for a variance ``v`` is ``-(alpha + 1) * log(v) - beta / v``.
    Since the variances are stored as ``self.accum_log_sigmasq = log(v)``,
    the expression is evaluated directly in log space as
    ``-(alpha + 1) * log_v - beta * exp(-log_v)``.  This avoids the
    ``log(exp(x))`` round trip, which overflows to ``inf`` for large ``x``
    and produces ``inf - inf = nan`` for very negative ``x``.

    Returns:
        Scalar sum of the per-variance log-prior terms.
    """
    alpha = 1.1  # or 0.02
    beta = 1e-3  # or 0.02

    log_vars = self.accum_log_sigmasq
    var_prior = np.sum(
        np.multiply(-(alpha + 1), log_vars)
        - beta * np.exp(np.negative(log_vars)))
    return var_prior
def KM_mixing_multiplepigments(K_vector, S_vector, weights, r=1.0, h=1.0,
                               model='normal'):
    """Mix pigment ``K``/``S`` coefficients by row-normalized weights and evaluate the model.

    Args:
        K_vector: per-pigment ``K`` coefficients (one row per pigment).
        S_vector: per-pigment ``S`` coefficients (one row per pigment).
        weights: 2-D array with one row of pigment weights per sample; each
            row is normalized to sum to 1 here.
        r: scalar value broadcast into the array passed as the third argument
            of ``equations_in_RealPigments``.
        h: forwarded unchanged to ``equations_in_RealPigments``.
        model: forwarded as the ``model`` keyword.

    Returns:
        Whatever ``equations_in_RealPigments`` returns for the mixed
        coefficients.
    """
    # Clamp the row sums away from zero so all-zero rows do not divide by 0.
    row_totals = np.maximum(weights.sum(axis=1).reshape((-1, 1)), 1e-15)
    mix = np.divide(weights, row_totals)

    K_mix = np.dot(mix, K_vector)
    S_mix = np.dot(mix, S_vector)

    r_array = np.ones(K_mix.shape) * r
    return equations_in_RealPigments(K_mix, S_mix, r_array, h, model=model)
def expectation(self):
    """E-step: recompute ``self.p`` and ``self.w`` for all components.

    For each of the ``self.m`` components, the likelihood returned by
    ``self.dist.like`` is scaled by the component weight and stored in row
    ``i`` of ``self.p``.  The rows are then normalized so every column sums
    to one, and the weights become the row sums divided by ``self.N``.
    """
    for i in range(self.m):
        component_like = self.dist.like(self.x, self.c, self.n,
                                        *self.params[i])
        self.p[i] = np.multiply(self.w[i], component_like)

    column_totals = np.sum(self.p, axis=0)
    self.p = np.divide(self.p, column_totals)
    self.w = np.sum(self.p, axis=1) / self.N
def cost(self, controls, densities, system_step):
    """
    Penalize overlap between each evolved density and its forbidden densities.

    Args:
    controls :: ndarray - the control parameters for all time steps (unused)
    densities :: ndarray - an array of the initial densities evolved to
        the current time step
    system_step :: int - the system time step (unused)

    Returns:
    cost :: float - the penalty
    """
    total = 0
    for i, daggers in enumerate(self.forbidden_densities_dagger):
        density = densities[i]
        # Sum of |tr(forbidden^dagger @ density) / hilbert_size|^2.
        overlap = sum(
            anp.square(anp.abs(
                anp.trace(anp.matmul(dagger, density)) / self.hilbert_size))
            for dagger in daggers
        )
        total = total + anp.divide(overlap,
                                   self.density_normalization_constants[i])

    # Normalize by the shared constant, then apply the multiplier.
    normalized = total / self.normalization_constant
    return self.cost_multiplier * normalized
def gradient_descent(g, alpha, max_its, w, num_pts, batch_size, **kwargs):
    """Mini-batch gradient descent on ``g``, returning the weight history.

    Args:
        g: cost function called as ``g(w, batch_inds)``.
        alpha: step length.
        max_its: number of passes over the data.
        w: initial weights (any structure accepted by ``flatten_func``).
        num_pts: total number of data points.
        batch_size: number of points per mini-batch.
        **kwargs: accepted for interface compatibility; not read here.

    Returns:
        List of unflattened weights: the initial weights followed by the
        weights after each pass.
    """
    # Work on a flattened version of the cost so ``w`` is a single vector.
    g_flat, unflatten, w = flatten_func(g, w)
    grad = value_and_grad(g_flat)

    w_hist = [unflatten(w)]

    # Number of mini-batches needed to cover the whole dataset.
    num_batches = int(np.ceil(np.divide(num_pts, batch_size)))

    for _ in range(max_its):
        for b in range(num_batches):
            first = b * batch_size
            last = min((b + 1) * batch_size, num_pts)
            batch_inds = np.arange(first, last)

            _, grad_eval = grad(w, batch_inds)
            grad_eval.shape = np.shape(w)

            # Plain descent step.
            w = w - alpha * grad_eval

        w_hist.append(unflatten(w))
    return w_hist
def cf_user(rating_matrix, item_vectors, current_vector, indices, K):
    """Fit an offset to ``current_vector`` with 20 Adagrad steps and return their sum.

    Args:
        rating_matrix: array indexed with ``indices`` to select the rows used
            for the gradient.
        item_vectors: forwarded to ``selfgradu``.
        current_vector: reference vector; its shape fixes the offset's shape.
        indices: index passed to ``rating_matrix[...]``.
        K: not used in this function.

    Returns:
        ``current_vector`` plus the fitted offset.

    Note:
        Reseeds the global NumPy RNG with 0 on every call.
    """
    np.random.seed(0)
    offset = np.random.random(size=current_vector.shape)
    selected = rating_matrix[indices]

    num_iter = 20
    eps = 1e-8
    lr = 0.1

    # Accumulator starts at eps so the first step never divides by zero.
    grad_sq_sum = eps + np.zeros_like(offset)

    for _ in range(num_iter):
        delta = selfgradu(selected, item_vectors, current_vector, offset)
        grad_sq_sum += np.square(delta)
        offset -= np.divide(lr, np.sqrt(grad_sq_sum)) * delta

    return offset + current_vector
def newtons_method(g, max_its, w, num_pts, batch_size, **kwargs):
    """Mini-batch Newton's method, recording weights and full-data cost per pass.

    Args:
        g: cost function called as ``g(w, batch_inds)``.
        max_its: number of passes over the data.
        w: initial weights (any structure accepted by ``flatten_func``).
        num_pts: total number of data points.
        batch_size: number of points per mini-batch.
        **kwargs: optional ``epsilon`` -- multiple of the identity added to
            the Hessian for numerical stability (default ``10**(-7)``).

    Returns:
        Tuple ``(w_hist, cost_hist)``: unflattened weights and the cost over
        all ``num_pts`` points, for the initial weights and after every pass.
    """
    # Flatten the input function in case it takes matrices of weights.
    g_flat, unflatten, w = flatten_func(g, w)

    gradient = value_and_grad(g_flat)
    hess = hessian(g_flat)

    epsilon = kwargs.get('epsilon', 10**(-7))

    w_hist = [unflatten(w)]
    cost_hist = [g_flat(w, np.arange(num_pts))]

    # Number of mini-batches needed to cover the whole dataset.
    num_batches = int(np.ceil(np.divide(num_pts, batch_size)))

    for _ in range(max_its):
        for batch in range(num_batches):
            first = batch * batch_size
            last = min((batch + 1) * batch_size, num_pts)
            batch_inds = np.arange(first, last)

            _, grad_eval = gradient(w, batch_inds)
            hess_eval = hess(w, batch_inds)

            # Reshape the Hessian to a square matrix for numpy.linalg.
            side = int((np.size(hess_eval))**(0.5))
            hess_eval.shape = (side, side)

            # Solve (H + eps*I) w_new = (H + eps*I) w - grad for w_new.
            A = hess_eval + (epsilon) * np.eye(np.size(w))
            w = np.linalg.lstsq(A, np.dot(A, w) - grad_eval)[0]

        # Record weights and full-data cost after each pass.
        w_hist.append(unflatten(w))
        cost_hist.append(g_flat(w, np.arange(num_pts)))

    return w_hist, cost_hist
def gradient_descent(g, w, x_train, x_val, alpha, max_its, batch_size, **kwargs):
    """Mini-batch gradient descent with per-pass training/validation costs.

    Args:
        g: cost function called as ``g(w, x, batch_inds)``.
        w: initial weights (any structure accepted by ``flatten_func``).
        x_train: training data; the number of points is ``x_train.shape[1]``.
        x_val: validation data; the number of points is ``x_val.shape[1]``.
        alpha: step length.
        max_its: number of passes over the training data.
        batch_size: number of points per mini-batch.
        **kwargs: optional ``verbose`` (default ``True``); progress is
            printed only when it compares equal to ``True``.

    Returns:
        Tuple ``(w_hist, train_hist, val_hist)`` holding the initial entry
        followed by one entry per pass.
    """
    verbose = kwargs.get('verbose', True)

    # Work on a flattened version of the cost so ``w`` is a single vector.
    g_flat, unflatten, w = flatten_func(g, w)
    grad = value_and_grad(g_flat)

    num_train = x_train.shape[1]
    num_val = x_val.shape[1]
    w_hist = [unflatten(w)]
    train_hist = [g_flat(w, x_train, np.arange(num_train))]
    val_hist = [g_flat(w, x_val, np.arange(num_val))]

    # Number of mini-batches needed to cover the training set.
    num_batches = int(np.ceil(np.divide(num_train, batch_size)))

    for k in range(max_its):
        start = timer()
        for b in range(num_batches):
            first = b * batch_size
            last = min((b + 1) * batch_size, num_train)
            batch_inds = np.arange(first, last)

            _, grad_eval = grad(w, x_train, batch_inds)
            grad_eval.shape = np.shape(w)

            # Plain descent step.
            w = w - alpha * grad_eval
        end = timer()

        # Full-data costs after this pass.
        train_cost = g_flat(w, x_train, np.arange(num_train))
        val_cost = g_flat(w, x_val, np.arange(num_val))

        w_hist.append(unflatten(w))
        train_hist.append(train_cost)
        val_hist.append(val_cost)

        if verbose == True:
            elapsed_txt = str(np.round(end - start, 1))
            train_txt = str(np.round(train_hist[-1][0], 4))
            val_txt = str(np.round(val_hist[-1][0], 4))
            print('step ' + str(k + 1) + ' done in ' + elapsed_txt +
                  ' secs, train cost = ' + train_txt +
                  ', val cost = ' + val_txt)

    if verbose == True:
        print('finished all ' + str(max_its) + ' steps')

    return w_hist, train_hist, val_hist
def Gamma_trans_img3(RGB_linear_img):
    """Apply the piecewise gamma curve to linear RGB values.

    Values are first clipped to ``[1e-50, 1.0]``.  At or below the threshold
    ``0.0031308`` the output is ``12.92 * x``; above it the output is
    ``1.055 * x**(1/2.4) - 0.055``.

    The branch is selected with ``np.where``.  The earlier min/max weighting
    trick returned 0 for a value exactly equal to the threshold, because both
    branch weights vanished there; every other input gives the same result
    as before.

    Args:
        RGB_linear_img: array of linear intensities (must provide ``.clip``).

    Returns:
        Array of the same shape with the gamma curve applied.
    """
    eps = 1e-50  # lower clip bound keeps the fractional power well-behaved at 0
    RGB_linear_img = RGB_linear_img.clip(eps, 1.0)

    thres = 0.0031308
    a = 0.055

    linear_part = 12.92 * RGB_linear_img
    power_part = (1 + a) * (RGB_linear_img**(1.0 / 2.4)) - a

    out = np.where(RGB_linear_img <= thres, linear_part, power_part)
    return out
def stf_3dim(tensor, r, random_seed=0, num_iter=100, eps=1e-8, lr=1):
    """Fit three non-negative rank-``r`` factors to a 3-way tensor with Adagrad.

    The model is ``einsum('Hr, Ar, Tr ->HAT')``; the loss is the RMSE over
    the entries of ``tensor`` that are not NaN.

    Args:
        tensor: 3-way array; NaN entries are excluded from the loss.
        r: number of latent components.
        random_seed: seed passed to ``np.random.seed`` (global RNG state).
        num_iter: number of gradient steps.
        eps: constant added to every squared-gradient accumulator each step.
        lr: base learning rate.

    Returns:
        Tuple ``(home, appliance, time)`` of factor arrays.
    """
    np.random.seed(random_seed)

    def cost(tensor, home, appliance, time):
        # RMSE restricted to the observed (non-NaN) entries.
        pred = np.einsum('Hr, Ar, Tr ->HAT', home, appliance, time)
        observed = ~np.isnan(tensor)
        residual = (pred - tensor)[observed].flatten()
        return np.sqrt((residual**2).mean())

    mg = grad(cost, argnum=[1, 2, 3])

    home = np.random.rand(tensor.shape[0], r)
    appliance = np.random.rand(tensor.shape[1], r)
    time_factor = np.random.rand(tensor.shape[2], r)

    factors = (home, appliance, time_factor)
    grad_sq_sums = tuple(np.zeros_like(factor) for factor in factors)

    for i in range(num_iter):
        grads = mg(tensor, *factors)

        # Adagrad step, applied in place so the tuple keeps the live arrays.
        for factor, accum, g in zip(factors, grad_sq_sums, grads):
            accum += eps + np.square(g)
            factor -= np.divide(lr, np.sqrt(accum)) * g

        # Projection to non-negative space.
        for factor in factors:
            factor[factor < 0] = 1e-8

        if i % 50 == 0:
            print(cost(tensor, *factors), i)
            sys.stdout.flush()

    return home, appliance, time_factor
def initialize_ramp(ys, cohs, bin_size):
    """Derive initial values ``C`` and ``x0`` from the trial data.

    Args:
        ys: sequence of per-trial arrays (time along axis 0); every trial
            must have at least 10 samples.
        cohs: array of per-trial condition codes; trials with code 4 are
            used to estimate ``C``.
        bin_size: divisor applied to the averaged values.

    Returns:
        Tuple ``(C, x0)``: ``C`` is the mean of the last 10 samples of the
        code-4 trials divided by ``bin_size``; ``x0`` is the mean ratio of
        the (bin-size-scaled) average of the first two samples over all
        trials to ``C``.
    """
    selected = np.where(cohs == 4)[0]

    tails = np.array([trial[-10:] for trial in ys])
    C = np.mean(tails[selected], axis=(0, 1)) / bin_size

    heads_mean = np.mean([trial[0:2] for trial in ys], axis=0) / bin_size
    x0 = np.mean(np.divide(heads_mean, C))
    return C, x0
def d_ll(x, T,
         robot_mu_x, robot_mu_y,
         ped_mu_x, ped_mu_y,
         cov_robot_x, cov_robot_y,
         inv_cov_robot_x, inv_cov_robot_y,
         cov_ped_x, cov_ped_y,
         inv_cov_ped_x, inv_cov_ped_y,
         one_over_cov_sum_x, one_over_cov_sum_y, normalize):
    """Return the negated gradient assembled from an interaction term and Gaussian terms.

    ``x`` is laid out as four consecutive blocks of length ``T``:
    robot-x, robot-y, pedestrian-x, pedestrian-y.

    Args:
        x: 1-D array of length ``4 * T``.
        T: block length.
        robot_mu_x, robot_mu_y, ped_mu_x, ped_mu_y: means subtracted from the
            corresponding block of ``x``.
        inv_cov_robot_x, inv_cov_robot_y, inv_cov_ped_x, inv_cov_ped_y:
            matrices right-multiplied onto the centered blocks.
        one_over_cov_sum_x, one_over_cov_sum_y: matrices whose diagonals
            weight the squared block differences.
        cov_robot_x, cov_robot_y, cov_ped_x, cov_ped_y, normalize: accepted
            for interface compatibility; not read here.

    Returns:
        1-D float array of length ``4 * T``.
    """
    robot_x, robot_y = slice(0, T), slice(T, 2 * T)
    ped_x, ped_y = slice(2 * T, 3 * T), slice(3 * T, 4 * T)

    diff_x = x[robot_x] - x[ped_x]
    diff_y = x[robot_y] - x[ped_y]

    inv_var_x = np.diag(one_over_cov_sum_x)
    inv_var_y = np.diag(one_over_cov_sum_y)

    # Unnormalized Gaussian in the block differences (normalizer fixed to 1).
    Z_x = np.exp(-0.5 * np.multiply(inv_var_x, np.power(diff_x, 2)))
    Z_y = np.exp(-0.5 * np.multiply(inv_var_y, np.power(diff_y, 2)))
    Z = np.multiply(Z_x, Z_y)
    ratio = np.divide(Z, 1. - Z)

    alpha_x = np.multiply(ratio, np.multiply(diff_x, inv_var_x))
    alpha_y = np.multiply(ratio, np.multiply(diff_y, inv_var_y))

    total = np.zeros(4 * T)

    # Interaction term: equal and opposite on robot and pedestrian blocks.
    total[robot_x] += alpha_x
    total[robot_y] += alpha_y
    total[ped_x] = -alpha_x
    total[ped_y] = -alpha_y

    # Gaussian terms for every block.
    total[robot_x] += -np.dot(x[robot_x] - robot_mu_x, inv_cov_robot_x)
    total[robot_y] += -np.dot(x[robot_y] - robot_mu_y, inv_cov_robot_y)
    total[ped_x] += -np.dot(x[ped_x] - ped_mu_x, inv_cov_ped_x)
    total[ped_y] += -np.dot(x[ped_y] - ped_mu_y, inv_cov_ped_y)

    return -1. * total
def cost(self, controls, states, system_step):
    """
    Penalize infidelity between the evolved states and the target states.

    Args:
    controls :: ndarray - the control parameters for all time steps (unused)
    states :: ndarray - an array of the states evolved to
        the current time step
    system_step :: int - the system time step (unused)

    Returns:
    cost :: float - the penalty
    """
    # <target_i | state_i> for every state, taken from the 1x1 products.
    overlaps = anp.matmul(self.target_states_dagger, states)[:, 0, 0]
    fidelity = anp.sum(anp.square(anp.abs(overlaps)), axis=0)

    infidelity = 1 - anp.divide(fidelity, self.state_count)
    per_step_infidelity = anp.divide(infidelity, self.step_count)
    return self.cost_multiplier * per_step_infidelity
def prediction_accuracy(data, labels, theta):
    """Return the fraction of samples whose arg-max score matches the label.

    Args:
        data: indexable collection of samples.
        labels: indexable collection of label vectors; the arg-max of each
            is the target class.
        theta: model parameters forwarded to ``log_bernoulli_prod``.

    Returns:
        Number of correct predictions divided by ``len(data)``.
    """
    n_samples = len(data)
    n_correct = sum(
        1 for i in range(n_samples)
        if np.argmax(log_bernoulli_prod(data[i], theta)) == np.argmax(labels[i])
    )
    return np.divide(n_correct, n_samples)
def newtons_method(g, max_its, w, num_pts, batch_size, **kwargs):
    """Mini-batch Newton's method, returning the weight history.

    Args:
        g: cost function called as ``g(w, batch_inds)``.
        max_its: number of passes over the data.
        w: initial weights (any structure accepted by ``flatten_func``).
        num_pts: total number of data points.
        batch_size: number of points per mini-batch.
        **kwargs: optional ``epsilon`` -- multiple of the identity added to
            the Hessian for numerical stability (default ``10**(-7)``).

    Returns:
        List of unflattened weights: the initial weights, the weights after
        every pass, and the final weights appended once more at the end.
    """
    # Flatten the input function in case it takes matrices of weights.
    flat_g, unflatten, w = flatten_func(g, w)

    # ``gradient`` returns both the cost value and the gradient.
    gradient = value_and_grad(flat_g)
    hess = hessian(flat_g)

    epsilon = kwargs.get('epsilon', 10**(-7))

    w_hist = [unflatten(w)]

    # Number of mini-batches needed to cover the whole dataset.
    num_batches = int(np.ceil(np.divide(num_pts, batch_size)))

    for _ in range(max_its):
        for batch in range(num_batches):
            first = batch * batch_size
            last = min((batch + 1) * batch_size, num_pts)
            batch_inds = np.arange(first, last)

            _, grad_eval = gradient(w, batch_inds)
            hess_eval = hess(w, batch_inds)

            # Reshape the Hessian to a square matrix for numpy.linalg.
            side = int((np.size(hess_eval))**(0.5))
            hess_eval.shape = (side, side)

            # Solve (H + eps*I) w_new = (H + eps*I) w - grad for w_new.
            A = hess_eval + epsilon * np.eye(np.size(w))
            w = np.linalg.lstsq(A, np.dot(A, w) - grad_eval)[0]

        # Record weights after each pass.
        w_hist.append(unflatten(w))

    # Collect final weights.
    w_hist.append(unflatten(w))
    return w_hist
def probabilities(params, inputs = None, exemplars = None, hps = None):
    """Return row-wise softmax probabilities of the scaled output activations.

    The last element returned by ``forward`` is multiplied by ``hps['phi']``
    and passed through a softmax along axis 1.  The row maximum is
    subtracted before exponentiating; this leaves the result mathematically
    unchanged but prevents ``exp`` from overflowing to ``inf`` (and the ratio
    from becoming ``nan``) for large activations.

    Args:
        params: model parameters forwarded to ``forward``.
        inputs: forwarded to ``forward``.
        exemplars: forwarded to ``forward``.
        hps: hyperparameter mapping; must contain ``'phi'``.

    Returns:
        2-D array of the same shape as the output activations whose rows sum
        to one.
    """
    output_activation = forward(params, inputs = inputs, exemplars = exemplars, hps = hps)[-1]

    scaled = output_activation * hps['phi']
    # Shift each row so its largest entry is 0 before exponentiating.
    shifted = scaled - np.max(scaled, axis=1, keepdims=True)
    exp_scaled = np.exp(shifted)

    return np.divide(
        exp_scaled,
        np.sum(exp_scaled, axis=1, keepdims=True)
    )
def recover(image, transmission, airlight):
    """
    Recovers pixels without their airlight component.

    For each of the first three channels ``c`` the update is
    ``(image[..., c] - (1 - transmission) * airlight[c]) / transmission``.
    ``image`` is modified in place and also returned.

    :param image: H x W x C array (C >= 3); overwritten with the result
    :param transmission: H x W array dividing every channel
    :param airlight: per-channel airlight values (indexable by channel)
    :return: the same ``image`` object, updated
    """
    haze_fraction = np.ones(transmission.shape) - transmission
    for channel in range(3):
        haze = np.multiply(haze_fraction, airlight[channel])
        dehazed = image[:, :, channel] - haze
        image[:, :, channel] = np.divide(dehazed, transmission)
    return image
def gaussian_area(x, mean, sigma):
    """
    Tail mass of a Gaussian beyond ``x``, measured away from the mean.

    :param x: lower/higher bound
    :param mean: gaussian param mean
    :param sigma: gaussian param sigma
    :return: area under curve from x -> inf or x-> -inf
    """
    standardized = (x - mean) / (sigma * agnp.sqrt(2))
    # |erf| is twice the mass between the mean and x.
    mass_mean_to_bound = agnp.divide(agnp.abs(sp.erf(standardized)), 2)
    return agnp.subtract(0.5, mass_mean_to_bound)
def avg_log_likelihood(data, labels, weights):
    """Return the negative log of the mean predicted probability of the true class.

    For every sample the class scores are ``w · x`` for each row ``w`` of
    ``weights``; the probability of the labelled class is the softmax
    ``exp(score_true - logsumexp(scores))``.  (An earlier revision divided
    ``exp(score_true)`` by ``logsumexp(scores)`` itself rather than by its
    exponential, which is not a probability.)

    NOTE(review): the aggregation is ``-log(mean(p))``, not ``mean(log p)``
    as the function name might suggest; it is kept as-is -- confirm which
    one callers expect.

    Args:
        data: indexable collection of feature vectors.
        labels: indexable collection of one-hot label vectors.
        weights: iterable of per-class weight vectors.

    Returns:
        Scalar ``-log(mean_i p_i)``.
    """
    probs = []
    for i in range(len(data)):
        scores = np.array([np.dot(w, data[i]) for w in weights])
        # Softmax probability of the labelled class, computed in log space.
        log_prob = np.dot(labels[i], scores) - logsumexp(scores)
        probs.append(np.exp(log_prob))
    return -np.log(np.mean(np.array(probs)))
def prediction_accuracy(data, labels, theta):
    """Return the fraction of samples whose highest-scoring class matches the label.

    The score of class ``k`` for a sample ``x`` is ``theta[k] · x``.  The
    class weights are taken from the ``theta`` argument; an earlier revision
    iterated over an undefined name ``weights`` and therefore only worked
    when a global of that name happened to exist.

    Args:
        data: indexable collection of feature vectors.
        labels: indexable collection of label vectors; the arg-max of each
            is the target class.
        theta: iterable of per-class weight vectors.

    Returns:
        Number of correct predictions divided by ``len(data)``.
    """
    accuracy = 0
    for i in range(len(data)):
        scores = [np.dot(class_weights, data[i]) for class_weights in theta]
        if np.argmax(scores) == np.argmax(labels[i]):
            accuracy += 1
    return np.divide(accuracy, len(data))
def adjust_coef(self, w):
    """Map weights fitted on standardized features back to the original scale.

    For ``prob_func_ == "sigmoid"`` the work is delegated to
    ``baseRegression.adjust_coef``.  Otherwise (softmax) the last row of
    ``w`` is treated as the intercept and the remaining rows, transposed,
    as the coefficients; both are corrected with ``self.scaler_``.

    Changes from the earlier revision (softmax branch only):
      * the intercept correction sums over the feature axis only, so a 2-D
        coefficient matrix gets one correction per class instead of a single
        total shared by all classes (1-D coefficients are unaffected);
      * the L1 shrinkage is applied element-wise, so it no longer raises on
        2-D coefficients or vector intercepts.

    Args:
        w: fitted weights; the last entry/row is the intercept.

    Returns:
        Tuple ``(coef, intercept)`` on the original feature scale.
    """
    if self.prob_func_ == "sigmoid":
        coef, intercept = baseRegression.adjust_coef(self, w)
    else:  # self.prob_func_ == "softmax"
        coef = np.divide(w[:-1].T, self.scaler_.scale_)
        intercept = w[-1] - np.sum(coef * self.scaler_.mean_, axis=-1)
        if self.penalty_ == "l1":
            # ===FIXME===
            # The proper condition for shrinking a coefficient to 0 is not
            # known; a fixed 0.1 magnitude cutoff is used as a stand-in.
            coef = np.where(np.abs(coef) < 0.1, 0.0, coef)
            if np.ndim(intercept) == 0:
                intercept = 0.0 if abs(intercept) < 0.1 else intercept
            else:
                intercept = np.where(np.abs(intercept) < 0.1, 0.0, intercept)
    return coef, intercept
def test_divide_arg1():
    """Check gradients of ``np.divide`` and of its derivative w.r.t. argument 1."""
    def fun(x, y):
        return np.divide(x, y)

    d_fun = grad(fun, 1)

    # Fresh random scalars for each check, drawn in call order.
    check_grads(fun, npr.rand(), npr.rand())
    check_grads(d_fun, npr.rand(), npr.rand())
def test_divide_arg1():
    """Check gradients of ``np.divide`` at a random pair of scalars."""
    def fun(x, y):
        return np.divide(x, y)

    # Build the checker first, then draw the two random arguments.
    checker = check_grads(fun)
    checker(npr.rand(), npr.rand())