def powerm(x, power):
    """Raise a matrix (or every matrix of a stack) to the power ``power``.

    The input is first passed through ``to_ndarray(x, to_ndim=3)``.  When
    ``_is_symmetric`` accepts the result and every eigenvalue is strictly
    positive, the power is applied to the eigenvalues and the matrices are
    rebuilt from the eigendecomposition.  In every other case the result is
    ``expm(power * logm(x))``, evaluated matrix by matrix with SciPy.

    Parameters
    ----------
    x : array
        Object exposing ``ndim``; a single matrix or a stack of matrices.
    power : scalar
        Exponent applied to each matrix.

    Returns
    -------
    array
        The first (and only) matrix of the stack when ``x`` was 2-D,
        otherwise the whole stack.
    """
    input_ndim = x.ndim
    mats = to_ndarray(x, to_ndim=3)

    result = None
    if _is_symmetric(mats):
        spectrum, basis = np.linalg.eigh(mats)
        if (spectrum > 0).all():
            as_diag = np.vectorize(np.diag, signature='(n)->(n,n)')
            powered = as_diag(spectrum ** power)
            basis_t = np.transpose(basis, axes=(0, 2, 1))
            result = np.matmul(np.matmul(basis, powered), basis_t)

    if result is None:
        # Generic route: non-symmetric input, or a non-positive eigenvalue.
        batched_logm = np.vectorize(
            scipy.linalg.logm, signature='(n,m)->(n,m)')
        batched_expm = np.vectorize(
            scipy.linalg.expm, signature='(n,m)->(n,m)')
        result = batched_expm(power * batched_logm(mats))

    if input_ndim == 2:
        return result[0]
    return result
def predict(self, X, ret='Attributes'):
    """Predict class scores or class names for every row of ``X``.

    Each row is passed through ``self.__softmaxFunc``.  With
    ``ret == 'Values'`` the resulting score array is returned as is; for any
    other value of ``ret`` the index of the largest score in each row is
    mapped through ``self.index_to_names`` and those results are returned.

    Parameters
    ----------
    X : array-like
        Two-dimensional input, one sample per row.
    ret : str
        ``'Values'`` for raw scores, anything else for mapped names.
    """
    X_arr = np.array(X)
    row_softmax = np.vectorize(self.__softmaxFunc, signature='(n)->(m)')
    # Use the converted array: the original computed it and then ignored it.
    scores = row_softmax(X_arr)
    if ret == 'Values':
        return scores
    to_names = np.vectorize(self.index_to_names)
    return to_names(np.argmax(scores, axis=1))
def preprocessData(self, ys, u=None, computeMarginal=True):
    """Store the observation-dependent quantities derived from ``ys``.

    Sets ``self._T``, ``self.hy``, ``self.computeMarginal`` and
    ``self.log_Zy``; when ``u`` is given, also sets ``self.u``.

    Parameters
    ----------
    ys : array-like
        Either 2-D (a leading axis of length one is added) or 3-D.  The last
        axis must match ``self.J1Emiss.shape[0]``.
    u : array or None
        Optional array of shape ``(self.T, self.D_latent)``; NaN entries are
        recorded in a mask stored next to it.
    computeMarginal : bool
        When true, ``self.log_Zy`` is computed with ``Normal.log_partition``
        from ``self.Jy`` and ``self.hy``; otherwise it is filled with zeros.

    Raises
    ------
    AssertionError
        If ``ys`` is None, has an unsupported number of dimensions or a
        mismatching last axis, or if ``u`` has the wrong shape.
    """
    # The original had the bare expression `ys is not None`, which checks
    # nothing; this is the assertion it was evidently meant to be.
    assert ys is not None
    ys = np.array(ys)
    if ys.ndim == 2:
        ys = ys[None]
    else:
        assert ys.ndim == 3

    assert self.J1Emiss.shape[0] == ys.shape[2]

    self._T = ys.shape[1]

    # This is A.T @ sigInv @ y for each y, summed over each measurement
    self.hy = ys.dot(self._hy).sum(axis=0)

    # P( y | x ) ~ N( -0.5 * Jy, hy )
    self.computeMarginal = computeMarginal
    if computeMarginal:
        print('self.Jy', self.Jy)
        print('self.hy', self.hy)
        partition = np.vectorize(
            lambda J, h: Normal.log_partition(nat_params=(-0.5 * J, h)),
            signature='(n,n),(n)->()')
        self.log_Zy = partition(self.Jy, self.hy)
    else:
        self.log_Zy = np.zeros(self.hy.shape[0])

    if u is not None:
        assert u.shape == (self.T, self.D_latent)
        uMask = np.isnan(u)
        self.u = (u, uMask, None)
def _expsym(x): eigvals, eigvecs = np.linalg.eigh(x) eigvals = np.exp(eigvals) eigvals = np.vectorize(np.diag, signature='(n)->(n,n)')(eigvals) transp_eigvecs = np.transpose(eigvecs, axes=(0, 2, 1)) result = np.matmul(eigvecs, eigvals) result = np.matmul(result, transp_eigvecs) return result
def k_class_predict(self, X, flag=True):
    """Apply ``self.softmax`` to each row of ``X``.

    Returns the per-row softmax output when ``flag == True``; otherwise the
    index of the largest entry of each row.
    """
    samples = np.array(X)
    row_softmax = np.vectorize(self.softmax, signature='(n)->(m)')
    scores = row_softmax(samples)
    # Equality test (not truthiness) is kept on purpose: only values equal
    # to True select the score output.
    if flag == True:  # noqa: E712
        return scores
    return np.argmax(scores, axis=1)
def logm(x):
    """Matrix logarithm of a matrix or of every matrix of a stack.

    The input is passed through ``to_ndarray(x, to_ndim=3)``.  If
    ``_is_symmetric`` accepts it and all eigenvalues are strictly positive,
    the logarithm is taken on the eigenvalues and the matrices are rebuilt
    from the eigendecomposition; otherwise ``scipy.linalg.logm`` is applied
    matrix by matrix.

    Returns the single matrix when ``x`` was 2-D, otherwise the stack.
    """
    input_ndim = x.ndim
    mats = to_ndarray(x, to_ndim=3)

    result = None
    if _is_symmetric(mats):
        spectrum, basis = np.linalg.eigh(mats)
        if (spectrum > 0).all():
            as_diag = np.vectorize(np.diag, signature="(n)->(n,n)")
            log_diag = as_diag(np.log(spectrum))
            basis_t = np.transpose(basis, axes=(0, 2, 1))
            result = np.matmul(np.matmul(basis, log_diag), basis_t)

    if result is None:
        # Generic route: non-symmetric input, or a non-positive eigenvalue.
        batched_logm = np.vectorize(
            scipy.linalg.logm, signature="(n,m)->(n,m)")
        result = batched_logm(mats)

    if input_ndim == 2:
        return result[0]
    return result
def diag(x):
    """Build one diagonal matrix per row of ``x``.

    ``x`` is passed through ``to_ndarray(x, to_ndim=2)``; with ``n`` its
    second dimension, the result has shape ``(m, n, n)`` where block ``i``
    is cut from the diagonal of the single large ``diagflat`` matrix built
    from all entries.
    """
    rows = to_ndarray(x, to_ndim=2)
    _, width = shape(rows)

    # diagflat flattens its input, so this is one big diagonal matrix.
    flat_diag = _np.vectorize(
        _np.diagflat, signature='(m,n)->(k,k)')(rows)
    _, side = shape(flat_diag)

    n_blocks = int(side / width)
    blocks = zeros((n_blocks, width, width))
    for idx in range(n_blocks):
        span = slice(idx * width, (idx + 1) * width)
        blocks[idx] = flat_diag[span, span]
    return blocks
def get_loss_function(self):
    """Return a loss that averages ``self.resid`` over a fixed 1-D grid.

    The grid consists of ``self.nx`` evenly spaced points on
    ``[0, self.L]`` and is built once, when this method is called.

    Returns
    -------
    callable
        ``loss_function(params)``: evaluates ``self.resid(params, x_i)`` at
        every grid point and returns the sum divided by the number of
        points.
    """
    # `params` (argument 0) is excluded from vectorisation, so it is handed
    # to `resid` unchanged for every grid point.
    vresid = np.vectorize(self.resid, excluded=[0])
    grid = np.linspace(0, self.L, self.nx)

    def loss_function(params):
        residuals = vresid(params, grid)
        return np.sum(residuals) / (residuals.shape[0])

    return loss_function
def is_real_num(X):
    """Return True where an element is a finite real number.

    Works for a scalar or a numpy array; the result is a boolean array with
    the same shape as the input (0-d for a scalar).  An element counts as a
    real number when ``float(element)`` succeeds and the element is neither
    NaN nor infinite.  Empty input yields an empty boolean array.
    """
    def each_elem_true(x):
        try:
            float(x)
            return not (np.isnan(x) or np.isinf(x))
        except (TypeError, ValueError, OverflowError):
            # Not convertible to float, or not a type isnan/isinf accept.
            return False

    # Explicit otypes: np.vectorize cannot infer the output type from
    # size-0 input and would raise otherwise.
    return np.vectorize(each_elem_true, otypes=[bool])(X)
def predict(self, X, round=True):
    """Predict with the fitted coefficients stored in ``self.__coef``.

    The linear score is ``X @ coef[1:] + coef[0]`` and is passed element by
    element through ``self.__sigmoid``.

    Parameters
    ----------
    X : array-like
        Input samples.
    round : bool
        When false, the sigmoid outputs are returned.  Otherwise they are
        thresholded: values below 0.5 become 0, everything else becomes 1.
    """
    features = np.array(X)
    linear = np.matmul(features, self.__coef[1:])
    linear = linear + self.__coef[0]
    probs = np.vectorize(self.__sigmoid)(linear)
    if not round:
        return probs
    return np.where(probs < 0.5, 0, 1)
def expm(x):
    """Matrix exponential of a matrix or of every matrix of a stack.

    The input is passed through ``to_ndarray(x, to_ndim=3)``.  Inputs
    accepted by ``_is_symmetric`` go through ``_expsym``; all others are
    handled matrix by matrix with ``scipy.linalg.expm``.

    Returns the single matrix when ``x`` was 2-D, otherwise the stack.
    """
    input_ndim = x.ndim
    mats = to_ndarray(x, to_ndim=3)

    if _is_symmetric(mats):
        stacked = _expsym(mats)
    else:
        batched_expm = np.vectorize(
            scipy.linalg.expm, signature='(n,m)->(n,m)')
        stacked = batched_expm(mats)

    return stacked[0] if input_ndim == 2 else stacked
def get_local_eq_loss_function(self, resid):
    """Return a loss that adds up the mean residual of every domain patch.

    ``self.domain`` is iterated as ``(X, Y)`` pairs.  For each pair the
    residual ``resid(params, x, y)`` is evaluated element-wise and its sum
    is divided by the product of the first two dimensions of the result.
    The loss is the total of those per-pair values.
    """
    # `params` (argument 0) is passed through to `resid` unvectorised.
    vresid = np.vectorize(resid, excluded=[0])

    def loss_function(params):
        total = 0.
        for X, Y in self.domain:
            residuals = vresid(params, X, Y)
            n_points = residuals.shape[0] * residuals.shape[1]
            total = total + np.sum(residuals) / n_points
        return total

    return loss_function
def plot_error_surface(loss_fun, params, ax=None):
    """Draw labelled contours of ``loss_fun(w0, w1)`` and mark ``params``.

    The loss is evaluated on a 10 x 10 grid spanning
    ``[-2 * params[k], 2 * params[k]]`` along each of the two axes.  A new
    figure with a single subplot is created when ``ax`` is None.

    Returns the axes that were drawn on.
    """
    if ax is None:
        ax = plt.figure().add_subplot(111)

    w0_axis = np.linspace(-2*params[0], 2*params[0], 10)
    w1_axis = np.linspace(-2*params[1], 2*params[1], 10)
    grid0, grid1 = np.meshgrid(w0_axis, w1_axis)

    surface = np.vectorize(loss_fun)(grid0, grid1)
    contours = ax.contour(w0_axis, w1_axis, surface)
    ax.clabel(contours)
    # Red cross at the supplied parameter values.
    ax.plot(params[0], params[1], 'rx', markersize=14)
    return ax
def solve_sylvester(a, b, q):
    """Solve the Sylvester equation ``a x + x b = q`` for ``x``.

    A closed form based on the eigendecomposition of ``a`` is used when
    ``a`` and ``b`` have the same shape, are element-wise equal, ``a`` is
    symmetric up to 1e-12 and all its eigenvalues are at least 1e-12.
    Every other case is delegated to ``scipy.linalg.solve_sylvester``,
    applied matrix by matrix.
    """
    def _general():
        batched = np.vectorize(
            scipy.linalg.solve_sylvester,
            signature="(m,m),(n,n),(m,n)->(m,n)",
        )
        return batched(a, b, q)

    if a.shape != b.shape:
        return _general()

    perm = (0, 2, 1) if a.ndim == 3 else (1, 0)
    if not np.all(a == b):
        return _general()
    if not np.all(np.abs(a - np.transpose(a, perm)) < 1e-12):
        return _general()

    eigvals, eigvecs = eigh(a)
    if not np.all(eigvals >= 1e-12):
        return _general()

    # In the eigenbasis the equation decouples entry by entry:
    # x_ij = q_ij / (lambda_i + lambda_j).
    tilde_q = np.transpose(eigvecs, perm) @ q @ eigvecs
    denom = eigvals[..., :, None] + eigvals[..., None, :]
    tilde_x = tilde_q / denom
    return eigvecs @ tilde_x @ np.transpose(eigvecs, perm)
def plot_error_surface(xtrain, ytrain, model, ax=None):
    """Contour-plot ``model.objective`` around the model's two parameters.

    The objective is evaluated as ``model.objective([w0, w1], xtrain,
    ytrain)`` on a 10 x 10 grid spanning ``[-2 * p, 2 * p]`` for each entry
    ``p`` of ``model.params[:2]``; the parameter values themselves are
    marked with a red cross.  A new figure is created when ``ax`` is None.
    Nothing is returned.
    """
    params = model.params
    if ax is None:
        ax = plt.figure().add_subplot(111)

    w0_axis = np.linspace(-2*params[0], 2*params[0], 10)
    w1_axis = np.linspace(-2*params[1], 2*params[1], 10)
    grid0, grid1 = np.meshgrid(w0_axis, w1_axis)

    def objective_at(w0, w1):
        return model.objective([w0, w1], xtrain, ytrain)

    surface = np.vectorize(objective_at)(grid0, grid1)
    contours = ax.contour(w0_axis, w1_axis, surface)
    ax.clabel(contours)
    ax.plot(params[0], params[1], 'rx', markersize=14)
def plot_error_surface(xtrain, ytrain, model, ax=None):
    """Plot labelled contours of the model objective over a weight grid.

    For each of the first two entries ``p`` of ``model.params`` ten evenly
    spaced values in ``[-2 * p, 2 * p]`` are generated, and
    ``model.objective([w0, w1], xtrain, ytrain)`` is evaluated for every
    combination.  The current parameters are drawn as a red cross.  When
    ``ax`` is None a fresh figure with one subplot is used.  Returns None.
    """
    params = model.params
    if ax is None:
        figure = plt.figure()
        ax = figure.add_subplot(111)

    first, second = params[0], params[1]
    w0_values = np.linspace(-2 * first, 2 * first, 10)
    w1_values = np.linspace(-2 * second, 2 * second, 10)
    w0_mesh, w1_mesh = np.meshgrid(w0_values, w1_values)

    def pointwise_loss(w0, w1):
        return model.objective([w0, w1], xtrain, ytrain)

    heights = np.vectorize(pointwise_loss)(w0_mesh, w1_mesh)
    contour_set = ax.contour(w0_values, w1_values, heights)
    ax.clabel(contour_set)
    ax.plot(params[0], params[1], 'rx', markersize=14)
def plot_runtime(ex, fname, func_xvalues, xlabel, func_title=None):
    """Plot the mean runtime of every method, with error bars, on a log axis.

    - ex: experiment number
    - fname: file name of the aggregated result
    - func_xvalues: function taking the loaded results dictionary and
        returning the x-axis values
    - xlabel: label of the x-axis
    - func_title: optional function: results dictionary -> plot title

    ``results['job_results']`` is a 3-D array of dictionaries whose first
    axis indexes repetitions and whose last axis indexes methods; each
    dictionary provides ``'time_secs'``.  Mean and standard deviation are
    taken over the repetitions.

    Return loaded results
    """
    results = glo.ex_load_result(ex, fname)
    job_results = results['job_results']

    # Pull the runtime out of every job-result dictionary.
    times = np.vectorize(lambda job: job['time_secs'])(job_results)
    repeats, _, n_methods = job_results.shape
    time_avg = np.mean(times, axis=0)
    time_std = np.std(times, axis=0)

    xvalues = func_xvalues(results)

    line_styles = func_plot_fmt_map()
    method_labels = get_func2label_map()

    func_names = [f.__name__ for f in results['method_job_funcs']]
    for i in range(n_methods):
        fmt = line_styles[func_names[i]]
        method_label = method_labels[func_names[i]]
        plt.errorbar(xvalues, time_avg[:, i], yerr=time_std[:, i], fmt=fmt,
                     label=method_label)

    plt.ylabel('Time (s)')
    plt.xlabel(xlabel)
    plt.xlim([np.min(xvalues), np.max(xvalues)])
    plt.xticks(xvalues, xvalues)
    plt.legend(loc='best')
    plt.gca().set_yscale('log')
    title = '%s. %d trials. ' % (
        results['prob_label'],
        repeats) if func_title is None else func_title(results)
    plt.title(title)
    return results
def calc_pairwise_term_mat(cont_mat: np.ndarray, grant: int) -> np.ndarray:
    """
    Calculates the matrix of pairwise terms for one grant.

    Args:
        cont_mat: The matrix of contributions; its first dimension is taken
            as the number of users
        grant: The grant under consideration

    Returns:
        A square matrix whose (i,j) entry is
        ``pairwise_term(cont_mat, grant, i, j)`` when i < j and 0 otherwise.
        ``pairwise_term`` is still evaluated for every (i,j) pair.
    """
    n_users = cont_mat.shape[0]

    def _upper_term(i, j):
        # The boolean factor zeroes the diagonal and the lower triangle.
        return (i < j) * pairwise_term(cont_mat, grant, i, j)

    return np.fromfunction(np.vectorize(_upper_term),
                           shape=(n_users, n_users),
                           dtype=int)
def init_parameter_dict(self, n_users, n_items, train_tuple):
    ''' Initialize parameter dictionary attribute for this instance.

    Post Condition
    --------------
    Updates the following attributes of this instance:
    * param_dict : dict
        Keys are string names of parameters ('mu', 'b_per_user',
        'c_per_item'); values are *numpy arrays* of parameter values
    * faster : vectorized wrapper around ``self.calcPred``
    '''
    # TODO fix the lines below to have right dimensionality & val
    random_state = self.random_state  # inherited; read but not used here
    # NOTE(review): `.size` requires n_items to be array-like or a numpy
    # scalar; a plain int would fail here -- confirm against the caller.
    mu_length = n_items.size
    mean_value = ag_np.mean(train_tuple[2])
    self.param_dict = {
        'mu': ag_np.full((mu_length, ), mean_value),
        'b_per_user': ag_np.zeros(n_users),
        'c_per_item': ag_np.zeros(n_items),
    }
    self.faster = ag_np.vectorize(self.calcPred)
def calc_pairwise_coord_mat(cont_mat: np.ndarray) -> np.ndarray:
    """
    Returns a square matrix T where T[i,j] gives the pairwise coordination
    penalty for users i and j when i < j, and 0 otherwise.

    Args:
        cont_mat: A matrix of user contributions to grants; its first
            dimension is taken as the number of users.

    Returns:
        coord_penalty_mat: matrix T with
        ``T[i,j] = pairwise_coord(cont_mat, i, j)`` for i < j.
        ``pairwise_coord`` is still evaluated for every (i,j) pair.
    """
    n_users = cont_mat.shape[0]

    def _upper_coord(i, j):
        # The boolean factor zeroes the diagonal and the lower triangle.
        return (i < j) * pairwise_coord(cont_mat, i, j)

    return np.fromfunction(np.vectorize(_upper_coord),
                           shape=(n_users, n_users),
                           dtype=int)
def plot_save_results(self, id):
    """Plot G, D, Phi, U and the residual on the grid and save the figures.

    The grid consists of ``self.nx`` evenly spaced points on
    ``[0, self.L]``.  Three EPS files are written, each named
    ``self.fig_dir + id + suffix``:

    * ``_ugd.eps``   -- G, D and Phi = (U - G) / D
    * ``_u.eps``     -- U
    * ``_resid.eps`` -- ``self.resid(self.p_U, x)`` at every grid point

    The call blocks in ``plt.show(block=True)`` before returning.

    Returns
    -------
    int
        Always 0.
    """
    def Phi(x_val):
        return (self.U(x_val) - self.G(x_val)) / self.D(x_val)

    plt.figure()
    x = np.linspace(0, self.L, self.nx)
    u = np.zeros(x.shape)
    g = np.zeros(x.shape)
    d = np.zeros(x.shape)
    phi = np.zeros(x.shape)
    for i, x_i in enumerate(x):
        u[i] = self.U(x_i)
        g[i] = self.G(x_i)
        d[i] = self.D(x_i)
        phi[i] = Phi(x_i)

    # PLOT COMPONENTS: G, D, Phi
    plt.plot(x, g, label="$G(%s)$" % self.var_id, color='g')
    plt.plot(x, d, label="$D(%s)$" % self.var_id, color='tab:olive')
    # Raw string: "\P" is not a valid escape sequence in a normal literal.
    plt.plot(x, phi, label=r"$\Phi(%s)$" % self.var_id, color='r')
    plt.xlabel("$%s$" % self.var_id)
    plt.legend()
    plt.savefig(self.fig_dir + id + "_ugd.eps")

    # PLOT U, the solution
    plt.figure()
    plt.plot(x, u, label="$U(%s)$" % self.var_id, color='k')
    plt.xlabel("$%s$" % self.var_id)
    plt.legend()
    plt.savefig(self.fig_dir + id + "_u.eps")

    # PLOT RESIDUAL
    # The parameters (argument 0) are passed to `resid` unvectorised.
    vresid = np.vectorize(self.resid, excluded=[0])
    plt.figure()
    plt.plot(x, vresid(self.p_U, x), label="$R(%s)$" % self.var_id)
    plt.ylabel("$R(%s)$" % self.var_id)
    plt.xlabel("$%s$" % self.var_id)
    plt.legend()
    plt.savefig(self.fig_dir + id + "_resid.eps")
    plt.show(block=True)
    return 0
def plot_runtime(ex, fname, func_xvalues, xlabel, func_title=None):
    """Plot the mean runtime of every method, with error bars, on a log axis.

    - ex: experiment number
    - fname: file name of the aggregated result
    - func_xvalues: function taking the loaded results dictionary and
        returning the x-axis values
    - xlabel: label of the x-axis
    - func_title: optional function: results dictionary -> plot title

    ``results['test_results']`` is a 3-D array of dictionaries whose first
    axis indexes repetitions and whose last axis indexes methods; each
    dictionary provides ``'time_secs'``.  Mean and standard deviation are
    taken over the repetitions.

    Return loaded results
    """
    results = glo.ex_load_result(ex, fname)
    test_results = results['test_results']

    # Pull the runtime out of every result dictionary.
    times = np.vectorize(lambda job: job['time_secs'])(test_results)
    repeats, _, n_methods = test_results.shape
    time_avg = np.mean(times, axis=0)
    time_std = np.std(times, axis=0)

    xvalues = func_xvalues(results)

    line_styles = exglo.func_plot_fmt_map()
    method_labels = exglo.get_func2label_map()

    func_names = [f.__name__ for f in results['method_job_funcs']]
    for i in range(n_methods):
        fmt = line_styles[func_names[i]]
        method_label = method_labels[func_names[i]]
        plt.errorbar(xvalues, time_avg[:, i], yerr=time_std[:, i], fmt=fmt,
                     label=method_label)

    plt.ylabel('Time (s)')
    plt.xlabel(xlabel)
    plt.gca().set_yscale('log')
    plt.xlim([np.min(xvalues), np.max(xvalues)])
    plt.xticks(xvalues, xvalues)
    plt.legend(loc='best')
    title = '%s. %d trials. ' % (
        results['prob_label'],
        repeats) if func_title is None else func_title(results)
    plt.title(title)
    return results
def get_loss_function_wave_eq_first_order(u, nx, nt, L, t_max):
    """
    Generate loss function to be used for optimization

    Collocation points form an ``nx`` by ``nt`` grid: ``nx`` evenly spaced
    positions in ``[L/10, 9L/10]`` and ``nt`` evenly spaced times in
    ``[0, t_max]``.

    Returns
    -----------
    loss_function: loss_function(params) returns the residual produced by
    ``get_resid_wave_eq_first_order(u)``, averaged over the grid (sum
    divided by the number of collocation points).
    """
    # todo: make x, t generation independent of this. Perhaps random generation?
    t_pts = np.linspace(0, t_max, nt)
    # The displacement on boundaries prescribed. Leave those alone.
    x_pts = np.linspace(L / 10, L * 9 / 10, nx)

    # `params` (argument 0) is passed to the residual unvectorised.
    vresid = np.vectorize(get_resid_wave_eq_first_order(u), excluded=[0])

    # Column of positions against row of times broadcasts to the full grid.
    x_col = x_pts[:, None]
    t_row = t_pts[None, :]

    def loss_function(params):
        residuals = vresid(params, x_col, t_row)
        n_points = residuals.shape[0] * residuals.shape[1]
        return np.sum(residuals) / n_points

    return loss_function
return np.random.multivariate_normal(mu_P1.reshape(-1), np.diag(reparam_fwd(rho_P1)**2), mc).T ############ # D and dD # ############ def D(alpha, E_batch, r_batch): """ Estimator of C(alpha), corresponding to E_batch [ r^(r_batch) ], with batch input Z and output Y """ local_theta_samples = theta_sample(alpha, E_batch) #[0] # Get _mc_ new samples of the posterior return np.mean( empirical_risk(r_batch, local_theta_samples) ) # Approximate expectation using MC D_vec = np.vectorize(D, excluded=[1,2]) # Vectorized version of D taking an array of alphas! def dD_MC(alpha, E_batch, r_batch): """ Monte-Carlo estimator of the derivative of D(alpha) with E_batch [ r^(r_batch) ] """ local_theta_samples = theta_sample(alpha, E_batch) # Get _mc_ new samples of the posterior return MC_risk_covariance(E_batch, r_batch, empirical_risk, local_theta_samples) ############## # Strategies # ############## def bayes(alpha, X, X_1, X_2, X_3):
#BFGS_alpha = alpha = np.linspace(0.1, 1.0, max_iter) BFGS_alpha = alpha = np.linspace(1, 1, max_iter) ############################################## # Design variables at mesh points i1 = np.arange(xmin, xmax, step) i2 = np.arange(ymin, ymax, step) x1_mesh, x2_mesh = np.meshgrid(i1, i2) # Create a contour plot fig, ax = plt.subplots() plt.ylim(ymin, ymax) plt.xlim(xmin, xmax) v_func = np.vectorize(f_sep) ax.contour(x1_mesh, x2_mesh, v_func(x1_mesh, x2_mesh)) # Add some text to the plot ax.set_title(title) ax.set_xlabel('x1') ax.set_ylabel('x2') ################################################## # Newton's method ################################################## xn = NewtonMethod(max_iter, f, dfdx, hessian, x_start) ax.plot(xn[:, 0], xn[:, 1], 'k-o', label="Newton") print("Newton's method returns")
print('Test AUC: {:.2f}'.format(auc_test))
avg_precision = average_precision_score(y_test.T, y_hat.T)
print('Test Average Precision: {:.2f}'.format(avg_precision))
fpr, tpr, thresholds = roc_curve(y_test.T, y_hat.T)

# Find optimal probability threshold
threshold = Find_Optimal_Cutoff(y_test.T, y_hat.T)
print('Test threshold: {:.2f}'.format(threshold[0]))
# data['pred'] = data['pred_proba'].map(lambda x: 1 if x > threshold else 0)
import numpy as np
# Despite its name, `squarer` binarises: 1 above threshold[0], else 0.
squarer = lambda t: 1 if t > threshold[0] else 0
vfunc = np.vectorize(squarer)
y_pred = vfunc(y_hat)
test = f1_score(y_test.T, y_pred.T)
print('Test f1-score: {:.2f}'.format(test))
print('\n')
print('\n')

# The tree's predictions are binarised with the same threshold as above.
y_tree = tree.predict(X_test.T)
apl = average_path_length(tree, X_test.T)
print('APL for tree: {:.2f}'.format(apl))
#auc_tree = roc_auc_score(y_test.T, y_tree.T)
#print('Test AUC for tree: {:.2f}'.format(auc_tree))
y_pred_tree = vfunc(y_tree)
# NOTE(review): scored against y_hat_int (defined outside this excerpt),
# not y_test -- confirm this is intended.
test_tree = f1_score(y_hat_int.T, y_pred_tree.T)
def main():
    """Train the IRT model, report accuracies and show diagnostic plots.

    Loads the train/validation/test splits from ``../data``, runs ``irt``
    with a fixed learning rate and iteration count, prints the final
    validation accuracy and the test accuracy, plots the training negative
    log likelihood and validation accuracy per iteration, and finally plots
    ``sigmoid(theta - beta_j)`` against ``theta`` for five randomly chosen
    questions.
    """
    data_dir = "../data"
    train_data = load_train_csv(data_dir)
    # You may optionally use the sparse matrix.
    sparse_matrix = load_train_sparse(data_dir)
    val_data = load_valid_csv(data_dir)
    test_data = load_public_test_csv(data_dir)

    # Tuned learning rate and number of iterations.
    lr = 0.01
    n_iter = 15
    theta_train, beta_train, val_acc_lst, train_nlld_lst = irt(
        train_data, val_data, lr, n_iter)

    # Part (c): report validation and test accuracy.
    test_acc = evaluate(test_data, theta_train, beta_train)
    print("The validation set accuracy is %f" % (val_acc_lst[-1]))
    print("The test accuracy of the trained model is %f" % (test_acc))

    # Plot and report
    _, (nll_ax, acc_ax) = plt.subplots(1, 2)
    nll_ax.plot(np.arange(0, n_iter), train_nlld_lst)
    nll_ax.set_xlabel('# iterations')
    nll_ax.set_ylabel('training negative log likelihood')
    acc_ax.plot(np.arange(0, n_iter), val_acc_lst)
    acc_ax.set_xlabel('# iterations')
    acc_ax.set_ylabel('validation accuracy')
    plt.show()

    # Part (d): randomly select 5 questions.
    # NOTE(review): 1774 and 542 are hard-coded; presumably the number of
    # questions and of students in the data set -- confirm.
    np.random.seed(0)
    questions = np.random.randint(1774, size=5)
    prob_lst = np.zeros((5, 542))  # output probabilities

    # Vectorize sigmoid function for one question over all students
    vec_sig = np.vectorize(sigmoid)
    _, ax = plt.subplots()
    for row, question in enumerate(questions):
        beta_j = beta_train[question]
        prob_lst[row] = vec_sig(theta_train - beta_j)
        ax.plot(theta_train, prob_lst[row], 'x',
                label='question %d' % (question))
    ax.legend()
    ax.set_xlabel('theta')
    ax.set_ylabel('probability of correct response')
    plt.show()
# Create a contour plot fig, ax = plt.subplots() plt.ylim(-4, 4) plt.xlim(-4, 4) # Specify contour lines #lines = range(2, 52, 2) # Plot contours #CS = ax.contour(x1_mesh, x2_mesh, f_mesh, lines) # Label contours #ax.clabel(CS, inline=1, fontsize=10) v_func = np.vectorize(f_sep) # major key! ax.contour(x1_mesh, x2_mesh, v_func(x1_mesh, x2_mesh)) # Add some text to the plot ax.set_title('f(x) = x1^2 - 2*x1*x2 + 4*x2^2') ax.set_xlabel('x1') ax.set_ylabel('x2') # Show the plot # plt.show() ################################################## # Newton's method ################################################## xn = np.zeros((2, 2)) xn[0] = x_start # Get gradient at start location (df/dx or grad(f))
# Used by th(t) below as the factor in 2 * pi * f * t
# (presumably a frequency -- confirm units).
f = 30


def l(th):
    """Evaluate the closed-form expression below at the angle ``th``.

    Uses the module-level constants ``h``, ``b``, ``c``, ``m0`` and ``a``,
    which are defined outside this excerpt.
    """
    # f1..f5 are the recurring sub-expressions of the final formula.
    f1 = 1 / (np.sqrt(h**2 + b**2 + c**2 + 2 * c * h * np.sin(th)))
    f2 = 1 / (np.sqrt(h**2 + b**2 + c**2 - 2 * c * h * np.sin(th)))
    f3 = 1 / (h**2 + c**2 + 2 * c * h * np.sin(th))
    f4 = 1 / (h**2 + c**2 - 2 * c * h * np.sin(th))
    f5 = (2 * b * np.cos(th) / (b**2 + h**2 * (np.cos(th))**2))
    return (m0 * a**2 / 4) * (f5 * ((h * np.sin(th) + c) * f1 - (h * np.sin(th) - c) * f2) + (2 * b * c * np.cos(th) * f1 * f3) + (2 * b * c * np.cos(th) * f2 * f4))


# Element-wise versions of l and of its derivative with respect to th.
L = np.vectorize(l)
DerL = np.vectorize(grad(l))


def th(t):
    """Return the angle ``2 * pi * f * t`` at time ``t``."""
    return 2 * np.pi * f * t


def aux(t):
    """Return ``N * I1 * l(th(t))``; ``N`` and ``I1`` are defined elsewhere."""
    return N * I1 * l(th(t))


# Derivative of aux with respect to t, and its element-wise version.
e = grad(aux)
E = np.vectorize(e)
# 1000 sample angles covering [0, 4*pi].
theta = np.linspace(0, 4 * np.pi, 1000)
def cast_to_complex(x):
    """Convert every element of ``x`` to ``complex``.

    Returns a complex numpy array with the same shape as ``x`` (0-d for a
    scalar).  Empty input yields an empty complex array.
    """
    # Explicit otypes: np.vectorize cannot infer the output type from
    # size-0 input and would raise otherwise.
    return _np.vectorize(complex, otypes=[complex])(x)
def plot_prob_reject(ex,
                     fname,
                     func_xvalues,
                     xlabel,
                     func_title=None,
                     return_plot_values=False):
    """
    plot the empirical probability that the statistic is above the threshold.
    This can be interpreted as type-1 error (when H0 is true) or test power
    (when H1 is true). The plot is against the specified x-axis.

    - ex: experiment number
    - fname: file name of the aggregated result
    - func_xvalues: function taking aggregated results dictionary and return
        the values to be used for the x-axis values.
    - xlabel: label of the x-axis.
    - func_title: a function: results dictionary -> title of the plot
    - return_plot_values: if true, also return a PlotValues as the second
      output value.

    ``results["job_results"]`` is a 3-D array of dictionaries whose first
    axis indexes repetitions and whose last axis indexes methods; the
    rejection rate is the mean over the repetitions.

    Return loaded results
    """
    results = glo.ex_load_result(ex, fname)

    def rej_accessor(jr):
        rej = jr["test_result"]["h0_rejected"]
        # When used with vectorize(), making the value float will make the
        # resulting numpy array to be of float. nan values can be stored.
        return float(rej)

    job_results = results["job_results"]
    rejs = np.vectorize(rej_accessor)(job_results)
    repeats, _, n_methods = job_results.shape

    # yvalues (corresponding to xvalues) x #methods
    mean_rejs = np.mean(rejs, axis=0)

    xvalues = func_xvalues(results)

    line_styles = func_plot_fmt_map()
    method_labels = get_func2label_map()

    func_names = [f.__name__ for f in results["method_job_funcs"]]
    plotted_methods = []
    for i in range(n_methods):
        fmt = line_styles[func_names[i]]
        method_label = method_labels[func_names[i]]
        plotted_methods.append(method_label)
        plt.plot(xvalues, mean_rejs[:, i], fmt, label=method_label)

    plt.ylabel("Rejection rate")
    plt.xlabel(xlabel)
    plt.xticks(np.hstack((xvalues)))

    alpha = results["alpha"]
    plt.legend(loc="best")
    title = ("%s. %d trials. $\\alpha$ = %.2g." %
             (results["prob_label"], repeats, alpha)
             if func_title is None else func_title(results))
    plt.title(title)
    plt.grid()
    if return_plot_values:
        return results, PlotValues(xvalues=xvalues,
                                   methods=plotted_methods,
                                   plot_matrix=mean_rejs.T)
    return results
def vectorize(x, pyfunc, multiple_args=False, signature=None, **kwargs):
    """Apply ``pyfunc`` to ``x`` through ``np.vectorize``.

    Parameters
    ----------
    x : array-like or sequence of array-likes
        The single argument of ``pyfunc``, or, when ``multiple_args`` is
        true, the sequence of its positional arguments.
    pyfunc : callable
        Function to vectorize.
    multiple_args : bool
        Whether ``x`` should be unpacked into several arguments.
    signature : str or None
        Forwarded to ``np.vectorize``.
    **kwargs
        Accepted but not used by this implementation.
    """
    vectorized = np.vectorize(pyfunc, signature=signature)
    arguments = x if multiple_args else (x,)
    return vectorized(*arguments)
def plot_prob_stat_above_thresh(ex, fname, h1_true, func_xvalues, xlabel,
                                func_title=None):
    """
    plot the empirical probability that the statistic is above the threshold.
    This can be interpreted as type-1 error (when H0 is true) or test power
    (when H1 is true). The plot is against the specified x-axis.

    - ex: experiment number
    - fname: file name of the aggregated result
    - h1_true: True if H1 is true
    - func_xvalues: function taking results dictionary and return the values
        to be used for the x-axis values.
    - xlabel: label of the x-axis.
    - func_title: a function: results dictionary -> title of the plot

    ``results['test_results']`` is a 3-D array of dictionaries whose first
    axis indexes repetitions and whose last axis indexes methods; the
    plotted value is the mean of ``h0_rejected`` over the repetitions.

    Return loaded results
    """
    results = glo.ex_load_result(ex, fname)
    test_results = results['test_results']

    f_pval = lambda job_result: job_result['test_result']['h0_rejected']
    pvals = np.vectorize(f_pval)(test_results)
    repeats, _, n_methods = test_results.shape
    mean_rejs = np.mean(pvals, axis=0)

    xvalues = func_xvalues(results)

    line_styles = exglo.func_plot_fmt_map()
    method_labels = exglo.get_func2label_map()

    func_names = [f.__name__ for f in results['method_job_funcs']]
    for i in range(n_methods):
        fmt = line_styles[func_names[i]]
        method_label = method_labels[func_names[i]]
        plt.plot(xvalues, mean_rejs[:, i], fmt, label=method_label)

    ylabel = 'Test power' if h1_true else 'Type-I error'
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.xticks(np.hstack((xvalues)))

    alpha = results['alpha']
    plt.legend(loc='best')
    title = '%s. %d trials. $\\alpha$ = %.2g.' % (
        results['prob_label'], repeats,
        alpha) if func_title is None else func_title(results)
    plt.title(title)
    return results