def contour_plot(ax, g, pts, wmax, num_contours, my_colors, pts_levels):
    #### define input space for function and evaluate ####
    w1 = np.linspace(-wmax, wmax, 100)
    w2 = np.linspace(-wmax, wmax, 100)
    w1_vals, w2_vals = np.meshgrid(w1, w2)
    w1_vals.shape = (len(w1)**2, 1)
    w2_vals.shape = (len(w2)**2, 1)
    h = np.concatenate((w1_vals, w2_vals), axis=1)
    func_vals = np.asarray([g(s) for s in h])
    w1_vals.shape = (len(w1), len(w1))
    w2_vals.shape = (len(w2), len(w2))
    func_vals.shape = (len(w1), len(w2))

    ### make contour plot - as well as horizontal and vertical axes ###
    # set level ridges
    levelmin = min(func_vals.flatten())
    levelmax = max(func_vals.flatten())
    cutoff = 0.3
    cutoff = (levelmax - levelmin) * cutoff
    numper = 3
    levels1 = np.linspace(cutoff, levelmax, numper)
    num_contours -= numper

    ##### plot filled contours with generic contour lines #####
    # produce generic contours
    levels2 = np.linspace(levelmin, cutoff, min(num_contours, numper))
    levels = np.unique(np.append(levels1, levels2))
    num_contours -= numper
    while num_contours > 0:
        cutoff = levels[1]
        levels2 = np.linspace(levelmin, cutoff, min(num_contours, numper))
        levels = np.unique(np.append(levels2, levels))
        num_contours -= numper

    # plot the contours
    ax.contour(w1_vals, w2_vals, func_vals, levels=levels[1:], colors='k')
    ax.contourf(w1_vals, w2_vals, func_vals, levels=levels, cmap='Blues')

    ###### add contour curves based on input points #####
    # add the contours passing through the input points
    ax.contour(w1_vals, w2_vals, func_vals, levels=pts_levels, colors='k', linewidths=3)
    ax.contour(w1_vals, w2_vals, func_vals, levels=pts_levels, colors=my_colors, linewidths=2.5)

    ###### clean up plot ######
    ax.set_xlabel('$w_0$', fontsize=12)
    ax.set_ylabel('$w_1$', fontsize=12, rotation=0)
    ax.axhline(y=0, color='k', zorder=0, linewidth=0.5)
    ax.axvline(x=0, color='k', zorder=0, linewidth=0.5)
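# Usage sketch for contour_plot (hedged: g, wmax, and the level values below are
# illustrative choices, not from the source). Note that the pts argument is never
# used inside the function body, so None is passed here.
import numpy as np
import matplotlib.pyplot as plt

g = lambda w: w[0]**2 + w[1]**2  # simple quadratic cost over two weights

fig, ax = plt.subplots(figsize=(5, 5))
contour_plot(ax, g, pts=None, wmax=3, num_contours=12,
             my_colors=['r'], pts_levels=[1.0])
plt.show()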
def reduce_grid(x):
    """
    Undoes expand_grid to take (nx, 2) array to two vectors containing
    unique values of each col.
    :param x: (nx, 2) points
    :return: x1, x2 each vectors
    """
    x1 = np.sort(np.unique(x[:, 0]))
    x2 = np.sort(np.unique(x[:, 1]))
    return x1, x2
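# Round-trip sketch for reduce_grid: build a flattened (nx, 2) grid with
# meshgrid, then recover the per-axis vectors (the values are illustrative).
import numpy as np

x1 = np.array([0.0, 0.5, 1.0])
x2 = np.array([-1.0, 1.0])
g1, g2 = np.meshgrid(x1, x2)
grid = np.column_stack([g1.ravel(), g2.ravel()])  # (6, 2) points
r1, r2 = reduce_grid(grid)
assert np.allclose(r1, x1) and np.allclose(r2, x2)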
def surface_plot(self, g, ax, wmax, view):
    ##### Produce cost function surface #####
    r = np.linspace(-wmax, wmax, 300)

    # create grid from plotting range
    w1_vals, w2_vals = np.meshgrid(r, r)
    w1_vals.shape = (len(r)**2, 1)
    w2_vals.shape = (len(r)**2, 1)
    w_ = np.concatenate((w1_vals, w2_vals), axis=1)
    g_vals = []
    for i in range(len(r)**2):
        g_vals.append(g(w_[i, :]))
    g_vals = np.asarray(g_vals)
    w1_vals.shape = (np.size(r), np.size(r))
    w2_vals.shape = (np.size(r), np.size(r))

    ### is this a counting cost? if so re-calculate ###
    levels = np.unique(g_vals)
    if np.size(levels) < 30:
        # plot each level of the counting cost
        for u in levels:
            # make copy of cost and nan out all non-level entries
            z = g_vals.copy()
            ind = np.argwhere(z != u)
            ind = [v[0] for v in ind]
            z[ind] = np.nan

            # plot the current level
            z.shape = (len(r), len(r))
            ax.plot_surface(w1_vals, w2_vals, z, alpha=1, color='#696969',
                            zorder=0, shade=True, linewidth=0)
    else:
        # smooth cost function, plot as usual: reshape and plot the surface
        g_vals.shape = (np.size(r), np.size(r))

        # plot cost surface
        ax.plot_surface(w1_vals, w2_vals, g_vals, alpha=1, color='w', rstride=25,
                        cstride=25, linewidth=1, edgecolor='k', zorder=2)

    ### clean up panel ###
    ax.xaxis.pane.fill = False
    ax.yaxis.pane.fill = False
    ax.zaxis.pane.fill = False
    ax.xaxis.pane.set_edgecolor('white')
    ax.yaxis.pane.set_edgecolor('white')
    ax.zaxis.pane.set_edgecolor('white')
    ax.xaxis._axinfo["grid"]['color'] = (1, 1, 1, 0)
    ax.yaxis._axinfo["grid"]['color'] = (1, 1, 1, 0)
    ax.zaxis._axinfo["grid"]['color'] = (1, 1, 1, 0)
    ax.view_init(view[0], view[1])
def predict(self, X, y=None):
    """Predict targets for X; if y is given, also return a score."""
    AL = self.forward_prop(X, self.final_params)

    if self.activations[-1] == 'softmax':
        # multiclass classification: predict the arg-max class
        y_hat = AL.argmax(axis=0)
        if y is None:
            return y_hat
        y = np.argmax(y, axis=0)  # decode one-hot labels
        acc = (y_hat == y).mean()
        print("Test Accuracy:", acc)
        return y_hat, acc

    if len(np.unique(y)) == self.layer_info[-1] or len(np.unique(y)) == 2:
        # binary classification: threshold the sigmoid output at 0.5
        y_hat = (AL[0, :] > 0.5).astype(float).reshape(1, -1)
        # mean |y_hat - y| is the error rate, so accuracy is its complement
        acc = 1.0 - np.abs(y_hat - y).mean()
        print("Accuracy:", acc)
        return y_hat, acc

    # regression problem: report root-mean-squared error
    y_hat = AL
    mse = np.mean((y[0, :] - y_hat[0, :])**2)
    rmse = np.sqrt(mse)
    print("RMSE: ", rmse)
    return y_hat, rmse
def rank_data(x):
    """
    Ranks a set of observations, assigning the average of ranks to ties.

    Arguments:
        x: `ndarray(nsamples)`. Vector of data to be compared

    Returns:
        ranks: `ndarray(nsamples)`. Ranks for each observation
    """
    x = x.flatten()
    nsamples = x.size

    # Sort in ascending order
    idx = np.argsort(x)
    ranks = np.empty(idx.size)
    ranks[idx] = np.arange(idx.size) + 1

    # Now average the ranks for ties
    unique_x = np.unique(x)
    if unique_x.size < nsamples:
        for i, xi in enumerate(unique_x):
            if x[x == xi].size > 1:
                ranks[x == xi] = np.mean(ranks[x == xi])
    return ranks
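# Quick sanity check of rank_data on data with ties: the two 3.0 values occupy
# sorted positions 3 and 4, so both receive the average rank 3.5.
import numpy as np

x = np.array([1.0, 3.0, 2.0, 3.0, 5.0])
print(rank_data(x))  # [1.  3.5 2.  3.5 5. ]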
def m_step(self, expectations, datas, inputs, masks, tags,
           optimizer="adam", num_iters=10, **kwargs):
    """
    Fit a logistic regression for the transitions.

    Technically, this is a stochastic M-step since the states
    are sampled from their posterior marginals.
    """
    K, M, D = self.K, self.M, self.D

    zps, zns = [], []
    for Ez, _, _ in expectations:
        z = np.array([np.random.choice(K, p=p) for p in Ez])
        zps.append(z[:-1])
        zns.append(z[1:])

    X = np.vstack([
        np.hstack((input[1:], data[:-1]))
        for input, data in zip(inputs, datas)
    ])
    y = np.concatenate(zns)

    # Identify used states
    used = np.unique(y)
    K_used = len(used)
    unused = np.setdiff1d(np.arange(K), used)

    # Reset parameters before filling in
    self.Ws = np.zeros((K, M))
    self.Rs = np.zeros((K, D))
    self.r = np.zeros((K,))

    if K_used == 1:
        warn("RecurrentOnlyTransitions: Only using 1 state in expectation. "
             "M-step cannot proceed. Resetting transition parameters.")
        return

    # Fit the logistic regression
    self._lr.fit(X, y)

    # Extract the coefficients
    assert self._lr.coef_.shape[0] == (K_used if K_used > 2 else 1)
    if K_used == 2:
        # lr thought there were only two classes
        self.Ws[used[1]] = self._lr.coef_[0, :M]
        self.Rs[used[1]] = self._lr.coef_[0, M:]
    else:
        self.Ws[used] = self._lr.coef_[:, :M]
        self.Rs[used] = self._lr.coef_[:, M:]

    # Set the intercept
    self.r[used] = self._lr.intercept_
def preprocessing(problem):
    path = os.path.join('datasets', '{}.csv'.format(problem))
    data = np.genfromtxt(path, delimiter=',')
    inputs = data[:, :-1]
    labels = data[:, -1]
    n_classes = len(np.unique(labels))
    n_dims = inputs.shape[1]

    # one-hot code targets
    if np.min(labels) != 0:
        labels -= 1  # dummy codes need to start at zero for this to work
    labels = labels.astype(int)
    labels = np.eye(n_classes)[np.array(labels)]

    # norm data to be between -1 and 1
    if problem[:-1] != 'shj':
        inputs -= np.min(inputs, axis=0)
        inputs /= np.ptp(inputs, axis=0)
        inputs *= 2
        inputs -= 1
        full_set = np.append(inputs, labels, 1)
    else:
        full_set = np.append(inputs, labels, 1)
        full_set = np.concatenate((full_set, full_set), axis=0)  # to match Nosofsky+ '94
    return [full_set, n_classes, n_dims]
def _prox(self, beta, thresh):
    """Proximal operator for the group penalty."""
    group_ids = np.unique(self.group)
    result = np.zeros(beta.shape, dtype=float)
    for gid in group_ids:
        idxs_to_update = np.where(self.group == gid)[0]
        group_norm = np.linalg.norm(beta[idxs_to_update])
        if group_norm > 0.:
            # shrink the whole group toward zero
            shrunk = beta[idxs_to_update] - (thresh / group_norm) * beta[idxs_to_update]
            # but do not let any coefficient cross zero: clip at the sign change
            posind = np.where(beta[idxs_to_update] > 0.)[0]
            negind = np.where(beta[idxs_to_update] < 0.)[0]
            po = beta[idxs_to_update].copy()
            po[posind] = np.clip(shrunk[posind], a_min=0., a_max=1e15)
            po[negind] = np.clip(shrunk[negind], a_min=-1e15, a_max=0.)
            result[idxs_to_update] = po
    return result
def predict_cumulative_hazard(self, X, times=None, ancillary_X=None):
    """
    Return the cumulative hazard rate of subjects in X at time points.

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the
        same order as the training data.
    times: iterable, optional
        an iterable of increasing times to predict the cumulative hazard at.
        Default is the set of all durations (observed and unobserved).
        Uses a linear interpolation if points in time are not in the index.
    ancillary_X: numpy array or DataFrame, optional
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the
        same order as the training data.

    Returns
    -------
    cumulative_hazard_ : DataFrame
        the cumulative hazard of individuals over the timeline
    """
    import numpy as np

    times = coalesce(times, self.timeline, np.unique(self.durations))
    exp_mu_, sigma_ = self._prep_inputs_for_prediction_and_return_scores(X, ancillary_X)
    mu_ = np.log(exp_mu_)
    Z = np.subtract.outer(np.log(times), mu_) / sigma_
    return pd.DataFrame(-logsf(Z), columns=_get_index(X), index=times)
def getLevelClusters(rowNum):
    clusterLabels = np.unique(self.clusterAssignments[rowNum])
    clusters = {
        c: np.where(self.clusterAssignments[rowNum] == c)[0]
        for c in clusterLabels
    }
    return clusters
def handle_time_inds(times, h=None):
    """
    Takes a list of time vectors and returns the unique, potentially
    augmented, vector.
    """
    # get size of each vector
    t_sizes = [len(t) for t in times]

    # concatenate to single time vector
    tt = np.concatenate(times)

    # get the distinct times, and the indices
    tt_uni, inv_ind = np.unique(tt, return_inverse=True)

    # split inv_ind up
    ind_ti = util._unpack_vector(inv_ind, t_sizes)

    if h is None:
        return tt_uni, ind_ti
    elif isinstance(h, float) and h > 0:
        # augment the time vector so that diff is at most h
        ttc, inds_c = augment_times(tt_uni, h)
        data_inds = [inds_c[ind_i] for ind_i in ind_ti]
        return ttc, data_inds
    else:
        raise ValueError("h should be a float > 0")
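# handle_time_inds depends on util._unpack_vector; the helper below is a
# hypothetical reconstruction of its contract, inferred from how inv_ind is
# split back into one index array per input vector. It is not the library's
# actual implementation.
import numpy as np

def _unpack_vector(v, sizes):
    # split v into consecutive chunks whose lengths are given by sizes
    return np.split(v, np.cumsum(sizes)[:-1])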
def fit_k_class_regularised(self, X, y, batch_size=None, n_iter=200,
                            lr=0.01, lr_type='constant'):
    if batch_size is None:
        batch_size = len(X)
    self.batch_size = batch_size
    self.n_iter = n_iter
    k = len(np.unique(y))
    n = len(X)
    column_length = len(X.columns)
    theta = np.zeros((k, column_length))
    self.coef_ = theta
    for i in range(1, n_iter):
        # refresh the softmax predictions with the current weights
        # (assumes k_class_predict reads self.coef_)
        soft = self.k_class_predict(X)
        current = theta.copy()
        for j in range(k):
            # gradient step for class j: indicator(y == j) minus predicted probability
            theta[j] = current[j] + lr * np.sum(
                X * np.tile(np.where(y == j, 1, 0) - soft[:, j],
                            (len(current[0]), 1)).T,
                axis=0)
        self.coef_ = theta
def plot_data(self, ax, special_class, special_size):
    # scatter points in both panels
    class_nums = np.unique(self.y)
    C = len(class_nums)
    for c in range(C):
        ind = np.argwhere(self.y == class_nums[c])
        ind = [v[1] for v in ind]
        s = 80
        if class_nums[c] == special_class:
            s = special_size
        ax.scatter(self.x[0, ind], self.x[1, ind], s=s,
                   color=self.color_opts[c], edgecolor='k', linewidth=1.5)

    # control viewing limits
    minx = min(self.x[0, :])
    maxx = max(self.x[0, :])
    gapx = (maxx - minx) * 0.1
    minx -= gapx
    maxx += gapx

    miny = min(self.x[1, :])
    maxy = max(self.x[1, :])
    gapy = (maxy - miny) * 0.1
    miny -= gapy
    maxy += gapy

    ax.set_xlim([minx, maxx])
    ax.set_ylim([miny, maxy])
    # ax.axis('equal')
    ax.axis('off')
def fit_multiclass(self, X, y):
    """
    multiclass learning
    X: samples
    y: labels (one-hot encoded)
    """
    self.labels = np.unique(y)
    self.X = X.copy()
    bias = np.ones((self.X.shape[0], 1))
    self.X = np.append(bias, self.X, axis=1)
    self.X = np.array(self.X)
    self.y = y
    self.nofFeatures = self.X.shape[1]
    self.samples = len(self.X)
    self.coef_ = np.ones((self.nofFeatures, y.shape[1]))
    for i in range(self.epoch):
        # softmax denominator: sum of exponentiated scores over all classes
        x = sum([np.exp(self.X.dot(self.coef_[:, j]))
                 for j in range(self.y.shape[1])])
        for j in range(self.y.shape[1]):
            # gradient of the cross-entropy loss for class j
            err = np.exp(self.X.dot(self.coef_[:, j])) / x - self.y[:, j]
            self.coef_[:, j] = self.coef_[:, j] - self.lr * err.dot(self.X) / self.samples
    return self.coef_
def fit_k_class_regularised_autograd(self, X, y, batch_size=None, n_iter=200,
                                     lr=0.01, lr_type='constant'):
    if batch_size is None:
        batch_size = len(X)
    self.batch_size = batch_size
    self.n_iter = n_iter
    k = len(np.unique(y))
    n = len(X)
    column_length = len(X.columns)
    theta = np.zeros((k, column_length))
    self.coef_ = theta
    # differentiate the cost with respect to theta (argument index 2),
    # not autograd's default first argument
    kclass = grad(self.kclass, 2)
    for i in range(1, n_iter):
        soft = self.k_class_predict(X)
        theta = theta + lr * kclass(X, y, theta, soft)
        self.coef_ = theta
def organize_data_from_txt(data_filepath, delimiter=','):
    data = np.genfromtxt(data_filepath, delimiter=delimiter)
    data = {
        'inputs': data[:, :-1],
        'labels': data[:, -1],
        'categories': np.unique(data[:, -1]),
    }

    # map categories to label indices
    data['idx_map'] = {
        category: idx
        for idx, category in enumerate(data['categories'])
    }

    # map original labels to label indices
    data['labels_indexed'] = [
        data['idx_map'][label] for label in data['labels']
    ]

    # generate one-hot targets
    data['one_hot_targets'] = np.eye(len(data['categories']))[data['labels_indexed']]
    return data
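# Usage sketch for organize_data_from_txt (an illustration: np.genfromtxt also
# accepts file-like objects, so an in-memory CSV stands in for a real file path).
import io
import numpy as np

csv = io.StringIO("0.1,0.2,3\n0.4,0.5,7\n0.6,0.7,3\n")
data = organize_data_from_txt(csv)
print(data['categories'])       # [3. 7.]
print(data['one_hot_targets'])  # [[1. 0.] [0. 1.] [1. 0.]]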
def fit(self, X, y):
    if y.ndim == 1:
        y = y.reshape(-1, 1)

    n_classes = (y.shape[1] if self.loss == CategoricalCrossEntropy
                 else np.unique(y).size)

    if self.loss in (SparseCategoricalCrossEntropy, CategoricalCrossEntropy):
        if self.layers[-1].activation != softmax:
            raise ValueError(
                f'NeuralNetworkClassifier with {type(self.loss).__name__} loss '
                'function only works with softmax output layer')
        if self.layers[-1].fan_out != n_classes:
            raise ValueError(
                'the number of neurons in the output layer must '
                f'be equal to the number of classes, i.e., {n_classes}')
    elif self.loss in (MeanSquaredError, BinaryCrossEntropy):
        if n_classes > 2:
            raise ValueError(
                f'NeuralNetworkClassifier with {type(self.loss).__name__} '
                'loss function only works for binary classification')
        if self.layers[-1].activation != sigmoid:
            raise ValueError(
                f'NeuralNetworkClassifier with {type(self.loss).__name__} '
                'loss function only works with sigmoid output layer')
        if self.layers[-1].fan_out != 1:
            raise ValueError(
                f'NeuralNetworkClassifier with {type(self.loss).__name__} loss '
                'function only works with one neuron in the output layer')

    return super(NeuralNetworkClassifier, self).fit(X, y)
def plot_2d(zl, classes):
    '''
    Plot a representation of 2D latent variables

    zl (numobs, M^{(l)} x r_l ndarray): The latent variable of layer l
    classes (numobs x n_clusters ndarray): The predicted or ground truth labels
    ---------------------------------------------------------------------------
    returns (None): The plot of the latent variables colorized by class
    '''
    n_clusters = len(np.unique(classes))
    colors = ['red', 'green', 'blue', 'silver', 'purple', 'black',
              'gold', 'orange']

    if n_clusters >= len(colors):
        raise ValueError('Too many classes for plotting, '
                         'please add some color names above this line')

    fig = plt.figure(figsize=(16, 9))
    ax = plt.axes()
    ax.scatter(zl[:, 0], zl[:, 1], c=classes,
               cmap=matplotlib.colors.ListedColormap(colors[:n_clusters]))
    plt.title("2D Latent space representation of the data")
    ax.set_xlabel('Latent dimension 1', fontweight='bold')
    ax.set_ylabel('Latent dimension 2', fontweight='bold')
    plt.show()
def random(self, size, X, *params):
    dist_params = np.array(params[0:self.k_dist])
    phi_params = np.array(params[self.k_dist:])
    x = []
    X_out = []
    if type(X) == tuple:
        X = np.random.uniform(*X, size)
    for stress in np.unique(X, axis=0):
        # substitute the life parameter with the life-stress model value
        life_param_mask = (np.arange(len(dist_params))
                           == self.param_map[self.life_parameter])
        dist_params = np.where(
            life_param_mask,
            self.param_transform(self.phi(stress, *phi_params)),
            dist_params)
        U = np.random.uniform(0, 1, size)
        x.append(self.dist.qf(U, *dist_params))
        if np.isscalar(stress):
            cols = 1
        else:
            cols = len(stress)
        X_out.append(np.ones((size, cols)) * stress)
    return np.array(x).flatten(), np.concatenate(X_out)
def rank_grouped_data(x, g):
    """
    Ranks observations taken across several groups

    Arguments:
        x: `ndarray(nsamples)`. Vector of data to be compared
        g: `ndarray(nsamples)`. Group ID's

    Returns:
        ranks: `ndarray(nsamples)`. Ranks for each observation (in sorted order)
        G: `ndarray(nsamples, ngroups)`. Matrix indicating whether sample i is in group j
        R: `ndarray(nsamples, ngroups)`. Matrix indicating the rank for sample i in group j
        lab: `ndarray(ngroups)`. Group labels
    """
    nsamples = x.size
    ngroups = np.unique(g).size

    # Sort in ascending order
    idx = np.argsort(x)
    G, lab = make_onehot(g[idx])

    # Rank the sorted data so the ranks align with G (which is built from g[idx])
    ranks = rank_data(x[idx])
    R = np.tile(ranks.reshape(-1, 1), [1, ngroups]) * G
    return ranks, G, R, lab
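# rank_grouped_data assumes a make_onehot helper; this is a hypothetical sketch
# of its contract inferred from the usage above (an (nsamples, ngroups) 0/1
# membership matrix plus the sorted group labels), not the source's own
# implementation.
import numpy as np

def make_onehot(g):
    lab = np.unique(g)  # sorted group labels
    G = (g.reshape(-1, 1) == lab.reshape(1, -1)).astype(float)  # membership matrix
    return G, lab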
def predict_cumulative_hazard(self, df, times=None):
    """
    Return the cumulative hazard rate of subjects in df at time points.

    Parameters
    ----------
    df: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the
        same order as the training data.
    times: iterable, optional
        an iterable of increasing times to predict the cumulative hazard at.
        Default is the set of all durations (observed and unobserved).
        Uses a linear interpolation if points in time are not in the index.

    Returns
    -------
    cumulative_hazard_ : DataFrame
        the cumulative hazard of individuals over the timeline
    """
    times = np.asarray(coalesce(times, self.timeline, np.unique(self.durations)))
    n = times.shape[0]
    times = times.reshape((n, 1))

    lambdas_ = self._prep_inputs_for_prediction_and_return_parameters(df)

    bp = self.breakpoints
    M = np.minimum(np.tile(bp, (n, 1)), times)
    M = np.hstack([M[:, [0]], np.diff(M, axis=1)])

    return pd.DataFrame(np.dot(M, (1 / lambdas_)), columns=_get_index(df), index=times[:, 0])
def predict_cumulative_hazard(self, X, times=None, ancillary_X=None):
    """
    Return the cumulative hazard rate of subjects in X at time points.

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the
        same order as the training data.
    times: iterable, optional
        an iterable of increasing times to predict the cumulative hazard at.
        Default is the set of all durations (observed and unobserved).
        Uses a linear interpolation if points in time are not in the index.
    ancillary_X: numpy array or DataFrame, optional
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the
        same order as the training data.

    Returns
    -------
    cumulative_hazard_ : DataFrame
        the cumulative hazard of individuals over the timeline
    """
    times = coalesce(times, self.timeline, np.unique(self.durations))
    alpha_, beta_ = self._prep_inputs_for_prediction_and_return_scores(X, ancillary_X)
    return pd.DataFrame(np.log1p(np.outer(times, 1 / alpha_) ** beta_),
                        columns=_get_index(X), index=times)
def __init__(self, X, y, kernels, likelihood=None, mu=None, obs_idx=None,
             max_grad=10., noise=1e-6):
    """
    Args:
        X: data (full grid)
        y: response
        kernels: list of kernel objects
        likelihood: likelihood object
        mu: prior mean
        obs_idx: indices of observed points on grid
        max_grad: for gradient clipping
        noise: observation noise jitter
    """
    self.X = X
    self.y = y
    self.m = self.X.shape[0]
    self.d = self.X.shape[1]
    self.obs_idx = obs_idx
    self.n = len(self.obs_idx) if self.obs_idx is not None else self.m
    self.X_dims = [np.expand_dims(np.unique(X[:, i]), 1) for i in range(self.d)]
    self.mu = np.zeros(self.m) if mu is None else mu
    self.max_grad = max_grad
    self.init_Ks(kernels, noise)
    if likelihood is not None:
        self.likelihood = likelihood
        self.likelihood_grad = egrad(self.likelihood.log_like)
def assign_to_modal_uparams(this_uparam, modal_uparam):
    try:
        # bin each value of this_uparam to its nearest modal value
        mid_pts = 0.5 * (modal_uparam[1:] + modal_uparam[:-1])
        bins = np.concatenate(((-np.inf,), mid_pts, (np.inf,)))
        inds_in_modal = np.digitize(this_uparam, bins) - 1
        numerical = True
    except Exception:
        print('non-numerical parameter')
        numerical = False
    if numerical:
        uinds = np.unique(inds_in_modal)
        inds_in_this = np.zeros((0,), dtype='int')
        for uind in uinds:
            # keep only the single closest candidate for each modal value
            candidates = np.where(inds_in_modal == uind)[0]
            dist_from_modal = np.abs(this_uparam[candidates] - modal_uparam[uind])
            to_keep = candidates[np.argmin(dist_from_modal)]
            inds_in_this = np.concatenate((inds_in_this, (to_keep,)))
        inds_in_modal = inds_in_modal[inds_in_this]
        bool_in_this = np.zeros((len(this_uparam),), dtype='bool')
        bool_in_modal = np.zeros((len(modal_uparam),), dtype='bool')
        bool_in_this[inds_in_this] = True
        bool_in_modal[inds_in_modal] = True
    else:
        assert np.all(this_uparam == modal_uparam)
        bool_in_this, bool_in_modal = [
            np.ones(this_uparam.shape, dtype='bool') for _ in range(2)
        ]
    return bool_in_this, bool_in_modal
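# Usage sketch for assign_to_modal_uparams (illustrative values): each entry of
# this_uparam is matched one-to-one to its nearest modal value; the returned
# masks flag which entries on each side found a partner.
import numpy as np

this_uparam = np.array([0.1, 0.9, 2.2])
modal_uparam = np.array([0.0, 1.0, 2.0, 3.0])
in_this, in_modal = assign_to_modal_uparams(this_uparam, modal_uparam)
print(in_this)   # [ True  True  True]
print(in_modal)  # [ True  True  True False]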
def xgb_train_pred(train, y):
    # Outputs clfs, importances, and oof_preds
    xg_clfs = []
    xg_importances = pd.DataFrame()
    xgb_oof_train = np.zeros((len(train), np.unique(y).shape[0]))

    # inverse-frequency class weights
    w = y.value_counts()
    weights = {i: np.sum(w) / w[i] for i in w.index}

    for fold_, (trn_, val_) in enumerate(folds.split(y, y)):
        trn_x, trn_y = train.iloc[trn_], y.iloc[trn_]
        val_x, val_y = train.iloc[val_], y.iloc[val_]

        clf = xgb.XGBClassifier(**xgb_params)
        clf.fit(trn_x, trn_y,
                eval_set=[(trn_x, trn_y), (val_x, val_y)],
                eval_metric=xgb_multi_weighted_logloss,
                verbose=100,
                early_stopping_rounds=50,
                sample_weight=trn_y.map(weights))
        xgb_oof_train[val_, :] = clf.predict_proba(val_x, ntree_limit=clf.best_ntree_limit)
        print(multi_weighted_logloss(val_y, xgb_oof_train[val_, :]))

        imp_df = pd.DataFrame()
        imp_df['feature'] = train.columns
        imp_df['gain'] = clf.feature_importances_
        imp_df['fold'] = fold_ + 1
        xg_importances = pd.concat([xg_importances, imp_df], axis=0)

        xg_clfs.append(clf)

    return xg_clfs, xg_importances, xgb_oof_train
def _subsfs_list(sfs, n_chunks, rnd):
    n_snps = int(sfs.n_snps())
    logger.debug("Splitting {} SNPs into {} minibatches".format(n_snps, n_chunks))

    logger.debug("Building list of length {}".format(n_snps))
    idxs = np.zeros(n_snps, dtype=int)
    total_counts = np.array(sfs._total_freqs, dtype=int)
    curr = 0
    for i, cnt in enumerate(total_counts):
        idxs[curr:(curr + cnt)] = i
        curr += cnt

    logger.debug("Permuting list of {} SNPs".format(n_snps))
    idxs = rnd.permutation(idxs)

    logger.debug("Splitting permuted SNPs into {} minibatches".format(n_chunks))
    ret = []
    for chunk in range(n_chunks):
        chunk_idxs, chunk_cnts = np.unique(idxs[chunk::n_chunks], return_counts=True)
        sub_configs = _ConfigList_Subset(sfs.configs, chunk_idxs)
        ret.append(Sfs.from_matrix(np.array([chunk_cnts]).T, sub_configs,
                                   folded=sfs.folded, length=None))
    return ret
def plot_subproblem_data(self):
    C = len(np.unique(self.y))

    # construct figure
    fig = plt.figure(figsize=(9, 2.5))

    # create subplot with C panels
    gs = gridspec.GridSpec(1, C)

    # scatter points
    for c in range(C):
        # create one-vs-all subproblem labels
        y_temp = copy.deepcopy(self.y)
        ind = np.argwhere(y_temp.astype(int) == c)
        ind = ind[:, 0]
        ind2 = np.argwhere(y_temp.astype(int) != c)
        ind2 = ind2[:, 0]
        y_temp[ind] = 1
        y_temp[ind2] = -1

        # create new axis to plot
        ax = plt.subplot(gs[c])
        xmin, xmax = self.scatter_pts(ax, self.x, y_temp)

        # pretty up panel
        title = 'class ' + str(c + 1) + ' versus all'
        ax.set_title(title, fontsize=14)
def contour_plot(self, ax, wmax, num_contours):
    #### define input space for function and evaluate ####
    w1 = np.linspace(-wmax, wmax, 100)
    w2 = np.linspace(-wmax, wmax, 100)
    w1_vals, w2_vals = np.meshgrid(w1, w2)
    w1_vals.shape = (len(w1)**2, 1)
    w2_vals.shape = (len(w2)**2, 1)
    h = np.concatenate((w1_vals, w2_vals), axis=1)
    func_vals = np.asarray([self.g(np.reshape(s, (2, 1))) for s in h])
    w1_vals.shape = (len(w1), len(w1))
    w2_vals.shape = (len(w2), len(w2))
    func_vals.shape = (len(w1), len(w2))

    ### make contour plot - as well as horizontal and vertical axes ###
    # set level ridges
    levelmin = min(func_vals.flatten())
    levelmax = max(func_vals.flatten())
    cutoff = 0.5
    cutoff = (levelmax - levelmin) * cutoff
    numper = 3
    levels1 = np.linspace(cutoff, levelmax, numper)
    num_contours -= numper

    levels2 = np.linspace(levelmin, cutoff, min(num_contours, numper))
    levels = np.unique(np.append(levels1, levels2))
    num_contours -= numper
    while num_contours > 0:
        cutoff = levels[1]
        levels2 = np.linspace(levelmin, cutoff, min(num_contours, numper))
        levels = np.unique(np.append(levels2, levels))
        num_contours -= numper

    ax.contour(w1_vals, w2_vals, func_vals, levels=levels, colors='k')
    ax.contourf(w1_vals, w2_vals, func_vals, levels=levels, cmap='Blues')

    # clean up panel
    ax.set_xlabel('$w_0$', fontsize=12)
    ax.set_ylabel('$w_1$', fontsize=12, rotation=0)
    ax.set_title(r'$g\left(w_0,w_1\right)$', fontsize=13)
    ax.axhline(y=0, color='k', zorder=0, linewidth=0.5)
    ax.axvline(x=0, color='k', zorder=0, linewidth=0.5)
    ax.set_xlim([-wmax, wmax])
    ax.set_ylim([-wmax, wmax])
def train(x, y, feature_transforms, **kwargs):
    # get and run optimizer to solve two-class problem
    N = np.shape(x)[0]
    C = np.size(np.unique(y))
    max_its = 100
    alpha_choice = 1
    cost_name = 'softmax'
    normalize = 'standard'
    w = 0.1 * np.random.randn(N + 1, 1)

    # switches for user choices
    if 'max_its' in kwargs:
        max_its = kwargs['max_its']
    if 'alpha_choice' in kwargs:
        alpha_choice = kwargs['alpha_choice']
    if 'cost_name' in kwargs:
        cost_name = kwargs['cost_name']
    if 'w' in kwargs:
        w = kwargs['w']
    if 'normalize' in kwargs:
        normalize = kwargs['normalize']

    # loop over subproblems and solve
    weight_histories = []
    for c in range(0, C):
        # prepare temporary C vs notC sub-problem labels
        y_temp = copy.deepcopy(y)
        ind = np.argwhere(y_temp.astype(int) == c)
        ind = ind[:, 1]
        ind2 = np.argwhere(y_temp.astype(int) != c)
        ind2 = ind2[:, 1]
        y_temp[0, ind] = 1
        y_temp[0, ind2] = -1

        # run on normalized data
        run = basic_runner.Setup(x, y_temp, feature_transforms, cost_name, normalize=normalize)
        run.fit(w=w, alpha_choice=alpha_choice, max_its=max_its)

        # store each weight history
        weight_histories.append(run.weight_history)

    # combine each individual classifier's weights into a single weight
    # matrix per step
    R = len(weight_histories[0])
    combined_weights = []
    for r in range(R):
        a = []
        for c in range(C):
            a.append(weight_histories[c][r])
        a = np.array(a).T
        a = a[0, :, :]
        combined_weights.append(a)

    # run combined weight matrices through fusion rule to calculate
    # number of misclassifications per step
    counter = basic_runner.Setup(x, y, feature_transforms, 'multiclass_counter',
                                 normalize=normalize).cost_func
    count_history = [counter(v) for v in combined_weights]

    return combined_weights, count_history
def __init__(self, X, y, i_, n_users=10, n_items=5, d=3, lambda_=0.,
             gamma=1., gamma_v=0., n_epoch=10, df=None, fair=False,
             training='ll'):
    self.X = X
    self.y = y
    self.i_ = i_
    self.n_users = n_users
    self.n_items = n_items
    print(n_users, 'users', n_items, 'items')
    self.d = d
    self.GAMMA = gamma
    self.GAMMA_V = gamma_v
    self.LAMBDA = lambda_
    self.mu = 0.
    self.y_pred = []
    self.predictions = []
    self.item_bias = np.random.random(n_items)
    self.item_slopes = np.random.random(n_items)
    self.w = np.random.random(n_users)
    self.V = np.random.random((n_users, d))
    self.item_embed = np.random.random((n_items, d))
    self.users = np.random.random((n_users, 5))
    self.items = np.random.random((n_items, 5))
    self.fair = fair
    self.metrics = defaultdict(list)
    self.prepare_sets()

    # one-hot weights over attribute ids, normalized per attribute
    attr_ids = self.X_train[:, 2]
    n_attr = len(np.unique(attr_ids))
    self.n_samples = len(i_['train'])
    print(self.n_samples, 'samples')
    self.W_attr = np.zeros((n_attr, self.n_samples))
    self.W_attr[attr_ids, range(self.n_samples)] = 1
    self.W_attr /= self.W_attr.sum(axis=1)[:, None]  # normalize rows

    self.n_epoch = n_epoch
    self.batch_size = BATCH_SIZE
    self.n_batches = self.n_samples // self.batch_size
    print('n_iter will be', self.n_epoch, self.n_batches,
          self.n_epoch * self.n_batches)
    self.c = 0.
    self.training = training
    self.prepare_model()
def inf_up_loss_influence(self, X_test, y_test, include_reg=False,
                          include_hessian=True):
    """
    Non-sklearn function. This is the influence of a training point
    on a testing point.
    """
    y_fit = self.y_
    assert set(np.unique(y_fit)).issubset({0, 1}), \
        "y values must be 0 or 1"  # TODO put inside other functions
    X_fit = self.X_
    assert len(X_fit) == len(y_fit)

    L2_alpha = self.L2_alpha
    if not include_reg:
        L2_alpha = 1e-10

    # Precompute global Hessian and invert it
    curr_hess = self.hess_loss(X_fit, y_fit, L2_alpha=L2_alpha)
    inv_emp_hess = slin.inv(curr_hess)

    # Per-sample loss gradients for the training set
    curr_losses_train = np.zeros((len(X_fit), len(self.W_b)))
    for i, (X_i, y_i) in enumerate(zip(X_fit, y_fit)):
        curr_losses_train[i] = self.grad_loss(X_i.reshape(1, -1), y_i, L2_alpha=L2_alpha)

    # Per-sample loss gradients for the test set
    curr_losses_test = np.zeros((len(X_test), len(self.W_b)))
    for i, (X_i, y_i) in enumerate(zip(X_test, y_test)):
        curr_losses_test[i] = self.grad_loss(X_i.reshape(1, -1), y_i, L2_alpha=L2_alpha)

    # Rows are test points, columns are training points
    LOO_infs = np.zeros((len(X_test), len(X_fit)))
    for i, curr_loss_i in enumerate(curr_losses_test):
        for j, curr_loss_j in enumerate(curr_losses_train):
            if include_hessian:
                LOO_inf = -curr_loss_i.dot(inv_emp_hess).dot(curr_loss_j.T)
            else:
                LOO_inf = -curr_loss_i.dot(curr_loss_j.T)
            LOO_infs[i, j] = LOO_inf
    return LOO_infs
def init_weight(self, x, y):
    self.classes_ = np.unique(y)
    if self.prob_func_ == "sigmoid" and len(self.classes_) > 2:
        raise ValueError("sigmoid probability function requires exactly two classes")
    if self.prob_func_ is None:
        if len(self.classes_) == 2:
            self.prob_func_ = "sigmoid"
        else:
            self.prob_func_ = "softmax"
    if self.prob_func_ == "sigmoid":
        return np.array([self.eps_] * (x.shape[1] + 1))
    else:  # self.prob_func_ == "softmax"
        return np.array([[self.eps_] * len(self.classes_)
                         for i in range(x.shape[1] + 1)])
def compute_rotated_map(self, rotation):
    """
    Compute stellar maps projected on the plane of the sky for a given rotation of the star

    Args:
        rotation (float) : rotation around the star, given as [longitude, latitude] in degrees

    Returns:
        pixel_unique (int) : vector with the "active" healpix pixels
        pixel_map (int) : map showing the healpix pixels projected on the plane of the sky
        mu_pixel (float) : map of the astrocentric angle for each pixel on the plane of the sky (zero for pixels not on the star)
        T_pixel (float) : map of temperatures for each pixel on the plane of the sky
    """
    mu_pixel = np.zeros_like(self.mu_angle)
    T_pixel = np.zeros_like(self.mu_angle)

    # Get the projection of the healpix pixel indices on the plane of the sky
    pixel_map = self.projector.projmap(self.indices, self.f_vec2pix, rot=rotation)[:, 0:int(self.npix / 2)]

    # Get the unique elements in the vector
    pixel_unique = np.unique(pixel_map)

    # Now loop over all unique pixels, filling up the projected map with the mu and temperature values
    for j in range(len(pixel_unique)):
        ind = np.where(pixel_map == pixel_unique[j])
        if np.all(np.isfinite(self.mu_angle[ind[0], ind[1]])):
            if self.mu_angle[ind[0], ind[1]].size == 0:
                value = 0.0
            else:
                value = np.nanmean(self.mu_angle[ind[0], ind[1]])
            mu_pixel[ind[0], ind[1]] = value
            T_pixel[ind[0], ind[1]] = self.temperature_map[int(pixel_unique[j])]
        else:
            mu_pixel[ind[0], ind[1]] = 0.0
            T_pixel[ind[0], ind[1]] = 0.0

    return pixel_unique, pixel_map, mu_pixel, T_pixel
def split_classes(X, y):
    """Split samples in X by classes in y."""
    lstsclass = np.unique(y)
    return [X[y == i, :].astype(np.float32) for i in lstsclass]
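# Usage sketch for split_classes with illustrative data: one (n_i, d) float32
# block per class, in sorted class order.
import numpy as np

X = np.array([[1., 2.], [3., 4.], [5., 6.]])
y = np.array([0, 1, 0])
parts = split_classes(X, y)
print([p.shape for p in parts])  # [(2, 2), (1, 2)]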
def precompute_rotation_maps(self, rotations=None):
    """
    Compute the averaged spectrum on the star for a given temperature map and for a given rotation

    Args:
        rotations (float) : [N_phases x 2] giving [longitude, latitude] in degrees for each phase

    Returns:
        None
    """
    if rotations is None:
        print("Use some angles for the rotations")
        return

    self.n_phases = rotations.shape[0]
    self.avg_mu = [None] * self.n_phases
    self.avg_v = [None] * self.n_phases
    self.velocity = [None] * self.n_phases
    self.n_pixel_unique = [None] * self.n_phases
    self.n_pixels = [None] * self.n_phases
    self.pixel_unique = [None] * self.n_phases

    for loop in range(self.n_phases):
        mu_pixel = np.zeros_like(self.mu_angle)
        v_pixel = np.zeros_like(self.vel_projection)

        pixel_map = self.projector.projmap(self.indices, self.f_vec2pix,
                                           rot=rotations[loop, :])[:, 0:int(self.npix / 2)]
        pixel_unique = np.unique(pixel_map[np.isfinite(pixel_map)])

        for j in range(len(pixel_unique)):
            ind = np.where(pixel_map == pixel_unique[j])
            if np.all(np.isfinite(self.mu_angle[ind[0], ind[1]])):
                if self.mu_angle[ind[0], ind[1]].size == 0:
                    mu_pixel[ind[0], ind[1]] = 0.0
                    v_pixel[ind[0], ind[1]] = 0.0
                else:
                    if self.clv:
                        value = np.nanmean(self.mu_angle[ind[0], ind[1]])
                    else:
                        value = 1.0
                    mu_pixel[ind[0], ind[1]] = value
                    v_pixel[ind[0], ind[1]] = np.nanmean(self.vel_projection[ind[0], ind[1]])
            else:
                mu_pixel[ind[0], ind[1]] = 0.0
                v_pixel[ind[0], ind[1]] = 0.0

        self.n_pixel_unique[loop] = len(pixel_unique)
        self.avg_mu[loop] = np.zeros(self.n_pixel_unique[loop])
        self.avg_v[loop] = np.zeros(self.n_pixel_unique[loop])
        self.velocity[loop] = np.zeros(self.n_pixel_unique[loop])
        self.n_pixels[loop] = np.zeros(self.n_pixel_unique[loop], dtype='int')
        self.pixel_unique[loop] = pixel_unique.astype('int')

        for i in range(len(pixel_unique)):
            ind = np.where(pixel_map == pixel_unique[i])
            self.n_pixels[loop][i] = len(ind[0])
            self.avg_mu[loop][i] = np.unique(mu_pixel[ind[0], ind[1]])
            self.avg_v[loop][i] = np.unique(v_pixel[ind[0], ind[1]])
            self.velocity[loop][i] = self.avg_mu[loop][i] * self.avg_v[loop][i]