def _minmax(arr, mask=None, q_low=0, q_upp=0, cenfunc='median'):
    # General setup (nkeep and maxrej as dummy)
    _arr, _masks, _, cenfunc, _nvals = _setup_reject(
        arr=arr, mask=mask, nkeep=1, maxrej=None, cenfunc=cenfunc
    )
    # mask == input_mask | ~isfinite
    mask, _, _, mask_skiprej = _masks
    # nkeep and maxrej not used in MINMAX; nit is kept only for the return value.
    nit, ncombine, n_old = _nvals

    # adding 0.001 following IRAF
    n_rej_low = (n_old * q_low + 0.001).astype(n_old.dtype)
    n_rej_upp = (n_old * q_upp + 0.001).astype(n_old.dtype)
    n_low = np.max(n_rej_low)  # only ~ 0.1 ms for 1k x 1k array of int
    n_upp = np.max(n_rej_upp)

    dmin, dmax = _get_dtype_limits(_arr.dtype)

    # remove lower values
    _arr[mask] = dmax  # replace with largest value so masked pixels never rank low
    low = np.max(bn.partition(_arr, kth=n_low, axis=0)[:n_low, ], axis=0)

    # remove upper values
    _arr[mask] = dmin  # replace with lowest value so masked pixels never rank high
    # min of the n_upp largest values = the n_upp-th largest (mirror of ``low``)
    upp = np.min(-bn.partition(-_arr, kth=n_upp, axis=0)[:n_upp, ], axis=0)

    # propagate with rejection mask
    mask |= (_arr < low) | (upp < _arr)

    code = np.zeros(_arr.shape[1:], dtype=np.uint8)
    no_rej = (n_rej_low == 0) | (n_rej_upp == 0)
    # code +=
    return (mask, low, upp, nit, code)
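# A minimal standalone sketch (my illustration, not part of the module
# above) of the bn.partition trick that _minmax relies on: with kth=n along
# axis=0, the first n rows hold the n smallest values of each column
# (unsorted), so their max is the n-th smallest per column; negating the
# input gives the mirror image for the largest values.
import bottleneck as bn
import numpy as np

rng = np.random.default_rng(0)
stack = rng.normal(size=(10, 4))   # e.g. 10 frames of 4 pixels each
n_low, n_upp = 2, 3
low = np.max(bn.partition(stack, kth=n_low, axis=0)[:n_low], axis=0)
upp = np.min(-bn.partition(-stack, kth=n_upp, axis=0)[:n_upp], axis=0)
assert np.allclose(low, np.sort(stack, axis=0)[n_low - 1])   # n_low-th smallest
assert np.allclose(upp, np.sort(stack, axis=0)[-n_upp])      # n_upp-th largest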
def run_query(self, query, pooling='mean', n=10):
    if self.corpus is None:
        raise AttributeError(
            'Model not built yet, please call the fit method before running queries!'
        )
    assert isinstance(query, str)

    similarities = []
    query_embedding = BERT_sentence_embeddings(query, query=True)
    for item in self.corpus_sent_emb:
        sent_sims = np.dot(item, query_embedding.T)
        if pooling == 'top2':
            if len(item) > 2:
                similarities.append(
                    np.mean(-bn.partition(-sent_sims, kth=2, axis=0)[:2], axis=0))
            else:
                similarities.append(np.mean(sent_sims, axis=0))
        elif pooling == 'max':
            similarities.append(np.amax(sent_sims, axis=0))
        elif pooling == 'mean':
            similarities.append(np.mean(sent_sims, axis=0))
    similarities = np.squeeze(np.array(similarities))
    return self.__create_query_result(query, similarities, n)
def mean_rrank_at_k_batch(train_data, heldout_data, Et, Eb, user_idx,
                          k=5, mu=None, vad_data=None):
    '''
    mean reciprocal rank@k: For each user, make predictions and rank
    for all the items. Then calculate the mean reciprocal rank for the
    top K that are in the held-out set.
    '''
    batch_users = user_idx.stop - user_idx.start
    X_pred = _make_prediction(train_data, Et, Eb, user_idx, batch_users,
                              mu=mu, vad_data=vad_data)
    all_rrank = 1. / (np.argsort(np.argsort(-X_pred, axis=1), axis=1) + 1)
    X_true_binary = (heldout_data[user_idx] > 0).toarray()
    heldout_rrank = X_true_binary * all_rrank
    top_k = bn.partition(-heldout_rrank, k, axis=1)
    return -top_k[:, :k].mean(axis=1)
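# Illustration only (not from the source above): the double argsort in
# mean_rrank_at_k_batch converts scores into 1-based ranks, which is what
# makes 1/rank a reciprocal-rank matrix. A tiny check with made-up scores:
import numpy as np

scores = np.array([[0.1, 0.9, 0.5]])
ranks = np.argsort(np.argsort(-scores, axis=1), axis=1) + 1
assert (ranks == np.array([[3, 1, 2]])).all()   # highest score gets rank 1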
def convertToNumber(self, nparr):
    if self.ngrams < 2:
        byte_int = np.zeros(256)
        for row in nparr:
            try:
                int_val = int(row, 16)
            except (ValueError, TypeError):
                int_val = random.randint(0, 254)
            byte_int[int_val] += 1
    else:
        arrsize = math.pow(256, self.ngrams)
        byte_int = np.zeros(int(arrsize))
        for row in nparr:
            row = row.replace(" ", "")
            try:
                int_val = int(row, 16)
            except (ValueError, TypeError):
                int_val = random.randint(0, 254)
            byte_int[int_val] += 1
        # keep the 2000 largest n-gram counts
        byte_int = -bottleneck.partition(-byte_int, 2000)[:2000]
    return byte_int
def _get_object_influence(self, dist_mat, idx):
    # range(k + 1) as kth sorts the first k+1 positions: index 0 is the
    # point itself (distance 0), indices 1..k its k nearest neighbors.
    k_nearest_idx = np.argpartition(
        dist_mat[:, idx], range(self.n_neighbors + 1))[1:self.n_neighbors + 1]
    k_nearest_dist = bn.partition(dist_mat[:, idx], self.n_neighbors)
    local_density = 1 / k_nearest_dist[self.n_neighbors]
    # bn.partition leaves the first n_neighbors slots unsorted, so sort them
    # before dropping the self-distance at position 0.
    knn_dist = np.sort(k_nearest_dist[:self.n_neighbors + 1])[1:].sum()
    return np.array([k_nearest_idx, local_density, knn_dist], dtype=object)
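# Hedged side note (standalone, not from the class above): passing
# range(k + 1) as kth to np.argpartition puts each of the first k+1
# positions in sorted order, so index 0 is the point itself (distance 0)
# and indices 1..k are its k nearest neighbors, nearest first.
import numpy as np

d = np.array([0.0, 3.0, 1.0, 2.0, 5.0])   # one column of a distance matrix
k = 2
idx = np.argpartition(d, range(k + 1))[1:k + 1]
assert list(idx) == [2, 3]                # nearest two have distances 1.0, 2.0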
def mean_over_k_largest(vector, k):
    '''Return the mean over the k largest values of a vector'''
    if k == 0:
        return 0
    if k >= len(vector):
        # fewer than k values: fall back to the mean of the whole vector
        return vector.sum() / len(vector)
    # first k slots of the negated partition hold the k largest values
    z = -bottleneck.partition(-vector, kth=k)[:k]
    return z.sum() / k
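# Quick sanity check of the function above after the fixes (assumes a
# 1-d numpy array as input):
import bottleneck
import numpy as np

v = np.array([1., 5., 3., 9.])
assert mean_over_k_largest(v, 2) == 7.0          # mean of {9, 5}
assert mean_over_k_largest(v, 0) == 0
assert mean_over_k_largest(v, 10) == v.mean()    # k >= len falls back to the full mean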
def update(self, arr):
    self.data = np.append(self.data, arr[:self.window_size])
    if len(self.data) > self.window_size:
        arr_pre = self.data[:self.window_size]
        self.data = arr[-(self.window_size - 1):]
        top10_arr = -bn.partition(-arr_pre, 10)[:10]
        self.threshold = np.average(top10_arr) / 2.6
        self.history = np.append(self.history, self.threshold)
        if len(self.history) > 10:
            self.history = self.history[1:]
def updateBestVector(self, v):
    G = self.G
    max_count = 0
    max_percentiles = [0 for i in range(len(self.thresholds))]
    ordered_percentiles = [[] for i in range(len(self.thresholds))]
    for u in G.neighbors(v):
        if max_count < self.max_counts[u]:
            max_count = self.max_counts[u]
        indices = np.digitize(self.orderedMatrix[u], self.thresholds) - 1
        for i in range(len(self.thresholds) - 1):
            if max_percentiles[i] < self.max_counts_percentiles[u][i]:
                max_percentiles[i] = self.max_counts_percentiles[u][i]
            ordered_percentiles[i] = np.concatenate(
                (ordered_percentiles[i], self.orderedMatrix[u][indices == i]))

    for i in range(len(self.thresholds) - 1):
        length = min(len(ordered_percentiles[i]), max_percentiles[i])
        if length == 0:
            ordered_percentiles[i] = []
        else:
            ordered_percentiles[i] = bottle.partition(ordered_percentiles[i],
                                                      length - 1)
            ordered_percentiles[i] = ordered_percentiles[i][0:length]

    remains = max_count
    index_perc = 0
    #print("max_count: " + str(max_count))
    #print("max_percentiles: " + str(max_percentiles))
    #print("ordered_percentiles: " + str(ordered_percentiles))
    best_vector = np.asarray([])
    while remains > 0:
        #print(remains)
        if remains > len(ordered_percentiles[index_perc]):
            best_vector = np.concatenate(
                (best_vector, ordered_percentiles[index_perc]))
            remains -= len(ordered_percentiles[index_perc])
        else:
            best_vector = np.concatenate(
                (best_vector, ordered_percentiles[index_perc][0:remains]))
            remains = 0
        index_perc += 1
    return best_vector
def check_trivial_answer(n_assets, mu, EST, return_matrix):
    # put all weight on the single asset with the highest expected return
    max_index = np.argmax(mu)
    w = np.zeros(n_assets)
    w[max_index] = 1
    w_v = w @ return_matrix
    # ``alpha`` is a module-level confidence level (e.g. 0.05)
    length = int(np.floor(alpha * len(w_v))) - 1
    # mean of the `length` worst returns: empirical expected shortfall
    ESt = bn.partition(w_v, kth=length)
    ESt = np.mean(ESt[0:length])
    if ESt / EST < 1:
        return max_index
    else:
        return -1
def step(self, state):
    ''' Predict the action given the current state when generating
    training data.

    Args:
        state (dict): A dictionary that represents the current state

    Returns:
        action (int): The action predicted (randomly chosen) by the random agent
    '''
    current_hand = state['raw_obs']['current_hand']
    legal_actions = state['legal_actions']

    # # check
    # if len(state['raw_obs']['trace']) >= 2:
    #     if state['raw_obs']['trace'][-1][1] == 'pass' and state['raw_obs']['trace'][-2][1] == 'pass':
    #         if set(legal_actions) != PaodekuaiJudger.playable_cards_from_hand(current_hand):
    #             print(legal_actions)
    #             print(current_hand)
    #             raise ValueError('Error')

    # Win the game if possible
    if current_hand in legal_actions:
        return current_hand

    # If no choice, e.g. ['pass']
    if len(legal_actions) == 1:
        return legal_actions[0]

    # set the model to evaluation mode, otherwise the output would be wrong
    with torch.no_grad():
        self.model.eval()
        obs = torch.FloatTensor(state['obs']).reshape(-1, 6, 4, 13)
        prediction = self.model(obs).view(-1, 1, 4, 13)

        if self.generate_data:
            # return choices by prob
            softmax = nn.Softmax(dim=0)
            tensor_cards = torch.FloatTensor(
                [cards_encode_tensor(cards) for cards in legal_actions]
            ).view(-1, 1, 4, 13)
            inner_product = torch.FloatTensor(
                [(prediction * cards).sum() for cards in tensor_cards])
            similarity = softmax(inner_product).numpy()
            # two most similar legal actions, sampled with renormalized prob
            top_cards_idx = bottleneck.argpartition(-similarity, 1)[:2]
            top_cards_prob = -bottleneck.partition(-similarity, 1)[:2]
            top_cards_prob = top_cards_prob / sum(top_cards_prob)
            return np.random.choice(np.array(legal_actions)[top_cards_idx],
                                    p=top_cards_prob)
        elif not self.entropy:
            # SL card: select the nearest card to the predicted tensor
            # use similarity = inner product
            choice = ''
            similarity = -1
            for cards in legal_actions:
                tensor_card = torch.FloatTensor(
                    cards_encode_tensor(cards)).reshape(-1, 1, 4, 13)
                new_sim = (prediction * tensor_card).sum()
                if new_sim > similarity:
                    choice = cards
                    similarity = new_sim
            return choice
        else:
            choice = ''
            loss = 100000
            for cards in legal_actions:
                tensor_card = torch.FloatTensor(
                    cards_encode_tensor(cards)).reshape(-1, 1, 4, 13)
                new_loss = loss_function(prediction, tensor_card)
                if new_loss < loss:
                    choice = cards
                    loss = new_loss
            return choice
import time

import bottleneck as bottle
import numpy as np

a = np.asarray([0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 13, 34])
thresholds = np.asarray([0, 0.000001, 3, 7, 29, 50])
indices = np.digitize(a, thresholds) - 1
print(indices)
lista = []
print(indices == 1)
lista.append(a[indices == 1])
print(lista)
raise ValueError  # deliberate stop: everything below is a timing scratchpad

a = np.random.rand(100000000) * 10
#a = list(range(90, 100)) + list(range(40, 50)) + list(range(0, 5)) + list(range(10, 19))
#a = np.asarray(a)
#print(a)
print()
t_start = time.time()
b = bottle.partition(a, 10)[:10]
b = np.sort(b)
t_end = time.time()
print("Elapsed time: ", time.strftime("%H:%M:%S", time.gmtime(t_end - t_start)))
print(b)
print()
t_start = time.time()
#print(np.sort(a)[:1])
print(np.min(a))
t_end = time.time()
print("Elapsed time: ", time.strftime("%H:%M:%S", time.gmtime(t_end - t_start)))
eem_data = read_data('EEM.csv')
dbc_data = read_data('DBC.csv')
dbv_data = read_data('DBV.csv')
spy_2008_data = read_data('SPY_2008.csv')

spy_2008_closing = list(spy_2008_data.values())
spy_2008_return = np.zeros(len(spy_2008_closing) - 1)
for i in range(len(spy_2008_closing) - 1):
    spy_2008_return[i] = (float(spy_2008_closing[i + 1])
                          - float(spy_2008_closing[i])) / float(spy_2008_closing[i])
sigma_2008 = np.std(spy_2008_return)
VaR = int(np.floor(alpha * len(spy_2008_return)))
# mean of the VaR - 1 worst returns: empirical expected shortfall for 2008
EST_2008 = bn.partition(spy_2008_return, kth=VaR - 1)
EST_2008 = np.mean(EST_2008[0:VaR - 1])

list_of_data = [spy_data, agg_data, gld_data, eem_data, dbc_data, dbv_data]
#list_of_data = [agg_data, eem_data]
assets = process_data(list_of_data)
assets = np.array(assets)
assets = assets.astype(float)  # np.float is removed in modern numpy
n_assets = assets.shape[0]
n_obs = assets.shape[1]
return_matrix = np.zeros((n_assets, n_obs - 1))
for i in range(n_assets):
    for j in range(n_obs - 1):
        return_matrix[i, j] = (assets[i][j + 1] - assets[i][j]) / assets[i][j]
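# Hedged sketch of the VaR/expected-shortfall pattern used above, with an
# explicit alpha (the scripts rely on a module-level `alpha`): with kth=m,
# bn.partition places the m smallest (worst) returns in the first m slots,
# so their mean is an empirical expected shortfall at level alpha.
import bottleneck as bn
import numpy as np

alpha = 0.05
returns = np.random.default_rng(1).normal(0.0, 0.01, size=1000)
m = int(np.floor(alpha * len(returns)))
worst = bn.partition(returns, kth=m)[:m]         # m worst returns, unsorted
expected_shortfall = worst.mean()
assert np.isclose(expected_shortfall, np.sort(returns)[:m].mean())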
def _iter_rej(arr, mask=None, sigma_lower=3., sigma_upper=3., maxiters=5,
              ddof=0, nkeep=3, maxrej=None, cenfunc='median', ccdclip=False,
              irafmode=True, rdnoise_ref=0., snoise_ref=0., scale_ref=1,
              zero_ref=0):
    """ The common function for iterative rejection algorithms.

    Parameters
    ----------
    arr : ndarray
        The array to find the mask. It must be gain-corrected if
        ``ccdclip=True``.

    rdnoise_ref, snoise_ref : float
        The representative readnoise and sensitivity noise to estimate the
        error-bar for ``ccdclip=True``.

    scale_ref, zero_ref : float
        The representative scaling and zeroing value to estimate the
        error-bar for ``ccdclip=True``.
    """
    def __calc_censtd(_arr):
        # most names are defined in the enclosing _iter_rej function
        cen = cenfunc(_arr, axis=0)
        if ccdclip:
            # use abs(pix value) to avoid NaN from negative pixels.
            std = np.sqrt(
                ((1 + snoise_ref)*np.abs(cen + zero_ref)*scale_ref)
                + rdnoise_ref**2
            )  # restore zeroing & scaling; then add rdnoise
        else:
            std = bn.nanstd(_arr, axis=0, ddof=ddof)
        return cen, std

    # General setup
    _arr, _masks, keeprej, cenfunc, _nvals, lowupp = _setup_reject(
        arr=arr, mask=mask, nkeep=nkeep, maxrej=maxrej, cenfunc=cenfunc
    )
    mask_nan, mask_nkeep, mask_maxrej, mask_pix = _masks
    nkeep, maxrej = keeprej
    nit, ncombine, n_finite_old = _nvals
    low, upp, low_new, upp_new = lowupp
    nrej = ncombine - n_finite_old

    k = 0
    # mask_pix is where **NO** rejection should occur.
    if (nkeep == 0) and (maxrej == ncombine):
        print("nkeep, maxrej turned off.")
        # no need to check mask_pix iteratively
        while k < maxiters:
            cen, std = __calc_censtd(_arr=_arr)
            low_new[~mask_pix] = (cen - sigma_lower*std)[~mask_pix]
            upp_new[~mask_pix] = (cen + sigma_upper*std)[~mask_pix]
            # In numpy, > or < automatically applies along axis=0!!
            mask_bound = (_arr < low_new) | (_arr > upp_new) | ~np.isfinite(_arr)
            _arr[mask_bound] = np.nan
            n_finite_new = ncombine - np.count_nonzero(mask_bound, axis=0)
            n_change = n_finite_old - n_finite_new
            total_change = np.sum(n_change)
            mask_nochange = (n_change == 0)  # identical to say "max-iter reached"
            # no need to backup
            if total_change == 0:
                break
            # I put the test below because I thought it will be quicker
            # to halt clipping if all pixels are masked. But now I feel
            # testing this in every iteration is an unnecessary overhead
            # for "nearly impossible" situation.
            # - ysBach (2020-10-14 21:15:44 (KST: GMT+09:00))
            # if np.all(mask_pix):
            #     break

            # update only non-masked pixels
            nrej[~mask_pix] = n_change[~mask_pix]
            # update only changed pixels
            nit[~mask_nochange] += 1
            k += 1
            n_finite_old = n_finite_new
    else:
        while k < maxiters:
            cen, std = __calc_censtd(_arr=_arr)
            low_new[~mask_pix] = (cen - sigma_lower*std)[~mask_pix]
            upp_new[~mask_pix] = (cen + sigma_upper*std)[~mask_pix]
            # In numpy, > or < automatically applies along axis=0!!
            mask_bound = (_arr < low_new) | (_arr > upp_new) | ~np.isfinite(_arr)
            _arr[mask_bound] = np.nan
            n_finite_new = ncombine - np.count_nonzero(mask_bound, axis=0)
            n_change = n_finite_old - n_finite_new
            total_change = np.sum(n_change)
            mask_nochange = (n_change == 0)  # identical to say "max-iter reached"
            mask_nkeep = ((ncombine - nrej) < nkeep)
            mask_maxrej = (nrej > maxrej)
            # mask pixel position if any of these happened.
            # Including mask_nochange here will not change results but only
            # spend more time.
            mask_pix = mask_nkeep | mask_maxrej

            # revert to the previous ones if masked.
            # By doing this, pixels which were mask_nkeep now, e.g., will
            # again be True in mask_nkeep in the next iter but unchanged.
            # This should be done at every iteration (unfortunately)
            # because, e.g., if nkeep is very large, excessive rejection may
            # happen many times, and the restoration CANNOT be done
            # after all the iterations.
            low_new[mask_pix] = low[mask_pix].copy()
            upp_new[mask_pix] = upp[mask_pix].copy()
            low = low_new
            upp = upp_new

            if total_change == 0:
                break
            # I put the test below because I thought it will be quicker
            # to halt clipping if all pixels are masked. But now I feel
            # testing this in every iteration is an unnecessary overhead
            # for "nearly impossible" situation.
            # - ysBach (2020-10-14 21:15:44 (KST: GMT+09:00))
            # if np.all(mask_pix):
            #     break

            # update only non-masked pixels
            nrej[~mask_pix] = n_change[~mask_pix]
            # update only changed pixels
            nit[~mask_nochange] += 1
            k += 1
            n_finite_old = n_finite_new

    mask = mask_nan | (arr < low_new) | (arr > upp_new)

    code = np.zeros(_arr.shape[1:], dtype=np.uint8)
    if maxiters == 0:
        code += 1
    else:
        code += (2*mask_nochange + 4*mask_nkeep + 8*mask_maxrej).astype(np.uint8)

    if irafmode:
        n_minimum = max(nkeep, ncombine - maxrej)
        if n_minimum > 0:
            try:
                resid = np.abs(_arr - cen)
            except UnboundLocalError:  # cen undefined when maxiters=0
                resid = np.abs(_arr - cenfunc(_arr, axis=0))
            # need this cuz bn.argpartition cannot handle NaN:
            resid[np.isnan(resid)] = _get_dtype_limits(resid.dtype)[1]
            # ^ replace with max of dtype
            # after this, resid is guaranteed to have **NO** NaN values.
            resid_cut = np.max(
                bn.partition(resid, n_minimum, axis=0)[:n_minimum, ], axis=0
            )
            mask[resid <= resid_cut] = False

    # Note the mask returned here is the mask from rejection PROPAGATED with
    # the input mask. So to extract the pixels masked PURELY from
    # rejection, you need ``mask_output^mask_input`` because the input
    # mask is a subset of the output one.
    return (mask, low, upp, nit, code)
                          (scan_number, particle_number))
# --- GET SCAN DATA & INITIALIZE PROCESSED FILE ---
datafile = scan_analyzer.getScanDataSet(data, scan_number, particle_number)
processedData = h5py.File(
    processed_filepath + "IntensityAnalysisData.hdf5", "a")
pdata = processedData.create_group('scan%s/particle%s'
                                   % (scan_number, particle_number))

# --- INFINITY 3 FIRST IMAGE ---
infinity3_maxima_to_average = 10
infinity3_image = np.array(datafile['Infinity3_First_Processed_Image'])
infinity3_z = -bottleneck.partition(
    -infinity3_image.flatten(),
    infinity3_maxima_to_average)[:infinity3_maxima_to_average]
infinity3_averaged_maxima_list.append(np.mean(infinity3_z))

# --- RAMAN WHITE LIGHT IMAGE ---
white0order_maxima_to_average = 10
white0order_image = np.array(
    datafile['Raman_White_Light_0Order_Processed_Image'])
white0order_z = -bottleneck.partition(
    -white0order_image.flatten(),
    white0order_maxima_to_average)[:white0order_maxima_to_average]
white0order_averaged_maxima_list.append(np.mean(white0order_z))

# --- RAMAN LASER LIGHT IMAGE ---
laser0order_maxima_to_average = 10
laser0order_image = np.array(
def _get_kernel(self, X, Y=None, nystroem_kernel=False):
    X, Y = check_pairwise_arrays(X, Y)
    if nystroem_kernel:
        # Cannot use self.nystroem since the kernel also needs to be
        # computable for the full data for prediction when Nystroem
        # sampling is used
        if self.component_indices is None:
            rnd = check_random_state(self.random_state)
            n_samples = X.shape[0]
            # get basis vectors
            if self.n_components > n_samples:
                # XXX should we just bail?
                n_components = n_samples
                warnings.warn(
                    "n_components > n_samples. This is not possible.\n"
                    "n_components was set to n_samples, which results"
                    " in inefficient evaluation of the full kernel.")
            else:
                n_components = self.n_components
            n_components = min(n_samples, n_components)
            self.component_indices = rnd.permutation(n_samples)[:n_components]
        X = X[self.component_indices].copy()
        d = euclidean_distances(X, X)
    else:
        d = euclidean_distances(X, Y)

    # Get the n_neighbors-th largest element to find the range if not given
    if self.theta is None:
        if self.n_neighbors == "inf" or self.n_neighbors == np.inf:
            # special case: choose theta such that it equals the average
            # distance to the farthest neighbor
            self.n_neighbors = X.shape[0] - 1
            self.range_adjust = 1.
        if (self.prctg_neighbors is not None) and (self.n_neighbors is None):
            self.n_neighbors = int(X.shape[0] * self.prctg_neighbors)
        if self.n_neighbors is not None:
            if self.kernel == "GW":
                # Choose theta such that on average every point has
                # n_neighbors non-zero entries
                ds = d.flatten()
                ds = ds[ds != 0]  # remove diagonal
                self.theta = bn.partition(
                    ds, d.shape[0] * self.n_neighbors - 1
                )[d.shape[0] * self.n_neighbors - 1]
            else:
                # Choose theta as the average distance to the
                # n_neighbors-th nearest neighbor
                kdt = scipy.spatial.cKDTree(X)
                # get distance to the n_neighbors+1 nearest neighbors
                # (incl. the point itself)
                dists, neighs = kdt.query(X, self.n_neighbors + 1)
                # average distance to the n_neighbors-th nearest neighbor
                # (only true neighbors, excl. the point itself)
                self.theta = np.mean(dists[:, self.n_neighbors])
            if self.kernel == "rbf":
                # range_adjust=3 (4.6): correlation should drop to 5% (1%)
                # at distance = theta
                self.theta = self.theta / (self.range_adjust**0.5)
            if self.kernel == "laplace":
                self.theta = self.theta / self.range_adjust
        print("Chosen theta: " + str(round(self.theta, 4)))

    if self.kernel == "GW":
        d *= -1. / self.theta
        d2 = d.copy()
        d += 1.
        d[d < 0] = 0
        d *= d
        d2 *= -2
        d2 += 1
        d *= d2
        # The in-place code above does the same as:
        # tmp = 1 - d/self.theta
        # tmp[tmp < 0] = 0
        # d = tmp**2 * (1 + 2*d/self.theta)
    if self.kernel == "rbf":
        # np.exp(-(d/self.theta)**2), computed in place
        d *= (1. / self.theta)
        d *= -d
        np.exp(d, d)
    if self.kernel == "laplace":
        # np.exp(-d/self.theta), computed in place
        d *= (-1. / self.theta)
        np.exp(d, d)

    if self.sparse:
        # print("Sparsity ratio: " + str(round(float(100*np.sum(d > 0))/X.shape[0]/X.shape[0], 2)) + "%")
        return csc_matrix(d)
    else:
        return d
def top_k_bottleneck(ndarr, k=10):
    # assumes a 1-d array; returns the k largest values, unsorted
    return bn.partition(ndarr, ndarr.size - k)[-k:]
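# Usage note (illustrative): the function above returns the k largest
# values *unsorted*; sort the k-element slice afterwards if order matters,
# which keeps the overall cost at O(n + k log k).
import bottleneck as bn
import numpy as np

x = np.array([7, 2, 9, 4, 1])
top3 = top_k_bottleneck(x, k=3)
assert set(top3) == {4, 7, 9}
top3_sorted = np.sort(top3)[::-1]   # [9, 7, 4]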
def test_transpose():
    "partition transpose test"
    a = np.arange(12).reshape(4, 3)
    actual = bn.partition(a.T, 2, -1).T
    desired = bn.slow.partition(a.T, 2, -1).T
    assert_equal(actual, desired, 'partition transpose test')
def time_partition(self, dtype, shape):
    # benchmark body; dtype and shape are unused here (the timed array
    # and kth are prepared by the benchmark setup as self.arr, self.half)
    bn.partition(self.arr, self.half)
def __init__(self, a, b, C, reg, ns_budget=None, nt_budget=None,
             uniform=False, restricted=True, one_init=False, maxiter=10000,
             maxfun=10000, pgtol=1e-09, verbose=True, log=False):
    # check if the bottleneck module exists
    try:
        import bottleneck
    except ImportError:
        warnings.warn(
            "Bottleneck module is not installed. Install it from "
            "https://pypi.org/project/Bottleneck/ for better performance.")
        bottleneck = np

    # time
    tic_initial = time()

    self.a = np.asarray(a, dtype=np.float64)
    self.b = np.asarray(b, dtype=np.float64)

    # if the autograd package is used, we have to change some arrays
    # from "ArrayBox" type to "np.array".
    if not isinstance(C, np.ndarray):
        C = C._value
    self.C = np.asarray(C, dtype=np.float64)

    self.reg = reg
    ns = C.shape[0]
    nt = C.shape[1]
    self.ns_budget = ns_budget
    self.nt_budget = nt_budget
    self.verbose = verbose
    self.uniform = uniform
    self.restricted = restricted
    self.maxiter = maxiter
    self.maxfun = maxfun
    self.pgtol = pgtol
    self.one_init = one_init
    self.log = log

    # by default, we keep only 50% of the sample data points
    if self.ns_budget is None:
        self.ns_budget = int(np.floor(0.5 * ns))
    if self.nt_budget is None:
        self.nt_budget = int(np.floor(0.5 * nt))

    # calculate the Gibbs kernel K
    self.K = np.empty_like(self.C)
    np.divide(self.C, -self.reg, out=self.K)
    np.exp(self.K, out=self.K)

    # screening test (see Lemma 1 in the paper)
    # full number of budget points: (ns, nt) = (ns_budget, nt_budget)
    if self.ns_budget == ns and self.nt_budget == nt:
        # I, J
        self.Isel = np.ones(ns, dtype=bool)
        self.Jsel = np.ones(nt, dtype=bool)
        # epsilon
        self.epsilon = 0.0
        # kappa
        self.fact_scale = 1.0
        # restricted Sinkhorn
        self.cst_u = 0.
        self.cst_v = 0.
        # box constraints in LBFGS
        self.bounds_u = [(0.0, np.inf)] * ns
        self.bounds_v = [(0.0, np.inf)] * nt
        self.K_IJ = self.K
        self.a_I = self.a
        self.b_J = self.b
        self.K_IJc = []
        self.K_IcJ = []
    else:
        # sum of rows and columns of K
        K_sum_cols = self.K.sum(axis=1)
        K_sum_rows = self.K.sum(axis=0)

        if self.uniform:
            if ns / self.ns_budget < 4:
                aK_sort = np.sort(K_sum_cols)
                epsilon_u_square = a[0] / aK_sort[self.ns_budget - 1]
            else:
                aK_sort = bottleneck.partition(
                    K_sum_cols, self.ns_budget - 1)[self.ns_budget - 1]
                epsilon_u_square = a[0] / aK_sort

            if nt / self.nt_budget < 4:
                bK_sort = np.sort(K_sum_rows)
                epsilon_v_square = b[0] / bK_sort[self.nt_budget - 1]
            else:
                bK_sort = bottleneck.partition(
                    K_sum_rows, self.nt_budget - 1)[self.nt_budget - 1]
                epsilon_v_square = b[0] / bK_sort
        else:
            aK = a / K_sum_cols
            bK = b / K_sum_rows
            aK_sort = np.sort(aK)[::-1]
            epsilon_u_square = aK_sort[self.ns_budget - 1]
            bK_sort = np.sort(bK)[::-1]
            epsilon_v_square = bK_sort[self.nt_budget - 1]

        # I, J
        self.Isel = self.a >= epsilon_u_square * K_sum_cols
        self.Jsel = self.b >= epsilon_v_square * K_sum_rows

        if sum(self.Isel) != self.ns_budget:
            print("test error", sum(self.Isel), self.ns_budget)
            if self.uniform:
                aK = a / K_sum_cols
                aK_sort = np.sort(aK)[::-1]
            epsilon_u_square = aK_sort[
                self.ns_budget - 1:self.ns_budget + 1].mean()
            self.Isel = self.a >= epsilon_u_square * K_sum_cols
            self.ns_budget = sum(self.Isel)

        if sum(self.Jsel) != self.nt_budget:
            print("test error", sum(self.Jsel), self.nt_budget)
            if self.uniform:
                bK = b / K_sum_rows
                bK_sort = np.sort(bK)[::-1]
            epsilon_v_square = bK_sort[
                self.nt_budget - 1:self.nt_budget + 1].mean()
            self.Jsel = self.b >= epsilon_v_square * K_sum_rows
            self.nt_budget = sum(self.Jsel)

        # epsilon, kappa
        self.epsilon = (epsilon_u_square * epsilon_v_square)**(1 / 4)
        self.fact_scale = (epsilon_v_square / epsilon_u_square)**(1 / 2)

        if self.verbose:
            print("epsilon = %s\n" % self.epsilon)
            print("kappa = %s\n" % self.fact_scale)
            print('Cardinality of selected points: |Isel| = %s \t |Jsel| = %s \n'
                  % (sum(self.Isel), sum(self.Jsel)))

        # Ic, Jc: complementary sets of I and J
        self.Ic = ~self.Isel
        self.Jc = ~self.Jsel

        # K
        self.K_IJ = self.K[np.ix_(self.Isel, self.Jsel)]
        self.K_IcJ = self.K[np.ix_(self.Ic, self.Jsel)]
        self.K_IJc = self.K[np.ix_(self.Isel, self.Jc)]
        K_min = self.K_IJ.min()
        if K_min == 0:
            K_min = np.finfo(float).tiny

        # a_I, b_J, a_Ic, b_Jc
        self.a_I = self.a[self.Isel]
        self.b_J = self.b[self.Jsel]
        if not self.uniform:
            self.a_I_min = self.a_I.min()
            self.a_I_max = self.a_I.max()
            self.b_J_max = self.b_J.max()
            self.b_J_min = self.b_J.min()
        else:
            self.a_I_min = self.a_I[0]
            self.a_I_max = self.a_I[0]
            self.b_J_max = self.b_J[0]
            self.b_J_min = self.b_J[0]

        # box constraints in L-BFGS-B (see Proposition 1 in the paper)
        self.bounds_u = [(
            max(self.a_I_min / (self.epsilon * (nt - self.nt_budget)
                                + self.nt_budget * (self.b_J_max / (
                                    self.epsilon * self.fact_scale * ns * K_min))),
                self.epsilon / self.fact_scale),
            self.a_I_max / (self.epsilon * nt * K_min))] * self.ns_budget

        self.bounds_v = [(
            max(self.b_J_min / (self.epsilon * (ns - self.ns_budget)
                                + self.ns_budget * (self.fact_scale * self.a_I_max / (
                                    self.epsilon * nt * K_min))),
                self.epsilon * self.fact_scale),
            self.b_J_max / (self.epsilon * ns * K_min))] * self.nt_budget

        # constants in the objective function of the screened Sinkhorn divergence
        self.vec_eps_IJc = self.epsilon * self.fact_scale \
            * (self.K_IJc * np.ones(nt - self.nt_budget).reshape((1, -1))).sum(axis=1)
        self.vec_eps_IcJ = (self.epsilon / self.fact_scale) \
            * (np.ones(ns - self.ns_budget).reshape((-1, 1)) * self.K_IcJ).sum(axis=0)

    # restricted Sinkhorn
    if self.ns_budget != ns or self.nt_budget != nt:
        self.cst_u = self.fact_scale * self.epsilon * self.K_IJc.sum(axis=1)
        self.cst_v = self.epsilon * self.K_IcJ.sum(axis=0) / self.fact_scale

    if not self.one_init:
        u0 = np.full(self.ns_budget,
                     (1. / self.ns_budget) + self.epsilon / self.fact_scale)
        v0 = np.full(self.nt_budget,
                     (1. / self.nt_budget) + self.epsilon * self.fact_scale)
    else:
        print('one initialization')
        u0 = np.full(self.ns_budget, 1.)
        v0 = np.full(self.nt_budget, 1.)

    if self.restricted:
        self.u0, self.v0 = self._restricted_sinkhorn(u0, v0, max_iter=5)
    else:
        print('no restricted')
        self.u0 = u0
        self.v0 = v0

    self.toc_initial = time() - tic_initial
    if self.verbose:
        print('time of initialization: %s' % self.toc_initial)
def _iter_rej(arr, mask=None, sigma_lower=3., sigma_upper=3., maxiters=5,
              ddof=0, nkeep=3, maxrej=None, cenfunc='median', ccdclip=False,
              irafmode=True, rdnoise_ref=0., snoise_ref=0., scale_ref=1,
              zero_ref=0):
    """ The common function for iterative rejection algorithms.

    Parameters
    ----------
    arr : ndarray
        The array to find the mask. It must be gain-corrected if
        ``ccdclip=True``.

    rdnoise_ref, snoise_ref : float
        The representative readnoise and sensitivity noise to estimate the
        error-bar for ``ccdclip=True``.

    scale_ref, zero_ref : float
        The representative scaling and zeroing value to estimate the
        error-bar for ``ccdclip=True``.
    """
    # General setup
    _arr, _masks, keeprej, cenfunc, _nvals, lowupp = _setup_reject(
        arr=arr, mask=mask, nkeep=nkeep, maxrej=maxrej, cenfunc=cenfunc)
    mask_nan, mask_nkeep, mask_maxrej, mask_pix = _masks
    nkeep, maxrej = keeprej
    nit, ncombine, n_finite_old = _nvals
    low, upp, low_new, upp_new = lowupp
    nrej = ncombine - n_finite_old  # same as nrej_old at the moment

    k = 0
    # mask_pix is where **NO** rejection should occur.
    while k < maxiters:
        if ccdclip:
            cen = cenfunc(_arr, axis=0)
            # use absolute of cen to avoid NaN from negative pixels.
            std = np.sqrt(
                np.abs((1 + snoise_ref) * (cen + zero_ref) * scale_ref)
                # ^ restore zeroing & scaling
                + rdnoise_ref**2)
        else:
            cen = cenfunc(_arr, axis=0)
            std = bn.nanstd(_arr, axis=0, ddof=ddof)
        low_new[~mask_pix] = (cen - sigma_lower * std)[~mask_pix]
        upp_new[~mask_pix] = (cen + sigma_upper * std)[~mask_pix]
        # In numpy, > or < automatically applies along axis=0!!
        mask_bound = (_arr < low_new) | (_arr > upp_new) | ~np.isfinite(_arr)
        _arr[mask_bound] = np.nan
        n_finite_new = ncombine - np.count_nonzero(mask_bound, axis=0)
        n_change = n_finite_old - n_finite_new
        total_change = np.sum(n_change)

        mask_nochange = (n_change == 0)  # identical to say "max-iter reached"
        mask_nkeep = ((ncombine - nrej) < nkeep)
        mask_maxrej = (nrej > maxrej)
        # mask pixel position if any of these happened.
        # Including mask_nochange here will not change results but only
        # spend more time.
        mask_pix = mask_nkeep | mask_maxrej

        # revert to the previous ones if masked.
        # By doing this, pixels which were mask_nkeep now, e.g., will
        # again be True in mask_nkeep in the next iter but unchanged.
        # This should be done at every iteration (unfortunately)
        # because, e.g., if nkeep is very large, excessive rejection may
        # happen many times, and the restoration CANNOT be done
        # after all the iterations.
        low_new[mask_pix] = low[mask_pix].copy()
        upp_new[mask_pix] = upp[mask_pix].copy()
        low = low_new
        upp = upp_new

        if total_change == 0:
            break
        if np.all(mask_pix):
            break

        # update only non-masked pixels
        nrej[~mask_pix] = n_change[~mask_pix]
        # update only changed pixels
        nit[~mask_nochange] += 1
        k += 1
        n_finite_old = n_finite_new

    mask = mask_nan | (arr < low_new) | (arr > upp_new)

    code = np.zeros(_arr.shape[1:], dtype=np.uint8)
    if maxiters == 0:
        code += 1
    else:
        code += (2 * mask_nochange + 4 * mask_nkeep
                 + 8 * mask_maxrej).astype(np.uint8)

    if irafmode:
        n_minimum = max(nkeep, ncombine - maxrej)
        resid = np.abs(_arr - cen)
        # need this cuz bn.argpartition cannot handle NaN:
        resid[mask_nan] = _get_dtype_limits(resid.dtype)[1]  # max of dtype
        # after this, resid is guaranteed to have **NO** NaN values.
        resid_cut = np.max(
            bn.partition(resid, n_minimum, axis=0)[:n_minimum, ], axis=0)
        mask[resid <= resid_cut] = False

    # Note the mask returned here is the mask from rejection PROPAGATED with
    # the input mask. So to extract the pixels masked PURELY from
    # rejection, you need ``mask_output^mask_input`` because the input
    # mask is a subset of the output one.
    return (mask, low, upp, nit, code)
def partsort(a, n):
    # kth=n-1 puts the n smallest values of `a` in the first n slots
    return bn.partition(a, kth=n - 1)
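# Illustration of the kth = n - 1 convention above: after the call, the
# first n slots hold the n smallest values (unsorted), with the n-th
# smallest sitting exactly at index n - 1.
import bottleneck as bn
import numpy as np

a = np.array([5., 1., 4., 2., 3.])
p = partsort(a, 3)
assert set(p[:3]) == {1., 2., 3.}
assert p[2] == 3.                   # the kth element is in sorted position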
def execute(self):
    percentiles = [i * 10 for i in range(0, 10)]
    totale = []
    for g in self.G.nodes:
        totale += list(self.matrix[g][self.matrix[g] < 1])
    thresholds = np.percentile(totale, percentiles)
    thresholds = list(thresholds) + [1]
    print(thresholds)

    orderedMatrix = [[] for i in range(10000)]
    max_counts = [0 for i in range(10000)]  # this should be correct
    for g in self.G.nodes:
        orderedMatrix[g] = list(which_diff(self.matrix[g]))
        max_counts[g] = len(orderedMatrix[g])  # each node gets its max_count

    max_counts_percentiles = [0 for i in range(10)]
    for g in self.G.nodes:
        counts = list(np.histogram(orderedMatrix[g], thresholds)[0])
        for i in range(len(thresholds) - 1):
            if max_counts_percentiles[i] < counts[i]:
                max_counts_percentiles[i] = counts[i]
    print(max_counts_percentiles)

    ordered_percentiles = [[] for i in range(len(thresholds) - 1)]
    cont = 0
    for g in self.G.nodes:
        indices = list(np.digitize(orderedMatrix[g], thresholds) - 1)
        cont += 1
        for i in range(len(indices)):
            ordered_percentiles[indices[i]].append(orderedMatrix[g][i])
    #print("cont: " + str(cont))

    for i in range(len(ordered_percentiles)):
        # keep the smallest max_counts_percentiles[i] values in each bucket
        ordered_percentiles[i] = bottle.partition(
            ordered_percentiles[i],
            max_counts_percentiles[i])[:max_counts_percentiles[i]]
    #print(ordered_percentiles)

    counts_k = sorted(max_counts, reverse=True)[0:self.k]
    best_vectors = [[] for i in range(self.k)]
    for i in range(len(counts_k)):
        index_perc = len(thresholds) - 2
        best_vectors[i] = np.ones(len(self.samples) - counts_k[i])
        remains = counts_k[i]
        while remains > 0:
            if remains > len(ordered_percentiles[index_perc]):
                best_vectors[i] = np.concatenate(
                    (best_vectors[i], ordered_percentiles[index_perc]))
                remains -= len(ordered_percentiles[index_perc])
            else:
                best_vectors[i] = np.concatenate(
                    (best_vectors[i], ordered_percentiles[index_perc][0:remains]))
                remains = 0
            index_perc -= 1  # move on to the next (lower) percentile bucket

    result = best_vectors[0]
    for i in range(1, len(best_vectors)):
        result = np.multiply(result, best_vectors[i])
    score = np.sum(result)
    score_max = len(self.samples) - score
    print("MinVersion: " + str(score))
    print("MaxVersion: " + str(score_max))
def calc_IA_features(packet_list, filter_con):
    """ function to calculate inter-arrival times related features """
    global prev_packet
    global IA_times
    global IA_times_list
    global device_list
    global slice_length

    IA_times_list = []
    for i, (packet, dev_name) in enumerate(packet_list):
        if prev_packet == "":
            print("No previous packet to calculate inter-arrival time")
        else:
            time_gap = packet.time - prev_packet.time
            IA_times.append(abs(time_gap))
        prev_packet = packet
        yield packet, dev_name

    IA_times_list.append(IA_times)
    IA_times = []
    prev_packet = ""

    for i, data in enumerate(IA_times_list):
        data = data[:min(slice_length, len(data) - 1)]
        min_IAT = min(data)                    # minimum packet inter-arrival time
        max_IAT = max(data)                    # maximum packet inter-arrival time
        q1_IAT = np.percentile(data, 25)       # first quartile of inter-arrival time
        median_IAT = np.percentile(data, 50)   # median of inter-arrival time
        mean_IAT = np.mean(data)               # mean of inter-arrival time
        q3_IAT = np.percentile(data, 75)       # third quartile of inter-arrival time
        var_IAT = np.var(data)                 # variance of inter-arrival time
        iqr_IAT = q3_IAT - q1_IAT              # inter-quartile range of inter-arrival time

        feature_list[i].append(min_IAT)
        feature_list[i].append(max_IAT)
        feature_list[i].append(q1_IAT)
        feature_list[i].append(median_IAT)
        feature_list[i].append(mean_IAT)
        feature_list[i].append(q3_IAT)
        feature_list[i].append(var_IAT)
        feature_list[i].append(iqr_IAT)

        # FFT calculation for inter-arrival times
        data = np.array(data[:min(slice_length, len(data) - 1)])
        # get 10 FFT components, or fewer if the input data is shorter
        min_len = min(len(data), 10)
        fft_data = fft(data)          # calculate FFT with scipy
        fft_data = np.abs(fft_data)   # get the magnitudes of FFT components
        # get the largest components
        z = -bottleneck.partition(-fft_data, min_len - 1)[:min_len]
        sorted_fft = np.sort(z)
        sorted_fft[:] = sorted_fft[::-1]  # sort the FFT components from largest to smallest
        if len(sorted_fft) < 10:
            # pad the array with zeros if fewer than 10 FFT components are present
            sorted_fft = np.append(sorted_fft, np.zeros(10 - len(sorted_fft)))
        for fft_val in sorted_fft:
            feature_list[i].append(fft_val)  # append FFT values to the feature list
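# Hedged mini-example (standalone, not tied to the packet pipeline above)
# of the "largest FFT magnitudes" selection used for the inter-arrival
# features:
import bottleneck
import numpy as np
from scipy.fft import fft

data = np.random.default_rng(2).random(64)
n = min(len(data), 10)
mags = np.abs(fft(data))
top = -bottleneck.partition(-mags, n - 1)[:n]   # n largest magnitudes, unsorted
top = np.sort(top)[::-1]                        # largest first, as in the code above
assert np.allclose(top, np.sort(mags)[::-1][:n])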
def update(self, C):
    """ we use this function to gain more efficiency in OTDA experiments """
    self.C = np.asarray(C, dtype=np.float64)
    nt = C.shape[0]
    ns = C.shape[1]
    self.K = np.exp(-self.C / self.reg)

    # sum of rows and columns of K
    K_sum_cols = self.K.sum(axis=1)
    K_sum_rows = self.K.sum(axis=0)

    if self.uniform:
        if ns / self.ns_budget < 4:
            aK_sort = np.sort(K_sum_cols)
            epsilon_u_square = self.a[0] / aK_sort[self.ns_budget - 1]
        else:
            aK_sort = bottleneck.partition(
                K_sum_cols, self.ns_budget - 1)[self.ns_budget - 1]
            epsilon_u_square = self.a[0] / aK_sort

        if nt / self.nt_budget < 4:
            bK_sort = np.sort(K_sum_rows)
            epsilon_v_square = self.b[0] / bK_sort[self.nt_budget - 1]
        else:
            bK_sort = bottleneck.partition(
                K_sum_rows, self.nt_budget - 1)[self.nt_budget - 1]
            epsilon_v_square = self.b[0] / bK_sort
    else:
        aK = self.a / K_sum_cols
        bK = self.b / K_sum_rows
        aK_sort = np.sort(aK)[::-1]
        epsilon_u_square = aK_sort[self.ns_budget - 1]
        bK_sort = np.sort(bK)[::-1]
        epsilon_v_square = bK_sort[self.nt_budget - 1]

    # I, J
    self.Isel = self.a >= epsilon_u_square * K_sum_cols
    self.Jsel = self.b >= epsilon_v_square * K_sum_rows

    if sum(self.Isel) != self.ns_budget:
        if self.uniform:
            aK = self.a / K_sum_cols
            aK_sort = np.sort(aK)[::-1]
        epsilon_u_square = aK_sort[self.ns_budget - 1:self.ns_budget + 1].mean()
        self.Isel = self.a >= epsilon_u_square * K_sum_cols
        self.ns_budget = sum(self.Isel)

    if sum(self.Jsel) != self.nt_budget:
        if self.uniform:
            bK = self.b / K_sum_rows
            bK_sort = np.sort(bK)[::-1]
        epsilon_v_square = bK_sort[self.nt_budget - 1:self.nt_budget + 1].mean()
        self.Jsel = self.b >= epsilon_v_square * K_sum_rows
        self.nt_budget = sum(self.Jsel)

    self.epsilon = (epsilon_u_square * epsilon_v_square)**(1 / 4)
    self.fact_scale = (epsilon_v_square / epsilon_u_square)**(1 / 2)

    # Ic, Jc
    self.Ic = ~self.Isel
    self.Jc = ~self.Jsel

    # K
    self.K_IJ = self.K[np.ix_(self.Isel, self.Jsel)]
    self.K_IcJ = self.K[np.ix_(self.Ic, self.Jsel)]
    self.K_IJc = self.K[np.ix_(self.Isel, self.Jc)]
    K_min = self.K_IJ.min()
    if K_min == 0:
        K_min = np.finfo(float).tiny

    # a_I, b_J, a_Ic, b_Jc
    self.a_I = self.a[self.Isel]
    self.b_J = self.b[self.Jsel]
    if not self.uniform:
        self.a_I_min = self.a_I.min()
        self.a_I_max = self.a_I.max()
        self.b_J_max = self.b_J.max()
        self.b_J_min = self.b_J.min()
    else:
        self.a_I_min = self.a_I[0]
        self.a_I_max = self.a_I[0]
        self.b_J_max = self.b_J[0]
        self.b_J_min = self.b_J[0]

    # box constraints in the LBFGS solver (see Proposition 1 in the paper)
    self.bounds_u = [(
        max(self.a_I_min / (self.epsilon * (nt - self.nt_budget)
                            + self.nt_budget * (self.b_J_max / (
                                self.epsilon * self.fact_scale * ns * K_min))),
            self.epsilon / self.fact_scale),
        self.a_I_max / (self.epsilon * nt * K_min))] * self.ns_budget

    self.bounds_v = [(
        max(self.b_J_min / (self.epsilon * (ns - self.ns_budget)
                            + self.ns_budget * (self.fact_scale * self.a_I_max / (
                                self.epsilon * nt * K_min))),
            self.epsilon * self.fact_scale),
        self.b_J_max / (self.epsilon * ns * K_min))] * self.nt_budget

    self.vec_eps_IJc = self.epsilon * self.fact_scale \
        * (self.K_IJc * np.ones(nt - self.nt_budget).reshape((1, -1))).sum(axis=1)
    self.vec_eps_IcJ = (self.epsilon / self.fact_scale) \
        * (np.ones(ns - self.ns_budget).reshape((-1, 1)) * self.K_IcJ).sum(axis=0)

    # pre-calculated constants for restricted Sinkhorn
    if self.ns_budget != ns or self.nt_budget != nt:
        self.cst_u = self.fact_scale * self.epsilon * self.K_IJc.sum(axis=1)
        self.cst_v = self.epsilon * self.K_IcJ.sum(axis=0) / self.fact_scale

    if not self.one_init:
        u0 = np.full(self.ns_budget,
                     (1. / self.ns_budget) + self.epsilon / self.fact_scale)
        v0 = np.full(self.nt_budget,
                     (1. / self.nt_budget) + self.epsilon * self.fact_scale)
    else:
        u0 = np.full(self.ns_budget, 1.)
        v0 = np.full(self.nt_budget, 1.)

    if self.restricted:
        self.u0, self.v0 = self._restricted_sinkhorn(u0, v0, max_iter=5)
    else:
        self.u0 = u0
        self.v0 = v0