Example no. 1
def _minmax(
        arr, mask=None, q_low=0, q_upp=0, cenfunc='median'
):
    # General setup (nkeep and maxrej as dummy)
    _arr, _masks, _, cenfunc, _nvals = _setup_reject(
        arr=arr, mask=mask, nkeep=1, maxrej=None, cenfunc=cenfunc
    )
    # mask == input_mask | ~isfinite
    mask, _, _, mask_skiprej = _masks  # nkeep and maxrej not used in MINMAX.
    nit, ncombine, n_old = _nvals  # nit stays at its initial value in MINMAX.

    # adding 0.001 following IRAF
    n_rej_low = (n_old * q_low + 0.001).astype(n_old.dtype)
    n_rej_upp = (n_old * q_upp + 0.001).astype(n_old.dtype)
    n_low = np.max(n_rej_low)  # only ~ 0.1 ms for 1k x 1k array of int
    n_upp = np.max(n_rej_upp)

    dmin, dmax = _get_dtype_limits(_arr.dtype)
    # remove lower values
    _arr[mask] = dmax  # replace with largest value
    low = np.max(bn.partition(_arr, kth=n_low, axis=0)[:n_low, ], axis=0)
    # remove upper values
    _arr[mask] = dmin  # replace with lowest values
    upp = np.min(-bn.partition(-_arr, kth=n_upp, axis=0)[:n_upp, ], axis=0)

    # propagate with rejection mask
    mask |= (_arr < low) | (upp < _arr)

    code = np.zeros(_arr.shape[1:], dtype=np.uint8)
    no_rej = (n_rej_low == 0) | (n_rej_upp == 0)
    # code +=

    return (mask, low, upp, nit, code)
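Below is a minimal, self-contained sketch of the per-pixel bottom-k / top-k extraction the MINMAX routine above relies on; the image stack and the count n are made up, and the `_setup_reject` bookkeeping is omitted.

import numpy as np
import bottleneck as bn

# Hypothetical stack of 5 "images", each 2x2, combined along axis 0.
arr = np.array([[[3., 9.], [1., 4.]],
                [[2., 8.], [7., 6.]],
                [[9., 1.], [5., 2.]],
                [[4., 7.], [3., 8.]],
                [[6., 2.], [9., 5.]]])
n = 2

# After partitioning with kth=n along axis 0, the first n slices hold the
# n smallest values of each pixel (in arbitrary order).
lowest_n = bn.partition(arr, kth=n, axis=0)[:n]
# Negating twice gives the n largest values of each pixel instead.
highest_n = -bn.partition(-arr, kth=n, axis=0)[:n]

print(np.sort(lowest_n, axis=0))   # equals np.sort(arr, axis=0)[:n]
print(np.sort(highest_n, axis=0))  # equals np.sort(arr, axis=0)[-n:]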
Example no. 2
    def run_query(self, query, pooling='mean', n=10):
        if self.corpus is None:
            raise AttributeError(
                'Model not built yet, please call the fit method before running queries!'
            )

        assert type(query) == str

        similarities = []
        query_embedding = BERT_sentence_embeddings(query, query=True)

        for item in self.corpus_sent_emb:
            sent_sims = np.dot(item, query_embedding.T)

            if pooling == 'top2':
                if len(item) > 2:
                    similarities.append(
                        np.mean(-bn.partition(-sent_sims, kth=2, axis=0)[:2],
                                axis=0))
                else:
                    similarities.append(np.mean(sent_sims, axis=0))

            elif pooling == 'max':
                similarities.append(np.amax(sent_sims, axis=0))

            elif pooling == 'mean':
                similarities.append(np.mean(sent_sims, axis=0))

        similarities = np.squeeze(np.array(similarities))

        return self.__create_query_result(query, similarities, n)
def mean_rrank_at_k_batch(train_data,
                          heldout_data,
                          Et,
                          Eb,
                          user_idx,
                          k=5,
                          mu=None,
                          vad_data=None):
    '''
    mean reciprocal rank@k: For each user, make predictions and rank for
    all the items. Then calculate the mean reciprocal rank for the top K that
    are in the held-out set.
    '''
    batch_users = user_idx.stop - user_idx.start

    X_pred = _make_prediction(train_data,
                              Et,
                              Eb,
                              user_idx,
                              batch_users,
                              mu=mu,
                              vad_data=vad_data)
    all_rrank = 1. / (np.argsort(np.argsort(-X_pred, axis=1), axis=1) + 1)
    X_true_binary = (heldout_data[user_idx] > 0).toarray()

    heldout_rrank = X_true_binary * all_rrank
    top_k = bn.partition(-heldout_rrank, k, axis=1)
    return -top_k[:, :k].mean(axis=1)
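A toy check of the double-argsort ranking trick and the partition-based top-k used above, with made-up scores and a dense 0/1 held-out matrix (so no `_make_prediction` or sparse input is needed):

import numpy as np
import bottleneck as bn

# Made-up predicted scores for 2 users over 5 items.
X_pred = np.array([[0.1, 0.9, 0.3, 0.7, 0.5],
                   [0.8, 0.2, 0.6, 0.4, 0.9]])
# argsort of argsort of -X_pred gives each item's 0-based rank
# (0 = highest score), so 1/(rank + 1) is its reciprocal rank.
all_rrank = 1. / (np.argsort(np.argsort(-X_pred, axis=1), axis=1) + 1)

# Items each user actually interacted with (held-out set).
X_true_binary = np.array([[0, 1, 1, 0, 0],
                          [1, 0, 0, 0, 1]])
heldout_rrank = X_true_binary * all_rrank

# Mean of the k largest reciprocal ranks among the held-out items.
k = 2
top_k = bn.partition(-heldout_rrank, k, axis=1)
print(-top_k[:, :k].mean(axis=1))   # -> [0.625 0.75]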
    def convertToNumber(self, nparr):
        if self.ngrams < 2:
            byte_int = np.zeros(256)
            for row in nparr:
                try:
                    int_val = int(row, 16)
                except ValueError:
                    int_val = random.randint(0, 254)

                byte_int[int_val] += 1
        else:

            arrsize = math.pow(256, self.ngrams)
            byte_int = np.zeros(int(arrsize))
            for row in nparr:
                row = row.replace(" ", "")
                try:
                    int_val = int(row, 16)
                except ValueError:
                    int_val = random.randint(0, 254)

                byte_int[int_val] += 1
            byte_int = -bottleneck.partition(-byte_int, 2000)[:2000]

        return byte_int
Example no. 5
    def _get_object_influence(self, dist_mat, idx):
        k_nearest_idx = np.argpartition(
            dist_mat[:, idx], range(self.n_neighbors + 1)
        )[1:self.n_neighbors + 1]
        k_nearest_dist = bn.partition(dist_mat[:, idx], self.n_neighbors)
        local_density = 1 / k_nearest_dist[self.n_neighbors]
        knn_dist = sum(k_nearest_dist[1:self.n_neighbors + 1])
        return np.array([k_nearest_idx, local_density, knn_dist], dtype=object)
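A small sketch of the two lookups above on a made-up 5x5 distance matrix; passing a `range()` as `kth` to `np.argpartition` is what makes the neighbor indices come back ordered from nearest to farthest.

import numpy as np
import bottleneck as bn

# Hypothetical symmetric distance matrix for 5 objects (zeros on the diagonal).
dist_mat = np.array([[0., 2., 9., 4., 7.],
                     [2., 0., 6., 3., 8.],
                     [9., 6., 0., 5., 1.],
                     [4., 3., 5., 0., 2.],
                     [7., 8., 1., 2., 0.]])
idx, n_neighbors = 0, 2

# range() as kth sorts the first n_neighbors + 1 positions, so position 0 is
# the object itself (distance 0) and positions 1..n_neighbors are its nearest
# neighbors in increasing order of distance.
k_nearest_idx = np.argpartition(dist_mat[:, idx],
                                range(n_neighbors + 1))[1:n_neighbors + 1]
print(k_nearest_idx)                 # [1 3]

# bn.partition places the (n_neighbors + 1)-th smallest distance at index
# n_neighbors, i.e. the distance to the n_neighbors-th true neighbor.
k_nearest_dist = bn.partition(dist_mat[:, idx], n_neighbors)
print(k_nearest_dist[n_neighbors])   # 4.0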
def mean_over_k_largest(vector, k):
    '''Return the mean over the k largest values of a vector'''
    if k == 0:
        return 0

    if k >= len(vector):
        return vector.sum() / len(vector)

    z = -bottleneck.partition(-vector, kth=k - 1)[:k]
    return z.sum() / k
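A quick sanity check of the corrected partition idiom against a plain sort, on made-up numbers:

import numpy as np
import bottleneck

vector = np.array([5., 1., 9., 3., 7., 2.])
k = 3

# Reference: mean of the k largest values via a full sort.
expected = np.sort(vector)[-k:].mean()

# Partition version: the first k entries of the negated, partitioned array
# are the k largest values of the original vector.
z = -bottleneck.partition(-vector, kth=k - 1)[:k]
print(z.sum() / k, expected)   # 7.0 7.0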
    def update(self, arr):
        self.data = np.append(self.data, arr[:self.window_size])

        if len(self.data) > self.window_size:
            arr_pre = self.data[:self.window_size]
            self.data = arr[-(self.window_size - 1):]

            top10_arr = -bn.partition(-arr_pre, 10)[:10]
            self.threshold = np.average(top10_arr) / 2.6
            self.history = np.append(self.history, self.threshold)
            if len(self.history) > 10:
                self.history = self.history[1:]
def updateBestVector(self, v):
    G = self.G
    max = 0

    max_percentiles = [0 for i in range(len(self.thresholds))]
    ordered_percentiles = [[] for i in range(len(self.thresholds))]

    for u in G.neighbors(v):
        if max < self.max_counts[u]:
            max = self.max_counts[u]
        indices = np.digitize(self.orderedMatrix[u], self.thresholds) - 1

        for i in range(len(self.thresholds) - 1):
            if max_percentiles[i] < self.max_counts_percentiles[u][i]:
                max_percentiles[i] = self.max_counts_percentiles[u][i]
            ordered_percentiles[i] = np.concatenate(
                (ordered_percentiles[i], self.orderedMatrix[u][indices == i]))

    for i in range(len(self.thresholds) - 1):
        length = min(len(ordered_percentiles[i]), max_percentiles[i])
        if length == 0:
            ordered_percentiles[i] = []
        else:
            ordered_percentiles[i] = bottle.partition(ordered_percentiles[i],
                                                      length - 1)
            ordered_percentiles[i] = ordered_percentiles[i][0:length]

    remains = max
    index_perc = 0

    #print("max: "+str(max))
    #print("max_percentiles"+str(max_percentiles))
    #print("ordered_percentiles: "+str(ordered_percentiles))

    best_vector = np.asarray([])
    while (remains > 0):
        #print(remains)
        if remains > len(ordered_percentiles[index_perc]):
            best_vector = np.concatenate(
                (best_vector, ordered_percentiles[index_perc]))
            remains -= len(ordered_percentiles[index_perc])
        else:
            best_vector = np.concatenate(
                (best_vector, ordered_percentiles[index_perc][0:remains]))
            remains = 0
        index_perc += 1
    return best_vector
def check_trivial_answer(n_assets, mu, EST, return_matrix):
    max_index = np.argmax(mu)
    w = np.zeros(n_assets)
    for i in range(n_assets):
        if i != max_index:
            w[i] = 0
        else:
            w[i] = 1

    w_v = w @ return_matrix
    length = int(np.floor(alpha * len(w_v))) - 1
    ESt = bn.partition(w_v, kth=length)
    ESt = np.mean(ESt[0:length])

    if (ESt / EST < 1):
        return max_index
    else:
        return -1
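The `ESt` lines above just average the worst `alpha` fraction of the portfolio returns; a minimal sketch with a made-up return series (using `kth=length - 1`, a slightly different but equivalent indexing to the snippet's):

import numpy as np
import bottleneck as bn

returns = np.array([0.02, -0.05, 0.01, -0.03, 0.04, -0.01, 0.00, -0.04])
alpha = 0.25                                   # hypothetical tail fraction
length = int(np.floor(alpha * len(returns)))   # number of worst returns kept

# bn.partition puts the `length` smallest returns in the first positions,
# so their mean is the expected-shortfall-style statistic used above.
worst = bn.partition(returns, kth=length - 1)[:length]
print(worst.mean())                            # (-0.05 - 0.04) / 2 = -0.045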
Example no. 10
    def step(self, state):
        ''' Predict the action given the current state when generating training data.

        Args:
            state (dict): A dictionary that represents the current state

        Returns:
            action (int): The action predicted by the agent
        '''

        current_hand = state['raw_obs']['current_hand']
        legal_actions = state['legal_actions']

        # # check
        # if len(state['raw_obs']['trace']) >= 2:
        #     if state['raw_obs']['trace'][-1][1] == 'pass' and state['raw_obs']['trace'][-2][1] == 'pass':
        #         if set(legal_actions) != PaodekuaiJudger.playable_cards_from_hand(current_hand):
        #             print(legal_actions)
        #             print(current_hand)
        #             raise ValueError('Error')

        # Win the game if possible
        if current_hand in legal_actions:
            return current_hand

        # If no choice, e.g. ['pass']
        if len(legal_actions) == 1:
            return legal_actions[0]

        # set the model to evaluation mode, otherwise the output would be wrong
        with torch.no_grad():
            self.model.eval()
            obs = torch.FloatTensor(state['obs']).reshape(-1, 6, 4, 13)
            prediction = self.model(obs).view(-1, 1, 4, 13)

            if self.generate_data:
                # return choices by prob
                softmax = nn.Softmax(dim=0)
                tensor_cards = torch.FloatTensor([cards_encode_tensor(cards) for cards in legal_actions]).view(-1, 1, 4, 13)
                inner_product = torch.FloatTensor([(prediction * cards).sum() for cards in tensor_cards])
                #
                similarity = softmax(inner_product).numpy()
                top_cards_idx = bottleneck.argpartition(-similarity, 1)[:2]
                top_cards_prob = -bottleneck.partition(-similarity, 1)[:2]
                top_cards_prob = top_cards_prob/sum(top_cards_prob)
                return np.random.choice(np.array(legal_actions)[top_cards_idx], p=top_cards_prob)

            elif not self.entropy:
                # SL card: select the nearest card to the predicted tensor
                # use similarity = inner product
                choice = ''
                similarity = -1
                for cards in legal_actions:
                    tensor_card = torch.FloatTensor(cards_encode_tensor(cards)).reshape(-1, 1, 4, 13)
                    new_sim = (prediction * tensor_card).sum()
                    if new_sim > similarity:
                        choice = cards
                        similarity = new_sim
                return choice
            else:
                choice = ''
                loss = 100000
                for cards in legal_actions:
                    tensor_card = torch.FloatTensor(cards_encode_tensor(cards)).reshape(-1, 1, 4, 13)
                    new_loss = loss_function(prediction, tensor_card)
                    if new_loss < loss:
                        choice = cards
                        loss = new_loss
                return choice
Example no. 11
a = np.asarray([0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 13, 34])
thresholds = np.asarray([0, 0.000001, 3, 7, 29, 50])
indices = np.digitize(a, thresholds) - 1
print(indices)
lista = []
print(indices == 1)

lista.append(a[indices == 1])
print(lista)
raise ValueError

a = np.random.rand(100000000) * 10
#a=list(range(90,100))+list(range(40,50))+list(range(0,5))+list(range(10,19))
#a=np.asarray(a)
#print(a)
print()
t_start = time.time()
b = bottle.partition(a, 10)[:10]
b = np.sort(b)
t_end = time.time()
print("Elapsed time: ", time.strftime("%H:%M:%S",
                                      time.gmtime(t_end - t_start)))
print(b)
print()
t_start = time.time()
#print(np.sort(a)[:1])
print(np.min(a))
t_end = time.time()
print("Elapsed time: ", time.strftime("%H:%M:%S",
                                      time.gmtime(t_end - t_start)))
eem_data = read_data('EEM.csv')
dbc_data = read_data('DBC.csv')
dbv_data = read_data('DBV.csv')

spy_2008_data = read_data('SPY_2008.csv')

spy_2008_closing = list(spy_2008_data.values())

spy_2008_return = np.zeros(len(spy_2008_closing) - 1)
for i in range(len(spy_2008_closing) - 1):
    spy_2008_return[i] = (float(spy_2008_closing[i + 1]) - float(
        spy_2008_closing[i])) / float(spy_2008_closing[i])

sigma_2008 = np.std(spy_2008_return)
VaR = int(np.floor(alpha * len(spy_2008_return)))
EST_2008 = bn.partition(spy_2008_return, kth=VaR - 1)
EST_2008 = np.mean(EST_2008[0:VaR - 1])

list_of_data = [spy_data, agg_data, gld_data, eem_data, dbc_data, dbv_data]
#list_of_data = [agg_data,eem_data]
assets = process_data(list_of_data)
assets = np.array(assets)
assets = assets.astype(float)
n_assets = assets.shape[0]
n_obs = assets.shape[1]

return_matrix = np.zeros((n_assets, n_obs - 1))
for i in range(n_assets):
    for j in range(n_obs - 1):
        return_matrix[i, j] = (assets[i][j + 1] - assets[i][j]) / assets[i][j]
Example no. 13
def _iter_rej(
        arr, mask=None, sigma_lower=3., sigma_upper=3., maxiters=5, ddof=0,
        nkeep=3, maxrej=None, cenfunc='median', ccdclip=False, irafmode=True,
        rdnoise_ref=0., snoise_ref=0., scale_ref=1, zero_ref=0
):
    """ The common function for iterative rejection algorithms.

    Parameters
    ----------
    arr : ndarray
        The array to find the mask. It must be gain-corrected if
        ``ccdclip=True``.

    rdnoise_ref, snoise_ref : float
        The representative readnoise and sensitivity noise to estimate
        the error-bar for ``ccdclip=True``.

    scale_ref, zero_ref : float
        The representative scaling and zeroing value to estimate the
        error-bar for ``ccdclip=True``.
    """
    def __calc_censtd(_arr):
        # most are defined in upper _iter_rej function
        cen = cenfunc(_arr, axis=0)
        if ccdclip:  # use abs(pix value) to avoid NaN from negative pixels.
            std = np.sqrt(
                ((1 + snoise_ref)*np.abs(cen + zero_ref)*scale_ref)
                + rdnoise_ref**2
            )
            # restore zeroing & scaling ; then add rdnoise
        else:
            std = bn.nanstd(_arr, axis=0, ddof=ddof)

        return cen, std

    # General setup
    _arr, _masks, keeprej, cenfunc, _nvals, lowupp = _setup_reject(
        arr=arr, mask=mask, nkeep=nkeep, maxrej=maxrej, cenfunc=cenfunc
    )
    mask_nan, mask_nkeep, mask_maxrej, mask_pix = _masks
    nkeep, maxrej = keeprej
    nit, ncombine, n_finite_old = _nvals
    low, upp, low_new, upp_new = lowupp

    nrej = ncombine - n_finite_old
    k = 0
    # mask_pix is where **NO** rejection should occur.
    if (nkeep == 0) and (maxrej == ncombine):
        print("nkeep, maxrej turned off.")
        # no need to check mask_pix iteratively
        while k < maxiters:
            cen, std = __calc_censtd(_arr=_arr)
            low_new[~mask_pix] = (cen - sigma_lower*std)[~mask_pix]
            upp_new[~mask_pix] = (cen + sigma_upper*std)[~mask_pix]

            # In numpy, > or < automatically applies along axis=0!!
            mask_bound = (_arr < low_new) | (_arr > upp_new) | ~np.isfinite(_arr)
            _arr[mask_bound] = np.nan

            n_finite_new = ncombine - np.count_nonzero(mask_bound, axis=0)
            n_change = n_finite_old - n_finite_new
            total_change = np.sum(n_change)

            mask_nochange = (n_change == 0)  # identical to say "max-iter reached"

            # no need to backup
            if total_change == 0:
                break

            # I put the test below because I thought it will be quicker
            # to halt clipping if all pixels are masked. But now I feel
            # testing this in every iteration is an unnecessary overhead
            # for "nearly impossible" situation.
            # - ysBach (2020-10-14 21:15:44 (KST: GMT+09:00))
            # if np.all(mask_pix):
            #     break

            # update only non-masked pixels
            nrej[~mask_pix] = n_change[~mask_pix]
            # update only changed pixels
            nit[~mask_nochange] += 1
            k += 1
            n_finite_old = n_finite_new

    else:
        while k < maxiters:
            cen, std = __calc_censtd(_arr=_arr)
            low_new[~mask_pix] = (cen - sigma_lower*std)[~mask_pix]
            upp_new[~mask_pix] = (cen + sigma_upper*std)[~mask_pix]

            # In numpy, > or < automatically applies along axis=0!!
            mask_bound = (_arr < low_new) | (_arr > upp_new) | ~np.isfinite(_arr)
            _arr[mask_bound] = np.nan

            n_finite_new = ncombine - np.count_nonzero(mask_bound, axis=0)
            n_change = n_finite_old - n_finite_new
            total_change = np.sum(n_change)

            mask_nochange = (n_change == 0)  # identical to say "max-iter reached"
            mask_nkeep = ((ncombine - nrej) < nkeep)
            mask_maxrej = (nrej > maxrej)

            # mask pixel position if any of these happened.
            # Including mask_nochange here will not change results but only
            # spend more time.
            mask_pix = mask_nkeep | mask_maxrej

            # revert to the previous ones if masked.
            # By doing this, pixels which was mask_nkeep now, e.g., will
            # again be True in mask_nkeep in the next iter but unchanged.
            # This should be done at every iteration (unfortunately)
            # because, e.g., if nkeep is very large, excessive rejection may
            # happen for many times, and the restoration CANNOT be done
            # after all the iterations.
            low_new[mask_pix] = low[mask_pix].copy()
            upp_new[mask_pix] = upp[mask_pix].copy()
            low = low_new
            upp = upp_new

            if total_change == 0:
                break

            # I put the test below because I thought it will be quicker
            # to halt clipping if all pixels are masked. But now I feel
            # testing this in every iteration is an unnecessary overhead
            # for "nearly impossible" situation.
            # - ysBach (2020-10-14 21:15:44 (KST: GMT+09:00))
            # if np.all(mask_pix):
            #     break

            # update only non-masked pixels
            nrej[~mask_pix] = n_change[~mask_pix]
            # update only changed pixels
            nit[~mask_nochange] += 1
            k += 1
            n_finite_old = n_finite_new

    mask = mask_nan | (arr < low_new) | (arr > upp_new)

    code = np.zeros(_arr.shape[1:], dtype=np.uint8)
    if (maxiters == 0):
        code += 1
    else:
        code += (2*mask_nochange + 4*mask_nkeep
                 + 8*mask_maxrej).astype(np.uint8)

    if irafmode:
        n_minimum = max(nkeep, ncombine - maxrej)
        if n_minimum > 0:
            try:
                resid = np.abs(_arr - cen)
            except UnboundLocalError:  # cen undefined when maxiters=0
                resid = np.abs(_arr - cenfunc(_arr, axis=0))
            # need this cuz bn.argpartition cannot handle NaN:
            resid[np.isnan(resid)] = _get_dtype_limits(resid.dtype)[1]
            # ^ replace with max of dtype
            # after this, resid is guaranteed to have **NO** NaN values.

            resid_cut = np.max(
                bn.partition(resid, n_minimum, axis=0)[:n_minimum, ],
                axis=0
            )
            mask[resid <= resid_cut] = False

    # Note the mask returned here is mask from rejection PROPAGATED with
    # the input mask. So to extract the pixels masked PURELY from
    # rejection, you need ``mask_output^mask_input`` because the input
    # mask is a subset of the output one.

    return (mask, low, upp, nit, code)
Example no. 14
                  (scan_number, particle_number))

            # --- GET SCAN DATA & INITIALIZE PROCESSED FILE ---
            datafile = scan_analyzer.getScanDataSet(data, scan_number,
                                                    particle_number)
            processedData = h5py.File(
                processed_filepath + "IntensityAnalysisData.hdf5", "a")
            pdata = processedData.create_group('scan%s/particle%s' %
                                               (scan_number, particle_number))

            # --- INFINITY 3 FIRST IMAGE ---
            infinity3_maxima_to_average = 10
            infinity3_image = np.array(
                datafile['Infinity3_First_Processed_Image'])
            infinity3_z = -bottleneck.partition(
                -infinity3_image.flatten(),
                infinity3_maxima_to_average)[:infinity3_maxima_to_average]
            infinity3_averaged_maxima_list.append(np.mean(infinity3_z))

            # --- RAMAN WHITE LIGHT IMAGE ---
            white0order_maxima_to_average = 10
            white0order_image = np.array(
                datafile['Raman_White_Light_0Order_Processed_Image'])
            white0order_z = -bottleneck.partition(
                -white0order_image.flatten(),
                white0order_maxima_to_average)[:white0order_maxima_to_average]
            white0order_averaged_maxima_list.append(np.mean(white0order_z))

            # --- RAMAN LASER LIGHT IMAGE ---
            laser0order_maxima_to_average = 10
            laser0order_image = np.array(
Example no. 15
    def _get_kernel(self, X, Y=None, nystroem_kernel=False):
        X, Y = check_pairwise_arrays(X, Y)
        if nystroem_kernel:  ##Cannot use self.nystroem since the kernel also needs to be computable for the full data for prediction when Nystroem sampling is used
            if self.component_indices is None:
                rnd = check_random_state(self.random_state)
                n_samples = X.shape[0]
                # get basis vectors
                if self.n_components > n_samples:
                    # XXX should we just bail?
                    n_components = n_samples
                    warnings.warn(
                        "n_components > n_samples. This is not possible.\n"
                        "n_components was set to n_samples, which results"
                        " in inefficient evaluation of the full kernel.")
                else:
                    n_components = self.n_components
                n_components = min(n_samples, n_components)
                self.component_indices = rnd.permutation(
                    n_samples)[:n_components]
            X = X[self.component_indices].copy()
            d = euclidean_distances(X, X)
        else:
            d = euclidean_distances(X, Y)
        ##Get n_neighbors largest element to find range if not given
        if (self.theta is None):
            if (self.n_neighbors == "inf") | (
                    self.n_neighbors == np.inf
            ):  ##special case: choose theta such that it equals the average distance to the farthest neighbor
                self.n_neighbors = X.shape[0] - 1
                self.range_adjust = 1.
            if (not self.prctg_neighbors is None) & (self.n_neighbors is None):
                self.n_neighbors = int(X.shape[0] * self.prctg_neighbors)
            if not self.n_neighbors is None:
                if self.kernel == "GW":  ##Choose theta such that on average every point has n_neighbors non-zero entries
                    ds = d.flatten()
                    ds = ds[~(ds == 0)]  ##Remove diagonal
                    self.theta = bn.partition(
                        ds, d.shape[0] * self.n_neighbors -
                        1)[d.shape[0] * self.n_neighbors - 1]
                else:  ##Choose theta as average distance to n_neighbors'th nearest neighbor
                    kdt = scipy.spatial.cKDTree(X)
                    dists, neighs = kdt.query(
                        X, self.n_neighbors + 1
                    )  ##get distance to n_neighbors+1 nearest neighbors (incl. point itself)
                    self.theta = np.mean(
                        dists[:, self.n_neighbors]
                    )  ##calculate average distance to n_neighbors'th nearest neighbor (only true neighbors excl. point itself)
            if self.kernel == "rbf":
                self.theta = self.theta / (
                    self.range_adjust**0.5
                )  ##range_adjust=3 (4.6) correlation should drop to 5% (1%) at distance = theta
            if self.kernel == "laplace":
                self.theta = self.theta / self.range_adjust
            print("Chosen theta: " + str(round(self.theta, 4)))
        if self.kernel == "GW":
            d *= -1. / self.theta
            d2 = d.copy()
            d += 1.
            d[d < 0] = 0
            d *= d
            d2 *= -2
            d2 += 1
            d *= d2
            ##Above code does the same as below:

    #        tmp=1-d/self.theta
    #        tmp[tmp<0]=0
    #        d=tmp**2*(1+2*d/self.theta)
        if self.kernel == "rbf":
            ##np.exp(-(d/self.theta)**2)
            d *= (1. / self.theta)
            d *= -d
            np.exp(d, d)
        if self.kernel == "laplace":
            ##np.exp(-d/self.theta)
            d *= (-1. / self.theta)
            np.exp(d, d)
        if self.sparse:
            #            print("Sparsity ratio: " +str(round(float(100*np.sum(d>0))/X.shape[0]/X.shape[0],2))+"%")
            return csc_matrix(d)
        else:
            return d
Example no. 16
def top_k_bottleneck(ndarr, k=10):
    return bn.partition(ndarr, ndarr.size - k)[-k:]
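Usage sketch on a made-up array; the k largest values come back unsorted, so sort them afterwards if order matters.

import numpy as np
import bottleneck as bn

ndarr = np.array([4., 12., 7., 1., 9., 3.])
k = 3
# bn.partition moves the k largest values into the last k positions
# (in arbitrary order); everything before them is smaller or equal.
top_k = bn.partition(ndarr, ndarr.size - k)[-k:]
print(np.sort(top_k))   # [ 7.  9. 12.]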
Example no. 17
def test_transpose():
    "partition transpose test"
    a = np.arange(12).reshape(4, 3)
    actual = bn.partition(a.T, 2, -1).T
    desired = bn.slow.partition(a.T, 2, -1).T
    assert_equal(actual, desired, 'partition transpose test')
Example no. 18
    def time_partition(self, dtype, shape):
        bn.partition(self.arr, self.half)
Example no. 19
    def __init__(self,
                 a,
                 b,
                 C,
                 reg,
                 ns_budget=None,
                 nt_budget=None,
                 uniform=False,
                 restricted=True,
                 one_init=False,
                 maxiter=10000,
                 maxfun=10000,
                 pgtol=1e-09,
                 verbose=True,
                 log=False):

        # check if bottleneck module exists
        try:
            import bottleneck
        except ImportError:
            warnings.warn(
                "Bottleneck module is not installed. Install it from https://pypi.org/project/Bottleneck/ for better performance."
            )
            bottleneck = np

        # time
        tic_initial = time()

        self.a = np.asarray(a, dtype=np.float64)
        self.b = np.asarray(b, dtype=np.float64)

        # if autograd package is used, we then have to change
        # some arrays from "ArrayBox" type to "np.array".
        if not isinstance(C, np.ndarray):
            C = C._value

        self.C = np.asarray(C, dtype=np.float64)
        self.reg = reg
        ns = C.shape[0]
        nt = C.shape[1]
        self.ns_budget = ns_budget
        self.nt_budget = nt_budget
        self.verbose = verbose
        self.uniform = uniform
        self.restricted = restricted
        self.maxiter = maxiter
        self.maxfun = maxfun
        self.pgtol = pgtol
        self.one_init = one_init
        self.log = log

        # by default, we keep only 50% of the sample data points
        if self.ns_budget is None:
            self.ns_budget = int(np.floor(0.5 * ns))
        if self.nt_budget is None:
            self.nt_budget = int(np.floor(0.5 * nt))

        # calculate the Gibbs kernel K
        self.K = np.empty_like(self.C)
        np.divide(self.C, -self.reg, out=self.K)
        np.exp(self.K, out=self.K)

        # screening test (see Lemma 1 in the paper)

        ## full number of budget points (ns, nt) = (ns_budget, nt_budget)
        if self.ns_budget == ns and self.nt_budget == nt:
            # I, J
            self.Isel = np.ones(ns, dtype=bool)
            self.Jsel = np.ones(nt, dtype=bool)
            # epsilon
            self.epsilon = 0.0
            # kappa
            self.fact_scale = 1.0
            # restricted Sinkhorn
            self.cst_u = 0.
            self.cst_v = 0.
            # box constraints in LBFGS
            self.bounds_u = [(0.0, np.inf)] * ns
            self.bounds_v = [(0.0, np.inf)] * nt
            #
            self.K_IJ = self.K
            self.a_I = self.a
            self.b_J = self.b
            self.K_IJc = []
            self.K_IcJ = []

        else:
            # sum of rows and columns of K
            K_sum_cols = self.K.sum(axis=1)
            K_sum_rows = self.K.sum(axis=0)

            if self.uniform:
                if ns / self.ns_budget < 4:
                    aK_sort = np.sort(K_sum_cols)
                    epsilon_u_square = a[0] / aK_sort[self.ns_budget - 1]
                else:
                    aK_sort = bottleneck.partition(K_sum_cols, self.ns_budget -
                                                   1)[self.ns_budget - 1]
                    epsilon_u_square = a[0] / aK_sort

                if nt / self.nt_budget < 4:
                    bK_sort = np.sort(K_sum_rows)
                    epsilon_v_square = b[0] / bK_sort[self.nt_budget - 1]
                else:
                    bK_sort = bottleneck.partition(K_sum_rows, self.nt_budget -
                                                   1)[self.nt_budget - 1]
                    epsilon_v_square = b[0] / bK_sort
            else:
                aK = a / K_sum_cols
                bK = b / K_sum_rows

                aK_sort = np.sort(aK)[::-1]
                epsilon_u_square = aK_sort[self.ns_budget - 1]

                bK_sort = np.sort(bK)[::-1]
                epsilon_v_square = bK_sort[self.nt_budget - 1]

            # I, J
            self.Isel = self.a >= epsilon_u_square * K_sum_cols
            self.Jsel = self.b >= epsilon_v_square * K_sum_rows

            if sum(self.Isel) != self.ns_budget:
                print("test error", sum(self.Isel), self.ns_budget)
                if self.uniform:
                    aK = a / K_sum_cols
                    aK_sort = np.sort(aK)[::-1]
                epsilon_u_square = aK_sort[self.ns_budget - 1:self.ns_budget +
                                           1].mean()
                self.Isel = self.a >= epsilon_u_square * K_sum_cols
                self.ns_budget = sum(self.Isel)

            if sum(self.Jsel) != self.nt_budget:
                print("test error", sum(self.Jsel), self.nt_budget)
                if self.uniform:
                    bK = b / K_sum_rows
                    bK_sort = np.sort(bK)[::-1]
                epsilon_v_square = bK_sort[self.nt_budget - 1:self.nt_budget +
                                           1].mean()
                self.Jsel = self.b >= epsilon_v_square * K_sum_rows
                self.nt_budget = sum(self.Jsel)

            # epsilon, kappa
            self.epsilon = (epsilon_u_square * epsilon_v_square)**(1 / 4)
            self.fact_scale = (epsilon_v_square / epsilon_u_square)**(1 / 2)

            if self.verbose:
                print("epsilon = %s\n" % self.epsilon)
                print("kappa = %s\n" % self.fact_scale)
                print(
                    'Cardinality of selected points: |Isel| = %s \t |Jsel| = %s \n'
                    % (sum(self.Isel), sum(self.Jsel)))

            # Ic, Jc: complementary sets of I and J
            self.Ic = ~self.Isel
            self.Jc = ~self.Jsel

            # K
            self.K_IJ = self.K[np.ix_(self.Isel, self.Jsel)]
            self.K_IcJ = self.K[np.ix_(self.Ic, self.Jsel)]
            self.K_IJc = self.K[np.ix_(self.Isel, self.Jc)]
            K_min = self.K_IJ.min()
            if K_min == 0:
                K_min = np.finfo(float).tiny

            # a_I, b_J, a_Ic, b_Jc
            self.a_I = self.a[self.Isel]
            self.b_J = self.b[self.Jsel]
            if not self.uniform:
                self.a_I_min = self.a_I.min()
                self.a_I_max = self.a_I.max()
                self.b_J_max = self.b_J.max()
                self.b_J_min = self.b_J.min()
            else:
                self.a_I_min = self.a_I[0]
                self.a_I_max = self.a_I[0]
                self.b_J_max = self.b_J[0]
                self.b_J_min = self.b_J[0]

            # box constraints in L-BFGS-B (see Proposition 1 in the paper)
            self.bounds_u = [(max(self.a_I_min / (self.epsilon * (nt - self.nt_budget) \
                                                    + self.nt_budget * (self.b_J_max / (self.epsilon *  self.fact_scale * ns * K_min))), \
                                  self.epsilon / self.fact_scale), \
                              self.a_I_max / (self.epsilon * nt * K_min))] * self.ns_budget

            self.bounds_v = [(max(self.b_J_min / (self.epsilon * (ns - self.ns_budget) \
                                                    + self.ns_budget * (self.fact_scale * self.a_I_max / (self.epsilon * nt * K_min))), \
                                  self.epsilon * self.fact_scale), \
                              self.b_J_max / (self.epsilon * ns * K_min))] * self.nt_budget

        # constants in the objective function of the screened Sinkhorn divergence
        self.vec_eps_IJc = self.epsilon * self.fact_scale \
                           * (self.K_IJc * np.ones(nt - self.nt_budget).reshape((1, -1))).sum(axis=1)
        self.vec_eps_IcJ = (self.epsilon / self.fact_scale) \
                           * (np.ones(ns - self.ns_budget).reshape((-1, 1)) * self.K_IcJ).sum(axis=0)

        # restricted Sinkhorn
        if self.ns_budget != ns or self.nt_budget != nt:
            self.cst_u = self.fact_scale * self.epsilon * self.K_IJc.sum(
                axis=1)
            self.cst_v = self.epsilon * self.K_IcJ.sum(
                axis=0) / self.fact_scale

        if not self.one_init:
            u0 = np.full(self.ns_budget, (1. / self.ns_budget) +
                         self.epsilon / self.fact_scale)
            v0 = np.full(self.nt_budget, (1. / self.nt_budget) +
                         self.epsilon * self.fact_scale)
        else:
            print('one initialization')
            u0 = np.full(self.ns_budget, 1.)
            v0 = np.full(self.nt_budget, 1.)

        if self.restricted:
            self.u0, self.v0 = self._restricted_sinkhorn(u0, v0, max_iter=5)
        else:
            print('no restricted')
            self.u0 = u0
            self.v0 = v0

        self.toc_initial = time() - tic_initial
        if self.verbose:
            print('time of initialization: %s' % self.toc_initial)
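The screening thresholds above only need the `ns_budget`-th (resp. `nt_budget`-th) smallest row/column sum of K; a tiny check with made-up sums showing that the partition shortcut agrees with a full sort:

import numpy as np
import bottleneck

K_sum_cols = np.array([0.7, 0.2, 0.9, 0.4, 0.6, 0.1])   # made-up row sums of K
ns_budget = 3

# bottleneck.partition places the ns_budget-th smallest value at index
# ns_budget - 1, so that single element equals the full-sort result.
via_partition = bottleneck.partition(K_sum_cols, ns_budget - 1)[ns_budget - 1]
via_sort = np.sort(K_sum_cols)[ns_budget - 1]
print(via_partition, via_sort)   # 0.4 0.4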
Example no. 20
def _iter_rej(arr,
              mask=None,
              sigma_lower=3.,
              sigma_upper=3.,
              maxiters=5,
              ddof=0,
              nkeep=3,
              maxrej=None,
              cenfunc='median',
              ccdclip=False,
              irafmode=True,
              rdnoise_ref=0.,
              snoise_ref=0.,
              scale_ref=1,
              zero_ref=0):
    """ The common function for iterative rejection algorithms.

    Parameters
    ----------
    arr : ndarray
        The array to find the mask. It must be gain-corrected if
        ``ccdclip=True``.

    rdnoise_ref, snoise_ref : float
        The representative readnoise and sensitivity noise to estimate
        the error-bar for ``ccdclip=True``.

    scale_ref, zero_ref : float
        The representative scaling and zeroing value to estimate the
        error-bar for ``ccdclip=True``.
    """
    # General setup
    _arr, _masks, keeprej, cenfunc, _nvals, lowupp = _setup_reject(
        arr=arr, mask=mask, nkeep=nkeep, maxrej=maxrej, cenfunc=cenfunc)
    mask_nan, mask_nkeep, mask_maxrej, mask_pix = _masks
    nkeep, maxrej = keeprej
    nit, ncombine, n_finite_old = _nvals
    low, upp, low_new, upp_new = lowupp

    nrej = ncombine - n_finite_old  # same as nrej_old at the moment
    k = 0
    # mask_pix is where **NO** rejection should occur.
    while k < maxiters:
        if ccdclip:
            cen = cenfunc(_arr, axis=0)
            # use absolute of cen to avoid NaN from negative pixels.
            std = np.sqrt(
                np.abs((1 + snoise_ref) * (cen + zero_ref) *
                       scale_ref)  # restore zeroing & scaling
                + rdnoise_ref**2)
        else:
            cen = cenfunc(_arr, axis=0)
            std = bn.nanstd(_arr, axis=0, ddof=ddof)
        low_new[~mask_pix] = (cen - sigma_lower * std)[~mask_pix]
        upp_new[~mask_pix] = (cen + sigma_upper * std)[~mask_pix]

        # In numpy, > or < automatically applies along axis=0!!
        mask_bound = (_arr < low_new) | (_arr > upp_new) | ~np.isfinite(_arr)
        _arr[mask_bound] = np.nan

        n_finite_new = ncombine - np.count_nonzero(mask_bound, axis=0)
        n_change = n_finite_old - n_finite_new
        total_change = np.sum(n_change)

        mask_nochange = (n_change == 0)  # identical to say "max-iter reached"
        mask_nkeep = ((ncombine - nrej) < nkeep)
        mask_maxrej = (nrej > maxrej)

        # mask pixel position if any of these happened.
        # Including mask_nochange here will not change results but only
        # spend more time.
        mask_pix = mask_nkeep | mask_maxrej

        # revert to the previous ones if masked.
        # By doing this, pixels which was mask_nkeep now, e.g., will
        # again be True in mask_nkeep in the next iter but unchanged.
        # This should be done at every iteration (unfortunately)
        # because, e.g., if nkeep is very large, excessive rejection may
        # happen for many times, and the restoration CANNOT be done
        # after all the iterations.
        low_new[mask_pix] = low[mask_pix].copy()
        upp_new[mask_pix] = upp[mask_pix].copy()
        low = low_new
        upp = upp_new

        if total_change == 0:
            break

        if np.all(mask_pix):
            break

        # update only non-masked pixels
        nrej[~mask_pix] = n_change[~mask_pix]
        # update only changed pixels
        nit[~mask_nochange] += 1
        k += 1
        n_finite_old = n_finite_new

    mask = mask_nan | (arr < low_new) | (arr > upp_new)

    code = np.zeros(_arr.shape[1:], dtype=np.uint8)
    if (maxiters == 0):
        code += 1
    else:
        code += (2 * mask_nochange + 4 * mask_nkeep + 8 * mask_maxrej).astype(
            np.uint8)

    if irafmode:
        n_minimum = max(nkeep, ncombine - maxrej)
        resid = np.abs(_arr - cen)
        # need this cuz bn.argpartition cannot handle NaN:
        resid[mask_nan] = _get_dtype_limits(resid.dtype)[1]  # max of dtype
        # after this, resid is guaranteed to have **NO** NaN values.

        resid_cut = np.max(bn.partition(resid, n_minimum,
                                        axis=0)[:n_minimum, ],
                           axis=0)
        mask[resid <= resid_cut] = False

    # Note the mask returned here is mask from rejection PROPAGATED with
    # the input mask. So to extract the pixels masked PURELY from
    # rejection, you need ``mask_output^mask_input`` because the input
    # mask is a subset of the output one.

    return (mask, low, upp, nit, code)
def partsort(a, n):
    return bn.partition(a, kth=n - 1)
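Usage sketch with made-up values: after the partition, the first n entries are the n smallest elements of the input, in arbitrary order.

import numpy as np
import bottleneck as bn

a = np.array([8, 3, 5, 1, 9, 2])
n = 3
# With kth = n - 1, the element at index n - 1 is in its sorted position and
# everything before it is smaller or equal, so [:n] is the bottom-n set.
print(bn.partition(a, kth=n - 1)[:n])   # some ordering of [1 2 3]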
Example no. 22
    def execute(self):
        percentiles = [i * 10 for i in range(0, 10)]
        totale = []
        for g in self.G.nodes:
            totale += list(self.matrix[g][self.matrix[g] < 1])
        thresholds = np.percentile(totale, percentiles)
        thresholds = list(thresholds) + [1]
        print(thresholds)

        orderedMatrix = [[] for i in range(10000)]
        max_counts = [0 for i in range(10000)]  # this seems correct to me
        for g in self.G.nodes:
            orderedMatrix[g] = list(which_diff(self.matrix[g]))
            max_counts[g] = len(
                orderedMatrix[g])  # each node is associated with its max_count

        max_counts_percentiles = [0 for i in range(10)]
        for g in self.G.nodes:
            counts = list(np.histogram(orderedMatrix[g], thresholds)[0])
            for i in range(len(thresholds) - 1):
                if max_counts_percentiles[i] < counts[i]:
                    max_counts_percentiles[i] = counts[i]
        print(max_counts_percentiles)

        ordered_percentiles = [[] for i in range(len(thresholds) - 1)]
        cont = 0
        for g in self.G.nodes:
            indices = list(np.digitize(orderedMatrix[g], thresholds) - 1)
            cont += 1
            for i in range(len(indices)):
                ordered_percentiles[indices[i]].append(orderedMatrix[g][i])
        #print("cont: "+str(cont))
        for i in range(len(ordered_percentiles)):
            ordered_percentiles[i] = bottle.partition(
                ordered_percentiles[i],
                max_counts_percentiles[i])[:max_counts_percentiles[i]]

        #print(ordered_percentiles)

        counts_k = sorted(max_counts, reverse=True)[0:self.k]
        best_vectors = [[] for i in range(self.k)]
        for i in range(len(counts_k)):
            index_perc = len(thresholds) - 2
            best_vectors[i] = np.ones(len(self.samples) - counts_k[i])
            remains = counts_k[i]
            while (remains > 0):
                if remains > len(ordered_percentiles[index_perc]):
                    best_vectors[i] = np.concatenate(
                        (best_vectors[i], ordered_percentiles[index_perc]))
                    remains -= len(ordered_percentiles[index_perc])
                else:
                    best_vectors[i] = np.concatenate(
                        (best_vectors[i],
                         ordered_percentiles[index_perc][0:remains]))
                    remains = 0
                # step down to the next percentile bucket (mirrors the
                # index_perc increment in updateBestVector above)
                index_perc -= 1
        result = best_vectors[0]
        for i in range(1, len(best_vectors)):
            result = np.multiply(result, best_vectors[i])

        score = np.sum(result)
        score_max = len(self.samples) - score
        print("MinVersion: " + str(score))
        print("MaxVersion: " + str(score_max))
Example no. 23
def calc_IA_features(packet_list, filter_con):
    """ function to calculate inter-arrival times related features """
    global prev_packet
    global IA_times
    global IA_times_list
    global device_list
    global slice_length

    IA_times_list = []

    for i, (packet, dev_name) in enumerate(packet_list):
        if prev_packet == "":
            print("No previous packet to calculate inter-arrival time")
        else:
            time_gap = packet.time - prev_packet.time
            IA_times.append(abs(time_gap))
        prev_packet = packet
        yield packet, dev_name

    IA_times_list.append(IA_times)
    IA_times = []
    prev_packet = ""

    for i, (data) in enumerate(IA_times_list):
        data = data[:min(slice_length, len(data) - 1)]
        min_IAT = min(data)  # minimum packet inter-arrival time
        max_IAT = max(data)  # maximum packet inter-arrival time
        q1_IAT = np.percentile(data,
                               25)  # first quartile of inter-arrival time
        median_IAT = np.percentile(data, 50)  # median of inter-arrival time
        mean_IAT = np.mean(data)  # mean of inter-arrival time
        q3_IAT = np.percentile(data,
                               75)  # third quartile of inter-arrival time
        var_IAT = np.var(data)  # variance of inter-arrival time
        iqr_IAT = q3_IAT - q1_IAT  # inter quartile range of inter-arrival time

        feature_list[i].append(min_IAT)
        feature_list[i].append(max_IAT)
        feature_list[i].append(q1_IAT)
        feature_list[i].append(median_IAT)
        feature_list[i].append(mean_IAT)
        feature_list[i].append(q3_IAT)
        feature_list[i].append(var_IAT)
        feature_list[i].append(iqr_IAT)

        # FFT calculation for inter-arrival times
        data = np.array(data[:min(slice_length, len(data) - 1)])
        min_len = min(
            len(data), 10
        )  # get 10 fft components or the minimum length of input data to fft
        fft_data = fft(data)  # calculate fft with scipy
        fft_data = np.abs(fft_data)  # get the magnitudes of fft components
        z = -bottleneck.partition(
            -fft_data, min_len - 1)[:min_len]  # get the max components
        sorted_fft = np.sort(z)
        sorted_fft[:] = sorted_fft[::
                                   -1]  # sort the fft components from largest to smallest

        if len(
                sorted_fft
        ) < 10:  # pad the array with zeros if at least 10 fft components are not there
            sorted_fft = np.append(sorted_fft, np.zeros(10 - len(sorted_fft)))

        for fft_val in sorted_fft:
            feature_list[i].append(
                fft_val)  # append fft values to feature list
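A condensed, self-contained sketch of the FFT feature step above on made-up inter-arrival times; NumPy's FFT is used instead of SciPy's only to keep the sketch dependency-free.

import numpy as np
import bottleneck

data = np.array([0.10, 0.32, 0.15, 0.90, 0.12, 0.40, 0.22, 0.75])
min_len = min(len(data), 10)

fft_mag = np.abs(np.fft.fft(data))
# Largest `min_len` magnitudes via partition, sorted from largest to smallest,
# then zero-padded to a fixed length of 10 as in the feature extractor above.
z = -bottleneck.partition(-fft_mag, min_len - 1)[:min_len]
sorted_fft = np.sort(z)[::-1]
if len(sorted_fft) < 10:
    sorted_fft = np.append(sorted_fft, np.zeros(10 - len(sorted_fft)))
print(sorted_fft)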
Example no. 24
def test_transpose():
    """partition transpose test"""
    a = np.arange(12).reshape(4, 3)
    actual = bn.partition(a.T, 2, -1).T
    desired = bn.slow.partition(a.T, 2, -1).T
    assert_equal(actual, desired, "partition transpose test")
Example no. 25
    def update(self, C):
        """
        we use this function to gain more efficiency in OTDA experiments
        """
        self.C = np.asarray(C, dtype=np.float64)
        nt = C.shape[0]
        ns = C.shape[1]
        self.K = np.exp(-self.C / self.reg)

        # sum of rows and columns of K
        K_sum_cols = self.K.sum(axis=1)
        K_sum_rows = self.K.sum(axis=0)

        if self.uniform:
            if ns / self.ns_budget < 4:
                aK_sort = np.sort(K_sum_cols)
                epsilon_u_square = self.a[0] / aK_sort[self.ns_budget - 1]
            else:
                aK_sort = bottleneck.partition(K_sum_cols, self.ns_budget -
                                               1)[self.ns_budget - 1]
                epsilon_u_square = self.a[0] / aK_sort

            if nt / self.nt_budget < 4:
                bK_sort = np.sort(K_sum_rows)
                epsilon_v_square = self.b[0] / bK_sort[self.nt_budget - 1]
            else:
                bK_sort = bottleneck.partition(K_sum_rows, self.nt_budget -
                                               1)[self.nt_budget - 1]
                epsilon_v_square = self.b[0] / bK_sort

        else:
            aK = self.a / K_sum_cols
            bK = self.b / K_sum_rows

            aK_sort = np.sort(aK)[::-1]
            epsilon_u_square = aK_sort[self.ns_budget - 1]
            bK_sort = np.sort(bK)[::-1]
            epsilon_v_square = bK_sort[self.nt_budget - 1]

        # I, J
        self.Isel = self.a >= epsilon_u_square * K_sum_cols
        self.Jsel = self.b >= epsilon_v_square * K_sum_rows

        if sum(self.Isel) != self.ns_budget:
            if self.uniform:
                aK = self.a / K_sum_cols
            aK_sort = np.sort(aK)[::-1]
            epsilon_u_square = aK_sort[self.ns_budget - 1:self.ns_budget +
                                       1].mean()
            self.Isel = self.a >= epsilon_u_square * K_sum_cols
            self.ns_budget = sum(self.Isel)

        if sum(self.Jsel) != self.nt_budget:
            if self.uniform:
                bK = self.b / K_sum_rows
            bK_sort = np.sort(bK)[::-1]
            epsilon_v_square = bK_sort[self.nt_budget - 1:self.nt_budget +
                                       1].mean()
            self.Jsel = self.b >= epsilon_v_square * K_sum_rows
            self.nt_budget = sum(self.Jsel)

        self.epsilon = (epsilon_u_square * epsilon_v_square)**(1 / 4)
        self.fact_scale = (epsilon_v_square / epsilon_u_square)**(1 / 2)

        # Ic, Jc
        self.Ic = ~self.Isel
        self.Jc = ~self.Jsel

        # K
        self.K_IJ = self.K[np.ix_(self.Isel, self.Jsel)]
        self.K_IcJ = self.K[np.ix_(self.Ic, self.Jsel)]
        self.K_IJc = self.K[np.ix_(self.Isel, self.Jc)]
        K_min = self.K_IJ.min()
        if K_min == 0:
            K_min = np.finfo(float).tiny

        # a_I,b_J,a_Ic,b_Jc
        self.a_I = self.a[self.Isel]
        self.b_J = self.b[self.Jsel]
        if not self.uniform:
            self.a_I_min = self.a_I.min()
            self.a_I_max = self.a_I.max()
            self.b_J_max = self.b_J.max()
            self.b_J_min = self.b_J.min()
        else:
            self.a_I_min = self.a_I[0]
            self.a_I_max = self.a_I[0]
            self.b_J_max = self.b_J[0]
            self.b_J_min = self.b_J[0]

        # box constraints in LBFGS solver (see Proposition 1 in the paper)
        self.bounds_u = [(max(self.a_I_min / (self.epsilon * (nt - self.nt_budget) \
                                                    + self.nt_budget * (self.b_J_max / (self.epsilon *  self.fact_scale * ns * K_min))), \
                                  self.epsilon / self.fact_scale), \
                              self.a_I_max / (self.epsilon * nt * K_min))] * self.ns_budget

        self.bounds_v = [(max(self.b_J_min / (self.epsilon * (ns - self.ns_budget) \
                                                    + self.ns_budget * (self.fact_scale * self.a_I_max / (self.epsilon * nt * K_min))), \
                                  self.epsilon * self.fact_scale), \
                              self.b_J_max / (self.epsilon * ns * K_min))] * self.nt_budget

        self.vec_eps_IJc = self.epsilon * self.fact_scale \
                           * (self.K_IJc * np.ones(nt-self.nt_budget).reshape((1, -1))).sum(axis=1)
        self.vec_eps_IcJ = (self.epsilon / self.fact_scale) \
                           * (np.ones(ns-self.ns_budget).reshape((-1, 1)) * self.K_IcJ).sum(axis=0)

        # pre-calculated constants for the restricted Sinkhorn
        if self.ns_budget != ns or self.nt_budget != nt:
            self.cst_u = self.fact_scale * self.epsilon * self.K_IJc.sum(
                axis=1)
            self.cst_v = self.epsilon * self.K_IcJ.sum(
                axis=0) / self.fact_scale

        if not self.one_init:
            u0 = np.full(self.ns_budget, (1. / self.ns_budget) +
                         self.epsilon / self.fact_scale)
            v0 = np.full(self.nt_budget, (1. / self.nt_budget) +
                         self.epsilon * self.fact_scale)
        else:
            u0 = np.full(self.ns_budget, 1.)
            v0 = np.full(self.nt_budget, 1.)

        if self.restricted:
            self.u0, self.v0 = self._restricted_sinkhorn(u0, v0, max_iter=5)
        else:
            self.u0 = u0
            self.v0 = v0