예제 #1
0
def set_numba_threads(n):
    """Run the enclosed code with Numba's thread count set to ``n``.

    This is a single-``yield`` generator shaped for use as a context
    manager (presumably wrapped by ``contextlib.contextmanager`` outside
    this block -- confirm at the definition site).  The thread count that
    was active on entry is put back on exit, including when the managed
    body raises.
    """
    saved_thread_count = numba.get_num_threads()
    try:
        numba.set_num_threads(n)
        yield
    finally:
        # Always hand the caller's original setting back.
        numba.set_num_threads(saved_thread_count)
예제 #2
0
 def _test_func(acc, buf, local_mask):
     """Set the thread mask in the parent, optionally call the child, record it.

     ``nthreads``, ``N`` and ``child_func`` are taken from the enclosing
     scope.  The thread count observed at the end is added to ``acc[0]``.
     """
     set_num_threads(nthreads)
     # set threads in parent function
     set_num_threads(local_mask[0])
     if local_mask[0] < N:
         child_func(buf, local_mask[0])
     # Accumulate whatever thread count is visible after the (optional) child call.
     acc[0] += get_num_threads()
예제 #3
0
 def test_func(nthreads):
     """Return a length-5 array holding the thread count seen in each iteration."""
     x = 5
     buf = np.empty((x, ))
     set_num_threads(nthreads)
     for i in prange(x):
         # Record get_num_threads() as observed from inside the prange body.
         buf[i] = get_num_threads()
     return buf
예제 #4
0
 def test_func():
     """Return ``(number of distinct thread ids seen, current thread count)``.

     ``mask`` is taken from the enclosing scope and applied before the loop.
     """
     set_num_threads(mask)
     x = 5000000
     buf = np.empty((x, ))
     for i in prange(x):
         # Store the id reported by get_thread_id() for this iteration.
         buf[i] = get_thread_id()
     return len(np.unique(buf)), get_num_threads()
예제 #5
0
    def _transform(self, X, y=None):
        """Transform input time series.

        The Numba thread count is changed for the duration of the transform
        and restored afterwards, including when the transform raises.

        Parameters
        ----------
        X : 3D np.ndarray of shape = [n_instances, n_dimensions, series_length]
            panel of time series to transform; only ``X[:, 0, :]`` is used
        y : ignored argument for interface compatibility

        Returns
        -------
        pandas DataFrame, transformed features
        """
        X = X[:, 0, :].astype(np.float32)

        # Fall back to every core when n_jobs is non-positive or exceeds the
        # number of cores on this machine.
        n_cores = multiprocessing.cpu_count()
        if self.n_jobs < 1 or self.n_jobs > n_cores:
            n_jobs = n_cores
        else:
            n_jobs = self.n_jobs

        prev_threads = get_num_threads()
        set_num_threads(n_jobs)
        try:
            X_ = _transform(X, self.parameters)
        finally:
            # Do not leak our thread setting into the caller on failure.
            set_num_threads(prev_threads)
        return pd.DataFrame(X_)
예제 #6
0
 def _test_func(buf, local_mask):
     """Call ``child`` and assert the thread count equals the local mask after.

     ``nthreads`` and ``child`` are taken from the enclosing scope.
     """
     set_num_threads(nthreads)
     # when the threads exit the child functions they should
     # have a TLS slot value of the local mask as it was set
     # in child
     if local_mask[0] < config.NUMBA_NUM_THREADS:
         child(buf, local_mask[0])
         assert get_num_threads() == local_mask[0]
예제 #7
0
def test_numba_info():
    """Check that ``d.numba_info()`` agrees with what Numba itself reports.

    With the JIT disabled the helper is expected to return ``None``;
    otherwise its ``threading`` and ``threads`` fields must match Numba's
    threading layer and current thread count.
    """
    info = d.numba_info()
    if not numba.config.DISABLE_JIT:
        assert info is not None
        assert info.threading == numba.threading_layer()
        assert info.threads == numba.get_num_threads()
    else:
        assert info is None
예제 #8
0
def nn_descent_internal_high_memory_parallel(
    current_graph,
    inds,
    indptr,
    data,
    n_neighbors,
    rng_state,
    max_candidates=50,
    dist=sparse_euclidean,
    n_iters=10,
    delta=0.001,
    verbose=False,
):
    """Run up to ``n_iters`` rounds of candidate generation and graph updates.

    Each round builds new/old candidate neighbour lists, walks the vertices
    in blocks of 16384, generates updates for each block and applies them,
    counting the changes reported by ``apply_graph_updates_high_memory``.
    The loop stops early once a round's change count is at most
    ``delta * n_neighbors * n_vertices``.

    Nothing is returned; ``current_graph`` is presumably mutated in place by
    ``apply_graph_updates_high_memory`` -- confirm against that helper.

    ``inds`` / ``indptr`` / ``data`` are forwarded to
    ``generate_graph_updates``; the vertex count is ``indptr.shape[0] - 1``
    (looks like CSR components -- TODO confirm).
    """
    n_vertices = indptr.shape[0] - 1
    block_size = 16384
    n_blocks = n_vertices // block_size
    n_threads = numba.get_num_threads()

    # One membership set per row of current_graph[0], handed to the update
    # step alongside the graph itself.
    in_graph = [
        set(current_graph[0][i].astype(np.int64))
        for i in range(current_graph[0].shape[0])
    ]

    for n in range(n_iters):
        if verbose:
            print("\t", n + 1, " / ", n_iters)

        (new_candidate_neighbors, old_candidate_neighbors) = new_build_candidates(
            current_graph, max_candidates, rng_state, n_threads
        )

        # Number of changes applied during this round.
        c = 0
        # n_blocks + 1 iterations so the trailing partial block is covered;
        # block_end is clamped to n_vertices.
        for i in range(n_blocks + 1):
            block_start = i * block_size
            block_end = min(n_vertices, (i + 1) * block_size)

            new_candidate_block = new_candidate_neighbors[block_start:block_end]
            old_candidate_block = old_candidate_neighbors[block_start:block_end]
            # Column 0 of current_graph[1] for every vertex, re-read per block
            # so it reflects updates applied by earlier blocks.
            dist_thresholds = current_graph[1][:, 0]

            updates = generate_graph_updates(
                new_candidate_block,
                old_candidate_block,
                dist_thresholds,
                inds,
                indptr,
                data,
                dist,
            )

            c += apply_graph_updates_high_memory(current_graph, updates, in_graph)

        # Early exit when the round changed few enough entries.
        if c <= delta * n_neighbors * n_vertices:
            if verbose:
                print("\tStopping threshold met -- exiting after", n + 1, "iterations")
            return
예제 #9
0
def filter_genes(adata, minc):
    """Filter columns (genes) of ``adata`` by a minimum non-zero count.

    When the module-level ``use_fastpp`` flag is set, per-column non-zero
    counts are computed with a Numba kernel, stored in
    ``adata.var['n_cells']``, and the subset is applied according to the
    module-level ``strategy`` value (1-11).  Otherwise the work is delegated
    to ``sc.pp.filter_genes``.  Timing information is printed along the way.

    Returns
    -------
    tuple
        ``(adata, cols_to_keep, cols)``.  ``cols_to_keep`` is the boolean
        keep mask on the fast path and ``0`` otherwise.  ``cols`` is whatever
        ``csr_col_subset`` returned for strategies 8/11, else
        ``adata.shape[1]``.
    """
    t0 = time.time()
    cols_to_keep = 0
    cols = 0

    if use_fastpp:

        @numba.njit(cache=True, parallel=True)
        def get_cols_to_keep(indices, data, minc, colcount, nthr):
            # One row of counters per worker; worker i scans its own
            # contiguous slice [start, end) of the index/data arrays.
            counts = np.zeros((nthr, colcount), dtype=np.int32)
            for i in numba.prange(nthr):
                start = i * indices.shape[0] // nthr
                end = (i + 1) * indices.shape[0] // nthr
                for j in range(start, end):
                    if data[j] != 0 and indices[j] < colcount:
                        #if indices[j]<colcount:
                        counts[i, indices[j]] += 1
            # Collapse the per-worker rows into one count per column.
            counts = np.sum(counts, axis=0)
            keep_cols = counts >= minc
            return counts, keep_cols

        ncols = adata.X.shape[1]
        nthr = numba.get_num_threads()
        print("filter_genes:  prep ", time.time() - t0)
        counts, cols_to_keep = get_cols_to_keep(adata.X.indices, adata.X.data,
                                                minc, ncols, nthr)
        print("filter_genes:  compute ", time.time() - t0)
        adata.var['n_cells'] = counts
        print("filter_genes:  set metadata ", time.time() - t0)
        # The branches below are alternative ways of applying the same column
        # mask, selected by the module-level `strategy` value.
        if strategy == 1: adata = adata[:, cols_to_keep]
        if strategy == 2: adata._inplace_subset_var(cols_to_keep)
        if strategy == 3: adata = adata[:, cols_to_keep].copy()
        if strategy == 4:
            adata = anndata.AnnData(adata.X[:, cols_to_keep], adata.obs,
                                    adata.var.iloc[cols_to_keep, :])
        if strategy == 5:
            # NOTE(review): DataFrame.drop(..., inplace=True) returns None, so
            # this appears to pass None as the `var` argument -- confirm intent.
            adata = anndata.AnnData(
                adata.X[:, cols_to_keep], adata.obs,
                adata.var.drop(adata.var.iloc[np.logical_not(cols_to_keep), :],
                               inplace=True))
        if strategy == 6: adata._inplace_subset_var(cols_to_keep)
        if strategy == 7:
            adata = anndata.AnnData(csr_subset(adata.X, None, cols_to_keep),
                                    adata.obs, adata.var.iloc[cols_to_keep, :])
        if strategy == 8: cols = csr_col_subset(adata.X, cols_to_keep)
        if strategy == 9: adata._inplace_subset_var(cols_to_keep)
        if strategy == 10:
            adata = anndata.AnnData(adata.X[:, cols_to_keep], adata.obs,
                                    adata.var.iloc[cols_to_keep, :])
        if strategy == 11: cols = csr_col_subset(adata.X, cols_to_keep)
    else:
        sc.pp.filter_genes(adata, min_cells=minc)

    print("filter_genes:  filter total", time.time() - t0)
    # `cols` is still 0 unless strategy 8 or 11 assigned it; fall back to the
    # column count of the (possibly subset) adata.
    if cols == 0: cols = adata.shape[1]
    return adata, cols_to_keep, cols
예제 #10
0
 def __call__(self, *, n_steps, mu_coeff, post_step, post_iter, fields):
     """Invoke the stored step callable and return its result.

     Every value in ``fields`` is repacked as an ``_Impl(field=..., bc=...)``
     pair taken from its ``impl`` mapping; those pairs are passed after the
     scalar arguments, followed by ``self.traversals.null_impl``.
     ``NumbaExperimentalFeatureWarning`` is silenced for the duration of the
     call.
     """
     assert self.n_threads == 1 or numba.get_num_threads() == self.n_threads
     with warnings.catch_warnings():
         warnings.simplefilter('ignore',
                               category=NumbaExperimentalFeatureWarning)
         return self.__call(
             n_steps,
             mu_coeff,
             post_step,
             post_iter,
             *[
                 _Impl(field=fld.impl[IMPL_META_AND_DATA],
                       bc=fld.impl[IMPL_BC])
                 for fld in fields.values()
             ],
             self.traversals.null_impl,
         )
예제 #11
0
def klip_chunk_svd(image_vecs_meansub, n_images, mtx_u0, diag_s0, mtx_v0,
                   k_klip, reuse, strategy, exclusion_values, exclusion_deltas,
                   signal_vecs):
    """Subtract from each frame its projection onto ``k_klip`` eigenimages.

    Frames are the columns of ``image_vecs_meansub``.  For every frame the
    eigenimages are obtained in one of three ways:

    * ``reuse`` true: the first ``k_klip`` columns of ``mtx_u0``.
    * ``strategy == constants.KlipStrategy.DOWNDATE_SVD``: the truncated
      initial factors are passed to ``learning.minimal_downdate`` with the
      excluded column range, and the first ``k_klip`` columns of the
      returned matrix are used.
    * otherwise: the excluded columns are dropped from the data and
      ``learning._numba_svd_wrap`` is run on the remainder.

    Returns ``(output, output_model)``.  ``output_model`` is
    ``np.zeros_like(signal_vecs)`` or ``None``.
    NOTE(review): ``output_model`` is never written after allocation in this
    function -- confirm whether that is intentional.

    The loop uses ``numba.prange``, so this is presumably compiled with
    ``parallel=True`` by a decorator outside this view -- TODO confirm.
    """
    n_frames = image_vecs_meansub.shape[1]
    output = np.zeros_like(image_vecs_meansub)
    if signal_vecs is not None:
        output_model = np.zeros_like(signal_vecs)
    else:
        output_model = None
    print('klip_chunk_svd running with', numba.get_num_threads(), 'threads on',
          n_frames, 'frames')
    for i in numba.prange(n_frames):
        if not reuse:
            # Inclusive index range of frames to leave out for frame i.
            min_excluded_idx, max_excluded_idx = exclusions_to_range(
                n_images=n_images,
                current_idx=i,
                exclusion_values=exclusion_values,
                exclusion_deltas=exclusion_deltas,
            )
            n_excluded = max_excluded_idx - min_excluded_idx + 1
            print('processing frame', i, ', excluding', n_excluded,
                  ' frames (from frame', min_excluded_idx, 'to',
                  max_excluded_idx, ")")
            if strategy == constants.KlipStrategy.DOWNDATE_SVD:
                assert mtx_u0 is not None
                assert diag_s0 is not None
                assert mtx_v0 is not None
                # Keep k_klip + n_excluded leading components of the initial
                # factors before downdating.
                subset_mtx_u0 = np.ascontiguousarray(mtx_u0[:, :k_klip +
                                                            n_excluded])
                subset_diag_s = diag_s0[:k_klip + n_excluded]
                subset_mtx_v0 = np.ascontiguousarray(mtx_v0[:, :k_klip +
                                                            n_excluded])
                # The helper takes an exclusive upper bound, hence the + 1.
                new_u, _, _ = learning.minimal_downdate(
                    subset_mtx_u0,
                    subset_diag_s,
                    subset_mtx_v0,
                    min_col_to_remove=min_excluded_idx,
                    max_col_to_remove=max_excluded_idx + 1,
                )
                eigenimages = new_u[:, :k_klip]
            else:
                subset_image_vecs = utils.drop_idx_range_cols(
                    image_vecs_meansub, min_excluded_idx, max_excluded_idx + 1)
                eigenimages, _, _ = learning._numba_svd_wrap(
                    subset_image_vecs, k_klip)
        else:
            assert mtx_u0 is not None
            eigenimages = mtx_u0[:, :k_klip]
        meansub_target = image_vecs_meansub[:, i]
        # Since we may have truncated by columns above, this makes the array
        # contiguous again and silences the NumbaPerformanceWarning
        eigenimages = np.ascontiguousarray(eigenimages)
        # Residual after removing the component spanned by the eigenimages.
        output[:, i] = meansub_target - eigenimages @ (
            eigenimages.T @ meansub_target)
    return output, output_model
예제 #12
0
 def condensation(solver, n_cell, cell_start_arg, v, particle_temperatures,
                  r_cr, n, vdry, idx, rhod, thd, qv, dv, prhod, pthd, pqv,
                  kappa, rtol_x, rtol_thd, dt, substeps, cell_order,
                  ripening_flags):
     """Unwrap storage objects and delegate to ``_condensation``.

     Arguments that carry a ``.data`` attribute are passed as that
     attribute; ``solver``, ``n_cell``, ``dv``, ``kappa``, the tolerances,
     ``dt`` and ``cell_order`` are forwarded unchanged.
     """
     # Never request more threads than there are cells.
     thread_count = min(numba.get_num_threads(), n_cell)
     AlgorithmicMethods._condensation(
         solver,
         thread_count,
         n_cell,
         cell_start_arg.data,
         v.data,
         particle_temperatures.data,
         r_cr.data,
         n.data,
         vdry.data,
         idx.data,
         rhod.data,
         thd.data,
         qv.data,
         dv,
         prhod.data,
         pthd.data,
         pqv.data,
         kappa,
         rtol_x,
         rtol_thd,
         dt,
         substeps.data,
         cell_order,
         ripening_flags.data,
     )
예제 #13
0
def get_num_threads():
    """
    Report the thread count Numba is currently configured with.

    Returns
    -------
    int
        The value of ``numba.get_num_threads()``.

    """
    current = numba.get_num_threads()
    return current
예제 #14
0
 def condensation(solver, n_cell, cell_start_arg, v, v_cr, n, vdry, idx,
                  rhod, thd, qv, dv, prhod, pthd, pqv, kappa, rtol_x,
                  rtol_thd, dt, counters, cell_order, RH_max, success):
     """Unwrap storage objects and delegate to ``_condensation``.

     Arguments that carry a ``.data`` attribute are passed as that
     attribute, and the four entries of ``counters`` are expanded into
     separate positional arguments.  The remaining scalars and
     ``cell_order`` are forwarded unchanged.
     """
     # Never request more threads than there are cells.
     thread_count = min(numba.get_num_threads(), n_cell)
     AlgorithmicMethods._condensation(
         solver,
         thread_count,
         n_cell,
         cell_start_arg.data,
         v.data,
         v_cr.data,
         n.data,
         vdry.data,
         idx.data,
         rhod.data,
         thd.data,
         qv.data,
         dv,
         prhod.data,
         pthd.data,
         pqv.data,
         kappa,
         rtol_x,
         rtol_thd,
         dt,
         counters['n_substeps'].data,
         counters['n_activating'].data,
         counters['n_deactivating'].data,
         counters['n_ripening'].data,
         cell_order,
         RH_max.data,
         success.data,
     )
예제 #15
0
 def test_func(nthreads):
     """Call ``child`` from a prange loop and check the mask it leaves behind.

     ``M``, ``N``, ``mask`` and ``child`` are taken from the enclosing scope.
     Returns the ``(M, N)`` buffer handed to ``child``.
     """
     buf = np.zeros((M, N))
     set_num_threads(nthreads)
     for i in prange(M):
         # Cycle through mask values 1..mask.
         local_mask = 1 + i % mask
         # when the threads exit the child functions they should
         # have a TLS slot value of the local mask as it was set
         # in child
         if local_mask < config.NUMBA_NUM_THREADS:
             child(buf, local_mask)
             assert get_num_threads() == local_mask
     return buf
예제 #16
0
 def _test_func(nthreads):
     """Set a per-iteration thread mask in the parent and sum what is observed.

     ``M``, ``N``, ``mask`` and ``child_func`` are taken from the enclosing
     scope.  Returns ``(acc, buf)`` where ``acc`` is the sum of
     ``get_num_threads()`` over all iterations.
     """
     acc = 0
     buf = np.zeros((M, N))
     set_num_threads(nthreads)
     for i in prange(M):
         # Cycle through mask values 1..mask.
         local_mask = 1 + i % mask
         # set threads in parent function
         set_num_threads(local_mask)
         if local_mask < N:
             child_func(buf, local_mask)
         acc += get_num_threads()
     return acc, buf
예제 #17
0
 def __call__(self, nt, mu_coeff, advectee, advectee_bc, advector,
              advector_bc, g_factor, g_factor_bc, vectmp_a, vectmp_a_bc,
              vectmp_b, vectmp_b_bc, vectmp_c, vectmp_c_bc, psi_min,
              psi_min_bc, psi_max, psi_max_bc, beta_up, beta_up_bc,
              beta_down, beta_down_bc):
     """Forward every argument, in order, to the stored step callable.

     Before delegating, asserts that either this object is single-threaded
     or Numba's current thread count matches ``self.n_threads``.
     """
     assert self.n_threads == 1 or numba.get_num_threads() == self.n_threads
     # Arguments come in (field, boundary-condition) pairs after the two
     # leading scalars; one pair per line keeps them easy to match up.
     return self.__call(
         nt, mu_coeff,
         advectee, advectee_bc,
         advector, advector_bc,
         g_factor, g_factor_bc,
         vectmp_a, vectmp_a_bc,
         vectmp_b, vectmp_b_bc,
         vectmp_c, vectmp_c_bc,
         psi_min, psi_min_bc,
         psi_max, psi_max_bc,
         beta_up, beta_up_bc,
         beta_down, beta_down_bc,
     )
예제 #18
0
def numba_info():
    """Return a ``NumbaInfo(layer, threads)`` record, or ``None``.

    ``_par_test(100)`` is run first and its result logged at debug level.
    If ``numba.threading_layer()`` raises ``ValueError`` the function logs
    that threading is not initialized and returns ``None``.
    """
    total = _par_test(100)
    _log.debug('sum: %d', total)

    try:
        layer = numba.threading_layer()
    except ValueError:
        _log.info('Numba threading not initialized')
        return None
    else:
        _log.info('numba threading layer: %s', layer)
        return NumbaInfo(layer, numba.get_num_threads())
예제 #19
0
def filter_cells(adata, ming, maxg):
    """Filter rows (cells) of ``adata`` by their number of stored entries.

    When the module-level ``use_fastpp`` flag is set, row lengths are taken
    from consecutive differences of ``adata.X.indptr``, stored in
    ``adata.obs['n_genes']``, and rows with a length in ``[ming, maxg]`` are
    kept according to the module-level ``strategy`` value (1-11).  Otherwise
    ``sc.pp.filter_cells`` is called twice, once for each bound.  Timing
    information is printed along the way.

    Returns
    -------
    tuple
        ``(adata, rows_to_keep, rows)``.  ``rows_to_keep`` is the boolean
        keep mask on the fast path and ``0`` otherwise.  ``rows`` is whatever
        the CSR subset helper returned for strategies 8/11, else
        ``adata.shape[0]``.
    """
    t0 = time.time()
    rows_to_keep = 0
    rows = 0

    if use_fastpp:

        @numba.njit(cache=True, parallel=True)
        def get_rows_to_keep(indptr, ming, maxg):
            # Stored entries per row = difference of consecutive row pointers.
            lens = indptr[1:] - indptr[:-1]
            keep_rows = np.logical_and(lens >= ming, lens <= maxg)
            return lens, keep_rows

        # NOTE(review): nrows, nelems and nthr are assigned but not used
        # anywhere below in this function.
        nrows = adata.X.shape[0]
        nelems = adata.X.data.shape[0]
        nthr = numba.get_num_threads()
        print("filter_cells:  prep ", time.time() - t0)
        row_lengths, rows_to_keep = get_rows_to_keep(adata.X.indptr, ming,
                                                     maxg)
        print("filter_cells:  compute ", time.time() - t0)
        adata.obs['n_genes'] = row_lengths
        print("filter_cells:  set metadata ", time.time() - t0)
        # The branches below are alternative ways of applying the same row
        # mask, selected by the module-level `strategy` value.
        if strategy == 1: adata = adata[rows_to_keep]
        if strategy == 2: adata._inplace_subset_obs(rows_to_keep)
        if strategy == 3: adata = adata[rows_to_keep].copy()
        if strategy == 4:
            adata = anndata.AnnData(adata.X[rows_to_keep],
                                    adata.obs.iloc[rows_to_keep, :], adata.var)
        if strategy == 5:
            # NOTE(review): DataFrame.drop(..., inplace=True) returns None, so
            # this appears to pass None as the `obs` argument -- confirm intent.
            adata = anndata.AnnData(
                adata.X[rows_to_keep],
                adata.obs.drop(adata.obs.iloc[np.logical_not(rows_to_keep), :],
                               inplace=True), adata.var)
        if strategy == 6: adata._inplace_subset_obs(rows_to_keep)
        if strategy == 7:
            adata = anndata.AnnData(csr_subset(adata.X, rows_to_keep),
                                    adata.obs.iloc[rows_to_keep, :], adata.var)
        if strategy == 8: rows = csr_row_subset(adata.X, rows_to_keep)
        if strategy == 9: adata._inplace_subset_obs(rows_to_keep)
        if strategy == 10:
            adata = anndata.AnnData(adata.X[rows_to_keep],
                                    adata.obs.iloc[rows_to_keep, :], adata.var)
        if strategy == 11: rows = csr_row_subset2(adata.X, rows_to_keep)
        #if strategy==11: adata = anndata.AnnData(adata.X[rows_to_keep],adata.obs.iloc[rows_to_keep,:],adata.var)
    else:
        sc.pp.filter_cells(adata, min_genes=ming)
        print("filter_cells:  first call ", time.time() - t0)
        sc.pp.filter_cells(adata, max_genes=maxg)

    print("filter_cells:  filter total", time.time() - t0)
    # `rows` is still 0 unless strategy 8 or 11 assigned it; fall back to the
    # row count of the (possibly subset) adata.
    if rows == 0: rows = adata.shape[0]
    return adata, rows_to_keep, rows
예제 #20
0
def set_threads(num: int = -1) -> int:
    """Optionally change the number of numba threads.

    Args:
        num (int, optional): Desired thread count. Non-positive values
            leave the current setting untouched. Defaults to -1.

    Returns:
        int: The thread count that was in effect before the call when
            ``num`` is positive, otherwise -1.
    """
    if num <= 0:
        return -1
    previous = get_num_threads()
    # Skip the setter when the requested count is already active.
    if previous != num:
        set_num_threads(num)
    return previous
예제 #21
0
    def __init__(
        self,
        minibatch: int,
        maxT: int,
        maxU: int,
        alphabet_size: int,
        workspace,
        blank: int,
        fastemit_lambda: float,
        clamp: float,
        num_threads: int,
        stream,
    ):
        """
        Store the launch configuration for the CUDA Transducer Loss kernels.

        Args:
            minibatch: Batch size.
            maxT: Maximum acoustic sequence length (T in the logprobs tensor).
            maxU: Maximum target sequence length (U in the logprobs tensor).
            alphabet_size: Vocabulary dimension V+1 (including the RNNT blank).
            workspace: Pre-allocated memory that is sliced and reshaped into the
                working blocks; wrapped with ``cuda.as_cuda_array``.
            blank: Index of the RNNT blank token in the vocabulary.
            fastemit_lambda: Scaling factor for FastEmit regularization (see
                "FastEmit: Low-latency Streaming ASR with Sequence-level Emission
                Regularization").
            clamp: Gradient clamp value; its absolute value is stored.
            num_threads: Requested thread count. When positive, the Numba thread
                count is set to ``min(cpu_count, num_threads)``; otherwise the
                current Numba thread count is recorded instead.
            stream: Numba Cuda Stream.
        """
        # Problem dimensions.
        self.minibatch_ = minibatch
        self.maxT_ = maxT
        self.maxU_ = maxU
        self.alphabet_size_ = alphabet_size
        # Flat vector of floatX numbers representing the allocated memory slices.
        self.gpu_workspace = cuda.as_cuda_array(workspace)
        # Loss hyper-parameters.
        self.blank_ = blank
        self.fastemit_lambda_ = fastemit_lambda
        self.clamp_ = abs(clamp)
        self.num_threads_ = num_threads
        self.stream_ = stream  # type: cuda.cudadrv.driver.Stream

        if num_threads > 0:
            core_count = multiprocessing.cpu_count()
            numba.set_num_threads(min(core_count, num_threads))
        else:
            self.num_threads_ = numba.get_num_threads()
예제 #22
0
def set_numba_threads(n):
    """Run the enclosed code with Numba's thread count set to ``n``.

    Requests above ``NUMBA_NUM_THREADS`` emit a warning and are lowered to
    that maximum.  This is a single-``yield`` generator shaped for use as a
    context manager (presumably wrapped by ``contextlib.contextmanager``
    outside this block -- confirm).  The thread count active on entry is
    restored on exit.
    """
    import numba
    from numba.core.config import NUMBA_NUM_THREADS
    saved_thread_count = numba.get_num_threads()
    try:
        if n > NUMBA_NUM_THREADS:
            warnings.warn(
                f"Attempting to set threads to {n}, which is larger than "
                f"NUMBA_NUM_THREADS={NUMBA_NUM_THREADS}. "
                f"Setting to allowed maximum NUMBA_NUM_THREADS instead.")
            n = NUMBA_NUM_THREADS

        numba.set_num_threads(n)
        yield
    finally:
        # Always hand the caller's original setting back.
        numba.set_num_threads(saved_thread_count)
예제 #23
0
    def _transform(self, X, y=None):
        """Transform input time series using random convolutional kernels.

        The Numba thread count is changed for the duration of the kernel
        transform and restored afterwards, including when the transform
        raises.

        Parameters
        ----------
        X : 3D np.ndarray of shape = [n_instances, n_dimensions, series_length]
            panel of time series to transform
        y : ignored argument for interface compatibility

        Returns
        -------
        pandas DataFrame, transformed features
        """
        X = X.astype(np.float64)
        X = convert(X,
                    from_type="numpy3D",
                    to_type="numpyflat",
                    as_scitype="Panel")
        if self.normalise:
            # The small constant avoids division by zero for constant series.
            X = (X - X.mean(axis=-1, keepdims=True)) / (
                X.std(axis=-1, keepdims=True) + 1e-8)

        # First-order difference along the last axis, passed to the kernel
        # transform together with the raw series.
        X1 = np.diff(X, 1)

        # Fall back to every core when n_jobs is non-positive or exceeds the
        # number of cores on this machine.
        n_cores = multiprocessing.cpu_count()
        if self.n_jobs < 1 or self.n_jobs > n_cores:
            n_jobs = n_cores
        else:
            n_jobs = self.n_jobs

        prev_threads = get_num_threads()
        set_num_threads(n_jobs)
        try:
            X = _transform(
                X,
                X1,
                self.parameter,
                self.parameter1,
                self.n_features_per_kernel,
            )
        finally:
            # Do not leak our thread setting into the caller on failure.
            set_num_threads(prev_threads)

        X = np.nan_to_num(X)
        return pd.DataFrame(X)
예제 #24
0
    def transform(self, X, y=None):
        """Transform input time series using random convolutional kernels.

        The Numba thread count is changed for the duration of the kernel
        transform and restored afterwards, including when the transform
        raises.

        Parameters
        ----------
        X : pandas DataFrame, input time series (sktime format)
        y : array_like, target values (optional, ignored as irrelevant)

        Returns
        -------
        pandas DataFrame, transformed features
        """
        self.check_is_fitted()
        _X = check_X(X, enforce_univariate=True, coerce_to_numpy=True)
        _X = _X[:, 0, :].astype(np.float64)

        _X = from_3d_numpy_to_2d_array(_X)

        if self.normalise:
            # The small constant avoids division by zero for constant series.
            _X = (_X - _X.mean(axis=-1, keepdims=True)) / (
                _X.std(axis=-1, keepdims=True) + 1e-8)

        # First-order difference along the last axis, passed to the kernel
        # transform together with the raw series.
        X1 = np.diff(_X, 1)

        # Fall back to every core when n_jobs is non-positive or exceeds the
        # number of cores on this machine.
        n_cores = multiprocessing.cpu_count()
        if self.n_jobs < 1 or self.n_jobs > n_cores:
            n_jobs = n_cores
        else:
            n_jobs = self.n_jobs

        prev_threads = get_num_threads()
        set_num_threads(n_jobs)
        try:
            _X = _transform(
                _X,
                X1,
                self.parameter,
                self.parameter1,
                self.n_features_per_kernel,
            )
        finally:
            # Do not leak our thread setting into the caller on failure.
            set_num_threads(prev_threads)

        _X = np.nan_to_num(_X)
        return pd.DataFrame(_X)
예제 #25
0
    def __init__(
        self,
        minibatch: int,
        maxT: int,
        maxU: int,
        alphabet_size: int,
        workspace: torch.Tensor,
        blank: int,
        fastemit_lambda: float,
        num_threads: int,
        batch_first: bool,
    ):
        """
        Store the configuration for computing the Transducer Loss on CPU.

        Args:
            minibatch: Size of the minibatch b.
            maxT: Maximum acoustic sequence length (T in the logprobs tensor).
            maxU: Maximum target sequence length (U in the logprobs tensor).
            alphabet_size: Vocabulary dimension V+1 (including the RNNT blank).
            workspace: Pre-allocated memory that is sliced and reshaped into the
                working blocks.
            blank: Index of the RNNT blank token in the vocabulary.
            fastemit_lambda: Scaling factor for FastEmit regularization (see
                "FastEmit: Low-latency Streaming ASR with Sequence-level Emission
                Regularization").
            num_threads: Requested thread count. When positive, the Numba thread
                count is set to ``min(cpu_count, num_threads)``; otherwise the
                current Numba thread count is recorded instead.
            batch_first: Bool that decides if batch dimension is first or third.
        """
        # Problem dimensions.
        self.minibatch_ = minibatch
        self.maxT_ = maxT
        self.maxU_ = maxU
        self.alphabet_size_ = alphabet_size
        # Flat vector of floatX numbers representing the allocated memory slices.
        self.workspace = workspace
        # Loss hyper-parameters and layout.
        self.blank_ = blank
        self.fastemit_lambda_ = fastemit_lambda
        self.num_threads_ = num_threads
        self.batch_first = batch_first

        if num_threads > 0:
            core_count = multiprocessing.cpu_count()
            numba.set_num_threads(min(core_count, num_threads))
        else:
            self.num_threads_ = numba.get_num_threads()
예제 #26
0
def VelocityStructFunc_tree(pos,
                            vel,
                            weight,
                            tree,
                            rbins,
                            max_bin_size_ratio=100,
                            theta=0.7,
                            boxsize=0,
                            weighted_binning=False):
    """Accumulate weighted per-bin sums from tree walks and return their ratio

    For every point ``i``, ``VelocityStructWalk(pos[i], vel[i], tree, rbins, ...)``
    returns a pair ``(dwtsum, dmbin)``.  These are accumulated per radial bin as
    ``weight[i] * dmbin[j]`` and ``weight[i] * dwtsum[j]``, and the result is the
    per-bin ratio of the two totals.  (By its name this is a velocity structure
    function -- the exact quantity is defined by VelocityStructWalk; confirm there.)

    Arguments:
    pos -- array of point positions, indexed along axis 0 (one entry per point)
    vel -- array indexed like pos; vel[i] is passed to the walk together with pos[i]
    weight -- per-point weights, indexed like pos
    tree -- tree object forwarded to VelocityStructWalk (presumably an Octree instance -- confirm)
    rbins -- 1D array of radial bin edges; the output has rbins.shape[0] - 1 entries

    Optional arguments (all forwarded unchanged to VelocityStructWalk):
    max_bin_size_ratio -- default 100
    theta -- default 0.7
    boxsize -- default 0
    weighted_binning -- default False

    Returns:
    1D array of length rbins.shape[0] - 1 holding mbin.sum(0) / wtsum.sum(0)
    """

    Nthreads = get_num_threads()
    # One accumulator row per chunk so parallel iterations never write to the
    # same row.
    mbin = zeros((Nthreads, rbins.shape[0] - 1))
    wtsum = zeros_like(mbin)
    # break into chunks for parallelization
    for chunk in prange(Nthreads):
        # Chunk k handles points k, k + Nthreads, k + 2 * Nthreads, ...
        for i in range(chunk, pos.shape[0], Nthreads):
            dwtsum, dmbin = VelocityStructWalk(
                pos[i],
                vel[i],
                tree,
                rbins,
                max_bin_size_ratio=max_bin_size_ratio,
                theta=theta,
                boxsize=boxsize,
                weighted_binning=weighted_binning)
            for j in range(mbin.shape[1]):
                mbin[chunk, j] += dmbin[j] * weight[i]
                wtsum[chunk, j] += weight[i] * dwtsum[j]
    # Collapse the per-chunk rows, then take the per-bin ratio.
    return mbin.sum(0) / wtsum.sum(0)
예제 #27
0
def apply_graph_updates_low_memory(current_graph, updates):
    """Push every ``(p, q, d)`` update into the heaps of both ``p`` and ``q``.

    ``current_graph`` is indexed as ``[0]`` indices, ``[1]`` priorities and
    ``[2]`` flags.  Each update is offered to ``p``'s heap (with ``q`` as the
    neighbour) and to ``q``'s heap (with ``p`` as the neighbour) through
    ``checked_flagged_heap_push``.  Updates containing ``-1`` are skipped.

    Returns the sum of the values returned by ``checked_flagged_heap_push``.

    The outer loop uses ``numba.prange``, so this is presumably compiled with
    ``parallel=True`` by a decorator outside this view -- TODO confirm.
    """

    n_changes = 0
    priorities = current_graph[1]
    indices = current_graph[0]
    flags = current_graph[2]
    n_threads = numba.get_num_threads()

    # Every worker n scans all updates but only touches vertices whose index
    # is congruent to n modulo n_threads, so no two workers write to the same
    # vertex's heap.
    for n in numba.prange(n_threads):
        for i in range(len(updates)):
            for j in range(len(updates[i])):
                p, q, d = updates[i][j]

                # -1 marks an unused update slot.
                if p == -1 or q == -1:
                    continue

                if p % n_threads == n:
                    # added = heap_push(current_graph, p, d, q, 1)
                    added = checked_flagged_heap_push(
                        priorities[p],
                        indices[p],
                        flags[p],
                        d,
                        q,
                        1,
                    )
                    n_changes += added

                if q % n_threads == n:
                    # added = heap_push(current_graph, q, d, p, 1)
                    added = checked_flagged_heap_push(
                        priorities[q],
                        indices[q],
                        flags[q],
                        d,
                        p,
                        1,
                    )
                    n_changes += added

    return n_changes
    def _transform(self, X, y=None):
        """Transform input time series using random convolutional kernels.

        The Numba thread count is changed for the duration of the kernel
        transform and restored afterwards, including when the transform
        raises.

        Parameters
        ----------
        X : 3D np.ndarray of shape = [n_instances, n_dimensions, series_length]
            panel of time series to transform
        y : ignored argument for interface compatibility

        Returns
        -------
        pandas DataFrame, transformed features
        """
        if self.normalise:
            # The small constant avoids division by zero for constant series.
            X = (X - X.mean(axis=-1, keepdims=True)) / (
                X.std(axis=-1, keepdims=True) + 1e-8
            )

        # First-order difference along the last axis, passed to the kernel
        # transform together with the raw series.
        _X1 = np.diff(X, 1)

        # Fall back to every core when n_jobs is non-positive or exceeds the
        # number of cores on this machine.
        n_cores = multiprocessing.cpu_count()
        if self.n_jobs < 1 or self.n_jobs > n_cores:
            n_jobs = n_cores
        else:
            n_jobs = self.n_jobs

        prev_threads = get_num_threads()
        set_num_threads(n_jobs)
        try:
            X = _transform(
                X,
                _X1,
                self.parameter,
                self.parameter1,
                self.n_features_per_kernel,
            )
        finally:
            # Do not leak our thread setting into the caller on failure.
            set_num_threads(prev_threads)

        X = np.nan_to_num(X)
        return pd.DataFrame(X)
예제 #29
0
def sum(X, axis=None):
    """Sum the elements of ``X``, optionally along ``axis``.

    Sparse inputs, and every input when the module-level ``use_fastpp`` flag
    is false, are delegated to ``X.sum`` and wrapped in ``np.array``.  Dense
    inputs use Numba kernels for ``axis=None`` and, for 2-D arrays, for
    ``axis=0`` and any other axis value (treated as a per-row sum).  Other
    dense cases fall back to ``X.sum(axis=axis)``.

    Note: the name shadows the builtin ``sum``; it is kept for callers.

    Parameters
    ----------
    X : array or sparse matrix
    axis : int or None, optional

    Returns
    -------
    Scalar or array with the requested sum.
    """
    @numba.njit(cache=True, parallel=True)
    def _sum(X):
        # Total over all elements.
        s = 0
        for i in numba.pndindex(X.shape):
            s += X[i]
        return s

    @numba.njit(cache=True, parallel=True)
    def _sum0(X, nthr):
        # One partial-sum row per worker.  The rows must start at zero and be
        # accumulated into: assigning would keep only the last row a worker
        # visited, and a worker with no rows would leave garbage behind.
        s = np.zeros((nthr, X.shape[1]), dtype=X.dtype)
        for i in numba.prange(nthr):
            # Worker i handles rows i, i + nthr, i + 2 * nthr, ...
            for r in range(i, X.shape[0], nthr):
                for c in range(X.shape[1]):
                    s[i, c] += X[r, c]
        return s.sum(axis=0)

    @numba.njit(cache=True, parallel=True)
    def _sum1(X):
        # Independent sum for every row.
        s = np.empty(X.shape[0], dtype=X.dtype)
        for r in numba.prange(X.shape[0]):
            s[r] = X[r].sum()
        return s

    if issparse(X) or not use_fastpp:
        if axis is None:
            return np.array(X.sum())
        return np.array(X.sum(axis=axis))
    if axis is None:
        return _sum(X)
    if X.ndim == 2:
        if axis == 0:
            nthr = numba.get_num_threads()
            return _sum0(X, nthr)
        return _sum1(X)
    return X.sum(axis=axis)
예제 #30
0
    def __init__(self,
                 *,
                 options: Options,
                 n_dims: (int, None) = None,
                 non_unit_g_factor: bool = False,
                 grid: (tuple, None) = None,
                 n_threads: (int, None) = None):
        """Validate the configuration and build the step implementation.

        Exactly one of ``n_dims`` and ``grid`` must be given, otherwise
        ``ValueError`` is raised.  When only ``n_dims`` is given, ``grid``
        becomes a tuple of ``-1`` of that length; when only ``grid`` is
        given, ``n_dims`` is its length.  ``NotImplementedError`` is raised
        for more than one dimension combined with ``options.DPDC``.

        ``n_threads`` defaults to Numba's current thread count and is forced
        to 1 for one-dimensional problems or when Numba reports that
        parallel support is unavailable.
        """
        # Both given or both missing is an error.
        if (n_dims is None) == (grid is None):
            raise ValueError()
        if grid is None:
            grid = (-1,) * n_dims
        else:
            n_dims = len(grid)
        if n_dims > 1 and options.DPDC:
            raise NotImplementedError()
        if n_threads is None:
            n_threads = numba.get_num_threads()

        self.__options = options
        self.__n_threads = n_threads if n_dims != 1 else 1

        if self.__n_threads > 1:
            # Degrade to a single thread when parfors are unsupported.
            try:
                numba.parfors.parfor.ensure_parallel_support()
            except numba.core.errors.UnsupportedParforsError:
                print(
                    "Numba ensure_parallel_support() failed, forcing n_threads=1",
                    file=sys.stderr)
                self.__n_threads = 1

        self.__n_dims = n_dims
        step_impl, traversals = make_step_impl(options, non_unit_g_factor,
                                               grid, self.n_threads)
        self.__call = step_impl
        self.traversals = traversals