Esempio n. 1
0
def concat_backward(gradient: Tensor, tensors: List[Tensor], axis: int = 0):
    """Route the gradient of a concatenation back to each input tensor.

    Args:
        gradient: Gradient with respect to the concatenated output; its
            ``data`` is sliced along ``axis``.
        tensors: The tensors that were concatenated, in their original order.
        axis: Axis along which the concatenation was performed.
    """
    _check_tensors(*tensors)
    engine = _get_engine(*tensors)

    # Cut the gradient at the cumulative extents of the inputs along ``axis``
    # so that inputs of different lengths each receive a slice of matching
    # size. An equal N-way split is only correct when every input has the
    # same length. The offsets are plain Python ints so that either array
    # backend accepts them as split indices.
    offsets = []
    running = 0
    for tensor in tensors[:-1]:
        running += tensor.data.shape[axis]
        offsets.append(running)

    grad_arrays = engine.split(gradient.data, offsets, axis=axis)
    for tensor, grad_array in zip(tensors, grad_arrays):
        # The multiplication by ones is kept from the original code: it hands
        # ``_set_grad`` a newly computed array rather than the split piece.
        _set_grad(tensor, data=grad_array * engine.ones_like(tensor.data))
Esempio n. 2
0
def eps_greedy_hypothesis(q_func, env, state, n_actions, eps=.3):
    """Pick an action epsilon-greedily, exploiting via repeated re-evaluation.

    With probability ``eps`` an action is sampled from ``env.action_space``.
    Otherwise ``q_func`` is evaluated on ``state``; columns
    ``n_actions:-1`` of its output are cut into ``n_actions`` equal chunks,
    each chunk is fed back into ``q_func``, and this feeding-back is repeated
    for five levels in total. The outputs collected at every level are
    stacked and the column holding the overall largest value is returned.

    Args:
        q_func: Callable returning an object whose ``.data`` is a CuPy array.
            It is assumed to be free of side effects, since one redundant
            round of evaluations present in earlier code was removed.
        env: Environment exposing ``action_space.sample()``.
        state: Current observation; must support ``state[None].astype``.
        n_actions: Number of discrete actions.
        eps: Exploration probability.

    Returns:
        The sampled action, or the index of the selected column.
    """
    if np.random.rand() < eps:
        return env.action_space.sample()

    def collect(outputs):
        # Build one column per entry of ``outputs`` (moved to host memory).
        # NOTE(review): ``y[:n_actions]`` slices rows, not columns, so each
        # column contains more than the first ``n_actions`` output values.
        # Confirm whether ``y[:, :n_actions]`` was intended.
        return np.hstack(
            [y[:n_actions].get().flatten()[None].T for y in outputs])

    s0 = cp.array(state[None].astype(DTYPE))
    y0 = q_func(s0).data

    # First level: one follow-up input per chunk of the initial output.
    hn = cp.split(y0[:, n_actions:-1], n_actions, axis=1)
    yn = [q_func(s).data for s in hn]
    to_max = collect(yn)

    # Four further levels: every output spawns a stacked batch of follow-up
    # inputs, and the collected values are appended as extra rows.
    for _ in range(4):
        hn = [
            cp.vstack(cp.split(y[:, n_actions:-1], n_actions, axis=1))
            for y in yn
        ]
        yn = [q_func(s).data for s in hn]
        to_max = np.vstack([to_max, collect(yn)])

    # Best value per column over all levels, then the best column.
    return to_max.max(axis=0).argmax()
Esempio n. 3
0
    def backward(self, error):
        """Split ``error`` along the cached axis and hand one piece to each input.

        ``self.cache`` holds ``(x, axis)``. The split points are the running
        totals of the inputs' extents along ``axis`` (the last input is
        excluded, since it simply receives the remainder). Piece ``i`` is
        passed to ``x[i].accumulate``.
        """
        inputs, axis = self.cache

        # numpy and cupy behave differently here:
        #   cupy.cumsum --> raises an error when the input is a list
        #   cupy.split  --> the index data has to be moved to the CPU
        # so in "gpu" mode the offsets are computed with plain ``numpy``.
        cumsum = numpy.cumsum if DEVICE == "gpu" else np.cumsum
        extents = [inputs[i].value.shape[axis] for i in range(len(inputs) - 1)]
        pieces = np.split(error, cumsum(extents), axis=axis)

        for i, piece in enumerate(pieces):
            inputs[i].accumulate(piece)
Esempio n. 4
0
    def forward(self):
        """Derive the convolution kernels from the phase mask ``self.phi``.

        Steps performed, in order:

        1. The PSF is the squared magnitude of the inverse 2-D FFT (over
           axes 0 and 1) of ``exp(1j * phi)``, divided by its sum over the
           first two axes.
        2. It is resized on the host to ``final_dim x final_dim`` with
           nearest-neighbour interpolation.
        3. The transposed matrix loaded from ``filename_crosstalk`` is
           applied along the last axis.
        4. The result is cropped by ``unpad_1`` / ``unpad_2``, cut in two
           along axis 0, and the second half is subtracted from the first.
        5. The difference is cut into ``rows x cols`` tiles, each trimmed by
           ``pad`` on every side of its first two axes.

        Returns:
            The stacked tiles with axes permuted by ``(1, 2, 3, 0)`` and
            multiplied by ``norm_factor``.
        """
        # Original author's note: input is the tensor phi:(1120,1120,3)
        phase = cp.asarray(self.phi)

        shifted = cp.fft.ifftshift(cp.exp(1.0j*phase), axes=(0, 1))
        field = cp.fft.ifft2(shifted, axes=(0, 1))
        psf = cp.square(cp.abs(field))
        if len(psf.shape) == 2:
            total = cp.sum(psf)
        else:
            # Keep two leading singleton axes so the division broadcasts.
            per_channel = cp.sum(psf, axis=(0, 1))
            total = cp.reshape(per_channel, (1, 1) + psf.shape[2:])
        psf = psf/total

        # cv.resize is given a host array, hence the device -> host copy.
        resized = cv.resize(cp.asnumpy(psf), (final_dim, final_dim),
                            interpolation=cv.INTER_NEAREST)
        resized = cp.asarray(resized)

        crosstalk_t = np.transpose(np.load(filename_crosstalk))
        psf_host = cp.asnumpy(resized)

        # Batched matrix-vector product over the last axis of the PSF.
        mixed = np.matmul(
            crosstalk_t.reshape((1, 1) + crosstalk_t.shape),
            psf_host.reshape(psf_host.shape + (1,)),
        ).reshape(psf_host.shape)

        # Original author's note: psf_new_unpadded (152, 228, 3)
        cropped = cp.asarray(mixed[unpad_1:-unpad_1, unpad_2:-unpad_2])
        # Original author's note: tiled kernels (76, 228, 3)
        first_half, second_half = cp.split(cropped, 2)
        tiled_kernels = first_half - second_half

        def tile(i, j):
            # Tile in row band ``i`` and column band ``j`` of the kernel sheet.
            band = cp.split(tiled_kernels, rows, axis=0)[i]
            return cp.split(band, cols, axis=1)[j]

        # Original author's note: (48, 3, 3, 3)
        weights_pm = [
            tile(i, j)[pad:-pad, pad:-pad]
            for i in range(rows)
            for j in range(cols)
        ]

        # Original author's note: (3,3,3,48)
        # NOTE(review): ``weights_pm`` is a list of CuPy arrays at this point;
        # confirm np.asarray accepts that with the installed CuPy version.
        weights_pm = np.transpose(np.asarray(weights_pm), (1, 2, 3, 0))
        mempool.free_all_blocks()
        return weights_pm*norm_factor
Esempio n. 5
0
def converter(batch, device):
    """Convert ``batch`` according to ``device``.

    * ``device == 0``: returns ``np.array(batch)``.
    * ``device == 1``: wraps the batch with ``xp.array`` and passes it to
      ``cuda.to_gpu``.
    * ``device >= 2``: passes the batch through ``cuda.to_cpu``, casts it to
      int64, splits it into ``device`` equal parts and calls
      ``cuda.to_gpu(part, i)`` for part ``i`` (``i`` is presumably the target
      GPU id); a list of the results is returned.
    * any other value: ``batch`` is returned untouched.
    """
    if device == 0:
        return np.array(batch)

    if device == 1:
        return cuda.to_gpu(xp.array(batch))

    if device >= 2:
        host_batch = cuda.to_cpu(batch)
        shards = xp.split(xp.array(host_batch).astype(xp.int64), device)
        return [cuda.to_gpu(shards[gpu_id], gpu_id)
                for gpu_id in range(device)]

    return batch
Esempio n. 6
0
def chunk(tensor: Tensor, chunks: int, dim: int = 0):
    """Split ``tensor`` into ``chunks`` pieces along ``dim``.

    Every piece is wrapped in a new tensor built with ``_create_tensor`` from
    ``tensor``, carrying a ``chunk_backward`` partial as its ``func``.

    Args:
        tensor: Tensor to split.
        chunks: Passed to the engine's ``split`` as the second argument.
        dim: Axis to split along.

    Returns:
        A list with one tensor per piece, in split order.
    """
    _check_tensors(tensor)
    engine = _get_engine(tensor)

    # NOTE(review): ``dim`` is not forwarded to ``chunk_backward``; confirm
    # that the backward pass does not need it.
    return [
        _create_tensor(
            tensor,
            data=piece,
            func=wrapped_partial(chunk_backward, tensor=tensor, chunks=chunks)
        )
        for piece in engine.split(tensor.data, chunks, dim)
    ]
Esempio n. 7
0
    def forward_gpu(self, inputs):
        """Evaluate the gate update with a single elementwise GPU kernel.

        ``inputs`` is the pair ``(gate_inputs, prev_c)``. ``gate_inputs`` is
        cut into four equal parts along axis 1, used in this order as the
        forget, input, tanh and output gate pre-activations. All seven kernel
        outputs are stored as attributes on ``self``.

        Returns:
            The pair ``(next_h, next_c)``.
        """
        gate_inputs, prev_c = inputs

        f_in, i_in, g_in, o_in = cupy.split(gate_inputs, 4, axis=1)

        in_params = "T forget_gate_input, T input_gate_input, T tanh_input, T output_gate_input, T prev_c"
        out_params = "T forget_gate, T input_gate, T tanh_gate, T output_gate, T tanh_next_c, T next_h, T next_c"

        # ``(tanh(x * 0.5) + 1) * 0.5`` is the logistic sigmoid of ``x``.
        body = (
            "forget_gate = (tanh(forget_gate_input * 0.5f) + 1.0f) * 0.5f;"
            "input_gate = (tanh(input_gate_input * 0.5f) + 1.0f) * 0.5f;"
            "tanh_gate = tanh(tanh_input);"
            "next_c = forget_gate * prev_c + input_gate * tanh_gate;"
            "output_gate = (tanh(output_gate_input * 0.5f) + 1.0f) * 0.5f;"
            "tanh_next_c = tanh(next_c);"
            "next_h = output_gate * tanh(next_c)"
        )

        fused = cuda.elementwise(in_params, out_params, body, 'gqn_core_fwd')
        (self.forget_gate, self.input_gate, self.tanh_gate, self.output_gate,
         self.tanh_next_c, self.next_h, self.next_c) = fused(
             f_in, i_in, g_in, o_in, prev_c)

        return self.next_h, self.next_c
Esempio n. 8
0
def _stratify_split(X, stratify, labels, n_train, n_test, x_numba, y_numba,
                    random_state):
    """
    Function to perform a stratified split based on stratify column.
    Based on scikit-learn stratified split implementation.

    Parameters
    ----------
    X: Shuffled input data (cudf.DataFrame or an object exposing
       ``__cuda_array_interface__``)
    stratify: column to be stratified on (cudf.Series, single-column
       cudf.DataFrame or device array)
    labels: Shuffled labels (cudf.Series, single-column cudf.DataFrame or
       device array)
    n_train: Number of samples in train set
    n_test: number of samples in test set
    x_numba: Determines whether the data should be converted to numba
    y_numba: Determines whether the labels should be converted to numba
    random_state: np.random.RandomState or cp.random.RandomState used for
       the per-class permutations (None/int are resolved by the caller)

    Returns
    -------
    X_train, X_test: Data X divided into train and test sets
    y_train, y_test: Labels divided into train and test sets

    Raises
    ------
    ValueError
        If ``labels`` or ``stratify`` is a DataFrame with more than one
        column, if any class has fewer than two members, or if ``n_train``
        is smaller than the number of classes.
    """
    x_cudf = False
    labels_cudf = False

    if isinstance(X, cudf.DataFrame):
        x_cudf = True
    elif hasattr(X, "__cuda_array_interface__"):
        X = cp.asarray(X)
        x_order = _strides_to_order(X.__cuda_array_interface__['strides'],
                                    cp.dtype(X.dtype))

    # labels and stratify will be only cp arrays
    if isinstance(labels, cudf.Series):
        labels_cudf = True
        labels = labels.values
    elif hasattr(labels, "__cuda_array_interface__"):
        labels = cp.asarray(labels)
    elif isinstance(labels, cudf.DataFrame):
        # ensuring it has just one column
        if labels.shape[1] != 1:
            raise ValueError('Expected one column for labels, but found df '
                             'with shape = %s' % (labels.shape,))
        labels_cudf = True
        # Take the only column whatever its name is.
        labels = labels[labels.columns[0]].values

    labels_order = _strides_to_order(
                        labels.__cuda_array_interface__['strides'],
                        cp.dtype(labels.dtype))

    # Converting to cupy array removes the need to add an if-else block
    # for stratify column
    if isinstance(stratify, cudf.Series):
        stratify = stratify.values
    elif hasattr(stratify, "__cuda_array_interface__"):
        stratify = cp.asarray(stratify)
    elif isinstance(stratify, cudf.DataFrame):
        # ensuring it has just one column
        if stratify.shape[1] != 1:
            raise ValueError('Expected one column, but found column '
                             'with shape = %s' % (stratify.shape,))
        # Take the only column whatever its name is.
        stratify = stratify[stratify.columns[0]].values

    classes, stratify_indices = cp.unique(stratify, return_inverse=True)

    n_classes = classes.shape[0]
    class_counts = cp.bincount(stratify_indices)
    if cp.min(class_counts) < 2:
        raise ValueError("The least populated class in y has only 1"
                         " member, which is too few. The minimum"
                         " number of groups for any class cannot"
                         " be less than 2.")

    if n_train < n_classes:
        raise ValueError('The train_size = %d should be greater or '
                         'equal to the number of classes = %d' % (n_train,
                                                                  n_classes))

    # Sample positions grouped by class: sorting the class ids and cutting
    # at the cumulative class counts yields one index array per class.
    class_indices = cp.split(cp.argsort(stratify_indices),
                             cp.cumsum(class_counts)[:-1].tolist())

    X_train = None

    # random_state won't be None or int, that's handled earlier
    if isinstance(random_state, np.random.RandomState):
        random_state = cp.random.RandomState(seed=random_state.get_state()[1])

    # Number of train (n_i) and test (t_i) samples to draw from each class.
    n_i = _approximate_mode(class_counts, n_train, random_state)
    class_counts_remaining = class_counts - n_i
    t_i = _approximate_mode(class_counts_remaining, n_test, random_state)

    for i in range(n_classes):
        # Shuffle the members of class i, then take the first n_i[i] for
        # train and the following t_i[i] for test.
        permutation = random_state.permutation(class_counts[i].item())
        perm_indices_class_i = class_indices[i].take(permutation)

        y_train_i = cp.array(labels[perm_indices_class_i[:n_i[i]]],
                             order=labels_order)
        y_test_i = cp.array(labels[perm_indices_class_i[n_i[i]:n_i[i] +
                                                        t_i[i]]],
                            order=labels_order)
        # NOTE(review): x_order is only assigned for device-array input, so
        # the csr_matrix case below would hit an unbound name - confirm
        # whether sparse input is actually expected to reach this function.
        if hasattr(X, "__cuda_array_interface__") or \
           isinstance(X, cupyx.scipy.sparse.csr_matrix):

            X_train_i = cp.array(X[perm_indices_class_i[:n_i[i]]],
                                 order=x_order)
            X_test_i = cp.array(X[perm_indices_class_i[n_i[i]:n_i[i] +
                                                       t_i[i]]],
                                order=x_order)

            if X_train is None:
                X_train = cp.array(X_train_i, order=x_order)
                y_train = cp.array(y_train_i, order=labels_order)
                X_test = cp.array(X_test_i, order=x_order)
                y_test = cp.array(y_test_i, order=labels_order)
            else:
                X_train = cp.concatenate([X_train, X_train_i], axis=0)
                X_test = cp.concatenate([X_test, X_test_i], axis=0)
                y_train = cp.concatenate([y_train, y_train_i], axis=0)
                y_test = cp.concatenate([y_test, y_test_i], axis=0)

        elif x_cudf:
            X_train_i = X.iloc[perm_indices_class_i[:n_i[i]]]
            X_test_i = X.iloc[perm_indices_class_i[n_i[i]:n_i[i] + t_i[i]]]

            if X_train is None:
                X_train = X_train_i
                y_train = y_train_i
                X_test = X_test_i
                y_test = y_test_i
            else:
                X_train = cudf.concat([X_train, X_train_i], ignore_index=False)
                X_test = cudf.concat([X_test, X_test_i], ignore_index=False)
                y_train = cp.concatenate([y_train, y_train_i], axis=0)
                y_test = cp.concatenate([y_test, y_test_i], axis=0)

    # Hand the results back in the container type requested by the caller.
    if x_numba:
        X_train = cuda.as_cuda_array(X_train)
        X_test = cuda.as_cuda_array(X_test)
    elif x_cudf:
        X_train = cudf.DataFrame(X_train)
        X_test = cudf.DataFrame(X_test)

    if y_numba:
        y_train = cuda.as_cuda_array(y_train)
        y_test = cuda.as_cuda_array(y_test)
    elif labels_cudf:
        y_train = cudf.Series(y_train)
        y_test = cudf.Series(y_test)

    return X_train, X_test, y_train, y_test
Esempio n. 9
0
def oaconvolve(in1, in2, mode="full", axes=None):
    """Convolve two N-dimensional arrays using the overlap-add method.

    Convolve ``in1`` and ``in2`` using the overlap-add method, with the output
    size determined by the ``mode`` argument. This is generally faster than
    ``convolve`` for large arrays, and generally faster than ``fftconvolve``
    when one array is much larger than the other, but can be slower when only a
    few output values are needed or when the arrays are very similar in shape,
    and can only output float arrays (int or object array inputs will be cast
    to float).

    Args:
        in1 (cupy.ndarray): First input.
        in2 (cupy.ndarray): Second input. Should have the same number of
            dimensions as ``in1``.
        mode (str): Indicates the size of the output:

            - ``'full'``: output is the full discrete linear \
                          convolution (default)
            - ``'valid'``: output consists only of those elements that do \
                           not rely on the zero-padding. Either ``in1`` or \
                           ``in2`` must be at least as large as the other in \
                           every dimension.
            - ``'same'``: output is the same size as ``in1``, centered \
                          with respect to the ``'full'`` output

        axes (scalar or tuple of scalar or None): Axes over which to compute
            the convolution. The default is over all axes.

    Returns:
        cupy.ndarray: the result of convolution

    .. seealso:: :func:`cupyx.scipy.signal.convolve`
    .. seealso:: :func:`cupyx.scipy.signal.fftconvolve`
    .. seealso:: :func:`cupyx.scipy.ndimage.convolve`
    .. seealso:: :func:`scipy.signal.oaconvolve`
    """
    # The input check may already produce the final result; any non-None
    # value it returns is handed straight back to the caller.
    out = _st_core._check_conv_inputs(in1, in2, mode)
    if out is not None:
        return out
    if in1.shape == in2.shape:  # Equivalent to fftconvolve
        return fftconvolve(in1, in2, mode=mode, axes=axes)

    in1, in2, axes = _st_core._init_freq_conv_axes(in1,
                                                   in2,
                                                   mode,
                                                   axes,
                                                   sorted_axes=True)
    s1, s2 = in1.shape, in2.shape
    # No axis left to convolve over: the result is the plain product.
    if not axes:
        return _st_core._apply_conv_mode(in1 * in2, s1, s2, mode, axes)

    # Calculate the block sizes for the output, steps, first and second inputs.
    # It is simpler to calculate them all together than doing them in separate
    # loops due to all the special cases that need to be handled.
    # Axes that are not convolved get the placeholder (-1, -1, s1[i], s2[i]).
    optimal_sizes = (_st_core._calc_oa_lens(s1[i], s2[i]) if i in axes else
                     (-1, -1, s1[i], s2[i]) for i in range(in1.ndim))
    block_size, overlaps, in1_step, in2_step = zip(*optimal_sizes)

    # Fall back to fftconvolve if there is only one block in every dimension
    if in1_step == s1 and in2_step == s2:
        return fftconvolve(in1, in2, mode=mode, axes=axes)

    # Pad and reshape the inputs for overlapping and adding
    # (None marks axes that are not convolved and hence not sliced later).
    shape_final = [
        s1[i] + s2[i] - 1 if i in axes else None for i in range(in1.ndim)
    ]
    in1, in2 = _st_core._oa_reshape_inputs(in1, in2, axes, shape_final,
                                           block_size, overlaps, in1_step,
                                           in2_step)

    # Reshape the overlap-add parts to input block sizes
    # Each convolved axis contributes two axes after the reshape: the one
    # listed in split_axes and, directly after it, the one in fft_axes that
    # is transformed. They are merged again (product of both lengths) below.
    split_axes = [iax + i for i, iax in enumerate(axes)]
    fft_axes = [iax + 1 for iax in split_axes]

    # Do the convolution
    fft_shape = [block_size[i] for i in axes]
    ret = _st_core._freq_domain_conv(in1,
                                     in2,
                                     fft_axes,
                                     fft_shape,
                                     calc_fast_len=False)

    # Do the overlap-add
    for ax, ax_fft, ax_split in zip(axes, fft_axes, split_axes):
        overlap = overlaps[ax]
        # A None overlap means there is nothing to add along this axis.
        if overlap is None:
            continue

        # Cut the trailing ``overlap`` samples off every block, and drop the
        # tail that belongs to the last block along the split axis.
        ret, overpart = cupy.split(ret, [-overlap], ax_fft)
        overpart = cupy.split(overpart, [-1], ax_split)[0]

        # Select the leading ``overlap`` samples of every block but the first
        # and add the previous block's tail onto them. The in-place add only
        # reaches ``ret`` because the split pieces are views of it.
        ret_overpart = cupy.split(ret, [overlap], ax_fft)[0]
        ret_overpart = cupy.split(ret_overpart, [1], ax_split)[1]
        ret_overpart += overpart

    # Reshape back to the correct dimensionality
    shape_ret = [
        ret.shape[i] if i not in fft_axes else ret.shape[i] * ret.shape[i - 1]
        for i in range(ret.ndim) if i not in split_axes
    ]
    ret = ret.reshape(*shape_ret)

    # Slice to the correct size
    # (slice(None) keeps the axes that were not convolved in full).
    ret = ret[tuple([slice(islice) for islice in shape_final])]

    return _st_core._apply_conv_mode(ret, s1, s2, mode, axes)
Esempio n. 10
0
 def compress_by_chunk(self, cupy_bool_tensor, num_chunks):
     """Bit-pack a boolean CuPy array and cut the result into chunks.

     The array is packed with ``cupy.packbits`` and the packed array is
     split into ``num_chunks`` equal parts. The current CUDA stream is
     synchronised before the list of parts is returned.
     """
     packed_chunks = cupy.split(cupy.packbits(cupy_bool_tensor), num_chunks)
     cupy.cuda.get_current_stream().synchronize()
     return packed_chunks
Esempio n. 11
0
    def _init_households(self, household_data: dict):
        """
        Assign individuals to households and write the check plots.

        Seniors living in pairs are simulated separately: individuals with
        ``age >= 60`` whose random draw does not exceed
        ``household_data['elderly'][2]`` are paired up city by city. Everyone
        else receives a household size drawn from the ratios in
        ``household_data['young']`` and is grouped, again city by city, into
        households of that size. Individuals that do not fill a complete
        household within their city are left out of the meeting lists.
        """
        def group_by_city(member_indexes, group_size):
            # For every city, cut the members living there into
            # ``group_size`` equally long slots (surplus members are dropped)
            # and return, per slot, the list of per-city index arrays.
            city_of_member = self.city_id[member_indexes]
            slots = [[] for _ in range(group_size)]
            for city_id in self.city_ids:
                local = member_indexes[city_of_member == city_id]
                usable = int(len(local) / group_size) * group_size
                pieces = cp.split(local[:usable], group_size)
                for slot, piece in zip(slots, pieces):
                    slot.append(piece)
            return slots

        # === pair up the elderly:

        elderly_dice = cp.random.random(self._size)

        paired_elderly = self._indices[
            (self.age >= 60) *
            (elderly_dice <= household_data['elderly'][2])]

        first_partners, second_partners = group_by_city(paired_elderly, 2)

        self._household_meetings_elderly = {
            'first': cp.hstack(first_partners),
            'second': cp.hstack(second_partners)
        }

        # === split the rest of the population into households:

        self._household_sizes = cp.ones(self._size)

        size_dice = cp.random.random(self._size)

        lower_bound = 0
        self._max_household_size = max(household_data['young'].keys())

        # Each size owns a consecutive interval of the unit range whose
        # width is its ratio; the draw decides which interval applies.
        for size, ratio in household_data['young'].items():
            in_interval = (lower_bound <= size_dice) * (
                size_dice < lower_bound + ratio)

            self._household_sizes[in_interval] = size

            lower_bound += ratio

        # Mark the paired elderly with -1 so the loop below skips them.
        self._household_sizes[cp.hstack([
            self._household_meetings_elderly['first'],
            self._household_meetings_elderly['second'],
        ])] = -1

        for household_size in range(2, self._max_household_size + 1):
            members = self._indices[self._household_sizes == household_size]

            if len(members) == 0:
                continue

            self._household_meetings[household_size] = [
                cp.hstack(slot)
                for slot in group_by_city(members, household_size)
            ]

        # The paired elderly live in households of two.
        self._household_sizes[self._household_sizes == -1] = 2

        # === plot the distribution of households:

        plot_dir = self.config.get('check_plots')

        ensure_dir(plot_dir)

        if cuda:
            ages = cp.asnumpy(self.age)
            household_sizes = cp.asnumpy(self._household_sizes)
        else:
            ages, household_sizes = self.age, self._household_sizes

        household_age_distribution(
            ages=ages,
            household_sizes=household_sizes,
            filepath=f'{plot_dir}/household-distributions.png')

        age_distribution(ages, filepath=f'{plot_dir}/age-distributions.png')