Example #1
def load_data_wrapper():
    """Return a tuple containing ``(training_data, validation_data,
    test_data)``. Based on ``load_data``, but the format is more
    convenient for use in our implementation of neural networks.

    In particular, ``training_data`` is a list containing 50,000
    2-tuples ``(x, y)``.  ``x`` is a 784-dimensional cupy.ndarray
    containing the input image.  ``y`` is a 10-dimensional
    cupy.ndarray representing the unit vector corresponding to the
    correct digit for ``x``.

    ``validation_data`` and ``test_data`` are lists containing 10,000
    2-tuples ``(x, y)``.  In each case, ``x`` is a 784-dimensional
    cupy.ndarray containing the input image, and ``y`` is the
    corresponding classification, i.e., the digit values (integers)
    corresponding to ``x``.

    Obviously, this means we're using slightly different formats for
    the training data and the validation / test data.  These formats
    turn out to be the most convenient for use in our neural network
    code."""
    tr_d, va_d, te_d = load_data()
    training_inputs = [cp.reshape(cp.asarray(x), (784, 1)) for x in tr_d[0]]
    training_results = [vectorized_result(y) for y in tr_d[1]]
    training_data = list(zip(training_inputs, training_results))
    validation_inputs = [cp.reshape(cp.asarray(x), (784, 1)) for x in va_d[0]]
    validation_data = list(zip(validation_inputs, va_d[1]))
    test_inputs = [cp.reshape(cp.asarray(x), (784, 1)) for x in te_d[0]]
    test_data = list(zip(test_inputs, te_d[1]))
    return (training_data, validation_data, test_data)
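The loader above calls a ``vectorized_result`` helper that is not shown. A minimal sketch of what it presumably looks like, given the docstring (a 10-dimensional one-hot column vector):

def vectorized_result(j):
    """Return a (10, 1) one-hot cupy array with a 1.0 in position j."""
    e = cp.zeros((10, 1))
    e[j] = 1.0
    return e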
Example #2
def lobe_calc(Left_Lobe, RightLobe, Four_Y, Four_X, FourXY, cutoff):
    lobe_shape = Left_Lobe.shape
    two_dims_rolled = lobe_shape[2] * lobe_shape[3]
    Left_Lobe = cp.reshape(Left_Lobe,
                           (lobe_shape[0], lobe_shape[1], two_dims_rolled))
    RightLobe = cp.reshape(RightLobe,
                           (lobe_shape[0], lobe_shape[1], two_dims_rolled))
    for pp in range(int(two_dims_rolled)):
        (ii, jj) = np.unravel_index(pp, (lobe_shape[2], lobe_shape[3]))
        xq = Four_X[ii, jj]
        yq = Four_Y[ii, jj]
        d_plus = (((Four_X + xq)**2) + ((Four_Y + yq)**2))**0.5
        d_minu = (((Four_X - xq)**2) + ((Four_Y - yq)**2))**0.5
        d_zero = FourXY

        ll = Left_Lobe[:, :, pp]
        ll[d_plus < cutoff] = 1
        ll[d_minu > cutoff] = 1
        ll[d_zero < cutoff] = 1
        Left_Lobe[:, :, pp] = ll

        rr = RightLobe[:, :, pp]
        rr[d_plus > cutoff] = 1
        rr[d_minu < cutoff] = 1
        rr[d_zero < cutoff] = 1
        RightLobe[:, :, pp] = rr
    # Assuming contiguous inputs, cp.reshape returns views, so the in-place
    # updates above propagate to the caller's arrays; return the lobes as
    # well so the results are usable even if copies were made.
    Left_Lobe = cp.reshape(Left_Lobe, lobe_shape)
    RightLobe = cp.reshape(RightLobe, lobe_shape)
    return Left_Lobe, RightLobe
Example #3
def dot(a, b, out=None):
    """Returns a dot product of two arrays.

    For arrays with more than one axis, it computes the dot product along the
    last axis of ``a`` and the second-to-last axis of ``b``. This is just a
    matrix product if both arrays are 2-D. For 1-D arrays, it uses their
    unique axis as the axis to take the dot product over.

    Args:
        a (cupy.ndarray): The left argument.
        b (cupy.ndarray): The right argument.
        out (cupy.ndarray): Output array.

    Returns:
        cupy.ndarray: The dot product of ``a`` and ``b``.

    .. seealso:: :func:`numpy.dot`

    """
    a_ndim = a.ndim
    b_ndim = b.ndim
    assert a_ndim > 0 and b_ndim > 0
    a_is_vec = a_ndim == 1
    b_is_vec = b_ndim == 1

    if a_is_vec:
        a = cupy.reshape(a, (1, a.size))
        a_ndim = 2
    if b_is_vec:
        b = cupy.reshape(b, (b.size, 1))
        b_ndim = 2

    a_axis = a_ndim - 1
    b_axis = b_ndim - 2

    if a.shape[a_axis] != b.shape[b_axis]:
        raise ValueError('Axis dimension mismatch')

    if a_axis:
        a = cupy.rollaxis(a, a_axis, 0)
    if b_axis:
        b = cupy.rollaxis(b, b_axis, 0)

    k = a.shape[0]
    m = b.size // k
    n = a.size // k

    ret_shape = a.shape[1:] + b.shape[1:]
    if out is None:
        if a_is_vec:
            ret_shape = () if b_is_vec else ret_shape[1:]
        elif b_is_vec:
            ret_shape = ret_shape[:-1]
    else:
        if out.size != n * m:
            raise ValueError('Output array has an invalid size')
        if not out.flags.c_contiguous:
            raise ValueError('Output array must be C-contiguous')

    return _tensordot_core(a, b, out, n, m, k, ret_shape)
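The public entry point corresponding to this implementation is ``cupy.dot``; a short usage sketch of the documented behavior:

import cupy

x = cupy.arange(6, dtype=cupy.float32).reshape(2, 3)
v = cupy.arange(3, dtype=cupy.float32)
cupy.dot(x, v)   # matrix-vector product, shape (2,)
cupy.dot(v, v)   # inner product, 0-d array (scalar)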
Example #4
def my_sum(S1, sig, varargin=None):
    # Returns a running sum applied sequentially across a choice of
    # dimensions and bin sizes.
    # S1 is the matrix to be filtered.
    # sig is either a scalar or a sequence of scalars, one for each axis to
    # be filtered; it is the plus/minus bin length for the summing filter.
    # varargin can be the axes to filter over, used when len(sig) != S1.ndim.
    # If sig is a scalar and no axes are provided, the default is axis 1
    # (the second axis).
    idims = 1
    if varargin is not None:
        idims = varargin
    idims = _make_vect(idims)
    if _is_vect(idims) and _is_vect(sig):
        sigall = sig
    else:
        sigall = np.tile(sig, len(idims))

    for sig, idim in zip(sigall, idims):
        Nd = S1.ndim
        S1 = cp.transpose(S1, [idim] + list(range(0, idim)) +
                          list(range(idim + 1, Nd)))
        dsnew = S1.shape
        S1 = cp.reshape(S1, (S1.shape[0], -1), order='F')
        dsnew2 = S1.shape
        S1 = cp.concatenate((cp.full(
            (sig, dsnew2[1]), 0), S1, cp.full((sig, dsnew2[1]), 0)),
                            axis=0)
        Smax = S1[:dsnew2[0], :]
        for j in range(1, 2 * sig + 1):
            Smax = Smax + S1[j:j + dsnew2[0], :]
        S1 = cp.reshape(Smax, dsnew, order='F')
        S1 = cp.transpose(
            S1,
            list(range(1, idim + 1)) + [0] + list(range(idim + 1, Nd)))
    return S1
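Both my_sum above and my_conv2 in Example #27 rely on two helpers, _make_vect and _is_vect, that are not shown. A minimal sketch of what they presumably do (promote scalars to 1-element vectors, and test for multi-element sequences):

def _make_vect(x):
    # Hypothetical helper: wrap a scalar in a 1-element array.
    if not hasattr(x, '__len__'):
        x = np.array([x])
    return x

def _is_vect(x):
    # Hypothetical helper: True for sequences with more than one element.
    return hasattr(x, '__len__') and len(x) > 1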
Example #5
    def __init__(self, num_stride, padding, num_filter, filter_size,
                 input_shape):
        self.stride = num_stride
        self.padding = padding  # 'same' or 'valid'
        self.num_filter = num_filter
        self.filter_size = filter_size  # [a,b,c]

        self.weights = np.reshape(
            np.random.normal(
                0, 0.05,
                filter_size[0] * filter_size[1] * filter_size[2] * num_filter),
            (filter_size[0], filter_size[1], filter_size[2], num_filter))
        # height, width, channel, num_filter
        self.bias = np.reshape(np.random.normal(0, 0.05, num_filter),
                               (num_filter))
        self.dweights = np.zeros_like(self.weights)
        self.dbias = np.zeros_like(self.bias)

        self.input_shape = input_shape  # batch_size, height, width, channel
        self.output_shape = self.get_output_shape(
            input_shape)  # batch_size, height, width, filter

        # self.inputt = np.zeros_like(input_shape)

        self.padding_shape = self.get_pad_shape()
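The get_output_shape and get_pad_shape helpers are not shown. A hedged sketch of get_output_shape under the usual 'same'/'valid' conventions and the (batch_size, height, width, channel) layout noted above; this is a hypothetical reconstruction, not the original code:

    def get_output_shape(self, input_shape):
        # Hypothetical reconstruction (requires `import math` at module level).
        n, h, w, _ = input_shape
        if self.padding == 'same':
            out_h = math.ceil(h / self.stride)
            out_w = math.ceil(w / self.stride)
        else:  # 'valid'
            out_h = (h - self.filter_size[0]) // self.stride + 1
            out_w = (w - self.filter_size[1]) // self.stride + 1
        return (n, out_h, out_w, self.num_filter)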
Example #6
 def Fisher(self, key, label, batchsize, nb_class, convert_dim, dimension,
            affinity):
     label = cp.array(label)
     if (self.n_shot == 1):
         Sw = cp.identity(dimension, dtype='float32')
     else:
         Sw = self.local_cov_in_class(key.data, label, nb_class, batchsize,
                                      affinity)
         #Sw=self.local_cov_in_class_NN(key.data,label,nb_class,batchsize,5)
     Sb = self.local_cov_bet_class(key.data, label, nb_class, batchsize,
                                   affinity)
     #Sb=self.local_cov_bet_class_NN(key.data,label,nb_class,batchsize,5)
     Sb_Sw = Sb - 0.5 * Sw
     if (self.n_shot == 1):
         Sb_Sw = Sb
     lam, v = np.linalg.eigh(Sb_Sw)
     lam = cp.asarray(lam)
     v = cp.asarray(v)
     eigen_id = cp.argsort(lam)[::-1]
     eigen_value = lam[eigen_id]
     eigen_vector = v[:, eigen_id]
     W = eigen_vector[:, :convert_dim]
     W = cp.reshape(W, [dimension, convert_dim])
     W = W / cp.reshape(cp.linalg.norm(W, axis=0), [1, convert_dim])
     W = F.transpose(W)
     return W
Example #7
    def forward(self, is_training=True):
        inputs = self.input_tensor
        # self.input_shape =inputs.shape
        gamma, beta = self.variables
        N, C, H, W = inputs.shape
        self.shape_field = tuple([i for i in range(2, inputs.ndim)])

        x_group = cp.reshape(inputs, (N, self.G, C // self.G, H, W))
        mean = cp.mean(x_group, axis=self.shape_field, keepdims=True)
        var = cp.var(x_group, axis=self.shape_field, keepdims=True)
        xgmu = x_group - mean
        sqrtvar = cp.sqrt(var + self.epsilon)
        x_group_norm = xgmu / sqrtvar

        x_norm = cp.reshape(x_group_norm, (N, C, H, W))

        outputs = gamma.output_tensor * x_norm + beta.output_tensor

        self.cache = (xgmu, sqrtvar, x_norm)

        self.output_tensor = outputs

        if self.require_grads:
            self.grads = cp.zeros_like(self.output_tensor)
        super().forward(is_training)
Example #9
def evol(s, B, U, chi, d):
    for i_bond in [0, 1]:
        ia = np.mod(i_bond - 1, 2)
        ib = np.mod(i_bond, 2)
        ic = np.mod(i_bond + 1, 2)
        chia = B[ib].shape[1]
        chic = B[ic].shape[2]
        # Construct theta matrix and time evolution #
        theta = cp.tensordot(B[ib], B[ic], axes=(2, 1))  # i a j b
        theta = cp.tensordot(U, theta, axes=([2, 3], [0, 2]))  # ip jp a b
        theta = cp.tensordot(cp.diag(s[ia]), theta, axes=([1, 2]))  # a ip jp b
        theta = cp.reshape(cp.transpose(theta, (1, 0, 2, 3)),
                           (d * chia, d * chic))  # ip a jp b
        # Schmidt decomposition #
        X, Y, Z = cp.linalg.svd(theta, full_matrices=False)
        chi2 = np.min([cp.sum(Y > 10.**(-10)).get(), chi])
        piv = cp.zeros(len(Y), dtype=bool)
        piv[(cp.argsort(Y)[::-1])[:chi2]] = True
        Y = Y[piv]
        invsq = cp.sqrt(cp.sum(Y**2))
        X = X[:, piv]
        Z = Z[piv, :]
        # Obtain the new values for B and s #
        s[ib] = Y / invsq
        X = cp.reshape(X, (d, chia, chi2))
        X = cp.transpose(cp.tensordot(cp.diag(s[ia]**(-1)), X, axes=(1, 1)),
                         (1, 0, 2))
        B[ib] = cp.tensordot(X, cp.diag(s[ib]), axes=(2, 0))
        B[ic] = cp.transpose(cp.reshape(Z, (chi2, d, chic)), (1, 0, 2))

    return s, B
Example #10
0
def jitter(raster, window):
    ntrials, nbins = raster.shape
    # if needed, pad to be divisible by window
    if nbins % window:
        pad = cp.zeros((ntrials, -nbins % window))
        raster = cp.concatenate([raster, pad], axis=1)
    nbins_rounded = raster.shape[1]
    n_jitter_bins = nbins_rounded // window

    # get psth
    psth = raster.mean(axis=0)

    # bin over window and sum
    raster_binned = cp.reshape(raster, (ntrials, window, n_jitter_bins)).sum(axis=1)
    psth_binned = cp.reshape(psth, (window, n_jitter_bins)).sum(axis=0)

    # determine correction
    correction = raster_binned / cp.expand_dims(psth_binned, 0)
    correction = cp.tile(cp.expand_dims(correction, 1), [1, window, 1])
    correction = cp.reshape(correction, (ntrials, nbins_rounded))

    # apply correction
    raster_jittered = cp.expand_dims(psth, 0) * correction

    # trim off padding
    raster_jittered = raster_jittered[:, :nbins]
    raster_jittered[cp.isnan(raster_jittered)] = 0
    return raster_jittered
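A small usage sketch with hypothetical shapes (100 trials of 250 time bins, jitter-corrected in 25-bin windows):

raster = (cp.random.rand(100, 250) < 0.1).astype(cp.float64)
jittered = jitter(raster, window=25)   # same shape as raster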
Example #11
def bss_eval_sources_cupy(reference_sources,
                          estimated_sources,
                          compute_permutation=True,
                          nsrc=2):
    """CuPy port of a ``bss_eval_sources``-style metric (as in ``mir_eval``):
    returns (SDR, SIR, SAR, perm) for ``nsrc`` estimated sources against the
    reference sources, optionally searching over source permutations."""

    # make sure the input is of shape (nsrc, nsampl)
    nsampl = estimated_sources.shape[-1]
    estimated_sources = cp.reshape(
        cp.array(estimated_sources, dtype=cp.float64), [nsrc, nsampl])
    reference_sources = cp.reshape(
        cp.array(reference_sources, dtype=cp.float64), [nsrc, nsampl])

    # does user desire permutations?
    if compute_permutation:
        # compute criteria for all possible pair matches
        sdr = cp.empty((nsrc, nsrc))
        sir = cp.empty((nsrc, nsrc))
        sar = cp.empty((nsrc, nsrc))
        for jest in range(nsrc):
            for jtrue in range(nsrc):
                s_true, e_spat, e_interf, e_artif = \
                    _bss_decomp_mtifilt_cupy(reference_sources,
                                        estimated_sources[jest],
                                        jtrue, 512, nsrc)
                sdr[jest,jtrue], sir[jest,jtrue], sar[jest,jtrue] = \
                    _bss_source_crit_cupy(s_true, e_spat, e_interf, e_artif)
        # select the best ordering
        perms = list(itertools.permutations(list(range(nsrc))))
        mean_sir = np.empty(len(perms))
        dum = np.arange(nsrc)
        for (i, perm) in enumerate(perms):
            mean_sir[i] = np.mean(cp.asnumpy(sir)[perm, dum])
        popt = perms[np.argmax(mean_sir)]
        idx = (popt, dum)
        return (cp.asnumpy(sdr)[idx], cp.asnumpy(sir)[idx],
                cp.asnumpy(sar)[idx], np.asarray(popt))
    else:
        # compute criteria for only the simple correspondence
        # (estimate 1 is estimate corresponding to reference source 1, etc.)
        sdr = np.empty(nsrc)
        sir = np.empty(nsrc)
        sar = np.empty(nsrc)
        for j in range(nsrc):
            s_true, e_spat, e_interf, e_artif = \
                _bss_decomp_mtifilt(reference_sources,
                                    estimated_sources[j],
                                    j, 512)
            sdr[j], sir[j], sar[j] = \
                _bss_source_crit(s_true, e_spat, e_interf, e_artif)

        # return the default permutation for compatibility
        popt = np.arange(nsrc)
        return (sdr, sir, sar, popt)
Example #12
def gather_indexes(sequence_tensor, positions):
    """Gathers the vectors at the specific positions over a minibatch."""
    batch_size, seq_length, width = sequence_tensor.shape

    flat_offsets = np.reshape(np.arange(0, batch_size, dtype=np.int32) * seq_length, [-1, 1])
    flat_positions = np.reshape(positions + flat_offsets, [-1])
    flat_sequence_tensor = np.reshape(sequence_tensor,
                                      [batch_size * seq_length, width])
    output_tensor = flat_sequence_tensor[flat_positions]
    return output_tensor
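A quick shape check with hypothetical inputs (2 sequences of length 4, width 3, gathering 2 positions per sequence):

seq = np.arange(24, dtype=np.float32).reshape(2, 4, 3)
pos = np.array([[0, 2], [1, 3]])
out = gather_indexes(seq, pos)   # shape (4, 3): positions flattened across the batch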
Example #13
def updater(x_row, updated_h, weights, num_features, num_models, learning_rate):
    x_row = cp.array(x_row.toarray())
    x_row = cp.reshape(x_row, (1, num_features))
    #x_row = x_row.reshape(1, num_features)
    #x_row = cupyx.scipy.sparse.csr_matrix(x_row)
    updated_h = cp.array(updated_h)
    updated_h = cp.reshape(updated_h, (num_models, 1))
    #update = cupyx.scipy.sparse.csr_matrix.dot(updated_h, x_row) * learning_rate
    update = cp.dot(updated_h, x_row) * learning_rate
    weights += update
    cp.cuda.Stream.null.synchronize()
Example #14
def cupy_jit_resizer4D(data4D, resized_size, return_numpy=False):
    data_size = np.shape(data4D)
    flattened_shape = (data_size[0] * data_size[1],
                       data_size[2] * data_size[3])
    data4D_flatten = cp.reshape(cp.asarray(data4D), flattened_shape)
    flat_res_shape = (data_size[0] * data_size[1],
                      resized_size[0] * resized_size[1])
    flatres4D = cp.zeros(flat_res_shape, dtype=data4D.dtype)
    # Call the numba.cuda kernel with 1 block of 32 threads.
    cupy_jit_2D_xdim[1, 32](data4D_flatten, flat_res_shape[1], flatres4D,
                            flat_res_shape[0])
    res4D = cp.reshape(flatres4D, (data_size[0], data_size[1],
                                   resized_size[0], resized_size[1]))
    if return_numpy:
        res4D = cp.asnumpy(res4D)
    return res4D
Example #15
def check_eigenvalues(PHI):
    # a..i are the entries of 3x3 matrices stored row-wise in PHI[3:12];
    # J (the number of systems) is assumed to be defined at module level.
    a, b, c, d, e, f, g, h, i = cp.reshape(PHI[3:12, :], [9, 1, J])
    # Coefficients of the characteristic polynomial x^3 + a2*x^2 + a1*x + a0.
    a0 = -a * e * i + a * f * h + b * d * i - b * f * g - c * d * h + c * e * g
    a1 = a * e - b * d + a * i - c * g + e * i - f * h
    a2 = -a - e - i
    # Jury-type stability conditions; all three must hold for every
    # eigenvalue to lie inside the unit circle.
    bb1 = (abs(a2 + a0) <= 1 + a1).astype(int)
    bb2 = (abs(a2 - 3 * a0) <= (3 - a1)).astype(int)
    bb3 = ((a0**2 + a1 - a0 * a2) <= 1).astype(int)
    # 1 where any condition fails, 0 where all three hold.
    return cp.reshape(((bb1 + bb2 + bb3 != 3).astype(int)), [J])
Example #16
def visualizeWeights(w1, w2, w3):
    fig, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3)
    new_w1 = cp.reshape(w1[:-1], (48, 48))
    new_w2 = cp.reshape(w2[:-1], (48, 48))
    new_w3 = cp.reshape(w3[:-1], (48, 48))
    # matplotlib cannot consume CuPy arrays directly; copy to host first.
    ax1.imshow(cp.asnumpy(new_w1))
    ax1.set_title('Part A')
    ax2.imshow(cp.asnumpy(new_w2))
    ax2.set_title('Part B')
    ax3.imshow(cp.asnumpy(new_w3))
    ax3.set_title('Part C')
    fig.show()
Example #17
 def predict_row(self, x):  # x.shape=[N,2]
     x = self.normX(cp.asarray(x))
     dist = cp.tile(self.X, [x.shape[0], 1]) - cp.reshape(
         cp.tile(x, [1, self.X.shape[0]]),
         [self.X.shape[0] * x.shape[0], 2])
     Psi = cp.reshape(
         cp.exp(
             -cp.sum(self.theta * cp.power(cp.abs(dist), self.pl), axis=1)),
         [x.shape[0], self.X.shape[0]])  # sum over the feature dimension
     ccc = Psi.dot(self.bbb)
     fff = ccc + self.mu
     return cp.asnumpy(self.inversenormy(fff))
Example #18
    def decoder(self, z):
        # self.d_out: the reconstructed 28x28 image

        self.z = np.reshape(z, (self.batch_size, self.nz))

        self.d_h0_l = self.z.dot(self.d_W0) + self.d_b0
        self.d_h0_a = relu(self.d_h0_l)

        self.d_h1_l = self.d_h0_a.dot(self.d_W1) + self.d_b1
        self.d_h1_a = sigmoid(self.d_h1_l)

        self.d_out = np.reshape(self.d_h1_a, (self.batch_size, 28, 28, 1))

        return self.d_out
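relu and sigmoid are assumed helpers here; minimal definitions consistent with how they are used:

def relu(x):
    # Standard rectifier (assumed definition).
    return np.maximum(x, 0)

def sigmoid(x):
    # Standard logistic function (assumed definition).
    return 1 / (1 + np.exp(-x))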
Example #19
def cupy_resizer4D(data4D, resized_size, return_numpy=False):
    data4D = cp.asarray(data4D)
    data_size = data4D.shape
    flattened_shape = (data_size[0] * data_size[1],
                       data_size[2] * data_size[3])
    data4D = cp.reshape(data4D, flattened_shape)
    flat_res_shape = (data_size[0] * data_size[1],
                      resized_size[0] * resized_size[1])
    res4D = cp.zeros(flat_res_shape, dtype=data4D.dtype)
    res4D = cupy_xdim_res_loop(data4D, res4D, flat_res_shape[0])
    res4D = cp.reshape(
        res4D, (data_size[0], data_size[1], resized_size[0], resized_size[1]))
    if return_numpy:
        res4D = cp.asnumpy(res4D)
    return res4D
Example #20
def hebbian_rule(W,
                 b,
                 zeta,
                 a,
                 prev_layer,
                 activation_func,
                 den_activation,
                 y,
                 w=None,
                 d=None):
    dW = {}
    dB = {}
    delta = None

    try:
        batch_size = y.shape[1]
    except IndexError:
        batch_size = 1
        y = cp.reshape(y, (y.shape[0], batch_size))

    y = cp.argmax(y, axis=0).reshape((1, y.shape[1]))

    a['s'] = cp.reshape(a['s'], (a['s'].shape[0], 1, a['s'].shape[1]))
    out_in = cp.einsum('nij,nkj->nik', a['s'], a['d'])
    out_w = cp.einsum('nik,nij->nkj', a['s'], W['s'])
    out_w_out = cp.einsum('nik,nji->njk', out_w, a['s'])
    dW['s'] = (1 / batch_size) * (out_in - out_w_out)

    out_b = cp.einsum('nik,nij->nkj', a['s'], b['s'])
    out_b_out = cp.einsum('nik,nji->njk', out_b, a['s'])

    dB['s'] = (1 / batch_size) * cp.sum(y, axis=1)
    dB['s'] = cp.reshape(dB['s'], (dB['s'].shape[0], 1, 1))

    # prev_layer = cp.reshape(prev_layer,(prev_layer.shape[0],1,prev_layer.shape[1]))

    out_in = cp.einsum('nij,kj->nik', a['d'], prev_layer)
    out_w = cp.einsum('nik,nij->nkj', a['d'], W['d'])
    out_w_out = cp.einsum('nik,nji->njk', out_w, a['d'])
    dW['d'] = (1 / batch_size) * (out_in - out_w_out)

    out_b = cp.einsum('nik,nij->nkj', a['d'], b['d'])
    out_b_out = cp.einsum('nik,nji->njk', out_b, a['d'])
    dB['d'] = (out_in - out_b_out)
    dB['d'] = (1 / batch_size) * cp.sum(dB['d'], axis=2)
    dB['d'] = cp.reshape(dB['d'], (dB['d'].shape[0], dB['d'].shape[1], 1))

    return [dW, dB, delta]
Example #21
    def get_atom_dists(self):
        if os.path.isfile(self.file):
            pdb_file = open(self.file,'r')
        else:
            raise OSError('File {} does not exist'.format(self.file))

        lineno = 0
        frames = []
        atoms = []
        val_frames = []
        val_atoms = []
        
        for line in pdb_file:
            lineno += 1
            if line.startswith('ATOM'):
                try:
                    at_obj = PDBAtom(line)
                    atoms.append([at_obj.x, at_obj.y, at_obj.z])
                    val_atoms.append(at_obj.valence_count)
                except Exception:
                    sys.stderr.write('\nProblem parsing line {} in file {}\n'.format(lineno, self.file))
                    sys.stderr.write(line)
                    sys.stderr.write('Probably ATOM entry is formatted incorrectly?\n')
                    sys.stderr.write('Please refer to - http://www.wwpdb.org/documentation/format32/sect9.html#ATOM\n\n')
                    sys.exit(1)
            elif line.startswith('END'):
                frames.append(atoms)
                atoms = []
                val_frames.append(val_atoms)
                val_atoms = []
        pdb_file.close()
    
        # framesindices is not defined in this snippet; it is assumed to come
        # from the enclosing scope (the indices of the frames to process).
        base = cp.zeros((len(framesindices), len(frames[0]), 3))
        for i in range(len(framesindices)):
            for j in range(len(frames[i])):
                for k in range(len(frames[i][j])):
                    base[i][j][k] = frames[i][j][k]
        dists = cp.reshape(base, (len(framesindices), 1, len(frames[0]), 3)) - cp.reshape(base, (len(framesindices), len(frames[0]), 1, 3))  
        cp.cuda.Stream.null.synchronize()
        dists = dists**2
        dists = dists.sum(3)
        dists = cp.sqrt(dists)
        cp.cuda.Stream.null.synchronize()
        
        self.valence_list = val_frames
        self.distance_graphs = dists
        
        return self.distance_graphs
Example #22
    def update_path(self, quantile):
        w = 1 / self.population * w_quantile(self.population, quantile)
        w /= cp.sum(w)
        w = w[:, cp.newaxis]

        eta_m = 1.0
        eta_c = 1.0 / ((self.in_dim * self.out_dim)**2 * cp.sum(w**2))

        self.W_mean = cp.array(self.W_mean)
        self.W_cov = cp.array(self.W_cov)
        self.b_mean = cp.array(self.b_mean)
        self.b_cov = cp.array(self.b_cov)

        W_mean_ = self.W_mean
        b_mean_ = self.b_mean

        self.W_mean = self.W_mean + eta_m * cp.sum(
            w *
            (cp.reshape(self.W, [self.population, self.in_dim * self.out_dim])
             - self.W_mean),
            axis=0)
        self.W_p_sig = (1 - self.W_c_sig) * self.W_p_sig + cp.sqrt(
            1 - (1 - self.W_c_sig)**2) * cp.sqrt(1 / np.sum(w**2)) * cp.sqrt(
                1 / self.W_cov) * (self.W_mean - W_mean_) / self.W_sigma
        self.W_sigma = self.W_sigma * cp.exp(
            self.W_c_sig * (cp.linalg.norm(self.W_p_sig) /
                            (math.sqrt(self.in_dim * self.out_dim) *
                             (1 - 1 / (4 * self.in_dim * self.out_dim) + 1 /
                              (21 * ((self.in_dim * self.out_dim)**2)))) - 1))
        self.b_mean = self.b_mean + eta_m * cp.sum(w * (self.b - self.b_mean),
                                                   axis=0)
        self.b_p_sig = (1 - self.b_c_sig) * self.b_p_sig + cp.sqrt(
            1 - (1 - self.b_c_sig)**2) * cp.sqrt(1 / np.sum(w**2)) * cp.sqrt(
                1 / self.b_cov) * (self.b_mean - b_mean_) / self.b_sigma
        self.b_sigma = self.b_sigma * cp.exp(self.b_c_sig *
                                             (cp.linalg.norm(self.b_p_sig) /
                                              (math.sqrt(self.out_dim) *
                                               (1 - 1 /
                                                (4 * self.out_dim) + 1 /
                                                (21 *
                                                 (self.out_dim**2)))) - 1))

        self.W_cov = self.W_cov + eta_c * cp.sum(w * ((
            (cp.reshape(self.W, [self.population, self.in_dim * self.out_dim])
             - W_mean_) / self.W_sigma)**2 - self.W_cov),
                                                 axis=0)
        self.b_cov = self.b_cov + eta_c * cp.sum(
            w * (((self.b - b_mean_) / self.b_sigma)**2 - self.b_cov), axis=0)
Example #23
def kron(matrix1, matrix2):
    """Kronecker product"""
    s1, s2 = matrix1.shape
    s3, s4 = matrix2.shape
    return cp.reshape(
                matrix1.reshape((s1, 1, s2, 1))*matrix2.reshape((1, s3, 1, s4)),
                (s1*s3, s2*s4))
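A quick sanity check of this reshape-based Kronecker product against CuPy's built-in cupy.kron:

a = cp.arange(4.0).reshape(2, 2)
b = cp.eye(3)
assert cp.allclose(kron(a, b), cp.kron(a, b))   # result shape (2*3, 2*3)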
Example #24
 def _derivativenorm(self):
     """Compute the derivative of the norm.

     Returns
     -------
     derivative : numpy array, shape (m_parameters,)
     """
     w2 = cp.reshape(self.w, (self.n_features, self.d, self.D, self.D))
     derivative = cp.zeros((self.n_features, self.d, self.D, self.D))

     tmp = cp.zeros((self.n_features, self.D))
     tmp2 = cp.zeros((self.n_features, self.D))
     tmp[0, :] = cp.sum(cp.square(w2[0, :, 0, :]), 0)
     for i in range(1, self.n_features - 1):
         tmp[i, :] = cp.dot(tmp[i - 1, :],
                            cp.sum(cp.square(w2[i, :, :, :]), 0))
     tmp[self.n_features - 1, :] = cp.inner(
         tmp[self.n_features - 2, :],
         cp.sum(cp.square(w2[self.n_features - 1, :, :, 0]), 0))
     tmp2[self.n_features - 1, :] = cp.sum(
         cp.square(w2[self.n_features - 1, :, :, 0]), 0)
     for i in range(self.n_features - 2, -1, -1):
         tmp2[i, :] = cp.dot(cp.sum(cp.square(w2[i, :, :, :]), 0),
                             tmp2[i + 1, :])
     tmp2[0, :] = cp.inner(cp.sum(cp.square(w2[0, :, 0, :]), 0), tmp2[1, :])

     for j in range(self.d):
         derivative[0, j, 0, :] = cp.multiply(tmp2[1, :], 2 * w2[0, j, 0, :])
         derivative[self.n_features - 1, j, :, 0] = \
             cp.multiply(tmp[self.n_features - 2, :],
                         2 * w2[self.n_features - 1, j, :, 0])
     for i in range(1, self.n_features - 1):
         temp3 = cp.outer(tmp[i - 1, :], tmp2[i + 1, :])
         for j in range(self.d):
             derivative[i, j, :, :] = cp.multiply(temp3, 2 * w2[i, j, :, :])
     return derivative.reshape(self.m_parameters)
Example #25
def objective(trial):
    seq_len = trial.suggest_int('seq_len', 10, 60)
    N = 10
    datas = []
    for i in range(N):
        l = len(data) * i // N
        r = len(data) * (i + 1) // N
        datas.append(data[l:r])
    #train_iter = LSTM_Iterator(train, batch_size=10, seq_len=seq_len)

    # store the result of each cross-validation fold
    logs = []
    for i in range(N):
        print("phase :" + str(i))
        #test = [v.tolist() for v in datas[i]]
        test = datas[i]
        p = datas[0:i] + datas[i + 1:N]
        #p = np.asarray(p)
        train = [v.tolist() for v in np.reshape(p, -1)]
        train = cp.array(train, dtype=cp.float32)
        logs.append(learn(trial, train, test, seq_len))

    logsum = {}
    for v in logs:
        for key, value in v.items():
            logsum[key] = logsum.get(key, 0) + value / N

    print(logsum)
    for key, value in logsum.items():
        trial.set_user_attr(key, value)

    # return the final validation value
    val_err = logsum['validation/main/loss']
    return val_err
Example #26
def inject_error(weight, mask0, mask1, num_bits=32):
    if num_bits == 32:
        dtype = cp.uint32
        ftype = cp.float32
    else:
        raise ValueError('only num_bits=32 is supported')
    shape = weight.shape
    weight_flatten = cp.ravel(weight).view(dtype)
    mask0, mask0_bit = mask0
    mask1, mask1_bit = mask1
    zero = cp.zeros(1, dtype=dtype)

    if len(mask0) != 0 or len(mask1) != 0:
        for b in range(num_bits):
            fault = cp.full(weight_flatten.size, 2**b, dtype=dtype)
            bit_loc0 = cp.where(mask0_bit == b, mask0, zero).nonzero()[0]
            bit_loc1 = cp.where(mask1_bit == b, mask1, zero).nonzero()[0]
            uniform0 = cp.zeros(weight_flatten.size, dtype=dtype)
            uniform1 = cp.zeros(weight_flatten.size, dtype=dtype)
            # Inject bit error
            if len(bit_loc0) > 0:
                cp.put(uniform0, mask0[bit_loc0], fault)
                cp.put(uniform1, mask1[bit_loc1], fault)
                # Stuck at 0
                not_mask0 = cp.invert(uniform0)
                weight_flatten = cp.bitwise_and(weight_flatten, not_mask0)
                # Stuck at 1
                weight_flatten = cp.bitwise_or(weight_flatten, uniform1)
        weight_float = weight_flatten.view(ftype)
        return cp.reshape(weight_float, shape)
    else:
        return weight
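A usage sketch with hypothetical fault maps; each mask is a pair of (flat weight indices, stuck bit position per index). Note that, as written, stuck-at-1 faults are only applied on iterations where some stuck-at-0 fault targets the same bit, and flat index 0 can never host a fault because of the nonzero() trick above.

w = cp.random.randn(4, 4).astype(cp.float32)
mask0 = (cp.array([3, 7]), cp.array([30, 12]))   # stuck-at-0: bit 30 at flat index 3, bit 12 at index 7
mask1 = (cp.array([5]), cp.array([12]))          # stuck-at-1: bit 12 at flat index 5
w_faulty = inject_error(w, mask0, mask1)         # same shape as w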
Example #27
def my_conv2(x, sig, varargin=None, **kwargs):
    # TODO: Fix so output matches my_conv2_cpu
    # x is the matrix to be filtered along a choice of axes.
    # sig is either a scalar or a sequence of scalars, one for each axis to
    # be filtered.
    # varargin can be the axes to filter over, used when len(sig) != x.ndim.
    # If sig is a scalar and no axes are provided, the default is axis 1
    # (the second axis).
    if sig <= .25:
        return x
    idims = 1
    if varargin is not None:
        idims = varargin
    idims = _make_vect(idims)
    if _is_vect(idims) and _is_vect(sig):
        sigall = sig
    else:
        sigall = np.tile(sig, len(idims))

    for sig, idim in zip(sigall, idims):
        Nd = x.ndim
        x = cp.transpose(x, [idim] + list(range(0, idim)) +
                         list(range(idim + 1, Nd)))
        dsnew = x.shape
        x = cp.reshape(x, (x.shape[0], -1), order='F')

        tmax = ceil(4 * sig)
        dt = cp.arange(-tmax, tmax + 1)
        gaus = cp.exp(-dt**2 / (2 * sig**2))
        gaus = gaus / cp.sum(gaus)

        y = convolve_gpu(x, gaus, **kwargs)
        y = y.reshape(dsnew, order='F')
        y = cp.transpose(
            y,
            list(range(1, idim + 1)) + [0] + list(range(idim + 1, Nd)))
    return y
Example #28
def forward_prop(x, local_time, sequence, isFirst, timestamp, satellite_name):

    s = cp.empty([local_time, distance_forward, channels_hidden, M, N])

    e = cp.empty([local_time, distance_forward])

    alpha = cp.empty([local_time, distance_forward])

    p = cp.empty([local_time, channels_p, M, N])

    # Hidden Unit
    h = cp.empty([local_time + 1, channels_hidden, M, N])
    h[-1] = cp.zeros([channels_hidden, M, N])
    # LSTM FORWARD PROPAGATION
    for t in np.arange(local_time):

        # Attention Network
        for z in range(
                timestamp + t - (distance + learning_window), timestamp +
                distance_forward + t - (distance + learning_window)):
            temp = cp.concatenate(
                (cp.asarray(satellite_images[sequence][z]), h[t - 1]), axis=0)
            s[t][z - (timestamp + t - (distance + learning_window))] = tanh(
                cp.asarray(
                    F.convolution_2d(temp.reshape(
                        1, channels_img + channels_hidden, M, N),
                                     e_kernel,
                                     b=None,
                                     pad=pad_constant)[0].data) + bias_e)
            s_temp = s[t][z - (timestamp + t -
                               (distance + learning_window))].reshape(
                                   M * N * channels_hidden)
            e[t][z - (timestamp + t - (distance + learning_window))] = cp.dot(
                v_connected_weights,
                s_temp) + bias_v[z - (timestamp + t -
                                      (distance + learning_window))]

        xtemp = satellite_images[sequence][timestamp - distance:timestamp -
                                           distance + distance_forward, 0]

        alpha[t] = softmax(e[t])
        p[t] = cp.tensordot(alpha[t], cp.asarray(xtemp), axes=1).reshape(
            1, M, N)  # Sum all x arrays up, weighted array

        temporary = cp.concatenate((x[t], p[t], h[t - 1]), axis=0)
        temporary = temporary.reshape(
            1, channels_img + channels_p + channels_hidden, M, N)

        h[t] = tanh(
            cp.asarray(
                F.convolution_2d(temporary, main_kernel, b=None, pad=2)
                [0].data) + bias_h)

    # 1 x 1 convolution
    output = cp.matmul(connected_weights, h[local_time - 1].reshape(
        channels_hidden, M * N)).reshape(M, N) + bias_y[0]
    true_output = rect_linear(output)

    return true_output, output, cp.reshape(
        h[local_time - 1], (channels_hidden, M * N)), p, h, s, e, alpha, xtemp
Example #29
def _reshape_nd(arr, ndim, dim):
    """Reshape a 1D array to have n dimensions, all singletons but one.

    Parameters
    ----------
    arr : array, shape (N,)
        Input array
    ndim : int
        Number of desired dimensions of reshaped array.
    dim : int
        Which dimension/axis will not be singleton-sized.

    Returns
    -------
    arr_reshaped : array, shape ([1, ...], N, [1,...])
        View of `arr` reshaped to the desired shape.

    Examples
    --------
    >>> arr = cp.random.random(7)
    >>> _reshape_nd(arr, 2, 0).shape
    (7, 1)
    >>> _reshape_nd(arr, 3, 1).shape
    (1, 7, 1)
    >>> _reshape_nd(arr, 4, -1).shape
    (1, 1, 1, 7)
    """
    kernel_shape = _kernel_shape(ndim, dim)
    return cp.reshape(arr, kernel_shape)
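_kernel_shape is not shown; given the docstring examples, a minimal sketch of the assumed helper:

def _kernel_shape(ndim, dim):
    # Hypothetical helper: all-singleton shape with -1 at position `dim`
    # (negative `dim` indexes from the end, matching the docstring examples).
    shape = [1] * ndim
    shape[dim] = -1
    return tuple(shape)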
Example #30
    def __call__(self, x, inference=False):
        if self.mask is None:
            self.mask = self.xp.zeros(
                (self.n_centroid, x.shape[1], x.shape[2]), dtype=np.float32)
            self.mask[:, :, :self.length] = 1
            self.centroid.initialize((self.n_centroid, x.shape[1], x.shape[2]))
        I = np.broadcast_to(np.arange(x.shape[0]),
                            (self.n_centroid, x.shape[0])).T
        J = np.broadcast_to(np.arange(self.n_centroid), I.shape)
        I = np.ravel(I)
        J = np.ravel(J)

        centers = F.softmax(self.centroid) * self.mask
        if inference:
            d = edit_distance(x[I], centers.data[J]).astype(np.float32)
            d = cupy.reshape(d, (x.shape[0], self.n_centroid))
            centroid_indexes = np.argmin(cupy.asnumpy(d), axis=1)
            d = d[np.arange(len(centroid_indexes)), centroid_indexes]
            return F.mean(d), centroid_indexes
        else:
            d = soft_edit_distance(x[I], centers[J], self.tau1)
            d = F.reshape(d, (x.shape[0], self.n_centroid))
            coef = F.softmax(-d * self.tau2)
            S = F.broadcast_to(F.sum(coef, axis=0), coef.shape)

            d = F.sum(d * coef / S)
            return d / self.n_centroid
Example #31
def compute3Dpositions(item_path, file_path, xp=np):
    K = getcamK(file_path, xp=xp)

    fx = K[0, 0]
    fy = K[1, 1]
    u0 = K[0, 2]
    v0 = K[1, 2]

    u = xp.tile(xp.arange(1, 641), (480, 1))
    v = xp.expand_dims(xp.arange(1, 481), axis=1)
    v = xp.tile(v, (1, 640))

    u_u0_by_fx = (u - u0) / fx
    v_v0_by_fy = (v - v0) / fy

    with open(item_path, 'r') as f:
        lines = f.read()
    lines = lines.split()
    str2mat = xp.array(lines, dtype=xp.float64)
    z = xp.reshape(str2mat, (480, 640))

    z = z / xp.sqrt(u_u0_by_fx**2 + v_v0_by_fy**2 + 1)

    x = ((u - u0) / fx) * z
    y = ((v - v0) / fy) * z

    # x and y are computed but unused; only the depth map z is returned.
    return z
Example #32
def take(a, indices, axis=None, out=None):
    """Takes elements of an array at specified indices along an axis.

    This is an implementation of "fancy indexing" at single axis.

    This function does not support ``mode`` option.

    Args:
        a (cupy.ndarray): Array to extract elements.
        indices (int or array-like): Indices of elements that this function
            takes.
        axis (int): The axis along which to select indices. The flattened input
            is used by default.
        out (cupy.ndarray): Output array. If provided, it should be of
            appropriate shape and dtype.

    Returns:
        cupy.ndarray: The result of fancy indexing.

    .. seealso:: :func:`numpy.take`

    """
    if axis is None:
        a = a.ravel()
        lshape = ()
        rshape = ()
    else:
        if axis >= a.ndim:
            raise ValueError('Axis overrun')
        lshape = a.shape[:axis]
        rshape = a.shape[axis + 1:]

    if numpy.isscalar(indices):
        a = cupy.rollaxis(a, axis)
        if out is None:
            return a[indices].copy()
        else:
            out[:] = a[indices]
            return out
    elif not isinstance(indices, cupy.ndarray):
        indices = cupy.array(indices, dtype=int)

    out_shape = lshape + indices.shape + rshape
    if out is None:
        out = cupy.empty(out_shape, dtype=a.dtype)
    else:
        if out.dtype != a.dtype:
            raise TypeError('Output dtype mismatch')
        if out.shape != out_shape:
            raise ValueError('Output shape mismatch')

    cdim = indices.size
    rdim = internal.prod(rshape)
    indices = cupy.reshape(
        indices, (1,) * len(lshape) + indices.shape + (1,) * len(rshape))
    return _take_kernel(a, indices, cdim, rdim, out)
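The public entry point is cupy.take; a short usage sketch of the axis behavior described above:

x = cupy.arange(12).reshape(3, 4)
cupy.take(x, cupy.array([0, 2]), axis=1)   # columns 0 and 2, shape (3, 2)
cupy.take(x, 5)                            # flattened indexing: x.ravel()[5]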
Example #33
 def __call__(self, hs, path_through, test=False):
     """ z has shape (1, 512, 2, 2), so 512 bits seems to be the maximum. """
     """
     path_through has shape (256), which is nowhere near enough;
     repeat it 4x, reshape into 2x2, and splice it in.
     """
     path_through_4 = cp.repeat(path_through, 4).astype('float32')
     path_through_2x2 = chainer.cuda.to_gpu(cp.reshape(path_through_4, (1, 256, 2, 2)) )
     #print("orig shape.", hs[-1].shape)
     #print("tag shape.", path_through_2x2.shape)
     #print("tag object.", type(path_through_2x2))
     #print("shape, ", type(hs), type(hs[-1]))
     hs[-1] = F.concat( (hs[-1], Variable(path_through_2x2)) )
     """
     - device: <CUDA Device 0>
     - volatile: OFF
     - backend: <class 'cupy.core.core.ndarray'>
     - shape: (1, 512, 2, 2)
     - dtype: float32
     """
     #cupy.concat(hs[-1],) 
     """
     import numpy as np
     import cupy as cp
     import cupy.manipulation.join as cujoin
     from chainer import Variable
     cucat = cujoin.concatenate
     path_through
     for i in range(len(path_through)-4):
         p1, p2, p3, p4 = path_through[i:i+4]
         sample = cp.array([[[[p1, p2], [p3, p4]]]]).astype('float32')
         break
     #print( sample.shape )
     vsample = Variable(sample)
     import chainer.functions.array.concat as cat
     hs[-1] = cat.concat([hs[-1], vsample])
     """
     #print("hs", hs[-1], hs[-1].__len__(), hs[-1].debug_print())
     h = self.c0(hs[-1], test=test)
     for i in range(1,8):
         h = F.concat([h, hs[-i-1]])
         if i<7:
             h = self['c%d'%i](h, test=test)
         else:
             h = self.c7(h)
     return h
Example #34
def _tensordot_core(a, b, out, n, m, k, ret_shape):
    ret_dtype = a.dtype.char
    if ret_dtype != b.dtype.char:
        ret_dtype = numpy.find_common_type((ret_dtype, b.dtype), ()).char

    # Cast to float32 or float64
    if ret_dtype == 'f' or ret_dtype == 'd':
        dtype = ret_dtype
    else:
        dtype = numpy.find_common_type((ret_dtype, 'f'), ()).char

    a = a.astype(dtype, copy=False)
    b = b.astype(dtype, copy=False)

    if not a.size or not b.size:
        if a.size or b.size:
            raise ValueError('cannot dot zero-sized and non-zero-sized arrays')
        if out is None:
            return cupy.zeros(ret_shape, dtype=ret_dtype)
        else:
            out.fill(0)
            return out

    if out is None:
        out = cupy.empty(ret_shape, dtype)
        if dtype == ret_dtype:
            ret = out
        else:
            ret = cupy.empty(ret_shape, ret_dtype)
    else:
        ret = out
        if out.dtype != dtype:
            out = cupy.empty(ret_shape, dtype)

    # It copies the operands if needed
    if a.shape != (k, n):
        a = cupy.reshape(a, (k, n))
    if b.shape != (k, m):
        b = cupy.reshape(b, (k, m))
    c = out
    if c.shape != (n, m):
        c = c.view()
        c.shape = (n, m)

    # Be careful that cuBLAS uses the FORTRAN-order matrix representation.
    if k == 1:
        if n == 1:
            # Scalar-vector product
            cupy.multiply(a, b, c)
        elif m == 1:
            # Scalar-vector product
            cupy.multiply(a.T, b, c)
        else:
            # Outer product A^T * B
            # c is C-contiguous while cuBLAS requires F-contiguous arrays, so
            # we compute C^T = B^T * A here.
            handle = cuda.Device().cublas_handle
            c.fill(0)
            a, inca = _to_cublas_vector(a, 1)
            b, incb = _to_cublas_vector(b, 1)
            if dtype == 'f':
                ger = cublas.sger
            elif dtype == 'd':
                ger = cublas.dger
            ger(handle, m, n, 1, b.data.ptr, incb, a.data.ptr, inca,
                c.data.ptr, m)

        if dtype != ret_dtype:
            elementwise.copy(out, ret)
        return ret

    handle = cuda.Device().cublas_handle
    if n == 1:
        if m == 1:
            # Inner product
            a, inca = _to_cublas_vector(a, 0)
            b, incb = _to_cublas_vector(b, 0)
            mode = cublas.getPointerMode(handle)
            cublas.setPointerMode(handle,
                                  cublas.CUBLAS_POINTER_MODE_DEVICE)
            if dtype == 'f':
                dot = cublas.sdot
            elif dtype == 'd':
                dot = cublas.ddot
            try:
                dot(handle, k, a.data.ptr, inca, b.data.ptr, incb, c.data.ptr)
            finally:
                cublas.setPointerMode(handle, mode)
        else:
            # Matrix-vector product B^T * A
            a, inca = _to_cublas_vector(a, 0)
            b, transb, ldb = _mat_to_cublas_contiguous(b, 1)
            if transb:
                # gemv requires (m, k) as the original matrix dimensions
                # rather than the transposed dimensions.
                m, k = k, m
            if dtype == 'f':
                gemv = cublas.sgemv
            elif dtype == 'd':
                gemv = cublas.dgemv
            gemv(handle, transb, m, k, 1, b.data.ptr, ldb, a.data.ptr, inca,
                 0, c.data.ptr, 1)
    elif m == 1:
        # Matrix-vector product A^T * B
        a, transa, lda = _mat_to_cublas_contiguous(a, 1)
        b, incb = _to_cublas_vector(b, 0)
        if transa:
            # gemv requires (n, k) as the original matrix dimensions rather
            # than the transposed dimensions.
            n, k = k, n
        if dtype == 'f':
            gemv = cublas.sgemv
        elif dtype == 'd':
            gemv = cublas.dgemv
        gemv(handle, transa, n, k, 1, a.data.ptr, lda, b.data.ptr, incb, 0,
             c.data.ptr, 1)
    else:
        # Matrix-Matrix product A^T * B
        # c is C-contiguous while cuBLAS assumes F-contiguous inputs, so we
        # compute C^T = B^T * A here.
        a, transa, lda = _mat_to_cublas_contiguous(a, 0)
        b, transb, ldb = _mat_to_cublas_contiguous(b, 1)
        if dtype == 'f':
            gemm = cublas.sgemm
        elif dtype == 'd':
            gemm = cublas.dgemm
        gemm(handle, transb, transa, m, n, k, 1, b.data.ptr, ldb, a.data.ptr,
             lda, 0, c.data.ptr, m)

    if dtype != ret_dtype:
        elementwise.copy(out, ret)
    return ret
Example #35
    def choice(self, a, size=None, replace=True, p=None):
        """Returns an array of random values from a given 1-D array.

        .. seealso::
            :func:`cupy.random.choice` for full document,
            :meth:`numpy.random.choice`

        """
        if a is None:
            raise ValueError('a must be 1-dimensional or an integer')
        if isinstance(a, cupy.ndarray) and a.ndim == 0:
            raise NotImplementedError
        if isinstance(a, six.integer_types):
            a_size = a
            if a_size <= 0:
                raise ValueError('a must be greater than 0')
        else:
            a = cupy.array(a, copy=False)
            if a.ndim != 1:
                raise ValueError('a must be 1-dimensional or an integer')
            else:
                a_size = len(a)
                if a_size == 0:
                    raise ValueError('a must be non-empty')

        if p is not None:
            p = cupy.array(p)
            if p.ndim != 1:
                raise ValueError('p must be 1-dimensional')
            if len(p) != a_size:
                raise ValueError('a and p must have same size')
            if not (p >= 0).all():
                raise ValueError('probabilities are not non-negative')
            p_sum = cupy.sum(p).get()
            if not numpy.allclose(p_sum, 1):
                raise ValueError('probabilities do not sum to 1')

        if not replace:
            raise NotImplementedError

        if size is None:
            raise NotImplementedError
        shape = size
        size = numpy.prod(shape)

        if p is not None:
            p = cupy.broadcast_to(p, (size, a_size))
            index = cupy.argmax(cupy.log(p) -
                                cupy.random.gumbel(size=(size, a_size)),
                                axis=1)
            if not isinstance(shape, six.integer_types):
                index = cupy.reshape(index, shape)
        else:
            index = cupy.random.randint(0, a_size, size=shape)
            # Align the dtype with NumPy
            index = index.astype(cupy.int64, copy=False)

        if isinstance(a, six.integer_types):
            return index

        if index.ndim == 0:
            return cupy.array(a[index], dtype=a.dtype)

        return a[index]
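This method backs the public cupy.random.choice / RandomState.choice API; a short usage sketch:

rs = cupy.random.RandomState(seed=0)
rs.choice(5, size=(2, 3))                   # uniform ints in [0, 5), shape (2, 3)
rs.choice(cupy.arange(5.0), size=4,
          p=cupy.full(5, 0.2))              # weighted sampling from a 1-D array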