def sliced_rows_batch_scaled_add(stream, embd_rows_indxs, nrows, ncols, alpha, dense_matrices, embd_nrows, embd_ncols, embd_matrix):
    """Scatter-add scaled rows of a batch of dense matrices into an embedding
    matrix via the `_slicedRowsBatchScaledAdd` kernel, then validate the
    returned CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._slicedRowsBatchScaledAdd(
            stream, embd_rows_indxs, nrows, ncols, alpha,
            dense_matrices, embd_nrows, embd_ncols, embd_matrix))
def add_scaled_columns_slice(stream, nrows, ncols, alpha, dense_matrix, embedding_column_indxs, embedding_matrix):
    """Launch `_addScaledColumnsSlice` on `stream` and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._addScaledColumnsSlice(
            stream, nrows, ncols, alpha, dense_matrix,
            embedding_column_indxs, embedding_matrix))
def batch_horizontal_split(stream, n, nrows, x_ncols, y_ncols, matrices, x_matrices, y_matrices):
    """Split `n` matrices horizontally into x/y parts via
    `_batchHorizontalSplit` and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._batchHorizontalSplit(
            stream, n, nrows, x_ncols, y_ncols,
            matrices, x_matrices, y_matrices))
def softmax_ce_derivative(stream, batchSize, num_classes, probs, target_classes, derivatives):
    """Launch `_softmaxCeDerivative` (softmax cross-entropy gradient kernel)
    and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._softmaxCeDerivative(
            stream, batchSize, num_classes, probs,
            target_classes, derivatives))
def slice_rows_int(stream, embedding_matrix_nrows, embedding_row_indxs, embedding_matrix, nrows, ncols, dense_matrix):
    """Gather rows of an int embedding matrix into `dense_matrix` via
    `_sliceRowsInt` and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._sliceRowsInt(
            stream, embedding_matrix_nrows, embedding_row_indxs,
            embedding_matrix, nrows, ncols, dense_matrix))
def sigmoid_der(stream, nelems, data, sigmoid_data, derivative):
    """Launch the sigmoid-derivative kernel `_sigmoidDer` and check the
    CUDA status."""
    cudart.check_cuda_status(
        nonlinearities._sigmoidDer(stream, nelems, data,
                                   sigmoid_data, derivative))
def tanh_sigm_der(stream, axis, nrows, ncols, data, tanh_sigm_data, derivatve):
    """Launch the combined tanh/sigmoid derivative kernel `_tanhSigmDer`
    and check the CUDA status.

    NOTE(review): the parameter name `derivatve` is a typo for `derivative`;
    it is kept as-is so keyword callers are not broken.
    """
    cudart.check_cuda_status(
        nonlinearities._tanhSigmDer(stream, axis, nrows, ncols,
                                    data, tanh_sigm_data, derivatve))
def assign_sum(stream, nelems, matrices, n, s):
    """Sum `n` matrices element-wise into `s` via `_assign_sum` and check
    the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._assign_sum(stream, nelems, matrices, n, s))
def mask_column_numbers_row_wise(stream, nrows, ncols, numbers, out):
    """Launch `_maskColumnNumbersRowWise` and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._maskColumnNumbersRowWise(
            stream, nrows, ncols, numbers, out))
def assign_masked_addition_column_broadcasted(stream, nrows, ncols, mask, a, b, out):
    """Launch `_assignMaskedAdditionColumnBroadcasted` and check the
    CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._assignMaskedAdditionColumnBroadcasted(
            stream, nrows, ncols, mask, a, b, out))
def assign_masked_addition(stream, nelems, mask, a, b, out):
    """Launch `_assignMaskedAddition` and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._assignMaskedAddition(
            stream, nelems, mask, a, b, out))
def add_mask_zeros(stream, nelems, a, b, out):
    """Launch `_addMaskZeros` and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._addMaskZeros(stream, nelems, a, b, out))
def dropout(stream, nelems, dropout_prob, data, uniform_data, out):
    """Apply dropout with probability `dropout_prob` using pre-sampled
    `uniform_data` via the `_dropout` kernel; check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._dropout(
            stream, nelems, dropout_prob, data, uniform_data, out))
def add_scaled_subtraction(stream, nelems, alpha, a, b, out):
    """Launch `_addScaledSubtraction` and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._addScaledSubtraction(
            stream, nelems, alpha, a, b, out))
def add_softmax_ce_derivative(stream, batchSize, num_classes, probs, target_classes, derivatives):
    """Accumulating variant of `softmax_ce_derivative`: launch
    `_addSoftmaxCeDerivative` and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._addSoftmaxCeDerivative(
            stream, batchSize, num_classes, probs,
            target_classes, derivatives))
def add_hprod_one_minus_mask_column_broadcasted(stream, nrows, ncols, mask, a, out):
    """Launch `_addHprodOneMinusMaskColumnBroadcasted` and check the
    CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._addHprodOneMinusMaskColumnBroadcasted(
            stream, nrows, ncols, mask, a, out))
def add_hprod_one_minus_mask(stream, nelems, mask, a, out):
    """Launch `_addHprodOneMinusMask` and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._addHprodOneMinusMask(
            stream, nelems, mask, a, out))
def repeat_along_col(stream, repeats, nrows, ncols, a, out):
    """Launch `_repeatAlongCol` and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._repeatAlongCol(
            stream, repeats, nrows, ncols, a, out))
def clip(stream, nelems, min_value, max_value, data, out):
    """Clamp `data` element-wise into [min_value, max_value] via the
    `_clip` kernel and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._clip(
            stream, nelems, min_value, max_value, data, out))
def matrix_vector_column_hprod(stream, nrows, ncols, matrix, vector, out):
    """Launch `_matrixVectorColumnHprod` and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._matrixVectorColumnHprod(
            stream, nrows, ncols, matrix, vector, out))
def callback(stream, status, user_data):
    # CUDA stream-callback trampoline.
    # NOTE(review): `function` is a free variable here — this def appears to
    # be a closure whose enclosing scope is outside this chunk; confirm
    # against the full file before moving it.
    cudart.check_cuda_status(status)
    # `user_data` is a ctypes pointer; cast it to a py_object pointer and
    # unpack the (args, kwargs) pair stored by the scheduling side.
    args, kwargs = ct.cast(user_data, ct_py_object_p).contents.value
    function(*args, **kwargs)
    # Pop this stream's oldest bookkeeping entry — presumably the one that
    # kept (args, kwargs) alive until the callback fired; verify against
    # the code that appends to GpuContext._user_data.
    GpuContext._user_data[ct.cast(stream, ct.c_void_p).value].popleft()
def relu_der(stream, nelems, data, relu_data, derivative):
    """Launch the ReLU-derivative kernel `_reluDer` and check the
    CUDA status."""
    cudart.check_cuda_status(
        nonlinearities._reluDer(stream, nelems, data,
                                relu_data, derivative))
def tanh_der(stream, nelems, data, tanh_data, derivative):
    """Launch the tanh-derivative kernel `_tanhDer` and check the
    CUDA status."""
    cudart.check_cuda_status(
        nonlinearities._tanhDer(stream, nelems, data,
                                tanh_data, derivative))
def add_repeat_along_col_derivative(stream, repeats, a, nrows, ncols, derivative):
    """Launch `_addRepeatAlongColDerivative` and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._addRepeatAlongColDerivative(
            stream, repeats, a, nrows, ncols, derivative))
def add_scaled_div_sqrt(stream, nelems, alpha, a, b, epsilon, c):
    """Launch `_addScaledDivSqrt` (Adam/RMSProp-style scaled update) and
    check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._addScaledDivSqrt(
            stream, nelems, alpha, a, b, epsilon, c))
def transpose_int(stream, nrows, ncols, in_, out):
    """Transpose an integer matrix via `_transposeInt` and check the
    CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._transposeInt(stream, nrows, ncols, in_, out))
def add_hadamard_product_2(stream, nelems, a, b, alpha, c):
    """Launch `_addHadamardProduct2` and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._addHadamardProduct2(
            stream, nelems, a, b, alpha, c))
def assign_sequential_sum_pooling(stream, nrows, ncols, matrices, n, out):
    """Launch `_assignSequentialSumPooling` over `n` matrices and check
    the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._assignSequentialSumPooling(
            stream, nrows, ncols, matrices, n, out))
def assign_sequential_weighted_sum(stream, nrows, ncols, matrices, weights, n, out):
    """Launch `_assignSequentialWeightedSum` over `n` matrices and check
    the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._assignSequentialWeightedSum(
            stream, nrows, ncols, matrices, weights, n, out))
def sequentially_tile(stream, nelems, a, matrices, n):
    """Launch `_sequentiallyTile` and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._sequentiallyTile(stream, nelems, a, matrices, n))
def assign_dL_dpre_a(stream, nrows, ncols, matrices, derivative, weights, n, out):
    """Launch `_assignDLDprea` (attention pre-activation gradient) and
    check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._assignDLDprea(
            stream, nrows, ncols, matrices, derivative, weights, n, out))
def add_attention_derivative(stream, nrows, ncols, matrices, derivative, n, out):
    """Launch `_addAttentionDerivative` and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._addAttentionDerivative(
            stream, nrows, ncols, matrices, derivative, n, out))
def tanh_sigm(stream, axis, nrows, ncols, data, tanh_sigm_data):
    """Launch the combined tanh/sigmoid kernel `_tanhSigm` along `axis`
    and check the CUDA status."""
    cudart.check_cuda_status(
        nonlinearities._tanhSigm(stream, axis, nrows, ncols,
                                 data, tanh_sigm_data))
def add_attention_tile(stream, nrows, ncols, derivative, a, dL_dpre_a, u, n, matrices_derivs):
    """Launch `_addAttentionTile` and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._addAttentionTile(
            stream, nrows, ncols, derivative, a,
            dL_dpre_a, u, n, matrices_derivs))
def relu(stream, nelems, data, relu_data):
    """Launch the element-wise ReLU kernel `_relu` and check the
    CUDA status."""
    cudart.check_cuda_status(
        nonlinearities._relu(stream, nelems, data, relu_data))
def slice_rows_batch(stream, embd_rows_indxs, nrows, ncols, embd_matrix, embd_nrows, embd_ncols, dense_matrices):
    """Gather indexed rows of the embedding matrix into a batch of dense
    matrices via `_sliceRowsBatch` and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._sliceRowsBatch(
            stream, embd_rows_indxs, nrows, ncols,
            embd_matrix, embd_nrows, embd_ncols, dense_matrices))
def sigmoid(stream, nelems, data, sigmoid_data):
    """Launch the element-wise sigmoid kernel `_sigmoid` and check the
    CUDA status."""
    cudart.check_cuda_status(
        nonlinearities._sigmoid(stream, nelems, data, sigmoid_data))
def sliced_rows_batch_scaled_add(stream, embd_rows_indxs, nrows, ncols, alpha, dense_matrices, embd_nrows, embd_ncols, embd_matrix):
    """Scatter-add scaled rows of a batch of dense matrices into an embedding
    matrix via `_slicedRowsBatchScaledAdd` and check the CUDA status.

    NOTE(review): this is a duplicate definition — an identical
    `sliced_rows_batch_scaled_add` appears earlier in this file; one of the
    two should be removed once callers are audited.
    """
    cudart.check_cuda_status(
        gpu_matrix_kernels._slicedRowsBatchScaledAdd(
            stream, embd_rows_indxs, nrows, ncols, alpha,
            dense_matrices, embd_nrows, embd_ncols, embd_matrix))
def tanh(stream, nelems, data, tanh_data):
    """Launch the element-wise tanh kernel `_tanh` and check the
    CUDA status."""
    cudart.check_cuda_status(
        nonlinearities._tanh(stream, nelems, data, tanh_data))
def assign_scaled_addition(stream, nelems, alpha, a, b, out):
    """Launch `_assignScaledAddition` and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._assignScaledAddition(
            stream, nelems, alpha, a, b, out))
def test_dependencies(cuda_stream, node_id, blocking_nodes, blocking_nodes_num, execution_checklist, test_results):
    """Launch the `_testDependencies` test kernel for `node_id` and check
    the CUDA status."""
    cudart.check_cuda_status(
        test_events._testDependencies(
            cuda_stream, node_id, blocking_nodes,
            blocking_nodes_num, execution_checklist, test_results))
def masked_fill(stream, nelems, value, mask_data, true_value, out_data):
    """Launch `_maskedFill` and check the CUDA status."""
    cudart.check_cuda_status(
        gpu_matrix_kernels._maskedFill(
            stream, nelems, value, mask_data, true_value, out_data))