Example #1
# imports needed by this snippet (old ctypes-based CuDNN bindings)
import ctypes

import torch
import torch.backends.cudnn as cudnn
from torch.backends.cudnn import check_error


def get_parameters(fn, handle, weight_buf):
    """Returns weight and bias tensors for each layer of the RNN. These tensors
    are views on the underlying weight buffer allocated by CuDNN.

    Note: for LSTM and GRU, which have multiple parameters of each type (4 and 3, respectively),
          these parameters are concatenated along the first dimension.
          These parameters are returned in a consistent order by CuDNN:
              (input, forget, cell, output) for LSTM
              (reset, update, new) for GRU
    Args:
        fn: The RNN function object holding the RNN state
        handle: a CuDNN handle
        weight_buf: a 1D tensor containing the CuDNN-allocated weight (or grad_weight) buffer
    Returns:
        parameters: [(weight_ih, weight_hh, bias_ih, bias_hh)*], with length
            equal to num_layers * num_directions.
    """

    cudnn_methods = [
        cudnn.lib.cudnnGetRNNLinLayerMatrixParams,
        cudnn.lib.cudnnGetRNNLinLayerBiasParams
    ]

    params = []
    num_linear_layers = _num_linear_layers(fn)  # module-local helper: 8 for LSTM, 6 for GRU, 2 for vanilla RNN
    num_layers = fn.num_directions * fn.num_layers
    for layer in range(num_layers):
        layer_params = []
        # first all the weight matrices, then all the bias vectors,
        # following the order of cudnn_methods
        for cudnn_method in cudnn_methods:
            for linear_id in range(num_linear_layers):
                lin_layer_mat_desc = cudnn.FilterDescriptor()
                matrix_pointer = ctypes.c_void_p()
                check_error(
                    cudnn_method(handle, fn.rnn_desc, layer, fn.x_descs[0],
                                 fn.w_desc,
                                 ctypes.c_void_p(weight_buf.data_ptr()),
                                 linear_id, lin_layer_mat_desc,
                                 ctypes.byref(matrix_pointer)))

                # read back the filter descriptor to learn the shape of the
                # parameter that matrix_pointer points at
                data_type = ctypes.c_int()
                fmt = ctypes.c_int()
                nb_dims = ctypes.c_int()
                min_dim = 3
                filter_dim_a = torch.IntTensor(min_dim)
                check_error(
                    cudnn.lib.cudnnGetFilterNdDescriptor(
                        lin_layer_mat_desc, min_dim, ctypes.byref(data_type),
                        ctypes.byref(fmt), ctypes.byref(nb_dims),
                        ctypes.c_void_p(filter_dim_a.data_ptr())))

                filter_dim_a.resize_(nb_dims.value)
                elem_size = cudnn._sizeofmap[fn.datatype]
                # convert the raw pointer into an element offset relative to
                # the start of weight_buf
                offset_bytes = (matrix_pointer.value - weight_buf.data_ptr())
                assert (offset_bytes % elem_size == 0)
                offset = offset_bytes // elem_size

                # for all the RNN types provided by CUDNN, all the ih weights
                # are the same size and are allocated in a contiguous chunk
                # (same for the hh weights, and the ih and hh biases).
                # Since we're storing all the weights in a single tensor anyway,
                # might as well merge the CUDNN ones into a single tensor as well
                if linear_id == 0 or linear_id == num_linear_layers // 2:
                    assert (filter_dim_a.prod() == filter_dim_a[0])
                    # merge the per-gate chunks into one (rows, cols) view
                    size = (filter_dim_a[0] * num_linear_layers // 2,
                            filter_dim_a[2])
                    param = fn.weight_buf.new().set_(
                        weight_buf.storage(), offset, size)
                    layer_params.append(param)
                else:
                    # the remaining chunks of this parameter must follow
                    # contiguously in the buffer
                    assert (cur_offset == offset)

                cur_offset = offset + filter_dim_a[0]

        params.append(layer_params)

    return params
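
The heavy lifting above is pointer arithmetic plus Tensor.set_(): CuDNN returns a raw pointer into weight_buf, which is converted to an element offset and wrapped in a zero-copy view. A minimal sketch of that view mechanism with plain CPU tensors (hypothetical buffer and shapes, no CuDNN involved):

import torch

# stand-in for the flat CuDNN-allocated weight buffer
weight_buf = torch.arange(24, dtype=torch.float32)

# build a 2x6 view starting at element offset 4, the same way
# get_parameters() carves each weight matrix out of the buffer
param = weight_buf.new().set_(weight_buf.storage(), 4, (2, 6))

param.zero_()                       # writes through to the buffer...
assert weight_buf[4].item() == 0.0  # ...because param is a view, not a copy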
Example #2
import torch.backends.cudnn as cudnn


def init_weight_descriptor(fn, weight):
    w_desc = cudnn.FilterDescriptor()
    w_view = weight.view(-1, 1, 1)  # CuDNN filter descriptors appear to require >= 3 dimensions
    w_desc.set(w_view)
    return w_desc
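
As a quick sanity check of the reshape above (plain tensors, no CuDNN required), view(-1, 1, 1) only rewrites the shape metadata, producing the >= 3-D layout the filter descriptor expects:

import torch

weight = torch.randn(4, 5)       # any 2-D parameter tensor
w_view = weight.view(-1, 1, 1)   # flattened to (20, 1, 1); no data is copied
assert w_view.shape == (4 * 5, 1, 1)
assert w_view.data_ptr() == weight.data_ptr()  # same underlying storage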