Example #1
    def flatten_parameters(self):
        """Resets parameter data pointer so that they can use faster code paths.

        Right now, this works only if the module is on the GPU and cuDNN is enabled.
        Otherwise, it's a no-op.
        """
        any_param = next(self.parameters()).data
        if not any_param.is_cuda or not torch.backends.cudnn.is_acceptable(
                any_param):
            self._data_ptrs = []
            return

        with torch.cuda.device_of(any_param):
            # This is quite ugly, but it allows us to reuse the cuDNN code without larger
            # modifications. It's really a low-level API that doesn't belong here, but
            # let's make an exception.
            from torch.backends.cudnn import rnn
            from torch.backends import cudnn
            from torch.nn._functions.rnn import CudnnRNN
            handle = cudnn.get_handle()
            with warnings.catch_warnings(record=True):
                fn = CudnnRNN(
                    self.mode,
                    self.input_size,
                    self.hidden_size,
                    num_layers=self.num_layers,
                    batch_first=self.batch_first,
                    dropout=self.dropout,
                    train=self.training,
                    bidirectional=self.bidirectional,
                    dropout_state=self.dropout_state,
                )

            # Initialize descriptors
            fn.datatype = cudnn._typemap[any_param.type()]
            fn.x_descs = cudnn.descriptor(any_param.new(1, self.input_size), 1)
            fn.rnn_desc = rnn.init_rnn_descriptor(fn, handle)

            # Allocate buffer to hold the weights
            self._param_buf_size = rnn.get_num_weights(handle, fn.rnn_desc,
                                                       fn.x_descs[0],
                                                       fn.datatype)
            fn.weight_buf = any_param.new(self._param_buf_size).zero_()
            fn.w_desc = rnn.init_weight_descriptor(fn, fn.weight_buf)

            # Slice off views into weight_buf
            params = rnn.get_parameters(fn, handle, fn.weight_buf)
            all_weights = [[p.data for p in l] for l in self.all_weights]

            # Copy weights and update their storage
            rnn._copyParams(all_weights, params)
            for orig_layer_param, new_layer_param in zip(all_weights, params):
                for orig_param, new_param in zip(orig_layer_param,
                                                 new_layer_param):
                    orig_param.set_(new_param.view_as(orig_param))

            # Record the data pointers of the now-flattened parameters so that
            # later calls can detect whether the flat buffer is still in use.
            self._data_ptrs = list(p.data.data_ptr()
                                   for p in self.parameters())
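
A minimal usage sketch for context, assuming a CUDA build with cuDNN available (module sizes are illustrative):

import torch
import torch.nn as nn

# flatten_parameters() is typically invoked after moving the module to the GPU,
# so cuDNN can read every weight from one contiguous buffer.
rnn = nn.LSTM(input_size=10, hidden_size=20, num_layers=2).cuda()
rnn.flatten_parameters()  # no-op on CPU or when cuDNN is not acceptable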
Example #2
def forward(fn, input, hx, weight, output, hy):
    with torch.cuda.device_of(input):
        lib = cudnn.lib
        handle = cudnn.get_handle()
        fn.datatype = cudnn._typemap[input.type()]

        if fn.mode == cudnn.CUDNN_LSTM:
            hx, cx = hx
            hy, cy = hy
        else:
            cx, cy = None, None

        if fn.batch_first:
            input = input.transpose(0, 1)

        if input.dim() != 3:
            raise RuntimeError('input must have 3 dimensions, got {}'.format(
                input.dim()))
        if fn.input_size != input.size(2):
            raise RuntimeError(
                'input.size(2) must be equal to input_size. Expected {}, got {}'
                .format(fn.input_size, input.size(2)))
        if fn.dropout != 0 and cudnn.lib.version < 5103:
            raise RuntimeError(
                'dropout supported only in cudnn v5.1 and above')

        fn.seq_length, fn.mini_batch, fn.input_size = input.size()
        hidden_size = _hidden_size(fn)
        output_size = _output_size(fn)
        x = input.contiguous()
        output.resize_(*output_size)
        hy.resize_(*hidden_size).zero_()
        if cy is not None:
            cy.resize_(*hidden_size).zero_()
        y = output

        # init descriptors
        fn.dropout_desc = init_dropout_descriptor(fn, handle)
        fn.rnn_desc = init_rnn_descriptor(fn)
        fn.x_descs = cudnn.descriptor(x[0], fn.seq_length)
        fn.y_descs = cudnn.descriptor(y[0], fn.seq_length)
        fn.hx_desc = cudnn.descriptor(hx)
        fn.hy_desc = cudnn.descriptor(hx)
        fn.cx_desc = cudnn.descriptor(cx) if cx is not None else None
        fn.cy_desc = cudnn.descriptor(cx) if cx is not None else None

        # create the weight buffer and copy the weights into it
        num_weights = get_num_weights(handle, fn.rnn_desc, fn.x_descs[0],
                                      fn.datatype)
        fn.weight_buf = input.new(num_weights)
        fn.w_desc = init_weight_descriptor(fn, fn.weight_buf)
        w = fn.weight_buf
        # this zero might not seem necessary, but it is in the case
        # where biases are disabled; then they won't be copied and must be zero'd.
        # Alternatively, _copyParams could be written more carefully.
        w.zero_()
        params = get_parameters(fn, handle, w)
        _copyParams(weight, params)

        if tuple(hx.size()) != hidden_size:
            raise RuntimeError('Expected hidden size {}, got {}'.format(
                hidden_size, tuple(hx.size())))
        if cx is not None and tuple(cx.size()) != hidden_size:
            raise RuntimeError('Expected cell size {}, got {}'.format(
                hidden_size, tuple(cx.size())))

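        # cuDNN needs per-call scratch memory ("workspace"); training additionally
        # needs a "reserve" buffer that must persist until the backward pass.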
        workspace_size = ctypes.c_long()
        check_error(
            lib.cudnnGetRNNWorkspaceSize(handle, fn.rnn_desc, fn.seq_length,
                                         fn.x_descs,
                                         ctypes.byref(workspace_size)))
        fn.workspace = torch.cuda.ByteTensor(workspace_size.value)
        if fn.train:
            reserve_size = ctypes.c_long()
            check_error(
                lib.cudnnGetRNNTrainingReserveSize(handle, fn.rnn_desc,
                                                   fn.seq_length, fn.x_descs,
                                                   ctypes.byref(reserve_size)))
            fn.reserve = torch.cuda.ByteTensor(reserve_size.value)

            check_error(
                lib.cudnnRNNForwardTraining(
                    handle, fn.rnn_desc, fn.seq_length, fn.x_descs,
                    ctypes.c_void_p(x.data_ptr()), fn.hx_desc,
                    ctypes.c_void_p(hx.data_ptr()), fn.cx_desc,
                    ctypes.c_void_p(cx.data_ptr()) if cx is not None else None, fn.w_desc,
                    ctypes.c_void_p(w.data_ptr()), fn.y_descs,
                    ctypes.c_void_p(y.data_ptr()), fn.hy_desc,
                    ctypes.c_void_p(hy.data_ptr()), fn.cy_desc,
                    ctypes.c_void_p(cy.data_ptr()) if cx is not None else None,
                    ctypes.c_void_p(fn.workspace.data_ptr()),
                    fn.workspace.size(0),
                    ctypes.c_void_p(fn.reserve.data_ptr()),
                    fn.reserve.size(0)))
        else:  # inference
            check_error(
                lib.cudnnRNNForwardInference(
                    handle, fn.rnn_desc, fn.seq_length, fn.x_descs,
                    ctypes.c_void_p(x.data_ptr()), fn.hx_desc,
                    ctypes.c_void_p(hx.data_ptr()), fn.cx_desc,
                    ctypes.c_void_p(cx.data_ptr()) if cx is not None else None, fn.w_desc,
                    ctypes.c_void_p(w.data_ptr()), fn.y_descs,
                    ctypes.c_void_p(y.data_ptr()), fn.hy_desc,
                    ctypes.c_void_p(hy.data_ptr()), fn.cy_desc,
                    ctypes.c_void_p(cy.data_ptr()) if cx is not None else None,
                    ctypes.c_void_p(fn.workspace.data_ptr()),
                    fn.workspace.size(0)))

        if fn.batch_first:
            output.transpose_(0, 1)
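
For reference, a shape sketch matching the checks above (modern PyTorch API; sizes are illustrative):

import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=8, hidden_size=16, num_layers=2).cuda()
x = torch.randn(5, 3, 8).cuda()    # (seq_length, mini_batch, input_size)
h0 = torch.randn(2, 3, 16).cuda()  # (num_layers * num_directions, batch, hidden_size)
c0 = torch.randn(2, 3, 16).cuda()
out, (hn, cn) = lstm(x, (h0, c0))  # out: (5, 3, 16)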
Example #3
def forward(fn, input, hx, weight, output, hy):
    with torch.cuda.device_of(input):
        lib = cudnn.lib
        handle = cudnn.get_handle()
        fn.datatype = cudnn._typemap[input.type()]
        is_input_packed = fn.batch_sizes is not None

        if fn.mode == cudnn.CUDNN_LSTM:
            hx, cx = hx
            hy, cy = hy
        else:
            cx, cy = None, None

        if fn.batch_first and not is_input_packed:
            input = input.transpose(0, 1)

        if fn.dropout != 0 and cudnn.version() < 5103:
            raise RuntimeError('dropout supported only in cudnn v5.1 and above')

        if is_input_packed:
            fn.seq_length = len(fn.batch_sizes)
            fn.mini_batch = fn.batch_sizes[0]
            fn.input_size = input.size(-1)
        else:
            fn.seq_length, fn.mini_batch, fn.input_size = input.size()
        hidden_size = _hidden_size(fn)
        output_size = _output_size(fn, input)

        assert hx.is_contiguous()
        assert cx is None or cx.is_contiguous()
        x = input.contiguous()
        output.resize_(*output_size)
        hy.resize_(*hidden_size)
        if cy is not None:
            cy.resize_(*hidden_size)
        y = output

        # init descriptors
        fn.rnn_desc = init_rnn_descriptor(fn, handle)
        if is_input_packed:
            fn.x_descs = cudnn.descriptor_sequence(x, fn.batch_sizes)
            fn.y_descs = cudnn.descriptor_sequence(y, fn.batch_sizes)
        else:
            fn.x_descs = cudnn.descriptor(x[0], fn.seq_length)
            fn.y_descs = cudnn.descriptor(y[0], fn.seq_length)
        fn.hx_desc = cudnn.descriptor(hx)
        fn.hy_desc = cudnn.descriptor(hx)
        fn.cx_desc = cudnn.descriptor(cx) if cx is not None else None
        fn.cy_desc = cudnn.descriptor(cx) if cx is not None else None

        # create the weight buffer and copy the weights into it
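        # (a non-None fn.weight_buf, e.g. one pre-flattened by flatten_parameters()
        # above, is reused as-is and only needs a fresh weight descriptor)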
        if fn.weight_buf is None:
            num_weights = get_num_weights(
                handle, fn.rnn_desc, fn.x_descs[0], fn.datatype)
            fn.weight_buf = x.new(num_weights)
            fn.w_desc = init_weight_descriptor(fn, fn.weight_buf)
            w = fn.weight_buf
            # this zero might not seem necessary, but it is in the case
            # where biases are disabled; then they won't be copied and must be zero'd.
            # Alternatively, _copyParams could be written more carefully.
            w.zero_()
            params = get_parameters(fn, handle, w)
            _copyParams(weight, params)
        else:
            fn.w_desc = init_weight_descriptor(fn, fn.weight_buf)
            w = fn.weight_buf

        if cx is not None and tuple(cx.size()) != hidden_size:
            raise RuntimeError('Expected cell size {}, got {}'.format(
                hidden_size, tuple(cx.size())))

        workspace_size = ctypes.c_long()
        check_error(lib.cudnnGetRNNWorkspaceSize(
            handle,
            fn.rnn_desc,
            fn.seq_length,
            fn.x_descs,
            ctypes.byref(workspace_size)
        ))
        fn.workspace_size = workspace_size.value
        # already inside torch.cuda.device_of(input) above; no nested guard needed
        workspace = torch.cuda.ByteTensor(fn.workspace_size)
        if fn.requires_grad:
            reserve_size = ctypes.c_long()
            check_error(lib.cudnnGetRNNTrainingReserveSize(
                handle,
                fn.rnn_desc,
                fn.seq_length,
                fn.x_descs,
                ctypes.byref(reserve_size)
            ))
            fn.reserve = torch.cuda.ByteTensor(reserve_size.value)

            check_error(lib.cudnnRNNForwardTraining(
                handle,
                fn.rnn_desc,
                fn.seq_length,
                fn.x_descs, ctypes.c_void_p(x.data_ptr()),
                fn.hx_desc, ctypes.c_void_p(hx.data_ptr()),
                fn.cx_desc, ctypes.c_void_p(cx.data_ptr()) if cx is not None else None,
                fn.w_desc, ctypes.c_void_p(w.data_ptr()),
                fn.y_descs, ctypes.c_void_p(y.data_ptr()),
                fn.hy_desc, ctypes.c_void_p(hy.data_ptr()),
                fn.cy_desc, ctypes.c_void_p(cy.data_ptr()) if cx is not None else None,
                ctypes.c_void_p(workspace.data_ptr()), workspace.size(0),
                ctypes.c_void_p(fn.reserve.data_ptr()), fn.reserve.size(0)
            ))
        else:  # inference
            check_error(lib.cudnnRNNForwardInference(
                handle,
                fn.rnn_desc,
                fn.seq_length,
                fn.x_descs, ctypes.c_void_p(x.data_ptr()),
                fn.hx_desc, ctypes.c_void_p(hx.data_ptr()),
                fn.cx_desc, ctypes.c_void_p(cx.data_ptr()) if cx is not None else None,
                fn.w_desc, ctypes.c_void_p(w.data_ptr()),
                fn.y_descs, ctypes.c_void_p(y.data_ptr()),
                fn.hy_desc, ctypes.c_void_p(hy.data_ptr()),
                fn.cy_desc, ctypes.c_void_p(cy.data_ptr()) if cx is not None else None,
                ctypes.c_void_p(workspace.data_ptr()), workspace.size(0)
            ))

        if fn.batch_first and not is_input_packed:
            output.transpose_(0, 1)
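
The is_input_packed branch corresponds to inputs built with pack_padded_sequence; a minimal sketch (lengths and sizes are illustrative):

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

lstm = nn.LSTM(input_size=4, hidden_size=8).cuda()
x = torch.randn(6, 2, 4).cuda()                   # (max_seq_len, batch, input_size)
packed = pack_padded_sequence(x, lengths=[6, 3])  # batch_sizes drives the packed path
out_packed, (hn, cn) = lstm(packed)
out, out_lens = pad_packed_sequence(out_packed)   # back to padded (seq, batch, hidden)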