Example #1
def test_consistency_GPU_parallel():
    """
    Verify that the random numbers generated by GPU_mrg_uniform, in
    parallel, are the same as the reference (Java) implementation by
    L'Ecuyer et al.

    """
    if not cuda_available:
        raise SkipTest('Optional package cuda not available')
    if config.mode == 'FAST_COMPILE':
        mode = 'FAST_RUN'
    else:
        mode = config.mode

    seed = 12345
    n_samples = 5
    n_streams = 12
    n_substreams = 7  # 7 samples will be drawn in parallel

    samples = []
    curr_rstate = numpy.array([seed] * 6, dtype='int32')

    for i in range(n_streams):
        stream_samples = []
        rstate = [curr_rstate.copy()]
        for j in range(1, n_substreams):
            rstate.append(rng_mrg.ff_2p72(rstate[-1]))
        rstate = numpy.asarray(rstate).flatten()
        # HACK - transfer these int32 to the GPU memory as float32
        # (reinterpret_cast)
        tmp_float_buf = numpy.frombuffer(rstate.data, dtype='float32')
        # Transfer to device
        rstate = float32_shared_constructor(tmp_float_buf)

        new_rstate, sample = rng_mrg.GPU_mrg_uniform.new(rstate,
                                                         ndim=None,
                                                         dtype='float32',
                                                         size=(n_substreams, ))
        rstate.default_update = new_rstate

        # Not really necessary, just mimicking
        # rng_mrg.MRG_RandomStreams' behavior
        sample.rstate = rstate
        sample.update = (rstate, new_rstate)

        # We need the sample back in the main memory
        cpu_sample = tensor.as_tensor_variable(sample)
        f = theano.function([], cpu_sample, mode=mode)

        for k in range(n_samples):
            s = f()
            stream_samples.append(s)

        samples.append(numpy.array(stream_samples).T.flatten())

        # next stream
        curr_rstate = rng_mrg.ff_2p134(curr_rstate)

    samples = numpy.array(samples).flatten()
    assert numpy.allclose(samples, java_samples)
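
A note on the reinterpret_cast hack used above: the six int32 state words are handed to the GPU inside a float32 buffer purely as a transport trick, so the bit pattern is preserved and viewing the buffer back as int32 recovers the state exactly. A minimal standalone NumPy sketch of that round trip (independent of the test, names chosen for illustration):

import numpy

# Six int32 words make up one generator state, as in the test above.
rstate = numpy.array([12345] * 6, dtype='int32')

# Reinterpret the same bytes as float32: no value conversion, only a new dtype label.
tmp_float_buf = numpy.frombuffer(rstate.data, dtype='float32')

# Viewing the float32 buffer back as int32 recovers the original words bit-for-bit.
assert tmp_float_buf.shape == rstate.shape
assert (tmp_float_buf.view('int32') == rstate).all()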
Example #2
def test_consistency_GPU_serial():
    """Verify that the random numbers generated by GPU_mrg_uniform, serially,
    are the same as the reference (Java) implementation by L'Ecuyer et al.
    """
    if not cuda_available:
        raise SkipTest("Optional package cuda not available")
    if config.mode == "FAST_COMPILE":
        mode = "FAST_RUN"
    else:
        mode = config.mode

    seed = 12345
    n_samples = 5
    n_streams = 12
    n_substreams = 7

    samples = []
    curr_rstate = numpy.array([seed] * 6, dtype="int32")

    for i in range(n_streams):
        stream_rstate = curr_rstate.copy()
        for j in range(n_substreams):
            substream_rstate = numpy.array(stream_rstate.copy(), dtype="int32")
            # HACK - we transfer these int32 to the GPU memory as float32
            # (reinterpret_cast)
            tmp_float_buf = numpy.frombuffer(substream_rstate.data, dtype="float32")
            rstate = float32_shared_constructor(tmp_float_buf)  # Transfer to device

            new_rstate, sample = rng_mrg.GPU_mrg_uniform.new(rstate, ndim=None, dtype="float32", size=(1,))
            rstate.default_update = new_rstate

            # Not really necessary, just mimicking rng_mrg.MRG_RandomStreams' behavior
            sample.rstate = rstate
            sample.update = (rstate, new_rstate)

            # We need the sample back in the main memory
            cpu_sample = tensor.as_tensor_variable(sample)
            f = theano.function([], cpu_sample, mode=mode)
            for k in range(n_samples):
                s = f()
                samples.append(s)

            # next substream
            stream_rstate = rng_mrg.ff_2p72(stream_rstate)

        # next stream
        curr_rstate = rng_mrg.ff_2p134(curr_rstate)

    samples = numpy.array(samples).flatten()
    assert numpy.allclose(samples, java_samples)
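
For context on the loop structure above: ff_2p72 and ff_2p134 are the skip-ahead helpers, and their names suggest they fast-forward a state by 2**72 and 2**134 steps respectively, which is how the test moves from one substream to the next and from one stream to the next (the exact jump sizes are an assumption read off the names, not verified here). A minimal sketch of the pattern, assuming the same rng_mrg import as these tests:

import numpy
from theano.sandbox import rng_mrg

# One generator state: six int32 words, seeded as in the tests.
state = numpy.array([12345] * 6, dtype='int32')

# Advance within the current stream to the next substream.
next_substream = rng_mrg.ff_2p72(state)

# Advance to the first substream of the next independent stream.
next_stream = rng_mrg.ff_2p134(state)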
Example #3
    def uniform(self, size, low=0.0, high=1.0, ndim=None, dtype=None,
                nstreams=None):
        """
        Sample a tensor of the given size whose elements are drawn from a
        uniform distribution between low and high.

        If the size argument is ambiguous about the number of dimensions,
        ndim may be given as a plain integer to supply the missing
        information.

        :param low: Lower bound of the interval on which values are sampled.
        If the ``dtype`` arg is provided, ``low`` will be cast into dtype.
        This bound is excluded.

        :param high: Upper bound of the interval on which values are sampled.
        If the ``dtype`` arg is provided, ``high`` will be cast into dtype.
        This bound is excluded.

        :param size: Can be a list of integers or a Theano variable
                (e.g., the shape of another Theano Variable).

        :param dtype: The output data type. If dtype is not specified, it will
        be inferred from the dtype of low and high, but will be at least as
        precise as floatX.
        """
        low = as_tensor_variable(low)
        high = as_tensor_variable(high)
        if dtype is None:
            dtype = scal.upcast(config.floatX, low.dtype, high.dtype)

        low = cast(low, dtype=dtype)
        high = cast(high, dtype=dtype)

        if isinstance(size, tuple):
            msg = "size must be a tuple of int or a Theano variable"
            assert all([isinstance(i, (numpy.integer, int, Variable))
                        for i in size]), msg
            if any([isinstance(i, (numpy.integer, int)) and i <= 0 for i in size]):
                raise ValueError(
                    "The specified size contains a dimension with value <= 0",
                    size)

        else:
            if not (isinstance(size, Variable) and size.ndim == 1):
                raise TypeError("size must be a tuple of int or a Theano "
                                "Variable with 1 dimension, got " + str(size) +
                                " of type " + str(type(size)))

        if nstreams is None:
            nstreams = self.n_streams(size)

        if self.use_cuda and dtype == 'float32':
            rstates = self.get_substream_rstates(nstreams)
            rstates = rstates.flatten()
            # HACK - we use the fact that int32 and float32 have the same size
            # to sneak ints into the CudaNdarray type.
            # These *SHOULD NEVER BE USED AS FLOATS*.
            tmp_float_buf = numpy.frombuffer(rstates.data, dtype='float32')
            assert tmp_float_buf.shape == rstates.shape
            assert (tmp_float_buf.view('int32') == rstates).all()
            # transfer to device
            node_rstate = float32_shared_constructor(tmp_float_buf)
            assert isinstance(node_rstate.type, CudaNdarrayType)

            # We can't use the normal mrg_uniform constructor + later
            # optimization because of the tmp_float_buf hack above. There is
            # currently no Theano node that will do a frombuffer
            # reinterpretation.
            u = self.pretty_return(node_rstate,
                                   *GPU_mrg_uniform.new(node_rstate,
                                                        ndim, dtype, size))
        else:
            node_rstate = shared(self.get_substream_rstates(nstreams))
            u = self.pretty_return(node_rstate,
                                   *mrg_uniform.new(node_rstate,
                                                    ndim, dtype, size))
        r = u * (high - low) + low

        if u.type.broadcastable != r.type.broadcastable:
            raise NotImplementedError(
                'Increase the size to match the broadcasting pattern of '
                '`low` and `high` arguments')

        assert r.dtype == dtype
        return r
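
In normal use this uniform method is not driven by a hand-built rstate as in the tests above; it is reached through rng_mrg.MRG_RandomStreams, which owns the stream bookkeeping and the shared state updates. A minimal usage sketch, assuming theano and theano.sandbox.rng_mrg are importable (the seed and shape are arbitrary):

import theano
from theano.sandbox.rng_mrg import MRG_RandomStreams

srng = MRG_RandomStreams(seed=12345)

# Symbolic (2, 3) tensor of float32 draws from the uniform distribution on (0, 1).
u = srng.uniform(size=(2, 3), dtype='float32')

# Each call returns fresh draws; the shared rstate advances through default_update.
f = theano.function([], u)
print(f())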
Example #4
    def uniform(self,
                size,
                low=0.0,
                high=1.0,
                ndim=None,
                dtype=None,
                nstreams=None):
        """
        Sample a tensor of the given size whose elements are drawn from a
        uniform distribution between low and high.

        If the size argument is ambiguous about the number of dimensions,
        ndim may be given as a plain integer to supply the missing
        information.

        :param low: Lower bound of the interval on which values are sampled.
        If the ``dtype`` arg is provided, ``low`` will be cast into dtype.

        :param high: Upper bound of the interval on which values are sampled.
        If the ``dtype`` arg is provided, ``high`` will be cast into dtype.

        :param size: Can be a list of integers or a Theano variable
                (e.g., the shape of another Theano Variable).

        :param dtype: The output data type. If dtype is not specified, it will
        be inferred from the dtype of low and high, but will be at least as
        precise as floatX.
        """
        low = as_tensor_variable(low)
        high = as_tensor_variable(high)
        if dtype is None:
            dtype = scal.upcast(config.floatX, low.dtype, high.dtype)

        low = cast(low, dtype=dtype)
        high = cast(high, dtype=dtype)

        if isinstance(size, tuple):
            msg = "size must be a tuple of int or a Theano variable"
            assert all([
                isinstance(i, (numpy.integer, int, Variable)) for i in size
            ]), msg
            if any(
                [isinstance(i, (numpy.integer, int)) and i <= 0
                 for i in size]):
                raise ValueError(
                    "The specified size contains a dimension with value <= 0",
                    size)

        else:
            if not (isinstance(size, Variable) and size.ndim == 1):
                raise TypeError("size must be a tuple of int or a Theano "
                                "Variable with 1 dimension, got " + str(size) +
                                " of type " + str(type(size)))

        if nstreams is None:
            nstreams = self.n_streams(size)

        if self.use_cuda and dtype == 'float32':
            rstates = self.get_substream_rstates(nstreams)
            rstates = rstates.flatten()
            # HACK - we use the fact that int32 and float32 have the same size
            # to sneak ints into the CudaNdarray type.
            # These *SHOULD NEVER BE USED AS FLOATS*.
            tmp_float_buf = numpy.frombuffer(rstates.data, dtype='float32')
            assert tmp_float_buf.shape == rstates.shape
            assert (tmp_float_buf.view('int32') == rstates).all()
            # transfer to device
            node_rstate = float32_shared_constructor(tmp_float_buf)
            assert isinstance(node_rstate.type, CudaNdarrayType)

            # We can't use the normal mrg_uniform constructor + later
            # optimization because of the tmp_float_buf hack above. There is
            # currently no Theano node that will do a frombuffer
            # reinterpretation.
            u = self.pretty_return(
                node_rstate,
                *GPU_mrg_uniform.new(node_rstate, ndim, dtype, size))
        else:
            node_rstate = shared(self.get_substream_rstates(nstreams))
            u = self.pretty_return(
                node_rstate, *mrg_uniform.new(node_rstate, ndim, dtype, size))
        r = u * (high - low) + low

        if u.type.broadcastable != r.type.broadcastable:
            raise NotImplementedError(
                'Increase the size to match the broadcasting pattern of '
                '`low` and `high` arguments')

        assert r.dtype == dtype
        return r
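
The tail of the method maps the raw (0, 1) draws onto the requested interval with u * (high - low) + low. A small standalone NumPy sketch of that affine rescaling, independent of Theano:

import numpy

# Stand-in for the raw uniform draws produced by mrg_uniform / GPU_mrg_uniform.
u = numpy.random.random_sample((5,)).astype('float32')

low, high = -2.0, 5.0
r = u * (high - low) + low  # same rescaling as the end of uniform()

assert ((r >= low) & (r <= high)).all()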