Example 1
        first_half = sqrt_ln_U1 * cos(numpy.array(2.0 * numpy.pi, dtype=dtype) * U2)
        second_half = sqrt_ln_U1 * sin(numpy.array(2.0 * numpy.pi, dtype=dtype) * U2)
        normal_samples = join(0, first_half, second_half)

        final_samples = None
        if evened:
            final_samples = normal_samples[:-1]
        elif constant:
            final_samples = normal_samples
        else:
            final_samples = normal_samples[:prod(size)]

        if size:
            final_samples = final_samples.reshape(size)

        final_samples = avg + std * final_samples

        assert final_samples.dtype == dtype
        return final_samples


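# Note: a tracks argument of [None] is the older Theano idiom that offers this
# optimizer every node in the graph; the isinstance() test below does the
# actual filtering. Later revisions track the op directly, e.g.
# @local_optimizer([mrg_uniform]) as in Example 23.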
@local_optimizer([None])
def mrg_random_make_inplace(node):
    op = node.op
    if isinstance(op, mrg_uniform) and not op.inplace:
        # op might be gpu version
        new_op = op.__class__(op.output_type, inplace=True)
        return new_op.make_node(*node.inputs).outputs
    return False
optdb.register('random_make_inplace_mrg',
               opt.in2out(mrg_random_make_inplace, ignore_newtrees=True),
               99, 'fast_run', 'inplace')
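
The sqrt_ln_U1 * cos(...) / sin(...) lines above are the Box-Muller
transform: two uniform draws U1 and U2 yield two independent standard normal
samples as sqrt(-2*ln(U1)) * cos(2*pi*U2) and sqrt(-2*ln(U1)) * sin(2*pi*U2).
A minimal NumPy-only sketch of the same math (the helper name box_muller is
ours, not Theano's):

import numpy

def box_muller(n, avg=0.0, std=1.0, dtype='float64', rng=numpy.random):
    # Two half-sized batches of uniforms; keep U1 away from 0 so log() is safe.
    half = (n + 1) // 2
    U1 = rng.uniform(low=1e-12, high=1.0, size=half).astype(dtype)
    U2 = rng.uniform(size=half).astype(dtype)
    sqrt_ln_U1 = numpy.sqrt(-2.0 * numpy.log(U1))
    first_half = sqrt_ln_U1 * numpy.cos(2.0 * numpy.pi * U2)
    second_half = sqrt_ln_U1 * numpy.sin(2.0 * numpy.pi * U2)
    # Concatenate the halves (cf. join(0, ...) above) and trim when n is odd.
    samples = numpy.concatenate([first_half, second_half])[:n]
    return avg + std * samples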
Example 2
    op = node.op
    if isinstance(op, RandomFunction) and not op.inplace:
        # Read op_fn from op.state, not from op.fn, since op.fn
        # may not be picklable.
        op_fn, op_outtype, op_inplace, op_ndim_added = op._props()
        new_op = RandomFunction(op_fn,
                                op_outtype,
                                inplace=True,
                                ndim_added=op_ndim_added)
        return new_op.make_node(*node.inputs).outputs
    return False


optdb.register(
    "random_make_inplace",
    opt.in2out(random_make_inplace, ignore_newtrees=True),
    99,
    "fast_run",
    "inplace",
)


class RandomStreamsBase(object):
    def binomial(self,
                 size=None,
                 n=1,
                 p=0.5,
                 ndim=None,
                 dtype="int64",
                 prob=None):
        """
Example 3
from theano.compile import optdb
from theano.gof import local_optimizer, LocalOptGroup
from theano.tensor.opt import in2out


@local_optimizer([gpugemv_no_inplace], inplace=True)
def local_inplace_gpuagemv(node):
    if node.op == gpugemv_no_inplace:
        return [gpugemv_inplace(*node.inputs)]


@local_optimizer([gpugemm_no_inplace], inplace=True)
def local_inplace_gpuagemm(node):
    if node.op == gpugemm_no_inplace:
        return [gpugemm_inplace(*node.inputs)]


@local_optimizer([gpuger_no_inplace], inplace=True)
def local_inplace_gpuager(node):
    if node.op == gpuger_no_inplace:
        return [gpuger_inplace(*node.inputs)]


gpuablas_opt_inplace = in2out(LocalOptGroup(local_inplace_gpuagemv,
                                            local_inplace_gpuagemm,
                                            local_inplace_gpuager),
                              name='gpuablas_opt_inplace')
optdb.register('InplaceGpuaBlasOpt', gpuablas_opt_inplace, 70.0, 'fast_run',
               'inplace', 'gpuarray')
Example 4
gpu_dot22 = GpuDot22()

from theano.compile import optdb
from theano.gof import local_optimizer, LocalOptGroup
from theano.tensor.opt import in2out


@local_optimizer([gpugemv_no_inplace], inplace=True)
def local_inplace_gpuagemv(node):
    if node.op == gpugemv_no_inplace:
        return [gpugemv_inplace(*node.inputs)]


@local_optimizer([gpugemm_no_inplace], inplace=True)
def local_inplace_gpuagemm(node):
    if node.op == gpugemm_no_inplace:
        return [gpugemm_inplace(*node.inputs)]


@local_optimizer([gpuger_no_inplace], inplace=True)
def local_inplace_gpuager(node):
    if node.op == gpuger_no_inplace:
        return [gpuger_inplace(*node.inputs)]

gpuablas_opt_inplace = in2out(LocalOptGroup(
        local_inplace_gpuagemv, local_inplace_gpuagemm, local_inplace_gpuager),
                              name='gpuablas_opt_inplace')
optdb.register('InplaceGpuaBlasOpt',
               gpuablas_opt_inplace,
               70.0, 'fast_run', 'inplace', 'gpuarray')
Example 5
        return list(rval)
    else:
        return tuple(rval)


@gof.local_optimizer([IfElse])
def cond_make_inplace(node):
    op = node.op
    if isinstance(op, IfElse) and not op.as_view:
        return IfElse(n_outs=op.n_outs, as_view=True, gpu=op.gpu,
                      name=op.name)(*node.inputs, **dict(return_list=True))
    return False


optdb.register('cond_make_inplace',
               opt.in2out(cond_make_inplace, ignore_newtrees=True), 95,
               'fast_run', 'inplace')

# XXX: Optimizations commented pending further debugging (certain optimizations
# make computation less lazy than it should be currently).
#
# ifelse_equilibrium = gof.EquilibriumDB()
# ifelse_seqopt = gof.SequenceDB()
# ifelse_equilibrium.register('seq_ifelse', ifelse_seqopt, 'fast_run',
#                             'ifelse')
''' Comments:
I wrote these comments to explain how the optimization of the ifelse
function works (for future developers who need to parse this part of the
code). Please try to keep these comments in sync with whatever changes you
make to the code.

ifelse optimizations are registered before canonicalize!
Example 6
@local_optimizer([CGemv(inplace=False)])
def make_c_gemv_destructive(node):
    if isinstance(node.op, CGemv) and not node.op.inplace:
        inputs = list(node.inputs)
        dest = inputs[0]
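        # If the destination buffer comes from AllocEmpty but is used by more
        # than one client, clone a fresh AllocEmpty for the inplace gemv so
        # the destructive write cannot clobber the other readers.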
        if (dest.owner and isinstance(dest.owner.op, tt.AllocEmpty)
                and len(dest.clients) > 1):
            inputs[0] = tt.AllocEmpty(dest.dtype)(*dest.owner.inputs)

        return [cgemv_inplace(*inputs)]


# ##### ####### #######
# Optimizers
# ##### ####### #######

blas_optdb.register("use_c_blas", in2out(use_c_ger, use_c_gemv), 20,
                    "fast_run", "c_blas")

# this matches the InplaceBlasOpt defined in blas.py
optdb.register(
    "c_blas_destructive",
    in2out(make_c_ger_destructive,
           make_c_gemv_destructive,
           name="c_blas_destructive"),
    70.0,
    "fast_run",
    "inplace",
    "c_blas",
)
Example 7
scipy_ger_no_inplace = ScipyGer(False)
scipy_ger_inplace = ScipyGer(True)


@local_optimizer([ger, ger_destructive])
def use_scipy_ger(node):
    if node.op == ger:
        return [scipy_ger_no_inplace(*node.inputs)]


@local_optimizer([scipy_ger_no_inplace])
def make_ger_destructive(node):
    if node.op == scipy_ger_no_inplace:
        return [scipy_ger_inplace(*node.inputs)]

use_scipy_blas = in2out(use_scipy_ger)
make_scipy_blas_destructive = in2out(make_ger_destructive)

if have_fblas:
    # scipy_blas is scheduled very late in the blas_optdb because the SciPy
    # implementation is comparatively slow, but it is almost always present.
    # C implementations should be scheduled earlier than this, so that they take
    # precedence. Once the original Ger is replaced, then these optimizations
    # have no effect.
    blas_optdb.register('scipy_blas',
                        use_scipy_blas,
                        100, 'fast_run')

    # this matches the InplaceBlasOpt defined in blas.py
    optdb.register('make_scipy_blas_destructive',
                   make_scipy_blas_destructive,
Example 8
        return [CGemv(inplace=True)(*node.inputs)]


@local_optimizer([CGemv(inplace=False)])
def make_c_gemv_destructive(node):
    if isinstance(node.op, CGemv) and not node.op.inplace:
        inputs = list(node.inputs)
        dest = inputs[0]
        if (dest.owner and
                isinstance(dest.owner.op, T.AllocEmpty) and
                len(dest.clients) > 1):
            inputs[0] = T.AllocEmpty(dest.dtype)(*dest.owner.inputs)

        return [cgemv_inplace(*inputs)]


# ##### ####### #######
# Optimizers
# ##### ####### #######

blas_optdb.register('use_c_blas',
                    in2out(use_c_ger, use_c_gemv),
                    20, 'fast_run', 'c_blas')

# this matches the InplaceBlasOpt defined in blas.py
optdb.register('c_blas_destructive',
               in2out(make_c_ger_destructive,
                      make_c_gemv_destructive,
                      name="c_blas_destructive"),
               70.0, 'fast_run', 'inplace', 'c_blas')
Example 9
            assert all([isinstance(i, int) or isinstance(i, Variable)
                        for i in size]), msg
        else:
            msg = "size must be a tuple of int or a Theano variable"
            assert isinstance(size, Variable) and size.ndim == 1, msg
        generator = theano.shared(False)  # makes a generic
        s_size = theano.tensor.as_tensor_variable(size)
        u = CURAND_Normal.new_auto_update(generator, ndim, dtype, s_size,
                                          self.next_seed())
        self.state_updates.append(u.update)
        rval = u * std + avg
        if u.type.broadcastable != rval.type.broadcastable:
            raise NotImplementedError(
                'Increase the size to match the broadcasting pattern of '
                '`low` and `high` arguments'
            )
        return rval


@local_optimizer([CURAND_Base])
def local_destructive(node):
    op = node.op
    if isinstance(op, CURAND_Base) and not op.destructive:
        # op might be gpu version
        new_op = op.as_destructive()
        return new_op.make_node(*node.inputs).outputs
    return False
optdb.register('CURAND_destructive',
               opt.in2out(local_destructive, ignore_newtrees=True),
               99, 'fast_run', 'inplace')
Example 10
            assert all([isinstance(i, int) or isinstance(i, Variable)
                        for i in size]), msg
        else:
            msg = "size must be a tuple of int or a Theano variable"
            assert isinstance(size, Variable) and size.ndim == 1, msg
        generator = theano.shared(None)  # makes a generic
        s_size = theano.tensor.as_tensor_variable(size)
        u = CURAND_Normal.new_auto_update(generator, ndim, dtype, s_size,
                                          self.next_seed())
        self.state_updates.append(u.update)
        rval = u * std + avg
        if u.type.broadcastable != rval.type.broadcastable:
            raise NotImplementedError(
                'Increase the size to match the broadcasting pattern of '
                '`low` and `high` arguments'
            )
        return rval


@local_optimizer([CURAND_Base])
def local_destructive(node):
    op = node.op
    if isinstance(op, CURAND_Base) and not op.destructive:
        # op might be gpu version
        new_op = op.as_destructive()
        return new_op.make_node(*node.inputs).outputs
    return False
optdb.register('CURAND_destructive',
               opt.in2out(local_destructive, ignore_newtrees=True),
               99, 'fast_run', 'inplace')
Example 11
            ]), msg
        else:
            msg = "size must be a tuple of int or a Theano variable"
            assert isinstance(size, Variable) and size.ndim == 1, msg
        generator = theano.shared(False)  # makes a generic
        s_size = theano.tensor.as_tensor_variable(size)
        u = CURAND_Normal.new_auto_update(generator, ndim, dtype, s_size,
                                          self.next_seed())
        self.state_updates.append(u.update)
        rval = u * std + avg
        if u.type.broadcastable != rval.type.broadcastable:
            raise NotImplementedError(
                'Increase the size to match the broadcasting pattern of '
                '`low` and `high` arguments')
        return rval


@local_optimizer([None])
def local_destructive(node):
    op = node.op
    if isinstance(op, CURAND_Base) and not op.destructive:
        # op might be gpu version
        new_op = op.as_destructive()
        return new_op.make_node(*node.inputs).outputs
    return False


optdb.register('CURAND_destructive',
               opt.in2out(local_destructive, ignore_newtrees=True), 99,
               'fast_run', 'inplace')
Example 12
@gof.local_optimizer([IfElse])
def cond_make_inplace(node):
    op = node.op
    if (
        isinstance(op, IfElse)
        and not op.as_view
        and
        # For big graph, do not make inplace scalar to speed up
        # optimization.
        (len(node.fgraph.apply_nodes) < 500 or not all([getattr(o.type, "ndim", -1) == 0 for o in node.outputs]))
    ):
        return IfElse(n_outs=op.n_outs, as_view=True, gpu=op.gpu, name=op.name)(*node.inputs, **dict(return_list=True))
    return False


optdb.register("cond_make_inplace", opt.in2out(cond_make_inplace, ignore_newtrees=True), 95, "fast_run", "inplace")

# XXX: Optimizations commented pending further debugging (certain optimizations
# make computation less lazy than it should be currently).
#
# ifelse_equilibrium = gof.EquilibriumDB()
# ifelse_seqopt = gof.SequenceDB()
# ifelse_equilibrium.register('seq_ifelse', ifelse_seqopt, 'fast_run',
#                             'ifelse')
""" Comments:
I've wrote this comments to explain how the optimization of ifelse function
(for future developers that need to parse this part of code. Please try to
keep this comments in sync with whatever changes you add to the code.

ifelse optimization are registered before canonicalize !
Example 13
        return [cgemv_inplace(*node.inputs)]


@local_optimizer([CGemv(inplace=False)])
def make_c_gemv_destructive(node):
    if isinstance(node.op, CGemv) and not node.op.inplace:
        inputs = list(node.inputs)
        dest = inputs[0]
        if (dest.owner and
                isinstance(dest.owner.op, T.AllocEmpty) and
                len(dest.clients) > 1):
            inputs[0] = T.AllocEmpty(dest.dtype)(*dest.owner.inputs)

        return [cgemv_inplace(*inputs)]


# ##### ####### #######
# Optimizers
# ##### ####### #######

blas_optdb.register('use_c_blas',
                    in2out(use_c_ger, use_c_gemv),
                    20, 'fast_run', 'c_blas')

# this matches the InplaceBlasOpt defined in blas.py
optdb.register('c_blas_destructive',
               in2out(make_c_ger_destructive,
                      make_c_gemv_destructive,
                      name="c_blas_destructive"),
               70.0, 'fast_run', 'inplace', 'c_blas')
Example 14
        return (0,)

    def c_headers(self):
        ret = super(GpuDot22, self).c_headers()
        return ret + ["<compyte/numpy_compat.h>"]


gpu_dot22 = GpuDot22()

from theano.compile import optdb
from theano.gof import local_optimizer, LocalOptGroup
from theano.tensor.opt import in2out


@local_optimizer([gpugemv_no_inplace])
def local_inplace_gpuagemv(node):
    if node.op == gpugemv_no_inplace:
        return [gpugemv_inplace(*node.inputs)]


@local_optimizer([gpugemm_no_inplace])
def local_inplace_gpuagemm(node):
    if node.op == gpugemm_no_inplace:
        return [gpugemm_inplace(*node.inputs)]


gpuablas_opt_inplace = in2out(
    LocalOptGroup(local_inplace_gpuagemv, local_inplace_gpuagemm), name="gpuablas_opt_inplace"
)
optdb.register("InplaceGpuaBlasOpt", gpuablas_opt_inplace, 70.0, "fast_run", "inplace", "gpuarray")
Example 15
scipy_ger_no_inplace = ScipyGer(False)
scipy_ger_inplace = ScipyGer(True)


@local_optimizer([ger, ger_destructive])
def use_scipy_ger(node):
    if node.op == ger:
        return [scipy_ger_no_inplace(*node.inputs)]


@local_optimizer([scipy_ger_no_inplace])
def make_ger_destructive(node):
    if node.op == scipy_ger_no_inplace:
        return [scipy_ger_inplace(*node.inputs)]


use_scipy_blas = in2out(use_scipy_ger)
make_scipy_blas_destructive = in2out(make_ger_destructive)

if have_fblas:
    # scipy_blas is scheduled very late in the blas_optdb because the SciPy
    # implementation is comparatively slow, but it is almost always present.
    # C implementations should be scheduled earlier than this, so that they take
    # precedence. Once the original Ger is replaced, then these optimizations
    # have no effect.
    blas_optdb.register('scipy_blas', use_scipy_blas, 100, 'fast_run')

    # this matches the InplaceBlasOpt defined in blas.py
    optdb.register('make_scipy_blas_destructive', make_scipy_blas_destructive,
                   70.0, 'fast_run', 'inplace')
Example 16
        """
        force_init_beta = check_force_gemv_init()

        return [
            CGemv(inplace=False, force_init_beta=force_init_beta)(*node.inputs)
        ]
    if (node.op == gemv_inplace
            and node.outputs[0].dtype in ['float32', 'float64']):
        return [CGemv(inplace=True)(*node.inputs)]


@local_optimizer([CGemv(inplace=False)])
def make_c_gemv_destructive(node):
    if isinstance(node.op, CGemv) and not node.op.inplace:
        return [cgemv_inplace(*node.inputs)]


# ##### ####### #######
# Optimizers
# ##### ####### #######

blas_optdb.register('use_c_blas', in2out(use_c_ger, use_c_gemv), 20,
                    'fast_run', 'c_blas')

# this matches the InplaceBlasOpt defined in blas.py
optdb.register(
    'c_blas_destructive',
    in2out(make_c_ger_destructive,
           make_c_gemv_destructive,
           name="c_blas_destructive"), 70.0, 'fast_run', 'inplace', 'c_blas')
Example 17
        op_outs = scan_utils.clone(op_outs, replace=givens)
        nw_info = copy.deepcopy(op.info)
        nw_info['n_seqs'] = nw_n_seqs
        # DEBUG CHECK
        nwScan = scan_op.Scan(nw_inner, op_outs, nw_info)
        nw_outs = nwScan.make_node(*nw_outer).outputs
        return nw_outs
    else:
        return False

scan_seqopt = theano.gof.SequenceDB()
# We run before the blas opt at 1.7 and specialize at 2.0,
# but after stabilize at 1.5. Should we put it before stabilize?
optdb.register('scan_seqopt', scan_seqopt, 1.6, 'fast_run', 'scan')
scan_seqopt.register('scanOp_remove_constants_and_unused_inputs',
                     opt.in2out(remove_constants_and_unused_inputs_scan,
                                ignore_newtrees=True),
                     5,
                     'fast_run',
                     'scan')


# This is a global opt for historical reasons.
# It should be possible to change it to a local opt.
class PushOutNonSeqScan(gof.Optimizer):

    def __init__(self):
        gof.Optimizer.__init__(self)

    def add_requirements(self, fgraph):
        fgraph.attach_feature(gof.toolbox.ReplaceValidate())
Example 18
        """
        if isinstance(size, tuple):
            msg = "size must be a tuple of int or a Theano variable"
            assert all([isinstance(i, int) or isinstance(i, Variable) for i in size]), msg
        else:
            msg = "size must be a tuple of int or a Theano variable"
            assert isinstance(size, Variable) and size.ndim == 1, msg
        generator = theano.shared(False)  # makes a generic
        s_size = theano.tensor.as_tensor_variable(size)
        u = CURAND_Normal.new_auto_update(generator, ndim, dtype, s_size, self.next_seed())
        self.state_updates.append(u.update)
        rval = u * std + avg
        if u.type.broadcastable != rval.type.broadcastable:
            raise NotImplementedError(
                "Increase the size to match the broadcasting pattern of `low`" "and `high` arguments"
            )
        return rval


@local_optimizer([CURAND_Base])
def local_destructive(node):
    op = node.op
    if isinstance(op, CURAND_Base) and not op.destructive:
        # op might be gpu version
        new_op = op.as_destructive()
        return new_op.make_node(*node.inputs).outputs
    return False


optdb.register("CURAND_destructive", opt.in2out(local_destructive, ignore_newtrees=True), 99, "fast_run", "inplace")
Example 19
        op_outs = scan_utils.clone(op_outs, replace=givens)
        nw_info = op.info.copy()
        nw_info['n_seqs'] = nw_n_seqs
        # DEBUG CHECK
        nwScan = scan_op.Scan(nw_inner, op_outs, nw_info)
        nw_outs = nwScan.make_node(*nw_outer).outputs
        return nw_outs
    else:
        return False

scan_seqopt = theano.gof.SequenceDB()
# We run before the blas opt at 1.7 and specialize at 2.0,
# but after stabilize at 1.5. Should we put it before stabilize?
optdb.register('scan_seqopt', scan_seqopt, 1.6, 'fast_run', 'scan')
scan_seqopt.register('scanOp_remove_constants_and_unused_inputs',
                     opt.in2out(remove_constants_and_unused_inputs_scan,
                                ignore_newtrees=True),
                     5,
                     'fast_run',
                     'scan')


# This is a global opt for historical reasons.
# It should be possible to change it to a local opt.
class PushOutNonSeqScan(gof.Optimizer):

    def __init__(self):
        gof.Optimizer.__init__(self)

    def add_requirements(self, env):
        env.extend(gof.toolbox.ReplaceValidate())
Example 20
    op = node.op
    if (isinstance(op, IfElse) and
        not op.as_view and
        # For big graph, do not make inplace scalar to speed up
        # optimization.
        (len(node.fgraph.apply_nodes) < 500 or
         not all([getattr(o.type, 'ndim', -1) == 0
                  for o in node.outputs]))):
        return IfElse(n_outs=op.n_outs,
                      as_view=True,
                      gpu=op.gpu,
                      name=op.name)(*node.inputs, **dict(return_list=True))
    return False


optdb.register('cond_make_inplace', opt.in2out(cond_make_inplace,
               ignore_newtrees=True), 95, 'fast_run', 'inplace')

# XXX: Optimizations commented pending further debugging (certain optimizations
# make computation less lazy than it should be currently).
#
# ifelse_equilibrium = gof.EquilibriumDB()
# ifelse_seqopt = gof.SequenceDB()
# ifelse_equilibrium.register('seq_ifelse', ifelse_seqopt, 'fast_run',
#                             'ifelse')
''' Comments:
I wrote these comments to explain how the optimization of the ifelse
function works (for future developers who need to parse this part of the
code). Please try to keep these comments in sync with whatever changes you
make to the code.

ifelse optimizations are registered before canonicalize!
Example 21
        nw_info["n_seqs"] = nw_n_seqs
        # DEBUG CHECK
        nwScan = scan_op.Scan(nw_inner, op_outs, nw_info)
        nw_outs = nwScan.make_node(*nw_outer).outputs
        return nw_outs
    else:
        return False


scan_seqopt = theano.gof.SequenceDB()
# We run before the blas opt at 1.7 and specialize at 2.0,
# but after stabilize at 1.5. Should we put it before stabilize?
optdb.register("scan_seqopt", scan_seqopt, 1.6, "fast_run", "scan")
scan_seqopt.register(
    "scanOp_remove_constants_and_unused_inputs",
    opt.in2out(remove_constants_and_unused_inputs_scan, ignore_newtrees=True),
    5,
    "fast_run",
    "scan",
)


# This is a global opt for historical reasons.
# It should be possible to change it to a local opt.
class PushOutNonSeqScan(gof.Optimizer):
    def __init__(self):
        gof.Optimizer.__init__(self)

    def add_requirements(self, env):
        env.extend(gof.toolbox.ReplaceValidate())
Example 22
    else:
        return tuple(rval)


@gof.local_optimizer([None])
def cond_make_inplace(node):
    op = node.op
    if isinstance(op, IfElse) and not op.as_view:
        return IfElse(n_outs=op.n_outs,
                      as_view=True,
                      gpu=op.gpu,
                      name=op.name)(*node.inputs, **dict(return_list=True))
    return False


optdb.register('cond_make_inplace',
               opt.in2out(cond_make_inplace, ignore_newtrees=True),
               95, 'fast_run', 'inplace')

# XXX: Optimizations commented pending further debugging (certain optimizations
# make computation less lazy than it should be currently).
#
# ifelse_equilibrium = gof.EquilibriumDB()
# ifelse_seqopt = gof.SequenceDB()
# ifelse_equilibrium.register('seq_ifelse', ifelse_seqopt, 'fast_run',
#                             'ifelse')
''' Comments:
I wrote these comments to explain how the optimization of the ifelse
function works (for future developers who need to parse this part of the
code). Please try to keep these comments in sync with whatever changes you
make to the code.

ifelse optimizations are registered before canonicalize!
Example 23
        final_samples = None
        if evened:
            final_samples = normal_samples[:-1]
        elif constant:
            final_samples = normal_samples
        else:
            final_samples = normal_samples[:prod(size)]

        if not size:
            # Force the dtype to be int64, otherwise reshape complains
            size = tensor.constant(size, dtype='int64')
        final_samples = final_samples.reshape(size)

        final_samples = avg + std * final_samples

        assert final_samples.dtype == dtype
        return final_samples


@local_optimizer([mrg_uniform])
def mrg_random_make_inplace(node):
    op = node.op
    if isinstance(op, mrg_uniform) and not op.inplace:
        # op might be gpu version
        new_op = op.__class__(op.output_type, inplace=True)
        return new_op.make_node(*node.inputs).outputs
    return False
optdb.register('random_make_inplace_mrg',
               opt.in2out(mrg_random_make_inplace, ignore_newtrees=True),
               99, 'fast_run', 'inplace')
Example 24
        # so trying this instead
        first_half = sqrt_ln_U1 * cos(numpy.array(2.0 * numpy.pi, dtype=dtype) * U2)
        second_half = sqrt_ln_U1 * sin(numpy.array(2.0 * numpy.pi, dtype=dtype) * U2)
        normal_samples = join(0, first_half, second_half)

        final_samples = None
        if evened:
            final_samples = normal_samples[:-1]
        elif constant:
            final_samples = normal_samples
        else:
            final_samples = normal_samples[:prod(size)]

        if size:
            final_samples = final_samples.reshape(size)

        final_samples = avg + std * final_samples

        assert final_samples.dtype == dtype
        return final_samples

@local_optimizer([None])
def mrg_random_make_inplace(node):
    op = node.op
    if isinstance(op, mrg_uniform) and not op.inplace:
        # op might be gpu version
        new_op = op.__class__(op.output_type, inplace=True)
        return new_op.make_node(*node.inputs).outputs
    return False
optdb.register('random_make_inplace_mrg',
               opt.in2out(mrg_random_make_inplace, ignore_newtrees=True),
               99, 'fast_run', 'inplace')
Example 25
    return op(random_state, size, n, pvals)


@gof.local_optimizer([RandomFunction])
def random_make_inplace(node):
    op = node.op
    if isinstance(op, RandomFunction) and not op.inplace:
        # Read op_fn from op.state, not from op.fn, since op.fn
        # may not be picklable.
        op_fn, op_outtype, op_inplace, op_ndim_added = op._props()
        new_op = RandomFunction(op_fn, op_outtype, inplace=True,
                                ndim_added=op_ndim_added)
        return new_op.make_node(*node.inputs).outputs
    return False

optdb.register('random_make_inplace', opt.in2out(random_make_inplace,
                                                 ignore_newtrees=True),
               99, 'fast_run', 'inplace')


class RandomStreamsBase(object):

    def binomial(self, size=None, n=1, p=0.5, ndim=None, dtype='int64',
                 prob=None):
        """
        Sample n times with probability of success p for each trial and
        return the number of successes.

        If the size argument is ambiguous on the number of dimensions,
        ndim may be a plain integer to supplement the missing information.

        """
Example 26
        cannot be performed at that time.
        """
        force_init_beta = check_force_gemv_init()

        return [CGemv(inplace=False, force_init_beta=force_init_beta)(*node.inputs)]
    if node.op == gemv_inplace and node.outputs[0].dtype in ["float32", "float64"]:
        return [CGemv(inplace=True)(*node.inputs)]


@local_optimizer([CGemv(inplace=False)])
def make_c_gemv_destructive(node):
    if node.op == cgemv_no_inplace:
        return [cgemv_inplace(*node.inputs)]


# ##### ####### #######
# Optimizers
# ##### ####### #######

blas_optdb.register("use_c_blas", in2out(use_c_ger, use_c_gemv), 20, "fast_run", "c_blas")

# this matches the InplaceBlasOpt defined in blas.py
optdb.register(
    "c_blas_destructive",
    in2out(make_c_ger_destructive, make_c_gemv_destructive, name="c_blas_destructive"),
    70.0,
    "fast_run",
    "inplace",
    "c_blas",
)
Example 27
conv_groupopt.register('local_conv2d_gradinputs_cpu',
                       local_conv2d_gradinputs_cpu, 40,
                       'fast_compile', 'fast_run')


# Verify that no AbstractConv ops are present in the graph
@local_optimizer([AbstractConv2d,
                  AbstractConv2d_gradWeights,
                  AbstractConv2d_gradInputs])
def local_abstractconv_check(node):
    if isinstance(node.op, AbstractConv2d):
        raise AssertionError(
            'AbstractConv2d theano optimization failed. '
            'Did you exclude both "conv_dnn" and "conv_gemm" from '
            'the optimizer? Is cudnn available and does the GPU support it?')
    elif isinstance(node.op, AbstractConv2d_gradWeights):
        raise AssertionError(
            'AbstractConv2d_gradWeights theano optimization failed. '
            'Did you exclude both "conv_dnn" and "conv_gemm" from '
            'the optimizer? Is cudnn available and does the GPU support it?')
    elif isinstance(node.op, AbstractConv2d_gradInputs):
        raise AssertionError(
            'AbstractConv2d_gradInputs theano optimization failed. '
            'Did you exclude both "conv_dnn" and "conv_gemm" from '
            'the optimizer? Is cudnn available and does the GPU support it?')

optdb.register('AbstracConvCheck',
               opt.in2out(local_abstractconv_check,
                          name="AbstractConvCheck"),
               48.7, 'fast_compile', 'fast_run')
Example 28
        return final_samples


from theano.sandbox.gpuarray.opt import register_opt as register_gpua, host_from_gpu as host_from_gpua


@register_gpua()
@local_optimizer([mrg_uniform])
def local_gpua_mrg(node):
    if type(node.op) == mrg_uniform and isinstance(node.inputs[0].type, GpuArrayType):
        outs = GPUA_mrg_uniform.new(node.inputs[0], node.op.output_type.ndim, node.op.output_type.dtype, node.inputs[1])
        return [outs[0], host_from_gpua(outs[1])]


MRG_RNGs = (mrg_uniform, GPU_mrg_uniform, GPUA_mrg_uniform)


@local_optimizer(MRG_RNGs)
def mrg_random_make_inplace(node):
    op = node.op
    if isinstance(op, MRG_RNGs) and not op.inplace:
        # op might be gpu version
        new_op = op.__class__(op.output_type, inplace=True)
        return new_op.make_node(*node.inputs).outputs
    return False


optdb.register(
    "random_make_inplace_mrg", opt.in2out(mrg_random_make_inplace, ignore_newtrees=True), 99, "fast_run", "inplace"
)
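
A usage sketch tying these registrations together (assuming a standard Theano
1.x install; MRG_RandomStreams is the user-facing wrapper that builds the
mrg_uniform nodes optimized above):

import theano
from theano.sandbox.rng_mrg import MRG_RandomStreams

srng = MRG_RandomStreams(seed=1234)
u = srng.uniform(size=(2, 3))   # builds mrg_uniform apply nodes
nrm = srng.normal(size=(2, 3))  # uses the Box-Muller code shown earlier
f = theano.function([], [u, nrm])
# Under fast_run, random_make_inplace_mrg (registered at 99) replaces these
# ops with their inplace variants before the function is compiled.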
Example 29
                       'fast_compile', 'fast_run')
# Legacy convolution
conv_groupopt.register('local_conv2d_cpu', local_conv2d_cpu, 40,
                       'fast_compile', 'fast_run')
conv_groupopt.register('local_conv2d_gradweight_cpu',
                       local_conv2d_gradweight_cpu, 40, 'fast_compile',
                       'fast_run')
conv_groupopt.register('local_conv2d_gradinputs_cpu',
                       local_conv2d_gradinputs_cpu, 40, 'fast_compile',
                       'fast_run')


# Verify that no AbstractConv ops are present in the graph
@local_optimizer(
    [AbstractConv2d, AbstractConv2d_gradWeights, AbstractConv2d_gradInputs])
def local_abstractconv_check(node):
    if isinstance(node.op, (AbstractConv2d, AbstractConv2d_gradWeights,
                            AbstractConv2d_gradInputs)):
        raise AssertionError(
            '%s Theano optimization failed: there is no implementation '
            'available supporting the requested options. Did you exclude '
            'both "conv_dnn" and "conv_gemm" from the optimizer? If on GPU, '
            'is cuDNN available and does the GPU support it? If on CPU, '
            'do you have a BLAS library installed Theano can link against?' %
            node.op.__class__.__name__)


optdb.register('AbstracConvCheck',
               opt.in2out(local_abstractconv_check, name="AbstractConvCheck"),
               48.7, 'fast_compile', 'fast_run')
Example 30
    return op(random_state, size, n, pvals)


@gof.local_optimizer([RandomFunction])
def random_make_inplace(node):
    op = node.op
    if isinstance(op, RandomFunction) and not op.inplace:
        # Read op_fn from op.state, not from op.fn, since op.fn
        # may not be picklable.
        op_fn, op_outtype, op_inplace, op_ndim_added = op.__getstate__()
        new_op = RandomFunction(op_fn, op_outtype, inplace=True, ndim_added=op_ndim_added)
        return new_op.make_node(*node.inputs).outputs
    return False


optdb.register("random_make_inplace", opt.in2out(random_make_inplace, ignore_newtrees=True), 99, "fast_run", "inplace")


class RandomStreamsBase(object):
    def binomial(self, size=None, n=1, p=0.5, ndim=None, dtype="int64", prob=None):
        """
        Sample n times with probability of success p for each trial and
        return the number of successes.

        If the size argument is ambiguous on the number of dimensions,
        ndim may be a plain integer to supplement the missing
        information.
        """
        if prob is not None:
            p = prob
            print(