Esempi in Python per register (theano.compile.optdb.register)

Esempio n. 1

0

Mostra file

File: PoolHWBCOp.py Progetto: papar22/returnn-hmm

    def __init__(self, pool_shape, inplace, BCHW_grad_output):
        """
        Store the op settings and, for the non-inplace variant, register the
        optimization that substitutes the inplace variant.

        :param pool_shape: iterable of exactly two entries, each > 0; kept as a tuple
        :param inplace: when true, ``destroy_map`` is set to ``{0: [0]}``
        :param BCHW_grad_output: may only be true together with ``inplace``
        """
        pool_shape = tuple(pool_shape)
        super(PoolHWBCOpGrad, self).__init__()
        assert len(pool_shape) == 2, len(pool_shape)
        for extent in pool_shape:
            assert extent > 0, extent
        assert inplace or not BCHW_grad_output
        self.pool_shape = pool_shape
        self.inplace = inplace
        self.BCHW_grad_output = BCHW_grad_output

        if inplace:
            self.destroy_map = {0: [0]}
            return

        # Only the non-inplace op registers the substitution, once per pool
        # shape. The shapes handled so far are kept as an attribute on optdb.
        if not hasattr(optdb, 'PoolHWBCOpGradInplaceOpt_registered'):
            optdb.PoolHWBCOpGradInplaceOpt_registered = []
        handled_shapes = optdb.PoolHWBCOpGradInplaceOpt_registered
        if pool_shape in handled_shapes:
            return

        inplace_variant = PoolHWBCOpGrad(self.pool_shape,
                                         inplace=True,
                                         BCHW_grad_output=False)
        substitution = OpSub(self, inplace_variant)
        handled_shapes.append(pool_shape)
        opt_name = 'PoolHWBCOpGradInplaceOpt' + str(pool_shape)
        topo_opt = theano.gof.TopoOptimizer(
            substitution, failure_callback=gof.TopoOptimizer.warn_inplace)
        optdb.register(opt_name, topo_opt, 50.0,
                       'fast_run', 'inplace', 'gpuarray')

Esempio n. 2

0

Mostra file

File: __init__.py Progetto: MarcCote/Theano

 def f(local_opt):
     """
     Register ``local_opt`` in ``optdb`` wrapped in a ``TopoOptimizer`` and
     return it unchanged, so this function can be used as a decorator.

     The registration name is the ``name`` entry of the enclosing ``kwargs``
     when present and truthy, otherwise ``local_opt.__name__``.
     """
     # pop() with a default: kwargs that are non-empty but carry no 'name'
     # used to raise KeyError here instead of falling back to __name__.
     name = kwargs.pop('name', None) or local_opt.__name__
     optdb.register(
         name, TopoOptimizer(
             local_opt, failure_callback=TopoOptimizer.warn_inplace),
         60, 'fast_run', 'inplace', 'gpu', *tags)
     return local_opt

Esempio n. 3

0

Mostra file

 def f(local_opt):
     """
     Register ``local_opt`` in ``optdb`` wrapped in a ``TopoOptimizer`` and
     return it unchanged, so this function can be used as a decorator.

     The registration name is the ``name`` entry of the enclosing ``kwargs``
     when present and truthy, otherwise ``local_opt.__name__``.
     """
     # pop() with a default: kwargs that are non-empty but carry no 'name'
     # used to raise KeyError here instead of falling back to __name__.
     name = kwargs.pop('name', None) or local_opt.__name__
     optdb.register(
         name, TopoOptimizer(
             local_opt, failure_callback=TopoOptimizer.warn_inplace),
         60, 'fast_run', 'inplace', 'gpuarray', *tags)
     return local_opt

Esempio n. 4

0

Mostra file

File: optdb.py Progetto: michaelosthege/aesara

 def f(local_opt):
     """
     Register ``local_opt`` in ``optdb`` wrapped in a ``TopoOptimizer`` and
     return it unchanged, so this function can be used as a decorator.

     The registration name is the ``name`` entry of the enclosing ``kwargs``
     when present and truthy, otherwise ``local_opt.__name__``.
     """
     # pop() with a default: kwargs that are non-empty but carry no "name"
     # used to raise KeyError here instead of falling back to __name__.
     name = kwargs.pop("name", None) or local_opt.__name__
     optdb.register(
         name,
         TopoOptimizer(local_opt,
                       failure_callback=TopoOptimizer.warn_inplace),
         60,
         "fast_run",
         "inplace",
         "gpuarray",
         *tags,
     )
     return local_opt

Esempio n. 5

0

Mostra file

File: lstm_custom.py Progetto: vieting/returnn

def register_func(recurrent_transform):
    """
    Create the custom LSTM ops for ``recurrent_transform`` and register their
    inplace substitutions in ``optdb``.

    The non-inplace forward op is cached in ``function_ops`` and the
    non-inplace gradient op in ``grad_ops``, both under the key
    ``(recurrent_transform.name, id(recurrent_transform))``. A second call
    with the same key returns the cached forward op right away.

    :type recurrent_transform: RecurrentTransform.RecurrentTransformBase
    :return: the non-inplace forward op stored in ``function_ops``
    """
    fn = recurrent_transform.name
    transform_id = id(recurrent_transform)
    key = (fn, transform_id)
    if key in function_ops:
        return function_ops[key]

    def _register_inplace_once(attr, plain_op, inplace_op):
        # hack to avoid registering twice: a marker attribute on optdb
        # records that this optimization name was already handled
        if hasattr(optdb, attr):
            return
        substitution = OpSub(plain_op, inplace_op)
        optdb.register(attr, theano.gof.TopoOptimizer(substitution), 50.0,
                       'fast_run', 'inplace', 'gpuarray')
        setattr(optdb, attr, True)

    op_kwargs = dict(fun_name=fn, recurrent_transform=recurrent_transform)

    # forward op
    forward_op = LSTMCustomOp(inplace=False, **op_kwargs)
    forward_inplace_op = LSTMCustomOp(inplace=True, **op_kwargs)
    function_ops[key] = forward_op
    _register_inplace_once(
        'LSTMCustomMOpInplaceOpt_%s_%i' % (fn, transform_id),
        forward_op, forward_inplace_op)

    # the same for the gradient op
    backward_op = LSTMCustomOpGrad(inplace=False, **op_kwargs)
    backward_inplace_op = LSTMCustomOpGrad(inplace=True, **op_kwargs)
    grad_ops[key] = backward_op
    _register_inplace_once(
        'LSTMCustomMOpGradInplaceOpt_%s_%i' % (fn, transform_id),
        backward_op, backward_inplace_op)

    return function_ops[key]

Esempio n. 6

0

Mostra file

File: PoolHWBCOp.py Progetto: chagge/returnn

  def __init__(self, pool_shape, inplace, BCHW_grad_output):
    """
    Validate and store the op settings. The non-inplace variant additionally
    registers, once per pool shape, the optimization built from
    ``OpSub(self, <inplace variant>)``.

    :param pool_shape: iterable with exactly two entries, each > 0; stored as a tuple
    :param inplace: when true, ``destroy_map`` is set to ``{0: [0]}``
    :param BCHW_grad_output: asserted to be used only together with ``inplace``
    """
    pool_shape = tuple(pool_shape)
    super(PoolHWBCOpGrad, self).__init__()
    assert len(pool_shape) == 2, len(pool_shape)
    assert pool_shape[0] > 0, pool_shape[0]
    assert pool_shape[1] > 0, pool_shape[1]
    if BCHW_grad_output:
      assert inplace
    self.pool_shape = pool_shape
    self.inplace = inplace
    self.BCHW_grad_output = BCHW_grad_output

    if inplace:
      self.destroy_map = {0: [0]}
    else:
      # Register the inplace optimization for this pool_shape.
      # The list of shapes handled so far is kept as an attribute on optdb,
      # so each shape is registered only once.
      if not hasattr(optdb, 'PoolHWBCOpGradInplaceOpt_registered'):
        optdb.PoolHWBCOpGradInplaceOpt_registered = []
      if pool_shape not in optdb.PoolHWBCOpGradInplaceOpt_registered:
        # The replacement op is created with inplace=True, so constructing it
        # here does not enter this registration branch again.
        PoolHWBCOpGradInplaceOpt = OpSub(self, PoolHWBCOpGrad(self.pool_shape, inplace=True, BCHW_grad_output=False))
        optdb.PoolHWBCOpGradInplaceOpt_registered.append(pool_shape)
        optdb.register('PoolHWBCOpGradInplaceOpt' + str(pool_shape),
                       theano.gof.TopoOptimizer(PoolHWBCOpGradInplaceOpt, failure_callback=gof.TopoOptimizer.warn_inplace),
                       50.0, 'fast_run', 'inplace', 'gpuarray')

Esempio n. 7

0

Mostra file

File: OpLSTMCustom.py Progetto: rwth-i6/returnn

def register_func(recurrent_transform):
  """
  Create the custom LSTM forward and gradient ops for ``recurrent_transform``
  and register their inplace substitutions in ``optdb``.

  The non-inplace ops are stored in ``function_ops`` / ``grad_ops`` under the
  key ``(recurrent_transform.name, id(recurrent_transform))``. If that key is
  already in ``function_ops``, the stored op is returned without doing
  anything else.

  :type recurrent_transform: RecurrentTransform.RecurrentTransformBase
  :return: the non-inplace forward op stored in ``function_ops``
  """
  fn = recurrent_transform.name
  key = (fn, id(recurrent_transform))
  if key in function_ops:
    return function_ops[key]

  # register op
  no_inpl = LSTMCustomOp(fun_name=fn, inplace=False, recurrent_transform=recurrent_transform)
  inpl = LSTMCustomOp(fun_name=fn, inplace=True, recurrent_transform=recurrent_transform)
  function_ops[key] = no_inpl

  # hack to avoid being called twice:
  # a marker attribute on optdb records that this name was registered
  attr = 'LSTMCustomMOpInplaceOpt_%s_%i' % (fn, id(recurrent_transform))
  if not hasattr(optdb, attr):
    opt = OpSub(no_inpl, inpl)
    optdb.register(attr, theano.gof.TopoOptimizer(opt),
                   50.0, 'fast_run', 'inplace', 'gpuarray')
    setattr(optdb, attr, True)

  # the same for grad (no_inpl / inpl are rebound to the gradient ops)
  no_inpl = LSTMCustomOpGrad(fun_name=fn, inplace=False, recurrent_transform=recurrent_transform)
  inpl = LSTMCustomOpGrad(fun_name=fn, inplace=True, recurrent_transform=recurrent_transform)
  grad_ops[key] = no_inpl

  # hack to avoid being called twice
  attr = 'LSTMCustomMOpGradInplaceOpt_%s_%i' % (fn, id(recurrent_transform))
  if not hasattr(optdb, attr):
    opt = OpSub(no_inpl, inpl)
    optdb.register(attr, theano.gof.TopoOptimizer(opt),
                   50.0, 'fast_run', 'inplace', 'gpuarray')
    setattr(optdb, attr, True)

  return function_ops[key]

Esempio n. 8

0

Mostra file

File: builders.py Progetto: Fadh1/Virtual-Tutor

            # we wont need this copy anymore
            output[0] = variable.copy()


@gof.local_optimizer([OpFromGraph])
def inline_ofg_expansion(node):
    """
    Expand the internal graph of an ``OpFromGraph`` node.

    Only applies when ``node.op`` is an ``OpFromGraph`` whose ``is_inline``
    is true; otherwise ``False`` is returned. Doing so can improve
    optimization at the cost of compilation speed.

    The result is a clone of the op's local outputs in which each local
    input is replaced by the corresponding input of ``node``.
    """
    ofg = node.op
    if not (isinstance(ofg, OpFromGraph) and ofg.is_inline):
        return False
    inner_outputs = ofg.local_outputs
    substitutions = dict(izip(ofg.local_inputs, node.inputs))
    return theano.clone(inner_outputs, substitutions)


# We want to run this before the first merge optimizer
# and before the first scan optimizer.
# NOTE(review): the negative position (-0.01) is presumably what achieves
# this ordering -- confirm against the positions of those optimizers.
optdb.register('inline_ofg_expansion', gof.opt.in2out(inline_ofg_expansion),
               -0.01, 'fast_compile', 'fast_run')

# Since OpFromGraph contains a Theano compiled function,
# we should let DebugMode know about it
# ('fn' is presumably the name of the attribute holding that function --
# TODO confirm).
ops_with_inner_function[OpFromGraph] = 'fn'

Esempio n. 9

0

Mostra file

File: CuDNNConvHWBCOp.py Progetto: papar22/returnn-hmm

                                                                inplace=False)
CuDNNConvHWBCOpGradValidInplaceInstance = CuDNNConvHWBCOpGrad("valid",
                                                              inplace=True)
CuDNNConvHWBCOpGradFullNoInplaceInstance = CuDNNConvHWBCOpGrad("full",
                                                               inplace=False)
CuDNNConvHWBCOpGradFullInplaceInstance = CuDNNConvHWBCOpGrad("full",
                                                             inplace=True)

# Substitution of the non-inplace "valid" gradient op instance by the
# inplace instance.
CuDNNConvHWBCOpGradValidInplaceOpt = OpSub(
    CuDNNConvHWBCOpGradValidNoInplaceInstance,
    CuDNNConvHWBCOpGradValidInplaceInstance)
# hack to avoid being called twice: a marker attribute on optdb records
# that the optimization was already registered
if not hasattr(optdb, 'CuDNNConvHWBCOpGradValidInplaceOpt_registered'):
    optdb.register(
        'CuDNNConvHWBCOpGradValidInplaceOpt',
        theano.gof.TopoOptimizer(
            CuDNNConvHWBCOpGradValidInplaceOpt,
            failure_callback=gof.TopoOptimizer.warn_inplace), 50.0, 'fast_run',
        'inplace', 'gpuarray')
    optdb.CuDNNConvHWBCOpGradValidInplaceOpt_registered = True

#TODO: maybe this optimization causes problems
#CuDNNConvHWBCOpGradFullInplaceOpt = OpSub(CuDNNConvHWBCOpGradFullNoInplaceInstance, CuDNNConvHWBCOpGradFullInplaceInstance)
##hack to avoid being called twice
#if not hasattr(optdb, 'CuDNNConvHWBCOpGradFullInplaceOpt_registered'):
#  optdb.register('CuDNNConvHWBCOpGradFullInplaceOpt',
#                 theano.gof.TopoOptimizer(CuDNNConvHWBCOpGradFullInplaceOpt, failure_callback=gof.TopoOptimizer.warn_inplace),
#                 50.0, 'fast_run', 'inplace', 'gpuarray')
#  optdb.CuDNNConvHWBCOpGradFullInplaceOpt_registered = True

#------------------------------------------------------

Esempio n. 10

0

Mostra file

File: CropToBatchImageSizeOp.py Progetto: papar22/returnn-hmm

  def infer_shape(self, node, input_shapes):
    """Return a one-element tuple holding the shape of the first input."""
    first_input_shape = input_shapes[0]
    return (first_input_shape,)

  #def c_code_cache_version(self):
  #  return 1, 0


# Four op instances: first constructor argument -1e20 or 0.0, each with the
# second argument False and True. Judging by the instance names, the second
# argument is the inplace flag -- TODO confirm against the constructor.
CropToBatchImageSizeInstance = CropToBatchImageSizeOp(-1e20, False)
CropToBatchImageSizeInplaceInstance = CropToBatchImageSizeOp(-1e20, True)
CropToBatchImageSizeZeroInstance = CropToBatchImageSizeOp(0.0, False)
CropToBatchImageSizeZeroInplaceInstance = CropToBatchImageSizeOp(0.0, True)


# Substitute the -1e20 instance by its inplace counterpart.
CropToBatchImageSizeGradInplaceOpt1 = OpSub(CropToBatchImageSizeInstance, CropToBatchImageSizeInplaceInstance)
# hack to avoid being called twice: a marker attribute on optdb records
# that the optimization was already registered
if not hasattr(optdb, 'CropToBatchImageSizeGradInplaceOpt1_registered'):
  optdb.register('CropToBatchImageSizeGradInplaceOpt1',
                 theano.gof.TopoOptimizer(CropToBatchImageSizeGradInplaceOpt1,
                                          failure_callback=gof.TopoOptimizer.warn_inplace),
                 50.0, 'fast_run', 'inplace', 'gpuarray')
  optdb.CropToBatchImageSizeGradInplaceOpt1_registered = True

# Substitute the 0.0 instance by its inplace counterpart.
CropToBatchImageSizeGradInplaceOpt2 = OpSub(CropToBatchImageSizeZeroInstance, CropToBatchImageSizeZeroInplaceInstance)
# hack to avoid being called twice
if not hasattr(optdb, 'CropToBatchImageSizeGradInplaceOpt2_registered'):
  optdb.register('CropToBatchImageSizeGradInplaceOpt2',
                 theano.gof.TopoOptimizer(CropToBatchImageSizeGradInplaceOpt2,
                                          failure_callback=gof.TopoOptimizer.warn_inplace),
                 50.0, 'fast_run', 'inplace', 'gpuarray')
  optdb.CropToBatchImageSizeGradInplaceOpt2_registered = True

Esempio n. 11

0

Mostra file

File: multi_batch_beam.py Progetto: papar22/returnn-hmm

        grad_op = grad_op.__class__(**kwargs)
    else:
        old_grad_op_input0 = grad_op_v.owner.inputs[0]
        sum_inputs = [old_grad_op_input0] + sum_inputs
    assert len(sum_inputs) > 0
    if len(sum_inputs) == 1:
        new_grad_op_input0 = sum_inputs[0]
    else:
        new_grad_op_input0 = T.add(*sum_inputs)
    new_grad_op_inputs = [new_grad_op_input0] + grad_op_v.owner.inputs[1:]
    new_v = grad_op(*new_grad_op_inputs)
    return [new_v]


# Register the merge optimizer in optdb at position 0.1 with the 'fast_run'
# tag only (no 'inplace' tag, unlike the inplace optimizers).
optdb.register('add_merge_MultiBatchBeamGradAddOp',
               gof.TopoOptimizer(add_merge_MultiBatchBeamGradAddOp), 0.1,
               'fast_run')


@gof.local_optimizer([MultiBatchBeamGradAddOp], inplace=True)
def inplace_MultiBatchBeamGradAddOp(node):
    """
    Replace a ``MultiBatchBeamGradAddOp`` by an inplace copy of itself.

    Applies only when the op is neither already inplace nor has
    ``zero_with_shape`` set. The new op is built from the same class with
    all ``__props__`` values copied and ``inplace`` forced to ``True``.

    :return: list with the output of the new op, or ``False`` if not applicable
    """
    op = node.op
    if not isinstance(op, MultiBatchBeamGradAddOp):
        return False
    if op.inplace or op.zero_with_shape:
        return False
    props = dict((prop, getattr(op, prop)) for prop in op.__props__)
    props["inplace"] = True
    inplace_op = op.__class__(**props)
    return [inplace_op(*node.inputs)]

Esempio n. 12

0

Mostra file

File: opt.py Progetto: DingKe/attention-lvcsr

conv_groupopt.register('local_conv2d_gradinputs_cpu',
                       local_conv2d_gradinputs_cpu, 40,
                       'fast_compile', 'fast_run')


# Verify that no AbstractConv are present in the graph
@local_optimizer([AbstractConv2d,
                  AbstractConv2d_gradWeights,
                  AbstractConv2d_gradInputs])
def local_abstractconv_check(node):
    """
    Raise ``AssertionError`` if ``node.op`` is still one of the abstract
    convolution ops; return ``None`` otherwise.
    """
    hint = ('Did you exclude both "conv_dnn" and "conv_gemm" from '
            'the optimizer? Is cudnn available and does the GPU support it?')
    # Checked in the same order as the tracked op list in the decorator.
    abstract_ops = (
        (AbstractConv2d, 'AbstractConv2d'),
        (AbstractConv2d_gradWeights, 'AbstractConv2d_gradWeights'),
        (AbstractConv2d_gradInputs, 'AbstractConv2d_gradInputs'),
    )
    for op_class, label in abstract_ops:
        if isinstance(node.op, op_class):
            raise AssertionError(
                label + ' theano optimization failed. ' + hint)

# Register the check in optdb at position 48.7.
# NOTE(review): the optdb name is spelled 'AbstracConvCheck' (no second 't')
# while the optimizer itself is named "AbstractConvCheck". Both are runtime
# strings and are left untouched here, since changing the registered name
# could affect anything that refers to it by name.
optdb.register('AbstracConvCheck',
               opt.in2out(local_abstractconv_check,
                          name="AbstractConvCheck"),
               48.7, 'fast_compile', 'fast_run')

Esempio n. 13

0

Mostra file

File: raw_random.py Progetto: jtirtanata/flask-makeup-app


@gof.local_optimizer([RandomFunction])
def random_make_inplace(node):
    """
    Replace a non-inplace ``RandomFunction`` node by an inplace one.

    :return: outputs of the new node, or ``False`` when ``node.op`` is not a
        ``RandomFunction`` or is already inplace
    """
    op = node.op
    if not isinstance(op, RandomFunction) or op.inplace:
        return False
    # The function is taken from op._props() rather than from op.fn,
    # since op.fn may not be picklable. The third entry (the old inplace
    # flag) is not needed because the new op is always inplace.
    fn, outtype, _old_inplace, ndim_added = op._props()
    inplace_op = RandomFunction(fn, outtype, inplace=True,
                                ndim_added=ndim_added)
    inplace_node = inplace_op.make_node(*node.inputs)
    return inplace_node.outputs

# Register the inplace substitution in optdb at position 99 with the
# 'fast_run' and 'inplace' tags.
optdb.register('random_make_inplace', opt.in2out(random_make_inplace,
                                                 ignore_newtrees=True),
               99, 'fast_run', 'inplace')


class RandomStreamsBase(object):

    def binomial(self, size=None, n=1, p=0.5, ndim=None, dtype='int64',
                 prob=None):
        """
        Sample n times with probability of success p for each trial and
        return the number of successes.

        If the size argument is ambiguous on the number of dimensions,
        ndim may be a plain integer to supplement the missing information.

        """

Esempio n. 14

0

Mostra file

File: blas.py Progetto: Jackwangyang/Theano

gpu_dot22 = GpuDot22()

from theano.compile import optdb
from theano.gof import local_optimizer, LocalOptGroup
from theano.tensor.opt import in2out


@local_optimizer([gpugemv_no_inplace], inplace=True)
def local_inplace_gpuagemv(node):
    """Swap ``gpugemv_no_inplace`` for ``gpugemv_inplace``; ``None`` for any other op."""
    if not (node.op == gpugemv_no_inplace):
        return None
    return [gpugemv_inplace(*node.inputs)]


@local_optimizer([gpugemm_no_inplace], inplace=True)
def local_inplace_gpuagemm(node):
    """Swap ``gpugemm_no_inplace`` for ``gpugemm_inplace``; ``None`` for any other op."""
    if not (node.op == gpugemm_no_inplace):
        return None
    return [gpugemm_inplace(*node.inputs)]


@local_optimizer([gpuger_no_inplace], inplace=True)
def local_inplace_gpuager(node):
    """Swap ``gpuger_no_inplace`` for ``gpuger_inplace``; ``None`` for any other op."""
    if not (node.op == gpuger_no_inplace):
        return None
    return [gpuger_inplace(*node.inputs)]

# Group the three inplace substitutions (gemv, gemm, ger) into a single
# optimizer and register it in optdb at position 70.0.
gpuablas_opt_inplace = in2out(LocalOptGroup(
        local_inplace_gpuagemv, local_inplace_gpuagemm, local_inplace_gpuager),
                              name='gpuablas_opt_inplace')
optdb.register('InplaceGpuaBlasOpt',
               gpuablas_opt_inplace,
               70.0, 'fast_run', 'inplace', 'gpuarray')

Esempio n. 15

0

Mostra file

File: nerv.py Progetto: huamichaelchen/Theano


@opt.register_opt()
@alpha_merge(Gemm16, alpha_in=1, beta_in=4)
def local_gemm16_alpha_merge(node, *inputs):
    """Apply a new ``Gemm16`` with the same ``relu`` setting to ``inputs``."""
    merged_op = Gemm16(relu=node.op.relu)
    return [merged_op(*inputs)]


@opt.register_opt()
@output_merge(Gemm16, alpha_in=1, beta_in=4, out_in=0)
def local_gemm16_output_merge(node, *inputs):
    """Apply a new ``Gemm16`` with the same ``relu`` setting to ``inputs``."""
    merged_op = Gemm16(relu=node.op.relu)
    return [merged_op(*inputs)]


@local_optimizer([Gemm16], inplace=True)
def local_gemm16_inplace(node):
    """
    Return the output of an inplace ``Gemm16`` replacing ``node``, or ``None``
    when ``node.op`` is not exactly a ``Gemm16`` or is already inplace.
    """
    gemm_op = node.op
    if type(gemm_op) != Gemm16 or gemm_op.inplace:
        return None
    new_inputs = list(node.inputs)
    target = new_inputs[0]
    # If the first input comes from a GpuAllocEmpty that has more than one
    # client, give this node its own allocation built from the same inputs.
    if target.owner and isinstance(target.owner.op, GpuAllocEmpty):
        if len(target.clients) > 1:
            new_inputs[0] = target.owner.op(*target.owner.inputs)
    inplace_gemm = Gemm16(relu=gemm_op.relu, inplace=True)
    return [inplace_gemm(*new_inputs)]

# Register the inplace substitution in optdb at position 70.0 with the
# 'fast_run', 'inplace' and 'gpuarray' tags.
optdb.register('local_gemm16_inplace',
               tensor.opt.in2out(local_gemm16_inplace,
                                 name='local_gemm16_inplace'),
               70.0, 'fast_run', 'inplace', 'gpuarray')

Esempio n. 16

0

Mostra file

File: dnn.py Progetto: hhoareau/Theano

@inplace_allocempty(GpuDnnConvGradW, 2)
def local_dnn_convgw_inplace(node, inputs):
    """Apply an inplace ``GpuDnnConvGradW`` with the same ``algo`` to ``inputs``."""
    inplace_op = GpuDnnConvGradW(algo=node.op.algo, inplace=True)
    return [inplace_op(*inputs)]


@inplace_allocempty(GpuDnnConvGradI, 2)
def local_dnn_convgi_inplace(node, inputs):
    """Apply an inplace ``GpuDnnConvGradI`` with the same ``algo`` to ``inputs``."""
    inplace_op = GpuDnnConvGradI(algo=node.op.algo, inplace=True)
    return [inplace_op(*inputs)]


# Register the three cuDNN inplace substitutions (conv, gradW, gradI) as one
# optimizer in optdb at position 70.0.
optdb.register(
    "local_dnna_conv_inplace",
    tensor.opt.in2out(
        local_dnn_conv_inplace, local_dnn_convgw_inplace, local_dnn_convgi_inplace, name="local_dnna_conv_inplace"
    ),
    70.0,
    "fast_run",
    "inplace",
    "gpuarray",
    "cudnn",
)


@register_opt("cudnn")
@alpha_merge(GpuDnnConv, alpha_in=4, beta_in=5)
def local_dnn_conv_alpha_merge(node, *inputs):
    """Apply a new ``GpuDnnConv`` with the same ``algo`` to ``inputs``."""
    merged_op = GpuDnnConv(algo=node.op.algo)
    return [merged_op(*inputs)]


@register_opt("cudnn")
@alpha_merge(GpuDnnConvGradW, alpha_in=4, beta_in=5)

Esempio n. 17

0

Mostra file

File: OpLSTM.py Progetto: atuxhe/returnn

    }

    """ % locals()

  #!!! change this when changing the code!
  def c_code_cache_version(self):
    """Return the cache version tuple; change it whenever the C code changes."""
    version = (1, 5)
    return version

# One non-inplace and one inplace instance of the gradient op, plus the
# substitution that swaps the former for the latter.
LSTMOpGradNoInplaceInstance = LSTMOpGrad(inplace=False)
LSTMOpGradInplaceInstance = LSTMOpGrad(inplace=True)

LSTMOpGradInplaceOpt = OpSub(LSTMOpGradNoInplaceInstance, LSTMOpGradInplaceInstance)

# hack to avoid being called twice: a marker attribute on optdb records
# that the optimization was already registered
if not hasattr(optdb, 'LSTMOpGradInplaceOpt_registered'):
  optdb.register('LSTMOpGradInplaceOpt', theano.gof.TopoOptimizer(LSTMOpGradInplaceOpt),
                 50.0, 'fast_run', 'inplace', 'gpuarray')
  optdb.LSTMOpGradInplaceOpt_registered = True


#------------------------

class LSTMOp(theano.sandbox.cuda.GpuOp):
  def __init__(self, inplace):
    self.inplace = inplace
    if inplace:
      #all outputs operate inplace on input 0 (which is Z)
      #but when the input is marked multiple times, we get an error
      #so we only mark that output 0 destroys input 0
      #anyway theano knows that input 0 will be destroyed, so it should be OK
      #TODO
      self.destroy_map = {0: [0]}

Esempio n. 18

0

Mostra file

File: opt.py Progetto: olivierverdier/Theano

    GpuCrossentropySoftmaxArgmax1HotWithBias,
    GpuCrossentropySoftmax1HotWithBiasDx,
    GpuSoftmax,
    GpuSoftmaxWithBias,
)
from theano.compile import optdb
from theano.tensor.blas import _is_real_vector, _is_real_matrix

# optdb.print_summary()  # shows what is currently registered

# gpu_seqopt runs the local GPU optimizations (position 1) and then the
# transfer-cutting optimizations (position 2).
gpu_optimizer = EquilibriumDB()
gpu_cut_copies = EquilibriumDB()
gpu_seqopt = SequenceDB()
gpu_seqopt.register("gpu_local_optimizations", gpu_optimizer, 1, "fast_run", "inplace")
gpu_seqopt.register("gpu_cut_transfers", gpu_cut_copies, 2, "fast_run", "gpu")
# Placed one position before "add_destroy_handler" (49.5 is the fallback
# when that entry is not in optdb.__position__).
optdb.register("gpu_opt", gpu_seqopt, optdb.__position__.get("add_destroy_handler", 49.5) - 1, "gpu")
# This second pass is needed because the fusion can put all the non-float32
# code inside the elemwise. This works when there is no float64 op.
# Placed 0.1 after "elemwise_fusion" (71 is the fallback position).
optdb.register("gpu_after_fusion", ProxyDB(gpu_seqopt), optdb.__position__.get("elemwise_fusion", 71) + 0.1, "gpu")


def register_opt(*tags, **kwargs):
    """
    Build a decorator that registers a local optimizer in ``gpu_optimizer``.

    :param tags: extra tags passed to ``gpu_optimizer.register`` after
        "fast_run" and "inplace"
    :param kwargs: may contain ``name``, the name to register under; when it
        is missing or falsy, the ``__name__`` of the decorated optimizer is used
    :return: decorator that registers its argument and returns it unchanged
    """
    def f(local_opt):
        # Read the name without requiring or removing it. The previous
        # ``kwargs and kwargs.pop("name")`` raised KeyError for non-empty
        # kwargs without "name", and dropped the name after the first use
        # of the decorator.
        name = kwargs.get("name") or local_opt.__name__
        gpu_optimizer.register(name, local_opt, "fast_run", "inplace", *tags)
        return local_opt

    return f


# register local_track_shape_i at this level too

Esempio n. 19

0

Mostra file

            ]), msg
        else:
            msg = "size must be a tuple of int or a Theano variable"
            assert isinstance(size, Variable) and size.ndim == 1, msg
        generator = theano.shared(False)  # makes a generic
        s_size = theano.tensor.as_tensor_variable(size)
        u = CURAND_Normal.new_auto_update(generator, ndim, dtype, s_size,
                                          self.next_seed())
        self.state_updates.append(u.update)
        rval = u * std + avg
        if u.type.broadcastable != rval.type.broadcastable:
            raise NotImplementedError(
                'Increase the size to match the broadcasting pattern of `low`'
                'and `high` arguments')
        return rval


@local_optimizer([None])
def local_destructive(node):
    """
    Replace a non-destructive ``CURAND_Base`` op by its destructive version.

    :return: outputs of the new node, or ``False`` when ``node.op`` is not a
        ``CURAND_Base`` or is already destructive
    """
    current_op = node.op
    if not isinstance(current_op, CURAND_Base) or current_op.destructive:
        return False
    # the op might be the gpu version
    destructive_op = current_op.as_destructive()
    destructive_node = destructive_op.make_node(*node.inputs)
    return destructive_node.outputs


# Register the destructive substitution in optdb at position 99 with the
# 'fast_run' and 'inplace' tags.
optdb.register('CURAND_destructive',
               opt.in2out(local_destructive, ignore_newtrees=True), 99,
               'fast_run', 'inplace')

Esempio n. 20

0

Mostra file

File: NativeOp.py Progetto: chagge/returnn

    any_inplace = False
    for info in kwargs["in_info"]:
      if info.get("want_inplace", -1) >= 0:
        any_inplace = True
        info["is_inplace"] = True
    if not any_inplace:
      return False
    new_op = node.op.__class__(**kwargs)
    from TheanoUtil import make_var_tuple
    new_v = make_var_tuple(new_op(*node.inputs))
    return new_v
  return False

# Register the inplace substitution in optdb at position 60; failures are
# reported through TopoOptimizer.warn_inplace.
optdb.register('inplace_NativeOp',
               gof.TopoOptimizer(inplace_NativeOp
                                 , failure_callback=gof.TopoOptimizer.warn_inplace
                                 ),
               60, 'fast_run', 'inplace')


@try_register_gpu_opt(NativeOp)
def local_gpu_NativeOp(node):
  if isinstance(node.op, NativeOp):
    # see also: https://github.com/Theano/Theano/blob/master/theano/sandbox/cuda/opt.py
    from theano.sandbox.cuda import host_from_gpu, gpu_from_host, as_cuda_ndarray_variable
    args = node.inputs
    if any([(x.owner and x.owner.op == host_from_gpu) for x in args]):
      gpu_op = GpuNativeOp(**{key: getattr(node.op, key) for key in node.op.__props__})
      args = [x.owner.inputs[0] if (x.owner and x.owner.op == host_from_gpu) else x
              for x in args]
      from TheanoUtil import make_var_tuple

Esempio n. 21

0

Mostra file

File: PoolHWBCOp.py Progetto: chagge/returnn

  def add_requirements(self, fgraph):
    """Attach a ``ReplaceValidate`` feature to ``fgraph`` (``apply`` uses ``replace_validate``)."""
    validate_feature = toolbox.ReplaceValidate()
    fgraph.attach_feature(validate_feature)

  def apply(self, fgraph):
    """
    Walk ``fgraph`` in topological order and, for every ``GpuDimShuffle``
    with ``new_order == (2, 3, 0, 1)`` whose input is produced by an inplace
    ``PoolHWBCOpGrad``: remove the dimshuffle and replace the producing op
    by a ``PoolHWBCOpGrad`` created with ``BCHW_grad_output=True``.
    """
    for node in fgraph.toposort():
      if type(node.op) == GpuDimShuffle and node.op.new_order == (2, 3, 0, 1):
        X = node.inputs[0]
        # hasattr(...) also covers X.owner being None (graph inputs)
        if hasattr(X.owner, "op") and type(X.owner.op) == PoolHWBCOpGrad and X.owner.op.inplace:
          # 1) bypass the dimshuffle: its output is replaced by its input
          fgraph.replace_validate(node.outputs[0], node.inputs[0])
          # 2) swap the producing op for the variant with BCHW_grad_output=True
          #    (presumably it emits the dimshuffled layout directly -- TODO confirm)
          replace_op = PoolHWBCOpGrad(X.owner.op.pool_shape, inplace=True, BCHW_grad_output=True)
          fgraph.replace_validate(X.owner.outputs[0], replace_op(*X.owner.inputs))

# Register a single optimizer instance in optdb at position 50.5. The marker
# attribute on optdb keeps it from being registered a second time.
RemoveConvGradDimshuffleOptimizer = RemoveConvGradDimshuffle()
if not hasattr(optdb, 'RemoveConvGradDimshuffleOptimizer_registered'):
  optdb.register('RemoveConvGradDimshuffle', RemoveConvGradDimshuffleOptimizer, 50.5, 'fast_run', 'inplace', 'gpuarray')
  optdb.RemoveConvGradDimshuffleOptimizer_registered = True

#---------------------------


#for the moment we implement only ignore_border = True and no padding
class PoolHWBCOp(theano.sandbox.cuda.GpuOp):
  __props__ = ("pool_shape",)

  def __init__(self, pool_shape):
    pool_shape = tuple(pool_shape)
    super(PoolHWBCOp, self).__init__()
    assert len(pool_shape) == 2, len(pool_shape)
    assert pool_shape[0] > 0, pool_shape[0]
    assert pool_shape[1] > 0, pool_shape[1]

Esempio n. 22

0

Mostra file

File: dnn.py Progetto: orhanf/Theano

def local_dnn_conv_inplace(node, inputs):
    return [GpuDnnConv(algo=node.op.algo, inplace=True)(*inputs)]


@inplace_allocempty(GpuDnnConvGradW, 2)
def local_dnn_convgw_inplace(node, inputs):
    """Apply an inplace ``GpuDnnConvGradW`` with the same ``algo`` to ``inputs``."""
    inplace_op = GpuDnnConvGradW(algo=node.op.algo, inplace=True)
    return [inplace_op(*inputs)]


@inplace_allocempty(GpuDnnConvGradI, 2)
def local_dnn_convgi_inplace(node, inputs):
    """Apply an inplace ``GpuDnnConvGradI`` with the same ``algo`` to ``inputs``."""
    inplace_op = GpuDnnConvGradI(algo=node.op.algo, inplace=True)
    return [inplace_op(*inputs)]

# Register the three cuDNN inplace substitutions (conv, gradW, gradI) as one
# optimizer in optdb at position 70.0.
optdb.register('local_dnna_conv_inplace',
               tensor.opt.in2out(local_dnn_conv_inplace,
                                 local_dnn_convgw_inplace,
                                 local_dnn_convgi_inplace,
                                 name="local_dnna_conv_inplace"),
               70.0, 'fast_run', 'inplace', 'gpuarray', 'cudnn')


@register_opt('cudnn')
@alpha_merge(GpuDnnConv, alpha_in=4, beta_in=5)
def local_dnn_conv_alpha_merge(node, *inputs):
    """Apply a new ``GpuDnnConv`` with the same ``algo`` to ``inputs``."""
    merged_op = GpuDnnConv(algo=node.op.algo)
    return [merged_op(*inputs)]


@register_opt('cudnn')
@alpha_merge(GpuDnnConvGradW, alpha_in=4, beta_in=5)
def local_dnn_convw_alpha_merge(node, *inputs):
    """Apply a new ``GpuDnnConvGradW`` with the same ``algo`` to ``inputs``."""
    merged_op = GpuDnnConvGradW(algo=node.op.algo)
    return [merged_op(*inputs)]

Esempio n. 23

0

Mostra file

File: TheanoUtil.py Progetto: tazdriver/returnn

        dout = T.as_tensor_variable(dout)
        return [dout]


@gof.local_optimizer([Contiguous], inplace=True)
def opt_remove_contiguous(node):
    """
    Replace a ``Contiguous`` node by its input when that input is produced by
    ``T.Alloc``, ``T.AllocEmpty`` or ``T.extra_ops.CpuContiguous``.

    :return: list with the input variable, or ``False`` if not applicable
    """
    if not isinstance(node.op, Contiguous):
        return False
    (source,) = node.inputs
    producer = source.owner
    if not producer:
        return False
    removable_producers = (T.Alloc, T.AllocEmpty, T.extra_ops.CpuContiguous)
    if isinstance(producer.op, removable_producers):
        return [source]
    return False


# Register the removal in optdb at position 10 with the 'fast_run' tag.
optdb.register('opt_remove_contiguous',
               gof.TopoOptimizer(opt_remove_contiguous), 10, 'fast_run')


# Theano will not do this optimization. So we register it now.
# See: https://github.com/Theano/Theano/issues/4400
@try_register_gpu_opt(Contiguous)
def local_gpu_Contiguous(node):
    """
    Move a ``Contiguous`` op to the GPU when its input comes from
    ``host_from_gpu``.

    In that case the result is ``host_from_gpu(gpu_contiguous(<gpu input>))``;
    in every other case the function falls through and returns ``None``.
    """
    if isinstance(node.op, Contiguous):
        # see also: https://github.com/Theano/Theano/blob/master/theano/sandbox/cuda/opt.py
        # (imported inside the function; presumably so this module does not
        # need theano.sandbox.cuda at import time -- TODO confirm)
        from theano.sandbox.cuda import host_from_gpu
        x, = node.inputs
        if x.owner and x.owner.op == host_from_gpu:
            from theano.sandbox.cuda.basic_ops import gpu_contiguous
            # x.owner.inputs[0] is the GPU variable that was transferred to host
            return [host_from_gpu(gpu_contiguous(x.owner.inputs[0]))]

Esempio n. 24

0

Mostra file

    if len(nw_inner) != len(op_ins):
        op_outs = scan_utils.clone(op_outs, replace=givens)
        nw_info = op.info.copy()
        nw_info['n_seqs'] = nw_n_seqs
        # DEBUG CHECK
        nwScan = scan_op.Scan(nw_inner, op_outs, nw_info)
        nw_outs = nwScan.make_node(*nw_outer).outputs
        return nw_outs
    else:
        return False

# Sequence of scan optimizations, registered in optdb as one entry.
scan_seqopt = theano.gof.SequenceDB()
# We run before blas opt at 1.7 and specialize 2.0
# but after stabilize at 1.5. Should we put it before stabilize?
optdb.register('scan_seqopt', scan_seqopt, 1.6, 'fast_run', 'scan')
# Registered inside scan_seqopt at position 5.
scan_seqopt.register('scanOp_remove_constants_and_unused_inputs',
                     opt.in2out(remove_constants_and_unused_inputs_scan,
                                ignore_newtrees=True),
                     5,
                     'fast_run',
                     'scan')


# This is a global opt for historical reason
# It should be possible to change it to a local opt.
class PushOutNonSeqScan(gof.Optimizer):

    def __init__(self):
        gof.Optimizer.__init__(self)

Esempio n. 25

0

Mostra file

File: rng_curand.py Progetto: Dimitris0mg/Theano

            assert all([isinstance(i, int) or isinstance(i, Variable)
                for i in size]), msg
        else:
            msg = "size must be a tuple of int or a Theano variable"
            assert isinstance(size, Variable) and size.ndim == 1, msg
        generator = theano.shared(False)  # makes a generic
        s_size = theano.tensor.as_tensor_variable(size)
        u = CURAND_Normal.new_auto_update(generator, ndim, dtype, s_size,
                self.next_seed())
        self.state_updates.append(u.update)
        rval = u * std + avg
        if u.type.broadcastable != rval.type.broadcastable:
            raise NotImplementedError(
                'Increase the size to match the broadcasting pattern of `low`'
                'and `high` arguments'
            )
        return  rval


@local_optimizer([CURAND_Base])
def local_destructive(node):
    """
    Replace a non-destructive ``CURAND_Base`` op by its destructive version.

    :return: outputs of the new node, or ``False`` when ``node.op`` is not a
        ``CURAND_Base`` or is already destructive
    """
    current_op = node.op
    if not isinstance(current_op, CURAND_Base) or current_op.destructive:
        return False
    # the op might be the gpu version
    destructive_op = current_op.as_destructive()
    destructive_node = destructive_op.make_node(*node.inputs)
    return destructive_node.outputs
# Register the destructive substitution in optdb at position 99 with the
# 'fast_run' and 'inplace' tags.
optdb.register('CURAND_destructive',
        opt.in2out(local_destructive, ignore_newtrees=True), 99, 'fast_run',
                   'inplace')

Esempio n. 26

0

Mostra file

File: opt.py Progetto: benmoran/Theano

                        GpuAdvancedIncSubtensor1,
                        GpuAdvancedIncSubtensor1_dev20)

# gpu_seqopt runs the local gpuarray optimizations (position 1) and then the
# transfer-cutting optimizations (position 2).
gpu_optimizer = EquilibriumDB()
gpu_cut_copies = EquilibriumDB()

gpu_seqopt = SequenceDB()

# NOTE(review): 'gpuarray_local_optimiziations' is misspelled, but it is the
# registered name (a runtime string), so it is left as is.
gpu_seqopt.register('gpuarray_local_optimiziations', gpu_optimizer, 1,
                    'fast_compile', 'fast_run', 'inplace', 'gpuarray')
gpu_seqopt.register('gpuarray_cut_transfers', gpu_cut_copies, 2,
                    'fast_compile', 'fast_run', 'gpuarray')

# do not add 'fast_run' to these two as this would always enable gpuarray mode
# Placed one position before 'add_destroy_handler' (49.5 is the fallback
# when that entry is not in optdb.__position__).
optdb.register('gpuarray_opt', gpu_seqopt,
               optdb.__position__.get('add_destroy_handler', 49.5) - 1,
               'gpuarray')


def register_opt(*tags, **kwargs):
    """
    Build a decorator that registers a local optimizer in ``gpu_optimizer``.

    :param tags: extra tags passed to ``gpu_optimizer.register`` after
        'fast_run' and 'gpuarray'
    :param kwargs: may contain ``name``, the name to register under; when it
        is missing or falsy, the ``__name__`` of the decorated optimizer is used
    :return: decorator that registers its argument and returns it unchanged
    """
    def f(local_opt):
        # Read the name without requiring or removing it. The previous
        # ``kwargs and kwargs.pop('name')`` raised KeyError for non-empty
        # kwargs without 'name', and dropped the name after the first use
        # of the decorator.
        name = kwargs.get('name') or local_opt.__name__
        gpu_optimizer.register(name, local_opt, 'fast_run', 'gpuarray', *tags)
        return local_opt
    return f

# Register local_track_shape_i through register_opt with the additional
# 'fast_compile' tag.
register_opt('fast_compile')(theano.tensor.opt.local_track_shape_i)

# Registered directly (not through register_opt), so the only tag is 'unsafe'.
gpu_optimizer.register('local_remove_all_assert',
                       theano.tensor.opt.local_remove_all_assert,
                       'unsafe')

Esempio n. 27

0

Mostra file

File: op.py Progetto: pascal20100/factored_output_layer

            a = LargeSparseTargets(what_to_output=2).make_node(*fnode.inputs)
            f, g = a.outputs

            z = fnode.outputs[0]
            fgraph.replace_validate(z, f, "replace by a cost+grad op")

            for gnode in gnodes:
                z = gnode.outputs[0]
                fgraph.replace_validate(z, g, "replace by a cost+grad op")


# Single instance of the merge optimizer, registered directly in optdb at
# position 48.5 with the "fast_run" tag.
mergelst = MergeLargeSparseTargetOps()
# (earlier alternative, kept for reference: registering inside 'specialize')
#optdb['specialize'].register('merge_large_sparse_target_ops', mergelst, 'fast_run')

optdb.register("global_large_sparse_targets_merge", mergelst, 48.5, "fast_run")


# add CPU TO GPU merge
#@register_specialize
#@local_optimizer([LargeSparseTargets])
def local_large_sparse_targets_gpu(node):
    if not isinstance(node.op,
                      LargeSparseTargets) or theano.config.device == "cpu":
        return False

    if node.op.what_to_output == 0:
        return [GpuLargeSparseTargets(node.op.what_to_output)(*node.inputs)]
    elif node.op.what_to_output == 1:
        return [
            host_from_gpu(

Esempio n. 28

0

Mostra file

                       'fast_compile', 'fast_run')
# Legacy convolution
conv_groupopt.register('local_conv2d_cpu', local_conv2d_cpu, 40,
                       'fast_compile', 'fast_run')
conv_groupopt.register('local_conv2d_gradweight_cpu',
                       local_conv2d_gradweight_cpu, 40, 'fast_compile',
                       'fast_run')
conv_groupopt.register('local_conv2d_gradinputs_cpu',
                       local_conv2d_gradinputs_cpu, 40, 'fast_compile',
                       'fast_run')


# Verify that no AbstractConv are present in the graph
@local_optimizer(
    [AbstractConv2d, AbstractConv2d_gradWeights, AbstractConv2d_gradInputs])
def local_abstractconv_check(node):
    """Fail loudly if an abstract 2D convolution op is still in the graph.

    Raises ``AssertionError`` (message prefixed with the op's class name)
    when ``node.op`` is an ``AbstractConv2d``, ``AbstractConv2d_gradWeights``
    or ``AbstractConv2d_gradInputs`` instance.  Returns ``None`` for any
    other op.
    """
    abstract_ops = (AbstractConv2d, AbstractConv2d_gradWeights,
                    AbstractConv2d_gradInputs)
    if not isinstance(node.op, abstract_ops):
        return None
    op_name = node.op.__class__.__name__
    raise AssertionError(
        '%s Theano optimization failed: there is no implementation '
        'available supporting the requested options. Did you exclude '
        'both "conv_dnn" and "conv_gemm" from the optimizer? If on GPU, '
        'is cuDNN available and does the GPU support it? If on CPU, '
        'do you have a BLAS library installed Theano can link against?' %
        op_name)


optdb.register('AbstracConvCheck',
               opt.in2out(local_abstractconv_check, name="AbstractConvCheck"),
               48.7, 'fast_compile', 'fast_run')

Esempio n. 29

0

Mostra file

File: rng_curand.py Progetto: huamichaelchen/Theano

        """
        if isinstance(size, tuple):
            msg = "size must be a tuple of int or a Theano variable"
            assert all([isinstance(i, int) or isinstance(i, Variable) for i in size]), msg
        else:
            msg = "size must be a tuple of int or a Theano variable"
            assert isinstance(size, Variable) and size.ndim == 1, msg
        generator = theano.shared(False)  # makes a generic
        s_size = theano.tensor.as_tensor_variable(size)
        u = CURAND_Normal.new_auto_update(generator, ndim, dtype, s_size, self.next_seed())
        self.state_updates.append(u.update)
        rval = u * std + avg
        if u.type.broadcastable != rval.type.broadcastable:
            raise NotImplementedError(
                "Increase the size to match the broadcasting pattern of `low`" "and `high` arguments"
            )
        return rval


@local_optimizer([CURAND_Base])
def local_destructive(node):
    """Swap a non-destructive ``CURAND_Base`` op for its destructive variant.

    Returns the outputs of a new node built by ``op.as_destructive()`` on
    the same inputs, or ``False`` when ``node.op`` is not a ``CURAND_Base``
    instance or is already destructive.
    """
    curand_op = node.op
    if not isinstance(curand_op, CURAND_Base) or curand_op.destructive:
        return False
    # op might be gpu version
    destructive_op = curand_op.as_destructive()
    replacement = destructive_op.make_node(*node.inputs)
    return replacement.outputs


optdb.register("CURAND_destructive", opt.in2out(local_destructive, ignore_newtrees=True), 99, "fast_run", "inplace")

Esempio n. 30

0

Mostra file

File: opt.py Progetto: GeorgyKonoplich/vehicle_detection

gpu_cut_copies = EquilibriumDB()

gpu_seqopt = SequenceDB()

# Don't register this right now
conv_groupopt = LocalGroupDB()
conv_groupopt.__name__ = "gpua_conv_opts"

gpu_seqopt.register('gpuarray_local_optimiziations', gpu_optimizer, 1,
                    'fast_compile', 'fast_run', 'gpuarray')
gpu_seqopt.register('gpuarray_cut_transfers', gpu_cut_copies, 2,
                    'fast_compile', 'fast_run', 'gpuarray')

# do not add 'fast_run' to these two as this would always enable gpuarray mode
optdb.register('gpuarray_opt', gpu_seqopt,
               optdb.__position__.get('add_destroy_handler', 49.5) - 1,
               'gpuarray')


def register_opt(*tags, **kwargs):
    """Build a decorator that registers a local optimizer in ``gpu_optimizer``.

    ``tags`` are appended after the fixed 'fast_run' and 'gpuarray' tags.
    The only keyword read is ``name``; when it is missing or falsy the
    optimizer's ``__name__`` is used as the registration name.  The
    decorator returns the optimizer unchanged.
    """
    def f(local_opt):
        # Default of None avoids the KeyError the bare pop('name') raised
        # when kwargs was non-empty but had no 'name' entry.
        name = kwargs.pop('name', None) or local_opt.__name__
        gpu_optimizer.register(name, local_opt, 'fast_run', 'gpuarray', *tags)
        return local_opt
    return f


def register_inplace(*tags, **kwargs):
    def f(local_opt):
        name = (kwargs and kwargs.pop('name')) or local_opt.__name__
        optdb.register(

Esempio n. 31

0

Mostra file

File: opt.py Progetto: DeepLearningIndia/Theano

from basic_ops import host_from_gpu, gpu_from_host, gpu_alloc
from elemwise import GpuElemwise, _is_scalar

gpu_optimizer = EquilibriumDB()
gpu_cut_copies = EquilibriumDB()

gpu_seqopt = SequenceDB()

gpu_seqopt.register('gpuarray_local_optimiziations', gpu_optimizer, 1,
                    'fast_run', 'inplace', 'gpuarray')
gpu_seqopt.register('gpuarray_cut_transfers', gpu_cut_copies, 2,
                    'fast_run', 'gpuarray')

# do not add 'fast_run' to these two as this would always enable gpuarray mode
optdb.register('gpuarray_opt', gpu_seqopt,
               optdb.__position__.get('add_destroy_handler', 49.5) - 1,
               'gpuarray')

def register_opt(*tags, **kwargs):
    """Build a decorator that registers a local optimizer in ``gpu_optimizer``.

    ``tags`` are appended after the fixed 'fast_run' and 'gpuarray' tags.
    The only keyword read is ``name``; when it is missing or falsy the
    optimizer's ``__name__`` is used as the registration name.  The
    decorator returns the optimizer unchanged.
    """
    def f(local_opt):
        # Default of None avoids the KeyError the bare pop('name') raised
        # when kwargs was non-empty but had no 'name' entry.
        name = kwargs.pop('name', None) or local_opt.__name__
        gpu_optimizer.register(name, local_opt, 'fast_run', 'gpuarray', *tags)
        return local_opt
    return f

register_opt()(theano.tensor.opt.local_track_shape_i)

class InputToGpuOptimizer(Optimizer):
    "Transfer the input to the gpu to start the rolling wave."

    def add_requirements(self, fgraph):

Esempio n. 32

0

Mostra file

    op = node.op
    if (isinstance(op, IfElse) and not op.as_view and
            # For big graph, do not make inplace scalar to speed up
            # optimization.
        (len(node.fgraph.apply_nodes) < 500
         or not all([getattr(o.type, "ndim", -1) == 0
                     for o in node.outputs]))):
        return IfElse(n_outs=op.n_outs, as_view=True, gpu=op.gpu,
                      name=op.name)(*node.inputs, **dict(return_list=True))
    return False


optdb.register(
    "cond_make_inplace",
    opt.in2out(cond_make_inplace, ignore_newtrees=True),
    95,
    "fast_run",
    "inplace",
)

# XXX: Optimizations commented pending further debugging (certain optimizations
# make computation less lazy than it should be currently).
#
# ifelse_equilibrium = gof.EquilibriumDB()
# ifelse_seqopt = gof.SequenceDB()
# ifelse_equilibrium.register('seq_ifelse', ifelse_seqopt, 'fast_run',
#                             'ifelse')
""" Comments:
I wrote these comments to explain how the optimization of the ifelse function
works (for future developers who need to parse this part of the code). Please
try to keep these comments in sync with whatever changes you make to the code.

Esempio n. 33

0

Mostra file

File: BiDirectionalTwoDLSTMOp.py Progetto: papar22/returnn-hmm


BidirectionalTwoDLSTMOpGradNoInplaceInstance = BidirectionalTwoDLSTMOpGrad(
    inplace=False)
BidirectionalTwoDLSTMOpGradInplaceInstance = BidirectionalTwoDLSTMOpGrad(
    inplace=True)

BidirectionalTwoDLSTMOpInplaceOpt = OpSub(
    BidirectionalTwoDLSTMOpGradNoInplaceInstance,
    BidirectionalTwoDLSTMOpGradInplaceInstance)

#hack to avoid being called twice
if not hasattr(optdb, 'BidirectionalTwoDLSTMOpInplaceOpt_registered'):
    optdb.register(
        'BidirectionalTwoDLSTMOpInplaceOpt',
        theano.gof.TopoOptimizer(
            BidirectionalTwoDLSTMOpInplaceOpt,
            failure_callback=gof.TopoOptimizer.warn_inplace), 50.0, 'fast_run',
        'inplace', 'gpuarray')
    optdb.BidirectionalTwoDLSTMOpInplaceOpt_registered = True


class BidirectionalTwoDLSTMOp(theano.sandbox.cuda.GpuOp):
    __props__ = ()

    def __init__(self):
        super(BidirectionalTwoDLSTMOp, self).__init__()

    def make_node(self, X, W1, W2, V_h1, V_h2, V_v1, V_v2, b1, b2, sizes):
        var_names = [
            "X", "W1", "W2", "V_h1", "V_h2", "V_v1", "V_v2", "b1", "b2"
        ]

Esempio n. 34

0

Mostra file

File: scan_opt.py Progetto: xinfanmeng/Theano

    if len(nw_inner) != len(op_ins):
        op_outs = scan_utils.clone(op_outs, replace=givens)
        nw_info = copy.deepcopy(op.info)
        nw_info['n_seqs'] = nw_n_seqs
        # DEBUG CHECK
        nwScan = scan_op.Scan(nw_inner, op_outs, nw_info)
        nw_outs = nwScan.make_node(*nw_outer).outputs
        return nw_outs
    else:
        return False

scan_seqopt = theano.gof.SequenceDB()
# We run before blas opt at 1.7 and specialize 2.0
# but after stabilize at 1.5. Should we put it before stabilize?
optdb.register('scan_seqopt', scan_seqopt, 1.6, 'fast_run', 'scan')
scan_seqopt.register('scanOp_remove_constants_and_unused_inputs',
                     opt.in2out(remove_constants_and_unused_inputs_scan,
                                ignore_newtrees=True),
                     5,
                     'fast_run',
                     'scan')


# This is a global opt for historical reasons.
# It should be possible to change it to a local opt.
class PushOutNonSeqScan(gof.Optimizer):

    def __init__(self):
        gof.Optimizer.__init__(self)

Esempio n. 35

0

Mostra file

File: scan_opt.py Progetto: delallea/Theano

    if len(nw_inner) != len(op_ins):
        op_outs = scan_utils.clone(op_outs, replace=givens)
        nw_info = op.info.copy()
        nw_info["n_seqs"] = nw_n_seqs
        # DEBUG CHECK
        nwScan = scan_op.Scan(nw_inner, op_outs, nw_info)
        nw_outs = nwScan.make_node(*nw_outer).outputs
        return nw_outs
    else:
        return False


scan_seqopt = theano.gof.SequenceDB()
# We run before blas opt at 1.7 and specialize 2.0
# but after stabilize at 1.5. Should we put it before stabilize?
optdb.register("scan_seqopt", scan_seqopt, 1.6, "fast_run", "scan")
scan_seqopt.register(
    "scanOp_remove_constants_and_unused_inputs",
    opt.in2out(remove_constants_and_unused_inputs_scan, ignore_newtrees=True),
    5,
    "fast_run",
    "scan",
)


# This is a global opt for historical reasons.
# It should be possible to change it to a local opt.
class PushOutNonSeqScan(gof.Optimizer):
    def __init__(self):
        gof.Optimizer.__init__(self)

Esempio n. 36

0

Mostra file

File: scan_opt.py Progetto: HaniAlmousli/Theano

    if len(nw_inner) != len(op_ins):
        op_outs = scan_utils.clone(op_outs, replace=givens)
        nw_info = op.info.copy()
        nw_info['n_seqs'] = nw_n_seqs
        # DEBUG CHECK
        nwScan = scan_op.Scan(nw_inner, op_outs, nw_info)
        nw_outs = nwScan.make_node(*nw_outer).outputs
        return nw_outs
    else:
        return False

scan_seqopt = theano.gof.SequenceDB()
# We run before blas opt at 1.7 and specialize 2.0
# but after stabilize at 1.5. Should we put it before stabilize?
optdb.register('scan_seqopt', scan_seqopt, 1.6, 'fast_run', 'scan')
scan_seqopt.register('scanOp_remove_constants_and_unused_inputs',
                     opt.in2out(remove_constants_and_unused_inputs_scan,
                                ignore_newtrees=True),
                     5,
                     'fast_run',
                     'scan')


# This is a global opt for historical reasons.
# It should be possible to change it to a local opt.
class PushOutNonSeqScan(gof.Optimizer):

    def __init__(self):
        gof.Optimizer.__init__(self)

Esempio n. 37

0

Mostra file

File: opt.py Progetto: csxlyan/Theano

from theano.sandbox.gpuarray.subtensor import GpuSubtensor
from theano.sandbox.gpuarray.blas import GpuGemv, GpuGemm

gpu_optimizer = EquilibriumDB()
gpu_cut_copies = EquilibriumDB()

gpu_seqopt = SequenceDB()

gpu_seqopt.register('gpuarray_local_optimiziations', gpu_optimizer, 1,
                    'fast_run', 'inplace', 'gpuarray')
gpu_seqopt.register('gpuarray_cut_transfers', gpu_cut_copies, 2, 'fast_run',
                    'gpuarray')

# do not add 'fast_run' to these two as this would always enable gpuarray mode
optdb.register('gpuarray_opt', gpu_seqopt,
               optdb.__position__.get('add_destroy_handler', 49.5) - 1,
               'gpuarray')


def register_opt(*tags, **kwargs):
    """Build a decorator that registers a local optimizer in ``gpu_optimizer``.

    ``tags`` are appended after the fixed 'fast_run' and 'gpuarray' tags.
    The only keyword read is ``name``; when it is missing or falsy the
    optimizer's ``__name__`` is used as the registration name.  The
    decorator returns the optimizer unchanged.
    """
    def f(local_opt):
        # Default of None avoids the KeyError the bare pop('name') raised
        # when kwargs was non-empty but had no 'name' entry.
        name = kwargs.pop('name', None) or local_opt.__name__
        gpu_optimizer.register(name, local_opt, 'fast_run', 'gpuarray', *tags)
        return local_opt

    return f


register_opt()(theano.tensor.opt.local_track_shape_i)

Esempio n. 38

0

Mostra file

File: scan_opt.py Progetto: shawakaze/Theano

    if len(nw_inner) != len(op_ins):
        op_outs = scan_utils.clone(op_outs, replace=givens)
        nw_info = copy.deepcopy(op.info)
        nw_info["n_seqs"] = nw_n_seqs
        # DEBUG CHECK
        nwScan = scan_op.Scan(nw_inner, op_outs, nw_info)
        nw_outs = nwScan.make_node(*nw_outer).outputs
        return nw_outs
    else:
        return False


scan_seqopt = theano.gof.SequenceDB()
# We run before blas opt at 1.7 and specialize 2.0
# but after stabilize at 1.5. Should we put it before stabilize?
optdb.register("scan_seqopt", scan_seqopt, 1.6, "fast_run", "scan")
scan_seqopt.register(
    "scanOp_remove_constants_and_unused_inputs",
    opt.in2out(remove_constants_and_unused_inputs_scan, ignore_newtrees=True),
    5,
    "fast_run",
    "scan",
)


# This is a global opt for historical reasons.
# It should be possible to change it to a local opt.
class PushOutNonSeqScan(gof.Optimizer):
    def __init__(self):
        gof.Optimizer.__init__(self)

Esempio n. 39

0

Mostra file

File: PoolHWBCOp.py Progetto: papar22/returnn-hmm

                                                                        0, 1):
                X = node.inputs[0]
                if hasattr(X.owner, "op") and type(
                        X.owner.op) == PoolHWBCOpGrad and X.owner.op.inplace:
                    fgraph.replace_validate(node.outputs[0], node.inputs[0])
                    replace_op = PoolHWBCOpGrad(X.owner.op.pool_shape,
                                                inplace=True,
                                                BCHW_grad_output=True)
                    fgraph.replace_validate(X.owner.outputs[0],
                                            replace_op(*X.owner.inputs))


RemoveConvGradDimshuffleOptimizer = RemoveConvGradDimshuffle()
if not hasattr(optdb, 'RemoveConvGradDimshuffleOptimizer_registered'):
    optdb.register('RemoveConvGradDimshuffle',
                   RemoveConvGradDimshuffleOptimizer, 50.5, 'fast_run',
                   'inplace', 'gpuarray')
    optdb.RemoveConvGradDimshuffleOptimizer_registered = True

#---------------------------


#for the moment we implement only ignore_border = True and no padding
class PoolHWBCOp(theano.sandbox.cuda.GpuOp):
    __props__ = ("pool_shape", )

    def __init__(self, pool_shape):
        pool_shape = tuple(pool_shape)
        super(PoolHWBCOp, self).__init__()
        assert len(pool_shape) == 2, len(pool_shape)
        assert pool_shape[0] > 0, pool_shape[0]

Esempio n. 40

0

Mostra file

File: opt.py Progetto: kevinbache/Theano

                                               GpuAdvancedIncSubtensor1_dev20)
from theano.sandbox.gpuarray.type import GpuArrayConstant

gpu_optimizer = EquilibriumDB()
gpu_cut_copies = EquilibriumDB()

gpu_seqopt = SequenceDB()

gpu_seqopt.register('gpuarray_local_optimiziations', gpu_optimizer, 1,
                    'fast_compile', 'fast_run', 'inplace', 'gpuarray')
gpu_seqopt.register('gpuarray_cut_transfers', gpu_cut_copies, 2,
                    'fast_compile', 'fast_run', 'gpuarray')

# do not add 'fast_run' to these two as this would always enable gpuarray mode
optdb.register('gpuarray_opt', gpu_seqopt,
               optdb.__position__.get('add_destroy_handler', 49.5) - 1,
               'gpuarray')


def register_opt(*tags, **kwargs):
    """Build a decorator that registers a local optimizer in ``gpu_optimizer``.

    ``tags`` are appended after the fixed 'fast_run' and 'gpuarray' tags.
    The only keyword read is ``name``; when it is missing or falsy the
    optimizer's ``__name__`` is used as the registration name.  The
    decorator returns the optimizer unchanged.
    """
    def f(local_opt):
        # Default of None avoids the KeyError the bare pop('name') raised
        # when kwargs was non-empty but had no 'name' entry.
        name = kwargs.pop('name', None) or local_opt.__name__
        gpu_optimizer.register(name, local_opt, 'fast_run', 'gpuarray', *tags)
        return local_opt
    return f

register_opt('fast_compile')(theano.tensor.opt.local_track_shape_i)


def safe_to_gpu(x):
    if isinstance(x.type, tensor.TensorType):

Esempio n. 41

0

Mostra file

def inline_ofg_expansion(node):
    """
    Replace an inline OpFromGraph node by a clone of its inner graph.

    Returns False when ``node.op`` is not an OpFromGraph or when its
    ``is_inline`` flag is falsy.  Otherwise returns ``theano.clone`` of the
    op's local outputs, with every local input mapped to the corresponding
    input of ``node``.
    """
    ofg = node.op
    if not (isinstance(ofg, OpFromGraph) and ofg.is_inline):
        return False
    return theano.clone(ofg.local_outputs,
                        dict(zip(ofg.local_inputs, node.inputs)))


# We want to run this before the first merge optimizer
# and before the first scan optimizer.
optdb.register(
    "inline_ofg_expansion",
    gof.opt.in2out(inline_ofg_expansion),
    -0.01,
    "fast_compile",
    "fast_run",
)

# Since OpFromGraph contains a Theano compiled function,
# we should let DebugMode know about it
ops_with_inner_function[OpFromGraph] = "fn"

Esempio n. 42

0

Mostra file

File: opt.py Progetto: nitish-awasthi/Theano-PyMC

])
def local_abstractconv_check(node):
    """Fail loudly if an abstract convolution op is still in the graph.

    Raises ``LocalMetaOptimizerSkipAssertionError`` (message prefixed with
    the op's class name) when ``node.op`` is one of the abstract 2D/3D
    convolution ops listed below.  Returns ``None`` for any other op.
    """
    abstract_ops = (
        AbstractConv2d,
        AbstractConv2d_gradWeights,
        AbstractConv2d_gradInputs,
        AbstractConv3d,
        AbstractConv3d_gradWeights,
        AbstractConv3d_gradInputs,
    )
    if not isinstance(node.op, abstract_ops):
        return None
    op_name = node.op.__class__.__name__
    raise LocalMetaOptimizerSkipAssertionError(
        "%s Theano optimization failed: there is no implementation "
        "available supporting the requested options. Did you exclude "
        'both "conv_dnn" and "conv_gemm" from the optimizer? If on GPU, '
        "is cuDNN available and does the GPU support it? If on CPU, "
        "do you have a BLAS library installed Theano can link against? "
        "On the CPU we do not support float16." % op_name)


optdb.register(
    "AbstractConvCheck",
    in2out(local_abstractconv_check, name="AbstractConvCheck"),
    48.7,
    "fast_compile",
    "fast_run",
)

Esempio n. 43

0

Mostra file

@inplace_allocempty(GpuDnnConvGradI, 2)
def local_dnn_convgi_inplace(node, inputs):
    """Return a one-element list with an in-place ``GpuDnnConvGradI`` output.

    The new op copies ``algo`` and ``num_groups`` from ``node.op``, sets
    ``inplace=True`` and is applied to ``inputs``.
    """
    source_op = node.op
    inplace_op = GpuDnnConvGradI(algo=source_op.algo,
                                 inplace=True,
                                 num_groups=source_op.num_groups)
    return [inplace_op(*inputs)]


optdb.register(
    "local_dnna_conv_inplace",
    theano.tensor.opt.in2out(
        local_dnn_conv_inplace,
        local_dnn_convgw_inplace,
        local_dnn_convgi_inplace,
        name="local_dnna_conv_inplace",
    ),
    70.0,
    "fast_run",
    "inplace",
    "gpuarray",
    "cudnn",
)


@register_opt("cudnn")
@alpha_merge(GpuDnnConv, alpha_in=4, beta_in=5)
def local_dnn_conv_alpha_merge(node, *inputs):
    """Return a one-element list with a fresh ``GpuDnnConv`` applied to ``inputs``.

    The new op copies ``algo`` and ``num_groups`` from ``node.op``.
    """
    source_op = node.op
    rebuilt_op = GpuDnnConv(algo=source_op.algo,
                            num_groups=source_op.num_groups)
    return [rebuilt_op(*inputs)]

Esempio n. 44

0

Mostra file

File: nerv.py Progetto: xzm2004260/Theano


@opt.register_opt()
@alpha_merge(Gemm16, alpha_in=1, beta_in=4)
def local_gemm16_alpha_merge(node, *inputs):
    """Return a one-element list with a new ``Gemm16`` applied to ``inputs``.

    The new op keeps the ``relu`` setting of ``node.op``.
    """
    rebuilt_op = Gemm16(relu=node.op.relu)
    return [rebuilt_op(*inputs)]


@opt.register_opt()
@output_merge(Gemm16, alpha_in=1, beta_in=4, out_in=0)
def local_gemm16_output_merge(node, *inputs):
    """Return a one-element list with a new ``Gemm16`` applied to ``inputs``.

    The new op keeps the ``relu`` setting of ``node.op``.
    """
    rebuilt_op = Gemm16(relu=node.op.relu)
    return [rebuilt_op(*inputs)]


@local_optimizer([Gemm16], inplace=True)
def local_gemm16_inplace(node):
    """Replace a non-inplace ``Gemm16`` node by its in-place counterpart.

    Returns ``None`` when ``node.op`` is not exactly a ``Gemm16`` or is
    already in-place; otherwise a one-element list holding the output of
    ``Gemm16(relu=..., inplace=True)``.
    """
    gemm_op = node.op
    if type(gemm_op) != Gemm16 or gemm_op.inplace:
        return
    new_inputs = list(node.inputs)
    out_buf = new_inputs[0]
    alloc_node = out_buf.owner
    # When input 0 comes from a GpuAllocEmpty that has more than one client,
    # re-apply that alloc op so this node gets its own buffer (presumably so
    # the in-place write does not touch a buffer other clients use).
    if (alloc_node and isinstance(alloc_node.op, GpuAllocEmpty)
            and len(out_buf.clients) > 1):
        new_inputs[0] = alloc_node.op(*alloc_node.inputs)
    inplace_op = Gemm16(relu=gemm_op.relu, inplace=True)
    return [inplace_op(*new_inputs)]


optdb.register(
    'local_gemm16_inplace',
    tensor.opt.in2out(local_gemm16_inplace, name='local_gemm16_inplace'), 70.0,
    'fast_run', 'inplace', 'gpuarray')

Esempio n. 45

0

Mostra file

File: dnn.py Progetto: hfinger/Theano

@local_optimizer([GpuDnnConvGradI], inplace=True)
def local_dnn_convgi_inplace(node):
    """Replace a non-inplace ``GpuDnnConvGradI`` node by an in-place one.

    Returns ``None`` when ``node.op`` is not exactly a ``GpuDnnConvGradI``
    or is already in-place; otherwise a one-element list holding the
    output of the in-place op built with the same ``algo``.
    """
    grad_op = node.op
    if type(grad_op) != GpuDnnConvGradI or grad_op.inplace:
        return
    new_inputs = list(node.inputs)
    out_buf = new_inputs[2]
    alloc_node = out_buf.owner
    # When input 2 comes from a GpuAllocEmpty that has more than one client,
    # build a fresh GpuAllocEmpty of the same dtype for this node
    # (presumably so the in-place write does not touch a shared buffer).
    if (alloc_node and
            isinstance(alloc_node.op, GpuAllocEmpty) and
            len(out_buf.clients) > 1):
        fresh_alloc = GpuAllocEmpty(alloc_node.op.dtype)
        new_inputs[2] = fresh_alloc(*alloc_node.inputs)
    inplace_op = GpuDnnConvGradI(algo=grad_op.algo, inplace=True)
    return [inplace_op(*new_inputs)]

optdb.register('local_dnna_conv_inplace',
               tensor.opt.in2out(local_dnn_conv_inplace,
                                 local_dnn_convgw_inplace,
                                 local_dnn_convgi_inplace,
                                 name="local_dnn_conv_inplace"),
               70.0, 'fast_run', 'inplace', 'gpuarray', 'cudnn')


@register_opt('cudnn')
@alpha_merge(GpuDnnConv, alpha_in=4, beta_in=5, nd=4)
def local_dnn_conv_alpha_merge(node, *inputs):
    """Return a one-element list with a new ``GpuDnnConv`` applied to ``inputs``.

    The new op keeps the ``algo`` of ``node.op``.
    """
    rebuilt_op = GpuDnnConv(algo=node.op.algo)
    return [rebuilt_op(*inputs)]


@register_opt('cudnn')
@alpha_merge(GpuDnnConvGradW, alpha_in=4, beta_in=5, nd=4)
def local_dnn_convw_alpha_merge(node, *inputs):
    """Return a one-element list with a new ``GpuDnnConvGradW`` applied to ``inputs``.

    The new op keeps the ``algo`` of ``node.op``.
    """
    rebuilt_op = GpuDnnConvGradW(algo=node.op.algo)
    return [rebuilt_op(*inputs)]

Esempio n. 46

0

Mostra file

File: MultiDirectionalTwoDLSTMOp.py Progetto: chagge/returnn

    """ % locals()

  #!!! change this when changing the code!
  #def c_code_cache_version(self):
  #  return 2, 7

MultiDirectionalTwoDLSTMOpGradNoInplaceInstance = MultiDirectionalTwoDLSTMOpGrad(inplace=False)
MultiDirectionalTwoDLSTMOpGradInplaceInstance = MultiDirectionalTwoDLSTMOpGrad(inplace=True)

MultiDirectionalTwoDLSTMOpInplaceOpt = OpSub(MultiDirectionalTwoDLSTMOpGradNoInplaceInstance,
                                             MultiDirectionalTwoDLSTMOpGradInplaceInstance)

#hack to avoid being called twice
if not hasattr(optdb, 'MultiDirectionalTwoDLSTMOpInplaceOpt_registered'):
  optdb.register('MultiDirectionalTwoDLSTMOpInplaceOpt',
                 theano.gof.TopoOptimizer(MultiDirectionalTwoDLSTMOpInplaceOpt, failure_callback=gof.TopoOptimizer.warn_inplace),
                 50.0, 'fast_run', 'inplace', 'gpuarray')
  optdb.MultiDirectionalTwoDLSTMOpInplaceOpt_registered = True


class MultiDirectionalTwoDLSTMOp(theano.sandbox.cuda.GpuOp):
  __props__ = ()

  def __init__(self):
    super(MultiDirectionalTwoDLSTMOp, self).__init__()

  def make_node(self, X, W1, W2, W3, W4, V_h1, V_h2, V_h3, V_h4, V_v1, V_v2, V_v3, V_v4, b1, b2, b3, b4, sizes):
    var_names = ["X", "W1", "W2", "W3", "W4", "V_h1", "V_h2", "V_h3", "V_h4",
                 "V_v1", "V_v2", "V_v3", "V_v4", "b1", "b2", "b3", "b4"]
    lcl = locals()
    for var_name in var_names:

Esempio n. 47

0

Mostra file

File: ifelse.py Progetto: DeepLearningIndia/Theano

    else:
        return tuple(rval)


@gof.local_optimizer([None])
def cond_make_inplace(node):
    """Replace an ``IfElse`` node that is not a view by an ``as_view=True`` one.

    Returns ``False`` when ``node.op`` is not an ``IfElse`` or already has
    ``as_view`` set; otherwise the list of outputs of the new ``IfElse``
    (same ``n_outs``, ``gpu`` and ``name``) applied to the node's inputs.
    """
    ifelse_op = node.op
    if not isinstance(ifelse_op, IfElse) or ifelse_op.as_view:
        return False
    view_op = IfElse(n_outs=ifelse_op.n_outs,
                     as_view=True,
                     gpu=ifelse_op.gpu,
                     name=ifelse_op.name)
    return view_op(*node.inputs, **dict(return_list=True))


optdb.register('cond_make_inplace', opt.in2out(cond_make_inplace,
    ignore_newtrees=True), 95, 'fast_run', 'inplace')

# XXX: Optimizations commented pending further debugging (certain optimizations
# make computation less lazy than it should be currently).
#
# ifelse_equilibrium = gof.EquilibriumDB()
# ifelse_seqopt = gof.SequenceDB()
# ifelse_equilibrium.register('seq_ifelse', ifelse_seqopt, 'fast_run',
#                             'ifelse')
''' Comments:
I wrote these comments to explain how the optimization of the ifelse function
works (for future developers who need to parse this part of the code). Please
try to keep these comments in sync with whatever changes you make to the code.

ifelse optimization are registered before canonicalize !

Esempio n. 48

0

Mostra file

File: ifelse.py Progetto: Roger-Chuh/depth_video

        return list(rval)
    else:
        return tuple(rval)


@gof.local_optimizer([IfElse])
def cond_make_inplace(node):
    """Replace an ``IfElse`` node that is not a view by an ``as_view=True`` one.

    Returns ``False`` when ``node.op`` is not an ``IfElse`` or already has
    ``as_view`` set; otherwise the list of outputs of the new ``IfElse``
    (same ``n_outs``, ``gpu`` and ``name``) applied to the node's inputs.
    """
    ifelse_op = node.op
    if not isinstance(ifelse_op, IfElse) or ifelse_op.as_view:
        return False
    view_op = IfElse(n_outs=ifelse_op.n_outs,
                     as_view=True,
                     gpu=ifelse_op.gpu,
                     name=ifelse_op.name)
    return view_op(*node.inputs, **dict(return_list=True))


optdb.register('cond_make_inplace',
               opt.in2out(cond_make_inplace, ignore_newtrees=True), 95,
               'fast_run', 'inplace')

# XXX: Optimizations commented pending further debugging (certain optimizations
# make computation less lazy than it should be currently).
#
# ifelse_equilibrium = gof.EquilibriumDB()
# ifelse_seqopt = gof.SequenceDB()
# ifelse_equilibrium.register('seq_ifelse', ifelse_seqopt, 'fast_run',
#                             'ifelse')
''' Comments:
I wrote these comments to explain how the optimization of the ifelse function
works (for future developers who need to parse this part of the code). Please
try to keep these comments in sync with whatever changes you make to the code.

ifelse optimization are registered before canonicalize !

Esempio n. 49

0

Mostra file

File: op.py Progetto: adbrebs/factored_output_layer

            a = LargeSparseTargets(what_to_output=2).make_node(*fnode.inputs)
            f, g = a.outputs

            z = fnode.outputs[0]
            fgraph.replace_validate(z, f, "replace by a cost+grad op")

            for gnode in gnodes:
                z = gnode.outputs[0]
                fgraph.replace_validate(z, g, "replace by a cost+grad op")


mergelst = MergeLargeSparseTargetOps()
#optdb['specialize'].register('merge_large_sparse_target_ops', mergelst, 'fast_run')

optdb.register("global_large_sparse_targets_merge", mergelst, 48.5, "fast_run")

# add CPU TO GPU merge
#@register_specialize
#@local_optimizer([LargeSparseTargets])
def local_large_sparse_targets_gpu(node):
    """Build the ``GpuLargeSparseTargets`` replacement for a ``LargeSparseTargets`` node.

    Returns ``False`` when ``node.op`` is not a ``LargeSparseTargets`` or
    ``theano.config.device`` is "cpu".  Otherwise applies
    ``GpuLargeSparseTargets`` to the node's inputs and returns a list whose
    layout depends on ``node.op.what_to_output``:

    * 0: the GPU result as is;
    * 1: the GPU result wrapped in ``host_from_gpu``;
    * anything else: the first GPU output as is and the second wrapped in
      ``host_from_gpu``.
    """
    not_applicable = (not isinstance(node.op, LargeSparseTargets)
                      or theano.config.device == "cpu")
    if not_applicable:
        return False

    mode = node.op.what_to_output
    gpu_result = GpuLargeSparseTargets(mode)(*node.inputs)
    if mode == 0:
        return [gpu_result]
    if mode == 1:
        return [host_from_gpu(gpu_result)]
    return [gpu_result[0], host_from_gpu(gpu_result[1])]

Esempio n. 50

0

Mostra file

gpu_cut_copies = EquilibriumDB()

gpu_seqopt = SequenceDB()

# Don't register this right now
conv_groupopt = LocalGroupDB()
conv_groupopt.__name__ = "gpua_conv_opts"

gpu_seqopt.register('gpuarray_local_optimiziations', gpu_optimizer, 1,
                    'fast_compile', 'fast_run', 'gpuarray')
gpu_seqopt.register('gpuarray_cut_transfers', gpu_cut_copies, 2,
                    'fast_compile', 'fast_run', 'gpuarray')

# do not add 'fast_run' to these two as this would always enable gpuarray mode
optdb.register('gpuarray_opt', gpu_seqopt,
               optdb.__position__.get('add_destroy_handler', 49.5) - 1,
               'gpuarray')


def register_opt(*tags, **kwargs):
    """Build a decorator that registers a local optimizer in ``gpu_optimizer``.

    ``tags`` are appended after the fixed 'fast_run' and 'gpuarray' tags.
    The only keyword read is ``name``; when it is missing or falsy the
    optimizer's ``__name__`` is used as the registration name.  The
    decorator returns the optimizer unchanged.
    """
    def f(local_opt):
        # Default of None avoids the KeyError the bare pop('name') raised
        # when kwargs was non-empty but had no 'name' entry.
        name = kwargs.pop('name', None) or local_opt.__name__
        gpu_optimizer.register(name, local_opt, 'fast_run', 'gpuarray', *tags)
        return local_opt

    return f


def register_inplace(*tags, **kwargs):
    def f(local_opt):
        name = (kwargs and kwargs.pop('name')) or local_opt.__name__

Esempio n. 51

0

Mostra file

File: scan_opt.py Progetto: jsalvatier/Theano-1

    if len(nw_inner) != len(op_ins):
        op_outs = scan_utils.clone(op_outs, replace=givens)
        nw_info = copy.deepcopy(op.info)
        nw_info['n_seqs'] = nw_n_seqs
        # DEBUG CHECK
        nwScan = scan_op.Scan(nw_inner, op_outs, nw_info)
        nw_outs = nwScan.make_node(*nw_outer).outputs
        return nw_outs
    else:
        return False

scan_seqopt = theano.gof.SequenceDB()
# We run before blas opt at 1.7 and specialize 2.0
# but after stabilize at 1.5. Should we put it before stabilize?
optdb.register('scan_seqopt', scan_seqopt, 1.6, 'fast_run', 'scan')
scan_seqopt.register('scanOp_remove_constants_and_unused_inputs',
                     opt.in2out(remove_constants_and_unused_inputs_scan,
                                ignore_newtrees=True),
                     5,
                     'fast_run',
                     'scan')


# This is a global opt for historical reasons.
# It should be possible to change it to a local opt.
class PushOutNonSeqScan(gof.Optimizer):

    def __init__(self):
        gof.Optimizer.__init__(self)

Esempio n. 52

0

Mostra file

File: raw_random.py Progetto: adrn/Theano-PyMC

    if isinstance(op, RandomFunction) and not op.inplace:
        # Read op_fn from op.state, not from op.fn, since op.fn
        # may not be picklable.
        op_fn, op_outtype, op_inplace, op_ndim_added = op._props()
        new_op = RandomFunction(op_fn,
                                op_outtype,
                                inplace=True,
                                ndim_added=op_ndim_added)
        return new_op.make_node(*node.inputs).outputs
    return False


optdb.register(
    "random_make_inplace",
    opt.in2out(random_make_inplace, ignore_newtrees=True),
    99,
    "fast_run",
    "inplace",
)


class RandomStreamsBase(object):
    def binomial(self,
                 size=None,
                 n=1,
                 p=0.5,
                 ndim=None,
                 dtype="int64",
                 prob=None):
        """
        Sample n times with probability of success p for each trial and

Esempio n. 53

0

Mostra file

File: blstm.py Progetto: papar22/returnn-hmm

    #!!! change this when changing the code!
    def c_code_cache_version(self):
        """Return the version tuple ``(1, 7)``; bump it whenever the code changes."""
        version = (1, 7)
        return version


BLSTMOpGradNoInplaceInstance = BLSTMOpGrad(inplace=False)
BLSTMOpGradInplaceInstance = BLSTMOpGrad(inplace=True)

BLSTMOpGradInplaceOpt = OpSub(BLSTMOpGradNoInplaceInstance,
                              BLSTMOpGradInplaceInstance)

#hack to avoid being called twice
if not hasattr(optdb, 'BLSTMOpGradInplaceOpt_registered'):
    optdb.register('BLSTMOpGradInplaceOpt',
                   theano.gof.TopoOptimizer(BLSTMOpGradInplaceOpt), 50.0,
                   'fast_run', 'inplace', 'gpuarray')
    optdb.BLSTMOpGradInplaceOpt_registered = True

#------------------------


class BLSTMOp(theano.sandbox.cuda.GpuOp):
    def __init__(self, inplace):
        self.inplace = inplace
        if inplace:
            #all outputs operate inplace on input 0 (which is Z)
            #but when the input is marked multiple times, we get an error
            #so we only mark that output 0 destroys input 0
            #anyway theano knows that input 0 will be destroyed, so it should be OK
            #TODO

Esempio n. 54

0

Mostra file

File: opt.py Progetto: SuperElectric/Theano

from theano.sandbox.gpuarray.subtensor import GpuIncSubtensor, GpuSubtensor
from theano.sandbox.gpuarray.type import GpuArrayConstant

gpu_optimizer = EquilibriumDB()
gpu_cut_copies = EquilibriumDB()

gpu_seqopt = SequenceDB()

gpu_seqopt.register('gpuarray_local_optimiziations', gpu_optimizer, 1,
                    'fast_run', 'inplace', 'gpuarray')
gpu_seqopt.register('gpuarray_cut_transfers', gpu_cut_copies, 2,
                    'fast_run', 'gpuarray')

# do not add 'fast_run' to these two as this would always enable gpuarray mode
optdb.register('gpuarray_opt', gpu_seqopt,
               optdb.__position__.get('add_destroy_handler', 49.5) - 1,
               'gpuarray')


def register_opt(*tags, **kwargs):
    """Build a decorator that registers a local optimizer in ``gpu_optimizer``.

    ``tags`` are appended after the fixed 'fast_run' and 'gpuarray' tags.
    The only keyword read is ``name``; when it is missing or falsy the
    optimizer's ``__name__`` is used as the registration name.  The
    decorator returns the optimizer unchanged.
    """
    def f(local_opt):
        # Default of None avoids the KeyError the bare pop('name') raised
        # when kwargs was non-empty but had no 'name' entry.
        name = kwargs.pop('name', None) or local_opt.__name__
        gpu_optimizer.register(name, local_opt, 'fast_run', 'gpuarray', *tags)
        return local_opt
    return f

register_opt()(theano.tensor.opt.local_track_shape_i)


def op_lifter(OP):
    """

Esempio n. 55

0

Mostra file

File: builders.py Progetto: EugenePY/Theano

            output[0] = variable.copy()


@gof.local_optimizer([OpFromGraph])
def inline_ofg_expansion(node):
    """
    Replace an inline OpFromGraph node by a clone of its inner graph.

    Returns False when ``node.op`` is not an OpFromGraph or when its
    ``is_inline`` flag is falsy.  Otherwise returns ``theano.clone`` of the
    op's local outputs, with every local input mapped to the corresponding
    input of ``node``.
    """
    ofg = node.op
    if not (isinstance(ofg, OpFromGraph) and ofg.is_inline):
        return False
    return theano.clone(ofg.local_outputs,
                        dict(izip(ofg.local_inputs, node.inputs)))

# The value -0.01 is chosen so that this runs before the first merge
# optimizer and before the first scan optimizer.
optdb.register('inline_ofg_expansion',
               gof.opt.in2out(inline_ofg_expansion),
               -0.01,
               'fast_compile', 'fast_run')

# OpFromGraph contains a Theano compiled function, so DebugMode has to be
# told about it.
ops_with_inner_function[OpFromGraph] = 'fn'

Esempio n. 56

0

Mostra file

File: blas.py Progetto: MatthieuCourbariaux/Theano

from theano.compile import optdb
from theano.gof import local_optimizer, LocalOptGroup
from theano.tensor.opt import in2out


@local_optimizer([gpugemv_no_inplace], inplace=True)
def local_inplace_gpuagemv(node):
    """
    Swap a `gpugemv_no_inplace` node for `gpugemv_inplace` on the same inputs.

    Returns a one-element list holding the replacement, or None when the
    node's op is not `gpugemv_no_inplace`.
    """
    if not (node.op == gpugemv_no_inplace):
        return None
    return [gpugemv_inplace(*node.inputs)]


@local_optimizer([gpugemm_no_inplace], inplace=True)
def local_inplace_gpuagemm(node):
    """
    Swap a `gpugemm_no_inplace` node for `gpugemm_inplace` on the same inputs.

    Returns a one-element list holding the replacement, or None when the
    node's op is not `gpugemm_no_inplace`.
    """
    if not (node.op == gpugemm_no_inplace):
        return None
    return [gpugemm_inplace(*node.inputs)]


@local_optimizer([gpuger_no_inplace], inplace=True)
def local_inplace_gpuager(node):
    """
    Swap a `gpuger_no_inplace` node for `gpuger_inplace` on the same inputs.

    Returns a one-element list holding the replacement, or None when the
    node's op is not `gpuger_no_inplace`.
    """
    if not (node.op == gpuger_no_inplace):
        return None
    return [gpuger_inplace(*node.inputs)]


# Group the three in-place BLAS rewrites, wrap the group with in2out and
# register the result in optdb.
gpuablas_opt_inplace = in2out(
    LocalOptGroup(
        local_inplace_gpuagemv,
        local_inplace_gpuagemm,
        local_inplace_gpuager),
    name='gpuablas_opt_inplace')
optdb.register(
    'InplaceGpuaBlasOpt', gpuablas_opt_inplace,
    70.0,
    'fast_run', 'inplace', 'gpuarray')

Esempio n. 57

0

Mostra file

File: opt.py Progetto: herr-biber/Theano

    GpuSoftmax,
)
from theano.sandbox.gpuarray.elemwise import GpuElemwise, _is_scalar, GpuDimShuffle, GpuCAReduceCuda
from theano.sandbox.gpuarray.subtensor import GpuIncSubtensor, GpuSubtensor
from theano.sandbox.gpuarray.type import GpuArrayConstant

# Databases that collect the gpuarray optimizations.
gpu_optimizer = EquilibriumDB()
gpu_cut_copies = EquilibriumDB()

gpu_seqopt = SequenceDB()

# Both equilibrium databases go into the sequence database, at 1 and 2.
gpu_seqopt.register(
    "gpuarray_local_optimiziations", gpu_optimizer, 1,
    "fast_run", "inplace", "gpuarray")
gpu_seqopt.register(
    "gpuarray_cut_transfers", gpu_cut_copies, 2,
    "fast_run", "gpuarray")

# do not add 'fast_run' to these two as this would always enable gpuarray mode
# The value passed is one less than the "add_destroy_handler" entry of
# optdb.__position__, with 49.5 used when that entry is missing.
optdb.register(
    "gpuarray_opt", gpu_seqopt,
    optdb.__position__.get("add_destroy_handler", 49.5) - 1,
    "gpuarray")


def register_opt(*tags, **kwargs):
    """
    Build a decorator that registers a local optimizer in `gpu_optimizer`.

    :param tags: extra tags, appended after the always-present
        "fast_run" and "gpuarray" tags.
    :param kwargs: only the optional ``name`` keyword is consulted. When it
        is missing or falsy, the optimizer's ``__name__`` is used as the
        registration name.
    :return: a function that registers the optimizer it receives and
        returns that optimizer unchanged, so it can be used as a decorator.
    """
    def f(local_opt):
        # pop() with a default: a bare kwargs.pop("name") raised KeyError
        # whenever keyword arguments were given without "name".
        # "name" is still consumed on first use, so a decorator that is
        # applied again falls back to the optimizer's __name__.
        name = kwargs.pop("name", None) or local_opt.__name__
        gpu_optimizer.register(name, local_opt, "fast_run", "gpuarray", *tags)
        return local_opt

    return f


# Register the existing tensor optimization under its own __name__, with no
# tags beyond the ones register_opt always adds.
register_opt()(theano.tensor.opt.local_track_shape_i)


def op_lifter(OP):

Esempio n. 58

0

Mostra file

File: opt.py Progetto: rollingstone/Theano

_logger = logging.getLogger("theano.sandbox.gpuarray.opt")

# Databases that collect the gpuarray optimizations.
gpu_optimizer = EquilibriumDB()
gpu_cut_copies = EquilibriumDB()

gpu_seqopt = SequenceDB()

# Don't register this right now
conv_groupopt = LocalGroupDB()
conv_groupopt.__name__ = "gpua_conv_opts"

# Both equilibrium databases go into the sequence database, at 1 and 2.
gpu_seqopt.register(
    "gpuarray_local_optimiziations", gpu_optimizer, 1,
    "fast_compile", "fast_run", "gpuarray")
gpu_seqopt.register(
    "gpuarray_cut_transfers", gpu_cut_copies, 2,
    "fast_compile", "fast_run", "gpuarray")

# do not add 'fast_run' to these two as this would always enable gpuarray mode
# The value passed is one less than the "add_destroy_handler" entry of
# optdb.__position__, with 49.5 used when that entry is missing.
optdb.register(
    "gpuarray_opt", gpu_seqopt,
    optdb.__position__.get("add_destroy_handler", 49.5) - 1,
    "gpuarray")


def register_opt(*tags, **kwargs):
    """
    Build a decorator that registers a local optimizer in `gpu_optimizer`.

    :param tags: extra tags, appended after the always-present
        "fast_run" and "gpuarray" tags.
    :param kwargs: only the optional ``name`` keyword is consulted. When it
        is missing or falsy, the optimizer's ``__name__`` is used as the
        registration name.
    :return: a function that registers the optimizer it receives and
        returns that optimizer unchanged, so it can be used as a decorator.
    """
    def f(local_opt):
        # pop() with a default: a bare kwargs.pop("name") raised KeyError
        # whenever keyword arguments were given without "name".
        # "name" is still consumed on first use, so a decorator that is
        # applied again falls back to the optimizer's __name__.
        name = kwargs.pop("name", None) or local_opt.__name__
        gpu_optimizer.register(name, local_opt, "fast_run", "gpuarray", *tags)
        return local_opt

    return f


# Register local_track_shape_i under its own __name__, adding "fast_compile"
# to the tags register_opt always applies.
register_opt("fast_compile")(theano.tensor.opt.local_track_shape_i)

# Registered directly on gpu_optimizer rather than through register_opt, so
# the only tag it carries is "unsafe".
gpu_optimizer.register("local_remove_all_assert", theano.tensor.opt.local_remove_all_assert, "unsafe")

Esempio n. 59

0

Mostra file

File: rng_mrg.py Progetto: Tanjay94/Theano

        return final_samples


from theano.sandbox.gpuarray.opt import register_opt as register_gpua, host_from_gpu as host_from_gpua


@register_gpua()
@local_optimizer([mrg_uniform])
def local_gpua_mrg(node):
    """
    Replace an `mrg_uniform` node whose first input is a GpuArrayType variable.

    The op must be exactly of type `mrg_uniform` (subclasses do not match).
    The replacement is built with `GPUA_mrg_uniform.new`; its first output is
    returned as is and its second output is passed through `host_from_gpua`.
    Returns None when the node does not qualify.
    """
    exact_match = type(node.op) == mrg_uniform
    if not (exact_match and isinstance(node.inputs[0].type, GpuArrayType)):
        return None
    first_input = node.inputs[0]
    out_type = node.op.output_type
    outs = GPUA_mrg_uniform.new(
        first_input, out_type.ndim, out_type.dtype, node.inputs[1])
    return [outs[0], host_from_gpua(outs[1])]


# Op classes handled by mrg_random_make_inplace: used both as the tracked ops
# of its local_optimizer decorator and for its isinstance check.
MRG_RNGs = (mrg_uniform, GPU_mrg_uniform, GPUA_mrg_uniform)


@local_optimizer(MRG_RNGs)
def mrg_random_make_inplace(node):
    """
    Rebuild a non-inplace MRG random op as its inplace variant.

    Returns the outputs of the new inplace node, or False when the node's op
    is not one of `MRG_RNGs` or is already inplace.
    """
    current_op = node.op
    if not isinstance(current_op, MRG_RNGs) or current_op.inplace:
        return False
    # Instantiate through the op's own class, since it might be a gpu version.
    inplace_op = current_op.__class__(current_op.output_type, inplace=True)
    return inplace_op.make_node(*node.inputs).outputs


# Wrap the local optimizer with in2out (new trees ignored) and register it
# in optdb at 99.
optdb.register(
    "random_make_inplace_mrg",
    opt.in2out(mrg_random_make_inplace, ignore_newtrees=True),
    99,
    "fast_run", "inplace")