@gof.local_optimizer([SparseBlockGemv], inplace=True)
def local_inplace_sparse_block_gemv(node):
    """
    SparseBlockGemv(inplace=False) -> SparseBlockGemv(inplace=True)
    """
    if isinstance(node.op, SparseBlockGemv) and not node.op.inplace:
        new_node = sparse_block_gemv_inplace(*node.inputs)
        return [new_node]
    return False

compile.optdb.register('local_inplace_sparse_block_gemv',
                       gof.TopoOptimizer(
                           local_inplace_sparse_block_gemv,
                           failure_callback=gof.TopoOptimizer.warn_inplace),
                       60, 'fast_run', 'inplace')


@gof.local_optimizer([SparseBlockOuter], inplace=True)
def local_inplace_sparse_block_outer(node):
    """
    SparseBlockOuter(inplace=False) -> SparseBlockOuter(inplace=True)
    """
    if isinstance(node.op, SparseBlockOuter) and not node.op.inplace:
        new_node = sparse_block_outer_inplace(*node.inputs)
        return [new_node]
    return False
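# --- Illustrative only: a hedged sanity-check sketch, not part of the module
# above. Assuming a graph that uses SparseBlockGemv, compiling in FAST_RUN
# mode should leave only inplace instances of the op once the optimizer
# registered above has run.
def _assert_all_sparse_block_gemv_inplace(inputs, outputs):
    import theano
    f = theano.function(inputs, outputs, mode='FAST_RUN')
    for apply_node in f.maker.fgraph.toposort():
        if isinstance(apply_node.op, SparseBlockGemv):
            assert apply_node.op.inplace, "inplace rewrite did not apply"
    return f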
def test_extending_2(self):
    '''
    This test fails in DebugMode for the same reasons that
    tensor/tests/test_basic.py:T_scalarfromtensor.test0 fails there
    (as far as I could tell - Razvan).
    '''
    from theano import gof

    class Double(gof.Type):
        def filter(self, x, strict=False, allow_downcast=None):
            if strict and not isinstance(x, float):
                raise TypeError('Expected a float!')
            return float(x)

        def values_eq_approx(self, x, y, tolerance=1e-4):
            return abs(x - y) / (abs(x) + abs(y)) < tolerance

        def __str__(self):
            return "double"

    double = Double()

    class BinaryDoubleOp(gof.Op):
        def __init__(self, name, fn):
            self.name = name
            self.fn = fn

        def __eq__(self, other):
            return (type(self) == type(other) and
                    self.name == other.name and self.fn == other.fn)

        def __hash__(self):
            return hash(type(self)) ^ hash(self.name) ^ hash(self.fn)

        def make_node(self, x, y):
            if isinstance(x, (int, float)):
                x = gof.Constant(double, x)
            if isinstance(y, (int, float)):
                y = gof.Constant(double, y)
            if x.type != double or y.type != double:
                raise TypeError('%s only works on doubles' % self.name)
            return gof.Apply(self, [x, y], [double()])

        def perform(self, node, inp, out):
            x, y = inp
            z, = out
            z[0] = self.fn(x, y)

        def __str__(self):
            return self.name

    add = BinaryDoubleOp(name='add', fn=lambda x, y: x + y)
    sub = BinaryDoubleOp(name='sub', fn=lambda x, y: x - y)
    mul = BinaryDoubleOp(name='mul', fn=lambda x, y: x * y)
    div = BinaryDoubleOp(name='div', fn=lambda x, y: x / y)

    def c_declare(name, sub):
        return """
        double %(name)s;
        """ % dict(name=name)
    double.c_declare = c_declare

    def c_init(name, sub):
        return """
        %(name)s = 0.0;
        """ % dict(name=name)
    double.c_init = c_init

    def c_extract(name, sub):
        return """
        if (!PyFloat_Check(py_%(name)s)) {
            PyErr_SetString(PyExc_TypeError, "expected a float");
            %(fail)s
        }
        %(name)s = PyFloat_AsDouble(py_%(name)s);
        """ % dict(name=name, fail=sub['fail'])
    double.c_extract = c_extract

    def c_sync(name, sub):
        return """
        Py_XDECREF(py_%(name)s);
        py_%(name)s = PyFloat_FromDouble(%(name)s);
        if (!py_%(name)s) {
            printf("PyFloat_FromDouble failed on: %%f\\n", %(name)s);
            Py_XINCREF(Py_None);
            py_%(name)s = Py_None;
        }
        """ % dict(name=name)
    double.c_sync = c_sync

    def c_cleanup(name, sub):
        return ""
    double.c_cleanup = c_cleanup

    from theano import function
    x, y, z = double('x'), double('y'), double('z')
    a = add(x, y)
    b = mul(a, z)
    f = function([x, y, z], b)
    assert f(1.0, 2.0, 3.0) == 9.0

    from theano import gof

    class Double(gof.Type):
        def filter(self, x, strict=False, allow_downcast=None):
            if strict and not isinstance(x, float):
                raise TypeError('Expected a float!')
            return float(x)

        def values_eq_approx(self, x, y, tolerance=1e-4):
            return abs(x - y) / (abs(x) + abs(y)) < tolerance

        def __str__(self):
            return "double"

        def c_declare(self, name, sub):
            return """
            double %(name)s;
            """ % dict(name=name)

        def c_init(self, name, sub):
            return """
            %(name)s = 0.0;
            """ % dict(name=name)

        def c_extract(self, name, sub):
            return """
            if (!PyFloat_Check(py_%(name)s)) {
                PyErr_SetString(PyExc_TypeError, "expected a float");
                %(fail)s
            }
            %(name)s = PyFloat_AsDouble(py_%(name)s);
            """ % dict(sub, name=name)

        def c_sync(self, name, sub):
            return """
            Py_XDECREF(py_%(name)s);
            py_%(name)s = PyFloat_FromDouble(%(name)s);
            if (!py_%(name)s) {
                printf("PyFloat_FromDouble failed on: %%f\\n", %(name)s);
                Py_XINCREF(Py_None);
                py_%(name)s = Py_None;
            }
            """ % dict(name=name)

        def c_cleanup(self, name, sub):
            return ""

    double = Double()

    def c_code(node, name, input_names, output_names, sub):
        x_name, y_name = input_names[0], input_names[1]
        output_name = output_names[0]
return """ %(output_name)s = %(x_name)s * %(y_name)s; """ % locals() mul.c_code = c_code from theano import gof class BinaryDoubleOp(gof.Op): def __init__(self, name, fn, ccode): self.name = name self.fn = fn self.ccode = ccode def make_node(self, x, y): if isinstance(x, (int, float)): x = gof.Constant(double, x) if isinstance(y, (int, float)): y = gof.Constant(double, y) if x.type != double or y.type != double: raise TypeError('%s only works on doubles' % self.name) return gof.Apply(self, [x, y], [double()]) def perform(self, node, inp, out): x, y = inp z, = out z[0] = self.fn(x, y) def __str__(self): return self.name def c_code(self, node, name, inp, out, sub): x, y = inp z, = out return self.ccode % locals() add = BinaryDoubleOp(name='add', fn=lambda x, y: x + y, ccode="%(z)s = %(x)s + %(y)s;") sub = BinaryDoubleOp(name='sub', fn=lambda x, y: x - y, ccode="%(z)s = %(x)s - %(y)s;") mul = BinaryDoubleOp(name='mul', fn=lambda x, y: x * y, ccode="%(z)s = %(x)s * %(y)s;") div = BinaryDoubleOp(name='div', fn=lambda x, y: x / y, ccode="%(z)s = %(x)s / %(y)s;") from theano.gof import toolbox class Simplify(gof.Optimizer): def add_requirements(self, env): env.extend(toolbox.ReplaceValidate()) def apply(self, env): for node in env.toposort(): if node.op == div: x, y = node.inputs z = node.outputs[0] if x.owner and x.owner.op == mul: a, b = x.owner.inputs if y == a: env.replace_validate(z, b) elif y == b: env.replace_validate(z, a) simplify = Simplify() x = double('x') y = double('y') z = double('z') a = add(z, mul(div(mul(y, x), y), div(z, x))) e = gof.Env([x, y, z], [a]) simplify.optimize(e) class LocalSimplify(gof.LocalOptimizer): def transform(self, node): if node.op == div: x, y = node.inputs if x.owner and x.owner.op == mul: a, b = x.owner.inputs if y == a: return [b] elif y == b: return [a] return False def tracks(self): # This should be needed for the EquilibriumOptimizer # but it isn't now # TODO: do this and explain it return [] # that's not what you should do local_simplify = LocalSimplify() x = double('x') y = double('y') z = double('z') a = add(z, mul(div(mul(y, x), y), div(z, x))) e = gof.Env([x, y, z], [a]) simplify = gof.TopoOptimizer(local_simplify) simplify.optimize(e)
    return [dout]


@gof.local_optimizer([Contiguous], inplace=True)
def opt_remove_contiguous(node):
    if isinstance(node.op, Contiguous):
        x, = node.inputs
        if x.owner and isinstance(
                x.owner.op, (T.Alloc, T.AllocEmpty, T.extra_ops.CpuContiguous)):
            return [x]
    return False

optdb.register('opt_remove_contiguous',
               gof.TopoOptimizer(opt_remove_contiguous),
               10, 'fast_run')


# Theano will not do this optimization. So we register it now.
# See: https://github.com/Theano/Theano/issues/4400
@try_register_gpu_opt(Contiguous)
def local_gpu_Contiguous(node):
    if isinstance(node.op, Contiguous):
        # See also: https://github.com/Theano/Theano/blob/master/theano/sandbox/cuda/opt.py
        from theano.sandbox.cuda import host_from_gpu
        x, = node.inputs
        if x.owner and x.owner.op == host_from_gpu:
            from theano.sandbox.cuda.basic_ops import gpu_contiguous
            return [host_from_gpu(gpu_contiguous(x.owner.inputs[0]))]
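# --- Illustrative only: a hedged check that opt_remove_contiguous fires
# (assumes the Contiguous op above can be instantiated with no arguments).
# T.alloc yields a freshly allocated, hence contiguous, array, so the
# wrapping Contiguous node should disappear from the compiled graph.
def _check_remove_contiguous():
    import theano
    y = Contiguous()(T.alloc(0.0, 5, 5))
    f = theano.function([], y, mode='FAST_RUN')
    assert not any(isinstance(n.op, Contiguous)
                   for n in f.maker.fgraph.toposort())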
@gof.local_optimizer([None])
def local_inplace_remove0(node):
    """
    Optimization to insert inplace versions of Remove0.
    """
    if isinstance(node.op, Remove0) and not node.op.inplace:
        new_op = node.op.__class__(inplace=True)
        new_node = new_op(*node.inputs)
        return [new_node]
    return False

theano.compile.optdb.register(
    'local_inplace_remove0',
    gof.TopoOptimizer(local_inplace_remove0,
                      failure_callback=gof.TopoOptimizer.warn_inplace),
    60, 'fast_run', 'inplace')


@gof.local_optimizer([csm_properties])
def local_csm_properties_csm(node):
    """
    If we find csm_properties(CSM(*args)), then we can replace that with
    the *args directly.
    """
    if node.op == csm_properties:
        csm, = node.inputs
        if csm.owner and (csm.owner.op == CSC or csm.owner.op == CSR):
            # csm.owner.inputs could be broadcastable. In that case, we have
            # to adjust the broadcasting flag here.
            ret_var = [
                theano.tensor.patternbroadcast(i, o.broadcastable)
                for i, o in izip(csm.owner.inputs, node.outputs)]
            return ret_var
    return False
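# --- Illustrative only: the pattern local_csm_properties_csm targets, using
# theano.sparse's standard CSC constructor and csm_properties.
# csm_properties(CSC(...)) merely unpacks what CSC packed, so the rewrite
# short-circuits it to the original inputs (broadcast-adjusted).
def _csm_roundtrip_example():
    import theano.tensor as T
    from theano.sparse import CSC, csm_properties
    data = T.vector('data')
    indices = T.ivector('indices')
    indptr = T.ivector('indptr')
    shape = T.ivector('shape')
    return csm_properties(CSC(data, indices, indptr, shape))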
        grad_op = grad_op.__class__(**kwargs)
    else:
        old_grad_op_input0 = grad_op_v.owner.inputs[0]
        sum_inputs = [old_grad_op_input0] + sum_inputs
    assert len(sum_inputs) > 0
    if len(sum_inputs) == 1:
        new_grad_op_input0 = sum_inputs[0]
    else:
        new_grad_op_input0 = T.add(*sum_inputs)
    new_grad_op_inputs = [new_grad_op_input0] + grad_op_v.owner.inputs[1:]
    new_v = grad_op(*new_grad_op_inputs)
    return [new_v]

optdb.register('add_merge_MultiBatchBeamGradAddOp',
               gof.TopoOptimizer(add_merge_MultiBatchBeamGradAddOp),
               0.1, 'fast_run')


@gof.local_optimizer([MultiBatchBeamGradAddOp], inplace=True)
def inplace_MultiBatchBeamGradAddOp(node):
    if isinstance(node.op, MultiBatchBeamGradAddOp) \
            and not node.op.inplace and not node.op.zero_with_shape:
        kwargs = {k: getattr(node.op, k) for k in node.op.__props__}
        kwargs["inplace"] = True
        new_op = node.op.__class__(**kwargs)
        new_v = new_op(*node.inputs)
        return [new_v]
    return False
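# --- Illustrative only: local optimizers take effect once registered; a
# registration sketch following the pattern used elsewhere in this file
# (the priority value 50 is an assumption, not from the original source):
optdb.register('inplace_MultiBatchBeamGradAddOp',
               gof.TopoOptimizer(inplace_MultiBatchBeamGradAddOp,
                                 failure_callback=gof.TopoOptimizer.warn_inplace),
               50, 'fast_run', 'inplace')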
                any_inplace = True
                info["is_inplace"] = True
        if not any_inplace:
            return False
        new_op = node.op.__class__(**kwargs)
        from returnn.theano.util import make_var_tuple
        # noinspection PyCallingNonCallable
        new_v = make_var_tuple(new_op(*node.inputs))
        return new_v
    return False


try:
    optdb.register('inplace_NativeOp',
                   gof.TopoOptimizer(_inplace_native_op,
                                     failure_callback=gof.TopoOptimizer.warn_inplace),
                   60, 'fast_run', 'inplace')
except ValueError:
    # Can happen if it was already registered before, e.g. when we reload the module.
    pass


@try_register_gpu_opt(TheanoNativeOp)
def _local_gpu_native_op(node):
    if isinstance(node.op, TheanoNativeOp):
        # See also: https://github.com/Theano/Theano/blob/master/theano/sandbox/cuda/opt.py
        # noinspection PyUnresolvedReferences,PyPackageRequirements
        from theano.sandbox.cuda import host_from_gpu, gpu_from_host, as_cuda_ndarray_variable
        args = node.inputs
        if any([(x.owner and x.owner.op == host_from_gpu) for x in args]):
            gpu_op = TheanoGpuNativeOp(**{key: getattr(node.op, key)
                                          for key in node.op.__props__})