Example #1

@gof.local_optimizer([SparseBlockGemv], inplace=True)
def local_inplace_sparse_block_gemv(node):
    """
        SparseBlockGemv(inplace=False) -> SparseBlockGemv(inplace=True)
    """
    if isinstance(node.op, SparseBlockGemv) and not node.op.inplace:
        new_node = sparse_block_gemv_inplace(*node.inputs)
        return [new_node]
    return False


compile.optdb.register('local_inplace_sparse_block_gemv',
                       gof.TopoOptimizer(
                           local_inplace_sparse_block_gemv,
                           failure_callback=gof.TopoOptimizer.warn_inplace),
                       60, 'fast_run', 'inplace')
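# The registration above runs at priority 60 under the 'fast_run' and
# 'inplace' tags, so the rewrite fires only in modes that include those
# tags (e.g. FAST_RUN). A sketch of checking that the substitution took
# place, assuming `f` is a theano.function compiled from a graph that
# contains SparseBlockGemv nodes:
#
#     topo = f.maker.fgraph.toposort()
#     assert all(n.op.inplace for n in topo
#                if isinstance(n.op, SparseBlockGemv))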


@gof.local_optimizer([SparseBlockOuter], inplace=True)
def local_inplace_sparse_block_outer(node):
    """
        SparseBlockOuter(inplace=False) -> SparseBlockOuter(inplace=True)
    """
    if isinstance(node.op, SparseBlockOuter) and not node.op.inplace:
        new_node = sparse_block_outer_inplace(*node.inputs)
        return [new_node]
    return False

Example #2
    def test_extending_2(self):
        '''
        This test fails in DebugMode for the same reason that
        tensor/tests/test_basic.py:T_scalarfromtensor.test0
        fails in DebugMode (as far as I could tell - Razvan).
        '''
        from theano import gof

        class Double(gof.Type):
            def filter(self, x, strict=False, allow_downcast=None):
                if strict and not isinstance(x, float):
                    raise TypeError('Expected a float!')
                return float(x)

            def values_eq_approx(self, x, y, tolerance=1e-4):
                return abs(x - y) / (abs(x) + abs(y)) < tolerance

            def __str__(self):
                return "double"

        double = Double()

        class BinaryDoubleOp(gof.Op):
            def __init__(self, name, fn):
                self.name = name
                self.fn = fn

            def __eq__(self, other):
                return type(self) == type(other) and (
                    self.name == other.name) and (self.fn == other.fn)

            def __hash__(self):
                return hash(type(self)) ^ hash(self.name) ^ hash(self.fn)

            def make_node(self, x, y):
                if isinstance(x, (int, float)):
                    x = gof.Constant(double, x)
                if isinstance(y, (int, float)):
                    y = gof.Constant(double, y)
                if x.type != double or y.type != double:
                    raise TypeError('%s only works on doubles' % self.name)
                return gof.Apply(self, [x, y], [double()])

            def perform(self, node, inp, out):
                x, y = inp
                z, = out
                z[0] = self.fn(x, y)

            def __str__(self):
                return self.name

        add = BinaryDoubleOp(name='add', fn=lambda x, y: x + y)

        sub = BinaryDoubleOp(name='sub', fn=lambda x, y: x - y)

        mul = BinaryDoubleOp(name='mul', fn=lambda x, y: x * y)

        div = BinaryDoubleOp(name='div', fn=lambda x, y: x / y)

        def c_declare(name, sub):
            return """
            double %(name)s;
            """ % dict(name=name)

        double.c_declare = c_declare

        def c_init(name, sub):
            return """
            %(name)s = 0.0;
            """ % dict(name=name)

        double.c_init = c_init

        def c_extract(name, sub):
            return """
            if (!PyFloat_Check(py_%(name)s)) {
                PyErr_SetString(PyExc_TypeError, "expected a float");
                %(fail)s
            }
            %(name)s = PyFloat_AsDouble(py_%(name)s);
            """ % dict(name=name, fail=sub['fail'])

        double.c_extract = c_extract

        def c_sync(name, sub):
            return """
            Py_XDECREF(py_%(name)s);
            py_%(name)s = PyFloat_FromDouble(%(name)s);
            if (!py_%(name)s) {
                printf("PyFloat_FromDouble failed on: %%f\\n", %(name)s);
                Py_XINCREF(Py_None);
                py_%(name)s = Py_None;
            }
            """ % dict(name=name)

        double.c_sync = c_sync

        def c_cleanup(name, sub):
            return ""

        double.c_cleanup = c_cleanup

        from theano import function

        x, y, z = double('x'), double('y'), double('z')
        a = add(x, y)
        b = mul(a, z)
        f = function([x, y, z], b)
        assert f(1.0, 2.0, 3.0) == 9.0
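
        # Above, the C support hooks were patched onto the `double`
        # instance one by one. Below, the same Type is re-defined with
        # c_declare/c_init/c_extract/c_sync/c_cleanup as proper methods,
        # and the Ops are re-created with c_code implementations.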

        from theano import gof

        class Double(gof.Type):
            def filter(self, x, strict=False, allow_downcast=None):
                if strict and not isinstance(x, float):
                    raise TypeError('Expected a float!')
                return float(x)

            def values_eq_approx(self, x, y, tolerance=1e-4):
                return abs(x - y) / (abs(x) + abs(y)) < tolerance

            def __str__(self):
                return "double"

            def c_declare(self, name, sub):
                return """
                double %(name)s;
                """ % dict(name=name)

            def c_init(self, name, sub):
                return """
                %(name)s = 0.0;
                """ % dict(name=name)

            def c_extract(self, name, sub):
                return """
                if (!PyFloat_Check(py_%(name)s)) {
                    PyErr_SetString(PyExc_TypeError, "expected a float");
                    %(fail)s
                }
                %(name)s = PyFloat_AsDouble(py_%(name)s);
                """ % dict(sub, name=name)

            def c_sync(self, name, sub):
                return """
                Py_XDECREF(py_%(name)s);
                py_%(name)s = PyFloat_FromDouble(%(name)s);
                if (!py_%(name)s) {
                    printf("PyFloat_FromDouble failed on: %%f\\n", %(name)s);
                    Py_XINCREF(Py_None);
                    py_%(name)s = Py_None;
                }
                """ % dict(name=name)

            def c_cleanup(self, name, sub):
                return ""

        double = Double()

        def c_code(node, name, input_names, output_names, sub):
            x_name, y_name = input_names[0], input_names[1]
            output_name = output_names[0]
            return """
            %(output_name)s = %(x_name)s * %(y_name)s;
            """ % locals()

        mul.c_code = c_code

        from theano import gof

        class BinaryDoubleOp(gof.Op):
            def __init__(self, name, fn, ccode):
                self.name = name
                self.fn = fn
                self.ccode = ccode

            def make_node(self, x, y):
                if isinstance(x, (int, float)):
                    x = gof.Constant(double, x)
                if isinstance(y, (int, float)):
                    y = gof.Constant(double, y)
                if x.type != double or y.type != double:
                    raise TypeError('%s only works on doubles' % self.name)
                return gof.Apply(self, [x, y], [double()])

            def perform(self, node, inp, out):
                x, y = inp
                z, = out
                z[0] = self.fn(x, y)

            def __str__(self):
                return self.name

            def c_code(self, node, name, inp, out, sub):
                x, y = inp
                z, = out
                return self.ccode % locals()

        add = BinaryDoubleOp(name='add',
                             fn=lambda x, y: x + y,
                             ccode="%(z)s = %(x)s + %(y)s;")

        sub = BinaryDoubleOp(name='sub',
                             fn=lambda x, y: x - y,
                             ccode="%(z)s = %(x)s - %(y)s;")

        mul = BinaryDoubleOp(name='mul',
                             fn=lambda x, y: x * y,
                             ccode="%(z)s = %(x)s * %(y)s;")

        div = BinaryDoubleOp(name='div',
                             fn=lambda x, y: x / y,
                             ccode="%(z)s = %(x)s / %(y)s;")

        from theano.gof import toolbox

        class Simplify(gof.Optimizer):
            def add_requirements(self, env):
                env.extend(toolbox.ReplaceValidate())

            def apply(self, env):
                for node in env.toposort():
                    if node.op == div:
                        x, y = node.inputs
                        z = node.outputs[0]
                        if x.owner and x.owner.op == mul:
                            a, b = x.owner.inputs
                            if y == a:
                                env.replace_validate(z, b)
                            elif y == b:
                                env.replace_validate(z, a)

        simplify = Simplify()
        x = double('x')
        y = double('y')
        z = double('z')
        a = add(z, mul(div(mul(y, x), y), div(z, x)))
        e = gof.Env([x, y, z], [a])
        simplify.optimize(e)
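        # After the pass, div(mul(y, x), y) has been replaced by x, so the
        # output is now add(z, mul(x, div(z, x))). A sketch (not in the
        # original test) of inspecting the rewritten graph:
        import theano
        theano.printing.debugprint(e.outputs)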

        class LocalSimplify(gof.LocalOptimizer):
            def transform(self, node):
                if node.op == div:
                    x, y = node.inputs
                    if x.owner and x.owner.op == mul:
                        a, b = x.owner.inputs
                        if y == a:
                            return [b]
                        elif y == b:
                            return [a]
                return False

            def tracks(self):
                # EquilibriumOptimizer uses tracks() to decide which nodes
                # to try this optimizer on; returning an empty list defeats
                # that indexing and is not what you should do in real code.
                return []

        local_simplify = LocalSimplify()

        x = double('x')
        y = double('y')
        z = double('z')
        a = add(z, mul(div(mul(y, x), y), div(z, x)))
        e = gof.Env([x, y, z], [a])
        simplify = gof.TopoOptimizer(local_simplify)
        simplify.optimize(e)
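        # Sketch (assumption, not in the original test): with an
        # EquilibriumOptimizer, tracks() should return the Ops this local
        # optimizer can rewrite, so the scheduler only visits matching
        # nodes:
        class TrackedSimplify(LocalSimplify):
            def tracks(self):
                return [div]  # only div nodes can trigger this rewrite
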
Example #3
        return [dout]


@gof.local_optimizer([Contiguous], inplace=True)
def opt_remove_contiguous(node):
    if isinstance(node.op, Contiguous):
        x, = node.inputs
        if x.owner and isinstance(
                x.owner.op,
                (T.Alloc, T.AllocEmpty, T.extra_ops.CpuContiguous)):
            return [x]
    return False


optdb.register('opt_remove_contiguous',
               gof.TopoOptimizer(opt_remove_contiguous), 10, 'fast_run')
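
# The rewrite is safe because a freshly allocated tensor (Alloc or
# AllocEmpty output) is already C-contiguous, and CpuContiguous output is
# contiguous by construction, so forcing contiguity again is a no-op.
# A sketch of a graph this matches, with Theano's built-in op standing in
# for the custom Contiguous:
_x = T.alloc(0.0, 3, 4)                 # Alloc output is contiguous
_y = T.extra_ops.cpu_contiguous(_x)     # redundant; the rewrite keeps _x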


# Theano will not do this optimization. So we register it now.
# See: https://github.com/Theano/Theano/issues/4400
@try_register_gpu_opt(Contiguous)
def local_gpu_Contiguous(node):
    if isinstance(node.op, Contiguous):
        # see also: https://github.com/Theano/Theano/blob/master/theano/sandbox/cuda/opt.py
        from theano.sandbox.cuda import host_from_gpu
        x, = node.inputs
        if x.owner and x.owner.op == host_from_gpu:
            from theano.sandbox.cuda.basic_ops import gpu_contiguous
            return [host_from_gpu(gpu_contiguous(x.owner.inputs[0]))]

Example #4
File: opt.py  Project: jsalvatier/Theano-1
@gof.local_optimizer([None])
def local_inplace_remove0(node):
    """
    Optimization to insert inplace versions of Remove0.
    """
    if isinstance(node.op, Remove0) and not node.op.inplace:
        new_op = node.op.__class__(inplace=True)
        new_node = new_op(*node.inputs)
        return [new_node]
    return False


theano.compile.optdb.register(
    'local_inplace_remove0',
    gof.TopoOptimizer(local_inplace_remove0,
                      failure_callback=gof.TopoOptimizer.warn_inplace), 60,
    'fast_run', 'inplace')
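
# Note the [None] tracker on the decorator above: unlike the
# [SparseBlockGemv] form in Example #1, it asks the TopoOptimizer to try
# this local optimizer on every node, leaving the isinstance check to do
# the filtering.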


@gof.local_optimizer([csm_properties])
def local_csm_properties_csm(node):
    """if we find csm_properties(CSM(*args)), then we can replace that with the
    *args directly"""
    if node.op == csm_properties:
        csm, = node.inputs
        if csm.owner and (csm.owner.op == CSC or csm.owner.op == CSR):
            # csm.owner.inputs could be broadcastable. In that case, we have
            # to adjust the broadcasting flag here.
            ret_var = [
                theano.tensor.patternbroadcast(i, o.broadcastable)
                for i, o in izip(csm.owner.inputs, node.outputs)]
            return ret_var
    return False
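
# Sketch of the identity this rewrite exploits: CSM packs
# (data, indices, indptr, shape) into a sparse variable and
# csm_properties unpacks it again, so the round trip is elided by
# returning the original inputs. Hypothetical symbolic inputs:
_data, _indices = theano.tensor.vector(), theano.tensor.ivector()
_indptr, _shape = theano.tensor.ivector(), theano.tensor.ivector()
_m = CSC(_data, _indices, _indptr, _shape)
_props = csm_properties(_m)  # rewritten to the four inputs above
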
Example #5
        grad_op = grad_op.__class__(**kwargs)
    else:
        old_grad_op_input0 = grad_op_v.owner.inputs[0]
        sum_inputs = [old_grad_op_input0] + sum_inputs
    assert len(sum_inputs) > 0
    if len(sum_inputs) == 1:
        new_grad_op_input0 = sum_inputs[0]
    else:
        new_grad_op_input0 = T.add(*sum_inputs)
    new_grad_op_inputs = [new_grad_op_input0] + grad_op_v.owner.inputs[1:]
    new_v = grad_op(*new_grad_op_inputs)
    return [new_v]


optdb.register('add_merge_MultiBatchBeamGradAddOp',
               gof.TopoOptimizer(add_merge_MultiBatchBeamGradAddOp), 0.1,
               'fast_run')


@gof.local_optimizer([MultiBatchBeamGradAddOp], inplace=True)
def inplace_MultiBatchBeamGradAddOp(node):
    if (isinstance(node.op, MultiBatchBeamGradAddOp)
            and not node.op.inplace and not node.op.zero_with_shape):
        kwargs = {k: getattr(node.op, k) for k in node.op.__props__}
        kwargs["inplace"] = True
        new_op = node.op.__class__(**kwargs)
        new_v = new_op(*node.inputs)
        return [new_v]
    return False
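
# The clone pattern above works because the Op declares its constructor
# arguments in __props__ (which also derives __eq__ and __hash__), so
# rebuilding it from those props with inplace=True yields an equivalent
# inplace Op. Minimal sketch with a hypothetical Op:
class _SketchInplaceOp(gof.Op):
    __props__ = ('inplace',)

    def __init__(self, inplace=False):
        self.inplace = inplace
        if inplace:
            # Tell Theano that output 0 overwrites input 0.
            self.destroy_map = {0: [0]}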

Example #6
        any_inplace = True
        info["is_inplace"] = True
    if not any_inplace:
      return False
    new_op = node.op.__class__(**kwargs)
    from returnn.theano.util import make_var_tuple
    # noinspection PyCallingNonCallable
    new_v = make_var_tuple(new_op(*node.inputs))
    return new_v
  return False


try:
  optdb.register('inplace_NativeOp',
                 gof.TopoOptimizer(_inplace_native_op,
                                   failure_callback=gof.TopoOptimizer.warn_inplace),
                 60, 'fast_run', 'inplace')
except ValueError:  # can happen if it was already registered before, e.g. when we reload the module
  pass


@try_register_gpu_opt(TheanoNativeOp)
def _local_gpu_native_op(node):
  if isinstance(node.op, TheanoNativeOp):
    # see also: https://github.com/Theano/Theano/blob/master/theano/sandbox/cuda/opt.py
    # noinspection PyUnresolvedReferences,PyPackageRequirements
    from theano.sandbox.cuda import host_from_gpu, gpu_from_host, as_cuda_ndarray_variable
    args = node.inputs
    if any([(x.owner and x.owner.op == host_from_gpu) for x in args]):
      gpu_op = TheanoGpuNativeOp(**{key: getattr(node.op, key) for key in node.op.__props__})