def test_softmax_optimizations_w_bias2(self):
    x = tensor.matrix('x')
    b = tensor.vector('b')
    c = tensor.vector('c')
    one_of_n = tensor.lvector('one_of_n')
    op = crossentropy_categorical_1hot

    env = gof.Env([x, b, c, one_of_n],
                  [op(softmax(T.add(x, b, c)), one_of_n)])
    assert env.outputs[0].owner.op == op

    print 'BEFORE'
    for node in env.toposort():
        print node.op
    print '----'

    theano.compile.mode.optdb.query(
        theano.compile.mode.OPT_FAST_RUN).optimize(env)

    print 'AFTER'
    for node in env.toposort():
        print node.op
    print '===='

    assert len(env.toposort()) == 3
    assert str(env.outputs[0].owner.op) == 'OutputGuard'
    assert env.outputs[0].owner.inputs[
        0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
def test_argmax_pushdown_bias():
    x = tensor.dmatrix()
    b = tensor.dvector()

    out = tensor.argmax(softmax_with_bias(x, b), axis=-1)
    env = gof.Env([x, b], [out])

    theano.compile.mode.optdb.query(
        theano.compile.mode.OPT_FAST_RUN).optimize(env)

    #print 'AFTER'
    #for node in env.toposort():
    #    print node.op
    assert len(env.toposort()) == 4
    assert isinstance(env.toposort()[0].op, tensor.DimShuffle)
    assert isinstance(env.toposort()[1].op, tensor.Elemwise)
    assert isinstance(env.toposort()[2].op, tensor.MaxAndArgmax)
    assert str(env.toposort()[3].op) == 'OutputGuard'

    x = tensor.dmatrix()
    b = tensor.dvector()
    out = tensor.max_and_argmax(softmax_with_bias(x, b), axis=-1)[0]
    env = gof.Env([x, b], [out])

    backup = config.warn.argmax_pushdown_bug
    config.warn.argmax_pushdown_bug = False
    try:
        theano.compile.mode.optdb.query(
            theano.compile.mode.OPT_FAST_RUN).optimize(env)
    finally:
        config.warn.argmax_pushdown_bug = backup

    #print 'AFTER'
    #for node in env.toposort():
    #    print node.op
    assert len(env.toposort()) == 3
    assert isinstance(env.toposort()[0].op, SoftmaxWithBias)
    assert isinstance(env.toposort()[1].op, tensor.CAReduce)
    assert isinstance(env.toposort()[1].op.scalar_op, theano.scalar.Maximum)
    assert str(env.toposort()[2].op) == 'OutputGuard'
def test_softmax_optimizations_vector(self):
    x = tensor.vector('x')
    one_of_n = tensor.lvector('one_of_n')
    op = crossentropy_categorical_1hot
    env = gof.Env([x, one_of_n],
                  [op(softmax(x), one_of_n)])
    assert env.outputs[0].owner.op == op

    theano.compile.mode.optdb.query(
        theano.compile.mode.OPT_FAST_RUN).optimize(env)
    assert str(env.outputs[0].owner.op) == 'OutputGuard'
    assert env.outputs[0].owner.inputs[
        0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
def test_argmax_pushdown():
    x = tensor.dmatrix()

    # test that the max_and_argmax is pushed down if the max is not used
    out = tensor.max_and_argmax(
        softmax(tensor.exp(tensor.tanh(sigmoid(x)))),
        axis=-1)[1]
    env = gof.Env([x], [out])

    theano.compile.mode.optdb.query(
        theano.compile.mode.OPT_FAST_RUN).optimize(env)

    #print 'AFTER'
    #for node in env.toposort():
    #    print node.op
    assert len(env.toposort()) == 2  # an output_guard is second
    assert env.toposort()[0].op == tensor.basic._max_and_argmax
    assert str(env.toposort()[1].op) == 'OutputGuard'

    x = tensor.dmatrix()
    # test that the max_and_argmax is not pushed down if the max is used
    out = tensor.max_and_argmax(
        softmax(tensor.exp(tensor.tanh(sigmoid(x)))),
        axis=-1)[0]
    env = gof.Env([x], [out])

    backup = config.warn.argmax_pushdown_bug
    config.warn.argmax_pushdown_bug = False
    try:
        theano.compile.mode.optdb.query(
            theano.compile.mode.OPT_FAST_RUN).optimize(env)
    finally:
        config.warn.argmax_pushdown_bug = backup

    #print 'AFTER'
    #for node in env.toposort():
    #    print node.op
    assert len(env.toposort()) == 4  # an output_guard is second
    assert isinstance(env.toposort()[0].op, tensor.Elemwise)
    assert isinstance(env.toposort()[1].op, Softmax)
    assert isinstance(env.toposort()[2].op, tensor.CAReduce)
    assert isinstance(env.toposort()[2].op.scalar_op, theano.scalar.Maximum)
    assert str(env.toposort()[3].op) == 'OutputGuard'
def test_softmax_grad_optimizations(self):
    x = tensor.matrix('x')
    one_of_n = tensor.lvector('one_of_n')
    op = crossentropy_categorical_1hot
    xe = op(softmax(x), one_of_n)
    sum_xe = tensor.sum(xe)
    g_x = tensor.grad(sum_xe, x)
    env = gof.Env([x, one_of_n], [g_x])

    print 'BEFORE'
    for node in env.toposort():
        print node.op, node.inputs
    print '----'
    theano.compile.mode.optdb.query(
        theano.compile.mode.OPT_FAST_RUN).optimize(env)
    print 'AFTER'
    for node in env.toposort():
        print node.op, node.inputs

    # the function has 9 ops because the dimshuffle and elemwise{second}
    # aren't getting cleaned up as well as we'd like.
    has_cx1hot = False
    has_cx1hotdx = False
    has_softmax = False
    has_softmaxdx = False
    for node in env.toposort():
        if node.op == crossentropy_softmax_argmax_1hot_with_bias:
            has_cx1hot = True
        if node.op == crossentropy_softmax_1hot_with_bias_dx:
            has_cx1hotdx = True
        if node.op == softmax:
            has_softmax = True
        if node.op == softmax_grad:
            has_softmaxdx = True
    assert has_cx1hot
    assert has_cx1hotdx
    assert not has_softmax
    assert not has_softmaxdx
def process_node(self, env, node):
    # this flag tells if there was any change during the last iterations
    changed = True
    clean_inputs, clean_outputs = scan_utils.reconstruct_graph(
        node.op.inputs, node.op.outputs)

    local_env = gof.Env(clean_inputs, clean_outputs)
    max_iterations = 2 * len(local_env.toposort()) + 3
    counts = 0
    to_remove = []
    to_replace = []
    replace_with_in = []
    replace_with_out = []

    op = node.op
    # Construct the list of non_sequences to simplify a few things
    st = op.n_seqs
    st += int(numpy.sum([len(x) for x in
                         op.tap_array[:(op.n_mit_mot + op.n_mit_sot)]]))
    st += op.n_sit_sot
    st += op.n_shared_outs
    non_seqs = clean_inputs[st:]
    st = (op.n_seqs +
          op.n_mit_mot +
          op.n_mit_sot +
          op.n_sit_sot +
          op.n_nit_sot +
          op.n_shared_outs + 1)
    outer_non_seqs = node.inputs[st:]
    assert len(non_seqs) == len(outer_non_seqs)

    while changed and counts < max_iterations:
        counts += 1
        changed = False

        for nd in local_env.toposort():
            if (numpy.all([(x in non_seqs) or
                           (x.owner in to_remove) or
                           isinstance(x, tensor.Constant)
                           for x in nd.inputs]) and
                    # we can do this because the assumption is that a
                    # viewOp or deepCopyOp will be just at the end of the
                    # function and not somewhere in the middle ..
                    not isinstance(nd.op, theano.compile.ViewOp) and
                    not isinstance(nd.op, theano.compile.DeepCopyOp) and
                    # and we didn't already look at this node
                    not nd in to_remove):

                # We have a candidate node to remove
                # Step 1. Reconstruct it on the outside
                to_remove.append(nd)
                outside_ins = []
                for x in nd.inputs:
                    if x in non_seqs:
                        outside_ins += [outer_non_seqs[non_seqs.index(x)]]
                    elif x in to_replace:
                        outside_ins += [
                            replace_with_out[to_replace.index(x)]]
                    elif isinstance(x, theano.Constant):
                        outside_ins += [x.clone()]
                    else:
                        raise Exception(
                            ('Error in the `scan_pushout_non_seq_'
                             'operations`. The optimization tries '
                             'to move some computation from scan '
                             'which is not allowed to move. Report '
                             'this on theano-users list'), x)
                outside_ins = [x.type.filter_variable(y) for x, y in
                               zip(nd.inputs, outside_ins)]
                nw_outer_node = nd.op.make_node(*outside_ins)

                # Step 2. Create variables for replacements
                for idx, y in enumerate(nd.outputs):
                    y_place_holder = scan_utils.safe_new(y, '_replace')
                    to_replace += [y]
                    replace_with_in += [y_place_holder]
                    assert type(y) == type(nw_outer_node.outputs[idx])
                    replace_with_out += [nw_outer_node.outputs[idx]]
                changed = True

    if counts >= max_iterations:
        raise Exception('Error in the `scan_pushout_non_seq_operations`.'
                        ' The optimization exhausted the maximal number '
                        'of iterations allowed!')

    # We need to check all candidate replacements and choose those that
    # make sense for us

    # Step 1. Which elements of `to_replace` are used by remaining
    # components of the inner function
    clean_to_replace = []
    clean_replace_with_in = []
    clean_replace_with_out = []
    existent_nodes = [nd for nd in local_env.toposort()
                      if nd not in to_remove]
    to_keep = []
    for nd in existent_nodes:
        to_keep += nd.inputs
    for idx, out in enumerate(to_replace):
        if out in to_keep and out.owner not in existent_nodes:
            clean_to_replace += [out]
            clean_replace_with_in += [replace_with_in[idx]]
            clean_replace_with_out += [replace_with_out[idx]]

    if len(clean_to_replace) > 0:
        # We can finally put an end to all this madness
        givens = {}
        nw_outer = []
        nw_inner = []
        for to_repl, repl_in, repl_out in zip(clean_to_replace,
                                              clean_replace_with_in,
                                              clean_replace_with_out):
            if isinstance(repl_out, theano.Constant):
                repl_in = repl_out.clone()
            else:
                nw_inner += [repl_in]
                nw_outer += [repl_out]
            givens[to_repl] = repl_in

        _op_outs = scan_utils.clone(clean_outputs, replace=givens)
        _op_ins = clean_inputs + nw_inner
        op_ins, op_outs = scan_utils.reconstruct_graph(_op_ins, _op_outs)
        # Reconstruct node
        nwScan = scan_op.Scan(op_ins, op_outs, op.info)
        nw_node = nwScan.make_node(*(node.inputs + nw_outer))
        env.replace_all_validate(zip(node.outputs, nw_node.outputs),
                                 reason='scan_push_computation_out')
        return True
    elif to_keep == []:
        # Nothing in the inner graph should be kept
        replace_with = {}
        for idx, out in enumerate(to_replace):
            if out in local_env.outputs:
                x = node.outputs[local_env.outputs.index(out)]
                y = replace_with_out[idx]
                shape = [y.shape[idx] for idx in xrange(y.ndim)]
                replace_with[x] = tensor.alloc(y,
                                               node.inputs[0],
                                               *shape)

        # We need to add one extra dimension to the outputs
        env.replace_all_validate(replace_with.items(),
                                 reason='scan_push_computation_out')
    else:
        return False
def test_extending_2(self):
    '''
    This test fails in DebugMode for the same reasons the test in
    tensor/tests/test_basic.py:T_scalarfromtensor.test0
    fails on debug mode ( as much as I could tell - Razvan )
    '''
    from theano import gof

    class Double(gof.Type):

        def filter(self, x, strict=False, allow_downcast=None):
            if strict and not isinstance(x, float):
                raise TypeError('Expected a float!')
            return float(x)

        def values_eq_approx(self, x, y, tolerance=1e-4):
            return abs(x - y) / (abs(x) + abs(y)) < tolerance

        def __str__(self):
            return "double"

    double = Double()

    class BinaryDoubleOp(gof.Op):

        def __init__(self, name, fn):
            self.name = name
            self.fn = fn

        def __eq__(self, other):
            return type(self) == type(other) and (
                self.name == other.name) and (self.fn == other.fn)

        def __hash__(self):
            return hash(type(self)) ^ hash(self.name) ^ hash(self.fn)

        def make_node(self, x, y):
            if isinstance(x, (int, float)):
                x = gof.Constant(double, x)
            if isinstance(y, (int, float)):
                y = gof.Constant(double, y)
            if x.type != double or y.type != double:
                raise TypeError('%s only works on doubles' % self.name)
            return gof.Apply(self, [x, y], [double()])

        def perform(self, node, inp, out):
            x, y = inp
            z, = out
            z[0] = self.fn(x, y)

        def __str__(self):
            return self.name

    add = BinaryDoubleOp(name='add', fn=lambda x, y: x + y)
    sub = BinaryDoubleOp(name='sub', fn=lambda x, y: x - y)
    mul = BinaryDoubleOp(name='mul', fn=lambda x, y: x * y)
    div = BinaryDoubleOp(name='div', fn=lambda x, y: x / y)

    def c_declare(name, sub):
        return """
        double %(name)s;
        """ % dict(name=name)
    double.c_declare = c_declare

    def c_init(name, sub):
        return """
        %(name)s = 0.0;
        """ % dict(name=name)
    double.c_init = c_init

    def c_extract(name, sub):
        return """
        if (!PyFloat_Check(py_%(name)s)) {
            PyErr_SetString(PyExc_TypeError, "expected a float");
            %(fail)s
        }
        %(name)s = PyFloat_AsDouble(py_%(name)s);
        """ % dict(name=name, fail=sub['fail'])
    double.c_extract = c_extract

    def c_sync(name, sub):
        return """
        Py_XDECREF(py_%(name)s);
        py_%(name)s = PyFloat_FromDouble(%(name)s);
        if (!py_%(name)s) {
            printf("PyFloat_FromDouble failed on: %%f\\n", %(name)s);
            Py_XINCREF(Py_None);
            py_%(name)s = Py_None;
        }
        """ % dict(name=name)
    double.c_sync = c_sync

    def c_cleanup(name, sub):
        return ""
    double.c_cleanup = c_cleanup

    from theano import function

    x, y, z = double('x'), double('y'), double('z')
    a = add(x, y)
    b = mul(a, z)
    f = function([x, y, z], b)
    assert f(1.0, 2.0, 3.0) == 9.0

    from theano import gof

    class Double(gof.Type):

        def filter(self, x, strict=False, allow_downcast=None):
            if strict and not isinstance(x, float):
                raise TypeError('Expected a float!')
            return float(x)

        def values_eq_approx(self, x, y, tolerance=1e-4):
            return abs(x - y) / (x + y) < tolerance

        def __str__(self):
            return "double"

        def c_declare(self, name, sub):
            return """
            double %(name)s;
            """ % dict(name=name)

        def c_init(self, name, sub):
            return """
            %(name)s = 0.0;
            """ % dict(name=name)

        def c_extract(self, name, sub):
            return """
            if (!PyFloat_Check(py_%(name)s)) {
                PyErr_SetString(PyExc_TypeError, "expected a float");
                %(fail)s
            }
            %(name)s = PyFloat_AsDouble(py_%(name)s);
            """ % dict(sub, name=name)

        def c_sync(self, name, sub):
            return """
            Py_XDECREF(py_%(name)s);
            py_%(name)s = PyFloat_FromDouble(%(name)s);
            if (!py_%(name)s) {
                printf("PyFloat_FromDouble failed on: %%f\\n", %(name)s);
                Py_XINCREF(Py_None);
                py_%(name)s = Py_None;
            }
            """ % dict(name=name)

        def c_cleanup(self, name, sub):
            return ""

    double = Double()

    def c_code(node, name, input_names, output_names, sub):
        x_name, y_name = input_names[0], input_names[1]
        output_name = output_names[0]
        return """
        %(output_name)s = %(x_name)s * %(y_name)s;
        """ % locals()
    mul.c_code = c_code

    from theano import gof

    class BinaryDoubleOp(gof.Op):

        def __init__(self, name, fn, ccode):
            self.name = name
            self.fn = fn
            self.ccode = ccode

        def make_node(self, x, y):
            if isinstance(x, (int, float)):
                x = gof.Constant(double, x)
            if isinstance(y, (int, float)):
                y = gof.Constant(double, y)
            if x.type != double or y.type != double:
                raise TypeError('%s only works on doubles' % self.name)
            return gof.Apply(self, [x, y], [double()])

        def perform(self, node, inp, out):
            x, y = inp
            z, = out
            z[0] = self.fn(x, y)

        def __str__(self):
            return self.name

        def c_code(self, node, name, inp, out, sub):
            x, y = inp
            z, = out
            return self.ccode % locals()

    add = BinaryDoubleOp(name='add',
                         fn=lambda x, y: x + y,
                         ccode="%(z)s = %(x)s + %(y)s;")

    sub = BinaryDoubleOp(name='sub',
                         fn=lambda x, y: x - y,
                         ccode="%(z)s = %(x)s - %(y)s;")

    mul = BinaryDoubleOp(name='mul',
                         fn=lambda x, y: x * y,
                         ccode="%(z)s = %(x)s * %(y)s;")

    div = BinaryDoubleOp(name='div',
                         fn=lambda x, y: x / y,
                         ccode="%(z)s = %(x)s / %(y)s;")

    from theano.gof import toolbox

    class Simplify(gof.Optimizer):

        def add_requirements(self, env):
            env.extend(toolbox.ReplaceValidate())

        def apply(self, env):
            for node in env.toposort():
                if node.op == div:
                    x, y = node.inputs
                    z = node.outputs[0]
                    if x.owner and x.owner.op == mul:
                        a, b = x.owner.inputs
                        if y == a:
                            env.replace_validate(z, b)
                        elif y == b:
                            env.replace_validate(z, a)
    simplify = Simplify()

    x = double('x')
    y = double('y')
    z = double('z')
    a = add(z, mul(div(mul(y, x), y), div(z, x)))
    e = gof.Env([x, y, z], [a])
    simplify.optimize(e)

    class LocalSimplify(gof.LocalOptimizer):

        def transform(self, node):
            if node.op == div:
                x, y = node.inputs
                if x.owner and x.owner.op == mul:
                    a, b = x.owner.inputs
                    if y == a:
                        return [b]
                    elif y == b:
                        return [a]
            return False

        def tracks(self):
            # This should be needed for the EquilibriumOptimizer
            # but it isn't now
            # TODO: do this and explain it
            return []  # that's not what you should do
    local_simplify = LocalSimplify()

    x = double('x')
    y = double('y')
    z = double('z')
    a = add(z, mul(div(mul(y, x), y), div(z, x)))
    e = gof.Env([x, y, z], [a])
    simplify = gof.TopoOptimizer(local_simplify)
    simplify.optimize(e)
def Env(i, o):
    e = gof.Env(i, o)
    return e