def with_linker_inplace(self, linker):
    for xsh, ysh in [((5, 5), (5, 5)),
                     ((5, 5), (1, 5)),
                     ((5, 5), (5, 1)),
                     ((1, 1), (1, 1)),
                     ((2, 3, 4, 5), (2, 3, 4, 5)),
                     ((2, 3, 4, 5), (1, 3, 1, 5)),
                     ((2, 3, 4, 5), (1, 1, 1, 1)),
                     ((), ())]:
        x = TensorType('float64', [(entry == 1) for entry in xsh])('x')
        y = TensorType('float64', [(entry == 1) for entry in ysh])('y')
        e = Elemwise(scalar.Add(scalar.transfer_type(0)), {0: 0})(x, y)
        f = copy(linker).accept(FunctionGraph([x, y], [e])).make_function()
        xv = numpy.asarray(numpy.random.rand(*xsh))
        yv = numpy.asarray(numpy.random.rand(*ysh))
        zv = xv + yv
        f(xv, yv)
        self.assertTrue((xv == zv).all())
        # test Elemwise.infer_shape
        # (the Shape op doesn't implement c_code!)
        if isinstance(linker, gof.PerformLinker):
            x = TensorType('float64', [(entry == 1) for entry in xsh])('x')
            y = TensorType('float64', [(entry == 1) for entry in ysh])('y')
            e = Elemwise(scalar.Add(scalar.transfer_type(0)), {0: 0})(x, y)
            f = copy(linker).accept(
                FunctionGraph([x, y], [e.shape])).make_function()
            xv = numpy.asarray(numpy.random.rand(*xsh))
            yv = numpy.asarray(numpy.random.rand(*ysh))
            zv = xv + yv
            f(xv, yv)
            assert xv.shape == zv.shape
def with_linker_inplace(self, linker, op, type, rand_val):
    for xsh, ysh in [((5, 5), (5, 5)),
                     ((5, 5), (1, 5)),
                     ((5, 5), (5, 1)),
                     ((1, 1), (1, 1)),
                     ((2, 3, 4, 5), (2, 3, 4, 5)),
                     ((2, 3, 4, 5), (1, 3, 1, 5)),
                     ((2, 3, 4, 5), (1, 1, 1, 1)),
                     ((), ())]:
        x = type('float64', [(entry == 1) for entry in xsh])('x')
        y = type('float64', [(entry == 1) for entry in ysh])('y')
        e = op(scalar.Add(scalar.transfer_type(0)), {0: 0})(x, y)
        f = copy(linker).accept(FunctionGraph([x, y], [e])).make_function()
        xv = rand_val(xsh)
        yv = rand_val(ysh)
        zv = xv + yv
        f(xv, yv)
        self.assertTrue((xv == zv).all())
        # test Elemwise.infer_shape
        # (the Shape op doesn't implement c_code!)
        if isinstance(linker, gof.PerformLinker):
            x = type('float64', [(entry == 1) for entry in xsh])('x')
            y = type('float64', [(entry == 1) for entry in ysh])('y')
            e = op(scalar.Add(scalar.transfer_type(0)), {0: 0})(x, y)
            f = copy(linker).accept(FunctionGraph(
                [x, y], [e.shape])).make_function()
            xv = rand_val(xsh)
            yv = rand_val(ysh)
            zv = xv + yv
            f(xv, yv)
            assert xv.shape == zv.shape
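# A minimal standalone sketch of what these tests exercise (assuming Theano
# is installed): an Elemwise built from scalar.Add with transfer_type(0) and
# inplace_pattern {0: 0} writes its result into the first input's buffer
# rather than allocating a new output.
import numpy
from theano import scalar, gof
from theano.gof import FunctionGraph
from theano.tensor import TensorType
from theano.tensor.elemwise import Elemwise

x = TensorType('float64', [False, False])('x')
y = TensorType('float64', [False, False])('y')
e = Elemwise(scalar.Add(scalar.transfer_type(0)), {0: 0})(x, y)
f = gof.PerformLinker().accept(FunctionGraph([x, y], [e])).make_function()

xv = numpy.random.rand(5, 5)
yv = numpy.random.rand(5, 5)
zv = xv + yv             # expected result, computed before f mutates xv
f(xv, yv)
assert (xv == zv).all()  # xv was overwritten with the sum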
def _scal_inplace(symbol):
    """Replace a symbol definition with an elementwise version of the
    corresponding scalar Op"""
    symbolname = symbol.__name__
    inplace = symbolname.endswith('_inplace')
    if inplace:
        scalar_op = getattr(scal, symbolname[:-len('_inplace')])
        inplace_scalar_op = scalar_op.__class__(scal.transfer_type(0))
        rval = elemwise.Elemwise(inplace_scalar_op, {0: 0}, name=symbolname)
    else:
        scalar_op = getattr(scal, symbolname)
        rval = elemwise.Elemwise(scalar_op, name=symbolname)
    if getattr(symbol, '__doc__', False):
        rval.__doc__ = symbol.__doc__ + '\n' + rval.__doc__
    # for the meaning of this see the ./epydoc script
    # it makes epydoc display rval as if it were a function, not an object
    rval.__epydoc_asRoutine = symbol
    rval.__module__ = 'theano.tensor.inplace'

    def chk(pstate, r):
        if not r.owner:
            return False
        return r.owner.op == rval

    pprint.assign(
        chk, printing.FunctionPrinter(symbolname.replace('_inplace', '=')))
    return rval
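# Hedged usage sketch: in theano/tensor/inplace.py this helper is applied as
# a decorator to stub functions whose names end in '_inplace'; the stub's
# docstring is folded into the resulting Elemwise op. The stub below is
# illustrative, not quoted from the source:
#
# @_scal_inplace
# def neg_inplace(a):
#     """-a (inplace on a)"""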
def construct(symbol):
    symbolname = symbol.__name__
    inplace = symbolname.endswith('_inplace')
    if inplace:
        msg = "inplace"
    else:
        msg = "no_inplace"
    n = "Elemwise{%s,%s}" % (symbolname, msg)
    if inplace:
        scalar_op = getattr(scal, symbolname[:-len('_inplace')])
        inplace_scalar_op = scalar_op.__class__(scal.transfer_type(0))
        rval = elemwise.Elemwise(inplace_scalar_op, {0: 0}, name=n,
                                 nfunc_spec=(nfunc and (nfunc, nin, nout)))
    else:
        scalar_op = getattr(scal, symbolname)
        rval = elemwise.Elemwise(scalar_op, name=n,
                                 nfunc_spec=(nfunc and (nfunc, nin, nout)))
    if getattr(symbol, '__doc__', False):
        rval.__doc__ = symbol.__doc__ + '\n' + rval.__doc__
    # for the meaning of this see the ./epydoc script
    # it makes epydoc display rval as if it were a function, not an object
    rval.__epydoc_asRoutine = symbol
    rval.__module__ = 'tensor'
    pprint.assign(rval, printing.FunctionPrinter(symbolname))
    return rval
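# Context sketch (hedged): `construct` reads like the inner closure of a
# decorator factory parameterized by (nfunc, nin, nout), roughly:
#
# def _scal_elemwise_with_nfunc(nfunc, nin, nout):
#     def construct(symbol):
#         ...  # body as above
#     return construct
#
# Under that reading, nfunc_spec=(nfunc and (nfunc, nin, nout)) ties the
# Elemwise to a NumPy ufunc (e.g. numpy.add with nin=2, nout=1) whenever a
# `nfunc` name is supplied, and stays None otherwise.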
def test_fill(self):
    x = TensorType('float64', [0, 0])('x')
    y = TensorType('float64', [1, 1])('y')
    e = Elemwise(scalar.Second(scalar.transfer_type(0)), {0: 0})(x, y)
    f = gof.CLinker().accept(FunctionGraph([x, y], [e])).make_function()
    xv = numpy.ones((5, 5))
    yv = numpy.random.rand(1, 1)
    f(xv, yv)
    assert (xv == yv).all()
def test_fill(self):
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")
    x = TensorType('float64', [0, 0])('x')
    y = TensorType('float64', [1, 1])('y')
    e = Elemwise(scalar.Second(scalar.transfer_type(0)), {0: 0})(x, y)
    f = gof.CLinker().accept(FunctionGraph([x, y], [e])).make_function()
    xv = numpy.ones((5, 5))
    yv = numpy.random.rand(1, 1)
    f(xv, yv)
    assert (xv == yv).all()
def test_fill(self):
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")
    x = self.ctype('float64', [0, 0])('x')
    y = self.ctype('float64', [1, 1])('y')
    e = self.cop(scalar.Second(scalar.transfer_type(0)), {0: 0})(x, y)
    f = gof.CLinker().accept(FunctionGraph([x, y], [e])).make_function()
    xv = self.rand_cval((5, 5))
    yv = self.rand_cval((1, 1))
    f(xv, yv)
    assert (xv == yv).all()
def test_fill(self):
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")
    x = self.ctype('float64', [0, 0])('x')
    y = self.ctype('float64', [1, 1])('y')
    for linker, op in zip(self.linkers, [self.op, self.cop]):
        e = op(scalar.Second(scalar.transfer_type(0)), {0: 0})(x, y)
        f = linker().accept(FunctionGraph([x, y], [e])).make_function()
        xv = self.rand_cval((5, 5))
        yv = self.rand_cval((1, 1))
        f(xv, yv)
        assert (xv == yv).all()
def test_fill(self):
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")
    for linker, op, t, rval in zip(self.linkers,
                                   [self.op, self.cop],
                                   [self.type, self.ctype],
                                   [self.rand_val, self.rand_cval]):
        x = t(theano.config.floatX, [0, 0])("x")
        y = t(theano.config.floatX, [1, 1])("y")
        e = op(scalar.Second(scalar.transfer_type(0)), {0: 0})(x, y)
        f = linker().accept(FunctionGraph([x, y], [e])).make_function()
        xv = rval((5, 5))
        yv = rval((1, 1))
        f(xv, yv)
        assert (xv == yv).all()
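# In plain NumPy terms (a sketch of what "fill" means in these tests):
# scalar.Second returns its second argument broadcast to the first's shape,
# and inplace_pattern {0: 0} makes that result overwrite the first input's
# buffer.
import numpy

xv = numpy.ones((5, 5))
yv = numpy.random.rand(1, 1)
xv[...] = yv          # same observable effect as the inplace fill
assert (xv == yv).all()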
def with_linker_inplace(self, linker, op, type, rand_val):
    for xsh, ysh in [
        ((5, 5), (5, 5)),
        ((5, 5), (1, 5)),
        ((5, 5), (5, 1)),
        ((1, 1), (1, 1)),
        ((2, 3, 4, 5), (2, 3, 4, 5)),
        ((2, 3, 4, 5), (1, 3, 1, 5)),
        ((2, 3, 4, 5), (1, 1, 1, 1)),
        ((), ()),
    ]:
        x = type(theano.config.floatX, [(entry == 1) for entry in xsh])("x")
        y = type(theano.config.floatX, [(entry == 1) for entry in ysh])("y")
        e = op(scalar.Add(scalar.transfer_type(0)), {0: 0})(x, y)
        f = copy(linker).accept(FunctionGraph([x, y], [e])).make_function()
        xv = rand_val(xsh)
        yv = rand_val(ysh)
        zv = xv + yv
        f(xv, yv)
        self.assertTrue((xv == zv).all())
        # test Elemwise.infer_shape
        # (the Shape op doesn't implement c_code!)
        if isinstance(linker, gof.PerformLinker):
            x = type(theano.config.floatX,
                     [(entry == 1) for entry in xsh])("x")
            y = type(theano.config.floatX,
                     [(entry == 1) for entry in ysh])("y")
            e = op(scalar.Add(scalar.transfer_type(0)), {0: 0})(x, y)
            f = copy(linker).accept(
                FunctionGraph([x, y], [e.shape])).make_function()
            xv = rand_val(xsh)
            yv = rand_val(ysh)
            zv = xv + yv
            f(xv, yv)
            assert xv.shape == zv.shape
def _scal_inplace(symbol):
    """Replace a symbol definition with an elementwise version of the
    corresponding scalar Op"""
    symbolname = symbol.__name__
    inplace = symbolname.endswith("_inplace")
    if inplace:
        scalar_op = getattr(scal, symbolname[: -len("_inplace")])
        inplace_scalar_op = scalar_op.__class__(scal.transfer_type(0))
        rval = elemwise.Elemwise(inplace_scalar_op, {0: 0}, name=symbolname)
    else:
        scalar_op = getattr(scal, symbolname)
        rval = elemwise.Elemwise(scalar_op, name=symbolname)
    if getattr(symbol, "__doc__", False):
        rval.__doc__ = symbol.__doc__ + "\n" + rval.__doc__
    # for the meaning of this see the ./epydoc script
    # it makes epydoc display rval as if it were a function, not an object
    rval.__epydoc_asRoutine = symbol
    rval.__module__ = "theano.tensor.inplace"
    pprint.assign(
        rval, printing.FunctionPrinter(symbolname.replace("_inplace", "=")))
    return rval
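# Hedged reading of this variant: unlike the version above that registers a
# matcher function `chk`, this one registers the op itself with pprint, so
# e.g. an `add_inplace` node would be rendered by FunctionPrinter under the
# name 'add=' ('_inplace' replaced by '=').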
# def make_thunk(self, node, storage_map, _, _2):
#     pass
#
# def R_op(self, inputs, eval_points):
#     pass
#
# def infer_shape(node, input_shapes):
#     pass

scalar_binarize = ScalarBinary(theano.scalar.convert_to_float32,
                               name='scalar_binarize')
binarize = T.elemwise.Elemwise(scalar_binarize, name='binarize')
binarize_inplace = T.elemwise.Elemwise(
    ScalarBinary(theano.scalar.transfer_type(0)),
    inplace_pattern={0: 0},
    name='binarize_inplace',
)
theano.printing.pprint.assign(binarize,
                              theano.printing.FunctionPrinter('binarize'))


#############################################
##################Test Modules###############
#############################################
class T_scalar_binarize(unittest.TestCase):
    def test_perform(self):
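# A minimal sketch of the same wrapping pattern with a hypothetical scalar
# op (ScalarBinary's definition is not shown above, so this stand-in only
# illustrates the Elemwise plumbing): `impl` is the pure-Python fallback
# Theano calls when a scalar op provides no C code.
import theano
import theano.scalar
import theano.tensor as T


class ScalarStep(theano.scalar.UnaryScalarOp):
    def impl(self, x):
        # hypothetical elementwise step function
        return 1.0 if x > 0 else 0.0

scalar_step = ScalarStep(theano.scalar.upgrade_to_float, name='scalar_step')
step = T.elemwise.Elemwise(scalar_step, name='step')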
def inplace_elemwise_optimizer(fgraph):
    """
    Usage: inplace_elemwise_optimizer.optimize(fgraph)

    Attempts to replace all Broadcast ops by versions of them that operate
    inplace. It operates greedily: for each Broadcast Op that is
    encountered, for each output, it tries each input to see if it can
    operate inplace on that input. If so, it makes the change and goes to
    the next output or Broadcast Op.

    Examples
    --------
    x + y + z -> x += y += z
    (x + y) * (x * y) -> (x += y) *= (x * y) or (x + y) *= (x *= y)

    """
    # We should not validate too often as this takes too much time to
    # execute!
    # It is the _dfs_toposort() fct in theano/gof/destroyhandler.py
    # that takes so much time.
    # Should we try to use another lib that does toposort?
    #   igraph: http://igraph.sourceforge.net/
    #   networkx: https://networkx.lanl.gov/
    # Should we try to use cython?
    #   Compiling only that fct is not enough, should we try to add the
    #   deque class too?
    #   And init the deque and other list to an upper bound number of
    #   elements?
    # Maybe Theano should do online toposort as in
    #   http://code.google.com/p/acyclic
    #
    # The next longest optimizer is the canonizer phase.
    # Then I think it is the [io_?]toposort (need to validate) so check if
    # the solution is also applicable there.

    # We execute `validate` after this number of changes.
    check_each_change = config.tensor.insert_inplace_optimizer_validate_nb
    if check_each_change == -1:
        if len(fgraph.apply_nodes) > 500:
            check_each_change = 10
        else:
            check_each_change = 1

    nb_change_no_validate = 0
    chk = fgraph.checkpoint()

    if fgraph.update_mapping:
        update_outs = [fgraph.outputs[i] for i in fgraph.update_mapping]
    else:
        update_outs = []

    for node in list(graph.io_toposort(fgraph.inputs, fgraph.outputs)):
        op = node.op
        # gpuarray GpuElemwise inherits from Elemwise
        if not type(op) == OP:
            continue
        # If the graph is big and the outputs are scalar, do not make it
        # inplace.
        if (check_each_change != 1 and
                all([getattr(o.type, 'ndim', -1) == 0
                     for o in node.outputs])):
            continue

        baseline = op.inplace_pattern
        protected_inputs = [
            f.protected for f in node.fgraph._features
            if isinstance(f, theano.compile.function_module.Supervisor)]
        protected_inputs = sum(protected_inputs, [])  # flatten the list
        protected_inputs.extend(fgraph.outputs)
        candidate_outputs = [i for i in xrange(len(node.outputs))
                             if i not in baseline]
        # node inputs that are Constant, already destroyed, fgraph protected
        # inputs and fgraph outputs can't be used as inplace targets.
        # Remove them here, as it is faster.
        candidate_inputs = [i for i in xrange(len(node.inputs))
                            if i not in baseline.values() and
                            not isinstance(node.inputs[i], Constant) and
                            not fgraph.destroyers(node.inputs[i]) and
                            node.inputs[i] not in protected_inputs]

        verbose = False
        raised_warning = not verbose

        for candidate_output in candidate_outputs:
            # If the output of the node can be established as an update
            # output of the fgraph, visit the candidate_inputs in an order
            # that will improve the chances of making the node operate
            # inplace on the input it's meant to update.
            candidate_out_var = node.outputs[candidate_output]
            sorted_candidate_inputs = candidate_inputs

            if candidate_out_var in update_outs:
                # The candidate output is an update. Sort the variables in
                # candidate_inputs in the following order:
                # - Vars corresponding to the actual updated input
                #   (best-case scenario is for the node that produces
                #   an update to operate inplace on the variable to
                #   update)
                # - Vars computed inplace on the updated input (second-best
                #   scenario is for the node to work inplace on a variable
                #   obtained by a chain of inplace ops on the variable to
                #   update. In some cases, this will be equivalent to
                #   operating inplace on the variable to update)
                # - Remaining variables
                updated_inputs = []
                for i, f_out in enumerate(fgraph.outputs):
                    if (f_out is candidate_out_var and
                            i in fgraph.update_mapping):
                        updated_inp_idx = fgraph.update_mapping[i]
                        updated_inputs.append(fgraph.inputs[updated_inp_idx])

                updated_vars = []
                vars_from_inplace = []
                other_vars = []
                for inp_idx in candidate_inputs:
                    inp = node.inputs[inp_idx]
                    if inp in updated_inputs:
                        # the candidate input is the actual updated input
                        updated_vars.append(inp_idx)
                    elif (hasattr(fgraph, 'destroy_handler') and
                          inp.owner and
                          any([fgraph.destroy_handler.root_destroyer.get(
                               up_inp, None) is inp.owner
                               for up_inp in updated_inputs])):
                        # the candidate input is a variable computed
                        # inplace on the updated input via a sequence of
                        # one or more inplace operations
                        vars_from_inplace.append(inp_idx)
                    else:
                        other_vars.append(inp_idx)

                sorted_candidate_inputs = (updated_vars +
                                           vars_from_inplace + other_vars)

            for candidate_input in sorted_candidate_inputs:
                # remove inputs that don't have the same dtype as the output
                if node.inputs[candidate_input].type != node.outputs[
                        candidate_output].type:
                    continue

                inplace_pattern = dict(baseline)
                inplace_pattern[candidate_output] = candidate_input
                try:
                    if hasattr(op.scalar_op, "make_new_inplace"):
                        new_scal = op.scalar_op.make_new_inplace(
                            scalar.transfer_type(
                                *[inplace_pattern.get(i, o.dtype)
                                  for i, o in enumerate(node.outputs)]))
                    else:
                        new_scal = op.scalar_op.__class__(
                            scalar.transfer_type(
                                *[inplace_pattern.get(i, None)
                                  for i in xrange(len(node.outputs))]))
                    new_outputs = OP(new_scal, inplace_pattern)(
                        *node.inputs, **dict(return_list=True))
                    new_node = new_outputs[0].owner

                    for r, new_r in zip(node.outputs, new_outputs):
                        fgraph.replace(r, new_r,
                                       reason="inplace_elemwise_optimizer")
                    nb_change_no_validate += 1
                    if nb_change_no_validate >= check_each_change:
                        fgraph.validate()
                        chk = fgraph.checkpoint()
                        nb_change_no_validate = 0
                except (ValueError, InconsistencyError) as e:
                    if check_each_change != 1 and not raised_warning:
                        print(("Some inplace optimization was not "
                               "performed due to an unexpected error:"),
                              file=sys.stderr)
                        print(e, file=sys.stderr)
                        raised_warning = True
                    fgraph.revert(chk)
                    continue
                candidate_inputs.remove(candidate_input)
                node = new_node
                baseline = inplace_pattern
                break

    if nb_change_no_validate > 0:
        try:
            fgraph.validate()
        except Exception:
            if not raised_warning:
                print(("Some inplace optimization was not "
                       "performed due to an unexpected error"),
                      file=sys.stderr)
            fgraph.revert(chk)
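# A hedged sketch of the optimizer's observable effect (assuming the default
# mode, which applies it): the rewritten Elemwise node advertises which
# input it destroys through its destroy_map.
import numpy
import theano

x = theano.shared(numpy.zeros(3))
f = theano.function([], [], updates=[(x, x + 1)])
topo = f.maker.fgraph.toposort()
print([getattr(n.op, 'destroy_map', {}) for n in topo])
# expected (not guaranteed): one node reports {0: [0]}, i.e. output 0
# overwrites input 0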
"library", "tensor", "nnet", "sigmoid_prec.png", ) plt.savefig(fname) print("New picture saved at", fname) print(val_ultra.max()) print(val_ultra.min()) scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name="scalar_sigmoid") sigmoid = elemwise.Elemwise(scalar_sigmoid, name="sigmoid") sigmoid_inplace = elemwise.Elemwise( ScalarSigmoid(scalar.transfer_type(0)), inplace_pattern={0: 0}, name="sigmoid_inplace", ) pprint.assign(sigmoid, printing.FunctionPrinter("sigmoid")) class UltraFastScalarSigmoid(scalar.UnaryScalarOp): """ This is just speed opt. Not for stability. """ @staticmethod def st_impl(x): x = 0.5 * x
ax.grid(True)
ax.legend(("sigmoid", "ultra_fast", "hard"), "upper left")
fname = os.path.join(
    os.path.dirname(theano.__file__),
    "..", "doc", "library", "tensor", "nnet", "sigmoid_prec.png"
)
plt.savefig(fname)
print("New picture saved at", fname)
print(val_ultra.max())
print(val_ultra.min())


scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name="scalar_sigmoid")
sigmoid = elemwise.Elemwise(scalar_sigmoid, name="sigmoid")

sigmoid_inplace = elemwise.Elemwise(
    ScalarSigmoid(scalar.transfer_type(0)),
    inplace_pattern={0: 0},
    name="sigmoid_inplace"
)

pprint.assign(sigmoid, printing.FunctionPrinter("sigmoid"))


class UltraFastScalarSigmoid(scalar.UnaryScalarOp):
    """
    This is just a speed optimization; it is not for stability.
    """

    @staticmethod
    def st_impl(x):
        x = 0.5 * x
        # The if below is a tanh approximation.
        else:
            raise NotImplementedError('only floatingpoint is implemented')

    def c_code_cache_version(self):
        v = super(ScalarSigmoid, self).c_code_cache_version()
        if v:
            return (2,) + v
        else:
            return v


scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name='scalar_sigmoid')
sigmoid = elemwise.Elemwise(scalar_sigmoid, name='sigmoid')

sigmoid_inplace = elemwise.Elemwise(
    ScalarSigmoid(scalar.transfer_type(0)),
    inplace_pattern={0: 0},
    name='sigmoid_inplace',
)

pprint.assign(sigmoid, printing.FunctionPrinter('sigmoid'))


class ScalarSoftplus(scalar.UnaryScalarOp):
    @staticmethod
    def static_impl(x):
        if x < -30.0:
            return 0.0
        if x > 30.0:
            return x
        return numpy.log1p(numpy.exp(x))
ax.grid(True)
ax.legend(("sigmoid", "ultra_fast", "hard"), "upper left")
fname = os.path.join(os.path.dirname(theano.__file__), '..',
                     'doc', 'library', 'tensor', 'nnet',
                     'sigmoid_prec.png')
plt.savefig(fname)
print "New picture saved at", fname
print val_ultra.max()
print val_ultra.min()


scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name='scalar_sigmoid')
sigmoid = elemwise.Elemwise(scalar_sigmoid, name='sigmoid')

sigmoid_inplace = elemwise.Elemwise(
    ScalarSigmoid(scalar.transfer_type(0)),
    inplace_pattern={0: 0},
    name='sigmoid_inplace',
)

pprint.assign(sigmoid, printing.FunctionPrinter('sigmoid'))


class UltraFastScalarSigmoid(scalar.UnaryScalarOp):
    """
    This is just a speed optimization; it is not for stability.
    """

    @staticmethod
    def st_impl(x):
        x = 0.5 * x
        # The if below is a tanh approximation.
return """%(z)s = %(x)s < -88.0f ? 0.0 : %(x)s > 15.0f ? 1.0f : 1.0f /(1.0f + exp(-%(x)s));""" % locals() elif node.inputs[0].type == scalar.float64: return """%(z)s = %(x)s < -709.0 ? 0.0 : %(x)s > 19.0 ? 1.0 : 1.0 /(1.0+exp(-%(x)s));""" % locals() else: raise NotImplementedError('only floatingpoint is implemented') def c_code_cache_version(self): v = super(ScalarSigmoid, self).c_code_cache_version() if v: return (2,) + v else: return v scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name='scalar_sigmoid') sigmoid = elemwise.Elemwise(scalar_sigmoid, name='sigmoid') sigmoid_inplace = elemwise.Elemwise( ScalarSigmoid(scalar.transfer_type(0)), inplace_pattern={0:0}, name='sigmoid_inplace', ) pprint.assign(sigmoid, printing.FunctionPrinter('sigmoid')) class ScalarSoftplus(scalar.UnaryScalarOp): @staticmethod def static_impl(x): if x < -30.0: return 0.0 if x > 30.0: return x return numpy.log1p(numpy.exp(x))