def test_shared_state2(self):
    """Check state sharing when ``g`` is built on ``f``'s container for ``s``.

    ``f`` declares the update on ``s``; ``g`` only reads the same container
    and declares no update of its own.
    """
    a = T.scalar()  # the a is for 'anonymous' (un-named).
    x, s = T.scalars('xs')

    f_inputs = [
        x,
        In(a, value=1.0, name='a'),
        In(s, value=0.0, update=s + a * x, mutable=False),
    ]
    f = function(f_inputs, s + a * x)

    g_inputs = [
        x,
        In(a, value=1.0, name='a'),
        In(s, value=f.container[s]),
    ]
    g = function(g_inputs, s + a * x)

    def check_both(expected):
        # Both functions must report the same value for the state ``s``.
        self.assertTrue(f[s] == expected)
        self.assertTrue(g[s] == expected)

    f(1, 2)
    check_both(2)
    f(1, 2)
    check_both(4)
    g(1, 2)  # has no effect on state
    check_both(4)
def __init__(self):
    """Create the symbolic variables and two functions that share ``s``.

    ``f2`` is built on ``f1``'s container for ``s``; both declare the same
    update expression on it.
    """
    a = T.scalar()  # the a is for 'anonymous' (un-named).
    x, s = T.scalars('xs')
    v = T.vector('v')

    self.s, self.x, self.v = s, x, v
    self.e = a * x + s

    f1_inputs = [
        x,
        In(a, value=1.0, name='a'),
        In(s, value=0.0, update=s + a * x, mutable=True),
    ]
    self.f1 = function(f1_inputs, s + a * x)

    f2_inputs = [
        x,
        In(a, value=1.0, name='a'),
        In(s, value=self.f1.container[s], update=s + a * x, mutable=True),
    ]
    self.f2 = function(f2_inputs, s + a * x)
def test_weird_names(self):
    """Non-string ``name`` arguments to ``In`` must end in a TypeError."""
    a, x, s = T.scalars('xxx')

    def compile_with_list_name():
        return function([In(a, name=[])], [])

    checkfor(self, compile_with_list_name, TypeError)

    def t():
        # A set, a tuple and a symbolic variable are all used as names here.
        bad_inputs = [
            In(a, name={'adsf', ()}, value=1.0),
            In(x, name=(), value=2.0),
            In(s, name=T.scalar(), value=3.0),
        ]
        function(bad_inputs, a + x + s)

    checkfor(self, t, TypeError)
def test_naming_rule3(self):
    """Calling conventions when the un-named input ``a`` has a default value."""
    a = T.scalar()  # the a is for 'anonymous' (un-named).
    x, s = T.scalars('xs')

    # x's name is not ignored (as in test_naming_rule2) because a has a default value.
    f = function([x, In(a, value=1.0), s], a / s + x)

    accepted = (
        (lambda: f(9, 2, 4), 9.5),  # can specify all args in order
        (lambda: f(9, 2, s=4), 9.5),  # can give s as kwarg
        (lambda: f(9, s=4), 9.25),  # can give s as kwarg, get default a
        (lambda: f(x=9, s=4), 9.25),  # can give s as kwarg, omit a, x as kw
    )
    for call, expected in accepted:
        self.assertTrue(call() == expected)

    rejected = (
        lambda: f(x=9, a=2, s=4),  # got unexpected keyword argument 'a'
        lambda: f(),  # takes exactly 3 non-keyword arguments (0 given)
        lambda: f(x=9),  # takes exactly 3 non-keyword arguments (1 given)
    )
    for call in rejected:
        checkfor(self, call, TypeError)
def test_deepcopy_shared_container(self):
    """Ensure that shared containers remain shared after a deep copy.

    ``f`` is built on ``h``'s container for ``a``.  Both functions are
    deep-copied through one ``memo`` dict, and the copies are then expected
    to share a container the same way the originals do.

    The test returns early (is effectively skipped) when deep-copying
    raises ``NotImplementedError`` with a message starting with
    'DebugMode is not picklable'.
    """
    a, x = T.scalars('ax')

    h = function([In(a, value=0.0)], a)
    f = function([x, In(a, value=h.container[a], implicit=True)], x + a)

    try:
        memo = {}
        ac = copy.deepcopy(a)
        memo[id(a)] = ac
        hc = copy.deepcopy(h, memo=memo)
        memo[id(h)] = hc
        fc = copy.deepcopy(f, memo=memo)
    except NotImplementedError as e:
        # Exceptions are not subscriptable on Python 3 (``e[0]`` raises
        # TypeError there), so the message is read from ``e.args``.
        if e.args and str(e.args[0]).startswith('DebugMode is not picklable'):
            return
        raise

    h[a] = 1
    hc[ac] = 2
    self.assertTrue(f[a] == 1)
    self.assertTrue(fc[ac] == 2)
def test_shared_state1(self):
    """Check that ``g``, built on ``f``'s container for ``s``, sees ``f``'s updates."""
    a = tt.scalar()  # the a is for 'anonymous' (un-named).
    x, s = tt.scalars("xs")

    f_inputs = [
        x,
        In(a, value=1.0, name="a"),
        In(s, value=0.0, update=s + a * x, mutable=True),
    ]
    f = function(f_inputs, s + a * x)

    g_inputs = [
        x,
        In(a, value=1.0, name="a"),
        In(s, value=f.container[s]),
    ]
    g = function(g_inputs, s + a * x)

    # One call to f: both functions report 2.
    f(1, 2)
    assert f[s] == 2
    assert g[s] == 2

    # One more call to each: both functions report 4.
    f(1, 2)
    g(1, 2)
    assert f[s] == 4
    assert g[s] == 4
def test_in_allow_downcast_vector_floatX(self):
    """Exercise ``allow_downcast`` = True / False / None on float32 vector inputs."""
    a = theano.tensor.fvector('a')
    b = theano.tensor.fvector('b')
    c = theano.tensor.fvector('c')

    wrapped_inputs = [
        In(a, allow_downcast=True),
        In(b, allow_downcast=False),
        In(c, allow_downcast=None),
    ]
    f = theano.function(wrapped_inputs, (a + b + c))

    z = [0]

    # If the values can be accurately represented, everything is OK
    exact = f(z, z, z)
    assert numpy.all(exact == 0)

    # If allow_downcast is True, idem
    downcast = f([0.1], z, z)
    assert numpy.allclose(downcast, 0.1)

    # If allow_downcast is False, nope
    with self.assertRaises(TypeError):
        f(z, [0.1], z)

    # If allow_downcast is None, like False
    with self.assertRaises(TypeError):
        f(z, z, [0.1])
def test_naming_rule4(self):
    """Calling conventions when the defaulted input is explicitly named 'a'."""
    a = T.scalar()  # the a is for 'anonymous' (un-named).
    x, s = T.scalars('xs')

    f = function([x, In(a, value=1.0, name='a'), s], a / s + x)

    accepted = (
        (lambda: f(9, 2, 4), 9.5),  # can specify all args in order
        (lambda: f(9, 2, s=4), 9.5),  # can give s as kwarg
        (lambda: f(9, s=4), 9.25),  # can give s as kwarg, get default a
        (lambda: f(9, a=2, s=4), 9.5),  # can give s as kwarg, a as kwarg
        (lambda: f(x=9, a=2, s=4), 9.5),  # can give all kwargs
        (lambda: f(x=9, s=4), 9.25),  # can give all kwargs
    )
    for call, expected in accepted:
        self.assertTrue(call() == expected)

    rejected = (
        lambda: f(),  # takes exactly 3 non-keyword arguments (0 given)
        lambda: f(5.0, x=9),  # got multiple values for keyword argument 'x'
    )
    for call in rejected:
        checkfor(self, call, TypeError)
def test_copy(self):
    """Check what ``copy.copy`` of a compiled function shares with the original."""
    a = T.scalar()  # the a is for 'anonymous' (un-named).
    x, s = T.scalars('xs')

    inputs = [
        x,
        In(a, value=1.0, name='a'),
        In(s, value=0.0, update=s + a * x, mutable=True),
    ]
    f = function(inputs, s + a * x)

    g = copy.copy(f)
    # if they both return, assume that they return equivalent things.

    # No container of the copy may reuse the storage of the original.
    for var in (x, a, s):
        self.assertFalse(g.container[var].storage is f.container[var].storage)

    self.assertFalse(g.value[a] is not f.value[a])  # should not have been copied
    self.assertFalse(g.value[s] is f.value[s])  # should have been copied because it is mutable.
    self.assertFalse((g.value[s] != f.value[s]).any())  # its contents should be identical

    # Two rounds, each calling f then g: they should be in sync, default
    # value should be copied.  The calls change state, so both rounds matter.
    for _ in range(2):
        self.assertTrue(f(2, 1) == g(2))

    f(1, 2)  # put them out of sync
    self.assertFalse(f(1, 2) == g(1, 2))  # they should not be equal anymore.
def test_in_allow_downcast_floatX(self):
    """Exercise ``allow_downcast`` = True / False / None on float32 scalar inputs."""
    a = theano.tensor.fscalar('a')
    b = theano.tensor.fscalar('b')
    c = theano.tensor.fscalar('c')

    wrapped_inputs = [
        In(a, allow_downcast=True),
        In(b, allow_downcast=False),
        In(c, allow_downcast=None),
    ]
    f = theano.function(wrapped_inputs, (a + b + c))

    # If the values can be accurately represented, everything is OK
    exact = f(0, 0, 0)
    assert numpy.all(exact == 0)

    # If allow_downcast is True, idem
    downcast = f(0.1, 0, 0)
    assert numpy.allclose(downcast, 0.1)

    # If allow_downcast is False, nope
    with self.assertRaises(TypeError):
        f(0, 0.1, 0)

    # If allow_downcast is None, it should work iff floatX=float32
    if theano.config.floatX != 'float32':
        with self.assertRaises(TypeError):
            f(0, 0, 0.1)
    else:
        assert numpy.allclose(f(0, 0, 0.1), 0.1)
def test_deepcopy_trust_input(self):
    """Check that ``trust_input`` survives ``copy.deepcopy`` of a function.

    With ``trust_input`` set, both the original and the copy are expected
    to accept a numpy array and to raise one of ValueError, AttributeError
    or InvalidValueError when handed a plain Python float.

    The test returns early (is effectively skipped) when deep-copying
    raises ``NotImplementedError`` with a message starting with
    'DebugMode is not picklable'.
    """
    a = T.dscalar()  # the a is for 'anonymous' (un-named).
    x, s = T.dscalars('xs')

    f = function([
        x,
        In(a, value=1.0, name='a'),
        In(s, value=0.0, update=s + a * x, mutable=True),
    ], s + a * x)
    f.trust_input = True

    try:
        g = copy.deepcopy(f)
    except NotImplementedError as e:
        # Exceptions are not subscriptable on Python 3 (``e[0]`` raises
        # TypeError there), so the message is read from ``e.args``.
        if e.args and str(e.args[0]).startswith('DebugMode is not picklable'):
            return
        raise

    self.assertTrue(f.trust_input is g.trust_input)

    expected_errors = (
        ValueError,
        AttributeError,
        theano.compile.debugmode.InvalidValueError,
    )
    f(np.asarray(2.))
    self.assertRaises(expected_errors, f, 2.)
    g(np.asarray(2.))
    self.assertRaises(expected_errors, g, 2.)
def test_in_update_shared(self):
    """Combine an ``In`` update with a shared-variable update in one function."""
    # Test that using both In() with updates and shared variables with
    # updates in the same function behaves as expected
    shared_var = theano.shared(1.0)
    a = theano.tensor.dscalar("a")
    a_wrapped = In(a, value=0.0, update=shared_var)
    swap_updates = {shared_var: a}
    f = function([a_wrapped], [], updates=swap_updates, mode="FAST_RUN")

    # Ensure that, through the executions of the function, the state of
    # the input and the shared variable are appropriate (after N execution,
    # the values have swapped N times). This allows testing that the
    # changes occur at the same time and one doesn't overwrite the other.
    for call_index in range(5):
        f()
        expected = call_index % 2
        current = shared_var.get_value()
        assert np.allclose(current, expected)
def test_multiple_functions(self):
    """Pickle and unpickle a list mixing variables, an expression and functions.

    The list holds symbolic variables, a derived expression, two functions
    sharing the container of ``s``, and a function with a vector default.

    The test returns early (is effectively skipped) when pickling raises
    ``NotImplementedError`` with a message starting with
    'DebugMode is not picklable'.
    """
    a = T.scalar()  # the a is for 'anonymous' (un-named).
    x, s = T.scalars('xs')
    v = T.vector('v')

    # put in some inputs
    list_of_things = [s, x, v]

    # some derived thing, whose inputs aren't all in the list
    list_of_things.append(a * x + s)

    f1 = function([
        x,
        In(a, value=1.0, name='a'),
        In(s, value=0.0, update=s + a * x, mutable=True),
    ], s + a * x)
    list_of_things.append(f1)

    # now put in a function sharing container with the previous one
    f2 = function([
        x,
        In(a, value=1.0, name='a'),
        In(s, value=f1.container[s], update=s + a * x, mutable=True),
    ], s + a * x)
    list_of_things.append(f2)

    assert isinstance(f2.container[s].storage, list)
    assert f2.container[s].storage is f1.container[s].storage

    # now put in a function with non-scalar
    v_value = numpy.asarray([2, 3, 4.], dtype=config.floatX)
    f3 = function([x, In(v, value=v_value)], x + v)
    list_of_things.append(f3)

    # try to pickle the entire things
    try:
        saved_format = cPickle.dumps(list_of_things, protocol=-1)
        # The result is not inspected here; the round trip only has to succeed.
        cPickle.loads(saved_format)
    except NotImplementedError as e:
        # ``except X, e`` is Python-2-only syntax and ``e[0]`` fails on
        # Python 3; ``as e`` and ``e.args`` work on both.
        if e.args and str(e.args[0]).startswith('DebugMode is not picklable'):
            return
        raise
def test_state_access(self):
    """Read and write input state through ``f[var]`` indexing."""
    a = tt.scalar()  # the a is for 'anonymous' (un-named).
    x, s = tt.scalars("xs")

    inputs = [
        x,
        In(a, value=1.0, name="a"),
        In(s, value=0.0, update=s + a * x),
    ]
    f = function(inputs, s + a * x)

    # Values right after compilation.
    assert f[a] == 1.0
    assert f[s] == 0.0

    first_result = f(3.0)
    assert first_result == 3.0
    overridden_result = f(3.0, a=2.0)
    assert overridden_result == 9.0  # 3.0 + 2*3.0

    # state hasn't changed permanently, we just overrode it last line
    assert f[a] == 1.0
    assert f[s] == 9.0

    # Writing through the index changes the stored value of a.
    f[a] = 5.0
    assert f[a] == 5.0
    final_result = f(3.0)
    assert final_result == 24.0  # 9 + 3*5
    assert f[s] == 24.0
def test_weird_names(self):
    """Non-string ``name`` arguments to ``In`` must end in a TypeError."""
    a, x, s = tt.scalars("xxx")

    def compile_with_list_name():
        return function([In(a, name=[])], [])

    checkfor(self, compile_with_list_name, TypeError)

    def t():
        # A set, a tuple and a symbolic variable are all used as names here.
        bad_inputs = [
            In(a, name={"adsf", ()}, value=1.0),
            In(x, name=(), value=2.0),
            In(s, name=tt.scalar(), value=3.0),
        ]
        return function(bad_inputs, a + x + s)

    checkfor(self, t, TypeError)
def pfunc(
    params,
    outputs=None,
    mode=None,
    updates=None,
    givens=None,
    no_default_updates=False,
    accept_inplace=False,
    name=None,
    rebuild_strict=True,
    allow_input_downcast=None,
    profile=None,
    on_unused_input=None,
    output_keys=None,
):
    """
    Function-constructor for graphs with shared variables.

    Parameters
    ----------
    params : list of either Variable or In instances
        Function parameters, these are not allowed to be shared variables.
    outputs : list of Variables or Out instances
        Expressions to compute.
    mode : string or `theano.compile.Mode` instance
        Compilation mode.
    updates : iterable over pairs (shared_variable, new_expression). List, tuple or dict.
        Update the values for SharedVariable inputs according to these
        expressions
    givens : iterable over pairs (Var1, Var2) of Variables. List, tuple or dict.  The Var1
        and Var2 in each pair must have the same Type. Specific substitutions
        to make in the computation graph (Var2 replaces Var1).
    no_default_updates : either bool or list of Variables
        If True, do not perform any automatic update on Variables.
        If False (default), perform them all.
        Else, perform automatic updates on all Variables that are neither in
        "updates" nor in "no_default_updates".
    accept_inplace : bool
        True iff the graph can contain inplace operations prior to the
        optimization phase (default is False). *Note* this parameter is
        unsupported, and its use is not recommended.
    name : None or string
        Attaches a name to the profiling result of this function.
    rebuild_strict : bool
        Forwarded unchanged to `rebuild_collect_shared` when the graph is
        cloned (default is True).
    allow_input_downcast : bool
        True means that the values passed as inputs when calling the function
        can be silently downcast to fit the dtype of the corresponding
        Variable, which may lose precision. False means that it will only be
        cast to a more general, or precise, type. None (default) is almost like
        False, but allows downcasting of Python float scalars to floatX.
    profile : None, True, str, or ProfileStats instance
        Accumulate profiling information into a given ProfileStats instance.
        None is the default, and means to use the value of config.profile.
        If argument is `True` then a new ProfileStats instance will be used.
        If argument is a string, a new ProfileStats instance will be created
        with that string as its `message` attribute. This profiling object will
        be available via self.profile.
    on_unused_input : {'raise', 'warn','ignore', None}
        What to do if a variable in the 'inputs' list is not used in the graph.
    output_keys
        Forwarded unchanged to `orig_function`.  NOTE(review): presumably the
        keys to pair with the outputs when they were supplied as a dict --
        confirm against `orig_function`.

    Returns
    -------
    theano.compile.Function
        A callable object that will compute the outputs (given the inputs) and
        update the implicit function arguments according to the `updates`.

    Raises
    ------
    Exception
        If `params` is neither a list nor a tuple.
    TypeError
        If `no_default_updates` is neither a bool nor a list.
    ValueError
        If iterating over the pairs of `updates` yields a Variable.
    UnusedInputError
        If the same variable appears twice in `params`.
    RuntimeError
        If `givens` tries to replace a variable that is also an input.

    Notes
    -----
    Regarding givens: Be careful to make sure that these substitutions are
    independent--behaviour when Var1 of one pair appears in the graph leading
    to Var2 in another expression is undefined. Replacements specified with
    givens are different from optimizations in that Var2 is not expected to be
    equivalent to Var1.

    """
    #
    # This function works by cloning the graph (except for the
    # inputs), and then shipping it off to compile.function (There it
    # will be cloned again, unnecessarily, because it doesn't know
    # that we already cloned it.)
    #
    # First, it clones the replacements named in the givens argument,
    # and points each Var1 to the clone of Var2.  Then it sets the
    # inputs in the clone dictionary.  After these steps, we are
    # assuming that the clone dictionary contains all the inputs to
    # the computation graph.
    #
    # Then it clones the outputs and the update expressions.  This
    # rebuilds a computation graph from the inputs and the givens.
    #
    if updates is None:
        updates = []

    if givens is None:
        givens = []

    if profile is None:
        profile = config.profile or config.print_global_stats
        # profile -> True or False
    if profile is False:
        profile = None
    if profile is True:
        profile = ProfileStats(message=name)
        # profile -> object
    elif isinstance(profile, str):
        # isinstance (not an exact type comparison) so that str subclasses
        # are also treated as a profile message.
        profile = ProfileStats(message=profile)
    # profile is typically either False or an object at this point.
    # No need to block other objects being passed through though. It might be
    # useful.

    if not isinstance(params, (list, tuple)):
        raise Exception("in pfunc() the first argument must be a list or " "a tuple")

    if not isinstance(no_default_updates, bool) and not isinstance(
        no_default_updates, list
    ):
        raise TypeError("no_default_update should be either a boolean or " "a list")

    if len(updates) > 0 and any(
        isinstance(v, Variable) for v in iter_over_pairs(updates)
    ):
        raise ValueError(
            "The updates parameter must be an OrderedDict/dict or a list of "
            "lists/tuples with 2 elements"
        )

    # transform params into theano.compile.In objects.
    inputs = [
        _pfunc_param_to_in(p, allow_downcast=allow_input_downcast) for p in params
    ]

    # Check if some variable is present more than once in inputs
    in_variables = [inp.variable for inp in inputs]
    for i, v in enumerate(in_variables):
        if v in in_variables[(i + 1) :]:
            dup_v_i = in_variables.index(v, (i + 1))
            raise UnusedInputError(
                "Variable %s is used twice in inputs to theano.function, "
                "at indices %i and %i.  This would result in values "
                "provided for it being ignored. Please do not duplicate "
                "variables in the inputs list." % (v, i, dup_v_i)
            )

    # Check that we are not using `givens` to replace input variables, because
    # this typically does nothing, contrary to what one may expect.
    in_var_set = set(in_variables)
    try:
        givens_pairs = list(givens.items())
    except AttributeError:
        # Not a mapping: assume it already is an iterable of pairs.
        givens_pairs = givens
    for x, _ in givens_pairs:
        if x in in_var_set:
            raise RuntimeError(
                "You are trying to replace variable '%s' through the "
                "`givens` parameter, but this variable is an input to your "
                "function. Replacing inputs is currently forbidden because it "
                "has no effect. One way to modify an input `x` to a function "
                "evaluating f(x) is to define a new input `y` and use "
                "`theano.function([y], f(x), givens={x: g(y)})`. Another "
                "solution consists in using `theano.clone`, e.g. like this: "
                "`theano.function([x], "
                "theano.clone(f(x), replace={x: g(x)}))`." % x
            )

    # Extend the outputs with the updates on input variables so they are also
    # cloned
    additional_outputs = [i.update for i in inputs if i.update]
    if outputs is None:
        out_list = []
    elif isinstance(outputs, (list, tuple)):
        out_list = list(outputs)
    else:
        out_list = [outputs]
    extended_outputs = out_list + additional_outputs

    output_vars = rebuild_collect_shared(
        extended_outputs,
        in_variables,
        replace=givens,
        updates=updates,
        rebuild_strict=rebuild_strict,
        copy_inputs_over=True,
        no_default_updates=no_default_updates,
    )
    # extracting the arguments
    input_variables, cloned_extended_outputs, other_stuff = output_vars
    clone_d, update_d, update_expr, shared_inputs = other_stuff

    # Recover only the clones of the original outputs
    if outputs is None:
        cloned_outputs = []
    elif isinstance(outputs, (list, tuple)):
        cloned_outputs = cloned_extended_outputs[: len(outputs)]
    else:
        # A single output was given, so a single (non-list) clone is returned.
        cloned_outputs = cloned_extended_outputs[0]

    for i, iv in zip(inputs, input_variables):
        i.variable = iv

        # If needed, replace the input's update by its cloned equivalent
        if i.update:
            i.update = clone_d[i.update]

    for sv in shared_inputs:
        # Each shared variable found in the graph becomes an extra input whose
        # value is the variable's own container.  It is mutable only when an
        # update expression was collected for it.
        if sv in update_d:
            si = In(
                variable=sv,
                value=sv.container,
                mutable=True,
                borrow=True,
                update=update_d[sv],
                shared=True,
            )
        else:
            si = In(
                variable=sv, value=sv.container, mutable=False, borrow=True, shared=True
            )
        inputs.append(si)

    return orig_function(
        inputs,
        cloned_outputs,
        mode,
        accept_inplace=accept_inplace,
        name=name,
        profile=profile,
        on_unused_input=on_unused_input,
        output_keys=output_keys,
    )
def fn():
    """Compile a function whose input update depends on ``s``, which is not an input."""
    x, s = T.scalars('xs')
    next_x = (s * s) + x
    updated_input = In(x, update=next_x)
    function([updated_input], x)
def fn():
    """Compile a function whose input update depends on ``s``, which is not an input."""
    x, s = T.scalars('xs')
    next_x = s + x
    updated_input = In(x, update=next_x)
    function([updated_input], x)
def t():
    """Compile a function whose ``In`` names are a set, a tuple and a variable."""
    # a, x and s come from the enclosing scope.
    bad_inputs = [
        In(a, name={'adsf', ()}, value=1.0),
        In(x, name=(), value=2.0),
        In(s, name=T.scalar(), value=3.0),
    ]
    function(bad_inputs, a + x + s)
def __init__(self, num_hidden, num_classes, context_win_size,
             embeddings, featdim=0, fine_tuning=False,
             truncate_gradient=-1):
    """
    Build the symbolic graph and compile the Theano functions of the model.

    Two sigmoid recurrences are scanned over the same input, one forwards
    and one with ``go_backwards=True``; their (optionally feature-augmented)
    hidden states are projected, summed and passed through a softmax.

    num_hidden :: dimension of the hidden layer
    num_classes :: number of classes
    context_win_size :: word window context size
    embeddings :: matrix; its second dimension is the embedding size
    featdim :: number of extra feature columns; when > 0 the input ``f``
               is concatenated to both hidden states
    fine_tuning :: if true, the embeddings are appended to the trained
                   parameters
    truncate_gradient :: forwarded to both ``theano.scan`` calls
    """
    # hyper parameters of the model
    self.hyperparams = {}

    # nh :: dimension of the hidden layer
    nh = num_hidden
    self.hyperparams['nh'] = nh

    # nc :: number of classes
    nc = num_classes
    self.hyperparams['nc'] = nc

    # de :: dimension of the word embeddings
    de = embeddings.shape[1]
    self.hyperparams['de'] = de

    # cs :: word window context size
    cs = context_win_size
    self.hyperparams['cs'] = cs

    self.hyperparams['featdim'] = featdim
    self.hyperparams['fine_tuning'] = fine_tuning
    self.hyperparams['truncate_gradient'] = truncate_gradient

    # parameters of the model: the embeddings, cast to floatX and shared
    self.emb = theano.shared(embeddings.astype(theano.config.floatX))

    # inputs
    idxs = T.imatrix()
    # w multiplies the looked-up embeddings; the compiled functions below
    # give it a default value of 1.0
    w = T.fscalar('w')
    # one row per row of idxs, de * cs columns
    x = self.emb[idxs].reshape((idxs.shape[0], de * cs))*w
    y = T.iscalar('y')
    y_sentence = T.ivector('y_sentence')
    f = T.matrix('f')
    # NOTE(review): the result of this reshape is discarded, so the
    # statement leaves f unchanged -- confirm whether an assignment was
    # intended.
    f.reshape((idxs.shape[0], featdim))

    # forward parameters of the model
    self.fWx = theano.shared(0.2 * np.random.uniform(-1.0, 1.0,
                             (de * cs, nh)).astype(theano.config.floatX))
    self.fWh = theano.shared(0.2 * np.random.uniform(-1.0, 1.0,
                             (nh, nh)).astype(theano.config.floatX))
    self.fbh = theano.shared(np.zeros(nh, dtype=theano.config.floatX))
    self.fh0 = theano.shared(np.zeros(nh, dtype=theano.config.floatX))

    fparams = [self.fWx, self.fWh, self.fbh, self.fh0]
    fnames = ['fWx', 'fWh', 'fbh', 'fh0']

    def frecurrence(x_t, h_tm1):
        # one forward step: sigmoid of input and previous hidden state
        h_t = T.nnet.sigmoid(T.dot(x_t, self.fWx) +
                             T.dot(h_tm1, self.fWh) + self.fbh)
        return h_t

    fh, _ = theano.scan(fn=frecurrence, sequences=x,
                        outputs_info=[self.fh0], n_steps=x.shape[0],
                        truncate_gradient=truncate_gradient)

    # backwards parameters of the model
    self.bWx = theano.shared(0.2 * np.random.uniform(-1.0, 1.0,
                             (de * cs, nh)).astype(theano.config.floatX))
    self.bWh = theano.shared(0.2 * np.random.uniform(-1.0, 1.0,
                             (nh, nh)).astype(theano.config.floatX))
    self.bbh = theano.shared(np.zeros(nh, dtype=theano.config.floatX))
    self.bh0 = theano.shared(np.zeros(nh, dtype=theano.config.floatX))

    bparams = [self.bWx, self.bWh, self.bbh, self.bh0]
    bnames = ['bWx', 'bWh', 'bbh', 'bh0']

    def brecurrence(x_t, h_tm1):
        # one backward step: same form as frecurrence, with the b* weights
        h_t = T.nnet.sigmoid(T.dot(x_t, self.bWx) +
                             T.dot(h_tm1, self.bWh) + self.bbh)
        return h_t

    bh, _ = theano.scan(fn=brecurrence, sequences=x,
                        outputs_info=[self.bh0], n_steps=x.shape[0],
                        go_backwards=True,
                        truncate_gradient=truncate_gradient)

    # inverting backwards hidden, so that its rows line up with fh again
    bh = bh[::-1]

    # concatenation parameters
    self.bW = theano.shared(0.2 * np.random.uniform(-1.0, 1.0,
                            (nh+featdim, nc)).astype(theano.config.floatX))
    self.fW = theano.shared(0.2 * np.random.uniform(-1.0, 1.0,
                            (nh+featdim, nc)).astype(theano.config.floatX))
    self.b = theano.shared(np.zeros(nc, dtype=theano.config.floatX))

    # adding features
    if featdim > 0:
        fh_final = T.concatenate([fh, f], axis=1)
        bh_final = T.concatenate([bh, f], axis=1)
    else:
        fh_final = fh
        bh_final = bh

    # "concatenating" forward and backward hidden states: each one is
    # projected by its own matrix and the two projections are summed
    h = T.dot(bh_final, self.bW) + T.dot(fh_final, self.fW)

    s = T.nnet.softmax(h + self.b)

    p_y_given_x_lastword = s[-1, :]
    p_y_given_x_sentence = s

    self.params = fparams + bparams + [self.bW, self.fW, self.b]
    self.names = fnames + bnames + ['bW', 'fW', 'b']

    # the embeddings are trained only when fine tuning is requested
    if fine_tuning:
        self.params.append(self.emb)
        self.names.append("emb")

    # prediction
    y_pred = T.argmax(p_y_given_x_sentence, axis=1)

    # cost functions
    sentence_nll = -T.mean(T.log(p_y_given_x_sentence)
                           [T.arange(x.shape[0]), y_sentence])

    nll = -T.mean(T.log(p_y_given_x_lastword)[y])

    # gradients
    sentence_gradients = T.grad(sentence_nll, self.params)
    gradients = T.grad(nll, self.params)

    # learning rate
    lr = T.scalar('lr')

    # updates: plain gradient descent step on every parameter
    sentence_updates = OrderedDict((p, p - lr * g)
                                   for p, g in
                                   zip(self.params, sentence_gradients))

    updates = OrderedDict((p, p - lr * g)
                          for p, g in
                          zip(self.params, gradients))

    # theano functions
    self.classify = theano.function(inputs=[idxs, f, In(w, value=1.0)],
                                    outputs=y_pred,
                                    on_unused_input='ignore')

    self.sentence_train = theano.function(inputs=[idxs, f, y_sentence, lr,
                                                  In(w, value=1.0)],
                                          outputs=sentence_nll,
                                          updates=sentence_updates,
                                          on_unused_input='ignore')

    self.train = theano.function(inputs=[idxs, f, y, lr, In(w, value=1.0)],
                                 outputs=nll,
                                 updates=updates,
                                 on_unused_input='ignore')

    self.predict = theano.function(inputs=[idxs, f, In(w, value=1.0)],
                                   outputs=p_y_given_x_sentence,
                                   on_unused_input='ignore')

    # divides every embedding row by its Euclidean norm
    self.normalize = theano.function(inputs=[],
                                     updates={self.emb:\
                                     self.emb/T.sqrt((self.emb**2).sum(axis=1)).dimshuffle(0, 'x')})
def test_in_shared_variable(self):
    """Wrapping a shared variable in ``In`` must be rejected with a TypeError."""
    # Ensure that an error is raised if the In wrapped is used to wrap
    # a shared variable
    a = theano.shared(1.0)
    a_wrapped = In(a, update=a + 1)
    with self.assertRaises(TypeError):
        theano.function([a_wrapped])
def fn():
    """Compile a function whose input update depends on ``s``, which is not an input."""
    x, s = T.scalars('xs')
    next_x = mul(s, s) + x
    updated_input = In(x, update=next_x)
    function([updated_input], x)
def __init__(self,
             num_hidden,
             num_classes,
             context_win_size,
             embeddings,
             featdim=0,
             fine_tuning=False,
             truncate_gradient=-1):
    """
    Build the symbolic graph and compile the Theano functions of the model.

    Two gated recurrences (input, forget and output gates plus a cell
    state) are scanned over the same input, one forwards and one with
    ``go_backwards=True``; their (optionally feature-augmented) hidden
    states are projected, summed and passed through a softmax.

    num_hidden :: dimension of the hidden layer
    num_classes :: number of classes
    context_win_size :: word window context size
    embeddings :: matrix; its second dimension is the embedding size
    featdim :: size of the features; when > 0 the input ``f`` is
               concatenated to both hidden states
    fine_tuning :: if true, the embeddings are appended to the trained
                   parameters
    truncate_gradient :: forwarded to both ``theano.scan`` calls
    """
    # hyper parameters of the model
    self.hyperparams = {}

    # nh :: dimension of the hidden layer
    nh = num_hidden
    self.hyperparams['nh'] = nh

    # nc :: number of classes
    nc = num_classes
    self.hyperparams['nc'] = nc

    # de :: dimension of the word embeddings
    de = embeddings.shape[1]
    self.hyperparams['de'] = de

    # cs :: word window context size
    cs = context_win_size
    self.hyperparams['cs'] = cs

    self.hyperparams['featdim'] = featdim
    self.hyperparams['fine_tuning'] = fine_tuning
    self.hyperparams['truncate_gradient'] = truncate_gradient

    # the embeddings, cast to floatX and shared
    # NOTE(review): an earlier comment here mentioned adding one row for
    # PADDING; nothing in this constructor adds one -- confirm with caller.
    self.emb = theano.shared(embeddings.astype(theano.config.floatX))

    # layer sizes: every gate and the cell state use the hidden size
    n_in = de * cs
    n_hidden = n_i = n_c = n_o = n_f = nh
    n_y = nc

    idxs = T.imatrix()
    # w multiplies the looked-up embeddings; the compiled functions below
    # give it a default value of 1.0
    w = T.fscalar('w')
    # as many columns as context window size/lines as words in the sentence
    x = self.emb[idxs].reshape((idxs.shape[0], de * cs)) * w
    f = T.matrix('f')
    # NOTE(review): the result of this reshape is discarded, so the
    # statement leaves f unchanged -- confirm whether an assignment was
    # intended.
    f.reshape((idxs.shape[0], featdim))
    y = T.iscalar('y')  # label
    y_sentence = T.ivector('y_sentence')

    # forward weights
    # (suffixes: x = from input, h = from hidden state, c = from cell
    # state; i / f / c / o = input gate, forget gate, cell, output gate)
    self.fW_xi = theano.shared(0.2 * uniform(-1.0, 1.0,
                                             (n_in, n_i)).astype(dtype))
    self.fW_hi = theano.shared(
        0.2 * uniform(-1.0, 1.0, (n_hidden, n_i)).astype(dtype))
    self.fW_ci = theano.shared(0.2 * uniform(-1.0, 1.0,
                                             (n_c, n_i)).astype(dtype))
    self.fb_i = theano.shared(np.cast[dtype](uniform(-0.5, .5, size=n_i)))
    self.fW_xf = theano.shared(0.2 * uniform(-1.0, 1.0,
                                             (n_in, n_f)).astype(dtype))
    self.fW_hf = theano.shared(
        0.2 * uniform(-1.0, 1.0, (n_hidden, n_f)).astype(dtype))
    self.fW_cf = theano.shared(0.2 * uniform(-1.0, 1.0,
                                             (n_c, n_f)).astype(dtype))
    self.fb_f = theano.shared(np.cast[dtype](uniform(0, 1., size=n_f)))
    self.fW_xc = theano.shared(0.2 * uniform(-1.0, 1.0,
                                             (n_in, n_c)).astype(dtype))
    self.fW_hc = theano.shared(
        0.2 * uniform(-1.0, 1.0, (n_hidden, n_c)).astype(dtype))
    self.fb_c = theano.shared(np.zeros(n_c, dtype=dtype))
    self.fW_xo = theano.shared(0.2 * uniform(-1.0, 1.0,
                                             (n_in, n_o)).astype(dtype))
    self.fW_ho = theano.shared(
        0.2 * uniform(-1.0, 1.0, (n_hidden, n_o)).astype(dtype))
    self.fW_co = theano.shared(0.2 * uniform(-1.0, 1.0,
                                             (n_c, n_o)).astype(dtype))
    self.fb_o = theano.shared(np.cast[dtype](uniform(-0.5, .5, size=n_o)))

    # initial cell state is a trained parameter; the initial hidden state
    # is derived from it
    self.fc0 = theano.shared(np.zeros(n_hidden, dtype=dtype))
    self.fh0 = T.tanh(self.fc0)

    fparams = [
        self.fW_xi, self.fW_hi, self.fW_ci, self.fb_i, self.fW_xf,
        self.fW_hf, self.fW_cf, self.fb_f, self.fW_xc, self.fW_hc,
        self.fb_c, self.fW_xo, self.fW_ho, self.fW_co, self.fb_o, self.fc0
    ]
    fnames = [
        'fW_xi', 'fW_hi', 'fW_ci', 'fb_i', 'fW_xf', 'fW_hf', 'fW_cf',
        'fb_f', 'fW_xc', 'fW_hc', 'fb_c', 'fW_xo', 'fW_ho', 'fW_co',
        'fb_o', 'fc0'
    ]

    def frecurrence(x_t, h_tm1, c_tm1):
        # input and forget gates read the previous cell state c_tm1
        i_t = sigma(
            theano.dot(x_t, self.fW_xi) + theano.dot(h_tm1, self.fW_hi) +
            theano.dot(c_tm1, self.fW_ci) + self.fb_i)
        f_t = sigma(
            theano.dot(x_t, self.fW_xf) + theano.dot(h_tm1, self.fW_hf) +
            theano.dot(c_tm1, self.fW_cf) + self.fb_f)
        c_t = f_t * c_tm1 + i_t * T.tanh(
            theano.dot(x_t, self.fW_xc) + theano.dot(h_tm1, self.fW_hc) +
            self.fb_c)
        # the output gate reads the freshly computed cell state c_t
        o_t = sigma(
            theano.dot(x_t, self.fW_xo) + theano.dot(h_tm1, self.fW_ho) +
            theano.dot(c_t, self.fW_co) + self.fb_o)
        h_t = o_t * T.tanh(c_t)
        return [h_t, c_t]

    # only the hidden states are kept; the cell states are dropped
    [
        fh,
        _,
    ], _ = theano.scan(fn=frecurrence,
                       sequences=[x],
                       outputs_info=[self.fh0, self.fc0],
                       n_steps=x.shape[0],
                       truncate_gradient=truncate_gradient)

    # backward weights (same layout as the forward ones)
    self.bW_xi = theano.shared(0.2 * uniform(-1.0, 1.0,
                                             (n_in, n_i)).astype(dtype))
    self.bW_hi = theano.shared(
        0.2 * uniform(-1.0, 1.0, (n_hidden, n_i)).astype(dtype))
    self.bW_ci = theano.shared(0.2 * uniform(-1.0, 1.0,
                                             (n_c, n_i)).astype(dtype))
    self.bb_i = theano.shared(np.cast[dtype](uniform(-0.5, .5, size=n_i)))
    self.bW_xf = theano.shared(0.2 * uniform(-1.0, 1.0,
                                             (n_in, n_f)).astype(dtype))
    self.bW_hf = theano.shared(
        0.2 * uniform(-1.0, 1.0, (n_hidden, n_f)).astype(dtype))
    self.bW_cf = theano.shared(0.2 * uniform(-1.0, 1.0,
                                             (n_c, n_f)).astype(dtype))
    self.bb_f = theano.shared(np.cast[dtype](uniform(0, 1., size=n_f)))
    self.bW_xc = theano.shared(0.2 * uniform(-1.0, 1.0,
                                             (n_in, n_c)).astype(dtype))
    self.bW_hc = theano.shared(
        0.2 * uniform(-1.0, 1.0, (n_hidden, n_c)).astype(dtype))
    self.bb_c = theano.shared(np.zeros(n_c, dtype=dtype))
    self.bW_xo = theano.shared(0.2 * uniform(-1.0, 1.0,
                                             (n_in, n_o)).astype(dtype))
    self.bW_ho = theano.shared(
        0.2 * uniform(-1.0, 1.0, (n_hidden, n_o)).astype(dtype))
    self.bW_co = theano.shared(0.2 * uniform(-1.0, 1.0,
                                             (n_c, n_o)).astype(dtype))
    self.bb_o = theano.shared(np.cast[dtype](uniform(-0.5, .5, size=n_o)))

    self.bc0 = theano.shared(np.zeros(n_hidden, dtype=dtype))
    self.bh0 = T.tanh(self.bc0)

    bparams = [
        self.bW_xi, self.bW_hi, self.bW_ci, self.bb_i, self.bW_xf,
        self.bW_hf, self.bW_cf, self.bb_f, self.bW_xc, self.bW_hc,
        self.bb_c, self.bW_xo, self.bW_ho, self.bW_co, self.bb_o, self.bc0
    ]
    bnames = [
        'bW_xi', 'bW_hi', 'bW_ci', 'bb_i', 'bW_xf', 'bW_hf', 'bW_cf',
        'bb_f', 'bW_xc', 'bW_hc', 'bb_c', 'bW_xo', 'bW_ho', 'bW_co',
        'bb_o', 'bc0'
    ]

    def brecurrence(x_t, h_tm1, c_tm1):
        # same step as frecurrence, using the backward (b*) weights
        i_t = sigma(
            theano.dot(x_t, self.bW_xi) + theano.dot(h_tm1, self.bW_hi) +
            theano.dot(c_tm1, self.bW_ci) + self.bb_i)
        f_t = sigma(
            theano.dot(x_t, self.bW_xf) + theano.dot(h_tm1, self.bW_hf) +
            theano.dot(c_tm1, self.bW_cf) + self.bb_f)
        c_t = f_t * c_tm1 + i_t * T.tanh(
            theano.dot(x_t, self.bW_xc) + theano.dot(h_tm1, self.bW_hc) +
            self.bb_c)
        o_t = sigma(
            theano.dot(x_t, self.bW_xo) + theano.dot(h_tm1, self.bW_ho) +
            theano.dot(c_t, self.bW_co) + self.bb_o)
        h_t = o_t * T.tanh(c_t)
        return [h_t, c_t]

    [
        bh,
        _,
    ], _ = theano.scan(fn=brecurrence,
                       sequences=[x],
                       outputs_info=[self.bh0, self.bc0],
                       n_steps=x.shape[0],
                       go_backwards=True,
                       truncate_gradient=truncate_gradient)

    # concatenation weights
    self.bW = theano.shared(
        0.2 * uniform(-1.0, 1.0, (n_hidden + featdim, n_y)).astype(dtype))
    self.fW = theano.shared(
        0.2 * uniform(-1.0, 1.0, (n_hidden + featdim, n_y)).astype(dtype))
    self.b = theano.shared(np.zeros(n_y, dtype=dtype))

    # reversing backwards hidden, so that its rows line up with fh again
    bh = bh[::-1]

    # adding features
    if featdim > 0:
        fh_final = T.concatenate([fh, f], axis=1)
        bh_final = T.concatenate([bh, f], axis=1)
    else:
        fh_final = fh
        bh_final = bh

    # "concatenating" hidden states: each one is projected by its own
    # matrix and the two projections are summed
    h = T.dot(bh_final, self.bW) + T.dot(fh_final, self.fW)
    s = T.nnet.softmax(h + self.b)

    p_y_given_x_lastword = s[-1, :]
    p_y_given_x_sentence = s

    # params and names
    self.params = fparams + bparams + [self.fW, self.bW, self.b]
    self.names = fnames + bnames + ["fW", "bW", "b"]

    # the embeddings are trained only when fine tuning is requested
    if fine_tuning:
        self.params.append(self.emb)
        self.names.append("embeddings")

    # prediction
    y_pred = T.argmax(p_y_given_x_sentence, axis=1)

    # learning rate
    lr = T.scalar('lr')

    # cost functions
    sentence_nll = -T.mean(
        T.log(p_y_given_x_sentence)[T.arange(x.shape[0]), y_sentence])
    nll = -T.mean(T.log(p_y_given_x_lastword)[y])

    # gradients
    gradients = T.grad(nll, self.params)
    sentence_gradients = T.grad(sentence_nll, self.params)

    # updates: plain gradient descent step on every parameter
    updates = OrderedDict(
        (p, p - lr * g) for p, g in zip(self.params, gradients))
    sentence_updates = OrderedDict(
        (p, p - lr * g) for p, g in zip(self.params, sentence_gradients))

    # theano functions
    self.classify = theano.function(inputs=[idxs, f,
                                            In(w, value=1.0)],
                                    outputs=y_pred,
                                    on_unused_input='ignore')

    self.train = theano.function(inputs=[idxs, f, y, lr,
                                         In(w, value=1.0)],
                                 outputs=nll,
                                 updates=updates,
                                 on_unused_input='ignore')

    self.sentence_train = theano.function(
        inputs=[idxs, f, y_sentence, lr,
                In(w, value=1.0)],
        outputs=sentence_nll,
        updates=sentence_updates,
        on_unused_input='ignore')

    self.predict = theano.function(inputs=[idxs, f,
                                           In(w, value=1.0)],
                                   outputs=p_y_given_x_sentence,
                                   on_unused_input='ignore')

    # divides every embedding row by its Euclidean norm
    self.normalize = theano.function(
        inputs=[],
        updates={
            self.emb:
            self.emb / T.sqrt(
                (self.emb**2).sum(axis=1)).dimshuffle(0, 'x')
        })
def fn():
    """Compile a function whose input update depends on ``s``, which is not an input."""
    x, s = tt.scalars("xs")
    next_x = (s * s) + x
    updated_input = In(x, update=next_x)
    function([updated_input], x)
def __init__(self, num_hidden, num_classes, context_win_size, embeddings,
             featdim=0, fine_tuning=False, truncate_gradient=-1):
    """
    Create the LSTM parameters, build the symbolic graph and compile the
    theano functions (classify, sentence_train, train, predict, normalize).

    num_hidden :: dimension of the hidden layer
    num_classes :: number of classes
    context_win_size :: word window context size
    embeddings :: matrix; embeddings.shape[1] is used as the dimension
        of the word embeddings
    featdim :: size of the features; when > 0 the feature vector of each
        step is concatenated to the hidden state before the softmax
    fine_tuning :: when true, the embeddings are appended to the list of
        trained parameters
    truncate_gradient :: passed unchanged to theano.scan
    """
    # nh :: dimension of the hidden layer
    nh = num_hidden
    # nc :: number of classes
    nc = num_classes
    # de :: dimension of the word embeddings
    de = embeddings.shape[1]
    # cs :: word window context size
    cs = context_win_size

    # hyper parameters of the model
    self.hyperparams = {
        'nh': nh,
        'nc': nc,
        'de': de,
        'cs': cs,
        'featdim': featdim,
        'fine_tuning': fine_tuning,
        'truncate_gradient': truncate_gradient,
    }

    # parameters of the model
    self.emb = theano.shared(embeddings.astype(theano.config.floatX))

    def uniform_weights(rows, cols):
        # shared (rows, cols) matrix: 0.2 * uniform(-1, 1) cast to dtype
        return theano.shared(
            0.2 * uniform(-1.0, 1.0, (rows, cols)).astype(dtype))

    def uniform_bias(low, high, size):
        # shared vector drawn from uniform(low, high) and cast to dtype.
        # np.asarray(..., dtype=...) is the literal replacement for
        # np.cast[dtype](...), which was removed in NumPy 2.0.
        return theano.shared(
            np.asarray(uniform(low, high, size=size), dtype=dtype))

    # weights for LSTM
    # NOTE: the creation order below fixes the order of the random draws;
    # keep it if results must stay reproducible for a given seed.
    n_in = de * cs
    n_hidden = n_i = n_c = n_o = n_f = nh
    n_y = nc
    # input gate
    self.W_xi = uniform_weights(n_in, n_i)
    self.W_hi = uniform_weights(n_hidden, n_i)
    self.W_ci = uniform_weights(n_c, n_i)
    self.b_i = uniform_bias(-0.5, .5, n_i)
    # forget gate
    self.W_xf = uniform_weights(n_in, n_f)
    self.W_hf = uniform_weights(n_hidden, n_f)
    self.W_cf = uniform_weights(n_c, n_f)
    self.b_f = uniform_bias(0, 1., n_f)
    # cell candidate
    self.W_xc = uniform_weights(n_in, n_c)
    self.W_hc = uniform_weights(n_hidden, n_c)
    self.b_c = theano.shared(np.zeros(n_c, dtype=dtype))
    # output gate
    self.W_xo = uniform_weights(n_in, n_o)
    self.W_ho = uniform_weights(n_hidden, n_o)
    self.W_co = uniform_weights(n_c, n_o)
    self.b_o = uniform_bias(-0.5, .5, n_o)
    # softmax layer; its input is the hidden state plus featdim features
    self.W_hy = uniform_weights(n_hidden + featdim, n_y)
    self.b_y = theano.shared(np.zeros(n_y, dtype=dtype))
    # initial cell state (trained) and the initial hidden state derived
    # from it
    self.c0 = theano.shared(np.zeros(n_hidden, dtype=dtype))
    self.h0 = T.tanh(self.c0)

    # bundle weights
    self.params = [self.W_xi, self.W_hi, self.W_ci, self.b_i,
                   self.W_xf, self.W_hf, self.W_cf, self.b_f,
                   self.W_xc, self.W_hc, self.b_c,
                   self.W_xo, self.W_ho, self.W_co, self.b_o,
                   self.W_hy, self.b_y, self.c0]
    self.names = ['W_xi', 'W_hi', 'W_ci', 'b_i',
                  'W_xf', 'W_hf', 'W_cf', 'b_f',
                  'W_xc', 'W_hc', 'b_c',
                  'W_xo', 'W_ho', 'W_co', 'b_o',
                  'W_hy', 'b_y', 'c0']
    if fine_tuning:
        self.params.append(self.emb)
        self.names.append("embeddings")

    idxs = T.imatrix()
    # scalar factor applied to the embedded input; defaults to 1.0 in
    # every compiled function below
    w = T.fscalar('w')
    # as many columns as context window size/lines as words in the sentence
    x = self.emb[idxs].reshape((idxs.shape[0], de * cs)) * w
    # per-step features. No reshape is applied to f: a symbolic reshape
    # returns a new variable and leaves f itself untouched.
    f = T.matrix('f')
    y = T.iscalar('y')  # label
    y_sentence = T.ivector('y_sentence')

    def recurrence(x_t, feat_t, h_tm1, c_tm1):
        # input and forget gates read the previous cell state c_tm1
        i_t = sigma(theano.dot(x_t, self.W_xi) +
                    theano.dot(h_tm1, self.W_hi) +
                    theano.dot(c_tm1, self.W_ci) + self.b_i)
        f_t = sigma(theano.dot(x_t, self.W_xf) +
                    theano.dot(h_tm1, self.W_hf) +
                    theano.dot(c_tm1, self.W_cf) + self.b_f)
        c_t = f_t * c_tm1 + i_t * T.tanh(theano.dot(x_t, self.W_xc) +
                                         theano.dot(h_tm1, self.W_hc) +
                                         self.b_c)
        # the output gate reads the freshly computed cell state c_t
        o_t = sigma(theano.dot(x_t, self.W_xo) +
                    theano.dot(h_tm1, self.W_ho) +
                    theano.dot(c_t, self.W_co) + self.b_o)
        h_t = o_t * T.tanh(c_t)
        if featdim > 0:
            all_t = T.concatenate([h_t, feat_t])
        else:
            all_t = h_t
        s_t = softmax(theano.dot(all_t, self.W_hy) + self.b_y)
        return [h_t, c_t, s_t]

    # only the per-step softmax output is used afterwards
    [_, _, s], _ = theano.scan(fn=recurrence,
                               sequences=[x, f],
                               outputs_info=[self.h0, self.c0, None],
                               n_steps=x.shape[0],
                               truncate_gradient=truncate_gradient)

    # s is indexed with three axes; presumably softmax returns one row
    # per step -- confirm against the softmax in scope
    p_y_given_x_lastword = s[-1, 0, :]
    p_y_given_x_sentence = s[:, 0, :]
    y_pred = T.argmax(p_y_given_x_sentence, axis=1)

    # cost and gradients and learning rate
    lr = T.scalar('lr')

    # cost functions
    sentence_nll = -T.mean(
        T.log(p_y_given_x_sentence)[T.arange(x.shape[0]), y_sentence])
    nll = -T.mean(T.log(p_y_given_x_lastword)[y])

    # gradients
    sentence_gradients = T.grad(sentence_nll, self.params)
    gradients = T.grad(nll, self.params)

    # updates: p <- p - lr * g for every parameter
    updates = OrderedDict(
        (p, p - lr * g) for p, g in zip(self.params, gradients))
    sentence_updates = OrderedDict(
        (p, p - lr * g) for p, g in zip(self.params, sentence_gradients))

    # theano functions
    self.classify = theano.function(
        inputs=[idxs, f, In(w, value=1.0)],
        outputs=y_pred)
    self.sentence_train = theano.function(
        inputs=[idxs, f, y_sentence, lr, In(w, value=1.0)],
        outputs=sentence_nll,
        updates=sentence_updates)
    self.train = theano.function(
        inputs=[idxs, f, y, lr, In(w, value=1.0)],
        outputs=nll,
        updates=updates)
    self.predict = theano.function(
        inputs=[idxs, f, In(w, value=1.0)],
        outputs=p_y_given_x_sentence)
    # divides every embedding row by the square root of its sum of squares
    self.normalize = theano.function(
        inputs=[],
        updates={self.emb: self.emb / T.sqrt(
            (self.emb ** 2).sum(axis=1)).dimshuffle(0, 'x')})
def test_random_function_ndim_added(self):
    # Check that RandomFunction accepts ndim_added as a keyword argument.
    # With numpy's uniform distribution the requested size already is the
    # output shape, so ndim_added should be 0.  Any other value changes
    # the Op's declared output ndim, numpy then produces a result of the
    # wrong shape, and a ValueError should be raised.
    def ndim_added_deco(ndim_added):
        def randomfunction(random_state, size=(), low=0.0, high=0.0,
                           ndim=None):
            ndim, size, bcast = raw_random._infer_ndim_bcast(ndim, size)
            # shrink or extend the broadcast pattern by ndim_added entries
            bcast = (bcast[:ndim_added] if ndim_added < 0
                     else bcast + ((False, ) * ndim_added))
            assert len(bcast) == ndim + ndim_added
            out_type = tensor.TensorType(dtype="float64",
                                         broadcastable=bcast)
            op = RandomFunction("uniform", out_type,
                                ndim_added=ndim_added)
            return op(random_state, size, low, high)

        return randomfunction

    uni_1 = ndim_added_deco(1)
    uni_0 = ndim_added_deco(0)
    uni_m1 = ndim_added_deco(-1)

    rng_R = random_state_type()

    def compile_sampler(rng_update, outputs):
        # freshly seeded generator as default value, advanced through
        # rng_update on every call
        seeded = In(
            rng_R,
            value=np.random.RandomState(utt.fetch_seed()),
            update=rng_update,
            mutable=True,
        )
        return function([seeded], outputs, accept_inplace=True)

    p_uni11, uni11 = uni_1(rng_R, size=(4, ))
    p_uni12, uni12 = uni_1(rng_R, size=(3, 4))
    _, uni01 = uni_0(rng_R, size=(4, ))
    p_uni02, uni02 = uni_0(rng_R, size=(3, 4))
    p_unim11, unim11 = uni_m1(rng_R, size=(4, ))
    p_unim12, unim12 = uni_m1(rng_R, size=(3, 4))

    # declared ndim is len(size) + ndim_added
    declared = (
        (uni11, 2),
        (uni12, 3),
        (uni01, 1),
        (uni02, 2),
        (unim11, 0),
        (unim12, 1),
    )
    for sample, expected_ndim in declared:
        assert sample.ndim == expected_ndim

    f11 = compile_sampler(p_uni11, [uni11])
    f12 = compile_sampler(p_uni12, [uni12])
    fm11 = compile_sampler(p_unim11, [unim11])
    fm12 = compile_sampler(p_unim12, [unim12])
    f0 = compile_sampler(p_uni02, [uni01, uni02])

    # every sampler built with ndim_added != 0 has to fail when called
    for mismatched in (f11, f12, fm11, fm12):
        with pytest.raises(ValueError):
            mismatched()

    u01, u02 = f0()
    assert np.allclose(u01, u02[0])
def __init__(self, num_hidden, num_classes, context_win_size, embeddings,
             featdim=0, fine_tuning=False, truncate_gradient=-1):
    """
    Create the parameters of the recurrent network, build the symbolic
    graph and compile the theano functions (classify, sentence_train,
    train, predict, normalize).

    num_hidden :: dimension of the hidden layer
    num_classes :: number of classes
    context_win_size :: word window context size
    embeddings :: matrix; embeddings.shape[1] is used as the dimension
        of the word embeddings
    featdim :: size of the features; when > 0 the feature vector of each
        step is concatenated to the hidden layer before the softmax
    fine_tuning :: when true, the embeddings are appended to the list of
        trained parameters
    truncate_gradient :: passed unchanged to theano.scan
    """
    # nh :: dimension of the hidden layer
    nh = num_hidden
    # nc :: number of classes
    nc = num_classes
    # de :: dimension of the word embeddings
    de = embeddings.shape[1]
    # cs :: word window context size
    cs = context_win_size

    # hyper parameters of the model
    self.hyperparams = {
        'nh': nh,
        'nc': nc,
        'de': de,
        'cs': cs,
        'featdim': featdim,
        'fine_tuning': fine_tuning,
        'truncate_gradient': truncate_gradient,
    }

    floatX = theano.config.floatX

    def uniform_weights(rows, cols):
        # shared (rows, cols) matrix: 0.2 * uniform(-1, 1) cast to floatX
        return theano.shared(
            0.2 * np.random.uniform(-1.0, 1.0, (rows, cols)).astype(floatX))

    # parameters
    # NOTE: the creation order below fixes the order of the random draws.
    # input -> hidden
    self.Wx = uniform_weights(de * cs, nh)
    # previous output distribution -> hidden
    self.Ws = uniform_weights(nc, nh)
    # V matrix: hidden (+ features) -> classes
    self.W = uniform_weights(nh + featdim, nc)
    self.bh = theano.shared(np.zeros(nh, dtype=floatX))
    self.b = theano.shared(np.zeros(nc, dtype=floatX))
    # initial value of the fed-back output
    self.s0 = theano.shared(np.zeros(nc, dtype=floatX))
    self.emb = theano.shared(embeddings.astype(floatX))

    # bundle
    self.params = [self.Wx, self.Ws, self.W, self.bh, self.b, self.s0]
    # NOTE(review): self.Ws is labelled 'Wh' here. The label is kept
    # unchanged because other code may look parameters up by name --
    # confirm before renaming.
    self.names = ['Wx', 'Wh', 'W', 'bh', 'b', 's0']
    if fine_tuning:
        self.params.append(self.emb)
        self.names.append("emb")

    idxs = T.imatrix()
    # scalar factor applied to the embedded input; defaults to 1.0 in
    # every compiled function below
    w = T.fscalar('w')
    x = self.emb[idxs].reshape((idxs.shape[0], de * cs)) * w
    y = T.iscalar('y')
    y_sentence = T.ivector('y_sentence')
    # per-step features. No reshape is applied to f: a symbolic reshape
    # returns a new variable and leaves f itself untouched.
    f = T.matrix('f')

    def recurrence(x_t, feat_t, s_tm1):
        # the hidden layer is driven by the input and the previous output
        h_t = T.nnet.sigmoid(
            T.dot(x_t, self.Wx) + T.dot(s_tm1, self.Ws) + self.bh)
        if featdim > 0:
            all_t = T.concatenate([h_t, feat_t])
        else:
            all_t = h_t
        # [0] keeps the first row of the softmax result
        s_t = T.nnet.softmax(T.dot(all_t, self.W) + self.b)[0]
        return [h_t, s_t]

    # only the per-step output distribution is used afterwards
    [_, s], _ = theano.scan(fn=recurrence,
                            sequences=[x, f],
                            outputs_info=[None, self.s0],
                            n_steps=x.shape[0],
                            truncate_gradient=truncate_gradient)

    # probabilities
    p_y_given_x_sentence = s
    p_y_given_x_lastword = s[-1, :]

    # prediction
    y_pred = T.argmax(p_y_given_x_sentence, axis=1)

    # cost functions
    sentence_nll = -T.mean(
        T.log(p_y_given_x_sentence)[T.arange(x.shape[0]), y_sentence])
    nll = -T.mean(T.log(p_y_given_x_lastword)[y])

    # gradients
    sentence_gradients = T.grad(sentence_nll, self.params)
    gradients = T.grad(nll, self.params)

    # learning rate
    lr = T.scalar('lr')

    # updates: p <- p - lr * g for every parameter
    sentence_updates = OrderedDict(
        (p, p - lr * g) for p, g in zip(self.params, sentence_gradients))
    updates = OrderedDict(
        (p, p - lr * g) for p, g in zip(self.params, gradients))

    # theano functions
    self.classify = theano.function(
        inputs=[idxs, f, In(w, value=1.0)],
        outputs=y_pred)
    self.sentence_train = theano.function(
        inputs=[idxs, f, y_sentence, lr, In(w, value=1.0)],
        outputs=sentence_nll,
        updates=sentence_updates)
    self.train = theano.function(
        inputs=[idxs, f, y, lr, In(w, value=1.0)],
        outputs=nll,
        updates=updates)
    self.predict = theano.function(
        inputs=[idxs, f, In(w, value=1.0)],
        outputs=p_y_given_x_sentence)
    # divides every embedding row by the square root of its sum of squares
    self.normalize = theano.function(
        inputs=[],
        updates={
            self.emb: self.emb / T.sqrt(
                (self.emb**2).sum(axis=1)).dimshuffle(0, 'x')
        })
def test_permutation_helper(self):
    # Check that raw_random.permutation_helper yields the same samples as
    # numpy and that the 'ndim_added' keyword behaves correctly.
    # permutation_helper needs "ndim_added=1": its output has one more
    # dimension than its "shape" argument, and that cannot be determined
    # automatically.
    n_rows, n_items = 7, 8

    def compile_sampler(random_op):
        # apply the op to the shared generator and compile a function
        # that starts from a freshly seeded RandomState
        rng_update, sample = random_op(rng_R, (n_rows, ), n_items)
        seeded = In(
            rng_R,
            value=np.random.RandomState(utt.fetch_seed()),
            update=rng_update,
            mutable=True,
        )
        return function([seeded], [sample], accept_inplace=True)

    # Working case, called twice to verify that the random state is
    # correctly updated between calls.
    rf = RandomFunction(permutation_helper, tensor.imatrix, 8,
                        ndim_added=1)
    rng_R = random_state_type()
    f = compile_sampler(rf)

    numpy_rng = np.random.RandomState(utt.fetch_seed())
    val0 = f()
    val1 = f()
    # numpy_rng.permutation returns a single vector per call, so all the
    # rows are generated one by one.
    numpy_val0 = np.asarray(
        [numpy_rng.permutation(n_items) for _ in range(n_rows)])
    numpy_val1 = np.asarray(
        [numpy_rng.permutation(n_items) for _ in range(n_rows)])
    assert np.all(val0 == numpy_val0)
    assert np.all(val1 == numpy_val1)

    # Without "ndim_added=1" the default of 0 applies.
    # A ValueError should be raised.
    rf0 = RandomFunction(permutation_helper, tensor.imatrix, 8)
    f0 = compile_sampler(rf0)
    with pytest.raises(ValueError):
        f0()

    # ndim_added is 2 instead of 1. A ValueError should be raised.
    rf2 = RandomFunction(permutation_helper, tensor.imatrix, 8,
                         ndim_added=2)
    f2 = compile_sampler(rf2)
    with pytest.raises(ValueError):
        f2()
def test_multiple_functions(self):
    # Pickle a list holding variables, a derived expression and three
    # compiled functions (two of them sharing the storage of `s`), then
    # check that the unpickled copy is independent of the original while
    # keeping the same internal sharing.
    a = T.scalar()  # the a is for 'anonymous' (un-named).
    x, s = T.scalars('xs')
    v = T.vector('v')

    # put in some inputs
    list_of_things = [s, x, v]

    # some derived thing, whose inputs aren't all in the list
    list_of_things.append(a * x + s)

    f1 = function([
        x,
        In(a, value=1.0, name='a'),
        In(s, value=0.0, update=s + a * x, mutable=True)
    ], s + a * x)
    list_of_things.append(f1)

    # now put in a function sharing container with the previous one
    f2 = function([
        x,
        In(a, value=1.0, name='a'),
        In(s, value=f1.container[s], update=s + a * x, mutable=True)
    ], s + a * x)
    list_of_things.append(f2)

    assert isinstance(f2.container[s].storage, list)
    assert f2.container[s].storage is f1.container[s].storage

    # now put in a function with non-scalar
    v_value = np.asarray([2, 3, 4.], dtype=config.floatX)
    f3 = function([x, In(v, value=v_value)], x + v)
    list_of_things.append(f3)

    # try to pickle the entire things
    try:
        saved_format = pickle.dumps(list_of_things, protocol=-1)
        new_list_of_things = pickle.loads(saved_format)
    except NotImplementedError as e:
        # Exceptions cannot be indexed on Python 3 (`e[0]` raises
        # TypeError there), so inspect the message through str(e).
        if str(e).startswith('DebugMode is not picklable'):
            return
        raise

    # now test our recovered new_list_of_things
    # it should be totally unrelated to the original
    # it should be interdependent in the same way as the original
    ol = list_of_things
    nl = new_list_of_things

    # entries 0-3 are the variables s, x, v and the expression a * x + s
    for new_var, old_var in zip(nl[:4], ol[:4]):
        assert new_var != old_var
        assert new_var.type == old_var.type
        assert new_var.type is not old_var.type

    # see if the implicit input got stored
    assert ol[3].owner.inputs[1] is s
    assert nl[3].owner.inputs[1] is not s
    assert nl[3].owner.inputs[1].type == s.type

    # moving on to the functions...
    for new_fn, old_fn in zip(nl[4:7], ol[4:7]):
        assert new_fn != old_fn

    # looking at function number 1, input 's'
    assert nl[4][nl[0]] is not ol[4][ol[0]]
    assert nl[4][nl[0]] == ol[4][ol[0]]
    assert nl[4](3) == ol[4](3)

    # looking at function number 2, input 's'
    # make sure it's shared with the first function
    assert ol[4].container[ol[0]].storage is ol[5].container[ol[0]].storage
    assert nl[4].container[nl[0]].storage is nl[5].container[nl[0]].storage
    assert nl[5](3) == ol[5](3)
    # two calls with x=3 and a=1 on the shared state: 0 + 3 + 3
    assert nl[4].value[nl[0]] == 6

    assert np.all(nl[6][nl[2]] == np.asarray([2, 3., 4]))
def fn():
    """Compile a function whose update on ``x`` is ``s + x``.

    ``s`` is created together with ``x`` but is not listed among the
    inputs handed to ``function``; only ``x`` is declared.
    """
    x_var, s_var = tt.scalars("xs")
    update_expr = s_var + x_var
    function([In(x_var, update=update_expr)], x_var)