Ejemplo n.º 1
0
    def refresh_droot_impact(self):
        """
        Return up-to-date ``(droot, impact, root_destroyer)`` mappings.

        The three dictionaries are rebuilt from ``self.destroyers`` only when
        ``self.stale_droot`` is set; otherwise the cached attributes are
        returned unchanged.

        Raises
        ------
        NotImplementedError
            If a ``destroy_map`` entry does not list exactly one input.
        InconsistencyError
            If the same root is found for more than one destroyed input.
        """
        if not self.stale_droot:
            return self.droot, self.impact, self.root_destroyer

        # destroyed view + nonview variables -> foundation
        new_droot = OrderedDict()
        # destroyed nonview variable -> it + all views of it
        new_impact = OrderedDict()
        # root -> destroyer apply
        new_root_destroyer = OrderedDict()

        for node in self.destroyers:
            for _out_idx, destroyed_idxs in node.op.destroy_map.items():
                if len(destroyed_idxs) != 1:
                    raise NotImplementedError()
                destroyed = node.inputs[destroyed_idxs[0]]
                root = getroot(destroyed, self.view_i)
                if root in new_droot:
                    raise InconsistencyError(
                        "Multiple destroyers of %s" % root)
                new_droot[root] = root
                new_root_destroyer[root] = node

                views = get_impact(root, self.view_o)
                for view in views:
                    assert view not in new_droot
                    new_droot[view] = root

                # The root itself is part of its own impact set.
                views.add(root)
                new_impact[root] = views

        self.droot = new_droot
        self.impact = new_impact
        self.root_destroyer = new_root_destroyer
        self.stale_droot = False
        return self.droot, self.impact, self.root_destroyer
    def get_monitoring_channels(self, data):
        """
        Collect monitoring channels from the generator, the discriminator
        and (when present) the inferer.

        Parameters
        ----------
        data : object
            Batch forwarded to the generator's channels and, paired with
            ``None``, to the discriminator's channels.

        Returns
        -------
        rval : OrderedDict
            Channels prefixed by origin: ``gen_``, ``dis_on_data_``
            (discriminator evaluated on ``data``), ``dis_on_samp_``
            (discriminator evaluated on generator samples) and ``inf_``.
            Each group is only included when the matching ``monitor_*``
            flag is truthy.
        """
        rval = OrderedDict()

        g_ch = self.generator.get_monitoring_channels(data)
        d_ch = self.discriminator.get_monitoring_channels((data, None))

        samples, _, conditional_data, _ = self.generator.sample_and_noise(100)
        d_samp_ch = self.discriminator.get_monitoring_channels(
            ((samples, conditional_data), None))

        i_ch = OrderedDict()
        if self.inferer is not None:
            batch_size = self.inference_monitoring_batch_size
            sample, noise, conditional_data, _ = self.generator.sample_and_noise(
                batch_size)
            i_ch.update(
                self.inferer.get_monitoring_channels(
                    ((sample, conditional_data), noise)))

        if self.monitor_generator:
            for key in g_ch:
                rval['gen_' + key] = g_ch[key]
        if self.monitor_discriminator:
            # Channels computed on real data go under 'dis_on_data_' and
            # channels computed on generated samples under 'dis_on_samp_'
            # (the two sources used to be swapped).
            for key in d_ch:
                rval['dis_on_data_' + key] = d_ch[key]
            for key in d_samp_ch:
                rval['dis_on_samp_' + key] = d_samp_ch[key]
        if self.monitor_inference:
            for key in i_ch:
                rval['inf_' + key] = i_ch[key]
        return rval
Ejemplo n.º 3
0
    def on_prune(self, fgraph, app, reason):
        """
        Forget an Apply instance that was previously imported.

        Removes ``app`` from the bookkeeping structures (``debug_all_apps``,
        ``clients``, ``destroyers``, ``view_i``, ``view_o``) and marks the
        droot cache as stale.

        Raises
        ------
        ProtocolError
            If ``app`` was never imported.
        NotImplementedError
            If a ``view_map`` entry lists more than one input.
        """
        if app not in self.debug_all_apps:
            raise ProtocolError("prune without import")
        self.debug_all_apps.remove(app)

        # UPDATE self.clients (each distinct input is handled once)
        for pruned_input in OrderedSet(app.inputs):
            del self.clients[pruned_input][app]

        if getattr(app.op, 'destroy_map', None):
            self.destroyers.remove(app)

        # Empty client dictionaries are intentionally left in place.

        # UPDATE self.view_i, self.view_o
        view_map = getattr(app.op, 'view_map', OrderedDict())
        for out_idx, in_idxs in iteritems(view_map):
            if len(in_idxs) > 1:
                # destroying this output invalidates multiple inputs
                raise NotImplementedError()
            view = app.outputs[out_idx]
            viewed = app.inputs[in_idxs[0]]

            del self.view_i[view]

            viewers = self.view_o[viewed]
            viewers.remove(view)
            if not viewers:
                del self.view_o[viewed]

        self.stale_droot = True
Ejemplo n.º 4
0
    def __init__(self, do_imports_on_attach=True):
        """
        Create a handler that is not yet attached to any function graph.

        Parameters
        ----------
        do_imports_on_attach : bool
            Stored on the instance as ``self.do_imports_on_attach``.
        """
        self.fgraph = None
        self.do_imports_on_attach = do_imports_on_attach

        # Maps every variable in the graph to its "foundation" (deepest
        # ancestor in view chain).
        # TODO: change name to var_to_vroot.
        self.droot = OrderedDict()

        # Maps a variable to all variables that are indirect or direct views
        # of it (including itself); essentially the inverse of droot.
        # TODO: do all variables appear in this dict, or only those that are
        #       foundations?
        # TODO: do only destroyed variables go in here? one old docstring
        #       said so.
        # TODO: rename to x_to_views after reverse engineering what x is
        self.impact = OrderedDict()

        # If a var is destroyed, then this dict will map droot[var] to the
        # apply node that destroyed var.
        # TODO: rename to vroot_to_destroyer
        self.root_destroyer = OrderedDict()
Ejemplo n.º 5
0
    def on_import(self, fgraph, app, reason):
        """
        Register a newly imported Apply instance.

        Records ``app`` in ``debug_all_apps``, in ``destroyers`` when its op
        has a non-empty ``destroy_map``, in the view maps for every
        ``view_map`` entry, and in the per-variable client counts. Marks the
        droot cache as stale.

        Raises
        ------
        ProtocolError
            If ``app`` was already imported.
        NotImplementedError
            If a ``view_map`` entry lists more than one input.
        """
        if app in self.debug_all_apps:
            raise ProtocolError("double import")
        self.debug_all_apps.add(app)

        # Destructive ops go on the watch list.
        if getattr(app.op, 'destroy_map', {}):
            self.destroyers.add(app)

        # Record the view relation in both directions.
        for out_idx, in_idxs in iteritems(getattr(app.op, 'view_map', {})):
            if len(in_idxs) > 1:
                raise NotImplementedError(
                    'destroying this output invalidates multiple inputs',
                    (app.op))
            view = app.outputs[out_idx]
            viewed = app.inputs[in_idxs[0]]
            self.view_i[view] = viewed
            self.view_o.setdefault(viewed, OrderedSet()).add(view)

        # Count how many times app uses each of its inputs.
        for used in app.inputs:
            per_apply = self.clients.setdefault(used, OrderedDict())
            per_apply[app] = per_apply.get(app, 0) + 1

        # Every output gets a (possibly empty) client table.
        for produced in app.outputs:
            self.clients.setdefault(produced, OrderedDict())

        self.stale_droot = True
Ejemplo n.º 6
0
    def on_attach(self, fgraph):
        """
        Attach this handler to ``fgraph`` and initialise its bookkeeping.

        When attaching to a new fgraph, check that
            1) This DestroyHandler wasn't already attached to some fgraph
               (its data structures are only set up to serve one).
            2) The FunctionGraph doesn't already have a DestroyHandler.
               This would result in it validating everything twice, causing
               compilation to be slower.

        On success the method calls ``self.unpickle(fgraph)``, sets
        ``fgraph.destroy_handler`` to this instance, and resets
        ``destroyers``, ``view_i``, ``view_o``, ``clients``,
        ``stale_droot`` and ``debug_all_apps`` on ``self``.

        Raises
        ------
        Exception
            If this instance already has a non-None ``self.fgraph``.
        toolbox.AlreadyThere
            If ``fgraph`` already has a ``destroyers`` or
            ``destroy_handler`` attribute.
        """

        # Do the checking #
        already_there = False
        # NOTE(review): when self.fgraph is fgraph (and not None) the next
        # check raises before already_there is consulted, so this branch
        # never leads to AlreadyThere -- confirm whether that is intended.
        if self.fgraph is fgraph:
            already_there = True
        if self.fgraph is not None:
            raise Exception("A DestroyHandler instance can only serve one"
                            " FunctionGraph. (Matthew 6:24)")
        for attr in ('destroyers', 'destroy_handler'):
            if hasattr(fgraph, attr):
                already_there = True

        if already_there:
            # FunctionGraph.attach_feature catches AlreadyThere and cancels the attachment
            raise toolbox.AlreadyThere(
                "DestroyHandler feature is already present"
                " or in conflict with another plugin.")

        # Annotate the FunctionGraph #
        self.unpickle(fgraph)
        fgraph.destroy_handler = self

        self.fgraph = fgraph
        self.destroyers = OrderedSet(
        )  # set of Apply instances with non-null destroy_map
        self.view_i = OrderedDict()  # variable -> variable used in calculation
        self.view_o = OrderedDict(
        )  # variable -> set of variables that use this one as a direct input
        # clients: how many times does an apply use a given variable
        self.clients = OrderedDict()  # variable -> apply -> ninputs
        self.stale_droot = True

        self.debug_all_apps = OrderedSet()
        # Optionally let the Bookkeeper base behaviour run on attach.
        if self.do_imports_on_attach:
            toolbox.Bookkeeper.on_attach(self, fgraph)
Ejemplo n.º 7
0
def test_constrain_filter_max_norm():
    """
    Test that ConstrainFilterMaxNorm matches a manual implementation.
    """
    max_norm = 1.
    extension = ConstrainFilterMaxNorm(max_norm)

    # Only row 0 carries non-zero entries; each column is one test case:
    #   column 0: zero norm
    #   column 1: smaller than the limit
    #   column 2: exactly on the limit
    #   column 3: too big
    initial = np.zeros((2, 4))
    initial[0, 1] = .5
    initial[0, 2] = 1.
    initial[0, 3] = 2.

    # The shared value starts at half the target; the update doubles it.
    W = sharedX(initial / 2.)
    model = ModelWithW(W)
    model.extensions.append(extension)

    updates = OrderedDict()
    updates[W] = W * 2.
    model.modify_updates(updates)
    apply_updates = function([], updates=updates)
    apply_updates()
    result = W.get_value()

    assert result.shape == (2, 4)
    assert np.abs(result[1, :]).max() == 0
    assert result[0, 0] == 0.
    assert result[0, 1] == 0.5
    assert result[0, 2] == 1.
    assert result[0, 3] == 1., result[0, 3]
def test_pickle_unpickle_without_reoptimization():
    """
    Check that a compiled function unpickled with
    ``reoptimize_unpickled_function`` disabled returns the same value as
    the original function.
    """
    mode = theano.config.mode
    if mode in ("DEBUG_MODE", "DebugMode"):
        mode = "FAST_RUN"

    x1 = T.fmatrix('x1')
    x2 = T.fmatrix('x2')
    x3 = theano.shared(numpy.ones((10, 10), dtype=floatX))
    x4 = theano.shared(numpy.ones((10, 10), dtype=floatX))
    y = T.sum(T.sum(T.sum(x1**2 + x2) + x3) + x4)

    updates = OrderedDict()
    updates[x3] = x3 + 1
    updates[x4] = x4 + 1
    f = theano.function([x1, x2], y, updates=updates, mode=mode)

    # Pickle the compiled function before it is ever called.
    pickled = cPickle.dumps(f, -1)

    in1 = numpy.ones((10, 10), dtype=floatX)
    in2 = numpy.ones((10, 10), dtype=floatX)

    # Unpickle with re-optimization switched off, restoring the flag after.
    saved_flag = theano.config.reoptimize_unpickled_function
    try:
        theano.config.reoptimize_unpickled_function = False
        f_ = cPickle.loads(pickled)
        expected = f(in1, in2)
        actual = f_(in1, in2)
        assert expected == actual
    finally:
        theano.config.reoptimize_unpickled_function = saved_flag
Ejemplo n.º 9
0
    def make_layer_to_symbolic_state(self, num_examples, rng=None):
        """
        Build an ordered mapping from each layer to a symbolic state.

        The visible layer comes first, followed by the hidden layers. Each
        state is whatever the layer's ``make_symbolic_state(num_examples,
        rng)`` returns.

        .. todo::

            Explain the difference with `make_layer_to_state`

        Parameters
        ----------
        num_examples : int
            Forwarded to every layer's ``make_symbolic_state``.
        rng : object
            Forwarded to every layer's ``make_symbolic_state``. Despite the
            ``None`` default it must be provided; ``None`` trips an
            assertion.

        Returns
        -------
        rval : OrderedDict
            layer -> symbolic state.
        """
        all_layers = [self.visible_layer] + self.hidden_layers

        assert rng is not None

        symbolic_states = [
            layer.make_symbolic_state(num_examples, rng)
            for layer in all_layers
        ]

        return OrderedDict(safe_zip(all_layers, symbolic_states))
Ejemplo n.º 10
0
    def __init__(self, valid=None, invalid=None, valid_equivalent=None):
        """
        Check if variables can be expressed without using variables in
        invalid.

        Parameters
        ----------
        valid : iterable, optional
            Nodes that are valid to have in the graph computing outputs.
        invalid : iterable, optional
            Nodes that are NOT valid to have in the graph computing outputs.
        valid_equivalent : dict-like, optional
            Maps some invalid variables to valid ones that can be used
            instead. A copy is stored; its keys are added to ``invalid``
            and its values to ``valid``.
        """
        valid = [] if valid is None else valid
        invalid = [] if invalid is None else invalid
        if valid_equivalent is None:
            valid_equivalent = OrderedDict()

        self.valid = set(valid)
        self.invalid = set(invalid)

        # Keep a private copy of the invalid -> valid mapping.
        self.valid_equivalent = valid_equivalent.copy()
        self.valid.update(valid_equivalent.values())
        self.invalid.update(valid_equivalent.keys())
Ejemplo n.º 11
0
    def get_updates(self, params, loss):
        """
        Build the update dictionary for ``params`` given ``loss``.

        For every parameter two shared accumulators (named ``'momentum'``
        and ``'velocity'``) are created and updated as exponential moving
        averages of the gradient and the squared gradient. Both averages
        are divided by ``1 - beta*_t`` before forming the step, where the
        shared ``beta1_t`` / ``beta2_t`` are multiplied by ``beta1`` /
        ``beta2`` on every update.

        Parameters
        ----------
        params : list
            Shared variables to optimise (``get_value`` and
            ``broadcastable`` are read from each).
        loss : object
            Passed to ``self.get_gradients`` together with ``params``.

        Returns
        -------
        OrderedDict
            The same object stored in ``self.updates``.
        """
        grads = self.get_gradients(params, loss)
        self.updates = OrderedDict()

        schedule = self.learning_rate
        if isinstance(schedule, LearningRateDecay):
            lr = schedule.learning_rate
            self.updates.update(schedule.get_updates())
        else:
            lr = schedule
        beta1, beta2, eps = self.beta1, self.beta2, self.eps

        one = T.constant(utils.floatX(1.))
        beta1_t = theano.shared(utils.floatX(beta1), name='beta1_t')
        beta2_t = theano.shared(utils.floatX(beta2), name='beta2_t')

        for param, grad in zip(params, grads):
            momentum = theano.shared(param.get_value() * 0.,
                                     broadcastable=param.broadcastable,
                                     name='momentum')
            velocity = theano.shared(param.get_value() * 0.,
                                     broadcastable=param.broadcastable,
                                     name='velocity')

            # Moving averages of the gradient and of its square.
            m_t = beta1 * momentum + (one - beta1) * grad
            v_t = beta2 * velocity + (one - beta2) * grad**2

            # Rescale both averages by the running beta powers.
            m_hat = m_t / (1. - beta1_t)
            v_hat = v_t / (1. - beta2_t)
            step = lr * m_hat / (T.sqrt(v_hat) + eps)

            self.updates[momentum] = m_t
            self.updates[velocity] = v_t
            self.updates[param] = param - step

        self.updates[beta1_t] = beta1_t * beta1
        self.updates[beta2_t] = beta2_t * beta2
        return self.updates
    def get_monitoring_channels(self, model, data, **kwargs):
        """
        Build monitoring channels for the generator/discriminator pair.

        Always reports ``false_negatives`` (share of discriminator outputs
        on ``data`` below 0.5), ``false_positives`` (share of outputs on
        generated samples above 0.5), ``sample_grad_norm`` and
        ``now_train_generator``. The ``objective_*`` channels depend on the
        model's ``monitor_*`` flags.

        Returns
        -------
        rval : OrderedDict
        """
        channels = OrderedDict()

        batch_size = data.shape[0]

        generator = model.generator
        discriminator = model.discriminator

        data_pred = discriminator.fprop(data)
        channels['false_negatives'] = T.cast((data_pred < 0.5).mean(),
                                             'float32')

        samples = generator.sample(batch_size)
        sample_pred = discriminator.fprop(samples)
        channels['false_positives'] = T.cast((sample_pred > 0.5).mean(),
                                             'float32')

        # Norm of the gradient of the negated cost w.r.t. the samples.
        cost = discriminator.cost_from_X((samples, sample_pred))
        sample_grad = T.grad(-cost, samples)
        channels['sample_grad_norm'] = T.sqrt(T.sqr(sample_grad).sum())

        _S, d_obj, g_obj, i_obj = self.get_samples_and_objectives(model, data)
        if model.monitor_inference and i_obj != 0:
            channels['objective_i'] = i_obj
        if model.monitor_discriminator:
            channels['objective_d'] = d_obj
        if model.monitor_generator:
            channels['objective_g'] = g_obj

        channels['now_train_generator'] = self.now_train_generator
        return channels
Ejemplo n.º 13
0
    def orderings(self):
        """
        Return dict d s.t. d[node] is a list of nodes that must be evaluated
        before node itself can be evaluated.

        The result merges the ``orderings(self)`` output of every feature
        that defines such a method; duplicate prerequisites are dropped
        while keeping first-seen order.

        Notes
        -----
        This only calls the orderings() fct on all features. It does not
        take care of computing dependencies by itself.

        Raises
        ------
        TypeError
            If a feature returns something other than an OrderedDict, or a
            prerequisite collection that is neither a list nor an
            OrderedSet.
        """
        merged = OrderedDict()
        assert isinstance(self._features, list)
        for feature in self._features:
            if not hasattr(feature, 'orderings'):
                continue
            feature_ords = feature.orderings(self)
            if not isinstance(feature_ords, OrderedDict):
                raise TypeError("Non-deterministic return value from " +
                                str(feature.orderings) +
                                ". Nondeterministic object is " +
                                str(feature_ords))
            for node, prereqs in iteritems(feature_ords):
                if not isinstance(prereqs, (list, OrderedSet)):
                    raise TypeError(
                        "prereqs must be a type with a "
                        "deterministic iteration order, or toposort "
                        " will be non-deterministic.")
                merged.setdefault(node, []).extend(prereqs)

        # eliminate duplicate prereqs (only existing keys are reassigned)
        for node in merged:
            merged[node] = list(OrderedSet(merged[node]))
        return merged
Ejemplo n.º 14
0
def forced_replace(out, x, y):
    """
    :param out: Theano Variable
    :param x: Theano Variable
    :param y: Theano Variable

    Walk the graph that computes ``out`` looking for variables whose
    computation is equal to ``x`` (as judged by ``equal_computations``) and
    return a clone of ``out`` in which each such occurrence is replaced
    by ``y``. Returns ``None`` when ``out`` is ``None``.

    For example:
        out := sigmoid(wu)*(1-sigmoid(wu))
        x := sigmoid(wu)
        forced_replace(out, x, y) := y*(1-y)
    """
    if out is None:
        return None

    # Depth-first walk with an explicit stack. ``seen`` holds variables that
    # were already examined so shared sub-graphs are only checked once.
    seen = set()
    matches = []
    pending = [out]
    while pending:
        current = pending.pop()
        if current in seen:
            continue
        seen.add(current)
        if equal_computations([current], [x]):
            # A match is replaced as a whole; do not look inside it.
            matches.append(current)
        elif current.owner:
            # Reversed so inputs are popped in their original order.
            pending.extend(reversed(current.owner.inputs))

    return clone(out, replace=OrderedDict((v, y) for v in matches))
Ejemplo n.º 15
0
def test_hash_from_dict():
    """
    Check that ``hash_from_dict`` gives distinct hashes for a set of
    distinct dicts (plain and OrderedDict variants), and the same hash for
    a tuple value and the equivalent list value.
    """
    plain_dicts = [
        {},
        {0: 0},
        {0: 1},
        {1: 0},
        {1: 1},
        {0: (0, )},
        {0: [1]},
        {0: (0, 1)},
        {0: [1, 0]},
    ]
    candidates = plain_dicts + [OrderedDict(d) for d in plain_dicts]

    seen_hashes = []
    for candidate in candidates:
        digest = hash_from_dict(candidate)
        assert digest not in seen_hashes
        seen_hashes.append(digest)

    # List are not hashable. So they are transformed into tuple.
    assert hash_from_dict({0: (0, )}) == hash_from_dict({0: [0]})
    def get_monitoring_channels(self, data):
        """
        Return the generator's monitoring channels.

        Parameters
        ----------
        data : tuple or None
            When not ``None``, a pair whose second element is the
            conditional data; its first dimension gives the number of
            samples. When ``None``, 100 conditions are drawn from
            ``self.condition_distribution``.

        Returns
        -------
        rval : OrderedDict
            Channels reported by ``self.mlp`` (skipped with a warning if
            that call raises), plus ``'ll'`` and ``'nll'`` when
            ``self.monitor_ll`` is truthy.
        """
        if data is not None:
            _, conditional_data = data
            m = conditional_data.shape[0]
        else:
            m = 100
            conditional_data = self.condition_distribution.sample(m)

        noise = self.get_noise((m, self.noise_dim))
        channels = OrderedDict()

        mlp_input = ((noise, conditional_data), None)
        try:
            channels.update(self.mlp.get_monitoring_channels(mlp_input))
        except Exception:
            # Best effort: the mlp channels are optional.
            warnings.warn(
                "something went wrong with generator.mlp's monitoring channels"
            )

        if self.monitor_ll:
            log_likelihood = self.ll(data, self.ll_n_samples, self.ll_sigma)
            channels['ll'] = T.cast(log_likelihood,
                                    theano.config.floatX).mean()
            channels['nll'] = -channels['ll']
        return channels
    def get_monitoring_channels(self, model, data, **kwargs):
        """
        Build monitoring channels for a conditional generator/discriminator
        pair.

        ``data`` is unpacked as ``(X_data, X_condition)``. Always reports
        ``false_negatives``, ``false_positives``, ``sample_grad_norm`` and
        ``now_train_generator``; the ``objective_*`` channels depend on the
        model's ``monitor_*`` flags.

        Returns
        -------
        rval : OrderedDict
        """
        channels = OrderedDict()

        space, sources = self.get_data_specs(model)
        X_data, X_condition = data
        batch_size = X_data.shape[space.get_batch_axis()]

        generator = model.generator
        discriminator = model.discriminator

        # False negatives: discriminator output on empirical samples.
        data_pred = discriminator.fprop((X_data, X_condition))
        channels['false_negatives'] = T.cast((data_pred < 0.5).mean(),
                                             'float32')

        # False positives: discriminator output on generated samples.
        sampled_condition = self.condition_distribution.sample(batch_size)
        samples = generator.sample(sampled_condition)
        sample_pred = discriminator.fprop((samples, sampled_condition))
        channels['false_positives'] = T.cast((sample_pred > 0.5).mean(),
                                             'float32')

        # Norm of the gradient of the negated cost w.r.t. the samples.
        cost = discriminator.cost_from_X(
            ((samples, sampled_condition), sample_pred))
        sample_grad = T.grad(-cost, samples)
        channels['sample_grad_norm'] = T.sqrt(T.sqr(sample_grad).sum())

        _S, d_obj, g_obj, i_obj = self.get_samples_and_objectives(model, data)
        if model.monitor_inference and i_obj != 0:
            channels['objective_i'] = i_obj
        if model.monitor_discriminator:
            channels['objective_d'] = d_obj
        if model.monitor_generator:
            channels['objective_g'] = g_obj

        channels['now_train_generator'] = self.now_train_generator
        return channels
Ejemplo n.º 18
0
 def get_updates(self):
     """
     Build the updates that advance the epoch counter and recompute the
     learning rate as
     ``initial_rate * decay_rate ** (epoch / step)`` cast to floatX.

     Returns
     -------
     OrderedDict
         The same object stored in ``self.updates``.
     """
     self.updates = OrderedDict()
     epoch_counter = theano.shared(1, name='epoch_t')
     self.updates[epoch_counter] = epoch_counter + 1

     exponent = epoch_counter / self.step
     decayed_rate = self.initial_rate * self.decay_rate**exponent
     self.updates[self.learning_rate] = T.cast(decayed_rate,
                                               theano.config.floatX)
     return self.updates
Ejemplo n.º 19
0
    def get_layer_monitoring_channels(self):
        """
        Report the min, max and mean of every parameter.

        Returns
        -------
        rval : OrderedDict
            For each parameter returned by ``self.get_params()``, the keys
            ``<name>_min``, ``<name>_max`` and ``<name>_mean`` in that
            order.
        """
        channels = OrderedDict()

        for param in self.get_params():
            statistics = (
                ("_min", param.min),
                ("_max", param.max),
                ("_mean", param.mean),
            )
            for suffix, compute in statistics:
                channels[param.name + suffix] = compute()

        return channels
Ejemplo n.º 20
0
 def get_monitoring_channels(self, data):
     """
     Return the monitoring channels reported by ``self.mlp`` for ``data``.

     If the mlp call raises, a warning is issued and whatever was
     collected so far (normally nothing) is returned.
     """
     channels = OrderedDict()
     try:
         mlp_channels = self.mlp.get_monitoring_channels(data)
         channels.update(mlp_channels)
     except Exception:
         # Best effort: the mlp channels are optional.
         warnings.warn(
             "something went wrong with compressor.mlp's monitoring channels"
         )
     return channels
Ejemplo n.º 21
0
    def get_monitoring_channels(self, data):
        """
        Build monitoring channels from a mean-field run on ``data``.

        Parameters
        ----------
        data : object
            Batch validated against the space returned by
            ``self.get_monitoring_data_specs()`` and passed to ``self.mf``.

        Returns
        -------
        rval : OrderedDict
            * ``vis_<key>``: channels of the visible layer.
            * ``<layer_name>_<key>``: channels of each hidden layer.
            * ``mf_<layer_name>_<key>``: channels computed from the last
              mean-field state of each hidden layer.
            * ``max_var_param_diff`` and
              ``mean_<layer_name>_var_param_diff``: differences between the
              last two entries of the mean-field history (only when the
              history has more than one entry).
        """
        space, source = self.get_monitoring_data_specs()
        space.validate(data)
        X = data
        history = self.mf(X, return_history=True)
        q = history[-1]

        rval = OrderedDict()

        ch = self.visible_layer.get_monitoring_channels()
        for key in ch:
            rval['vis_' + key] = ch[key]

        for state, layer in safe_zip(q, self.hidden_layers):
            ch = layer.get_monitoring_channels()
            for key in ch:
                rval[layer.layer_name + '_' + key] = ch[key]
            ch = layer.get_monitoring_channels_from_state(state)
            for key in ch:
                rval['mf_' + layer.layer_name + '_' + key] = ch[key]

        if len(history) > 1:
            prev_q = history[-2]

            flat_q = flatten(q)
            flat_prev_q = flatten(prev_q)

            # Largest absolute change of any variational parameter between
            # the last two mean-field iterations.
            mx = None
            for new, old in safe_zip(flat_q, flat_prev_q):
                cur_mx = abs(new - old).max()
                if new is old:
                    # Single-argument print(...) is valid on Python 2 and 3
                    # and prints the same text as the old print statement.
                    print(str(new) + ' is ' + str(old))
                    assert False
                if mx is None:
                    mx = cur_mx
                else:
                    mx = T.maximum(mx, cur_mx)

            rval['max_var_param_diff'] = mx

            # Per-layer summed absolute change divided by
            # batch_size * total state dimension.
            for layer, new, old in safe_zip(self.hidden_layers, q, prev_q):
                sum_diff = 0.
                for sub_new, sub_old in safe_zip(flatten(new), flatten(old)):
                    sum_diff += abs(sub_new - sub_old).sum()
                denom = self.batch_size * layer.get_total_state_space(
                ).get_total_dimension()
                denom = np.cast[config.floatX](denom)
                rval['mean_' + layer.layer_name +
                     '_var_param_diff'] = sum_diff / denom

        return rval
Ejemplo n.º 22
0
    def orderings(self, function_graph):
        """
        Called by toposort. It should return a dictionary of
        {node: predecessors} where predecessors is a list of
        nodes that should be computed before the key node.

        This default implementation imposes no constraints and returns a
        new, empty OrderedDict on every call.

        If you raise an exception in this function, the state of the graph
        might be broken for all intents and purposes.
        """
        no_constraints = OrderedDict()
        return no_constraints
Ejemplo n.º 23
0
    def on_change_input(self, fgraph, app, i, old_r, new_r, reason):
        """
        app.inputs[i] changed from old_r to new_r.

        Moves one client count from ``old_r`` to ``new_r``, re-targets any
        view whose viewed input is input ``i``, and marks the droot cache
        as stale. When ``app`` is the special key ``'output'`` only the
        stale flag is set.

        Raises
        ------
        ProtocolError
            If ``app`` was never imported, or ``app.inputs[i]`` is not
            ``new_r`` for a view on input ``i``.
        NotImplementedError
            If a ``view_map`` entry lists more than one input.
        """
        if app == 'output':
            # app == 'output' is special key that means FunctionGraph is
            # redefining which nodes are being considered 'outputs' of the
            # graph; there is no bookkeeping to adjust.
            self.stale_droot = True
            return

        if app not in self.debug_all_apps:
            raise ProtocolError("change without import")

        # UPDATE self.clients
        old_clients = self.clients[old_r]
        old_clients[app] -= 1
        if old_clients[app] == 0:
            del old_clients[app]

        new_clients = self.clients.setdefault(new_r, OrderedDict())
        new_clients[app] = new_clients.get(app, 0) + 1

        # UPDATE self.view_i, self.view_o
        view_map = getattr(app.op, 'view_map', OrderedDict())
        for out_idx, in_idxs in iteritems(view_map):
            if len(in_idxs) > 1:
                # destroying this output invalidates multiple inputs
                raise NotImplementedError()
            in_idx = in_idxs[0]
            view = app.outputs[out_idx]
            if in_idx != i:
                continue
            if app.inputs[in_idx] is not new_r:
                raise ProtocolError("wrong new_r on change")

            self.view_i[view] = new_r

            self.view_o[old_r].remove(view)
            if not self.view_o[old_r]:
                del self.view_o[old_r]

            self.view_o.setdefault(new_r, OrderedSet()).add(view)

        self.stale_droot = True
Ejemplo n.º 24
0
def test_known_grads():
    """
    Tests that the grad method with no known_grads matches what happens
    if you put its own known_grads in for each variable.
    """
    tensor = theano.tensor

    full_range = tensor.arange(10)
    x = tensor.scalar('x')
    t = tensor.iscalar('t')
    ft = full_range[t]
    ft.name = 'ft'
    coeffs = tensor.vector('c')
    ct = coeffs[t]
    ct.name = 'ct'
    p = x**ft
    p.name = 'p'
    y = ct * p
    y.name = 'y'
    cost = tensor.sqr(y)
    cost.name = 'cost'

    # Sets of variables used in turn as the "known gradient" frontier.
    layers = [[cost], [y], [ct, p], [ct, x, ft], [coeffs, t, full_range, x]]

    inputs = [coeffs, t, x]

    rng = np.random.RandomState([2012, 11, 15])
    raw_values = [rng.randn(10), rng.randint(10), rng.randn()]
    values = [
        np.cast[ipt.dtype](value) for ipt, value in zip(inputs, raw_values)
    ]

    # Reference gradients computed directly from the cost.
    true_grads = tensor.grad(cost, inputs, disconnected_inputs='ignore')
    true_grads = theano.function(inputs, true_grads)
    true_grads = true_grads(*values)

    for layer in layers:
        first = tensor.grad(cost, layer, disconnected_inputs='ignore')
        known = OrderedDict(izip(layer, first))
        full = tensor.grad(cost=None,
                           known_grads=known,
                           wrt=inputs,
                           disconnected_inputs='ignore')
        full = theano.function(inputs, full)
        full = full(*values)
        assert len(true_grads) == len(full)
        for a, b, var in zip(true_grads, full, inputs):
            if np.allclose(a, b):
                continue
            # Dump everything useful for debugging before failing.
            print('Failure')
            print(a)
            print(b)
            print(var)
            print(layer)
            for v in known:
                print(v, ':', theano.function(inputs, known[v])(*values))
            assert False
Ejemplo n.º 25
0
    def __init__(self, *axis):
        """
        Store the ``(axis, broadcastable)`` pairs, sorted, in ``self.axis``.

        Raises
        ------
        TypeError
            If an axis is not an integer or a broadcast flag is not a bool.
        """
        # Sort them to make sure we merge all possible case.
        self.axis = OrderedDict(sorted(axis))
        for axis_idx, flag in iteritems(self.axis):
            if not isinstance(axis_idx, (numpy.integer, integer_types)):
                raise TypeError("Rebroadcast needs integer axes. "
                                "Got {}".format(axis_idx))

            if not isinstance(flag, (numpy.bool_, bool)):
                raise TypeError("Rebroadcast needs bool for new broadcast "
                                "pattern. Got {}".format(flag))
Ejemplo n.º 26
0
 def __init__(self, *axis):
     """
     Store the ``(axis, broadcastable)`` pairs, sorted, in ``self.axis``.

     Each axis is asserted to be an integer and each flag a bool.
     """
     # Sort them to make sure we merge all possible case.
     self.axis = OrderedDict(sorted(axis))
     for axis_idx, flag in iteritems(self.axis):
         axis_ok = isinstance(axis_idx, (numpy.integer, int))
         assert axis_ok, ("Rebroadcast needs integer axes. Got ",
                          axis_idx)
         flag_ok = isinstance(flag, bool)
         assert flag_ok, (
             "Rebroadcast needs bool for new broadcast pattern. Got ",
             flag)
Ejemplo n.º 27
0
    def run(replay, log=None):
        """
        Compile and run a small function under ``RecordMode``.

        Parameters
        ----------
        replay : bool
            When falsy a fresh ``StringIO`` is used as the record file and
            its contents are returned. When truthy the record is created
            over ``StringIO(log)`` with ``replay=replay``.
        log : str, optional
            Previously returned log text; only read when ``replay`` is
            truthy.

        Returns
        -------
        str or None
            The log text when ``replay`` is falsy, otherwise ``None``.
        """

        if not replay:
            log = StringIO()
        else:
            log = StringIO(log)
        record = Record(replay=replay, file_object=log)

        disturb_mem.disturb_mem()

        mode = RecordMode(record=record)

        b = sharedX(np.zeros((2, )), name='b')
        channels = OrderedDict()

        disturb_mem.disturb_mem()

        v_max = b.max(axis=0)
        v_min = b.min(axis=0)
        v_range = v_max - v_min

        updates = []
        for i, val in enumerate([
                v_max.max(),
                v_max.min(),
                v_range.max(),
        ]):
            disturb_mem.disturb_mem()
            s = sharedX(0., name='s_' + str(i))
            updates.append((s, val))

        # Strip the names of all ancestors except 'b' and the two-character
        # names starting with 's'. The name is compared by value: the old
        # identity test against a string literal only worked when the
        # interpreter happened to share the string object.
        for var in theano.gof.graph.ancestors(update for _, update in updates):
            if var.name is not None and var.name != 'b':
                if var.name[0] != 's' or len(var.name) != 2:
                    var.name = None

        for key in channels:
            updates.append((s, channels[key]))
        f = theano.function([],
                            mode=mode,
                            updates=updates,
                            on_unused_input='ignore',
                            name='f')
        for output in f.maker.fgraph.outputs:
            mode.record.handle_line(var_descriptor(output) + '\n')
        disturb_mem.disturb_mem()
        f()

        mode.record.f.flush()

        if not replay:
            return log.getvalue()
Ejemplo n.º 28
0
    def make_layer_to_state(self, num_examples, rng=None):
        """
        Makes and returns a dictionary mapping layers to states.

        By states, we mean here a real assignment, not a mean field
        state. For example, for a layer containing binary random
        variables, the state will be a shared variable containing
        values in {0,1}, not [0,1]. The visible layer will be included.

        Uses a dictionary so it is easy to unambiguously index a layer
        without needing to remember rules like vis layer = 0, hiddens
        start at 1, etc.

        Parameters
        ----------
        num_examples : int
            Forwarded to every layer's ``make_state``; every component of
            every returned state must have this many rows.
        rng : object, optional
            Forwarded to every layer's ``make_state``. Defaults to
            ``self.rng`` when ``None``.

        Returns
        -------
        rval : OrderedDict
            layer -> state, visible layer first.

        Raises
        ------
        ValueError
            If some state component's first dimension differs from
            ``num_examples``.
        """

        # Make a list of all layers
        layers = [self.visible_layer] + self.hidden_layers

        if rng is None:
            rng = self.rng

        states = [layer.make_state(num_examples, rng) for layer in layers]

        # Materialise the pairs: they are walked twice below (validation,
        # then dict construction), and a one-shot iterator would leave the
        # returned dictionary empty.
        zipped = list(safe_zip(layers, states))

        def recurse_check(layer, state):
            # States may be nested lists/tuples of shared variables.
            if isinstance(state, (list, tuple)):
                for elem in state:
                    recurse_check(layer, elem)
            else:
                val = state.get_value()
                m = val.shape[0]
                if m != num_examples:
                    raise ValueError(layer.layer_name + " gave state with " +
                                     str(m) + " examples in some component."
                                     "We requested " + str(num_examples))

        for layer, state in zipped:
            recurse_check(layer, state)

        rval = OrderedDict(zipped)

        return rval
 def gradient_descent(self, loss):
     """Momentum GD with gradient clipping.

     Gradients are rescaled by ``5.0 / max(5.0, grad_norm)``; when the
     global gradient norm is NaN or infinite, ``0.1 * param`` is used in
     place of the gradient.

     Returns
     -------
     updates : OrderedDict
         param -> param + update_step.
     """
     grads = T.grad(loss, self.params)
     self.momentum_velocity_ = [0.] * len(grads)
     grad_norm = T.sqrt(sum(T.sqr(g).sum() for g in grads))
     updates = OrderedDict()
     not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
     scaling_den = T.maximum(5.0, grad_norm)
     for n, (param, raw_grad) in enumerate(zip(self.params, grads)):
         clipped = T.switch(not_finite, 0.1 * param,
                            raw_grad * (5.0 / scaling_den))
         # NOTE(review): the velocity read here is always the initial 0.
         # set above, so the momentum term looks like a no-op -- confirm.
         velocity = self.momentum_velocity_[n]
         update_step = self.momentum * velocity - self.learning_rate * clipped
         self.momentum_velocity_[n] = update_step
         updates[param] = param + update_step
     return updates
Ejemplo n.º 30
0
    def get_layer_monitoring_channels(self,
                                      state_below=None,
                                      state=None,
                                      targets=None):
        """
        Summarise the layer output with max/min/range/mean statistics.

        Parameters
        ----------
        state_below : object, optional
            Input passed to ``self.fprop`` when ``state`` is not given.
        state : object, optional
            Layer output; computed from ``state_below`` when ``None``.
        targets : object, optional
            Not used by this method.

        Returns
        -------
        rval : OrderedDict
            Keys of the form ``<inner>_x.<outer>_u``: the inner statistic
            is reduced over axis 1 of the state, the outer one over what
            remains.
        """
        channels = OrderedDict()

        if state is None:
            state = self.fprop(state_below)

        for var, prefix in [(state, '')]:
            v_max = var.max(axis=1)
            v_min = var.min(axis=1)
            v_mean = var.mean(axis=1)
            v_range = v_max - v_min

            # Names read inner.outer (e.g. max_x.mean_u) rather than
            # outer_of_inner so related channels such as mean_x.* appear
            # next to each other in an alphabetical list.
            inner_stats = [
                ('max_x', v_max),
                ('min_x', v_min),
                ('range_x', v_range),
                ('mean_x', v_mean),
            ]
            for inner_name, reduced in inner_stats:
                stem = prefix + inner_name
                channels[stem + '.max_u'] = reduced.max()
                channels[stem + '.mean_u'] = reduced.mean()
                channels[stem + '.min_u'] = reduced.min()

        return channels