Example #1
    def get_gradients(self, X, Y, weights=1.0):
        # Weighted negative log-likelihood as the cost to differentiate.
        cost = -(weights * self.log_prob(X, Y)).sum()

        params = Selector(self).get_parameters()

        gradients = OrderedDict()
        if isinstance(weights, float):
            for pname, param in params.iteritems():
                gradients[param] = tensor.grad(cost, param, consider_constant=[X, Y])
        else:
            # Here weights is a symbolic tensor, so it is also treated as a
            # constant when differentiating.
            for pname, param in params.iteritems():
                gradients[param] = tensor.grad(cost, param, consider_constant=[X, Y, weights])

        return gradients
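
The method above returns an OrderedDict that maps each parameter to its gradient expression. The following is a minimal sketch (not taken from the example) of how such a dictionary is typically built and consumed with Blocks; it assumes the MLP brick and the GradientDescent algorithm, and the names (mlp, algorithm) and layer sizes are illustrative only.

    from collections import OrderedDict

    from theano import tensor
    from blocks.algorithms import GradientDescent, Scale
    from blocks.bricks import MLP, Tanh
    from blocks.initialization import Constant, IsotropicGaussian
    from blocks.select import Selector

    x = tensor.matrix('x')
    mlp = MLP(activations=[Tanh(), Tanh()], dims=[10, 20, 10],
              weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
    mlp.initialize()

    cost = mlp.apply(x).sum()

    # Selector collects every parameter of the brick hierarchy into an
    # OrderedDict keyed by the parameter's path name.
    params = Selector(mlp).get_parameters()
    gradients = OrderedDict(
        (param, tensor.grad(cost, param)) for param in params.values())

    # A hand-built gradient dictionary can be passed to GradientDescent
    # instead of letting the algorithm differentiate the cost itself.
    algorithm = GradientDescent(cost=cost, parameters=gradients.keys(),
                                gradients=gradients, step_rule=Scale(0.01))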
Example #2
    def get_gradients(self, X, Y, weights=1.0):
        W_mean, W_ls, b_mean, b_ls = self.parameters

        mean, log_sigma = self.sample_expected(Y)
        sigma = tensor.exp(log_sigma)

        cost = -log_sigma - 0.5 * (X - mean) ** 2 / tensor.exp(2 * log_sigma)
        if weights != 1.0:
            cost = -weights.dimshuffle(0, "x") * cost

        cost_scaled = sigma ** 2 * cost
        cost_gscale = (sigma ** 2).sum(axis=1).dimshuffle([0, "x"])
        cost_gscale = cost_gscale * cost

        gradients = OrderedDict()

        params = Selector(self.mlp).get_parameters()
        for pname, param in params.iteritems():
            gradients[param] = tensor.grad(cost_gscale.sum(), param, consider_constant=[X, Y])

        gradients[W_mean] = tensor.grad(cost_scaled.sum(), W_mean, consider_constant=[X, Y])
        gradients[b_mean] = tensor.grad(cost_scaled.sum(), b_mean, consider_constant=[X, Y])

        gradients[W_ls] = tensor.grad(cost_scaled.sum(), W_ls, consider_constant=[X, Y])
        gradients[b_ls] = tensor.grad(cost_scaled.sum(), b_ls, consider_constant=[X, Y])

        return gradients
Example #3
    def unify_parameters(self, source_id, dest_id):
        source = self.children[source_id]
        source_name = self.children[source_id].name
        source_prefix = '/' + source_name + '/'
        dest_name = self.children[dest_id].name
        dest_prefix = '/' + self.name + '/' + dest_name + '/'

        source_params = Selector(source).get_parameters()

        replaced = []

        self.unified_parameters = []

        for param, var in source_params.iteritems():
            if not param.startswith(source_prefix):
                continue
            source_param = '/' + self.name + param
            param = param[len(source_prefix):]
            for unification in self.parameter_unifications_include:
                if unification.match(param):
                    exclude = False
                    for ex_unification in self.parameter_unifications_exclude:
                        if ex_unification.match(param):
                            exclude = True
                            break
                    if exclude:
                        continue
                    self.replace_parameter(dest_prefix + param, var)
                    replaced += [dest_prefix + param]
                    self.unified_parameters += [source_param]
        self.unified_parameters = self.convert_names_to_bricks(
            set(self.unified_parameters) | set(replaced))
        return replaced
Example #4
    def get_gradients(self, X, Y, weights=1.):
        cost = -(weights * self.log_prob(X, Y)).sum()

        params = Selector(self).get_parameters()

        gradients = OrderedDict()
        if isinstance(weights, float):
            for pname, param in params.iteritems():
                gradients[param] = tensor.grad(cost,
                                               param,
                                               consider_constant=[X, Y])
        else:
            for pname, param in params.iteritems():
                gradients[param] = tensor.grad(
                    cost, param, consider_constant=[X, Y, weights])

        return gradients
Example #5
    def get_gradients(self, features, n_samples):
        """Perform inference and calculate gradients.

        Returns
        -------
        log_px : T.fvector
        log_psx : T.fvector
        gradients : OrderedDict
        """
        p_layers = self.p_layers
        q_layers = self.q_layers
        n_layers = len(p_layers)

        batch_size = features.shape[0]

        x = replicate_batch(features, n_samples)

        # Get Q-samples
        samples, log_p, log_q = self.sample_q(x)

        # Reshape and sum
        samples = unflatten_values(samples, batch_size, n_samples)
        log_p = unflatten_values(log_p, batch_size, n_samples)
        log_q = unflatten_values(log_q, batch_size, n_samples)

        log_p_all = sum(log_p)
        log_q_all = sum(log_q)

        # Approximate log(p(x))
        log_px = logsumexp(log_p_all - log_q_all, axis=-1) - tensor.log(n_samples)
        log_psx = (logsumexp((log_p_all - log_q_all) / 2, axis=-1) - tensor.log(n_samples)) * 2.

        # Approximate log p(x) and calculate IS weights
        w = self.importance_weights(log_p, log_q)

        wp = w.reshape((batch_size * n_samples, ))
        wq = w.reshape((batch_size * n_samples, ))
        wq = wq - (1. / n_samples)

        samples = flatten_values(samples, batch_size * n_samples)

        gradients = OrderedDict()
        for l in xrange(n_layers - 1):
            gradients = merge_gradients(gradients, p_layers[l].get_gradients(samples[l], samples[l + 1], weights=wp))
            gradients = merge_gradients(gradients, q_layers[l].get_gradients(samples[l + 1], samples[l], weights=wq))
        gradients = merge_gradients(gradients, p_layers[-1].get_gradients(samples[-1], weights=wp))

        if (self.l1reg > 0.) or (self.l2reg > 0.):
            reg_gradients = OrderedDict()
            params = Selector(self).get_parameters()
            for pname, param in params.iteritems():
                if has_roles(param, (WEIGHT,)):
                    reg_cost = self.l1reg * tensor.sum(abs(param)) + self.l2reg * tensor.sum(param ** 2)
                    reg_gradients[param] = tensor.grad(reg_cost, param)
            gradients = merge_gradients(gradients, reg_gradients)

        return log_px, log_psx, gradients
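
The merge_gradients() helper used above belongs to the surrounding project and is not shown on this page. As an assumption about its behaviour (not the project's actual implementation), a minimal version would combine two parameter-to-gradient OrderedDicts, summing the gradients of any parameter that appears in both:

    from collections import OrderedDict

    def merge_gradients(old_gradients, new_gradients):
        """Merge two parameter -> gradient mappings, summing overlaps."""
        merged = OrderedDict(old_gradients)
        for param, grad in new_gradients.iteritems():
            if param in merged:
                merged[param] = merged[param] + grad
            else:
                merged[param] = grad
        return merged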
Example #6
    def get_gradients(self, features, n_samples):
        log_p_bound = self.log_likelihood_bound(features, n_samples)

        gradients = OrderedDict()
        params = Selector(self).get_parameters()
        for pname, param in params.iteritems():
            cost = -log_p_bound.mean() + self.l2reg * tensor.sum(param ** 2)
            gradients[param] = tensor.grad(cost, param)

        return log_p_bound, gradients
Example #7
    def get_gradients(self, X, Y, weights=1.):
        W_mean, W_ls, b_mean, b_ls = self.parameters

        mean, log_sigma = self.sample_expected(Y)
        sigma = tensor.exp(log_sigma)

        cost = -log_sigma - 0.5 * (X - mean)**2 / tensor.exp(2 * log_sigma)
        if weights != 1.:
            cost = -weights.dimshuffle(0, 'x') * cost

        cost_scaled = sigma**2 * cost
        cost_gscale = (sigma**2).sum(axis=1).dimshuffle([0, 'x'])
        cost_gscale = cost_gscale * cost

        gradients = OrderedDict()

        params = Selector(self.mlp).get_parameters()
        for pname, param in params.iteritems():
            gradients[param] = tensor.grad(cost_gscale.sum(),
                                           param,
                                           consider_constant=[X, Y])

        gradients[W_mean] = tensor.grad(cost_scaled.sum(),
                                        W_mean,
                                        consider_constant=[X, Y])
        gradients[b_mean] = tensor.grad(cost_scaled.sum(),
                                        b_mean,
                                        consider_constant=[X, Y])

        gradients[W_ls] = tensor.grad(cost_scaled.sum(),
                                      W_ls,
                                      consider_constant=[X, Y])
        gradients[b_ls] = tensor.grad(cost_scaled.sum(),
                                      b_ls,
                                      consider_constant=[X, Y])

        return gradients
Example #8
    def get_gradients(self, features, n_samples):
        """Perform inference and calculate gradients.

        Returns
        -------
            log_px : T.fvector
            log_psx : T.fvector
            gradients : OrderedDict
        """
        p_layers = self.p_layers
        q_layers = self.q_layers
        n_layers = len(p_layers)

        batch_size = features.shape[0]

        x = replicate_batch(features, n_samples)

        # Get Q-samples
        samples, log_p, log_q = self.sample_q(x)

        # Reshape and sum
        samples = unflatten_values(samples, batch_size, n_samples)
        log_p = unflatten_values(log_p, batch_size, n_samples)
        log_q = unflatten_values(log_q, batch_size, n_samples)

        log_p_all = sum(log_p)
        log_q_all = sum(log_q)

        # Approximate log p(x)
        log_px_bound = log_p_all[:,0] - log_q_all[:,0]
        log_px  = logsumexp(log_p_all-log_q_all, axis=-1) - tensor.log(n_samples)
        log_psx = (logsumexp((log_p_all-log_q_all)/2, axis=-1) - tensor.log(n_samples)) * 2.

        # Calculate IS weights
        w = self.importance_weights(log_p, log_q)

        wp = w.reshape( (batch_size*n_samples, ) )
        wq = w.reshape( (batch_size*n_samples, ) )
        wq = wq - (1./n_samples)

        samples = flatten_values(samples, batch_size*n_samples)

        gradients = OrderedDict()
        for l in xrange(n_layers-1):
            gradients = merge_gradients(gradients, p_layers[l].get_gradients(samples[l], samples[l+1], weights=wp))
            gradients = merge_gradients(gradients, q_layers[l].get_gradients(samples[l+1], samples[l], weights=wq))
        gradients = merge_gradients(gradients, p_layers[-1].get_gradients(samples[-1], weights=wp))

        if (self.l1reg > 0.) or (self.l2reg > 0.):
            reg_gradients = OrderedDict()
            params = Selector(self).get_parameters()
            for pname, param in params.iteritems():
                if has_roles(param, (WEIGHT,)):
                    reg_cost = self.l1reg * tensor.sum(abs(param)) + self.l2reg * tensor.sum(param**2)
                    reg_gradients[param] = tensor.grad(reg_cost, param)
            gradients = merge_gradients(gradients, reg_gradients)

        self.log_p_bound = log_px_bound
        self.log_p = log_px
        self.log_ph = log_psx

        return log_px, log_psx, gradients
Example #9
    print 'Parsing dataset file...'
    vocab = Vocab(dataset_path=args.dataset_path)

    source_sentence = tensor.lmatrix('source')
    
    encoder = BidirectionalEncoder(vocab.sequenceLength(), args.embed, args.nhidden)

    encoder.weights_init = IsotropicGaussian(args.weight_scale)
    encoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    encoder.initialize()

    print 'Parameter names: '
    enc_param_dict = Selector(encoder).get_params()
    for name, value in enc_param_dict.iteritems():
        print '    {:15}: {}'.format(value.get_value().shape, name)

    representation = encoder.apply(source_sentence)

    print 'Compiling theano function'
    f = theano.function([source_sentence], representation)

    reps = np.empty(len(vocab.dataset), dtype=object)

    bar = Bar('Encoding', max=len(vocab.dataset))
    for idx, sentence in enumerate(vocab.dataset):
        reps[idx] = f(sentence).transpose((1, 2, 0))
        bar.next()
    bar.finish()