Example #1
def marginal_likelihood(e_corpus: Corpus, f_corpus: Corpus, model: ConditionalModel):
    """Return the negative log-likelihood per sentence of f_corpus given e_corpus under the conditional model."""

    PL, PM, PAj, PFj = model.components
    ll = 0.0
    for e_snt, f_snt in zip(e_corpus.itersentences(), f_corpus.itersentences()):
        # observations
        l = e_snt.shape[0]
        m = f_snt.shape[0]
        log_pl = np.log(PL.generate(l))
        log_pm = np.log(PM.generate(m))

        # P(f|e) = \prod_j P(f_j|e)
        #        = \prod_j \sum_i P(f_j,a_j=i|e)
        log_pf_e = 0.0
        for j, f in enumerate(f_snt):
            # P(f_j|e) = \sum_i P(f_j,a_j=i|e)
            pfj_e = 0.0  # contribution of this French word
            for i, e in enumerate(e_snt):
                # P(f_j, a_j=i | e) = P(a_j=i) P(f_j|e_i, l, m)
                pfj_e += PAj.generate(
                    (j, i), e_snt, 0, l, m) * PFj.generate(
                        (j, f), (j, i), e_snt, 0, l, m)
            # P(f|e) = \prod_j P(f_j|e)
            log_pf_e += np.log(pfj_e)
        # ll = \sum_{(f,e)} \log [ P(l) P(m) P(f|e,l,m) ]
        ll += log_pl + log_pm + log_pf_e
    # negative log-likelihood averaged over sentences
    return -ll / e_corpus.n_sentences()
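For longer sentences the per-word sum over alignment positions can underflow when accumulated in probability space. Below is a minimal sketch of the same inner sum carried out in log space with np.logaddexp; the helper name is illustrative and not part of the library, but the PAj/PFj interface matches the listing above.

import numpy as np

def log_pfj_given_e(j, f, e_snt, PAj, PFj, l, m):
    # log P(f_j|e) = log \sum_i P(a_j=i) P(f_j|e_i, l, m), accumulated in log space
    log_pfj = -np.inf
    for i in range(len(e_snt)):
        log_pfj = np.logaddexp(
            log_pfj,
            np.log(PAj.generate((j, i), e_snt, 0, l, m))
            + np.log(PFj.generate((j, f), (j, i), e_snt, 0, l, m)))
    return log_pfj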
Example #2
def marginal_likelihood(e_corpus: Corpus, f_corpus: Corpus, model: JointModel):
    """Return the negative joint log-likelihood per sentence of (e_corpus, f_corpus) under the cluster-based joint model."""

    PL, PM, PZ, PEi, PAj, PFj = model.components
    n_clusters = PZ.n_clusters
    ll = 0.0
    for e_snt, f_snt in zip(e_corpus.itersentences(),
                            f_corpus.itersentences()):
        # observations
        l = e_snt.shape[0]
        m = f_snt.shape[0]
        log_pl = np.log(PL.generate(l))
        log_pm = np.log(PM.generate(m))
        # 0-order alignments
        # P(f,e) = \sum_z P(z) P(e|z) P(f|z,e)
        log_pfe = -np.inf  # contribution of this sentence
        for z in range(n_clusters):
            # contribution of the cluster
            log_pz = np.log(PZ.generate(z, l, m))
            # compute the contribution of the entire English sentence
            log_pe_z = 0.0
            # P(e|z) = \prod_i P(e_i|z)
            for i, e in enumerate(e_snt):
                log_pe_z += np.log(PEi.generate((i, e), z, l, m))

            # P(f|z,e) = \prod_j P(f_j|z,e)
            #          = \prod_j \sum_i P(f_j,a_j=i|z,e)
            log_pf_ze = 0.0
            for j, f in enumerate(f_snt):
                # P(f_j|z,e) = \sum_i P(f_j,a_j=i|z,e)
                pfj_ze = 0.0  # contribution of this French word
                for i, e in enumerate(e_snt):
                    pfj_ze += PAj.generate(
                        (j, i), e_snt, z, l, m) * PFj.generate(
                            (j, f), (j, i), e_snt, z, l, m)
                # P(f|z,e) = \prod_j P(f_j|z,e)
                log_pf_ze += np.log(pfj_ze)
            # \sum_z P(z) P(e|z) P(f|z,e)
            log_pfe = np.logaddexp(log_pfe, log_pz + log_pe_z + log_pf_ze)
        # ll = \sum_{(f,e)} \log [ P(l) P(m) P(f,e|l,m) ]
        ll += log_pl + log_pm + log_pfe
    return -ll / e_corpus.n_sentences()
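The per-sentence accumulation above chains np.logaddexp over clusters; the same reduction can be done in one call with scipy.special.logsumexp. A self-contained toy check of the equivalence (the probabilities are made-up stand-ins for P(z) P(e|z) P(f|z,e); scipy is an assumption and is not imported by the listing itself):

import numpy as np
from scipy.special import logsumexp

# toy per-cluster log contributions
cluster_logs = np.log(np.array([0.2, 0.5, 0.3]))

chained = -np.inf
for log_contribution in cluster_logs:
    chained = np.logaddexp(chained, log_contribution)

# both give log \sum_z P(z) P(e|z) P(f|z,e)
assert np.isclose(chained, logsumexp(cluster_logs))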
Example #3
    def __init__(self, e_corpus: Corpus,
                 f_corpus: Corpus,
                 name: str = "lexlr",
                 rng=np.random.RandomState(1234),
                 hidden=[100],
                 learning_rate=0.1,
                 max_iterations=100,
                 patience=10,
                 patience_increase=2,
                 improvement_threshold=0.995):
        """
        Lexical component whose CPDs come from a logistic regression over
        concatenated one-hot (English, French) word features.

        :param e_corpus: data we condition on
        :param f_corpus: data we generate
        :param name: name of the component
        :param rng: numpy random state
        :param hidden: dimensionality of hidden layers
        :param learning_rate: initial learning rate
        :param max_iterations: maximum number of updates
        :param patience: minimum number of updates before early stopping is considered
        :param patience_increase: factor by which patience is extended when a significant improvement is found
        :param improvement_threshold: relative improvement considered significant enough to extend patience
        """
        super(LRComponent, self).__init__(name, LexEventSpace(e_corpus, f_corpus))

        # TODO: generalise to batches?
        self._corpus_size = e_corpus.n_sentences()
        self._learning_rate = learning_rate
        self._max_iterations = max_iterations
        self._patience = patience
        self._patience_increase = patience_increase
        self._improvement_threshold = improvement_threshold

        # The event space determines the input and output dimensionality
        vE, vF = self.event_space.shape
        # TODO: Featurize(event_space)
        # for now the features are the English word identity concatenated with the French word identity
        # TODO: build a richer design matrix with vE * vF rows and d1 + d2 + d3 columns,
        #  where d1 is the English embedding size, d2 the French embedding size, and d3 room for additional features
        self._X = np.zeros((vE * vF, vE + vF), dtype=theano.config.floatX)
        for e, f in product(range(vE), range(vF)):
            self._X[e * vF + f, e] = 1.0
            self._X[e * vF + f, vE + f] = 1.0

        # Create MLP
        builder = NNBuilder(rng)
        # ... the embedding layer
        builder.add_layer(vE + vF, hidden[0])
        # ... additional hidden layers
        for di, do in zip(hidden, hidden[1:]):
            builder.add_layer(di, do)
        # The Logistic Regression adds the final scoring layer and is responsible for normalisation over vF classes
        self._nn = LR(builder, vE, vF)  # type: MLP

        # Create Theano variables for the MLP input
        nn_input = T.matrix('mlp_input')
        # ... and the expected output
        nn_expected = T.matrix('mlp_expected')
        learning_rate = T.scalar('learning_rate')

        # Momentum hyperparameter value (the learning rate itself is passed in at call time).
        # For non-toy problems these values can make a big difference as to
        # whether the network (quickly) converges on a good local minimum.
        momentum = 0

        # Create a theano function for computing the MLP's output given some input
        self._nn_output = theano.function([nn_input], self._nn.output(nn_input))
        # Create a function for computing the cost of the network given an input
        cost = - self._nn.expected_logprob(nn_input, nn_expected)
        # Create a theano function for training the network
        self._train = theano.function([nn_input, nn_expected, learning_rate],
                                      # cost function
                                      cost,
                                      updates=gradient_updates_momentum(cost,
                                                                        self._nn.params,
                                                                        learning_rate,
                                                                        momentum))

        # table to store the CPDs (output of LR reshaped into a (vE, vF) matrix)
        self._cpds = self._nn_output(self._X).reshape(self.event_space.shape)
        # table to gather expected counts
        self._counts = np.zeros(self.event_space.shape, dtype=theano.config.floatX)
        self._i = 0
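The design matrix _X built above stacks a one-hot English block next to a one-hot French block, one row per (e, f) pair. A tiny self-contained check of the indexing scheme, using plain numpy and made-up vocabulary sizes:

import numpy as np
from itertools import product

vE, vF = 2, 3  # toy vocabulary sizes
X = np.zeros((vE * vF, vE + vF))
for e, f in product(range(vE), range(vF)):
    X[e * vF + f, e] = 1.0        # one-hot English block
    X[e * vF + f, vE + f] = 1.0   # one-hot French block

# the pair (e=1, f=2) lives in row 1 * 3 + 2 = 5
assert np.array_equal(X[5], np.array([0., 1., 0., 0., 1.]))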
Example #4
    def __init__(self,
                 e_corpus: Corpus,
                 f_corpus: Corpus,
                 name: str = "lexmlp",
                 rng=np.random.RandomState(1234),
                 hidden=[100],
                 learning_rate=0.1,
                 max_iterations=100,
                 patience=10,
                 patience_increase=2,
                 improvement_threshold=0.995):
        """
        Lexical component whose CPDs come from an MLP with a softmax output
        over the French vocabulary, fed one-hot English inputs.

        :param e_corpus: data we condition on
        :param f_corpus: data we generate
        :param name: name of the component
        :param rng: numpy random state
        :param hidden: dimensionality of hidden layers
        :param learning_rate: initial learning rate
        :param max_iterations: maximum number of updates
        :param patience: minimum number of updates before early stopping is considered
        :param patience_increase: factor by which patience is extended when a significant improvement is found
        :param improvement_threshold: relative improvement considered significant enough to extend patience
        """
        super(MLPComponent, self).__init__(name,
                                           LexEventSpace(e_corpus, f_corpus))

        # TODO: generalise to batches?
        self._corpus_size = e_corpus.n_sentences()
        self._learning_rate = learning_rate
        self._max_iterations = max_iterations
        self._patience = patience
        self._patience_increase = patience_increase
        self._improvement_threshold = improvement_threshold

        # The event space determines the input and output dimensionality
        self.n_input, self.n_output = self.event_space.shape
        # Input for the classifiers (TODO: should depend on the event space more closely)
        self._X = np.identity(self.n_input, dtype=theano.config.floatX)

        # Create MLP
        builder = NNBuilder(rng)
        # ... the embedding layer
        builder.add_layer(self.n_input, hidden[0])
        # ... additional hidden layers
        for di, do in zip(hidden, hidden[1:]):
            builder.add_layer(di, do)
        # ... the output layer (a softmax over n_classes) is added by the MLP itself
        self._mlp = MLP(builder, n_classes=self.n_output)  # type: MLP

        # Create Theano variables for the MLP input
        mlp_input = T.matrix('mlp_input')
        # ... and the expected output
        mlp_expected = T.matrix('mlp_expected')
        learning_rate = T.scalar('learning_rate')

        # Momentum hyperparameter value (the learning rate itself is passed in at call time).
        # For non-toy problems these values can make a big difference as to
        # whether the network (quickly) converges on a good local minimum.
        momentum = 0

        # Create a theano function for computing the MLP's output given some input
        self._mlp_output = theano.function([mlp_input],
                                           self._mlp.output(mlp_input))

        # Create a function for computing the cost of the network given an input
        cost = -self._mlp.expected_logprob(mlp_input, mlp_expected)
        # Create a theano function for training the network
        self._train = theano.function(
            [mlp_input, mlp_expected, learning_rate],
            # cost function
            cost,
            updates=gradient_updates_momentum(cost, self._mlp.params,
                                              learning_rate, momentum))

        # table to store the CPDs (output of MLP)
        self._cpds = self._mlp_output(self._X)
        # table to gather expected counts
        self._counts = np.zeros(self.event_space.shape,
                                dtype=theano.config.floatX)
        self._i = 0
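gradient_updates_momentum is used throughout these examples but not shown; a common implementation (this sketch follows the well-known Theano MLP tutorial, and the version used in this codebase may differ in detail) keeps one shared velocity accumulator per parameter:

import theano
import theano.tensor as T

def gradient_updates_momentum(cost, params, learning_rate, momentum):
    """Momentum SGD updates: one shared velocity per parameter."""
    updates = []
    for param in params:
        # velocity accumulator with the same shape (and broadcast pattern) as param
        velocity = theano.shared(param.get_value() * 0.,
                                 broadcastable=param.broadcastable)
        # step the parameter along the current velocity ...
        updates.append((param, param - learning_rate * velocity))
        # ... then blend the fresh gradient into the velocity
        updates.append((velocity,
                        momentum * velocity + (1. - momentum) * T.grad(cost, param)))
    return updates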
Example #5
    def __init__(self, e_corpus: Corpus,
                 f_corpus: Corpus,
                 name: str = "lexmlp",
                 rng=np.random.RandomState(1234),
                 hidden=[100],
                 learning_rate=0.1,
                 max_iterations=100,
                 patience=10,
                 patience_increase=2,
                 improvement_threshold=0.995):
        """
        Lexical MLP component that takes its input/output dimensionality
        directly from the corpora vocabulary sizes.

        :param e_corpus: data we condition on
        :param f_corpus: data we generate
        :param name: name of the component
        :param rng: numpy random state
        :param hidden: dimensionality of hidden layers
        :param learning_rate: initial learning rate
        :param max_iterations: maximum number of updates
        :param patience: minimum number of updates
        :param patience_increase:
        :param improvement_threshold:
        """

        self._corpus_size = e_corpus.n_sentences()
        self._learning_rate = learning_rate
        self._max_iterations = max_iterations
        self._patience = patience
        self._patience_increase = patience_increase
        self._improvement_threshold = improvement_threshold

        # The event space determines the input and output dimensionality
        self.n_input, self.n_output = e_corpus.vocab_size(), f_corpus.vocab_size()
        # Input for the classifiers
        self._X = np.identity(self.n_input, dtype=theano.config.floatX)

        # Create MLP
        builder = NNBuilder(rng)
        # ... the embedding layer
        builder.add_layer(self.n_input, hidden[0])
        # ... additional hidden layers
        for di, do in zip(hidden, hidden[1:]):
            builder.add_layer(di, do)
        # The MLP adds a softmax layer over n_classes
        self._mlp = MLP(builder, n_classes=f_corpus.vocab_size())  # type: MLP

        # Create Theano variables for the MLP input
        mlp_input = T.matrix('mlp_input')
        # ... and the expected output
        mlp_expected = T.matrix('mlp_expected')
        learning_rate = T.scalar('learning_rate')

        # Momentum hyperparameter value (the learning rate itself is passed in at call time).
        # For non-toy problems these values can make a big difference as to
        # whether the network (quickly) converges on a good local minimum.
        momentum = 0

        # Create a theano function for computing the MLP's output given some input
        self._mlp_output = theano.function([mlp_input], self._mlp.output(mlp_input))

        # Create a function for computing the cost of the network given an input
        cost = - self._mlp.expected_logprob(mlp_input, mlp_expected)

        # Create a theano function for training the network
        self._train = theano.function([mlp_input, mlp_expected, learning_rate],
                                      # cost function
                                      cost,
                                      updates=gradient_updates_momentum(cost,
                                                                        self._mlp.params,
                                                                        learning_rate,
                                                                        momentum))

        # table to store the CPDs (output of MLP)
        self._cpds = self._mlp_output(self._X)
        # table to gather expected counts
        self._counts = np.zeros((self.n_input, self.n_output), dtype=theano.config.floatX)
        self._i = 0
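Because _X is an identity matrix, the single forward pass above produces one row of CPDs per English word id. A short sketch of how the table is typically read; component stands in for an instance of the class above (an assumption, since the listing does not show the call site), and the row-normalisation check relies on the MLP's softmax output:

import numpy as np
import theano

e, f = 0, 3  # made-up English and French word ids
p_f_given_e = component._cpds[e, f]  # lexical translation probability P(f|e)

# the same row can be recomputed from a single one-hot input
one_hot = np.zeros((1, component.n_input), dtype=theano.config.floatX)
one_hot[0, e] = 1.0
assert np.allclose(component._mlp_output(one_hot)[0], component._cpds[e])

# each row should sum to (approximately) one, thanks to the softmax
assert np.allclose(component._cpds.sum(axis=1), 1.0)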