def marginal_likelihood(e_corpus: Corpus, f_corpus: Corpus, model: ConditionalModel):
    PL, PM, PAj, PFj = model.components
    ll = 0.0
    for e_snt, f_snt in zip(e_corpus.itersentences(), f_corpus.itersentences()):
        # observations
        l = e_snt.shape[0]
        m = f_snt.shape[0]
        log_pl = np.log(PL.generate(l))
        log_pm = np.log(PM.generate(m))
        # P(f|e) = \prod_j P(f_j|e)
        #        = \prod_j \sum_i P(f_j,a_j=i|e)
        log_pf_e = 0.0
        for j, f in enumerate(f_snt):
            # P(f_j|e) = \sum_i P(f_j,a_j=i|e)
            pfj_e = 0.0  # contribution of this French word
            for i, e in enumerate(e_snt):
                # P(f_j, a_j=i | e) = P(a_j=i) P(f_j|e_i, l, m)
                # (the 0 stands where the joint model below would pass a cluster id)
                pfj_e += PAj.generate((j, i), e_snt, 0, l, m) * PFj.generate((j, f), (j, i), e_snt, 0, l, m)
            # P(f|e) = \prod_j P(f_j|e), accumulated in log-domain
            log_pf_e += np.log(pfj_e)
        # \sum_{f,e} P(l)P(m)P(f|e,l,m)
        ll += log_pl + log_pm + log_pf_e
    return -ll / e_corpus.n_sentences()
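The inner loop marginalises the alignment link for each French position, P(f_j|e) = \sum_i P(a_j=i) P(f_j|e_i), and only then moves to log space. A minimal self-contained NumPy sketch of that computation on a toy sentence pair (the alignment and lexical tables below are made-up numbers, not the model's actual components):

import numpy as np

# Toy setup: 2 English words (l = 2), 3 French positions (m = 3).
# align[j, i] ~ P(a_j = i)      (here uniform over English positions)
# lex[i, j]   ~ P(f_j | e_i)    (made-up lexical translation probabilities)
align = np.full((3, 2), 0.5)
lex = np.array([[0.7, 0.2, 0.1],
                [0.1, 0.3, 0.6]])

log_pf_e = 0.0
for j in range(3):
    # P(f_j|e) = \sum_i P(a_j=i) P(f_j|e_i)
    pfj_e = sum(align[j, i] * lex[i, j] for i in range(2))
    log_pf_e += np.log(pfj_e)

print(log_pf_e)  # log P(f|e) for this toy pair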
def marginal_likelihood(e_corpus: Corpus, f_corpus: Corpus, model: JointModel):
    PL, PM, PZ, PEi, PAj, PFj = model.components
    n_clusters = PZ.n_clusters
    ll = 0.0
    for e_snt, f_snt in zip(e_corpus.itersentences(), f_corpus.itersentences()):
        # observations
        l = e_snt.shape[0]
        m = f_snt.shape[0]
        log_pl = np.log(PL.generate(l))
        log_pm = np.log(PM.generate(m))
        # 0-order alignments
        # P(f,e) = \sum_z P(z) P(e|z) P(f|z,e)
        log_pfe = -np.inf  # contribution of this sentence pair
        for z in range(n_clusters):
            # contribution of the cluster
            log_pz = np.log(PZ.generate(z, l, m))
            # compute the contribution of the entire English sentence
            # P(e|z) = \prod_i P(e_i|z)
            log_pe_z = 0.0
            for i, e in enumerate(e_snt):
                log_pe_z += np.log(PEi.generate((i, e), z, l, m))
            # P(f|z,e) = \prod_j P(f_j|z,e)
            #          = \prod_j \sum_i P(f_j,a_j=i|z,e)
            log_pf_ze = 0.0
            for j, f in enumerate(f_snt):
                # P(f_j|z,e) = \sum_i P(f_j,a_j=i|z,e)
                pfj_ze = 0.0  # contribution of this French word
                for i, e in enumerate(e_snt):
                    pfj_ze += PAj.generate((j, i), e_snt, z, l, m) * PFj.generate((j, f), (j, i), e_snt, z, l, m)
                # P(f|z,e) = \prod_j P(f_j|z,e)
                log_pf_ze += np.log(pfj_ze)
            # \sum_z P(z) P(e|z) P(f|z,e)
            log_pfe = np.logaddexp(log_pfe, log_pz + log_pe_z + log_pf_ze)
        # \sum_{f,e} P(l)P(m)P(f,e|l,m)
        ll += log_pl + log_pm + log_pfe
    return -ll / e_corpus.n_sentences()
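Since the cluster z is latent, the sentence-pair likelihood is a mixture \sum_z P(z) P(e|z) P(f|z,e), which the loop accumulates in the log domain with np.logaddexp, starting from log 0 = -inf. A small self-contained sketch of that accumulation pattern, with made-up per-cluster terms rather than the model's components:

import numpy as np

# Toy per-cluster contributions log P(z) + log P(e|z) + log P(f|z,e)
log_terms = np.log([0.2 * 0.01, 0.5 * 0.03, 0.3 * 0.02])

# Accumulate the mixture in the log domain, starting from log(0) = -inf
log_pfe = -np.inf
for log_term in log_terms:
    log_pfe = np.logaddexp(log_pfe, log_term)

# Equivalent one-shot reduction over all clusters
assert np.isclose(log_pfe, np.logaddexp.reduce(log_terms))
print(log_pfe)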
def __init__(self, e_corpus: Corpus, f_corpus: Corpus, name: str = "lexlr", rng=np.random.RandomState(1234), hidden=[100], learning_rate=0.1, max_iterations=100, patience=10, patience_increase=2, improvement_threshold=0.995): """ :param e_corpus: data we condition on :param f_corpus: data we generate :param name: name of the component :param rng: numpy random state :param hidden: dimensionality of hidden layers :param learning_rate: initial learning rate :param max_iterations: maximum number of updates :param patience: minimum number of updates :param patience_increase: :param improvement_threshold: """ super(LRComponent, self).__init__(name, LexEventSpace(e_corpus, f_corpus)) # TODO: generalise to batches? self._corpus_size = e_corpus.n_sentences() self._learning_rate = learning_rate self._max_iterations = max_iterations self._patience = patience self._patience_increase = patience_increase self._improvement_threshold = improvement_threshold # The event space determines the input and output dimensionality vE, vF = self.event_space.shape # TODO: Featurize(event_space) # for now my features are (English word identity concatenated with French word identity) # TODO: create a better matrix where we have # vE * vF rows but we have d1 + d2 + d3 columns where d1 is the E embedding, d2 is the F embedding and d3 is whatever else self._X = np.zeros((vE * vF, vE + vF), dtype=theano.config.floatX) for e, f in product(range(vE), range(vF)): self._X[e * vF + f, e] = 1.0 self._X[e * vF + f, vE + f] = 1.0 # Create MLP builder = NNBuilder(rng) # ... the embedding layer builder.add_layer(vE + vF, hidden[0]) # ... additional hidden layers for di, do in zip(hidden, hidden[1:]): builder.add_layer(di, do) # The Logistic Regression adds the final scoring layer and is responsible for normalisation over vF classes self._nn = LR(builder, vE, vF) # type: MLP # Create Theano variables for the MLP input nn_input = T.matrix('mlp_input') # ... and the expected output nn_expected = T.matrix('mlp_expected') learning_rate = T.scalar('learning_rate') # Learning rate and momentum hyperparameter values # Again, for non-toy problems these values can make a big difference # as to whether the network (quickly) converges on a good local minimum. #learning_rate = 0.01 momentum = 0 # Create a theano function for computing the MLP's output given some input self._nn_output = theano.function([nn_input], self._nn.output(nn_input)) # Create a function for computing the cost of the network given an input cost = - self._nn.expected_logprob(nn_input, nn_expected) # Create a theano function for training the network self._train = theano.function([nn_input, nn_expected, learning_rate], # cost function cost, updates=gradient_updates_momentum(cost, self._nn.params, learning_rate, momentum)) # table to store the CPDs (output of LR reshaped into a (vE, vF) matrix) self._cpds = self._nn_output(self._X).reshape(self.event_space.shape) # table to gather expected counts self._counts = np.zeros(self.event_space.shape, dtype=theano.config.floatX) self._i = 0
def __init__(self, e_corpus: Corpus, f_corpus: Corpus, name: str = "lexmlp", rng=np.random.RandomState(1234), hidden=[100], learning_rate=0.1, max_iterations=100, patience=10, patience_increase=2, improvement_threshold=0.995): """ :param e_corpus: data we condition on :param f_corpus: data we generate :param name: name of the component :param rng: numpy random state :param hidden: dimensionality of hidden layers :param learning_rate: initial learning rate :param max_iterations: maximum number of updates :param patience: minimum number of updates :param patience_increase: :param improvement_threshold: """ super(MLPComponent, self).__init__(name, LexEventSpace(e_corpus, f_corpus)) # TODO: generalise to batches? self._corpus_size = e_corpus.n_sentences() self._learning_rate = learning_rate self._max_iterations = max_iterations self._patience = patience self._patience_increase = patience_increase self._improvement_threshold = improvement_threshold # The event space determines the input and output dimensionality self.n_input, self.n_output = self.event_space.shape # Input for the classifiers (TODO: should depend on the event space more closely) self._X = np.identity(self.n_input, dtype=theano.config.floatX) # Create MLP builder = NNBuilder(rng) # ... the embedding layer builder.add_layer(self.n_input, hidden[0]) # ... additional hidden layers for di, do in zip(hidden, hidden[1:]): builder.add_layer(di, do) # ... and the output layer (a softmax layer) #builder.add_layer(hidden[-1], self.n_output, activation=T.nnet.softmax) # The MLP adds the softmax layer over n_classes self._mlp = MLP(builder, n_classes=self.n_output) # type: MLP # Create Theano variables for the MLP input mlp_input = T.matrix('mlp_input') # ... and the expected output mlp_expected = T.matrix('mlp_expected') learning_rate = T.scalar('learning_rate') # Learning rate and momentum hyperparameter values # Again, for non-toy problems these values can make a big difference # as to whether the network (quickly) converges on a good local minimum. #learning_rate = 0.01 momentum = 0 # Create a theano function for computing the MLP's output given some input self._mlp_output = theano.function([mlp_input], self._mlp.output(mlp_input)) # Create a function for computing the cost of the network given an input cost = -self._mlp.expected_logprob(mlp_input, mlp_expected) # Create a theano function for training the network self._train = theano.function( [mlp_input, mlp_expected, learning_rate], # cost function cost, updates=gradient_updates_momentum(cost, self._mlp.params, learning_rate, momentum)) # table to store the CPDs (output of MLP) self._cpds = self._mlp_output(self._X) # table to gather expected counts self._counts = np.zeros(self.event_space.shape, dtype=theano.config.floatX) self._i = 0
def __init__(self, e_corpus: Corpus, f_corpus: Corpus, name: str = "lexmlp", rng=np.random.RandomState(1234), hidden=[100], learning_rate=0.1, max_iterations=100, patience=10, patience_increase=2, improvement_threshold=0.995): """ :param e_corpus: data we condition on :param f_corpus: data we generate :param name: name of the component :param rng: numpy random state :param hidden: dimensionality of hidden layers :param learning_rate: initial learning rate :param max_iterations: maximum number of updates :param patience: minimum number of updates :param patience_increase: :param improvement_threshold: """ self._corpus_size = e_corpus.n_sentences() self._learning_rate = learning_rate self._max_iterations = max_iterations self._patience = patience self._patience_increase = patience_increase self._improvement_threshold = improvement_threshold # The event space determines the input and output dimensionality self.n_input, self.n_output = e_corpus.vocab_size(), f_corpus.vocab_size() # Input for the classifiers self._X = np.identity(self.n_input, dtype=theano.config.floatX) # Create MLP builder = NNBuilder(rng) # ... the embedding layer builder.add_layer(self.n_input, hidden[0]) # ... additional hidden layers for di, do in zip(hidden, hidden[1:]): builder.add_layer(di, do) # The MLP adds a softmax layer over n_classes self._mlp = MLP(builder, n_classes=f_corpus.vocab_size()) # type: MLP # Create Theano variables for the MLP input mlp_input = T.matrix('mlp_input') # ... and the expected output mlp_expected = T.matrix('mlp_expected') learning_rate = T.scalar('learning_rate') # Learning rate and momentum hyperparameter values # Again, for non-toy problems these values can make a big difference # as to whether the network (quickly) converges on a good local minimum. #learning_rate = 0.01 momentum = 0 # Create a theano function for computing the MLP's output given some input self._mlp_output = theano.function([mlp_input], self._mlp.output(mlp_input)) # Create a function for computing the cost of the network given an input cost = - self._mlp.expected_logprob(mlp_input, mlp_expected) # Create a theano function for training the network self._train = theano.function([mlp_input, mlp_expected, learning_rate], # cost function cost, updates=gradient_updates_momentum(cost, self._mlp.params, learning_rate, momentum)) # table to store the CPDs (output of MLP) self._cpds = self._mlp_output(self._X) # table to gather expected counts self._counts = np.zeros((self.n_input, self.n_output), dtype=theano.config.floatX) self._i = 0