def run():
  # Helper that closes over `lm`, `init_weights`, and `seqs` from the
  # enclosing test: fine-tunes for one epoch without shuffling and returns
  # the mean log-probability of `seqs` under the fine-tuned weights.
  fine_tune_weights = fine_tune.fine_tune(
      lm, init_weights, seqs, num_epochs=1, batch_size=2, shuffle=False,
      learning_rate=0.001)
  return _compute_logprob(seqs, lm, fine_tune_weights).mean()
def run(n_copies):
  # Helper that closes over `lm`, `init_weights`, and `seq` from the enclosing
  # test: fine-tunes on `n_copies` copies of `seq` in a single batch and
  # returns the mean log-probability of `seq` (copy-count invariance is
  # illustrated below).
  fine_tune_weights = fine_tune.fine_tune(
      lm, init_weights, np.tile(seq, [n_copies, 1]), num_epochs=1,
      batch_size=n_copies, shuffle=False, learning_rate=0.001)
  return _compute_logprob(seq, lm, fine_tune_weights).mean()
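# The helper above tiles a single sequence `n_copies` times and fine-tunes on
# one batch containing all copies. Assuming the fine-tuning loss averages the
# per-token loss over the whole batch (an assumption about fine_tune.fine_tune,
# not something shown in this file), the result should not depend on
# `n_copies`: averaging over identical rows leaves the loss, and hence its
# gradient, unchanged. A minimal, self-contained numpy illustration:
import numpy as np

per_token_loss = np.array([0.7, 1.3, 0.2])  # one sequence, three tokens
for n_copies in (1, 2, 4):
  batch_loss = np.tile(per_token_loss, [n_copies, 1]).mean()  # n identical rows
  assert np.isclose(batch_loss, per_token_loss.mean())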
def test_fine_tuning_zero_learning_rate(self, model_cls):
  # With a learning rate of zero, fine-tuning must leave the likelihoods
  # unchanged.
  domain = _test_domain()
  seqs = domain.sample_uniformly(6, seed=0)
  lm = _get_lm(model_cls, domain=domain)
  init_weights = lm.get_weights()
  init_logprob = _compute_logprob(seqs, lm, init_weights).mean()
  fine_tune_weights = fine_tune.fine_tune(
      lm, init_weights, seqs, num_epochs=1, batch_size=2, learning_rate=0.)
  final_logprob = _compute_logprob(seqs, lm, fine_tune_weights).mean()
  self.assertAllClose(final_logprob, init_logprob)
def test_fine_tuning_with_zero_example_weight(self, model_cls):
  domain = _test_domain()
  lm = _get_lm(model_cls, domain=domain, use_dropout=False)
  init_weights = lm.get_weights()
  # Use 2 sequences of equal length, such that their contributions to the
  # loss (which is averaged over non-pad tokens) are equal.
  seqs = domain.sample_uniformly(2, seed=0)
  # Fine-tune with a weight of zero on the second example.
  fine_tune_weights_from_zero_weight = fine_tune.fine_tune(
      lm, init_weights, seqs, num_epochs=1, batch_size=2,
      example_weights=np.array([1., 0.]), shuffle=False, learning_rate=0.001)
  logprobs_from_zero_weight = _compute_logprob(
      seqs, lm, fine_tune_weights_from_zero_weight)
  # Fine-tune using only the first example. Use an example weight of 2.0
  # to compensate for halving the number of tokens that the per-token loss
  # is computed over vs. above (see the sketch below this test).
  fine_tune_weights_from_one_example = fine_tune.fine_tune(
      lm, init_weights, seqs[:1], num_epochs=1, batch_size=1,
      example_weights=np.array([2.]), shuffle=False, learning_rate=0.001)
  logprobs_from_one_example = _compute_logprob(
      seqs, lm, fine_tune_weights_from_one_example)
  self.assertAllClose(logprobs_from_zero_weight, logprobs_from_one_example)
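# The test above relies on `example_weights` scaling each sequence's
# contribution to the fine-tuning loss, with a weight of zero removing a
# sequence entirely. The following is only a hedged sketch of how such a
# weighted, padding-masked per-token term is commonly formed; the name,
# signature, and (deliberately omitted) normalization are assumptions for
# illustration, not the actual fine_tune.fine_tune implementation.
import numpy as np

def _weighted_token_nll_sketch(per_token_nll, pad_mask, example_weights):
  # per_token_nll: [batch, length] negative log-likelihood of each token.
  # pad_mask: [batch, length] with 1.0 for real tokens and 0.0 for padding.
  # example_weights: [batch] per-sequence weights; 0.0 drops a sequence and
  # negative values push its likelihood down instead of up.
  # How this weighted sum is normalized (and hence why the test above uses a
  # weight of 2.0 to compensate for the smaller token count) depends on
  # details of fine_tune.fine_tune that are not shown in this file.
  return (per_token_nll * pad_mask * example_weights[:, None]).sum()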
def test_fine_tuning_increases_likelihood(self):
  domain = _test_domain()
  seqs = domain.sample_uniformly(6, seed=0)
  lm = _get_lm(domain=domain)
  init_weights = lm.get_weights()
  init_logprob = _compute_logprob(seqs, lm, init_weights).mean()
  fine_tune_weights = fine_tune.fine_tune(
      lm, init_weights, seqs, num_epochs=1, batch_size=2, learning_rate=0.001)
  final_logprob = _compute_logprob(seqs, lm, fine_tune_weights).mean()
  self.assertGreater(final_logprob, init_logprob)
def test_fine_tuning_with_example_weights(self, model_cls):
  domain = _test_domain()
  seqs = domain.sample_uniformly(2, seed=0)
  lm = _get_lm(model_cls, domain=domain)
  init_weights = lm.get_weights()
  init_logprobs = _compute_logprob(seqs, lm, init_weights)
  # Put the sequence with the higher initial likelihood first, so that the
  # example weights below push its likelihood down and pull up the likelihood
  # of the other sequence (the effect of a negative weight is illustrated
  # below this test).
  if init_logprobs[0] < init_logprobs[1]:
    seqs = seqs[::-1]
  example_weights = np.array([-1., 1.])
  fine_tune_weights = fine_tune.fine_tune(
      lm, init_weights, seqs, num_epochs=1, batch_size=2,
      example_weights=example_weights, learning_rate=0.001)
  final_logprobs = _compute_logprob(seqs, lm, fine_tune_weights)
  # Check that the ranking of the two likelihoods is reversed.
  self.assertGreater(final_logprobs[1], final_logprobs[0])
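# The test above uses a negative example weight to decrease a sequence's
# likelihood. Assuming the fine-tuning loss is a weighted negative
# log-likelihood (an assumption for illustration only), a negative weight
# flips the sign of that sequence's gradient, so a gradient-descent step
# lowers its log-probability instead of raising it. A scalar illustration:
import numpy as np

dlogprob_dtheta = 1.0  # moving theta in the positive direction raises logprob
learning_rate = 0.001
for weight in (1.0, -1.0):
  dloss_dtheta = -weight * dlogprob_dtheta  # loss = -weight * logprob
  theta_update = -learning_rate * dloss_dtheta
  # Weight +1 moves theta to raise logprob; weight -1 moves it to lower logprob.
  assert np.sign(theta_update) == np.sign(weight)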