Example #1
def test_learning_rate_resuming(self, args):
    mdl = args['model']
    with testing_utils.tempdir() as tmpdir:
        model_file = os.path.join(tmpdir, 'model')
        stdout1, valid1, test1 = testing_utils.train_model(
            dict(model_file=model_file, lr_scheduler='invsqrt', **args))
        stdout2, valid2, test2 = testing_utils.train_model(
            dict(model_file=model_file, lr_scheduler='invsqrt', **args))
        # make sure the number of updates is being tracked correctly
        self.assertGreater(
            valid2['num_updates'],
            valid1['num_updates'],
            '({}) Number of updates is not increasing'.format(mdl),
        )
        # make sure the learning rate is decreasing
        self.assertLess(
            valid2['lr'],
            valid1['lr'],
            '({}) Learning rate is not decreasing'.format(mdl),
        )
        # but make sure we're not loading the scheduler if we're fine
        # tuning
        stdout3, valid3, test3 = testing_utils.train_model(
            dict(
                init_model=os.path.join(tmpdir, 'model'),
                model_file=os.path.join(tmpdir, 'newmodel'),
                lr_scheduler='invsqrt',
                **args,
            ))
        self.assertEqual(
            valid3['num_updates'],
            valid1['num_updates'],
            '({}) Finetuning LR scheduler reset failed '
            '(num_updates).'.format(mdl),
        )
        self.assertEqual(
            valid3['lr'],
            valid1['lr'],
            '({}) Finetuning LR scheduler reset failed '
            '(lr).'.format(mdl),
        )
        # and make sure we're not loading the scheduler if it changes
        stdout4, valid4, test4 = testing_utils.train_model(
            dict(
                init_model=os.path.join(tmpdir, 'model'),
                model_file=os.path.join(tmpdir, 'newmodel2'),
                lr_scheduler='reduceonplateau',
                **args,
            ))
        self.assertEqual(
            valid4['num_updates'],
            valid1['num_updates'],
            '({}) LR scheduler change reset failed (num_updates).'
            '\n{}'.format(mdl, stdout4),
        )
        self.assertEqual(
            valid4['lr'],
            1e-3,
            '({}) LR is not correct in final resume.\n{}'.format(mdl, stdout4),
        )
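
The assertions above rely on two properties of the invsqrt scheduler: the update counter keeps growing across resumes, and the learning rate decays roughly as the inverse square root of that counter once warmup is over. Below is a minimal sketch of that decay shape; the formula is illustrative only and is not ParlAI's exact scheduler code.

import math

def invsqrt_lr(base_lr, num_updates, warmup_updates=1):
    # Illustrative inverse-sqrt decay: LR shrinks like 1/sqrt(step) after warmup.
    step = max(num_updates, warmup_updates)
    return base_lr * math.sqrt(warmup_updates) / math.sqrt(step)

# More updates -> lower LR, which is what the
# assertLess(valid2['lr'], valid1['lr']) check above depends on.
print(invsqrt_lr(7e-3, 100))  # LR after the first run
print(invsqrt_lr(7e-3, 200))  # LR after resuming: strictly smaller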
Example #2
    def test_resuming(self):
        with testing_utils.tempdir() as tmpdir:
            model_file = os.path.join(tmpdir, 'model')

            stdout1, valid1, test1 = testing_utils.train_model(
                dict(
                    model_file=model_file,
                    task='integration_tests:candidate',
                    model='transformer/ranker',
                    optimizer='adamax',
                    learningrate=7e-3,
                    batchsize=32,
                    num_epochs=1,
                    n_layers=1,
                    n_heads=1,
                    ffn_size=32,
                    embedding_size=32,
                    warmup_updates=1,
                    lr_scheduler='invsqrt',
                ))

            stdout2, valid2, test2 = testing_utils.train_model(
                dict(
                    model_file=model_file,
                    task='integration_tests:candidate',
                    model='transformer/ranker',
                    num_epochs=1,
                ))
            # make sure the number of updates is being tracked correctly
            self.assertGreater(valid2['num_updates'], valid1['num_updates'],
                               'Number of updates is not increasing')
            # make sure the learning rate is decreasing
            self.assertLess(valid2['lr'], valid1['lr'],
                            'Learning rate is not decreasing')
Example #3
    def test_generation(self):
        """This test uses a single-turn sequence repitition task."""
        stdout, valid, test = testing_utils.train_model(
            dict(
                task='integration_tests:nocandidate',
                model='seq2seq',
                learningrate=LR,
                batchsize=BATCH_SIZE,
                num_epochs=NUM_EPOCHS,
                numthreads=1,
                no_cuda=True,
                embeddingsize=16,
                hiddensize=16,
                rnn_class='gru',
                attention='general',
                gradient_clip=1.0,
                dropout=0.0,
                lookuptable='all',
            ))

        self.assertTrue(
            valid['ppl'] < 1.2,
            "valid ppl = {}\nLOG:\n{}".format(valid['ppl'], stdout))
        self.assertTrue(test['ppl'] < 1.2,
                        "test ppl = {}\nLOG:\n{}".format(test['ppl'], stdout))
Example #4
    def test_beamsearch(self):
        """Ensures beam search can generate the correct response"""
        stdout, valid, test = testing_utils.train_model(
            dict(
                task='integration_tests:nocandidate',
                model='seq2seq',
                learningrate=LR,
                batchsize=BATCH_SIZE,
                num_epochs=NUM_EPOCHS,
                numthreads=1,
                no_cuda=True,
                embeddingsize=16,
                hiddensize=16,
                rnn_class='gru',
                attention='general',
                gradient_clip=1.0,
                dropout=0.0,
                lookuptable='all',
                inference='beam',
                beam_size=4,
            ))

        self.assertTrue(
            valid['bleu'] > 0.95,
            "valid bleu = {}\nLOG:\n{}".format(valid['bleu'], stdout),
        )
        self.assertTrue(
            test['bleu'] > 0.95,
            "test bleu = {}\nLOG:\n{}".format(test['bleu'], stdout))
        self.assertTrue(
            valid['ppl'] < 1.2,
            "valid ppl = {}\nLOG:\n{}".format(valid['ppl'], stdout))
        self.assertTrue(test['ppl'] < 1.2,
                        "test ppl = {}\nLOG:\n{}".format(test['ppl'], stdout))
Example #5
    def test_labelcands_nomemnn(self):
        """This test uses a single-turn task, so doesn't test memories."""

        stdout, valid, test = testing_utils.train_model(
            dict(
                task='integration_tests:candidate',
                model='memnn',
                lr=LR,
                batchsize=BATCH_SIZE,
                num_epochs=NUM_EPOCHS,
                numthreads=1,
                no_cuda=True,
                embedding_size=32,
                gradient_clip=1.0,
                hops=1,
                position_encoding=True,
                use_time_features=False,
                memsize=0,
                rank_candidates=True,
            ))

        self.assertTrue(
            valid['hits@1'] > 0.95,
            "valid hits@1 = {}\nLOG:\n{}".format(valid['hits@1'], stdout))
        self.assertTrue(
            test['hits@1'] > 0.95,
            "test hits@1 = {}\nLOG:\n{}".format(test['hits@1'], stdout))
Example #6
    def test_hogwild_train(self):
        """Test the trainer eval with numthreads > 1 and batchsize in [1,2,3]."""
        opt = dict(
            task='tasks.repeat:RepeatTeacher:{}'.format(1),
            evaltask='tasks.repeat:RepeatTeacher:{}'.format(NUM_EXS),
            model='repeat_label',
            display_examples=False,
            num_epochs=10,
        )
        for nt in NUM_THREADS_CHOICES:
            for bs in BATCHSIZE_CHOICES:
                opt['num_threads'] = nt
                opt['batchsize'] = bs

                stdout, valid, test = testing_utils.train_model(opt)
                self.assertEqual(
                    valid['exs'],
                    NUM_EXS,
                    'LOG:\n{}'.format(stdout),
                )
                self.assertEqual(
                    test['exs'],
                    NUM_EXS,
                    'LOG:\n{}'.format(stdout),
                )
Example #7
    def test_alt_reduction(self):
        """Test a transformer ranker reduction method other than `mean`."""
        stdout, valid, test = testing_utils.train_model(
            dict(
                task='integration_tests:candidate',
                model='transformer/ranker',
                optimizer='adamax',
                learningrate=7e-3,
                batchsize=16,
                validation_every_n_epochs=5,
                validation_patience=2,
                n_layers=1,
                n_heads=4,
                ffn_size=64,
                embedding_size=32,
                candidates='batch',
                eval_candidates='inline',
                gradient_clip=0.5,
                variant='xlm',
                activation='gelu',
                reduction_type='first',  # this is really what we're trying to test for
            ))

        self.assertGreaterEqual(
            valid['hits@1'],
            0.90,
            "valid hits@1 = {}\nLOG:\n{}".format(valid['hits@1'], stdout),
        )
        self.assertGreaterEqual(
            test['hits@1'],
            0.90,
            "test hits@1 = {}\nLOG:\n{}".format(test['hits@1'], stdout),
        )
Example #8
    def test_repeater(self):
        stdout, valid, test = testing_utils.train_model(
            dict(
                task='integration_tests:candidate',
                model='transformer/ranker',
                optimizer='adamax',
                learningrate=7e-3,
                batchsize=16,
                validation_every_n_epochs=5,
                validation_patience=2,
                n_layers=1,
                n_heads=4,
                ffn_size=64,
                embedding_size=32,
                candidates='batch',
                eval_candidates='inline',
                gradient_clip=0.5,
            ))

        self.assertGreaterEqual(
            valid['hits@1'],
            0.90,
            "valid hits@1 = {}\nLOG:\n{}".format(valid['hits@1'], stdout),
        )
        self.assertGreaterEqual(
            test['hits@1'],
            0.90,
            "test hits@1 = {}\nLOG:\n{}".format(test['hits@1'], stdout),
        )
Example #9
    def test_xlm(self):
        stdout, valid, test = testing_utils.train_model(
            dict(
                task='integration_tests:nocandidate',
                model='transformer/generator',
                optimizer='adamax',
                learningrate=7e-3,
                batchsize=32,
                num_epochs=20,
                n_layers=1,
                n_heads=1,
                ffn_size=32,
                embedding_size=32,
                beam_size=1,
                variant='xlm',
                activation='gelu',
                n_segments=8,  # doesn't do anything but still good to test
            ))

        self.assertLessEqual(
            valid['ppl'], 1.30,
            "valid ppl = {}\nLOG:\n{}".format(valid['ppl'], stdout))
        self.assertGreaterEqual(
            valid['bleu'],
            0.90,
            "valid blue = {}\nLOG:\n{}".format(valid['bleu'], stdout),
        )
        self.assertLessEqual(
            test['ppl'], 1.30,
            "test ppl = {}\nLOG:\n{}".format(test['ppl'], stdout))
        self.assertGreaterEqual(
            test['bleu'], 0.90,
            "test bleu = {}\nLOG:\n{}".format(test['bleu'], stdout))
Example #10
    def test_beamsearch(self):
        stdout, valid, test = testing_utils.train_model(
            dict(
                task='integration_tests:nocandidate',
                model='transformer/generator',
                optimizer='adamax',
                learningrate=7e-3,
                batchsize=32,
                num_epochs=20,
                n_layers=1,
                n_heads=1,
                ffn_size=32,
                embedding_size=32,
                beam_size=5,
            ))

        self.assertLessEqual(
            valid['ppl'], 1.20,
            "valid ppl = {}\nLOG:\n{}".format(valid['ppl'], stdout))
        self.assertGreaterEqual(
            valid['bleu'],
            0.95,
            "valid blue = {}\nLOG:\n{}".format(valid['bleu'], stdout),
        )
        self.assertLessEqual(
            test['ppl'], 1.20,
            "test ppl = {}\nLOG:\n{}".format(test['ppl'], stdout))
        self.assertGreaterEqual(
            test['bleu'], 0.95,
            "test bleu = {}\nLOG:\n{}".format(test['bleu'], stdout))
Example #11
    def test_labelcands_multi(self):
        """This test uses a multi-turn task and multithreading."""
        stdout, valid, test = testing_utils.train_model(
            dict(
                task='integration_tests:multiturn_candidate',
                model='memnn',
                lr=LR,
                batchsize=BATCH_SIZE,
                num_epochs=NUM_EPOCHS * 3,
                numthreads=4,
                no_cuda=True,
                embedding_size=32,
                gradient_clip=1.0,
                hops=2,
                position_encoding=False,
                use_time_features=True,
                memsize=5,
                rank_candidates=True,
            )
        )

        self.assertTrue(
            valid['hits@1'] > 0.95,
            "valid hits@1 = {}\nLOG:\n{}".format(valid['hits@1'], stdout),
        )
        self.assertTrue(
            test['hits@1'] > 0.95,
            "test hits@1 = {}\nLOG:\n{}".format(test['hits@1'], stdout),
        )
Example #12
 def test_crossencoder(self):
     stdout, valid, test = testing_utils.train_model(
         dict(
             task='convai2',
             model='bert_ranker/cross_encoder_ranker',
             num_epochs=0.002,
             batchsize=1,
             candidates="inline",
             type_optimization="all_encoder_layers",
             warmup_updates=100,
             text_truncate=32,
             label_truncate=32,
             validation_max_exs=20,
             short_final_eval=True,
         ))
     # The cross encoder reaches an interesting state much faster.
     # Accuracy should be present and somewhere between 0.03 and 0.8
     # (a large interval so that the test doesn't flake).
     self.assertGreaterEqual(
         test['accuracy'],
         0.03,
         'test accuracy = {}\nLOG:\n{}'.format(test['accuracy'], stdout),
     )
     self.assertLessEqual(
         test['accuracy'],
         0.8,
         'test accuracy = {}\nLOG:\n{}'.format(test['accuracy'], stdout),
     )
Example #13
    def test_pyt_batchsort_train(self):
        """
        Tests the functionality of training with batchsort
        under the following conditions:

        1. -dt train --pytorch_preprocess False
        2. -dt train:stream --pytorch_preprocess False
        3. -dt train --pytorch_preprocess True --batch_sort_field text_vec
        """
        # Check that training works under each datatype/preprocess combination
        dt_and_preprocess = [('train', False), ('train:stream', False),
                             ('train', True)]
        for dt, preprocess in dt_and_preprocess:
            defaults = parser_defaults.copy()
            defaults['datatype'] = dt
            defaults['pytorch_preprocess'] = preprocess
            defaults['pytorch_teacher_batch_sort'] = True
            defaults['batchsize'] = 32
            if preprocess:
                defaults['batch_sort_field'] = 'text_vec'
            str_output, _, _ = testing_utils.train_model(defaults)
            self.assertTrue(
                solved_task(str_output),
                'Teacher could not teach seq2seq with batch sort '
                'and args {} and output {}'.format((dt, preprocess),
                                                   str_output))
Example #14
    def test_multitasking_metrics(self):
        stdout, valid, test = testing_utils.train_model({
            'task': 'integration_tests:candidate,'
                    'integration_tests:multiturnCandidate',
            'model': 'random_candidate',
            'num_epochs': 0.5,
            'aggregate_micro': True,
        })

        task1_acc = valid['tasks']['integration_tests:candidate']['accuracy']
        task2_acc = valid['tasks']['integration_tests:multiturnCandidate']['accuracy']
        total_acc = valid['accuracy']
        # task 2 is 4 times the size of task 1
        self.assertAlmostEqual(
            total_acc,
            (task1_acc + 4 * task2_acc) / 5,
            4,
            'Task accuracy is averaged incorrectly',
        )

        stdout, valid, test = testing_utils.train_model({
            'task': 'integration_tests:candidate,'
                    'integration_tests:multiturnCandidate',
            'model': 'random_candidate',
            'num_epochs': 0.5,
            'aggregate_micro': False,
        })
        task1_acc = valid['tasks']['integration_tests:candidate']['accuracy']
        task2_acc = valid['tasks']['integration_tests:multiturnCandidate']['accuracy']
        total_acc = valid['accuracy']
        # metrics should be averaged equally across tasks
        self.assertAlmostEqual(
            total_acc,
            (task1_acc + task2_acc) / 2,
            4,
            'Task accuracy is averaged incorrectly',
        )
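
The two assertAlmostEqual checks above encode the difference between micro and macro aggregation: with aggregate_micro=True every example counts equally, so the larger task (four times the size of task 1 here) dominates the combined accuracy, while with aggregate_micro=False each task counts equally. A small worked sketch of the same arithmetic follows; the per-task accuracies and example counts are made up for illustration.

# Hypothetical per-task results; task 2 has 4x as many examples as task 1.
task1 = {'accuracy': 0.50, 'exs': 100}
task2 = {'accuracy': 0.90, 'exs': 400}

# Micro average: weight each task by its example count (aggregate_micro=True).
micro = (
    task1['accuracy'] * task1['exs'] + task2['accuracy'] * task2['exs']
) / (task1['exs'] + task2['exs'])

# Macro average: weight every task equally (aggregate_micro=False).
macro = (task1['accuracy'] + task2['accuracy']) / 2

print(micro)  # 0.82, i.e. (0.50 + 4 * 0.90) / 5
print(macro)  # 0.70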
Example #15
 def test_fast_final_eval(self):
     stdout, valid, test = testing_utils.train_model({
         'task': 'integration_tests',
         'validation_max_exs': 10,
         'model': 'repeat_label',
         'short_final_eval': True,
         'num_epochs': 1.0,
     })
     self.assertEqual(valid['exs'], 10, 'Validation exs is wrong')
     self.assertEqual(test['exs'], 10, 'Test exs is wrong')
Example #16
    def test_train_batch_all(self):
        args = self._get_args()
        args['candidates'] = 'batch-all-cands'
        stdout, valid, test = testing_utils.train_model(args)
        threshold = self._get_threshold()

        self.assertGreaterEqual(
            valid['hits@1'],
            threshold,
            "valid hits@1 = {}\nLOG:\n{}".format(valid['hits@1'], stdout),
        )
Example #17
 def test_topk(self):
     """Test topk generation."""
     # Top-k sampling is inherently stochastic, so just ensure there's no crash.
     testing_utils.train_model(
         dict(
             task='integration_tests:nocandidate',
             model='transformer/generator',
             optimizer='adamax',
             learningrate=7e-3,
             batchsize=32,
             num_epochs=20,
             n_layers=1,
             n_heads=1,
             ffn_size=32,
             embedding_size=32,
             inference='topk',
             topk=5,
             beam_size=5,
         )
     )
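
Top-k decoding is stochastic because each step samples from the k most likely tokens instead of taking an argmax or running beam search; that is why the snippet above only checks that training runs without crashing. Below is a hedged sketch of one such sampling step over a toy distribution (the names and numbers are hypothetical, not ParlAI's decoder API).

import random

def topk_sample(prob_by_token, k=5):
    # Keep the k most likely tokens, renormalize, and sample from them.
    top = sorted(prob_by_token.items(), key=lambda kv: kv[1], reverse=True)[:k]
    tokens, weights = zip(*top)
    total = sum(weights)
    return random.choices(tokens, weights=[w / total for w in weights])[0]

probs = {'the': 0.4, 'a': 0.3, 'cat': 0.1, 'dog': 0.1, 'ran': 0.05, '.': 0.05}
print(topk_sample(probs, k=5))  # one of the five most likely tokens, at random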
Example #18
    def test_eval_inline(self):
        args = self._get_args()
        args['eval_candidates'] = 'inline'
        stdout, valid, test = testing_utils.train_model(args)
        threshold = self._get_threshold()

        self.assertGreaterEqual(
            valid['hits@1'],
            threshold,
            "valid hits@1 = {}\nLOG:\n{}".format(valid['hits@1'], stdout),
        )
Example #19
    def test_train_fixed(self):
        args = self._get_args()
        args['candidates'] = 'fixed'
        args['encode_candidate_vecs'] = False
        stdout, valid, test = testing_utils.train_model(args)
        threshold = self._get_threshold()

        self.assertGreaterEqual(
            valid['hits@1'],
            threshold,
            "valid hits@1 = {}\nLOG:\n{}".format(valid['hits@1'], stdout),
        )
Example #20
    def test_eval_vocab(self):
        args = self._get_args()
        args['eval_candidates'] = 'vocab'
        args['encode_candidate_vecs'] = True
        stdout, valid, test = testing_utils.train_model(args)

        # accuracy should be zero, none of the vocab candidates should be the
        # correct label
        self.assertEqual(
            valid['hits@100'],
            0,
            "valid hits@1 = {}\nLOG:\n{}".format(valid['hits@1'], stdout),
        )
Example #21
    def test_resuming_reduce_on_plateau(self):
        """
        Reduce on Plateau can be tricky when combined with warmup.

        See: https://github.com/facebookresearch/ParlAI/pull/1812
        """
        with testing_utils.tempdir() as tmpdir:
            model_file = os.path.join(tmpdir, 'model')
            stdout1, valid1, test1 = testing_utils.train_model(
                dict(
                    model_file=model_file,
                    task='integration_tests:candidate',
                    model='transformer/ranker',
                    optimizer='adamax',
                    learningrate=7e-3,
                    batchsize=32,
                    num_epochs=1,
                    n_layers=1,
                    n_heads=1,
                    ffn_size=32,
                    embedding_size=32,
                    warmup_updates=1,
                    lr_scheduler='reduceonplateau',
                )
            )

            stdout2, valid2, test2 = testing_utils.train_model(
                dict(
                    model_file=model_file,
                    task='integration_tests:candidate',
                    model='transformer/ranker',
                    num_epochs=1,
                    lr_scheduler='reduceonplateau',
                )
            )
            # make sure the learning rate is decreasing
            self.assertGreater(
                valid2['lr'], 1e-5, 'Learning rate should not be that low when resuming'
            )
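
The assertGreater check above guards against the failure mode described in the linked PR: if resuming restores a learning rate that is still scaled down by warmup and then lets reduce-on-plateau keep cutting it, the LR can collapse toward zero. A rough back-of-the-envelope sketch of that interaction is shown below, assuming an illustrative mid-warmup LR and a 0.5 reduction factor; this is not ParlAI's scheduler code, just the arithmetic of the pitfall.

base_lr = 7e-3

# During warmup the LR starts far below base_lr and ramps up toward it.
warmup_lr = base_lr * 0.01  # an illustrative mid-warmup value: 7e-5

# If the resumed run keeps reducing from the warmup LR instead of the fully
# warmed-up LR, a few plateau reductions are enough to flatten it:
lr = warmup_lr
for _ in range(3):  # three reductions at factor 0.5
    lr *= 0.5
print(lr)  # 8.75e-06, below the 1e-5 floor the assertion above rejects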
Example #22
    def test_eval_fixed(self):
        args = self._get_args()
        args['eval_candidates'] = 'fixed'
        args['encode_candidate_vecs'] = True
        args['ignore_bad_candidates'] = True
        stdout, valid, test = testing_utils.train_model(args)

        # none of the train candidates appear in evaluation, so should have
        # zero accuracy: this tests whether the fixed candidates were built
        # properly (i.e., only using candidates from the train set)
        self.assertEqual(
            valid['hits@1'],
            0,
            "valid hits@1 = {}\nLOG:\n{}".format(valid['hits@1'], stdout),
        )

        # now try again with a fixed candidate file that includes all possible
        # candidates
        teacher = CandidateTeacher({'datatype': 'train'})
        all_cands = teacher.train + teacher.val + teacher.test
        all_cands_str = '\n'.join([' '.join(x) for x in all_cands])

        with testing_utils.tempdir() as tmpdir:
            tmp_cands_file = os.path.join(tmpdir, 'all_cands.text')
            with open(tmp_cands_file, 'w') as f:
                f.write(all_cands_str)
            args['fixed_candidates_path'] = tmp_cands_file
            args['encode_candidate_vecs'] = False  # don't encode before training
            args['ignore_bad_candidates'] = False
            args['num_epochs'] = 20
            stdout, valid, test = testing_utils.train_model(args)
            self.assertGreaterEqual(
                valid['hits@100'],
                0.1,
                "valid hits@1 = {}\nLOG:\n{}".format(valid['hits@1'], stdout),
            )
Example #23
 def test_badinput(self):
     """Ensures model doesn't crash on malformed inputs."""
     stdout, _, _ = testing_utils.train_model(dict(
         task='integration_tests:bad_example',
         model='seq2seq',
         learningrate=LR,
         batchsize=10,
         datatype='train:ordered:stream',
         num_epochs=1,
         numthreads=1,
         no_cuda=True,
         embeddingsize=16,
         hiddensize=16,
     ))
     self.assertIn('valid:{', stdout)
     self.assertIn('test:{', stdout)
Example #24
    def test_pyt_preprocess_train(self):
        """
        Test that the preprocess functionality works with the PytorchDataTeacher
        with a sample TorchAgent (here, the Seq2seq model).

        This tests whether an agent can train to completion with
        these preprocessed examples.
        """
        defaults = integration_test_parser_defaults.copy()
        defaults['datatype'] = 'train'
        defaults['pytorch_preprocess'] = True
        str_output, valid, test = testing_utils.train_model(defaults)
        self.assertTrue(
            solved_task(str_output, valid, test),
            'Teacher could not teach seq2seq with preprocessed obs, output: {}'
            .format(str_output)
        )
Example #25
    def _pyt_train(self, datatype):
        """
        Integration test: ensure that pytorch data teacher can successfully
        teach Seq2Seq model to fully solve the babi:task10k:1 task.

        The Seq2Seq model can solve the babi:task10k:1 task with the normal
        ParlAI setup, and thus should be able to do so with a PytorchDataTeacher.
        """
        defaults = integration_test_parser_defaults.copy()
        defaults['datatype'] = datatype
        defaults['shuffle'] = True  # for train:stream
        str_output, valid, test = testing_utils.train_model(defaults)
        self.assertTrue(
            solved_task(str_output, valid, test),
            'Teacher could not teach seq2seq with args: {}; here is str_output: {}'
            .format(defaults, str_output)
        )
Example #26
 def test_biencoder(self):
     stdout, valid, test = testing_utils.train_model(
         dict(
             task='convai2:LimitedSelfOriginalTeacher',
             model='bert_ranker/bi_encoder_ranker',
             num_epochs=1.0,
             batchsize=8,
             text_truncate=32,
         ))
     # We can't conclude much from the bi-encoder after so few iterations;
     # accuracy should be present and somewhere between 0.01 and 0.2,
     # i.e. it's basically still a random classifier.
     self.assertGreaterEqual(
         test['accuracy'], 0.01,
         'test accuracy = {}\nLOG:\n{}'.format(test['accuracy'], stdout))
     self.assertLessEqual(
         test['accuracy'], 0.2,
         'test accuracy = {}\nLOG:\n{}'.format(test['accuracy'], stdout))
Example #27
 def test_biencoder(self):
     stdout, valid, test = testing_utils.train_model(
         dict(
             task='convai2',
             model='bert_ranker/bi_encoder_ranker',
             num_epochs=0.1,
             batchsize=8,
             learningrate=3e-4,
             text_truncate=32,
             validation_max_exs=20,
             short_final_eval=True,
         ))
     # We can't conclude much from the bi-encoder after so few iterations;
     # this test just makes sure it hasn't crashed and the accuracy isn't
     # too high.
     self.assertLessEqual(
         test['accuracy'], 0.5,
         'test accuracy = {}\nLOG:\n{}'.format(test['accuracy'], stdout))
Example #28
    def test_generation(self):
        stdout, valid, test = testing_utils.train_model(
            dict(
                task='integration_tests:nocandidate',
                model='fairseq',
                arch='lstm_wiseman_iwslt_de_en',
                lr=LR,
                batchsize=BATCH_SIZE,
                num_epochs=NUM_EPOCHS,
                rank_candidates=False,
                skip_generation=False,
            ))

        self.assertTrue(
            valid['ppl'] < 1.2,
            "valid ppl = {}\nLOG:\n{}".format(valid['ppl'], stdout))
        self.assertTrue(test['ppl'] < 1.2,
                        "test ppl = {}\nLOG:\n{}".format(test['ppl'], stdout))
Example #29
    def test_labelcands(self):
        stdout, valid, test = testing_utils.train_model(
            dict(
                task='integration_tests:candidate',
                model='fairseq',
                arch='lstm_wiseman_iwslt_de_en',
                lr=LR,
                batchsize=BATCH_SIZE,
                num_epochs=NUM_EPOCHS,
                rank_candidates=True,
                skip_generation=True,
            ))

        self.assertTrue(
            valid['hits@1'] > 0.95,
            "valid hits@1 = {}\nLOG:\n{}".format(valid['hits@1'], stdout))
        self.assertTrue(
            test['hits@1'] > 0.95,
            "test hits@1 = {}\nLOG:\n{}".format(test['hits@1'], stdout))
Example #30
 def test_biencoder(self):
     stdout, valid, test = testing_utils.train_model(
         dict(
             task='convai2',
             model='bert_ranker/bi_encoder_ranker',
             num_epochs=0.1,
             batchsize=8,
             learningrate=3e-4,
             text_truncate=32,
             validation_max_exs=20,
             short_final_eval=True,
         ))
     # We can't conclude much from the bi-encoder after so few iterations;
     # accuracy should be present and somewhere between 0.01 and 0.5,
     # i.e. it's basically still a random classifier.
     self.assertGreaterEqual(
         test['accuracy'], 0.01,
         'test accuracy = {}\nLOG:\n{}'.format(test['accuracy'], stdout))
     self.assertLessEqual(
         test['accuracy'], 0.5,
         'test accuracy = {}\nLOG:\n{}'.format(test['accuracy'], stdout))