Example #1
    def __init__(self,
                 opt,
                 datatype: str = 'train',
                 seed: Optional[int] = None):
        """
        Initialize the context generator.

        opt: only a 'datapath' key is required, to specify the ParlAI data folder
        """

        if seed is not None:
            self.rng = random.Random(seed)
        else:
            self.rng = random.Random()

        convai2_opt = Opt({'datapath': opt['datapath'], 'datatype': datatype})
        self.convai2_teacher = BothTeacher(convai2_opt)

        ed_opt = Opt({
            'datapath': opt['datapath'],
            'datatype': datatype,
            'train_experiencer_only': True,
        })
        # Specify train_experiencer_only = True because we want to ensure that the text
        # will correspond to a Speaker utterance and the label to a Listener response
        self.ed_teacher = EmpatheticDialoguesTeacher(ed_opt)

        wow_opt = Opt({'datapath': opt['datapath'], 'datatype': datatype})
        self.wow_teacher = WizardDialogKnowledgeTeacher(wow_opt)

        self.topic_to_persona_path = _topic_to_persona_path(opt)
        self.wow_topics_to_episode_idxes = self._setup_topics_to_episodes()
        self.persona_strings_to_wow_topics = self._setup_personas_to_topics()
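A minimal construction sketch for the class above, assuming it is the blended_skill_talk ContextGenerator (the class name and import path are assumptions; the constructor signature and the 'datapath' requirement come from the snippet itself):

    from parlai.core.opt import Opt
    # assumed import path for the class defined above
    from parlai.tasks.blended_skill_talk.agents import ContextGenerator

    # per the docstring, only a 'datapath' key is required in opt
    generator = ContextGenerator(
        Opt({'datapath': '/path/to/ParlAI/data'}), datatype='train', seed=42
    )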
Example #2
def cl_build_ref_agent(self):
    ref_model_file = self.opt['ref_model_file']
    if ref_model_file is None or ref_model_file.lower() == "none":
        raise RuntimeError("CL training requires reference model!")
    else:
        from parlai.core.agents import create_agent_from_opt_file
        ref_agent = create_agent_from_opt_file(
            Opt({'model_file': ref_model_file}))
        eval_ref_agent = create_agent_from_opt_file(
            Opt({'model_file': ref_model_file}))
        if ref_agent is None:
            raise RuntimeError(
                "Build reference model failed! check your `ref_model_file`:{}!"
                .format(ref_model_file))
        if self.id == ref_agent.id and dict_same(self, ref_agent):
            self.use_external_ref_model = False
        else:
            self.use_external_ref_model = True
        # No need to do this
        # # check dict
        # if self.dict.tok2ind != ref_agent.dict.tok2ind or self.dict.ind2tok != ref_agent.dict.ind2tok:
        #     raise RuntimeError("Reference model is using different dict!")

    self.eval_ref_agent = eval_ref_agent
    self.ref_agent = ref_agent
Example #3
    def test_beamsearch_contextblocking(self):
        """
        Test beamsearch context blocking.
        """

        agent = create_agent_from_model_file(
            'zoo:unittest/context_blocking/model')
        agent.observe({'text': '5 4 3 2', 'episode_done': True})
        assert agent.act()['text'] == '5 4 3 2'

        agent = create_agent_from_model_file(
            'zoo:unittest/context_blocking/model',
            Opt(beam_context_block_ngram=1))
        agent.observe({'text': '5 4 3 2', 'episode_done': True})
        text = agent.act()['text']
        assert '5' not in text
        assert '4' not in text
        assert '3' not in text
        assert '2' not in text

        agent = create_agent_from_model_file(
            'zoo:unittest/context_blocking/model',
            Opt(beam_context_block_ngram=2))
        agent.observe({'text': '5 4 3 2', 'episode_done': True})
        text = agent.act()['text']
        assert '5' in text
        assert '5 4' not in text
        assert '4 3' not in text
        assert '3 2' not in text
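In the calls above, the second argument to create_agent_from_model_file is a set of option overrides. Opt is used interchangeably with a plain dict throughout these examples, so a dict literal should work as well; a small sketch under that assumption:

    from parlai.core.agents import create_agent_from_model_file

    # dict form of the overrides; assumed equivalent to Opt(beam_context_block_ngram=3)
    agent = create_agent_from_model_file(
        'zoo:unittest/context_blocking/model',
        {'beam_context_block_ngram': 3},
    )
    agent.observe({'text': '5 4 3 2', 'episode_done': True})
    print(agent.act()['text'])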
Example #4
        def get_tl(tmpdir):
            final_opt = Opt({
                'task': 'integration_tests',
                'datatype': 'valid',
                'validation_max_exs': 30,
                'short_final_eval': True,
            })
            final_opt.save(os.path.join(tmpdir, "final_opt.opt"))

            opt = Opt({
                'task': 'integration_tests',
                'validation_max_exs': 10,
                'model': 'repeat_label',
                'model_file': os.path.join(tmpdir, 'model'),
                'short_final_eval': True,
                'num_epochs': 1.0,
                'final_extra_opt': str(os.path.join(tmpdir, "final_opt.opt")),
            })
            parser = tms.setup_args()
            parser.set_params(**opt)
            popt = parser.parse_args([])
            for k, v in opt.items():
                popt[k] = v
            return tms.TrainLoop(popt)
Example #5
    def test_counts(self):

        with testing_utils.tempdir() as tmpdir:
            data_path = tmpdir

            # Check EmpatheticDialoguesTeacher, with multiple examples per episode
            opts_episodes_and_examples = [
                (
                    {'datatype': 'train'},
                    EPISODE_COUNTS['train_both_sides'],
                    EXAMPLE_COUNTS['train_both_sides'],
                ),  # Test the default mode
                (
                    {'datatype': 'train', 'train_experiencer_only': True},
                    EPISODE_COUNTS['train_experiencer_only'],
                    EXAMPLE_COUNTS['train_experiencer_only'],
                ),
                (
                    {'datatype': 'train', 'train_experiencer_only': False},
                    EPISODE_COUNTS['train_both_sides'],
                    EXAMPLE_COUNTS['train_both_sides'],
                ),
                (
                    {'datatype': 'valid'},
                    EPISODE_COUNTS['valid'],
                    EXAMPLE_COUNTS['valid'],
                ),
                ({'datatype': 'test'}, EPISODE_COUNTS['test'], EXAMPLE_COUNTS['test']),
            ]
            for teacher_class in [EmpatheticDialoguesTeacher]:
                for opt, num_episodes, num_examples in opts_episodes_and_examples:
                    full_opt = Opt({**opt, 'datapath': data_path})
                    teacher = teacher_class(full_opt)
                    self.assertEqual(teacher.num_episodes(), num_episodes)
                    self.assertEqual(teacher.num_examples(), num_examples)

            # Check EmotionClassificationSituationTeacher, with one example per episode
            train_episode_count = EPISODE_COUNTS['train_experiencer_only']
            # For the situation classifier, we only want to have one episode per train
            # conversation
            opts_episodes = [
                ({'datatype': 'train'}, train_episode_count),  # Test the default mode
                (
                    {'datatype': 'train', 'train_experiencer_only': True},
                    train_episode_count,
                ),
                (
                    {'datatype': 'train', 'train_experiencer_only': False},
                    train_episode_count,
                ),
                ({'datatype': 'valid'}, EPISODE_COUNTS['valid']),
                ({'datatype': 'test'}, EPISODE_COUNTS['test']),
            ]
            for teacher_class in [EmotionClassificationSituationTeacher]:
                for opt, num_episodes in opts_episodes:
                    full_opt = Opt({**opt, 'datapath': data_path})
                    teacher = teacher_class(full_opt)
                    self.assertEqual(teacher.num_episodes(), num_episodes)
                    self.assertEqual(teacher.num_examples(), num_episodes)
Example #6
    def _test_iterate(self, teacher_class):
        for dt in [
            'train:ordered',
            'train:stream:ordered',
            'valid',
            'test',
            'valid:stream',
            'test:stream',
        ]:
            opt = Opt({'datatype': dt, 'datapath': '/tmp', 'task': 'test'})
            teacher = teacher_class(opt)

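            # the same three-act episode pattern is verified three times before
            # the teacher reports that the epoch is done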
            self._verify_act(teacher.act(), 1, 2, False)
            self._verify_act(teacher.act(), 2, 4, False)
            self._verify_act(teacher.act(), 3, 6, True)

            self._verify_act(teacher.act(), 1, 2, False)
            self._verify_act(teacher.act(), 2, 4, False)
            self._verify_act(teacher.act(), 3, 6, True)

            self._verify_act(teacher.act(), 1, 2, False)
            self._verify_act(teacher.act(), 2, 4, False)
            self._verify_act(teacher.act(), 3, 6, True)

            assert teacher.epoch_done()
Example #7
 def test_save_load(self):
     o = Opt({'a': 3, 'b': 'foo'})
     with testing_utils.tempdir() as tmpdir:
         fn = os.path.join(tmpdir, "opt")
         o.save(fn)
         o2 = Opt.load(fn)
         assert o == o2
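The round trip above works because Opt.save writes the options to disk and Opt.load reads them back; Example #8 below writes an .opt file with json.dumps, which suggests the on-disk format is JSON. A short sketch that inspects the saved file directly, under that assumption:

    import json
    import os
    import tempfile

    from parlai.core.opt import Opt

    with tempfile.TemporaryDirectory() as tmpdir:
        fn = os.path.join(tmpdir, 'opt')
        Opt({'a': 3, 'b': 'foo'}).save(fn)
        with open(fn) as f:
            raw = json.load(f)  # assumes Opt.save serializes to JSON
        assert raw['a'] == 3 and raw['b'] == 'foo'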
Example #8
    def test_init_from_from_checkpoint(self):
        with testing_utils.tempdir() as temp_dir:
            opt_from_file = {
                'datapath': 'dummy_path',
                'model': 'repeat_label',
                'init_model': os.path.join(temp_dir, 'something'),
                'model_file': os.path.join(temp_dir, 'something_else'),
            }
            opt = Opt({
                'datapath': 'dummy_path',
                'model': 'repeat_label',
                'init_model': os.path.join(temp_dir, 'something_else.checkpoint'),
                'model_file': os.path.join(temp_dir, 'something_else'),
                'load_from_checkpoint': True,
            })

            with open(os.path.join(temp_dir, 'something_else.opt'), 'w') as f:
                f.write(json.dumps(opt_from_file))

            agent = create_agent_from_opt_file(opt)
            init_model = agent.opt['init_model']
            # assert that the model was loaded with the correct checkpoint
            assert '.checkpoint' in init_model
Example #9
    def test_allow_missing_init_opts(self):
        """
        Test --allow-missing-init-opts.
        """

        with testing_utils.tempdir() as temp_dir:

            init_opt_path = os.path.join(temp_dir, 'init_opt.opt')

            # Save a test opt file with an argument that doesn't exist
            init_opt = Opt({'made_up_arg': 'foo'})
            init_opt.save(init_opt_path)

            # Assert that the opt file normally can't be loaded in
            with self.assertRaises(RuntimeError):
                _ = ParlaiParser(True,
                                 True).parse_kwargs(init_opt=init_opt_path)

            # Assert that the opt file *can* be loaded in if we set
            # --allow-missing-init-opts, and assert that the made-up arg does not exist
            # in the opt
            opt = ParlaiParser(True,
                               True).parse_kwargs(init_opt=init_opt_path,
                                                  allow_missing_init_opts=True)
            self.assertNotIn('made_up_arg', opt)
Example #10
    def add_extra_args(self, args=None):
        super().add_extra_args(args)
        parsed = vars(self.parse_known_args(args, nohelp=True)[0])
        # Also load extra args options if a file is given.
        if parsed.get("init_opt") is not None:
            try:
                self._load_known_opts(parsed.get("init_opt"), parsed)
            except FileNotFoundError:
                # don't die if -o isn't found here. See comment in second call
                # later on.
                pass
        parsed = self._infer_datapath(parsed)

        partial = Opt(parsed)

        for model in [
                "system_model",
                "user_model",
                "api_schema_grounding_model",
                "goal_grounding_model",
                "api_resp_model",
        ]:
            if (model in partial and partial[model] is not None
                    and len(partial[model]) > 0):
                self.add_model_subargs(partial[model], partial)

        for model_file_prefix in ["system", "user"]:
            key = model_file_prefix + "_model_file"
            if key in partial and partial[key] and len(partial[key]) > 0:
                model_name = self._get_model_name_from_model_file(key, partial)
                self.add_model_subargs(model_name, partial)
Example #11
 def test_gpt2_bpe_tokenize(self):
     with testing_utils.capture_output():
         opt = Opt({'dict_tokenizer': 'gpt2', 'datapath': './data'})
         agent = DictionaryAgent(opt)
     self.assertEqual(
         # grinning face emoji
         agent.gpt2_tokenize(u'Hello, ParlAI! \U0001f600'),
         [
             'Hello',
             ',',
             r'\xc4\xa0Par',
             'l',
             'AI',
             '!',
             r'\xc4\xa0\xc3\xb0\xc5\x81\xc4\xba',
             r'\xc4\xa2',
         ],
     )
     self.assertEqual(
         agent.vec2txt(agent.tok2ind[w] for w in [
             'Hello',
             ',',
             r'\xc4\xa0Par',
             'l',
             'AI',
             '!',
             r'\xc4\xa0\xc3\xb0\xc5\x81\xc4\xba',
             r'\xc4\xa2',
         ]),
         # grinning face emoji
         u'Hello, ParlAI! \U0001f600',
     )
Example #12
 def run(self):
     """
     1) load model 2) generate embeddings 3) save embeddings.
     """
     self.use_cuda = not self.opt.get('no_cuda') and torch.cuda.is_available()
     overrides = {'interactive_mode': True, 'interactive_candidates': 'inline'}
     if self.opt['dpr_model']:
         overrides.update(
             {
                 'model': 'dpr_agent',
                 'model_file': self.opt['model_file'],
                 'override': {
                     'model': 'dpr_agent',
                     'interactive_candidates': 'inline',
                 },
             }
         )
         agent = create_agent(Opt(overrides))
     else:
         agent = create_agent_from_model_file(self.opt['model_file'], overrides)
     model = agent.model.module if hasattr(agent.model, 'module') else agent.model
     assert hasattr(model, 'encoder_cand') or hasattr(model, 'cand_encoder')
     assert isinstance(agent, TorchRankerAgent)
     passages = self.load_passages()
     data = self.encode_passages(agent, passages)
     self.save_data(data)
Example #13
    def _build_model(self, opt: Opt) -> Tuple[PolyEncoderModule, DictionaryAgent]:
        """
        Build poly-encoder module.

        :param opt:
            options from base RAG Model

        :return (module, dict):
            the dropout poly-encoder module and its dictionary agent
        """
        model_file = modelzoo_path(opt['datapath'], opt['poly_faiss_model_file'])
        model_opt = Opt.load(f'{model_file}.opt')

        create_model_opt = {
            **{k: model_opt[k] for k in TRANSFORMER_RANKER_BASE_OPT},
            **{k: model_opt[k] for k in POLYENCODER_OPT_KEYS},
            'model': 'transformer/dropout_poly',
            'init_model': model_file,
            'dict_file': f'{model_file}.dict',
            # necessary opt args
            'multitask_weights': [1],
            # dropout_poly args
            'poly_dropout_reduction_type': model_opt['poly_dropout_reduction_type'],
            'poly_dropout_use_codes': model_opt.get('poly_dropout_use_codes', True),
        }
        logging.disable()
        agent = create_agent(Opt(create_model_opt))
        logging.enable()
        assert isinstance(agent, DropoutPolyAgent)
        return agent.model, agent.dict
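The Opt.load(f'{model_file}.opt') call above relies on the convention that a trained model file has a sibling '<model_file>.opt' file holding the options it was built with, the same convention Example #8 exercises by writing an .opt file next to a model_file. A small inspection sketch with a hypothetical path:

    from parlai.core.opt import Opt

    # '/path/to/model.opt' is a placeholder for a real model's sidecar opt file
    model_opt = Opt.load('/path/to/model.opt')
    print(model_opt.get('model'), model_opt.get('dict_file'))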
Example #14
    def add_extra_args(self, args=None):
        """
        Add more args depending on how known args are set.
        """
        parsed = vars(self.parse_known_args(args, nohelp=True)[0])
        # Also load extra args options if a file is given.
        if parsed.get('init_opt') is not None:
            try:
                self._load_known_opts(parsed.get('init_opt'), parsed)
            except FileNotFoundError:
                # don't die if -o isn't found here. See comment in second call
                # later on.
                pass
        parsed = self._infer_datapath(parsed)

        partial = Opt(parsed)

        # find which image mode specified if any, and add additional arguments
        image_mode = parsed.get('image_mode', None)
        if image_mode is not None and image_mode != 'no_image_model':
            self.add_image_args(image_mode)

        # find which task specified if any, and add its specific arguments
        task = parsed.get('task', None)
        if task is not None:
            self.add_task_args(task, partial)
        evaltask = parsed.get('evaltask', None)
        if evaltask is not None:
            self.add_task_args(evaltask, partial)

        # find which model specified if any, and add its specific arguments
        model = get_model_name(parsed)
        if model is not None:
            self.add_model_subargs(model, partial)

        # add world args, if we know a priori which world is being used
        if task is not None:
            self.add_world_args(
                task,
                parsed.get('interactive_task', False),
                parsed.get('selfchat_task', False),
                partial,
            )

        # reparse args now that we've inferred some things.  specifically helps
        # with a misparse of `-opt` as `-o pt`, which causes opt loading to
        # try to load the file "pt" which doesn't exist.
        # After adding model arguments, -opt becomes known (it's in TorchAgent),
        # and we parse the `-opt` value correctly.
        parsed = vars(self.parse_known_args(args, nohelp=True)[0])
        if parsed.get('init_opt') is not None:
            self._load_known_opts(parsed.get('init_opt'), parsed)

        # reset parser-level defaults over any model-level defaults
        try:
            self.set_defaults(**self._defaults)
        except AttributeError:
            raise RuntimeError('Please file an issue on github that argparse '
                               'got an attribute error when parsing.')
Example #15
    def _process_args_to_opts(self,
                              args_that_override: Optional[List[str]] = None):
        self.opt = Opt(vars(self.args))

        # custom post-parsing
        self.opt['parlai_home'] = self.parlai_home
        self.opt = self._infer_datapath(self.opt)

        # set all arguments specified in command line as overridable
        option_strings_dict = {}
        store_true = []
        store_false = []
        for group in self._action_groups:
            for a in group._group_actions:
                if hasattr(a, 'option_strings'):
                    for option in a.option_strings:
                        option_strings_dict[option] = a.dest
                        if '_StoreTrueAction' in str(type(a)):
                            store_true.append(option)
                        elif '_StoreFalseAction' in str(type(a)):
                            store_false.append(option)

        if args_that_override is None:
            args_that_override = _sys.argv[1:]

        for i in range(len(args_that_override)):
            if args_that_override[i] in option_strings_dict:
                if args_that_override[i] in store_true:
                    self.overridable[option_strings_dict[
                        args_that_override[i]]] = True
                elif args_that_override[i] in store_false:
                    self.overridable[option_strings_dict[
                        args_that_override[i]]] = False
                elif (i < len(args_that_override) - 1 and
                      args_that_override[i + 1] not in option_strings_dict):
                    key = option_strings_dict[args_that_override[i]]
                    self.overridable[key] = self.opt[key]
        self.opt['override'] = self.overridable

        # load opts if a file is provided.
        if self.opt.get('init_opt', None) is not None:
            self._load_opts(self.opt)

        # map filenames that start with 'zoo:' to point to the model zoo dir
        options_to_change = {
            'model_file', 'dict_file', 'bpe_vocab', 'bpe_merge'
        }
        for each_key in options_to_change:
            if self.opt.get(each_key) is not None:
                self.opt[each_key] = modelzoo_path(self.opt.get('datapath'),
                                                   self.opt[each_key])
            if self.opt['override'].get(each_key) is not None:
                # also check override
                self.opt['override'][each_key] = modelzoo_path(
                    self.opt.get('datapath'), self.opt['override'][each_key])

        # add start time of an experiment
        self.opt['starttime'] = datetime.datetime.today().strftime(
            '%b%d_%H-%M')
Example #16
 def test_save_withignore(self):
     o = Opt({'a': 3, 'b': 'foo', 'override': {'a': 3}})
     with testing_utils.tempdir() as tmpdir:
         fn = os.path.join(tmpdir, "opt")
         o.save(fn)
         o2 = Opt.load(fn)
         assert o != o2
         assert 'override' not in o2
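The assertions above show that Opt.save does not persist the 'override' bookkeeping key, so a save/load round trip is only lossless for regular option keys. A minimal restatement of that behavior using a temporary directory:

    import os
    import tempfile

    from parlai.core.opt import Opt

    with tempfile.TemporaryDirectory() as tmpdir:
        fn = os.path.join(tmpdir, 'opt')
        Opt({'a': 3, 'override': {'a': 3}}).save(fn)
        o2 = Opt.load(fn)
        assert 'override' not in o2  # dropped on save, per the test above
        assert o2['a'] == 3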
Example #17
def train(epochs=5):
    opt = Opt({'num_epochs': epochs, 'datapath': datapath, 'datatype': datatype})
    # set up timers
    train_time = Timer()
    validate_time = Timer()
    log_time = Timer()
    save_time = Timer()
    parleys = 0
Example #18
 def test_iter(self):
     opt = Opt({'datatype': 'valid', 'datapath': '/tmp', 'task': 'test'})
     teacher = TupleTeacher(opt)
     # twice to ensure we reset iterators correctly
     examples = list(teacher)
     assert len(examples) == 9
     examples = list(teacher)
     assert len(examples) == 9
Example #19
 def test_no_truncate(self):
     with self.assertRaises(ValueError):
         testing_utils.train_model(Opt({**_DEFAULT_OPTIONS, 'truncate': -1}))
Example #20
def get_dictionary(PATH: str) -> DictionaryAgent:
    """
    Load a saved dictionary.

    :param PATH: path to the saved dictionary file
    :return: the loaded dictionary
    """
    opt = Opt()
    dictionary = DictionaryAgent(opt=opt)
    dictionary.load(PATH)
    return dictionary
Example #21
    def __init__(
        self,
        opt: Opt,
        dpr_model: str = 'bert',
        pretrained_path: str = DPR_ZOO_MODEL,
        encoder_type: str = 'query',
    ):
        # Override options
        try:
            config: BertConfig = BertConfig.from_pretrained(
                'bert-base-uncased')
        except OSError:
            config_path = PathManager.get_local_path(
                os.path.join(opt['datapath'], "bert_base_uncased",
                             self.CONFIG_PATH))
            config: BertConfig = BertConfig.from_pretrained(config_path)

        pretrained_path = modelzoo_path(opt['datapath'],
                                        pretrained_path)  # type: ignore
        if not os.path.exists(pretrained_path):
            # when initializing from parlai rag models, the pretrained path
            # may no longer exist. This is fine if we've already trained
            # the model.
            assert dpr_model == 'bert_from_parlai_rag'
            logging.error(f'Pretrained Path does not exist: {pretrained_path}')
            pretrained_path = modelzoo_path(opt['datapath'],
                                            DPR_ZOO_MODEL)  # type: ignore
            dpr_model = 'bert'
            logging.error(f'Setting to zoo model: {pretrained_path}')
        enc_opt = {
            "n_heads": config.num_attention_heads,
            "n_layers": config.num_hidden_layers,
            "embedding_size": config.hidden_size,
            "ffn_size": config.intermediate_size,
            "dropout": config.hidden_dropout_prob,
            "attention_dropout": config.attention_probs_dropout_prob,
            "activation": config.hidden_act,
            "variant": 'xlm',
            "reduction_type": 'first',
            "n_positions": config.max_position_embeddings,
            "n_segments": config.type_vocab_size,
        }
        embedding = torch.nn.Embedding(config.vocab_size,
                                       config.hidden_size,
                                       padding_idx=config.pad_token_id)
        super().__init__(
            Opt(enc_opt),
            vocabulary_size=config.vocab_size,
            padding_idx=config.pad_token_id,
            embedding=embedding,
            reduction_type='first',
        )

        self._load_state(opt['datapath'], dpr_model, pretrained_path,
                         encoder_type)
Example #22
    def test_safe_personas(self):

        base_kwargs = Opt({'datatype': 'train', 'task': 'blended_skill_talk'})
        safe_personas_only_to_count = {False: 4819, True: 3890}
        for safe_personas_only, count in safe_personas_only_to_count.items():
            full_kwargs = {**base_kwargs, 'safe_personas_only': safe_personas_only}
            parser = setup_args()
            parser.set_defaults(**full_kwargs)
            opt = parser.parse_args([])
            personas = _load_personas(opt)
            self.assertEqual(len(personas), count)
Example #23
 def _opt(self, **kwargs):
     return Opt(
         batchsize=4,
         optimizer='adam',
         n_layers=1,
         n_heads=4,
         ffn_size=16,
         embedding_size=16,
         skip_generation=True,
         **kwargs,
     )
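As this helper shows, Opt also accepts keyword arguments like a plain dict, with any extra **kwargs merged in as additional keys. A tiny usage sketch with arbitrary values:

    from parlai.core.opt import Opt

    opt = Opt(batchsize=8, optimizer='adam', skip_generation=True)
    assert opt['batchsize'] == 8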
Example #24
    def test_beamsearch_blocking(self):
        """
        Test beamsearch blocking.
        """
        with testing_utils.tempdir() as tmpdir:
            agent = create_agent_from_model_file(
                'zoo:unittest/beam_blocking/model')
            agent.observe({'text': '5 5 5 5 5 5 5', 'episode_done': True})
            assert agent.act()['text'] == '5 5 5 5 5 5 5'

            agent = create_agent_from_model_file(
                'zoo:unittest/beam_blocking/model', Opt(beam_block_ngram=1))
            agent.observe({'text': '5 5 5 5 5 5 5', 'episode_done': True})
            assert '5 5' not in agent.act()['text']

            agent = create_agent_from_model_file(
                'zoo:unittest/beam_blocking/model', Opt(beam_block_ngram=2))
            agent.observe({'text': '5 5 5 5 5 5 5', 'episode_done': True})
            assert '5 5 5' not in agent.act()['text']

            with open(os.path.join(tmpdir, 'blocklist.txt'), 'w') as f:
                f.write("38\n62\n34 34\n")

            agent = create_agent_from_model_file(
                'zoo:unittest/beam_blocking/model',
                Opt(beam_block_list_filename=os.path.join(
                    tmpdir, 'blocklist.txt')),
            )
            agent.observe({'text': '4 4 4', 'episode_done': True})
            assert agent.act()['text'] == '4 4 4'

            agent.observe({'text': '38 38 38', 'episode_done': True})
            assert '38' not in agent.act()['text']

            agent.observe({'text': '62 62 62', 'episode_done': True})
            assert '62' not in agent.act()['text']

            agent.observe({'text': '34 34 34', 'episode_done': True})
            text = agent.act()['text']
            assert '34' in text
            assert '34 34' not in text
Example #25
    def test_multitask(self):
        """
        Test that model correctly handles multiple inputs.

        Random chance is 10%, so this should be able to get much better than that very
        quickly.
        """
        args = Opt({**self.base_args, **self.multitask_args})
        valid, test = testing_utils.train_model(args)
        assert (
            valid['accuracy'] > 0.2
        ), f'ImagePolyencoderAgent val-set accuracy on a simple task was {valid["accuracy"].value():0.2f}.'
Example #26
 def test_nodatafile(self):
     for dt in [
             'train:ordered',
             'train:stream:ordered',
             'valid',
             'test',
             'valid:stream',
             'test:stream',
     ]:
         opt = Opt({'datatype': dt, 'datapath': '/tmp', 'task': 'test'})
         with self.assertRaises(KeyError):
             NoDatafileTeacher(opt)
Example #27
    def test_opt(self):
        opt = {'x': 0}
        opt = Opt(opt)
        opt['x'] += 1
        opt['x'] = 10
        history = opt.history['x']
        self.assertEqual(history[0][1], 1, 'History not set properly')
        self.assertEqual(history[1][1], 10, 'History not set properly')

        opt_copy = deepcopy(opt)
        history = opt_copy.history['x']
        self.assertEqual(history[0][1], 1, 'Deepcopy history not set properly')
        self.assertEqual(history[1][1], 10, 'Deepcopy history not set properly')
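The test above shows that Opt records every assignment: opt.history maps a key to a list of entries whose second element is the assigned value, and deepcopy preserves that record. A minimal sketch that reads the history back, assuming only the indexing used in the test:

    from parlai.core.opt import Opt

    opt = Opt({'x': 0})
    opt['x'] = 5
    # the most recent recorded value for 'x' should be 5
    assert opt.history['x'][-1][1] == 5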
Example #28
 def test_gpt2_bpe_tokenize(self):
     opt = Opt({'dict_tokenizer': 'gpt2', 'datapath': './data'})
     agent = DictionaryAgent(opt)
     self.assertEqual(
         # grinning face emoji
         agent.gpt2_tokenize(u'Hello, ParlAI! \U0001f600'),
         GPT2_BPE_RESULT,
     )
     self.assertEqual(
         agent.vec2txt(agent.tok2ind[w] for w in GPT2_BPE_RESULT),
         # grinning face emoji
         u'Hello, ParlAI! \U0001f600',
     )
Example #29
    def test_asymmetry(self):
        opt = Opt({'model': 'transformer/generator', 'n_layers': 1})
        agent = create_agent(opt)
        self.assertEqual(agent.model.encoder.n_layers, 1)
        self.assertEqual(agent.model.decoder.n_layers, 1)

        opt = Opt({
            'model': 'transformer/generator',
            'n_layers': 1,
            'n_encoder_layers': 2
        })
        agent = create_agent(opt)
        self.assertEqual(agent.model.encoder.n_layers, 2)
        self.assertEqual(agent.model.decoder.n_layers, 1)

        opt = Opt({
            'model': 'transformer/generator',
            'n_layers': 1,
            'n_encoder_layers': 2,
            'n_decoder_layers': 4,
        })
        agent = create_agent(opt)
        self.assertEqual(agent.model.encoder.n_layers, 2)
        self.assertEqual(agent.model.decoder.n_layers, 4)

        opt = Opt({
            'model': 'transformer/generator',
            'n_layers': 1,
            'n_decoder_layers': 4
        })
        agent = create_agent(opt)
        self.assertEqual(agent.model.encoder.n_layers, 1)
        self.assertEqual(agent.model.decoder.n_layers, 4)

        opt = Opt({'model': 'transformer/generator'})
        agent = create_agent(opt)
        self.assertEqual(agent.model.encoder.n_layers, 2)
        self.assertEqual(agent.model.decoder.n_layers, 2)
Example #30
 def test_gpt2_bpe_tokenize(self):
     datapath = ParlaiParser().parse_args([], print_args=False)['datapath']
     opt = Opt({'dict_tokenizer': 'gpt2', 'datapath': datapath})
     agent = DictionaryAgent(opt)
     self.assertEqual(
         # grinning face emoji
         agent.gpt2_tokenize(u'Hello, ParlAI! \U0001f600'),
         GPT2_BPE_RESULT,
     )
     self.assertEqual(
         agent.vec2txt(agent.tok2ind[w] for w in GPT2_BPE_RESULT),
         # grinning face emoji
         u'Hello, ParlAI! \U0001f600',
     )