Example #1
0
    def setup_task(cls, args, **kwargs):
        """Build the task from its configuration by loading both dictionaries.

        The source dictionary is created with the extra special symbols
        (CLS, SEP, MASK, EOL, URL) and then filled from the file named by
        ``args['dataset']['srcdict']``. The target dictionary is obtained
        from ``Dictionary.load`` on ``args['dataset']['tgtdict']``.

        Args:
            args: nested, subscriptable configuration. This method reads
                ``args['task']['data']``, ``args['dataset']['srcdict']``
                and ``args['dataset']['tgtdict']``.
            **kwargs: accepted but not used by this method.

        Returns:
            The result of ``cls(args, src_dict, tgt_dict)``.
        """
        data_paths = utils.split_paths(args['task']['data'])
        # The data paths are only checked for presence; the dictionaries
        # themselves come from the locations in the 'dataset' section.
        assert len(data_paths) > 0

        special_symbols = [
            constants.CLS,
            constants.SEP,
            constants.MASK,
            constants.EOL,
            constants.URL,
        ]
        source_dictionary = Dictionary(extra_special_symbols=special_symbols)
        source_dictionary.add_from_file(args['dataset']['srcdict'])

        target_dictionary = Dictionary.load(args['dataset']['tgtdict'])

        # NOTE: agreement of pad/eos/unk between the two dictionaries is
        # not verified here.
        return cls(args, source_dictionary, target_dictionary)
Example #2
0
    def load_dictionary(cls, filename):
        """Load a dictionary from ``filename``.

        A fresh ``Dictionary`` carrying the extra special symbols
        (CLS, SEP, MASK, EOL, URL) is created and then populated from the
        file: names ending in ``.txt`` go through ``add_from_file``, every
        other name goes through ``add_from_json_file``.

        Args:
            filename (str): the filename

        Returns:
            The populated ``Dictionary`` instance, for both file kinds.
        """
        # Decide the loader first so a non-string ``filename`` still fails
        # before any dictionary is constructed.
        is_plain_text = filename.endswith('.txt')

        dictionary = Dictionary(extra_special_symbols=[
            constants.CLS, constants.SEP, constants.MASK, constants.EOL,
            constants.URL
        ])
        if is_plain_text:
            dictionary.add_from_file(filename)
        else:
            # Call the loader for its effect and return the instance itself,
            # exactly like the ``.txt`` branch. Returning the loader's result
            # would hand callers ``None`` if ``add_from_json_file`` mutates
            # in place the way ``add_from_file`` is used above
            # (NOTE(review): confirm its return value against Dictionary).
            dictionary.add_from_json_file(filename)
        return dictionary