Python Dictionary.add_symbol 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: fairseq_stchde.data

클래스/타입: Dictionary

메소드/함수: add_symbol

hotexamples.com에서의 예제들: 5

Python Dictionary.add_symbol - 예제 5개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python fairseq_stchde.data.Dictionary.add_symbol의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

load(7)

Dictionary(6)

add_symbol(5)

add_file_to_dictionary(2)

index(2)

pad(2)

finalize(1)

pad_to_multiple_(1)

예제 #1

파일 보기

파일: multilingual_data_manager.py 프로젝트: dubelbog/st_ch_de

 def create_lang_dictionary(cls, langs):
     """Build a dictionary whose added entries are the given languages.

     Args:
         langs: iterable of language symbols; each one is added in order.

     Returns:
         The newly created ``Dictionary``.
     """
     placeholder = "<unk>"
     # Hack kept from the original: every special symbol is given the same
     # "<unk>" string so that symbols other than unk are removed, as the
     # language dictionary does not need them.
     special_symbols = {
         "pad": placeholder,
         "eos": placeholder,
         "unk": placeholder,
         "bos": placeholder,
     }
     languages = Dictionary(**special_symbols)
     for language in langs:
         languages.add_symbol(language)
     return languages

예제 #2

파일 보기

파일: dummy_masked_lm.py 프로젝트: dubelbog/st_ch_de

 def setup_task(cls, args, **kwargs):
     """Set up the task with a synthetic vocabulary.

     Fills a fresh ``Dictionary`` with ``args.dict_size`` placeholder symbols
     ("word0", "word1", ...), logs the resulting dictionary size and returns
     ``cls(args, dictionary)``. Extra keyword arguments are accepted but
     not used here.
     """
     vocab = Dictionary()
     dict_size = args.dict_size
     for idx in range(dict_size):
         vocab.add_symbol("word{}".format(idx))
     num_types = len(vocab)
     logger.info("dictionary: {} types".format(num_types))
     return cls(args, vocab)

예제 #3

파일 보기

파일: dummy_mt.py 프로젝트: dubelbog/st_ch_de

    def setup_task(cls, args, **kwargs):
        """Set up the task with a synthetic vocabulary and position limits.

        Fills a fresh ``Dictionary`` with ``args.dict_size`` placeholder
        symbols ("word0", "word1", ...), logs its size, then stores
        ``max_source_positions`` / ``max_target_positions`` on ``args`` before
        returning ``cls(args, dictionary)``. Extra keyword arguments are
        accepted but not used here.
        """
        vocab = Dictionary()
        dict_size = args.dict_size
        for idx in range(dict_size):
            vocab.add_symbol("word{}".format(idx))
        num_types = len(vocab)
        logger.info("dictionary: {} types".format(num_types))

        # Each limit is the configured length plus the pad index plus a fixed
        # margin of 2.
        position_margin = 2
        args.max_source_positions = args.src_len + vocab.pad() + position_margin
        args.max_target_positions = args.tgt_len + vocab.pad() + position_margin

        return cls(args, vocab)

예제 #4

파일 보기

class DummyLMTask(FairseqTask):
    """Language-modeling task that serves synthetic data.

    The vocabulary consists of ``cfg.dict_size`` placeholder symbols and every
    sample is the same run of consecutive token ids, with the target being the
    source shifted by one position.
    """

    def __init__(self, cfg: DummyLMConfig):
        """Create the synthetic dictionary and the shared source/target rows."""
        super().__init__(cfg)

        # Build the dictionary from placeholder symbols "word0", "word1", ...
        vocab = self.dictionary = Dictionary()
        for idx in range(cfg.dict_size):
            vocab.add_symbol("word{}".format(idx))
        # Original note: often faster if the size is divisible by 8.
        vocab.pad_to_multiple_(8)
        logger.info("dictionary: {} types".format(len(vocab)))

        # tokens_per_sample + 1 consecutive ids, offset by the pad index + 1.
        positions = torch.arange(cfg.tokens_per_sample + 1)
        token_ids = positions + vocab.pad() + 1

        # Source drops the last id, target drops the first one.
        self.dummy_src = token_ids[:-1]
        self.dummy_tgt = token_ids[1:]

    def load_dataset(self, split, epoch=1, combine=False, **kwargs):
        """Register a synthetic dataset under the given split name.

        Args:
            split (str): name of the split (e.g., train, valid, test)

        ``epoch``, ``combine`` and any extra keyword arguments are accepted
        but not used by this implementation.
        """
        cfg = self.cfg
        bsz = cfg.batch_size
        if bsz is None:
            # No explicit batch size: derive it from the token budget,
            # never going below one sentence.
            bsz = max(1, cfg.max_tokens // cfg.tokens_per_sample)

        # Every row of the batch is the same precomputed sequence.
        src_tokens = torch.stack([self.dummy_src] * bsz)
        src_lengths = torch.full(
            (bsz, ), cfg.tokens_per_sample, dtype=torch.long)
        target = torch.stack([self.dummy_tgt] * bsz)

        batch = {
            "id": 1,
            "net_input": {
                "src_tokens": src_tokens,
                "src_lengths": src_lengths,
            },
            "target": target,
            "nsentences": bsz,
            "ntokens": bsz * cfg.tokens_per_sample,
        }
        self.datasets[split] = DummyDataset(
            batch,
            num_items=cfg.dataset_size,
            item_size=cfg.tokens_per_sample,
        )

    @property
    def source_dictionary(self):
        """Dictionary used for the source side (the single shared one)."""
        return self.dictionary

    @property
    def target_dictionary(self):
        """Dictionary used for the target side (the single shared one)."""
        return self.dictionary

예제 #5

파일 보기

def augment_dictionary(
    dictionary: Dictionary,
    language_list: List[str],
    lang_tok_style: str,
    langtoks_specs: Sequence[str] = (LangTokSpec.main.value, ),
    extra_data: Optional[Dict[str, str]] = None,
) -> None:
    """Add language tokens, and possibly a mask token, to ``dictionary``.

    For every spec in ``langtoks_specs`` and every language in
    ``language_list`` the token produced by ``get_lang_tok`` is added. The
    "<mask>" symbol is added afterwards when ``lang_tok_style`` equals the
    mbart style value, or when ``extra_data`` is given and contains the
    ``mono_dae`` spec value as a key.

    Args:
        dictionary: dictionary that is modified in place.
        language_list: languages to create tokens for.
        lang_tok_style: style value forwarded to ``get_lang_tok``.
        langtoks_specs: spec values forwarded to ``get_lang_tok``.
        extra_data: optional mapping checked for the ``mono_dae`` key.
    """
    for spec in langtoks_specs:
        for language in language_list:
            lang_tok = get_lang_tok(
                lang=language,
                lang_tok_style=lang_tok_style,
                spec=spec,
            )
            dictionary.add_symbol(lang_tok)

    needs_mask = lang_tok_style == LangTokStyle.mbart.value
    if not needs_mask and extra_data is not None:
        # Only consult extra_data when the style alone did not decide it.
        needs_mask = LangTokSpec.mono_dae.value in extra_data
    if needs_mask:
        dictionary.add_symbol("<mask>")