Python Dictionary.pad 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: pytorch_translate.dictionary

클래스/타입: Dictionary

메소드/함수: pad

hotexamples.com에서의 예제들: 4

Python Dictionary.pad - 4개의 예제가 발견되었습니다. 다음은 오픈소스 프로젝트에서 추출한, Python의 pytorch_translate.dictionary.Dictionary.pad를 실제로 사용한 예제 중 평점이 가장 높은 것들입니다. 예제를 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

eos(4)

pad(4)

build_vocab_file_if_nonexistent(3)

Dictionary(2)

add_symbol(1)

예제 #1

파일 보기

파일: data.py 프로젝트: planb-hakone/translate

def make_language_pair_dataset_from_text(
    source_text_file: str,
    target_text_file: str,
    source_dict: pytorch_translate_dictionary.Dictionary,
    target_dict: pytorch_translate_dictionary.Dictionary,
    append_eos: Optional[bool] = False,
    reverse_source: Optional[bool] = True,
    char_source_dict: Optional[pytorch_translate_dictionary.Dictionary] = None,
) -> data.LanguagePairDataset:
    """Build a source/target pair dataset from two raw text files.

    When ``char_source_dict`` is given, the source side is parsed into an
    ``InMemoryNumpyWordCharDataset`` and the result is a
    ``LanguagePairSourceCharDataset``; otherwise the source side is an
    ``IndexedRawTextDataset`` and the result is a ``LanguagePairDataset``.

    Args:
        source_text_file: Path handed to the source-side dataset.
        target_text_file: Path handed to the target-side dataset.
        source_dict: Dictionary used for the source side. Its ``pad()`` and
            ``eos()`` values are also used as the pair dataset's
            ``pad_idx`` / ``eos_idx``.
        target_dict: Dictionary used for the target side.
        append_eos: Forwarded as ``append_eos`` to the source side only.
        reverse_source: Forwarded as ``reverse_order`` to the source side.
        char_source_dict: Optional character-level dictionary; selects the
            word+char source path when not ``None``.

    Returns:
        The paired dataset wrapping the source and target datasets.
    """
    # The target side is built first, with fixed settings:
    #  * EOS is always appended so the model still learns to signal the end
    #    of a sentence, even when EOS is left off the source (which is done
    #    to keep the model from misaligning UNKs or other words to the
    #    frequently occurring EOS).
    #  * The order is never reversed: even if the source is fed backwards,
    #    the model should start its output from the first word.
    target_dataset = indexed_dataset.IndexedRawTextDataset(
        path=target_text_file,
        dictionary=target_dict,
        append_eos=True,
        reverse_order=False,
    )

    # Word-level source: no character dictionary was supplied.
    if char_source_dict is None:
        word_source = indexed_dataset.IndexedRawTextDataset(
            path=source_text_file,
            dictionary=source_dict,
            append_eos=append_eos,
            reverse_order=reverse_source,
        )
        return data.LanguagePairDataset(
            src=word_source,
            dst=target_dataset,
            pad_idx=source_dict.pad(),
            eos_idx=source_dict.eos(),
        )

    # Word+char source: the dataset object is created empty and then
    # populated from the text file via parse().
    word_char_source = char_data.InMemoryNumpyWordCharDataset()
    word_char_source.parse(
        path=source_text_file,
        word_dict=source_dict,
        char_dict=char_source_dict,
        reverse_order=reverse_source,
        append_eos=append_eos,
    )
    return char_data.LanguagePairSourceCharDataset(
        src=word_char_source,
        dst=target_dataset,
        pad_idx=source_dict.pad(),
        eos_idx=source_dict.eos(),
    )

예제 #2

파일 보기

파일: data.py 프로젝트: planb-hakone/translate

def make_language_pair_dataset_from_text_multilingual(
    source_text_file: str,
    target_text_file: str,
    source_lang_id: int,
    target_lang_id: int,
    source_dict: pytorch_translate_dictionary.Dictionary,
    target_dict: pytorch_translate_dictionary.Dictionary,
    append_eos: Optional[bool] = False,
    reverse_source: Optional[bool] = True,
) -> data.LanguagePairDataset:
    """Build a language-ID-aware pair dataset from two raw text files.

    Both sides are ``IndexedRawTextDatasetWithLangId`` instances. Only the
    target side is created with ``prepend_language_id=True``.

    Args:
        source_text_file: Path handed to the source-side dataset.
        target_text_file: Path handed to the target-side dataset.
        source_lang_id: Passed as ``lang_id`` to the source-side dataset.
        target_lang_id: Passed as ``lang_id`` to the target-side dataset.
        source_dict: Dictionary used for the source side. Its ``pad()`` and
            ``eos()`` values are also used as the pair dataset's
            ``pad_idx`` / ``eos_idx``.
        target_dict: Dictionary used for the target side.
        append_eos: Forwarded as ``append_eos`` to the source side only.
        reverse_source: Forwarded as ``reverse_order`` to the source side.

    Returns:
        A ``LanguagePairDataset`` wrapping the two datasets.
    """
    # Source side: caller-controlled EOS / ordering, no language-ID prefix.
    source_dataset = IndexedRawTextDatasetWithLangId(
        path=source_text_file,
        dictionary=source_dict,
        lang_id=source_lang_id,
        append_eos=append_eos,
        reverse_order=reverse_source,
        prepend_language_id=False,
    )

    # Target side: fixed settings (EOS appended, natural order) and the
    # language ID is prepended.
    target_dataset = IndexedRawTextDatasetWithLangId(
        path=target_text_file,
        dictionary=target_dict,
        lang_id=target_lang_id,
        append_eos=True,
        reverse_order=False,
        prepend_language_id=True,
    )

    return data.LanguagePairDataset(
        src=source_dataset,
        dst=target_dataset,
        pad_idx=source_dict.pad(),
        eos_idx=source_dict.eos(),
    )

예제 #3

파일 보기

파일: data.py 프로젝트: vvvictorlee/translate

def make_language_pair_dataset(
    source_file: str,
    target_file: str,
    source_dict: pytorch_translate_dictionary.Dictionary,
    target_dict: pytorch_translate_dictionary.Dictionary,
    args: Optional[argparse.Namespace] = None,
) -> data.LanguagePairDataset:
    """Build a ``LanguagePairDataset`` from a source and a target file.

    Args:
        source_file: Path handed to the source-side dataset.
        target_file: Path handed to the target-side dataset.
        source_dict: Dictionary used for the source side. Its ``pad()`` and
            ``eos()`` values are also used as the pair dataset's
            ``pad_idx`` / ``eos_idx``.
        target_dict: Dictionary used for the target side.
        args: Optional namespace supplying ``append_eos_to_source`` and
            ``reverse_source`` for the source side. When ``None``, EOS is
            appended to the source and its order is not reversed.

    Returns:
        A ``LanguagePairDataset`` wrapping the two datasets.
    """
    # Resolve the source-side options up front; the defaults apply only
    # when no namespace was provided.
    if args is not None:
        append_eos_to_source = args.append_eos_to_source
        reverse_source = args.reverse_source
    else:
        append_eos_to_source = True
        reverse_source = False

    source_dataset = indexed_dataset.IndexedRawTextDataset(
        path=source_file,
        dictionary=source_dict,
        append_eos=append_eos_to_source,
        reverse_order=reverse_source,
    )

    # Target side uses fixed settings:
    #  * EOS is always appended so the model still learns to signal the end
    #    of a sentence, even when EOS is left off the source (which is done
    #    to keep the model from misaligning UNKs or other words to the
    #    frequently occurring EOS).
    #  * The order is never reversed: even if the source is fed backwards,
    #    the model should start its output from the first word.
    target_dataset = indexed_dataset.IndexedRawTextDataset(
        path=target_file,
        dictionary=target_dict,
        append_eos=True,
        reverse_order=False,
    )

    return data.LanguagePairDataset(
        src=source_dataset,
        dst=target_dataset,
        pad_idx=source_dict.pad(),
        eos_idx=source_dict.eos(),
    )

예제 #4

파일 보기

def make_multisource_language_pair_dataset_from_text(
    source_text_files: List[str],
    target_text_file: str,
    source_dict: pytorch_translate_dictionary.Dictionary,
    target_dict: pytorch_translate_dictionary.Dictionary,
    append_eos: Optional[bool] = False,
    reverse_source: Optional[bool] = True,
) -> MultisourceLanguagePairDataset:
    """Build a multi-source pair dataset from several source files and one target.

    Args:
        source_text_files: List of paths handed, as a whole, to the
            ``IndexedRawTextMultisentDataset`` used for the source side.
        target_text_file: Path handed to the target-side dataset.
        source_dict: Dictionary used for the source side. Its ``pad()`` and
            ``eos()`` values are also used as the pair dataset's
            ``pad_idx`` / ``eos_idx``.
        target_dict: Dictionary used for the target side.
        append_eos: Forwarded as ``append_eos`` to the source side only.
        reverse_source: Forwarded as ``reverse_order`` to the source side.

    Returns:
        A ``MultisourceLanguagePairDataset`` wrapping the two datasets.
    """
    multi_source_dataset = IndexedRawTextMultisentDataset(
        path=source_text_files,
        dictionary=source_dict,
        append_eos=append_eos,
        reverse_order=reverse_source,
    )

    # Target side uses fixed settings:
    #  * EOS is always appended so the model still learns to signal the end
    #    of a sentence, even when EOS is left off the source (which is done
    #    to keep the model from misaligning UNKs or other words to the
    #    frequently occurring EOS).
    #  * The order is never reversed: even if the source is fed backwards,
    #    the model should start its output from the first word.
    target_dataset = indexed_dataset.IndexedRawTextDataset(
        path=target_text_file,
        dictionary=target_dict,
        append_eos=True,
        reverse_order=False,
    )

    return MultisourceLanguagePairDataset(
        src=multi_source_dataset,
        dst=target_dataset,
        pad_idx=source_dict.pad(),
        eos_idx=source_dict.eos(),
    )