Code example #1
 def make_embedding(emb_hparams, token_to_id_map):
     r"""Builds an :class:`texar.torch.data.Embedding` from ``emb_hparams``
     when an embedding file is configured; otherwise returns ``None``.
     """
     emb_file = emb_hparams["file"]
     # Guard clause: no file path (or an empty one) means no embedding.
     if emb_file is None or len(emb_file) == 0:
         return None
     return Embedding(token_to_id_map, emb_hparams)
Code example #2
    def make_embedding(src_emb_hparams,
                       src_token_to_id_map,
                       tgt_emb_hparams=None,
                       tgt_token_to_id_map=None,
                       emb_init_share=False):
        r"""Optionally loads source and target embeddings from files (if
        provided), and returns respective :class:`texar.torch.data.Embedding`
        instances.
        """
        # Source side: delegate to the single-dataset helper.
        src_embedding = MonoTextData.make_embedding(src_emb_hparams,
                                                    src_token_to_id_map)

        # When sharing is requested, the target reuses the exact same
        # embedding object as the source.
        if emb_init_share:
            return src_embedding, src_embedding

        # Otherwise load the target embedding from its own file, if any.
        # NOTE(review): this indexes tgt_emb_hparams unconditionally, so a
        # None value here raises — presumably callers always supply it when
        # emb_init_share is False; verify against callers.
        tgt_embedding = None
        tgt_emb_file = tgt_emb_hparams["file"]
        if tgt_emb_file is not None and tgt_emb_file != "":
            tgt_embedding = Embedding(tgt_token_to_id_map, tgt_emb_hparams)

        return src_embedding, tgt_embedding
Code example #3
def _default_mono_text_dataset_hparams():
    r"""Returns hyperparameters of a mono text dataset with default values.

    See :meth:`texar.torch.MonoTextData.default_hparams` for details.
    """
    # Insertion order is kept identical to the documented hparam layout.
    defaults = {
        # Data source and parsing.
        "files": [],
        "compression_type": None,
        "vocab_file": "",
        "embedding_init": Embedding.default_hparams(),
        "delimiter": None,
        # Sequence length handling.
        "max_seq_length": None,
        "length_filter_mode": "truncate",
        "pad_to_max_seq_length": False,
        # Special tokens and post-processing.
        "bos_token": SpecialTokens.BOS,
        "eos_token": SpecialTokens.EOS,
        "other_transformations": [],
        # Multi-utterance support.
        "variable_utterance": False,
        "utterance_delimiter": "|||",
        "max_utterance_cnt": 5,
        "data_name": None,
        # "files" may be a str or a list; skip type checking for it.
        "@no_typecheck": ["files"],
    }
    return defaults
Code example #4
    def make_embedding(hparams: List[HParams], vocabs: List[Optional[Vocab]]) \
            -> List[Optional[Embedding]]:
        r"""Optionally loads embeddings from files (if provided), and
        returns respective :class:`texar.torch.data.Embedding` instances.
        """
        embeddings: List[Optional[Embedding]] = []
        for idx, hparams_i in enumerate(hparams):
            # Non-text datasets never carry an embedding.
            if not _is_text_data(hparams_i.data_type):
                embeddings.append(None)
                continue

            share_idx = hparams_i.embedding_init_share_with
            if share_idx is None:
                # No sharing: load from file when one is configured.
                vocab = vocabs[idx]
                emb_file = hparams_i.embedding_init.file
                emb = None
                if emb_file and emb_file != "":
                    assert vocab is not None
                    emb = Embedding(vocab.token_to_id_map_py,
                                    hparams_i.embedding_init)
                embeddings.append(emb)
                continue

            # Sharing: the referenced dataset must come earlier, must own an
            # embedding, and must also be the vocab-sharing source.
            if share_idx >= idx:
                MultiAlignedData._raise_sharing_error(
                    idx, share_idx, "embedding_init_share_with")
            if not embeddings[share_idx]:
                raise ValueError(
                    f"Cannot share embedding with dataset {share_idx} "
                    "which does not have an embedding.")
            if share_idx != hparams_i.vocab_share_with:
                raise ValueError(
                    "'embedding_init_share_with' != 'vocab_share_with'."
                    "'embedding_init' can be shared only when vocab is"
                    "shared.")
            embeddings.append(embeddings[share_idx])

        return embeddings