from typing import List, Optional

from texar.torch import HParams
from texar.torch.data import (Embedding, MonoTextData, MultiAlignedData,
                              SpecialTokens, Vocab)


def make_embedding(emb_hparams, token_to_id_map):
    r"""Optionally loads embedding from file (if provided), and returns
    an instance of :class:`texar.torch.data.Embedding`.
    """
    embedding = None
    # Only construct an ``Embedding`` when a non-empty file path is given.
    if emb_hparams["file"] is not None and len(emb_hparams["file"]) > 0:
        embedding = Embedding(token_to_id_map, emb_hparams)
    return embedding
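# Usage sketch for `make_embedding` above (in Texar this is a static method
# of `MonoTextData`). The toy vocabulary and the embedding-file path are
# hypothetical, and the default hyperparameters are assumed to leave "file"
# empty, in which case nothing is loaded and `None` is returned. The default
# argument pins the `make_embedding` defined directly above, since later
# functions in this excerpt reuse the name.
def _example_mono_embedding(_make_embedding=make_embedding):
    emb_hparams = Embedding.default_hparams()
    token_to_id_map = {"<PAD>": 0, "hello": 1, "world": 2}  # toy vocab
    assert _make_embedding(emb_hparams, token_to_id_map) is None

    # To actually load vectors, point "file" at a pretrained-embedding
    # file, e.g. (hypothetical path):
    # emb_hparams["file"] = "data/glove.6B.50d.txt"
    # embedding = _make_embedding(emb_hparams, token_to_id_map)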
def make_embedding(src_emb_hparams, src_token_to_id_map,
                   tgt_emb_hparams=None, tgt_token_to_id_map=None,
                   emb_init_share=False):
    r"""Optionally loads source and target embeddings from files (if
    provided), and returns respective :class:`texar.torch.data.Embedding`
    instances.
    """
    src_embedding = MonoTextData.make_embedding(src_emb_hparams,
                                                src_token_to_id_map)

    if emb_init_share:
        # The target side reuses the source embedding object.
        tgt_embedding = src_embedding
    elif tgt_emb_hparams is None:
        # No target hyperparameters were provided, so there is nothing to
        # load (the original indexed `tgt_emb_hparams` unconditionally here,
        # which crashes with the default arguments).
        tgt_embedding = None
    else:
        tgt_emb_file = tgt_emb_hparams["file"]
        tgt_embedding = None
        if tgt_emb_file is not None and tgt_emb_file != "":
            tgt_embedding = Embedding(tgt_token_to_id_map, tgt_emb_hparams)

    return src_embedding, tgt_embedding
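# Usage sketch for the paired-text variant above (in Texar,
# `PairedTextData.make_embedding`): with `emb_init_share=True` the target
# side reuses the source `Embedding` object outright, so the target
# hyperparameters and vocabulary may be omitted. The hyperparameters here
# are the assumed defaults (no embedding file), so both returned values are
# the very same object, namely `None`.
def _example_paired_embedding(_make_embedding=make_embedding):
    src_hparams = Embedding.default_hparams()
    src_map = {"<PAD>": 0, "bonjour": 1, "monde": 2}  # toy source vocab
    src_emb, tgt_emb = _make_embedding(src_hparams, src_map,
                                       emb_init_share=True)
    assert tgt_emb is src_emb  # shared, not copied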
def _default_mono_text_dataset_hparams():
    r"""Returns hyperparameters of a mono text dataset with default values.

    See :meth:`texar.torch.MonoTextData.default_hparams` for details.
    """
    return {
        "files": [],
        "compression_type": None,
        "vocab_file": "",
        "embedding_init": Embedding.default_hparams(),
        "delimiter": None,
        "max_seq_length": None,
        "length_filter_mode": "truncate",
        "pad_to_max_seq_length": False,
        "bos_token": SpecialTokens.BOS,
        "eos_token": SpecialTokens.EOS,
        "other_transformations": [],
        "variable_utterance": False,
        "utterance_delimiter": "|||",
        "max_utterance_cnt": 5,
        "data_name": None,
        "@no_typecheck": ["files"],
    }
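# Sketch of how these defaults are typically customized: take the default
# dict and override individual entries. The corpus and vocabulary file
# names below are hypothetical placeholders.
def _example_mono_text_hparams():
    hparams = _default_mono_text_dataset_hparams()
    hparams["files"] = ["data/train.txt"]      # hypothetical corpus file
    hparams["vocab_file"] = "data/vocab.txt"   # hypothetical vocab file
    hparams["max_seq_length"] = 64             # truncate longer sequences
    return hparams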
def make_embedding(hparams: List[HParams], vocabs: List[Optional[Vocab]]) \
        -> List[Optional[Embedding]]:
    r"""Optionally loads embeddings from files (if provided), and returns
    respective :class:`texar.torch.data.Embedding` instances.
    """
    embeddings: List[Optional[Embedding]] = []
    for i, hparams_i in enumerate(hparams):
        # `_is_text_data` is a module-local helper in the original source
        # that checks whether `data_type` denotes a text dataset.
        if not _is_text_data(hparams_i.data_type):
            embeddings.append(None)
            continue

        emb_share = hparams_i.embedding_init_share_with
        if emb_share is not None:
            # Sharing is only allowed with an earlier dataset that itself
            # has an embedding and whose vocabulary is shared as well.
            if emb_share >= i:
                MultiAlignedData._raise_sharing_error(
                    i, emb_share, "embedding_init_share_with")
            if not embeddings[emb_share]:
                raise ValueError(
                    f"Cannot share embedding with dataset {emb_share} "
                    "which does not have an embedding.")
            if emb_share != hparams_i.vocab_share_with:
                raise ValueError(
                    "'embedding_init_share_with' != 'vocab_share_with'. "
                    "'embedding_init' can be shared only when vocab is "
                    "shared.")
            emb = embeddings[emb_share]
        else:
            emb = None
            emb_file = hparams_i.embedding_init.file
            vocab = vocabs[i]
            if emb_file and emb_file != "":
                assert vocab is not None
                emb = Embedding(vocab.token_to_id_map_py,
                                hparams_i.embedding_init)
        embeddings.append(emb)
    return embeddings
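# Usage sketch for the multi-aligned variant above (in Texar,
# `MultiAlignedData.make_embedding`). The `default` dict below is a minimal
# assumed subset of the real per-dataset hyperparameters, covering just the
# attributes this function reads. With no embedding files configured, the
# vocabularies are never touched and every entry comes back as `None`.
def _example_multi_aligned_embeddings(_make_embedding=make_embedding):
    default = {
        "data_type": "text",
        "vocab_share_with": None,
        "embedding_init_share_with": None,
        "embedding_init": Embedding.default_hparams(),
    }
    hparams = [HParams(None, default), HParams(None, default)]
    vocabs: List[Optional[Vocab]] = [None, None]  # unused without files
    assert _make_embedding(hparams, vocabs) == [None, None]

    # To share dataset 1's embedding with dataset 0, set both
    # `embedding_init_share_with` and `vocab_share_with` to 0 in the second
    # dataset's hyperparameters, and give dataset 0 a real embedding file.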