def _build_vocab(self):
    """Build the text, entity and relation vocabularies.

    Source and target text are concatenated and handed to ``build_vocab``
    in a single call; the target side then reuses the same mapping objects
    and size as the source side. The entity and relation vocabularies are
    built separately with an empty special-token list, and the third value
    returned by ``build_vocab`` is discarded for both of them.
    """
    # One vocabulary covering both source and target text.
    combined_text = self.source_text + self.target_text
    idx2token, token2idx, vocab_size = build_vocab(
        combined_text, self.source_vocab_size, self.special_token_list
    )
    self.source_idx2token = idx2token
    self.source_token2idx = token2idx
    self.source_vocab_size = vocab_size

    # The target side aliases the source vocabulary (same objects, no copy).
    self.target_idx2token = self.source_idx2token
    self.target_token2idx = self.source_token2idx
    self.target_vocab_size = self.source_vocab_size

    # Split every entity into tokens while keeping the group -> doc -> entity
    # nesting intact (assumes each entity is a string -- TODO confirm).
    entity_data = []
    for group in self.source_entity:
        tokenized_group = []
        for doc in group:
            tokenized_group.append([mention.split() for mention in doc])
        entity_data.append(tokenized_group)

    # NOTE: self.source_vocab_size was reassigned above, so the size passed
    # to the next two calls is the value returned by the first build_vocab.
    entity_idx2token, entity_token2idx, _ = build_vocab(
        entity_data, self.source_vocab_size, []
    )
    self.source_entity_idx2token = entity_idx2token
    self.source_entity_token2idx = entity_token2idx

    relation_idx2token, relation_token2idx, _ = build_vocab(
        self.relation, self.source_vocab_size, []
    )
    self.source_relation_idx2token = relation_idx2token
    self.source_relation_token2idx = relation_token2idx
def _build_vocab(self):
    """Build the source and target vocabularies, shared or separate.

    When ``self.share_vocab`` is falsy, ``build_vocab`` is called once on
    the source text and once on the target text, each with its own size
    attribute. Otherwise the two size attributes must be equal (checked
    with ``assert``), a single vocabulary is built from the concatenated
    text, and the target attributes alias the source ones.
    """
    if not self.share_vocab:
        # Independent vocabularies: source first, then target.
        src_idx2token, src_token2idx, src_size = build_vocab(
            self.source_text, self.source_vocab_size, self.special_token_list
        )
        self.source_idx2token = src_idx2token
        self.source_token2idx = src_token2idx
        self.source_vocab_size = src_size

        tgt_idx2token, tgt_token2idx, tgt_size = build_vocab(
            self.target_text, self.target_vocab_size, self.special_token_list
        )
        self.target_idx2token = tgt_idx2token
        self.target_token2idx = tgt_token2idx
        self.target_vocab_size = tgt_size
        return

    # Shared vocabulary: both sides must have been given the same size.
    assert self.source_vocab_size == self.target_vocab_size
    combined_text = self.source_text + self.target_text
    shared_idx2token, shared_token2idx, shared_size = build_vocab(
        combined_text, self.source_vocab_size, self.special_token_list
    )
    self.source_idx2token = shared_idx2token
    self.source_token2idx = shared_token2idx
    self.source_vocab_size = shared_size

    # The target side reuses the very same mapping objects as the source.
    self.target_idx2token = self.source_idx2token
    self.target_token2idx = self.source_token2idx
    self.target_vocab_size = self.source_vocab_size
def _build_vocab(self):
    """Build the source-key vocabulary and the shared value/target vocabulary.

    The key vocabulary comes from ``self.source_key_text``. The source-value
    text and the target text are concatenated and share one vocabulary,
    which the target attributes alias.
    """
    # Vocabulary over the source keys. The size passed in is
    # self.source_vocab_size, read before it is reassigned below.
    key_idx2token, key_token2idx, key_vocab_size = build_vocab(
        self.source_key_text, self.source_vocab_size, self.special_token_list
    )
    self.source_key_idx2token = key_idx2token
    self.source_key_token2idx = key_token2idx
    self.source_key_vocab_size = key_vocab_size

    # Single vocabulary over source values and target text together.
    combined_text = self.source_value_text + self.target_text
    idx2token, token2idx, vocab_size = build_vocab(
        combined_text, self.source_vocab_size, self.special_token_list
    )
    self.source_idx2token = idx2token
    self.source_token2idx = token2idx
    self.source_vocab_size = vocab_size

    # Target attributes point at the same objects as the source ones.
    self.target_idx2token = self.source_idx2token
    self.target_token2idx = self.source_token2idx
    self.target_vocab_size = self.source_vocab_size
def _build_vocab(self):
    """Build the source and target vocabularies, shared or separate.

    With ``self.share_vocab`` set, source and target must be the same
    language (checked with ``assert``); one vocabulary is built from the
    concatenated text data and the target mappings alias the source
    mappings. Otherwise each side gets its own ``build_vocab`` call.

    In both branches ``max_source_vocab_size`` and ``max_target_vocab_size``
    are overwritten with the third value returned by ``build_vocab``
    (presumably the resulting vocabulary size -- confirm against
    ``build_vocab``).
    """
    if self.share_vocab:
        assert self.source_language == self.target_language
        text_data = self.source_text_data + self.target_text_data
        (
            self.source_idx2token,
            self.source_token2idx,
            self.max_source_vocab_size,
        ) = build_vocab(
            text_data, self.max_source_vocab_size, self.special_token_list
        )
        self.target_idx2token = self.source_idx2token
        self.target_token2idx = self.source_token2idx
        # The target side now uses the shared mappings, so its size must
        # follow the shared vocabulary too; previously it kept the stale
        # configured value and could disagree with the actual mappings.
        self.max_target_vocab_size = self.max_source_vocab_size
    else:
        (
            self.source_idx2token,
            self.source_token2idx,
            self.max_source_vocab_size,
        ) = build_vocab(
            self.source_text_data,
            self.max_source_vocab_size,
            self.special_token_list,
        )
        (
            self.target_idx2token,
            self.target_token2idx,
            self.max_target_vocab_size,
        ) = build_vocab(
            self.target_text_data,
            self.max_target_vocab_size,
            self.special_token_list,
        )
def _build_vocab(self):
    """Build the vocabulary from ``self.text_data``.

    Stores the three values returned by ``build_vocab`` as
    ``self.idx2token``, ``self.token2idx`` and ``self.max_vocab_size``
    (the last one replaces the size that was passed in).
    """
    idx2token, token2idx, vocab_size = build_vocab(
        self.text_data, self.max_vocab_size, self.special_token_list
    )
    self.idx2token = idx2token
    self.token2idx = token2idx
    self.max_vocab_size = vocab_size
def _build_vocab(self):
    """Build the source attribute vocabulary and the target text vocabulary.

    The source side uses ``build_attribute_vocab``, which takes only the
    source text and yields two mappings. The target side uses
    ``build_vocab`` with the target size and the special-token list.
    """
    # Source: attribute vocabulary (no size limit or special tokens passed).
    attr_idx2token, attr_token2idx = build_attribute_vocab(self.source_text)
    self.source_idx2token = attr_idx2token
    self.source_token2idx = attr_token2idx

    # Target: regular vocabulary; the size attribute is replaced by the
    # third value returned by build_vocab.
    tgt_idx2token, tgt_token2idx, tgt_vocab_size = build_vocab(
        self.target_text, self.target_vocab_size, self.special_token_list
    )
    self.target_idx2token = tgt_idx2token
    self.target_token2idx = tgt_token2idx
    self.target_vocab_size = tgt_vocab_size
def _build_vocab(self):
    """Build one vocabulary over the first three text groups.

    ``self.group_text_data[0]``, ``[1]`` and ``[2]`` are concatenated in
    that order and passed to ``build_vocab``; the three returned values are
    stored as ``self.idx2token``, ``self.token2idx`` and
    ``self.max_vocab_size``.
    """
    groups = self.group_text_data
    # Concatenate left to right, exactly three groups by index.
    merged_text = groups[0] + groups[1]
    merged_text = merged_text + groups[2]

    idx2token, token2idx, vocab_size = build_vocab(
        merged_text, self.max_vocab_size, self.special_token_list
    )
    self.idx2token = idx2token
    self.token2idx = token2idx
    self.max_vocab_size = vocab_size
def _build_vocab(self):
    """Build the target-side vocabulary only.

    Calls ``build_vocab`` on ``self.target_text`` and stores its three
    return values as ``self.target_idx2token``, ``self.target_token2idx``
    and ``self.target_vocab_size`` (replacing the size that was passed in).
    """
    tgt_idx2token, tgt_token2idx, tgt_vocab_size = build_vocab(
        self.target_text, self.target_vocab_size, self.special_token_list
    )
    self.target_idx2token = tgt_idx2token
    self.target_token2idx = tgt_token2idx
    self.target_vocab_size = tgt_vocab_size