def __init__(self, params): """Initialize classifiers. """ super(ActionExecutor, self).__init__() self.params = params input_size = self.params["hidden_size"] if self.params["text_encoder"] == "transformer": input_size = self.params["word_embed_size"] self.action_net = self._get_classify_network(input_size, params["num_actions"]) if params["use_action_attention"]: self.attention_net = models.SelfAttention(input_size) # If multimodal input state is to be used. if self.params["use_multimodal_state"]: input_size += self.params["hidden_size"] # B : if belief state is to be used. if self.params["use_belief_state"]: input_size += self.params["hidden_size"] self.action_net = self._get_classify_network(input_size, params["num_actions"]) # Read action metadata. with open(params["metainfo_path"], "r") as file_id: action_metainfo = json.load(file_id)["actions"] action_dict = {ii["name"]: ii["id"] for ii in action_metainfo} self.action_metainfo = {ii["name"]: ii for ii in action_metainfo} self.action_map = loaders.Vocabulary(immutable=True, verbose=False) sorted_actions = sorted(action_dict.keys(), key=lambda x: action_dict[x]) self.action_map.set_vocabulary_state(sorted_actions) # Read action attribute metadata. with open(params["attr_vocab_path"], "r") as file_id: self.attribute_vocab = json.load(file_id) # Create classifiers for action attributes. self.classifiers = {} for key, val in self.attribute_vocab.items(): self.classifiers[key] = self._get_classify_network( input_size, len(val)) self.classifiers = nn.ModuleDict(self.classifiers) # Model multimodal state. if params["use_multimodal_state"]: if params["domain"] == "furniture": self.multimodal_embed = models.CarouselEmbedder(params) elif params["domain"] == "fashion": self.multimodal_embed = models.UserMemoryEmbedder(params) else: raise ValueError("Domain neither of furniture/fashion") # NOTE: Action output is modeled as multimodal state. if params["use_action_output"]: if params["domain"] == "furniture": self.action_output_embed = models.CarouselEmbedder(params) elif params["domain"] == "fashion": self.action_output_embed = models.UserMemoryEmbedder(params) else: raise ValueError("Domain neither of furniture/fashion") self.criterion_mean = nn.CrossEntropyLoss() self.criterion = nn.CrossEntropyLoss(reduction="none") self.criterion_multi = torch.nn.MultiLabelSoftMarginLoss()
def __init__(self, params):
    self.params = params
    # Load the dataset.
    raw_data = np.load(params["data_read_path"], allow_pickle=True)
    self.raw_data = raw_data[()]
    if self.params["encoder"] != "pretrained_transformer":
        self.words = loaders.Vocabulary()
        self.words.set_vocabulary_state(self.raw_data["vocabulary"]["word"])
        # Aliases.
        self.start_token = self.words.index("<start>")
        self.end_token = self.words.index("<end>")
        self.pad_token = self.words.index("<pad>")
        self.unk_token = self.words.index("<unk>")
    else:
        from transformers import BertTokenizer

        self.words = BertTokenizer.from_pretrained(self.raw_data["vocabulary"])
        # Aliases.
        self.start_token = self.words.added_tokens_encoder["[start]"]
        self.end_token = self.words.added_tokens_encoder["[end]"]
        self.pad_token = self.words.pad_token_id
        self.unk_token = self.words.unk_token_id
        self.words.word = self.words.convert_ids_to_tokens
        self.words.index = self.words.convert_tokens_to_ids
    # Read the metainfo for the dataset.
    with open(params["metainfo_path"], "r") as file_id:
        self.metainfo = json.load(file_id)
    self.action_map = {ii["name"]: ii["id"] for ii in self.metainfo["actions"]}
    # Read the attribute vocabulary for the dataset.
    with open(params["attr_vocab_path"], "r") as file_id:
        attribute_map = json.load(file_id)
    print("Loading attribute vocabularies..")
    self.attribute_map = {}
    for attr, attr_vocab in attribute_map.items():
        self.attribute_map[attr] = loaders.Vocabulary(
            immutable=True, verbose=False
        )
        self.attribute_map[attr].set_vocabulary_state(attr_vocab)
    # Encode attribute supervision.
    for d_id, super_datum in enumerate(self.raw_data["action_supervision"]):
        for r_id, round_datum in enumerate(super_datum):
            if round_datum is None:
                continue
            if self.params["domain"] == "furniture":
                new_supervision = {
                    key: self.attribute_map[key].index(val)
                    for key, val in round_datum.items()
                    if key in self.attribute_map
                }
            elif self.params["domain"] == "fashion":
                ATTRIBUTE_FIXES = {
                    "embellishment": "embellishments",
                    "hemlength": "hemLength",
                }
                new_supervision = {}
                for key, val in round_datum.items():
                    # Skip attributes with no dictionary to map them to
                    # indices (non-classification/categorical fields).
                    if key not in self.attribute_map:
                        continue
                    # Encode each attribute -- multi-class classification;
                    # unseen values fall back to the "other" index.
                    fixed_keys = [ATTRIBUTE_FIXES.get(ii, ii) for ii in val]
                    new_supervision[key] = [
                        self.attribute_map[key].index(ii)
                        if ii in self.attribute_map[key]
                        else self.attribute_map[key].index("other")
                        for ii in fixed_keys
                    ]
            else:
                raise ValueError("Domain must be either furniture or fashion!")
            self.raw_data["action_supervision"][d_id][r_id] = new_supervision
    if self.params["domain"] == "furniture":
        if self.params["use_multimodal_state"]:
            # Read embeddings for furniture assets to model carousel state.
            self._prepare_carousel_states()
        if self.params["use_action_output"]:
            # Outputs for the actions.
            self._prepare_carousel_states(key="action_output_state")
    elif self.params["domain"] == "fashion":
        # Prepare embeddings for fashion items.
        self._prepare_asset_embeddings()
    else:
        raise ValueError("Domain must be either furniture or fashion!")
    # Additional data constructs (post-processing).
    if params["encoder"] == "memory_network":
        self._construct_fact()
    elif params["encoder"] == "tf_idf":
        self.compute_idf_features()
    super(DataloaderSIMMC, self).__init__()
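# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original source). The loader
# expects a preprocessed .npy file holding a pickled dict with "vocabulary"
# and "action_supervision" entries, plus the same metainfo/attribute JSON
# files used by the executor; all paths and values below are assumptions.
#
#   loader_params = {
#       "data_read_path": "data/train_data.npy",    # hypothetical path
#       "encoder": "lstm",              # "memory_network"/"tf_idf" trigger extras
#       "domain": "furniture",          # or "fashion"
#       "use_multimodal_state": True,
#       "use_action_output": False,
#       "metainfo_path": "data/metainfo.json",      # hypothetical path
#       "attr_vocab_path": "data/attr_vocab.json",  # hypothetical path
#   }
#   loader = DataloaderSIMMC(loader_params)
#   print(loader.action_map)  # maps action names to integer ids
# ---------------------------------------------------------------------------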