def __init__(self, params):
    """Initialize action and action-attribute classifiers.

    Args:
        params: Hyperparameter dict; keys read here include hidden_size,
            text_encoder, word_embed_size, num_actions, use_action_attention,
            use_multimodal_state, use_belief_state, metainfo_path,
            attr_vocab_path, use_action_output, and domain.

    Raises:
        ValueError: If params["domain"] is neither "furniture" nor "fashion".
    """
    super(ActionExecutor, self).__init__()
    self.params = params
    # Base feature size; transformer text encoders emit word-embedding-sized
    # states instead of hidden-sized ones.
    input_size = self.params["hidden_size"]
    if self.params["text_encoder"] == "transformer":
        input_size = self.params["word_embed_size"]
    # Attention operates on the raw encoder features, before any state fusion.
    if params["use_action_attention"]:
        self.attention_net = models.SelfAttention(input_size)
    # Widen the classifier input when multimodal and/or belief states are
    # concatenated onto the encoder output.
    if self.params["use_multimodal_state"]:
        input_size += self.params["hidden_size"]
    if self.params["use_belief_state"]:
        input_size += self.params["hidden_size"]
    # FIX: the original also built action_net once *before* the input_size
    # adjustments above; that instance was unconditionally overwritten here,
    # so the dead construction has been removed.
    self.action_net = self._get_classify_network(input_size, params["num_actions"])
    # Read action metadata and derive a stable, id-ordered action vocabulary.
    with open(params["metainfo_path"], "r") as file_id:
        action_metainfo = json.load(file_id)["actions"]
    action_dict = {ii["name"]: ii["id"] for ii in action_metainfo}
    self.action_metainfo = {ii["name"]: ii for ii in action_metainfo}
    self.action_map = loaders.Vocabulary(immutable=True, verbose=False)
    sorted_actions = sorted(action_dict.keys(), key=lambda x: action_dict[x])
    self.action_map.set_vocabulary_state(sorted_actions)
    # Read action attribute metadata.
    with open(params["attr_vocab_path"], "r") as file_id:
        self.attribute_vocab = json.load(file_id)
    # One classifier head per attribute, output-sized by its vocabulary.
    self.classifiers = {}
    for key, val in self.attribute_vocab.items():
        self.classifiers[key] = self._get_classify_network(input_size, len(val))
    self.classifiers = nn.ModuleDict(self.classifiers)
    # Model multimodal state; embedder choice is domain-specific.
    if params["use_multimodal_state"]:
        if params["domain"] == "furniture":
            self.multimodal_embed = models.CarouselEmbedder(params)
        elif params["domain"] == "fashion":
            self.multimodal_embed = models.UserMemoryEmbedder(params)
        else:
            raise ValueError("Domain neither of furniture/fashion")
    # NOTE: Action output is modeled as multimodal state.
    if params["use_action_output"]:
        if params["domain"] == "furniture":
            self.action_output_embed = models.CarouselEmbedder(params)
        elif params["domain"] == "fashion":
            self.action_output_embed = models.UserMemoryEmbedder(params)
        else:
            raise ValueError("Domain neither of furniture/fashion")
    # Mean-reduced CE for aggregate loss, per-example CE for weighting, and
    # multi-label margin loss for multi-valued attributes.
    self.criterion_mean = nn.CrossEntropyLoss()
    self.criterion = nn.CrossEntropyLoss(reduction="none")
    # Consistency: use the nn alias like the other criteria above.
    self.criterion_multi = nn.MultiLabelSoftMarginLoss()
def __init__(self, args):
    """Build the joint graph/sequence model from parsed arguments.

    Copies configuration off ``args``, instantiates the GraphSage encoder,
    sequence VAE, atom embedding table, output classifier and label loss,
    then creates the Adam optimizer and prints each trainable parameter's
    shape as a sanity check.
    """
    super(Model, self).__init__()
    # (model attribute, args field) pairs — note a few fields are renamed
    # on the way in (lr -> learning_rate, attn -> use_attn, fusion -> use_fusion).
    config_fields = (
        ("device", "device"),
        ("layers", "num_layers"),
        ("input_size_graph", "input_size_graph"),
        ("output_size_graph", "output_size_graph"),
        ("train_data", "train_data"),
        ("test_data", "test_data"),
        ("train_labels", "train_labels"),
        ("test_labels", "test_labels"),
        ("latent_size", "latent_size"),
        ("hidden_size", "hidden_size"),
        ("learning_rate", "lr"),
        ("batch_size", "batch_size"),
        ("input_dim", "input_dim"),
        ("warmup", "warmup"),
        ("num_labels", "num_labels"),
        ("graph", "graph"),
        ("sequence", "sequence"),
        ("recons", "recons"),
        ("use_attn", "attn"),
        ("use_fusion", "fusion"),
    )
    for attr_name, arg_name in config_fields:
        setattr(self, attr_name, getattr(args, arg_name))
    # Graph-side encoder and sequence-side VAE.
    self.graph_pretrain = graph_models.GraphSage(
        self.layers,
        self.input_size_graph,
        self.output_size_graph,
        device=self.device,
        gcn="True",
        agg_func="MEAN",
    )
    self.VAE = seq_models.VAE(args)
    # Trainable atom embedding table on the target device.
    self.AtomEmbedding = nn.Embedding(self.input_size_graph, self.hidden_size).to(self.device)
    self.AtomEmbedding.weight.requires_grad = True
    self.output_layer = models.classifier(self.latent_size, self.num_labels, self.device)
    # Per-element loss so callers can weight/reduce themselves.
    self.label_criterion = nn.BCEWithLogitsLoss(reduction="none")
    if self.use_attn:
        self.attention = models.SelfAttention(self.hidden_size)
    self.optimizer = optim.Adam(
        self.parameters(), lr=self.learning_rate, weight_decay=1e-8, amsgrad=True
    )
    # Print every trainable parameter's name and shape.
    for param_name, param in self.named_parameters():
        if param.requires_grad:
            print(param_name, param.data.shape)
def _build_model(self):
    """Build (and, for Seq2VecForward itself, compile) the CTR model.

    Inputs: ``clicked`` — (window_size, title_shape) clicked-title tokens;
    ``candidate`` — (title_shape,) tokens for one candidate title. Each
    title is encoded by a shared doc encoder; the clicked-title vectors are
    aggregated into a single user vector by the architecture selected via
    ``self.config.arch``; a sigmoid Dense head scores the (user, candidate)
    pair. Returns the uncompiled model for subclasses; compiles in place
    (and implicitly returns None) when ``self.__class__ == Seq2VecForward``.
    """
    # Shared title encoder; TimeDistributed applies it to each clicked title.
    self.doc_encoder = doc_encoder = self.get_doc_encoder()
    user_encoder = keras.layers.TimeDistributed(doc_encoder)
    clicked = keras.Input((self.config.window_size, self.config.title_shape))
    candidate = keras.Input((self.config.title_shape,))
    clicked_vec = user_encoder(clicked)
    candidate_vec = doc_encoder(candidate)
    # Mask over history slots (presumably marks non-padding given the 0
    # argument — confirm against models.ComputeMasking); padded slots'
    # vectors are zeroed by the elementwise multiply below.
    mask = models.ComputeMasking(0)(clicked)
    clicked_vec = keras.layers.Lambda(
        lambda x: x[0] * keras.backend.expand_dims(x[1]))([clicked_vec, mask])
    user_model = self.config.arch
    logging.info('[!] Selecting User Model: {}'.format(user_model))
    # --- Aggregate clicked-title vectors into one user vector. ---
    if user_model == 'att':
        # Masked additive attention pooling.
        clicked_vec = models.SimpleAttentionMasked(mask)(clicked_vec)
    elif user_model == 'gru':
        # Recurrent summary; Masking layer skips padded steps.
        clicked_vec = keras.layers.Masking()(clicked_vec)
        clicked_vec = keras.layers.GRU(self.config.user_embedding_dim)(clicked_vec)
    elif user_model == 'cnn':
        # 1-D convolution over the history, then masked average pooling.
        clicked_vec = keras.layers.Conv1D(
            *self.config.title_filter_shape, padding='same', activation='relu')(
            clicked_vec)
        clicked_vec = models.GlobalAveragePoolingMasked(mask)(clicked_vec)
    elif user_model == 'catt':
        # Convolution followed by masked attention pooling.
        clicked_vec = keras.layers.Conv1D(
            *self.config.title_filter_shape, padding='same', activation='relu')(
            clicked_vec)
        clicked_vec = models.SimpleAttentionMasked(mask)(clicked_vec)
    elif user_model == 'qatt':
        # Attention pooling queried by the candidate vector.
        clicked_vec = keras.layers.Masking()(clicked_vec)
        clicked_vec = models.QueryAttention()([clicked_vec, candidate_vec])
    elif user_model == 'satt':
        # Self-attention over the history, then masked average pooling.
        clicked_vec = keras.layers.Masking()(clicked_vec)
        clicked_vec = models.SelfAttention()(clicked_vec)
        clicked_vec = models.GlobalAveragePoolingMasked(mask)(clicked_vec)
    elif user_model == 'lstm':
        clicked_vec = keras.layers.Masking()(clicked_vec)
        clicked_vec = keras.layers.LSTM(self.config.user_embedding_dim)(clicked_vec)
    elif user_model == 'lz1':
        # Doc-doc attention + (learned pooling || GRU) user encoder.
        input_shape = clicked_vec.get_shape()[-1].value

        def auto_attend():
            # Re-weight each doc by its dot-product similarity to all docs.
            docs = keras.layers.Input(shape=(self.config.window_size, input_shape))
            cross_product = keras.layers.dot([docs, docs], axes=2)
            cross_weights = keras.layers.Softmax(axis=2)(cross_product)
            attended_docs = keras.layers.dot([cross_weights, docs], axes=1)
            return keras.Model(docs, attended_docs)

        def docs_pool():
            # Learned scalar weight per doc, then weighted combination.
            docs = keras.layers.Input(
                shape=(self.config.window_size, self.config.hidden_dim))
            pool_vec = keras.layers.Dense(units=1)
            weights = keras.layers.TimeDistributed(pool_vec)(docs)
            squeeze = keras.layers.Lambda(
                lambda x: keras.backend.squeeze(x, axis=2))
            weights = squeeze(weights)
            output = keras.layers.dot([docs, weights], axes=1)
            return keras.Model(docs, output)

        def docs_rnn():
            # Temporal summary of the history via a GRU.
            docs = keras.layers.Input(
                shape=(self.config.window_size, self.config.hidden_dim))
            gru = keras.layers.GRU(units=self.config.hidden_dim, activation="relu")
            output = gru(docs)
            return keras.Model(docs, output)

        def user_encode():
            # Concat raw + attended docs, project, then pool two ways.
            raw_docs = keras.layers.Input(
                shape=(self.config.window_size, input_shape))
            att_docs = auto_attend()(raw_docs)
            ful_docs = keras.layers.concatenate([raw_docs, att_docs])
            ful_docs = keras.layers.Dense(units=self.config.hidden_dim)(ful_docs)
            pooling = docs_pool()(ful_docs)
            temporal = docs_rnn()(ful_docs)
            output = keras.layers.concatenate([pooling, temporal])
            return keras.Model(raw_docs, output)

        clicked_vec = user_encode()(clicked_vec)
    elif user_model == 'lz2':
        # Like 'lz1' but with masked average pooling instead of learned pooling.
        input_shape = clicked_vec.get_shape()[-1].value

        def auto_attend():
            # Re-weight each doc by its dot-product similarity to all docs.
            docs = keras.layers.Input(shape=(self.config.window_size, input_shape))
            cross_product = keras.layers.dot([docs, docs], axes=2)
            cross_weights = keras.layers.Softmax(axis=2)(cross_product)
            attended_docs = keras.layers.dot([cross_weights, docs], axes=1)
            return keras.Model(docs, attended_docs)

        def docs_rnn():
            # Temporal summary of the history via a GRU.
            docs = keras.layers.Input(
                shape=(self.config.window_size, self.config.hidden_dim))
            gru = keras.layers.GRU(units=self.config.hidden_dim, activation="relu")
            output = gru(docs)
            return keras.Model(docs, output)

        def user_encode():
            raw_docs = keras.layers.Input(
                shape=(self.config.window_size, input_shape))
            att_docs = auto_attend()(raw_docs)
            ful_docs = keras.layers.concatenate([raw_docs, att_docs])
            ful_docs = keras.layers.Dense(units=self.config.hidden_dim)(ful_docs)
            pooling = models.GlobalAveragePoolingMasked(mask)(ful_docs)
            temporal = docs_rnn()(ful_docs)
            output = keras.layers.concatenate([pooling, temporal])
            return keras.Model(raw_docs, output)

        clicked_vec = user_encode()(clicked_vec)
    elif user_model == 'lz3':
        # Doc-doc attention + learned pooling + recency-queried attention.
        # NOTE(review): input_shape is the per-title *feature* dimension, yet
        # below it is used as the time-axis length of every Input and to cap
        # recent_len — window_size would be expected there; confirm intent.
        input_shape = clicked_vec.get_shape()[-1].value
        recent_len = min(input_shape, 20)

        def auto_attend():
            docs = keras.layers.Input(shape=(input_shape, self.config.hidden_dim))
            cross_product = keras.layers.dot([docs, docs], axes=2)
            cross_weights = keras.layers.Softmax(axis=2)(cross_product)
            attended_docs = keras.layers.dot([cross_weights, docs], axes=1)
            return keras.Model(docs, attended_docs)

        def pool_attend():
            # Learned scalar weight per doc, then weighted combination.
            docs = keras.layers.Input(shape=(input_shape, self.config.hidden_dim))
            pool_vec = keras.layers.Dense(units=1)
            weights = keras.layers.TimeDistributed(pool_vec)(docs)
            squeeze = keras.layers.Lambda(
                lambda x: keras.backend.squeeze(x, axis=2))
            weights = squeeze(weights)
            output = keras.layers.dot([docs, weights], axes=1)
            return keras.Model(docs, output)

        def time_attend():
            # GRU over the most recent docs queries attention over the full
            # history; outputs recent summary || attended history.
            full_docs = keras.layers.Input(
                shape=(input_shape, self.config.hidden_dim))
            time_window = keras.layers.Lambda(lambda x: x[:, -recent_len:, :])
            recent_docs = time_window(full_docs)
            gru = keras.layers.GRU(
                units=self.config.hidden_dim, activation="relu", name="gru_0")
            recent_vecs = gru(recent_docs)
            weights = keras.layers.dot([recent_vecs, full_docs], axes=-1)
            weights = keras.layers.Softmax(axis=-1)(weights)
            attend_vec = keras.layers.dot([weights, full_docs], axes=1)
            output = keras.layers.concatenate([recent_vecs, attend_vec])
            return keras.Model(full_docs, output)

        att_docs = auto_attend()(clicked_vec)
        historical_pool_docs = pool_attend()(att_docs)
        recent_attended_docs = time_attend()(att_docs)
        clicked_vec = keras.layers.concatenate(
            [historical_pool_docs, recent_attended_docs])
    else:
        # Fallback: masked average pooling; warn on unknown arch names.
        if user_model != 'avg':
            logging.warning(
                '[!] arch {} not found, using average by default'.format(user_model))
        clicked_vec = models.GlobalAveragePoolingMasked(mask)(clicked_vec)
    # Score the (user, candidate) pair with a small MLP head.
    join_vec = keras.layers.concatenate([clicked_vec, candidate_vec])
    hidden = keras.layers.Dense(self.config.hidden_dim, activation='relu')(join_vec)
    logits = keras.layers.Dense(1, activation='sigmoid')(hidden)
    self.model = keras.Model([clicked, candidate], logits)
    # Exact-class check (deliberately not isinstance): only the base
    # Seq2VecForward compiles here; subclasses receive the raw model.
    if self.__class__ == Seq2VecForward:
        self.model.compile(
            optimizer=keras.optimizers.Adam(self.config.learning_rate),
            loss=self.loss,
            metrics=[utils.auc_roc]
        )
    else:
        return self.model