Example #1
    def __init__(self, params):
        """Initialize classifiers.
        """
        super(ActionExecutor, self).__init__()
        self.params = params

        input_size = self.params["hidden_size"]
        if self.params["text_encoder"] == "transformer":
            input_size = self.params["word_embed_size"]
        if params["use_action_attention"]:
            self.attention_net = models.SelfAttention(input_size)
        # If multimodal input state is to be used.
        if self.params["use_multimodal_state"]:
            input_size += self.params["hidden_size"]
        # If belief state is to be used.
        if self.params["use_belief_state"]:
            input_size += self.params["hidden_size"]
        self.action_net = self._get_classify_network(input_size,
                                                     params["num_actions"])
        # Read action metadata.
        with open(params["metainfo_path"], "r") as file_id:
            action_metainfo = json.load(file_id)["actions"]
            action_dict = {ii["name"]: ii["id"] for ii in action_metainfo}
            self.action_metainfo = {ii["name"]: ii for ii in action_metainfo}
            self.action_map = loaders.Vocabulary(immutable=True, verbose=False)
            sorted_actions = sorted(action_dict.keys(),
                                    key=lambda x: action_dict[x])
            self.action_map.set_vocabulary_state(sorted_actions)
        # Read action attribute metadata.
        with open(params["attr_vocab_path"], "r") as file_id:
            self.attribute_vocab = json.load(file_id)
        # Create classifiers for action attributes.
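        # These heads are wrapped in nn.ModuleDict below so that each one is
        # registered as a submodule and its parameters are tracked by PyTorch.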
        self.classifiers = {}
        for key, val in self.attribute_vocab.items():
            self.classifiers[key] = self._get_classify_network(
                input_size, len(val))
        self.classifiers = nn.ModuleDict(self.classifiers)

        # Model multimodal state.
        if params["use_multimodal_state"]:
            if params["domain"] == "furniture":
                self.multimodal_embed = models.CarouselEmbedder(params)
            elif params["domain"] == "fashion":
                self.multimodal_embed = models.UserMemoryEmbedder(params)
            else:
                raise ValueError("Domain neither of furniture/fashion")

        # NOTE: Action output is modeled as multimodal state.
        if params["use_action_output"]:
            if params["domain"] == "furniture":
                self.action_output_embed = models.CarouselEmbedder(params)
            elif params["domain"] == "fashion":
                self.action_output_embed = models.UserMemoryEmbedder(params)
            else:
                raise ValueError("Domain neither of furniture/fashion")
        self.criterion_mean = nn.CrossEntropyLoss()
        self.criterion = nn.CrossEntropyLoss(reduction="none")
        self.criterion_multi = nn.MultiLabelSoftMarginLoss()
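
The constructor above calls a _get_classify_network helper that is not shown. A minimal sketch of what such a helper might look like, assuming a small two-layer MLP head (the hidden size and dropout rate are illustrative assumptions, not the project's actual implementation):

    def _get_classify_network(self, input_size, num_classes, hidden_size=256):
        # Hypothetical sketch: a small MLP classification head.
        # hidden_size and the dropout rate are assumptions for illustration.
        return nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_size, num_classes),
        )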
Example #2
    def __init__(self, args):
        super(Model, self).__init__()

        self.device = args.device
        self.layers = args.num_layers
        self.input_size_graph = args.input_size_graph
        self.output_size_graph = args.output_size_graph
        self.train_data = args.train_data
        self.test_data = args.test_data
        self.train_labels = args.train_labels
        self.test_labels = args.test_labels
        self.latent_size = args.latent_size
        self.hidden_size = args.hidden_size
        self.learning_rate = args.lr
        self.batch_size = args.batch_size
        self.input_dim = args.input_dim
        self.warmup = args.warmup
        self.num_labels = args.num_labels

        self.graph = args.graph
        self.sequence = args.sequence
        self.recons = args.recons
        self.use_attn = args.attn
        self.use_fusion = args.fusion

        self.graph_pretrain = graph_models.GraphSage(self.layers,
                                                     self.input_size_graph,
                                                     self.output_size_graph,
                                                     device=self.device,
                                                     gcn="True",
                                                     agg_func="MEAN")

        self.VAE = seq_models.VAE(args)

        self.AtomEmbedding = nn.Embedding(self.input_size_graph,
                                          self.hidden_size).to(self.device)
        self.AtomEmbedding.weight.requires_grad = True

        self.output_layer = models.classifier(self.latent_size,
                                              self.num_labels, self.device)

        self.label_criterion = nn.BCEWithLogitsLoss(reduction="none")

        if self.use_attn:
            self.attention = models.SelfAttention(self.hidden_size)

        # Create the optimizer last, after every submodule is registered, so
        # that self.parameters() covers all trainable weights.
        self.optimizer = optim.Adam(self.parameters(),
                                    lr=self.learning_rate,
                                    weight_decay=1e-8,
                                    amsgrad=True)

        for name, para in self.named_parameters():
            if para.requires_grad:
                print(name, para.data.shape)
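
A minimal construction sketch for the model above, assuming hypothetical argument values (the real args object comes from the project's argument parser, and the helper modules graph_models, seq_models, and models must be importable):

    from argparse import Namespace

    args = Namespace(
        device="cpu", num_layers=2, input_size_graph=40, output_size_graph=128,
        train_data=None, test_data=None, train_labels=None, test_labels=None,
        latent_size=64, hidden_size=128, lr=1e-3, batch_size=32, input_dim=128,
        warmup=10, num_labels=12, graph=True, sequence=True, recons=True,
        attn=True, fusion=False,
    )
    model = Model(args)  # prints the shape of every trainable parameter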
Example #3
    def _build_model(self):
        self.doc_encoder = doc_encoder = self.get_doc_encoder()
        user_encoder = keras.layers.TimeDistributed(doc_encoder)

        clicked = keras.Input((self.config.window_size, self.config.title_shape))
        candidate = keras.Input((self.config.title_shape,))

        clicked_vec = user_encoder(clicked)
        candidate_vec = doc_encoder(candidate)

        # Zero out padded positions in the click history so that the pooling
        # and attention variants below ignore them.
        mask = models.ComputeMasking(0)(clicked)
        clicked_vec = keras.layers.Lambda(lambda x: x[0] * keras.backend.expand_dims(x[1]))([clicked_vec, mask])

        user_model = self.config.arch
        logging.info('[!] Selecting User Model: {}'.format(user_model))
        if user_model == 'att':
            clicked_vec = models.SimpleAttentionMasked(mask)(clicked_vec)
        elif user_model == 'gru':
            clicked_vec = keras.layers.Masking()(clicked_vec)
            clicked_vec = keras.layers.GRU(self.config.user_embedding_dim)(clicked_vec)
        elif user_model == 'cnn':
            clicked_vec = keras.layers.Conv1D(*self.config.title_filter_shape, padding='same', activation='relu')(
                clicked_vec)
            clicked_vec = models.GlobalAveragePoolingMasked(mask)(clicked_vec)
        elif user_model == 'catt':
            clicked_vec = keras.layers.Conv1D(*self.config.title_filter_shape, padding='same', activation='relu')(
                clicked_vec)
            clicked_vec = models.SimpleAttentionMasked(mask)(clicked_vec)
        elif user_model == 'qatt':
            clicked_vec = keras.layers.Masking()(clicked_vec)
            clicked_vec = models.QueryAttention()([clicked_vec, candidate_vec])
        elif user_model == 'satt':
            clicked_vec = keras.layers.Masking()(clicked_vec)
            clicked_vec = models.SelfAttention()(clicked_vec)
            clicked_vec = models.GlobalAveragePoolingMasked(mask)(clicked_vec)
        elif user_model == 'lstm':
            clicked_vec = keras.layers.Masking()(clicked_vec)
            clicked_vec = keras.layers.LSTM(self.config.user_embedding_dim)(clicked_vec)
        elif user_model == 'lz1':
            input_shape = clicked_vec.get_shape()[-1].value

            def auto_attend():
                docs = keras.layers.Input(shape=(self.config.window_size, input_shape))
                cross_product = keras.layers.dot([docs, docs], axes=2)
                cross_weights = keras.layers.Softmax(axis=2)(cross_product)
                attended_docs = keras.layers.dot([cross_weights, docs], axes=1)
                return keras.Model(docs, attended_docs)

            def docs_pool():
                docs = keras.layers.Input(shape=(self.config.window_size, self.config.hidden_dim))
                pool_vec = keras.layers.Dense(units=1)
                weights = keras.layers.TimeDistributed(pool_vec)(docs)
                squeeze = keras.layers.Lambda(lambda x: keras.backend.squeeze(x, axis=2))
                weights = squeeze(weights)
                output = keras.layers.dot([docs, weights], axes=1)
                return keras.Model(docs, output)

            def docs_rnn():
                docs = keras.layers.Input(shape=(self.config.window_size, self.config.hidden_dim))
                gru = keras.layers.GRU(units=self.config.hidden_dim, activation="relu")
                output = gru(docs)
                return keras.Model(docs, output)

            def user_encode():
                raw_docs = keras.layers.Input(shape=(self.config.window_size, input_shape))
                att_docs = auto_attend()(raw_docs)
                ful_docs = keras.layers.concatenate([raw_docs, att_docs])
                ful_docs = keras.layers.Dense(units=self.config.hidden_dim)(ful_docs)
                pooling = docs_pool()(ful_docs)
                temporal = docs_rnn()(ful_docs)
                output = keras.layers.concatenate([pooling, temporal])
                return keras.Model(raw_docs, output)

            clicked_vec = user_encode()(clicked_vec)
        elif user_model == 'lz2':
            input_shape = clicked_vec.get_shape()[-1].value

            def auto_attend():
                docs = keras.layers.Input(shape=(self.config.window_size, input_shape))
                cross_product = keras.layers.dot([docs, docs], axes=2)
                cross_weights = keras.layers.Softmax(axis=2)(cross_product)
                attended_docs = keras.layers.dot([cross_weights, docs], axes=1)
                return keras.Model(docs, attended_docs)

            def docs_rnn():
                docs = keras.layers.Input(shape=(self.config.window_size, self.config.hidden_dim))
                gru = keras.layers.GRU(units=self.config.hidden_dim, activation="relu")
                output = gru(docs)
                return keras.Model(docs, output)

            def user_encode():
                raw_docs = keras.layers.Input(shape=(self.config.window_size, input_shape))
                att_docs = auto_attend()(raw_docs)
                ful_docs = keras.layers.concatenate([raw_docs, att_docs])
                ful_docs = keras.layers.Dense(units=self.config.hidden_dim)(ful_docs)
                pooling = models.GlobalAveragePoolingMasked(mask)(ful_docs)
                temporal = docs_rnn()(ful_docs)
                output = keras.layers.concatenate([pooling, temporal])
                return keras.Model(raw_docs, output)

            clicked_vec = user_encode()(clicked_vec)
        elif user_model == 'lz3':
            input_shape = clicked_vec.get_shape()[-1].value
            recent_len = min(input_shape, 20)

            def auto_attend():
                docs = keras.layers.Input(shape=(input_shape, self.config.hidden_dim))
                cross_product = keras.layers.dot([docs, docs], axes=2)
                cross_weights = keras.layers.Softmax(axis=2)(cross_product)
                attended_docs = keras.layers.dot([cross_weights, docs], axes=1)
                return keras.Model(docs, attended_docs)

            def pool_attend():
                docs = keras.layers.Input(shape=(input_shape, self.config.hidden_dim))
                pool_vec = keras.layers.Dense(units=1)
                weights = keras.layers.TimeDistributed(pool_vec)(docs)
                squeeze = keras.layers.Lambda(lambda x: keras.backend.squeeze(x, axis=2))
                weights = squeeze(weights)
                output = keras.layers.dot([docs, weights], axes=1)
                return keras.Model(docs, output)

            def time_attend():
                full_docs = keras.layers.Input(shape=(input_shape, self.config.hidden_dim))
                time_window = keras.layers.Lambda(lambda x: x[:, -recent_len:, :])
                recent_docs = time_window(full_docs)
                gru = keras.layers.GRU(units=self.config.hidden_dim, activation="relu", name="gru_0")
                recent_vecs = gru(recent_docs)
                weights = keras.layers.dot([recent_vecs, full_docs], axes=-1)
                weights = keras.layers.Softmax(axis=-1)(weights)
                attend_vec = keras.layers.dot([weights, full_docs], axes=1)
                output = keras.layers.concatenate([recent_vecs, attend_vec])
                return keras.Model(full_docs, output)

            att_docs = auto_attend()(clicked_vec)
            historical_pool_docs = pool_attend()(att_docs)
            recent_attended_docs = time_attend()(att_docs)
            clicked_vec = keras.layers.concatenate([historical_pool_docs, recent_attended_docs])
        else:
            if user_model != 'avg':
                logging.warning('[!] arch {} not found, using average by default'.format(user_model))
            clicked_vec = models.GlobalAveragePoolingMasked(mask)(clicked_vec)

        # Score the candidate against the aggregated user vector; the final
        # sigmoid yields a click probability.
        join_vec = keras.layers.concatenate([clicked_vec, candidate_vec])
        hidden = keras.layers.Dense(self.config.hidden_dim, activation='relu')(join_vec)
        logits = keras.layers.Dense(1, activation='sigmoid')(hidden)

        self.model = keras.Model([clicked, candidate], logits)
        if self.__class__ == Seq2VecForward:
            self.model.compile(
                optimizer=keras.optimizers.Adam(self.config.learning_rate),
                loss=self.loss,
                metrics=[utils.auc_roc]
            )
        else:
            return self.model
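
Several branches above use a project-specific models.GlobalAveragePoolingMasked layer. As an assumption about its behavior (not the project's actual code), a masked average pooling can be sketched in Keras roughly as follows, where mask has shape (batch, window) and marks non-padded positions:

    import keras
    import keras.backend as K

    def masked_average_pooling(mask):
        # Hypothetical sketch: average over the time axis, counting only
        # positions where the mask is non-zero.
        def pool(x):
            m = K.expand_dims(mask, axis=-1)           # (batch, window, 1)
            summed = K.sum(x * m, axis=1)              # (batch, dim)
            counts = K.maximum(K.sum(m, axis=1), 1.0)  # avoid dividing by zero
            return summed / counts
        return keras.layers.Lambda(pool)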