Code example #1
    def model_implicit_sequence(
            self,
            train: Interactions,
            random_state: np.random.RandomState,
            representation: str = None,
            hyperparameters: dict = None) -> ImplicitSequenceModel:
        """Fit an implicit sequence model on ``train`` and return it.

        Parameters
        ----------
        train : Interactions
            Sequence interactions dataset used for fitting.
        random_state : np.random.RandomState
            Randomness source handed to the model for reproducibility.
        representation : str, optional
            Name of a built-in representation; when falsy, a ``CNNNet``
            is constructed here instead.
        hyperparameters : dict, optional
            Net/model hyperparameters; when falsy, library defaults are
            used throughout.

        Returns
        -------
        ImplicitSequenceModel
            The fitted model.
        """
        logger = logging.getLogger()

        if not representation:
            # No named representation requested: build a CNN net, tuned
            # when hyperparameters are supplied, default-configured otherwise.
            if hyperparameters:
                representation = CNNNet(
                    train.num_items,
                    embedding_dim=hyperparameters['embedding_dim'],
                    kernel_width=hyperparameters['kernel_width'],
                    dilation=hyperparameters['dilation'],
                    num_layers=hyperparameters['num_layers'],
                    nonlinearity=hyperparameters['nonlinearity'],
                    residual_connections=hyperparameters['residual'])
            else:
                representation = CNNNet(train.num_items)

        # Human-readable model name for log messages.
        out_string = ('CNN' if isinstance(representation, CNNNet)
                      else representation.upper())

        if hyperparameters:
            logger.info(
                "Beginning fitting implicit sequence {0} model... \n Hyperparameters: \n {1}"
                .format(
                    out_string,
                    json.dumps({
                        k: v for k, v in hyperparameters.items()
                        if k != 'use_cuda'
                    })))
            model = ImplicitSequenceModel(
                loss=hyperparameters['loss'],
                representation=representation,
                batch_size=hyperparameters['batch_size'],
                learning_rate=hyperparameters['learning_rate'],
                l2=hyperparameters['l2'],
                n_iter=hyperparameters['n_iter'],
                # Bug fix: honour the caller-supplied 'use_cuda' flag rather
                # than hard-coding True -- the flag was already being
                # filtered out of the log dump above, so it is clearly an
                # expected hyperparameter. Defaults to True for backward
                # compatibility when the key is absent.
                use_cuda=hyperparameters.get('use_cuda', True),
                random_state=random_state)
        else:
            model = ImplicitSequenceModel(use_cuda=True)
            logger.info(
                "Beginning fitting implicit sequence {} model with default hyperparameters..."
                .format(out_string))

        model.fit(train, verbose=True)
        # Smoke-test prediction on the training sequences before returning.
        model.predict(train.sequences)
        return model
Code example #2
def test_implicit_cnn_dilation_synthetic(num_layers, dilation, expected_mrr):
    """A dilated-CNN sequence model must clear the expected MRR floor."""

    rng = np.random.RandomState(RANDOM_SEED)
    train, test = _get_synthetic_data(randomness=1e-03,
                                      num_interactions=20000,
                                      random_state=rng)

    # Build the representation separately for readability; kernel width 1
    # isolates the effect of dilation across layers.
    representation = CNNNet(train.num_items,
                            embedding_dim=EMBEDDING_DIM,
                            kernel_width=1,
                            dilation=dilation,
                            num_layers=num_layers)

    model = ImplicitSequenceModel(loss=LOSS,
                                  representation=representation,
                                  batch_size=BATCH_SIZE,
                                  learning_rate=1e-2,
                                  l2=0.0,
                                  n_iter=NUM_EPOCHS * num_layers,
                                  random_state=rng)
    model.fit(train, verbose=VERBOSE)

    assert _evaluate(model, test).mean() > expected_mrr
Code example #3
def evaluate_cnn_model(hyperparameters, train, test, validation, random_state):
    """Fit a CNN sequence model and return (test MRR, validation MRR)."""

    params = hyperparameters

    representation = CNNNet(train.num_items,
                            embedding_dim=params['embedding_dim'],
                            kernel_width=params['kernel_width'],
                            dilation=params['dilation'],
                            num_layers=params['num_layers'],
                            nonlinearity=params['nonlinearity'],
                            residual_connections=params['residual'])

    model = ImplicitSequenceModel(loss=params['loss'],
                                  representation=representation,
                                  batch_size=params['batch_size'],
                                  learning_rate=params['learning_rate'],
                                  l2=params['l2'],
                                  n_iter=params['n_iter'],
                                  use_cuda=CUDA,
                                  random_state=random_state)
    model.fit(train, verbose=True)

    # Score on both held-out sets with the same metric.
    return (sequence_mrr_score(model, test),
            sequence_mrr_score(model, validation))
Code example #4
File: tuner.py  Project: kvt0012/dimo-recsys
def evaluate_cnn_model(hyperparameters, train, test, validation, random_state):
    """Fit a CNN sequence model; report mean MRR for test and validation."""
    params = hyperparameters

    net = CNNNet(train.num_items,
                 kernel_width=params['kernel_width'],
                 dilation=params['dilation'],
                 num_layers=params['num_layers'],
                 nonlinearity=params['nonlinearity'],
                 residual_connections=params['residual'])

    model = ImplicitSequenceModel(loss=params['loss'],
                                  representation=net,
                                  n_iter=params['n_iter'],
                                  use_cuda=CUDA,
                                  random_state=random_state)
    model.fit(train, verbose=True)

    # Each evaluation dict carries a single scalar 'mrr' entry.
    test_eval = {'mrr': sequence_mrr_score(model, test).mean()}
    val_eval = {'mrr': sequence_mrr_score(model, validation).mean()}

    return test_eval, val_eval
Code example #5
def test_bloom_cnn(compression_ratio, expected_mrr):
    """A CNN model with Bloom item embeddings must beat the expected MRR."""

    rng = np.random.RandomState(RANDOM_SEED)
    train, test = _get_synthetic_data(randomness=1e-03,
                                      num_interactions=20000,
                                      random_state=rng)

    # Compressed item embeddings via hashing (Bloom embedding layer).
    item_embeddings = BloomEmbedding(train.num_items,
                                     32,
                                     compression_ratio=compression_ratio,
                                     num_hash_functions=2)

    net = CNNNet(train.num_items,
                 embedding_dim=EMBEDDING_DIM,
                 kernel_width=3,
                 item_embedding_layer=item_embeddings)

    model = ImplicitSequenceModel(loss=LOSS,
                                  representation=net,
                                  batch_size=BATCH_SIZE,
                                  learning_rate=1e-2,
                                  l2=0.0,
                                  n_iter=NUM_EPOCHS,
                                  random_state=rng,
                                  use_cuda=CUDA)
    model.fit(train, verbose=VERBOSE)

    assert _evaluate(model, test).mean() > expected_mrr
Code example #6
    def _initialize(self, interactions):
        """Build the network, optimizer and loss function before fitting."""

        self._num_items = interactions.num_items

        # Named presets map to net classes sharing the same constructor
        # signature; anything else is treated as a ready-made module.
        net_classes = {
            'pooling': PoolNet,
            'cnn': CNNNet,
            'lstm': LSTMNet,
            'mixture': MixtureLSTMNet,
        }
        net_cls = net_classes.get(self._representation)
        if net_cls is not None:
            self._net = net_cls(self._num_items,
                                self._embedding_dim,
                                sparse=self._sparse)
        else:
            # Caller supplied a custom representation module directly.
            self._net = self._representation

        self._net = gpu(self._net, self._use_cuda)

        if self._optimizer_func is None:
            # Default optimizer: Adam with L2 weight decay.
            self._optimizer = optim.Adam(self._net.parameters(),
                                         weight_decay=self._l2,
                                         lr=self._learning_rate)
        else:
            self._optimizer = self._optimizer_func(self._net.parameters())

        # Unknown loss names fall back to adaptive hinge, as before.
        loss_functions = {
            'pointwise': pointwise_loss,
            'bpr': bpr_loss,
            'hinge': hinge_loss,
        }
        self._loss_func = loss_functions.get(self._loss, adaptive_hinge_loss)
Code example #7
    def _cnn_net(self, interactions):
        """Assemble a ``CNNNet`` for ``interactions`` from the stored params."""
        # Only forward fit params that CNNNet's __init__ actually accepts.
        accepted = collect_named_init_params(CNNNet)['CNNNet']
        forwarded = {
            k: v for k, v in self.fit_params.items() if k in accepted
        }
        cnn_params = dict(num_items=interactions.num_items,
                          embedding_dim=self.model_params['embedding_dim'],
                          **forwarded)

        # Expand the scalar dilation into a per-layer schedule that
        # doubles with depth (1x, 2x, 4x, ...).
        base_dilation = cnn_params['dilation']
        cnn_params['dilation'] = [
            base_dilation * (2 ** layer)
            for layer in range(cnn_params['num_layers'])
        ]

        return CNNNet(**cnn_params)
Code example #8
def test_implicit_sequence_serialization(data):
    """A serialized-then-reloaded model must score exactly like the original."""

    train, test = data
    train = train.to_sequence(max_sequence_length=128)
    test = test.to_sequence(max_sequence_length=128)

    representation = CNNNet(train.num_items,
                            embedding_dim=32,
                            kernel_width=3,
                            dilation=(1, ),
                            num_layers=1)
    model = ImplicitSequenceModel(loss='bpr',
                                  representation=representation,
                                  batch_size=128,
                                  learning_rate=1e-1,
                                  l2=0.0,
                                  n_iter=5,
                                  random_state=RANDOM_STATE,
                                  use_cuda=CUDA)
    model.fit(train)

    original_score = sequence_mrr_score(model, test).mean()
    recovered_score = sequence_mrr_score(_reload(model), test).mean()

    assert original_score == recovered_score
Code example #9
File: implicit.py  Project: zgsxwsdxg/spotlight
    def fit(self, interactions, verbose=False):
        """
        Fit the model.

        Parameters
        ----------

        interactions: :class:`spotlight.interactions.SequenceInteractions`
            The input sequence dataset.
        verbose: bool, optional
            When True, print the mean loss after each epoch.
        """

        # Item indices must be int64 so torch.from_numpy yields a LongTensor.
        sequences = interactions.sequences.astype(np.int64)

        self._num_items = interactions.num_items

        # Resolve the user-representation network: named presets build a
        # net here; anything else is assumed to be a ready-made module.
        if self._representation == 'pooling':
            self._net = PoolNet(self._num_items,
                                self._embedding_dim,
                                sparse=self._sparse)
        elif self._representation == 'cnn':
            self._net = CNNNet(self._num_items,
                               self._embedding_dim,
                               sparse=self._sparse)
        elif self._representation == 'lstm':
            self._net = LSTMNet(self._num_items,
                                self._embedding_dim,
                                sparse=self._sparse)
        else:
            self._net = self._representation

        # Move the net to GPU when CUDA is enabled.
        self._net = gpu(self._net, self._use_cuda)

        # Lazily create the default optimizer; a pre-set optimizer is kept.
        if self._optimizer is None:
            self._optimizer = optim.Adam(self._net.parameters(),
                                         weight_decay=self._l2,
                                         lr=self._learning_rate)

        # Unrecognized loss names fall through to adaptive hinge.
        if self._loss == 'pointwise':
            loss_fnc = pointwise_loss
        elif self._loss == 'bpr':
            loss_fnc = bpr_loss
        elif self._loss == 'hinge':
            loss_fnc = hinge_loss
        else:
            loss_fnc = adaptive_hinge_loss

        for epoch_num in range(self._n_iter):

            # Reshuffle every epoch so minibatch composition varies.
            sequences = shuffle(sequences, random_state=self._random_state)

            sequences_tensor = gpu(torch.from_numpy(sequences), self._use_cuda)

            epoch_loss = 0.0

            for minibatch_num, batch_sequence in enumerate(
                    minibatch(sequences_tensor, batch_size=self._batch_size)):

                # NOTE(review): Variable and loss.data[0] below are pre-0.4
                # PyTorch idioms -- confirm the pinned torch version.
                sequence_var = Variable(batch_sequence)

                user_representation, _ = self._net.user_representation(
                    sequence_var)

                positive_prediction = self._net(user_representation,
                                                sequence_var)

                # Adaptive hinge ranks against several sampled negatives;
                # every other loss uses a single negative sample.
                if self._loss == 'adaptive_hinge':
                    negative_prediction = [
                        self._get_negative_prediction(sequence_var.size(),
                                                      user_representation)
                        for __ in range(5)
                    ]
                else:
                    negative_prediction = self._get_negative_prediction(
                        sequence_var.size(), user_representation)

                self._optimizer.zero_grad()

                # Mask out padding positions so they do not contribute loss.
                loss = loss_fnc(positive_prediction,
                                negative_prediction,
                                mask=(sequence_var != PADDING_IDX))
                epoch_loss += loss.data[0]

                loss.backward()
                self._optimizer.step()

            # Mean loss over the epoch's minibatches (assumes at least one
            # minibatch; an empty dataset would raise NameError here).
            epoch_loss /= minibatch_num + 1

            if verbose:
                print('Epoch {}: loss {}'.format(epoch_num, epoch_loss))
Code example #10
                                                                                                 pairs_ndcg,
                                                                                                 hit_ratio,
                                                                                                 ndcg))
    torch.save(net, model_store_dir + "/" + model_alias + "-" + str(epoch_num))
    net.train()


# Default the base directory when the environment does not provide one.
if "BASE_DIR" not in os.environ:
    os.environ["BASE_DIR"] = "/home/ec2-user/emb3"

# Fixed seed for reproducible training runs.
random_state = np.random.RandomState(100)

# CNN user-representation network; ``num_items`` and the hyperparameter
# dict ``h`` are defined earlier in this script (not visible here --
# presumably loaded from a config; verify against the full file).
net = CNNNet(num_items,
                 embedding_dim=h['emb_dim'],
                 kernel_width=h['kernel'],
                 dilation=1,
                 num_layers=h['layers'],
                 nonlinearity=h['nonlin'],
                 residual_connections=True)

model = ImplicitSequenceModel(loss=h['loss'],
                              representation=net,
                              batch_size=h['batch'],
                              learning_rate=h['lr'],
                              l2=h['l2'],
                              n_iter=h['n_iter'],
                              embedding_dim=h['emb_dim'],
                              use_cuda=use_cuda,
                              random_state=random_state,
                              notify_loss_completion=notify_loss_completion,
                              notify_batch_eval_completion=notify_batch_eval_completion,
Code example #11
                           ratings=ratings,
                           num_users=int(num_user),
                           num_items=int(foods_items),
                           timestamps=timeStamps)

    if name == "test":
        dataset_test = dataset
    elif name == "train":
        dataset_train = dataset

# Select the model by mode string.
# NOTE(review): these are independent ``if``s, not ``elif``s -- an
# unrecognized ``model_mode`` leaves ``model`` unbound and the later
# ``model.fit`` call raises NameError; consider elif plus an explicit error.
if model_mode.lower() == "ifm":
    model = ImplicitFactorizationModel(n_iter=n_iter)
if model_mode.lower() == "efm":
    model = ExplicitFactorizationModel(n_iter=n_iter)
if model_mode.lower() == "cnn":
    # The sequence model needs an explicit item-representation network.
    net = CNNNet(num_items=int(foods_items))
    model = ImplicitSequenceModel(n_iter=n_iter,
                                  use_cuda=torch.cuda.is_available(),
                                  representation=net)

model.fit(dataset_train)

# Persist the fitted model with the highest available pickle protocol.
with open(save_file, 'wb') as f:
    pickle.dump(model, f, pickle.HIGHEST_PROTOCOL)

# Sequence models are scored with sequence MRR; factorization models with
# plain MRR.
if model_mode.lower() == "cnn":
    mrr = sequence_mrr_score(model, dataset_test)
else:
    mrr = mrr_score(model, dataset_test)

# NOTE(review): this prints the *count* of per-item MRR scores, not the
# metric itself; ``mrr.mean()`` was probably intended -- confirm.
print("mrr = ", len(mrr))