Example #1
    def _build_serving_graph(self):
        big_bpr = BPR(batch_size=self._batch_size,
                      max_user=self._max_user,
                      max_item=self._max_item,
                      dim_embed=20)
        Recommender.load(big_bpr, "model-51")
        print('calling _build_serving_graph')

        tf.reset_default_graph()

        big_bpr2 = BPR(batch_size=self._batch_size,
                       max_user=self._max_user,
                       max_item=self._max_item,
                       dim_embed=20)
        Recommender.load(big_bpr2, "model-52")

        tf.reset_default_graph()

        big_bpr3 = BPR(batch_size=self._batch_size,
                       max_user=self._max_user,
                       max_item=self._max_item,
                       dim_embed=20)
        Recommender.load(big_bpr3, "model-53")

        self._rec1 = big_bpr
        self._rec2 = big_bpr2
        self._rec3 = big_bpr3
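Note that tf.reset_default_graph() replaces the default graph that the previously built recommender lives on, so whether all three models stay servable depends on how Recommender manages its sessions. A minimal alternative sketch, assuming TF 1.x and that each BPR instance creates its variables and session on whichever graph is current at construction time:

    def _build_serving_graphs_isolated(self):
        # Sketch only: give each recommender its own tf.Graph so that
        # loading one checkpoint never clobbers another model's variables.
        models = []
        for ckpt in ("model-51", "model-52", "model-53"):
            with tf.Graph().as_default():
                model = BPR(batch_size=self._batch_size,
                            max_user=self._max_user,
                            max_item=self._max_item,
                            dim_embed=20)
                Recommender.load(model, ckpt)
                models.append(model)
        self._rec1, self._rec2, self._rec3 = models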
Example #2
# bpr_model = BPR(batch_size=batch_size, max_user=max_user, max_item=max_item,
#                 dim_embed=20, opt='Adam', sess_config=sess_config)
# sampler = PairwiseSampler(batch_size=batch_size,
#                           dataset=train_dataset, num_process=5)
# model_trainer = ImplicitModelTrainer(batch_size=batch_size, test_batch_size=test_batch_size,
#                                      train_dataset=train_dataset, model=bpr_model, sampler=sampler)
# auc_evaluator = AUC()

# model_trainer.train(num_itr=int(10), display_itr=display_itr, eval_datasets=[val_dataset, test_dataset],
#                     evaluators=[auc_evaluator])

# print("Save")
# bpr_model.save("./model", 1)
# print("Saved")

big_bpr = BPR(batch_size=batch_size,
              max_user=max_user,
              max_item=max_item,
              dim_embed=20)
Recommender.load(big_bpr, "model-1")
print(big_bpr)
# 'pythonsucks' is the serving input batch, defined elsewhere in the original script.
print(big_bpr.serve(pythonsucks))

# Reset the default graph before building and loading a second copy of the model.
tf.reset_default_graph()

big_bpr2 = BPR(batch_size=batch_size,
               max_user=max_user,
               max_item=max_item,
               dim_embed=20)
Recommender.load(big_bpr2, "model-1")
print(big_bpr2)
print(big_bpr2.serve(pythonsucks))
Example #3
train_dataset = ImplicitDataset(raw_data['train_data'],
                                raw_data['max_user'],
                                raw_data['max_item'],
                                name='Train')
val_dataset = ImplicitDataset(raw_data['val_data'],
                              raw_data['max_user'],
                              raw_data['max_item'],
                              name='Val')
test_dataset = ImplicitDataset(raw_data['test_data'],
                               raw_data['max_user'],
                               raw_data['max_item'],
                               name='Test')

bpr_model = BPR(batch_size=batch_size,
                max_user=train_dataset.max_user(),
                max_item=train_dataset.max_item(),
                dim_embed=20,
                opt='Adam',
                sess_config=sess_config)
sampler = PairwiseSampler(batch_size=batch_size,
                          dataset=train_dataset,
                          num_process=1)
model_trainer = ImplicitModelTrainer(batch_size=batch_size,
                                     test_batch_size=test_batch_size,
                                     train_dataset=train_dataset,
                                     model=bpr_model,
                                     sampler=sampler)
auc_evaluator = AUC()

model_trainer.train(num_itr=int(1e6),
                    display_itr=display_itr,
                    eval_datasets=[val_dataset, test_dataset],
                    evaluators=[auc_evaluator])
Example #4
    def sample_data_and_train(self):
        self.logger.warning(
            'sample_data_and_train called (pid = %d); kill this process if training fails',
            os.getpid())
        self.logger.info('-------- sample_data_and_train starts --------')

        total_users = 0
        interactions_count = 0
        with open(
                os.path.dirname(os.path.abspath(__file__)) +
                self.path_to_dataset, 'r') as fin:
            for line in fin:
                interactions_count += int(line.split()[0])
                total_users += 1
        self.logger.info('############ collecting data.. ############')

        # Randomly hold out one item per user for validation and one for testing.
        val_structured_arr = np.zeros(total_users,
                                      dtype=[('user_id', np.int32),
                                             ('item_id', np.int32)])
        test_structured_arr = np.zeros(total_users,
                                       dtype=[('user_id', np.int32),
                                              ('item_id', np.int32)])
        train_structured_arr = np.zeros(interactions_count - total_users * 2,
                                        dtype=[('user_id', np.int32),
                                               ('item_id', np.int32)])

        interaction_ind = 0
        next_user_id = 0
        next_item_id = 0
        map_to_item_id = dict()  # Maps raw item ids to contiguous ids 0..len(items)-1.

        with open(
                os.path.dirname(os.path.abspath(__file__)) +
                self.path_to_dataset, 'r') as fin:
            for line in fin:
                item_list = line.split()[1:]
                random.shuffle(item_list)
                for ind, item in enumerate(item_list):
                    if item not in map_to_item_id:
                        map_to_item_id[item] = next_item_id
                        next_item_id += 1
                    if ind == 0:
                        val_structured_arr[next_user_id] = (
                            next_user_id, map_to_item_id[item])
                    elif ind == 1:
                        test_structured_arr[next_user_id] = (
                            next_user_id, map_to_item_id[item])
                    else:
                        train_structured_arr[interaction_ind] = (
                            next_user_id, map_to_item_id[item])
                        interaction_ind += 1
                next_user_id += 1

        self.logger.info('############ instantiating dataset.. ############')

        from openrec.utils import Dataset

        train_dataset = Dataset(raw_data=train_structured_arr,
                                total_users=total_users,
                                total_items=len(map_to_item_id),
                                name='Train')
        val_dataset = Dataset(raw_data=val_structured_arr,
                              total_users=total_users,
                              total_items=len(map_to_item_id),
                              num_negatives=500,
                              name='Val')
        test_dataset = Dataset(raw_data=test_structured_arr,
                               total_users=total_users,
                               total_items=len(map_to_item_id),
                               num_negatives=500,
                               name='Test')

        self.logger.info("############ instantiating Samplers.. ############")

        from openrec.utils.samplers import RandomPairwiseSampler
        from openrec.utils.samplers import EvaluationSampler

        train_sampler = RandomPairwiseSampler(batch_size=1000,
                                              dataset=train_dataset,
                                              num_process=5)
        val_sampler = EvaluationSampler(batch_size=1000, dataset=val_dataset)
        test_sampler = EvaluationSampler(batch_size=1000, dataset=test_dataset)

        self.logger.info(
            "############ instantiating Recommender.. ############")

        from openrec.recommenders import BPR

        bpr_model = BPR(batch_size=1000,
                        total_users=train_dataset.total_users(),
                        total_items=train_dataset.total_items(),
                        dim_user_embed=50,
                        dim_item_embed=50,
                        save_model_dir='bpr_recommender/',
                        train=True,
                        serve=True)

        self.logger.info("############ instantiating Evaluator.. ############")

        from openrec.utils.evaluators import AUC

        auc_evaluator = AUC()

        self.logger.info(
            "############ instantiating Model trainer.. ############")

        from openrec import ModelTrainer

        model_trainer = ModelTrainer(model=bpr_model)

        print("############ starting training.. ############")

        model_trainer.train(
            total_iter=10000,  # Total number of training iterations
            eval_iter=1000,  # Evaluate the model every "eval_iter" iterations
            save_iter=10000,  # Save the model every "save_iter" iterations
            train_sampler=train_sampler,
            eval_samplers=[val_sampler, test_sampler],
            evaluators=[auc_evaluator])
        # self.logger.info("THIS IS WHEN MODEL WILL START TRAINING... returning")
        self.logger.info("-------- sample_data_and_train ends --------")
Example #5
    train_dataset = ImplicitDataset(raw_data=csv,
                                    max_user=max_users,
                                    max_item=max_items,
                                    name='Train')
    val_dataset = ImplicitDataset(raw_data=csv,
                                  max_user=max_users,
                                  max_item=max_items,
                                  name='Val')
    test_dataset = ImplicitDataset(raw_data=csv,
                                   max_user=max_users,
                                   max_item=max_items,
                                   name='Test')

    bpr_model = BPR(batch_size=1000,
                    max_user=train_dataset.max_user(),
                    max_item=train_dataset.max_item(),
                    dim_embed=20,
                    opt='Adam')

    print("before sampler")
    sampler = PairwiseSampler(batch_size=1000, dataset=train_dataset)
    print("after sampler")

    auc_evaluator = AUC()
    print("after evaluator")

    model_trainer = ImplicitModelTrainer(batch_size=1000,
                                         test_batch_size=100,
                                         train_dataset=train_dataset,
                                         model=bpr_model,
                                         sampler=sampler)
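The snippet stops after constructing the trainer; with this legacy openrec API, training would then typically be started as in Example #3 (iteration counts below are placeholders):

    model_trainer.train(num_itr=10000,  # placeholder iteration count
                        display_itr=1000,  # placeholder display interval
                        eval_datasets=[val_dataset, test_dataset],
                        evaluators=[auc_evaluator])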
Example #6
# Assumed imports for this module (TF 1.x era openrec; BPR path as in Example #4):
import tensorflow as tf

from openrec.recommenders import BPR
from openrec.modules.extractions import LatentFactor
from openrec.modules.interactions import PairwiseEuDist


def UCML(batch_size,
         dim_user_embed,
         dim_item_embed,
         total_users,
         total_items,
         l2_reg=None,
         init_model_dir=None,
         save_model_dir='Recommender/',
         train=True,
         serve=False):

    rec = BPR(batch_size=batch_size,
              dim_user_embed=dim_user_embed,
              dim_item_embed=dim_item_embed,
              total_users=total_users,
              total_items=total_items,
              l2_reg=l2_reg,
              init_model_dir=init_model_dir,
              save_model_dir=save_model_dir,
              train=train,
              serve=serve)

    t = rec.traingraph
    s = rec.servegraph

    def censor_vec(embedding, censor_id):
        unique_censor_id, _ = tf.unique(censor_id)
        embedding_gather = tf.gather(embedding, indices=unique_censor_id)
        norm = tf.sqrt(
            tf.reduce_sum(tf.square(embedding_gather), axis=1, keepdims=True))
        return tf.scatter_update(embedding,
                                 indices=unique_censor_id,
                                 updates=embedding_gather /
                                 tf.maximum(norm, 1.0))

    @t.usergraph.extend
    def censor_user_vec(subgraph):
        user_embedding, _ = LatentFactor(l2_reg=None,
                                         init='normal',
                                         id_=None,
                                         shape=[total_users, dim_user_embed],
                                         subgraph=subgraph,
                                         scope='user')
        user_censor_ops = censor_vec(user_embedding, subgraph['user_id'])
        subgraph.register_global_operation(user_censor_ops, 'censor_embedding')

    @t.itemgraph.extend
    def censor_item_vec(subgraph):
        item_embedding, _ = LatentFactor(l2_reg=None,
                                         init='normal',
                                         id_=None,
                                         shape=[total_items, dim_item_embed],
                                         subgraph=subgraph,
                                         scope='item')
        item_censor_ops = censor_vec(
            item_embedding,
            tf.concat([subgraph['p_item_id'], subgraph['n_item_id']], axis=0))
        subgraph.register_global_operation(item_censor_ops, 'censor_embedding')

    @t.interactiongraph(ins=[
        'user_vec', 'p_item_vec', 'n_item_vec', 'p_item_bias', 'n_item_bias'
    ])
    def interaction_graph(subgraph):
        PairwiseEuDist(user_vec=subgraph['user_vec'],
                       p_item_vec=subgraph['p_item_vec'],
                       n_item_vec=subgraph['n_item_vec'],
                       p_item_bias=subgraph['p_item_bias'],
                       n_item_bias=subgraph['n_item_bias'],
                       subgraph=subgraph,
                       train=True,
                       scope='PairwiseEuDist')

    @s.interactiongraph(ins=['user_vec', 'item_vec', 'item_bias'])
    def serving_interaction_graph(subgraph):
        PairwiseEuDist(user_vec=subgraph['user_vec'],
                       item_vec=subgraph['item_vec'],
                       item_bias=subgraph['item_bias'],
                       train=False,
                       subgraph=subgraph,
                       scope='PairwiseEuDist')

    return rec
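A minimal usage sketch; all sizes below are hypothetical. Because PairwiseEuDist compares user and item vectors directly, dim_user_embed and dim_item_embed must match:

ucml_model = UCML(batch_size=1000,
                  dim_user_embed=50,
                  dim_item_embed=50,
                  total_users=10000,
                  total_items=5000,
                  save_model_dir='ucml_recommender/',
                  train=True,
                  serve=True)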
Example #7
# Assumed imports for this module (TF 1.x era openrec; BPR path as in Example #4):
import tensorflow as tf

from openrec.recommenders import BPR
from openrec.modules.extractions import MultiLayerFC


def VBPR(
    batch_size,
    dim_user_embed,
    dim_item_embed,
    dim_v,
    total_users,
    total_items,
    l2_reg_embed=None,
    l2_reg_mlp=None,
    init_model_dir=None,
    save_model_dir="Recommender/",
    train=True,
    serve=False,
):

    rec = BPR(
        batch_size=batch_size,
        dim_user_embed=dim_user_embed,
        dim_item_embed=dim_item_embed,
        total_users=total_users,
        total_items=total_items,
        l2_reg=l2_reg_embed,
        init_model_dir=init_model_dir,
        save_model_dir=save_model_dir,
        train=train,
        serve=serve,
    )

    t = rec.traingraph
    s = rec.servegraph

    @t.inputgraph.extend(outs=["p_item_vfeature", "n_item_vfeature"])
    def train_item_visual_features(subgraph):
        subgraph["p_item_vfeature"] = tf.placeholder(tf.float32,
                                                     shape=[batch_size, dim_v],
                                                     name="p_item_vfeature")
        subgraph["n_item_vfeature"] = tf.placeholder(tf.float32,
                                                     shape=[batch_size, dim_v],
                                                     name="n_item_vfeature")
        subgraph.update_global_input_mapping({
            "p_item_vfeature":
            subgraph["p_item_vfeature"],
            "n_item_vfeature":
            subgraph["n_item_vfeature"],
        })

    @s.inputgraph.extend(outs=["item_vfeature"])
    def serving_item_visual_features(subgraph):
        subgraph["item_vfeature"] = tf.placeholder(tf.float32,
                                                   shape=[None, dim_v],
                                                   name="item_vfeature")
        subgraph.update_global_input_mapping(
            {"item_vfeature": subgraph["item_vfeature"]})

    @t.itemgraph.extend(ins=["p_item_vfeature", "n_item_vfeature"])
    def train_add_item_graph(subgraph):
        p_item_vout = MultiLayerFC(
            in_tensor=subgraph["p_item_vfeature"],
            l2_reg=l2_reg_mlp,
            subgraph=subgraph,
            dims=[dim_user_embed - dim_item_embed],
            scope="item_MLP",
        )
        n_item_vout = MultiLayerFC(
            in_tensor=subgraph["n_item_vfeature"],
            l2_reg=l2_reg_mlp,
            subgraph=subgraph,
            dims=[dim_user_embed - dim_item_embed],
            scope="item_MLP",
        )
        subgraph["p_item_vec"] = tf.concat(
            [subgraph["p_item_vec"], p_item_vout], axis=1)
        subgraph["n_item_vec"] = tf.concat(
            [subgraph["n_item_vec"], n_item_vout], axis=1)

    @s.itemgraph.extend(ins=["item_vfeature"])
    def serving_add_item_graph(subgraph):
        item_vout = MultiLayerFC(
            in_tensor=subgraph["item_vfeature"],
            l2_reg=l2_reg_mlp,
            subgraph=subgraph,
            dims=[dim_user_embed - dim_item_embed],
            scope="item_MLP",
        )
        subgraph["item_vec"] = tf.concat([subgraph["item_vec"], item_vout],
                                         axis=1)

    @t.connector.extend
    def train_connect(graph):
        graph.itemgraph["p_item_vfeature"] = graph.inputgraph[
            "p_item_vfeature"]
        graph.itemgraph["n_item_vfeature"] = graph.inputgraph[
            "n_item_vfeature"]

    @s.connector.extend
    def serve_connect(graph):
        graph.itemgraph["item_vfeature"] = graph.inputgraph["item_vfeature"]

    return rec
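A minimal usage sketch; all sizes below are hypothetical. The item MLP projects dim_v visual features into dim_user_embed - dim_item_embed dimensions, so dim_user_embed must be strictly larger than dim_item_embed:

vbpr_model = VBPR(batch_size=1000,
                  dim_user_embed=100,
                  dim_item_embed=50,
                  dim_v=2048,
                  total_users=10000,
                  total_items=5000,
                  save_model_dir='vbpr_recommender/',
                  train=True,
                  serve=True)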
Example #8
# Assumed imports (module paths as in Example #4):
from openrec import ModelTrainer
from openrec.recommenders import BPR
from openrec.utils import Dataset
from openrec.utils.evaluators import AUC
from openrec.utils.samplers import RandomPairwiseSampler, EvaluationSampler

train_dataset = Dataset(raw_data['train_data'],
                        raw_data['total_users'],
                        raw_data['total_items'],
                        name='Train')
val_dataset = Dataset(raw_data['val_data'],
                      raw_data['total_users'],
                      raw_data['total_items'],
                      name='Val',
                      num_negatives=500)
test_dataset = Dataset(raw_data['test_data'],
                       raw_data['total_users'],
                       raw_data['total_items'],
                       name='Test',
                       num_negatives=500)

train_sampler = RandomPairwiseSampler(batch_size=batch_size,
                                      dataset=train_dataset,
                                      num_process=5)
val_sampler = EvaluationSampler(batch_size=batch_size, dataset=val_dataset)
test_sampler = EvaluationSampler(batch_size=batch_size, dataset=test_dataset)

bpr_model = BPR(batch_size=batch_size,
                total_users=train_dataset.total_users(),
                total_items=train_dataset.total_items(),
                dim_user_embed=dim_embed,
                dim_item_embed=dim_embed,
                save_model_dir='bpr_recommender/',
                train=True,
                serve=True)

model_trainer = ModelTrainer(model=bpr_model)

auc_evaluator = AUC()
model_trainer.train(total_iter=total_iter,
                    eval_iter=eval_iter,
                    save_iter=save_iter,
                    train_sampler=train_sampler,
                    eval_samplers=[val_sampler, test_sampler],
                    evaluators=[auc_evaluator])
Example #9
def exp(dataset, l2_reg, p_n_ratio, eval_explicit, save_log, eval_rank):

    if dataset == 'spotify':
        data = loadSpotify()

    elif dataset == 'bytedance':
        data = loadByteDance()

    else:
        print("Unsupported dataset...")
        return

    # save logging and model
    log_dir = "validation_logs/{}_{}_{}_{}_{}/".format(dataset, l2_reg,
                                                       p_n_ratio,
                                                       eval_explicit,
                                                       eval_rank)
    os.popen("mkdir -p %s" % log_dir).read()
    if save_log:
        log = open(log_dir + "validation.log", "w")
        sys.stdout = log

    # prepare train, val, test sets
    train_dataset = Dataset(data['train'],
                            data['total_users'],
                            data['total_items'],
                            name='Train')
    if p_n_ratio is None:
        train_sampler = RandomPairwiseSampler(batch_size=batch_size,
                                              dataset=train_dataset,
                                              num_process=5)
    else:
        train_sampler = StratifiedPairwiseSampler(batch_size=batch_size,
                                                  dataset=train_dataset,
                                                  p_n_ratio=p_n_ratio,
                                                  num_process=5)
        if p_n_ratio > 0.0:
            print("Re-weighting implicit negative feedback")
        else:
            print("Corrected negative feedback labels but not re-weighting")

    eval_num_neg = None if eval_explicit else 500  # num of negative samples for evaluation
    if eval_rank:
        # show evaluation metrics for click-complete and click-skip items separately
        pos_dataset = Dataset(data['pos_test'],
                              data['total_users'],
                              data['total_items'],
                              implicit_negative=not eval_explicit,
                              name='Pos_Test',
                              num_negatives=eval_num_neg)
        neg_dataset = Dataset(data['neg_test'],
                              data['total_users'],
                              data['total_items'],
                              implicit_negative=not eval_explicit,
                              name='Neg_Test',
                              num_negatives=eval_num_neg)
        pos_sampler = EvaluationSampler(batch_size=batch_size,
                                        dataset=pos_dataset)
        neg_sampler = EvaluationSampler(batch_size=batch_size,
                                        dataset=neg_dataset)
        eval_samplers = [pos_sampler, neg_sampler]
    else:
        val_dataset = Dataset(data['val'],
                              data['total_users'],
                              data['total_items'],
                              implicit_negative=not eval_explicit,
                              name='Val',
                              num_negatives=eval_num_neg)
        test_dataset = Dataset(data['test'],
                               data['total_users'],
                               data['total_items'],
                               implicit_negative=not eval_explicit,
                               name='Test',
                               num_negatives=eval_num_neg)
        val_sampler = EvaluationSampler(batch_size=batch_size,
                                        dataset=val_dataset)
        test_sampler = EvaluationSampler(batch_size=batch_size,
                                         dataset=test_dataset)
        eval_samplers = [val_sampler, test_sampler]

    # set evaluators
    auc_evaluator = AUC()
    evaluators = [auc_evaluator]

    # set model parameters
    model = BPR(l2_reg=l2_reg,
                batch_size=batch_size,
                total_users=train_dataset.total_users(),
                total_items=train_dataset.total_items(),
                dim_user_embed=dim_user_embed,
                dim_item_embed=dim_item_embed,
                save_model_dir=log_dir,
                train=True,
                serve=True)

    # set model trainer
    model_trainer = ModelTrainer(model=model)
    model_trainer.train(total_iter=total_iter,
                        eval_iter=eval_iter,
                        save_iter=save_iter,
                        train_sampler=train_sampler,
                        eval_samplers=eval_samplers,
                        evaluators=evaluators)
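A hypothetical invocation; the module-level globals the function relies on (batch_size, dim_user_embed, dim_item_embed, total_iter, eval_iter, save_iter) are assumed to be defined elsewhere in the script:

exp(dataset='spotify',
    l2_reg=0.001,
    p_n_ratio=0.5,
    eval_explicit=False,
    save_log=False,
    eval_rank=False)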