def _build_serving_graph(self):
    print('calling _build_serving_graph')
    # Load three independently trained BPR checkpoints for serving.
    big_bpr = BPR(batch_size=self._batch_size, max_user=self._max_user,
                  max_item=self._max_item, dim_embed=20)
    Recommender.load(big_bpr, "model-51")

    # NOTE: tf.reset_default_graph() clears the default graph between loads;
    # if BPR builds into the default graph, earlier instances may be invalidated.
    tf.reset_default_graph()
    big_bpr2 = BPR(batch_size=self._batch_size, max_user=self._max_user,
                   max_item=self._max_item, dim_embed=20)
    Recommender.load(big_bpr2, "model-52")

    tf.reset_default_graph()
    big_bpr3 = BPR(batch_size=self._batch_size, max_user=self._max_user,
                   max_item=self._max_item, dim_embed=20)
    Recommender.load(big_bpr3, "model-53")  # was loading into big_bpr2 by mistake

    self._rec1 = big_bpr
    self._rec2 = big_bpr2
    self._rec3 = big_bpr3
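# --- Hypothetical helper (not in the original source): blend the three loaded
# --- recommenders at serve time by averaging their scores. This assumes each
# --- serve(batch_data) call returns an array of item scores, as suggested by
# --- its use elsewhere in this repo; the batch_data format is an assumption.
def _serve_ensemble(self, batch_data):
    import numpy as np  # local import keeps the sketch self-contained
    scores = [rec.serve(batch_data)
              for rec in (self._rec1, self._rec2, self._rec3)]
    return np.mean(scores, axis=0)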
# Commented-out training pipeline, kept for reference:
# bpr_model = BPR(batch_size=batch_size, max_user=max_user, max_item=max_item,
#                 dim_embed=20, opt='Adam', sess_config=sess_config)
# sampler = PairwiseSampler(batch_size=batch_size,
#                           dataset=train_dataset, num_process=5)
# model_trainer = ImplicitModelTrainer(batch_size=batch_size,
#                                      test_batch_size=test_batch_size,
#                                      train_dataset=train_dataset,
#                                      model=bpr_model, sampler=sampler)
# auc_evaluator = AUC()
# model_trainer.train(num_itr=int(10), display_itr=display_itr,
#                     eval_datasets=[val_dataset, test_dataset],
#                     evaluators=[auc_evaluator])
# print("Save")
# bpr_model.save("./model", 1)
# print("Saved")

# Load the same checkpoint into two separate BPR instances and serve from each;
# `pythonsucks` is the serving input batch defined elsewhere in this script.
big_bpr = BPR(batch_size=batch_size, max_user=max_user,
              max_item=max_item, dim_embed=20)
Recommender.load(big_bpr, "model-1")
print(big_bpr)
print(big_bpr.serve(pythonsucks))

tf.reset_default_graph()
big_bpr2 = BPR(batch_size=batch_size, max_user=max_user,
               max_item=max_item, dim_embed=20)
Recommender.load(big_bpr2, "model-1")
print(big_bpr2)
print(big_bpr2.serve(pythonsucks))
train_dataset = ImplicitDataset(raw_data['train_data'], raw_data['max_user'],
                                raw_data['max_item'], name='Train')
val_dataset = ImplicitDataset(raw_data['val_data'], raw_data['max_user'],
                              raw_data['max_item'], name='Val')
test_dataset = ImplicitDataset(raw_data['test_data'], raw_data['max_user'],
                               raw_data['max_item'], name='Test')

bpr_model = BPR(batch_size=batch_size,
                max_user=train_dataset.max_user(),
                max_item=train_dataset.max_item(),
                dim_embed=20, opt='Adam', sess_config=sess_config)
sampler = PairwiseSampler(batch_size=batch_size, dataset=train_dataset,
                          num_process=1)
model_trainer = ImplicitModelTrainer(batch_size=batch_size,
                                     test_batch_size=test_batch_size,
                                     train_dataset=train_dataset,
                                     model=bpr_model, sampler=sampler)
auc_evaluator = AUC()
model_trainer.train(num_itr=int(1e6), display_itr=display_itr,
                    eval_datasets=[val_dataset, test_dataset],
                    evaluators=[auc_evaluator])
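# --- Hypothetical follow-up (not in this snippet; it mirrors the commented-out
# --- save call in the serving script above): persist the trained weights so
# --- they can be restored later with Recommender.load.
bpr_model.save("./model", 1)  # writes the "model-1" checkpoint loaded above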
def sample_data_and_train(self):
    self.logger.warning(
        'sample_data_and_train called, pid = %d. '
        'Please kill the process if training is unsuccessful.', os.getpid())
    self.logger.info('-------- sample_data_and_train starts --------')

    dataset_path = os.path.dirname(os.path.abspath(__file__)) + self.path_to_dataset

    # First pass over the dataset: count users and total interactions.
    total_users = 0
    interactions_count = 0
    with open(dataset_path, 'r') as fin:
        for line in fin:
            interactions_count += int(line.split()[0])
            total_users += 1

    self.logger.info('############ collecting data.. ############')
    # Randomly hold out one item per user for validation and testing respectively.
    val_structured_arr = np.zeros(total_users,
                                  dtype=[('user_id', np.int32), ('item_id', np.int32)])
    test_structured_arr = np.zeros(total_users,
                                   dtype=[('user_id', np.int32), ('item_id', np.int32)])
    train_structured_arr = np.zeros(interactions_count - total_users * 2,
                                    dtype=[('user_id', np.int32), ('item_id', np.int32)])

    interaction_ind = 0
    next_user_id = 0
    next_item_id = 0
    map_to_item_id = dict()  # Maps raw item ids to the range [0, len(items) - 1].

    # Second pass: shuffle each user's items and split them into val/test/train.
    with open(dataset_path, 'r') as fin:
        for line in fin:
            item_list = line.split()[1:]
            random.shuffle(item_list)
            for ind, item in enumerate(item_list):
                if item not in map_to_item_id:
                    map_to_item_id[item] = next_item_id
                    next_item_id += 1
                if ind == 0:
                    val_structured_arr[next_user_id] = (next_user_id,
                                                        map_to_item_id[item])
                elif ind == 1:
                    test_structured_arr[next_user_id] = (next_user_id,
                                                         map_to_item_id[item])
                else:
                    train_structured_arr[interaction_ind] = (next_user_id,
                                                             map_to_item_id[item])
                    interaction_ind += 1
            next_user_id += 1

    self.logger.info('############ instantiating dataset.. ############')
    from openrec.utils import Dataset
    train_dataset = Dataset(raw_data=train_structured_arr,
                            total_users=total_users,
                            total_items=len(map_to_item_id),
                            name='Train')
    val_dataset = Dataset(raw_data=val_structured_arr,
                          total_users=total_users,
                          total_items=len(map_to_item_id),
                          num_negatives=500,
                          name='Val')
    test_dataset = Dataset(raw_data=test_structured_arr,
                           total_users=total_users,
                           total_items=len(map_to_item_id),
                           num_negatives=500,
                           name='Test')

    self.logger.info('############ instantiating Samplers.. ############')
    from openrec.utils.samplers import RandomPairwiseSampler
    from openrec.utils.samplers import EvaluationSampler
    train_sampler = RandomPairwiseSampler(batch_size=1000,
                                          dataset=train_dataset,
                                          num_process=5)
    val_sampler = EvaluationSampler(batch_size=1000, dataset=val_dataset)
    test_sampler = EvaluationSampler(batch_size=1000, dataset=test_dataset)

    self.logger.info('############ instantiating Recommender.. ############')
    from openrec.recommenders import BPR
    bpr_model = BPR(batch_size=1000,
                    total_users=train_dataset.total_users(),
                    total_items=train_dataset.total_items(),
                    dim_user_embed=50,
                    dim_item_embed=50,
                    save_model_dir='bpr_recommender/',
                    train=True,
                    serve=True)

    self.logger.info('############ instantiating Evaluator.. ############')
    from openrec.utils.evaluators import AUC
    auc_evaluator = AUC()

    self.logger.info('############ instantiating Model trainer.. ############')
    from openrec import ModelTrainer
    model_trainer = ModelTrainer(model=bpr_model)

    self.logger.info('############ starting training.. ############')
    model_trainer.train(
        total_iter=10000,  # Total number of training iterations
        eval_iter=1000,    # Evaluate the model every "eval_iter" iterations
        save_iter=10000,   # Save the model every "save_iter" iterations
        train_sampler=train_sampler,
        eval_samplers=[val_sampler, test_sampler],
        evaluators=[auc_evaluator])

    self.logger.info('-------- sample_data_and_train ends --------')
# NOTE: all three splits are built from the same raw_data here.
train_dataset = ImplicitDataset(raw_data=csv, max_user=max_users,
                                max_item=max_items, name='Train')
val_dataset = ImplicitDataset(raw_data=csv, max_user=max_users,
                              max_item=max_items, name='Val')
test_dataset = ImplicitDataset(raw_data=csv, max_user=max_users,
                               max_item=max_items, name='Test')

bpr_model = BPR(batch_size=1000,
                max_user=train_dataset.max_user(),
                max_item=train_dataset.max_item(),
                dim_embed=20,
                opt='Adam')
sampler = PairwiseSampler(batch_size=1000, dataset=train_dataset)
auc_evaluator = AUC()
model_trainer = ImplicitModelTrainer(batch_size=1000, test_batch_size=100,
                                     train_dataset=train_dataset,
                                     model=bpr_model, sampler=sampler)
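# --- Hypothetical next step (not in the original snippet): launch training,
# --- mirroring the ImplicitModelTrainer usage elsewhere in this repo.
# --- The num_itr and display_itr values below are placeholders.
model_trainer.train(num_itr=10000, display_itr=100,
                    eval_datasets=[val_dataset, test_dataset],
                    evaluators=[auc_evaluator])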
def UCML(batch_size, dim_user_embed, dim_item_embed, total_users, total_items,
         l2_reg=None, init_model_dir=None, save_model_dir='Recommender/',
         train=True, serve=False):
    # UCML reuses the BPR recommender but swaps the dot-product interaction for
    # a pairwise Euclidean distance and norm-clips embeddings after each update.
    rec = BPR(batch_size=batch_size,
              dim_user_embed=dim_user_embed,
              dim_item_embed=dim_item_embed,
              total_users=total_users,
              total_items=total_items,
              l2_reg=l2_reg,
              init_model_dir=init_model_dir,
              save_model_dir=save_model_dir,
              train=train,
              serve=serve)
    t = rec.traingraph
    s = rec.servegraph

    def censor_vec(embedding, censor_id):
        # Project the embedding rows touched in this batch back into the unit ball.
        unique_censor_id, _ = tf.unique(censor_id)
        embedding_gather = tf.gather(embedding, indices=unique_censor_id)
        norm = tf.sqrt(tf.reduce_sum(tf.square(embedding_gather),
                                     axis=1, keepdims=True))
        return tf.scatter_update(embedding,
                                 indices=unique_censor_id,
                                 updates=embedding_gather / tf.maximum(norm, 1.0))

    @t.usergraph.extend
    def censor_user_vec(subgraph):
        user_embedding, _ = LatentFactor(l2_reg=None,
                                         init='normal',
                                         id_=None,
                                         shape=[total_users, dim_user_embed],
                                         scope='user')
        user_censor_ops = censor_vec(user_embedding, subgraph['user_id'])
        subgraph.register_global_operation(user_censor_ops, 'censor_embedding')

    @t.itemgraph.extend
    def censor_item_vec(subgraph):
        item_embedding, _ = LatentFactor(l2_reg=None,
                                         init='normal',
                                         id_=None,
                                         shape=[total_items, dim_item_embed],
                                         subgraph=subgraph,
                                         scope='item')
        item_censor_ops = censor_vec(
            item_embedding,
            tf.concat([subgraph['p_item_id'], subgraph['n_item_id']], axis=0))
        subgraph.register_global_operation(item_censor_ops, 'censor_embedding')

    @t.interactiongraph(ins=['user_vec', 'p_item_vec', 'n_item_vec',
                             'p_item_bias', 'n_item_bias'])
    def interaction_graph(subgraph):
        PairwiseEuDist(user_vec=subgraph['user_vec'],
                       p_item_vec=subgraph['p_item_vec'],
                       n_item_vec=subgraph['n_item_vec'],
                       p_item_bias=subgraph['p_item_bias'],
                       n_item_bias=subgraph['n_item_bias'],
                       subgraph=subgraph,
                       train=True,
                       scope='PairwiseEuDist')

    @s.interactiongraph(ins=['user_vec', 'item_vec', 'item_bias'])
    def serving_interaction_graph(subgraph):
        PairwiseEuDist(user_vec=subgraph['user_vec'],
                       item_vec=subgraph['item_vec'],
                       item_bias=subgraph['item_bias'],
                       train=False,
                       subgraph=subgraph,
                       scope='PairwiseEuDist')

    return rec
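# --- Hypothetical usage sketch (not from the original source); all argument
# --- values below are placeholders, not recommended settings:
if __name__ == '__main__':
    ucml_model = UCML(batch_size=1000,
                      dim_user_embed=50,
                      dim_item_embed=50,
                      total_users=10000,   # placeholder
                      total_items=5000,    # placeholder
                      save_model_dir='ucml_recommender/',
                      train=True,
                      serve=True)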
def VBPR(batch_size, dim_user_embed, dim_item_embed, dim_v,
         total_users, total_items, l2_reg_embed=None, l2_reg_mlp=None,
         init_model_dir=None, save_model_dir="Recommender/",
         train=True, serve=False):
    rec = BPR(batch_size=batch_size,
              dim_user_embed=dim_user_embed,
              dim_item_embed=dim_item_embed,
              total_users=total_users,
              total_items=total_items,
              l2_reg=l2_reg_embed,
              init_model_dir=init_model_dir,
              save_model_dir=save_model_dir,
              train=train,
              serve=serve)
    t = rec.traingraph
    s = rec.servegraph

    @t.inputgraph.extend(outs=["p_item_vfeature", "n_item_vfeature"])
    def train_item_visual_features(subgraph):
        subgraph["p_item_vfeature"] = tf.placeholder(tf.float32,
                                                     shape=[batch_size, dim_v],
                                                     name="p_item_vfeature")
        subgraph["n_item_vfeature"] = tf.placeholder(tf.float32,
                                                     shape=[batch_size, dim_v],
                                                     name="n_item_vfeature")
        subgraph.update_global_input_mapping({
            "p_item_vfeature": subgraph["p_item_vfeature"],
            "n_item_vfeature": subgraph["n_item_vfeature"],
        })

    @s.inputgraph.extend(outs=["item_vfeature"])
    def serving_item_visual_features(subgraph):
        subgraph["item_vfeature"] = tf.placeholder(tf.float32,
                                                   shape=[None, dim_v],
                                                   name="item_vfeature")
        subgraph.update_global_input_mapping(
            {"item_vfeature": subgraph["item_vfeature"]})

    @t.itemgraph.extend(ins=["p_item_vfeature", "n_item_vfeature"])
    def train_add_item_graph(subgraph):
        p_item_vout = MultiLayerFC(in_tensor=subgraph["p_item_vfeature"],
                                   l2_reg=l2_reg_mlp,
                                   subgraph=subgraph,
                                   dims=[dim_user_embed - dim_item_embed],
                                   scope="item_MLP")
        n_item_vout = MultiLayerFC(in_tensor=subgraph["n_item_vfeature"],
                                   l2_reg=l2_reg_mlp,
                                   subgraph=subgraph,
                                   dims=[dim_user_embed - dim_item_embed],
                                   scope="item_MLP")
        subgraph["p_item_vec"] = tf.concat([subgraph["p_item_vec"], p_item_vout],
                                           axis=1)
        subgraph["n_item_vec"] = tf.concat([subgraph["n_item_vec"], n_item_vout],
                                           axis=1)

    @s.itemgraph.extend(ins=["item_vfeature"])
    def serving_add_item_graph(subgraph):
        item_vout = MultiLayerFC(in_tensor=subgraph["item_vfeature"],
                                 l2_reg=l2_reg_mlp,
                                 subgraph=subgraph,
                                 dims=[dim_user_embed - dim_item_embed],
                                 scope="item_MLP")
        subgraph["item_vec"] = tf.concat([subgraph["item_vec"], item_vout],
                                         axis=1)

    @t.connector.extend
    def train_connect(graph):
        graph.itemgraph["p_item_vfeature"] = graph.inputgraph["p_item_vfeature"]
        graph.itemgraph["n_item_vfeature"] = graph.inputgraph["n_item_vfeature"]

    @s.connector.extend
    def serve_connect(graph):
        graph.itemgraph["item_vfeature"] = graph.inputgraph["item_vfeature"]

    return rec
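# --- Hypothetical usage sketch (not from the original source). Because the
# --- item MLP above projects visual features to dim_user_embed - dim_item_embed
# --- dimensions, dim_user_embed must be strictly greater than dim_item_embed.
# --- All values below are placeholders:
if __name__ == '__main__':
    vbpr_model = VBPR(batch_size=1000,
                      dim_user_embed=100,  # must exceed dim_item_embed
                      dim_item_embed=50,
                      dim_v=4096,          # placeholder visual-feature size
                      total_users=10000,   # placeholder
                      total_items=5000,    # placeholder
                      train=True,
                      serve=True)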
val_dataset = Dataset(raw_data['val_data'],
                      raw_data['total_users'],
                      raw_data['total_items'],
                      name='Val',
                      num_negatives=500)
test_dataset = Dataset(raw_data['test_data'],
                       raw_data['total_users'],
                       raw_data['total_items'],
                       name='Test',
                       num_negatives=500)

train_sampler = RandomPairwiseSampler(batch_size=batch_size,
                                      dataset=train_dataset,
                                      num_process=5)
val_sampler = EvaluationSampler(batch_size=batch_size, dataset=val_dataset)
test_sampler = EvaluationSampler(batch_size=batch_size, dataset=test_dataset)

bpr_model = BPR(batch_size=batch_size,
                total_users=train_dataset.total_users(),
                total_items=train_dataset.total_items(),
                dim_user_embed=dim_embed,
                dim_item_embed=dim_embed,
                save_model_dir='bpr_recommender/',
                train=True,
                serve=True)

model_trainer = ModelTrainer(model=bpr_model)
auc_evaluator = AUC()
model_trainer.train(total_iter=total_iter,
                    eval_iter=eval_iter,
                    save_iter=save_iter,
                    train_sampler=train_sampler,
                    eval_samplers=[val_sampler, test_sampler],
                    evaluators=[auc_evaluator])
def exp(dataset, l2_reg, p_n_ratio, eval_explicit, save_log, eval_rank):
    if dataset == 'spotify':
        data = loadSpotify()
    elif dataset == 'bytedance':
        data = loadByteDance()
    else:
        print("Unsupported dataset...")
        return

    # Save logging output and model checkpoints under log_dir.
    log_dir = "validation_logs/{}_{}_{}_{}_{}/".format(
        dataset, l2_reg, p_n_ratio, eval_explicit, eval_rank)
    os.makedirs(log_dir, exist_ok=True)  # replaces os.popen("mkdir -p ...")
    if save_log:
        log = open(log_dir + "validation.log", "w")
        sys.stdout = log

    # Prepare train, val, and test sets.
    train_dataset = Dataset(data['train'], data['total_users'],
                            data['total_items'], name='Train')
    if p_n_ratio is None:
        train_sampler = RandomPairwiseSampler(batch_size=batch_size,
                                              dataset=train_dataset,
                                              num_process=5)
    else:
        train_sampler = StratifiedPairwiseSampler(batch_size=batch_size,
                                                  dataset=train_dataset,
                                                  p_n_ratio=p_n_ratio,
                                                  num_process=5)
        if p_n_ratio > 0.0:
            print("Re-weighting implicit negative feedback")
        else:
            print("Corrected negative feedback labels but not re-weighting")

    # Number of negative samples for evaluation (None means explicit evaluation).
    eval_num_neg = None if eval_explicit else 500
    if eval_rank:
        # Show evaluation metrics for click-complete and click-skip items separately.
        pos_dataset = Dataset(data['pos_test'], data['total_users'],
                              data['total_items'],
                              implicit_negative=not eval_explicit,
                              name='Pos_Test', num_negatives=eval_num_neg)
        neg_dataset = Dataset(data['neg_test'], data['total_users'],
                              data['total_items'],
                              implicit_negative=not eval_explicit,
                              name='Neg_Test', num_negatives=eval_num_neg)
        pos_sampler = EvaluationSampler(batch_size=batch_size, dataset=pos_dataset)
        neg_sampler = EvaluationSampler(batch_size=batch_size, dataset=neg_dataset)
        eval_samplers = [pos_sampler, neg_sampler]
    else:
        val_dataset = Dataset(data['val'], data['total_users'],
                              data['total_items'],
                              implicit_negative=not eval_explicit,
                              name='Val', num_negatives=eval_num_neg)
        test_dataset = Dataset(data['test'], data['total_users'],
                               data['total_items'],
                               implicit_negative=not eval_explicit,
                               name='Test', num_negatives=eval_num_neg)
        val_sampler = EvaluationSampler(batch_size=batch_size, dataset=val_dataset)
        test_sampler = EvaluationSampler(batch_size=batch_size, dataset=test_dataset)
        eval_samplers = [val_sampler, test_sampler]

    # Set evaluators.
    auc_evaluator = AUC()
    evaluators = [auc_evaluator]

    # Set model parameters.
    model = BPR(l2_reg=l2_reg,
                batch_size=batch_size,
                total_users=train_dataset.total_users(),
                total_items=train_dataset.total_items(),
                dim_user_embed=dim_user_embed,
                dim_item_embed=dim_item_embed,
                save_model_dir=log_dir,
                train=True, serve=True)

    # Set model trainer.
    model_trainer = ModelTrainer(model=model)
    model_trainer.train(total_iter=total_iter,
                        eval_iter=eval_iter,
                        save_iter=save_iter,
                        train_sampler=train_sampler,
                        eval_samplers=eval_samplers,
                        evaluators=evaluators)
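# --- Hypothetical invocation (not from the original source); argument values
# --- are placeholders. 'spotify' and 'bytedance' are the only datasets the
# --- loader above supports.
if __name__ == '__main__':
    exp(dataset='spotify',
        l2_reg=0.0001,       # placeholder
        p_n_ratio=0.5,       # placeholder; None selects RandomPairwiseSampler
        eval_explicit=False,
        save_log=False,
        eval_rank=False)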