tweet_pad = np.full(shape=(1, args.max_seq_len), fill_value=pad_word, dtype=np.int64)
tweet = np.vstack([tweet, tweet_pad])  # append one all-padding tweet row (see the standalone sketch after this fragment)

# config
config['num_users'], config['num_items'] = int(data_rating.userId.max() + 1), int(data_rating.itemId.max() + 1)
config['user_friends'], config['user_tweets'], config['num_friends'] = load_friends_tweets(args.data_profile)
args.tweet = tweet
config['args'] = args
config['vocab'] = vocab

# Specify the exact model (a single CLI argument, if given, overrides args.model;
# the original assigned it to an unused local `model`, which was a bug)
args.model = sys.argv[1] if len(sys.argv) == 2 else args.model
if args.model.lower() == "gmf":
    config['group'] = False
    config['latent_dim'] = config['latent_dim_mf']
    engine = GMFEngine(config)
elif args.model.lower() == "gmflstm":
    config['sum'] = False
    config['group'] = False
    config['latent_dim'] = config['latent_dim_mf']
    engine = GMFLSTMEngine(config)
elif args.model.lower() == "global_sum_embedding":
    config['latent_dim'] = config['latent_dim_mf']
    engine = New_Gloabl_sum_embedding_shareEngine(config)
elif args.model.lower() == "global_sum_embedding_gmf":
    config['latent_dim'] = config['latent_dim_mf']
    engine = New_Gloabl_sum_embedding_gmfEngine(config)
elif args.model.lower() == "global_sum_embedding_mlp":
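# ---------------------------------------------------------------------------
# A minimal, self-contained sketch of the tweet-padding idiom used above.
# `pad_word`, `max_seq_len`, and the toy `tweet` matrix are illustrative
# placeholders (in the script they come from the vocab and `args`).
# ---------------------------------------------------------------------------
import numpy as np

pad_word = 0                                    # id of the padding token
max_seq_len = 8                                 # stands in for args.max_seq_len

# Tweets already encoded as rows of token ids, shape (num_tweets, max_seq_len).
tweet = np.zeros((3, max_seq_len), dtype=np.int64)

# Append one all-padding row, e.g. as a placeholder for users with no tweet.
tweet_pad = np.full(shape=(1, max_seq_len), fill_value=pad_word, dtype=np.int64)
tweet = np.vstack([tweet, tweet_pad])
assert tweet.shape == (4, max_seq_len)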
ml1m_rating = pd.merge(ml1m_rating, user_id, on=['uid'], how='left')
item_id = ml1m_rating[['mid']].drop_duplicates()
item_id['itemId'] = np.arange(len(item_id))
ml1m_rating = pd.merge(ml1m_rating, item_id, on=['mid'], how='left')
ml1m_rating = ml1m_rating[['userId', 'itemId', 'rating', 'timestamp']]
print('Range of userId is [{}, {}]'.format(ml1m_rating.userId.min(), ml1m_rating.userId.max()))
print('Range of itemId is [{}, {}]'.format(ml1m_rating.itemId.min(), ml1m_rating.itemId.max()))

# Load training data
sample_generator = SampleGenerator(ratings=ml1m_rating)
evaluate_data = sample_generator.evaluate_data

# Specify training parameters and train each model in turn
for config in [gmf_config, mlp_config, neumf_config]:
    if config is mlp_config:
        engine = MLPEngine(config)
    elif config is gmf_config:
        engine = GMFEngine(config)
    else:
        engine = NeuMFEngine(config)
    for epoch in range(config['num_epoch']):
        print('Epoch {} starts !'.format(epoch))
        print('-' * 80)
        train_loader = sample_generator.instance_a_train_loader(config['num_negative'], config['batch_size'])
        engine.train_an_epoch(train_loader, epoch_id=epoch)
        hit_ratio, ndcg = engine.evaluate(evaluate_data, epoch_id=epoch)
        engine.save(config['alias'], epoch, hit_ratio, ndcg)
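# ---------------------------------------------------------------------------
# Hedged sketch (not the repo's code): `engine.evaluate` is assumed to report
# HR@K and NDCG@K over one held-out item per user, the usual NCF protocol.
# With a single relevant item, the standard definitions reduce to:
# ---------------------------------------------------------------------------
import math

def hit_ratio_at_k(ranked_items, gt_item, k=10):
    # 1 if the held-out item appears among the top-k recommendations, else 0.
    return int(gt_item in ranked_items[:k])

def ndcg_at_k(ranked_items, gt_item, k=10):
    # NDCG with one relevant item: 1 / log2(rank + 2) at its 0-based rank.
    if gt_item in ranked_items[:k]:
        return 1.0 / math.log2(ranked_items[:k].index(gt_item) + 2)
    return 0.0

assert hit_ratio_at_k([5, 3, 9], gt_item=3) == 1
assert abs(ndcg_at_k([5, 3, 9], gt_item=3) - 1.0 / math.log2(3)) < 1e-9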
# Assumed module layout for this script's imports (pandas/numpy are certain;
# the engine and data modules are inferred from the class names used below).
import pandas as pd
import numpy as np
from gmf import GMFEngine
from mlp import MLPEngine
from neumf import NeuMFEngine
from data import SampleGenerator

# Load Data
ml1m_dir = 'data/ml-1m/ratings.dat'
ml1m_rating = pd.read_csv(ml1m_dir, sep='::', header=None,
                          names=['uid', 'mid', 'rating', 'timestamp'], engine='python')

# Reindex to dense, 0-based ids
user_id = ml1m_rating[['uid']].drop_duplicates()  # drop_duplicates removes duplicate rows
user_id['userId'] = np.arange(len(user_id))       # np.arange returns an ndarray (range returns a lazy range object)
ml1m_rating = pd.merge(ml1m_rating, user_id, on=['uid'], how='left')
item_id = ml1m_rating[['mid']].drop_duplicates()
item_id['itemId'] = np.arange(len(item_id))
ml1m_rating = pd.merge(ml1m_rating, item_id, on=['mid'], how='left')
ml1m_rating = ml1m_rating[['userId', 'itemId', 'rating', 'timestamp']]
print('Range of userId is [{}, {}]'.format(ml1m_rating.userId.min(), ml1m_rating.userId.max()))
print('Range of itemId is [{}, {}]'.format(ml1m_rating.itemId.min(), ml1m_rating.itemId.max()))

# DataLoader for training
sample_generator = SampleGenerator(ratings=ml1m_rating)
evaluate_data = sample_generator.evaluate_data

# Specify the exact model
config = gmf_config
engine = GMFEngine(config)
# config = mlp_config
# engine = MLPEngine(config)
# config = neumf_config
# engine = NeuMFEngine(config)

for epoch in range(config['num_epoch']):
    print('Epoch {} starts !'.format(epoch))
    print('-' * 80)
    train_loader = sample_generator.instance_a_train_loader(config['num_negative'], config['batch_size'])
    engine.train_an_epoch(train_loader, epoch_id=epoch)
    hit_ratio, ndcg = engine.evaluate(evaluate_data, epoch_id=epoch)
    engine.save(config['alias'], epoch, hit_ratio, ndcg)
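# ---------------------------------------------------------------------------
# Illustrative sketch: the configs are assumed to be plain dicts defined in
# the model modules. This example covers only the keys the training loop
# above actually reads; every value is a placeholder, not the repo's setting.
# ---------------------------------------------------------------------------
gmf_config_example = {
    'alias': 'gmf_example',   # tag used by engine.save for checkpoint names
    'num_epoch': 20,          # outer training-loop length
    'batch_size': 1024,       # passed to instance_a_train_loader
    'num_negative': 4,        # negatives sampled per positive interaction
    'latent_dim': 8,          # GMF embedding size (latent_dim_mf in the first fragment)
}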