# Append a row of pad tokens (shape 1 x max_seq_len) to the end of the tweet matrix
tweet_pad = np.full(shape=(1, args.max_seq_len), fill_value=pad_word, dtype=np.int64)
tweet = np.vstack([tweet, tweet_pad])

# config
config['num_users'], config['num_items'] = int(data_rating.userId.max() + 1), int(data_rating.itemId.max() + 1)
config['user_friends'], config['user_tweets'], config['num_friends'] = load_friends_tweets(args.data_profile)
args.tweet = tweet
config['args'] = args
config['vocab'] = vocab

# Specify the exact model; an optional command-line argument overrides it, defaulting to "gmf"
args.model = sys.argv[1] if len(sys.argv) == 2 else "gmf"
if args.model.lower() == "gmf":
    config['group'] = False
    config['latent_dim'] = config['latent_dim_mf']
    engine = GMFEngine(config)
elif args.model.lower() == "gmflstm":
    config['sum'] = False
    config['group'] = False
    config['latent_dim'] = config['latent_dim_mf']
    engine = GMFLSTMEngine(config)
elif args.model.lower() == "global_sum_embedding":

    config['latent_dim'] = config['latent_dim_mf']
    engine =New_Gloabl_sum_embedding_shareEngine(config)
elif args.model.lower() == "global_sum_embedding_gmf":

    config['latent_dim'] = config['latent_dim_mf']
    engine =New_Gloabl_sum_embedding_gmfEngine(config)

elif args.model.lower() == "global_sum_embedding_mlp":
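The `elif` chain above picks an engine class from `args.model` and sets the matching `latent_dim` and grouping flags before construction. The same selection can be written as a lookup table so the per-model flags live in one place; the sketch below is illustrative only and assumes the engine classes named above are importable in the surrounding scope (it covers the branches shown above).

MODEL_ENGINES = {
    "gmf": GMFEngine,
    "gmflstm": GMFLSTMEngine,
    "global_sum_embedding": New_Gloabl_sum_embedding_shareEngine,
    "global_sum_embedding_gmf": New_Gloabl_sum_embedding_gmfEngine,
}

def build_engine(model_name, config):
    """Set latent_dim and the group/sum flags, then construct the requested engine (sketch)."""
    name = model_name.lower()
    config['latent_dim'] = config['latent_dim_mf']
    if name in ("gmf", "gmflstm"):
        config['group'] = False
    if name == "gmflstm":
        config['sum'] = False
    return MODEL_ENGINES[name](config)

engine = build_engine(args.model, config)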
import numpy as np
import pandas as pd

# Load Data
ml1m_dir = 'data/ml-1m/ratings.dat'
ml1m_rating = pd.read_csv(ml1m_dir, sep='::', header=None,
                          names=['uid', 'mid', 'rating', 'timestamp'], engine='python')
# Reindex uid/mid into dense, 0-based userId/itemId
user_id = ml1m_rating[['uid']].drop_duplicates()
user_id['userId'] = np.arange(len(user_id))
ml1m_rating = pd.merge(ml1m_rating, user_id, on=['uid'], how='left')
item_id = ml1m_rating[['mid']].drop_duplicates()
item_id['itemId'] = np.arange(len(item_id))
ml1m_rating = pd.merge(ml1m_rating, item_id, on=['mid'], how='left')
ml1m_rating = ml1m_rating[['userId', 'itemId', 'rating', 'timestamp']]
print('Range of userId is [{}, {}]'.format(ml1m_rating.userId.min(),
                                           ml1m_rating.userId.max()))
print('Range of itemId is [{}, {}]'.format(ml1m_rating.itemId.min(),
                                           ml1m_rating.itemId.max()))

# Load training data
sample_generator = SampleGenerator(ratings=ml1m_rating)
evaluate_data = sample_generator.evaluate_data

# Specify the training parameters and train each model
for config in [gmf_config, mlp_config, neumf_config]:
    if config == mlp_config:
        engine = MLPEngine(config)
    elif config == gmf_config:
        engine = GMFEngine(config)
    else:
        engine = NeuMFEngine(config)
    for epoch in range(config['num_epoch']):
        print('Epoch {} starts !'.format(epoch))
        print('-' * 80)
        train_loader = sample_generator.instance_a_train_loader(
            config['num_negative'], config['batch_size'])
        engine.train_an_epoch(train_loader, epoch_id=epoch)
        hit_ratio, ndcg = engine.evaluate(evaluate_data, epoch_id=epoch)
        engine.save(config['alias'], epoch, hit_ratio, ndcg)
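Each epoch rebuilds the train loader via `instance_a_train_loader(config['num_negative'], config['batch_size'])`. The sketch below shows that sampling step in isolation, assuming `num_negative` counts the negatives drawn per observed (user, item) interaction and that item ids are the dense 0-based indices produced by the reindexing above; it is a generic illustration, not the repository's `SampleGenerator` internals.

import random

def sample_training_instances(positives_by_user, num_items, num_negative):
    """Pair every positive interaction with `num_negative` items the user never rated."""
    users, items, labels = [], [], []
    for user, pos_items in positives_by_user.items():   # pos_items: set of itemIds the user interacted with
        for pos in pos_items:
            users.append(user); items.append(pos); labels.append(1.0)
            for _ in range(num_negative):
                neg = random.randrange(num_items)
                while neg in pos_items:                  # resample until the item is unseen by this user
                    neg = random.randrange(num_items)
                users.append(user); items.append(neg); labels.append(0.0)
    return users, items, labels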
Example #3
import numpy as np
import pandas as pd

# Load Data
ml1m_dir = 'data/ml-1m/ratings.dat'
ml1m_rating = pd.read_csv(ml1m_dir, sep='::', header=None,
                          names=['uid', 'mid', 'rating', 'timestamp'], engine='python')
# Reindex
user_id = ml1m_rating[['uid']].drop_duplicates().reindex()  # drop_duplicates removes repeated uid rows
user_id['userId'] = np.arange(len(user_id))  # np.arange returns an ndarray of consecutive integers
ml1m_rating = pd.merge(ml1m_rating, user_id, on=['uid'], how='left')
item_id = ml1m_rating[['mid']].drop_duplicates()
item_id['itemId'] = np.arange(len(item_id))
ml1m_rating = pd.merge(ml1m_rating, item_id, on=['mid'], how='left')
ml1m_rating = ml1m_rating[['userId', 'itemId', 'rating', 'timestamp']]
print('Range of userId is [{}, {}]'.format(ml1m_rating.userId.min(), ml1m_rating.userId.max()))
print('Range of itemId is [{}, {}]'.format(ml1m_rating.itemId.min(), ml1m_rating.itemId.max()))
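# The drop_duplicates / arange / merge pattern above maps raw uid/mid values to dense,
# 0-based indices: e.g. raw uids [10, 42, 10, 7] become userId [0, 1, 0, 2], so the
# printed ranges are exactly [0, number of unique users - 1] and [0, number of unique items - 1].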
# DataLoader for training
sample_generator = SampleGenerator(ratings=ml1m_rating)
evaluate_data = sample_generator.evaluate_data
# Specify the exact model
config = gmf_config
engine = GMFEngine(config)
# config = mlp_config
# engine = MLPEngine(config)
# config = neumf_config
# engine = NeuMFEngine(config)
for epoch in range(config['num_epoch']):
    print('Epoch {} starts !'.format(epoch))
    print('-' * 80)
    train_loader = sample_generator.instance_a_train_loader(config['num_negative'], config['batch_size'])
    engine.train_an_epoch(train_loader, epoch_id=epoch)
    hit_ratio, ndcg = engine.evaluate(evaluate_data, epoch_id=epoch)
    engine.save(config['alias'], epoch, hit_ratio, ndcg)
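`engine.evaluate` reports a hit ratio and NDCG on the held-out interactions. For reference, the sketch below shows how HR@K and NDCG@K are commonly computed in leave-one-out evaluation, where one held-out positive item is ranked against a set of sampled negatives; it is a generic illustration, not the engine's internal code.

import math

def hit_ratio_and_ndcg(ranked_items, held_out_item, k=10):
    """HR@k is 1 if the held-out item appears in the top-k; NDCG@k discounts it by rank."""
    top_k = ranked_items[:k]                 # items sorted by predicted score, best first
    if held_out_item in top_k:
        rank = top_k.index(held_out_item)    # 0-based rank of the single relevant item
        return 1.0, math.log(2) / math.log(rank + 2)
    return 0.0, 0.0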