# Use the MF latent size as the model's latent dimension.
config['latent_dim'] = config['latent_dim_mf']
engine = New_Gloabl_sum_embedding_MLPEngine(config)

# Sample generator built from the full ratings plus the train/test splits.
sample_generator = SampleGenerator(
    ratings=data_rating,
    train=data_rating_train,
    test=data_rating_test,
)
evaluate_data = sample_generator.evaluate_data
sample_train_data = sample_generator.sample_train_data

# Baseline evaluation before any training step has run.
print("TRAINING:---------------------")
engine.evaluate(sample_train_data, epoch_id=0, save=False)
print("TESTING:----------------------")
# The untrained model's test metrics seed the running best values.
hit_ratio_max, ndcg_max = engine.evaluate(evaluate_data, epoch_id=0)

for epoch in range(config['num_epoch']):
    print('Epoch {} starts !'.format(epoch))
    print('-' * 80)

    # A fresh training loader is requested from the generator every epoch.
    train_loader = sample_generator.instance_a_train_loader(
        config['num_negative'],
        config['batch_size'],
    )
    engine.train_an_epoch(train_loader, epoch_id=epoch)

    # Per-epoch evaluation on the training sample is currently disabled:
    # print("TRAINING:-----------------")
    # engine.evaluate(sample_train_data, epoch, save=False)
    print("TESTING:------------------")
    hit_ratio, ndcg = engine.evaluate(evaluate_data, epoch_id=epoch)

    # Track the best value of each metric seen so far; the two maxima are
    # updated independently whenever at least one metric matches or beats
    # its previous best.
    if hit_ratio_max <= hit_ratio or ndcg_max <= ndcg:
        hit_ratio_max = max(hit_ratio_max, hit_ratio)
        ndcg_max = max(ndcg_max, ndcg)
# Load the ratings file: '::'-separated fields, no header row.
ml1m_dir = 'data/ml-1m/ratings.dat'
ml1m_rating = pd.read_csv(
    ml1m_dir,
    sep='::',
    header=None,
    names=['uid', 'mid', 'rating', 'timestamp'],
    engine='python',
)

# Reindex: map raw user ids to contiguous integers 0..n_users-1, numbered in
# order of first appearance (drop_duplicates keeps the first occurrence).
# The former argument-less `.reindex()` call had no effect and was dropped so
# the user path matches the item path below.
user_id = ml1m_rating[['uid']].drop_duplicates()
user_id['userId'] = np.arange(len(user_id))  # np.arange yields an ndarray
ml1m_rating = pd.merge(ml1m_rating, user_id, on=['uid'], how='left')

# Same mapping for raw item ids -> 0..n_items-1.
item_id = ml1m_rating[['mid']].drop_duplicates()
item_id['itemId'] = np.arange(len(item_id))
ml1m_rating = pd.merge(ml1m_rating, item_id, on=['mid'], how='left')

# Keep only the re-indexed ids together with the rating and timestamp.
ml1m_rating = ml1m_rating[['userId', 'itemId', 'rating', 'timestamp']]
print('Range of userId is [{}, {}]'.format(ml1m_rating.userId.min(), ml1m_rating.userId.max()))
print('Range of itemId is [{}, {}]'.format(ml1m_rating.itemId.min(), ml1m_rating.itemId.max()))

# DataLoader for training
sample_generator = SampleGenerator(ratings=ml1m_rating)
evaluate_data = sample_generator.evaluate_data

# Specify the exact model. To try another model, swap in one of the
# alternative config/engine pairs:
#   config = mlp_config;   engine = MLPEngine(config)
#   config = neumf_config; engine = NeuMFEngine(config)
config = gmf_config
engine = GMFEngine(config)

for epoch in range(config['num_epoch']):
    print('Epoch {} starts !'.format(epoch))
    print('-' * 80)

    # A fresh training loader is requested from the generator every epoch.
    train_loader = sample_generator.instance_a_train_loader(
        config['num_negative'],
        config['batch_size'],
    )
    engine.train_an_epoch(train_loader, epoch_id=epoch)

    # Evaluate after each epoch and hand the metrics to engine.save.
    hit_ratio, ndcg = engine.evaluate(evaluate_data, epoch_id=epoch)
    engine.save(config['alias'], epoch, hit_ratio, ndcg)