def main():
    args = sys.argv
    set_level = args[1]
    train_prob = args[2]
    user_k = int(args[3])
    try:
        top_n = int(args[4])
    except (IndexError, ValueError):
        top_n = 500 #Write the top-500 recommendations to file by default

    #Filepath config
    item_tag_file = './song_dataset/mid_data/song_tag_distribution.json'
    user_tag_file = './song_dataset/mid_data/user_tag_distribution_%s_%s.json'%(set_level,train_prob)
    file_template = './song_dataset/user_dataset_%s_%s_%s' #set_num,type,train_prob
    train_file = file_template%(set_level,'train',train_prob)
    test_file = file_template%(set_level,'test',train_prob)
    user_sim_file = './song_dataset/mid_data/user_similarity_withTag_%s_%s.json'%(set_level,train_prob)

    #Build dataset
    dataset = BaseDataSet()
    dataset.build_data(train_file,test_file)
    logging.info("Build dataset cost:%s"%(dataset.cost_time))

    #Initiate recommender
    recommender = UserTagCF()
    recommender.build_userTagDistribution(dataset.train_data,item_tag_file,user_tag_file)
    recommender.build_user_similarity(dataset.train_data,user_sim_file,top_user_k=1000) #Keep each user's top-1000 most similar users

    #Recommendation
    recommender.recommend(dataset.train_data,user_k=user_k,top_n=top_n)
    logging.info("Train_prob:%s User_k:%s Top_n:%s cost:%s"%(train_prob,user_k,top_n,recommender.cost_time))
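#Usage sketch (the script name is hypothetical; only the argument order comes from the argv parsing
#in main() above):
#    python userTagCF_run.py <set_level> <train_prob> <user_k> [top_n]
#e.g. user_k=40 recommends from the 40 most similar users; top_n defaults to 500 when omitted.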
def main():
    args = sys.argv
    set_level = args[1]
    train_prob = args[2]
    user_k = int(args[3])
    try:
        top_n = int(args[4])
    except (IndexError, ValueError):
        top_n = 500

    #File path config
    file_template = './song_dataset/user_dataset_%s_%s_%s' #set_num,type,train_prob
    user_sim_file = './song_dataset/mid_data/user_sim_%s_%s.json' % (set_level, train_prob) #user-user similarity matrix
    train_file = file_template % (set_level, 'train', train_prob)
    test_file = file_template % (set_level, 'test', train_prob)

    #Build dataset
    dataset = BaseDataSet()
    dataset.build_data(train_file, test_file)
    logging.info("Build dataset cost:%s" % (dataset.cost_time))

    #Initiate Recommender
    recommender = UserCF()
    recommender.build_user_similarity(dataset.train_data, user_sim_file, top_user_k=1000) #top_user_k: keep each user's top-k most similar users in the file

    #Recommendation
    recommender.recommend(dataset.train_data, user_k=user_k, top_n=top_n)
    logging.info("Train_prob:%s User_k:%s Top_n:%s cost:%s" % (train_prob, user_k, top_n, recommender.cost_time))
def main():
    args = sys.argv
    set_level = args[1]
    train_prob = args[2]
    top_n = int(args[3])

    #Filepath config
    item_tag_file = './song_dataset/mid_data/song_tag_distribution.json'
    #item_tag_file = './song_dataset/mid_data/song_tag_dist_with_singer.json'
    user_tag_file = './song_dataset/mid_data/user_tag_distribution_%s_%s.json'%(set_level,train_prob)
    file_template = './song_dataset/user_dataset_%s_%s_%s' #set_num,type,train_prob
    train_file = file_template%(set_level,'train',train_prob)
    test_file = file_template%(set_level,'test',train_prob)
    user_sim_file = './song_dataset/mid_data/user_similarity_withTag_%s_%s.json'%(set_level,train_prob)

    #Build dataset
    dataset = BaseDataSet()
    dataset.build_data(train_file,test_file)
    logging.info("Build dataset cost:%s"%(dataset.cost_time))
    print "DataForTrain: %s"%(train_file)
    print "DataForTest: %s"%(test_file)
    print "Dataset train_set info: %s"%(dataset.get_train_info())
    print "Dataset test_set info: %s"%(dataset.get_test_info())

    #Record best scores
    best_f_score = {'f_score':0}
    best_precision = {'precision':0}
    best_recall = {'recall':0}

    #Initiate recommender
    recommender = UserTagCF()
    recommender.build_userTagDistribution(dataset.train_data,item_tag_file,user_tag_file)
    recommender.build_user_similarity(dataset.train_data,user_sim_file,top_user_k=1000)

    #Recommendation
    for user_k in [5]+range(10,101,10):
        recommender.recommend(dataset.train_data,user_k=user_k,top_n=top_n)
        logging.info("Train_prob:%s User_k:%s Top_n:%s cost:%s"%(train_prob,user_k,top_n,recommender.cost_time))
        scores = recommender.score(dataset.test_data,len(dataset.all_songs))
        print "User_k:%s\tTop_n:%s\tScores:%s"%(user_k,top_n,scores)

        #Find Best Score
        if scores['f_score'] > best_f_score['f_score']:
            best_f_score = scores
            best_f_score['user_k'] = user_k
            best_f_score['top_n'] = top_n
        if scores['precision'] > best_precision['precision']:
            best_precision = scores
            best_precision['user_k'] = user_k
            best_precision['top_n'] = top_n
        if scores['recall'] > best_recall['recall']:
            best_recall = scores
            best_recall['user_k'] = user_k
            best_recall['top_n'] = top_n

    print "Best_F_Score: %s"%(best_f_score)
    print "Best_Precision: %s"%(best_precision)
    print "Best_Recall: %s"%(best_recall)
def main():
    args = sys.argv
    set_level = args[1]
    train_prob = args[2]
    topic_num = int(args[3])
    recommend_job = args[4]
    user_k = int(args[5])
    try:
        top_n = int(args[6])
    except (IndexError, ValueError):
        top_n = 500

    #Log config
    log_file = './log/hybirdModel_%s_%s_%s_%s_%s.log'%(set_level,train_prob,topic_num,recommend_job,top_n)
    logging.basicConfig(level=logging.INFO,format='%(asctime)s %(levelname)s %(funcName)s %(lineno)d %(message)s',filename=log_file)

    #Filepath config
    file_template = './song_dataset/user_dataset_%s_%s_%s' #set_level, type, train_prob
    user_sim_file = './song_dataset/mid_data/user_sim_%s_%s.json'%(set_level,train_prob)
    userTag_sim_file = './song_dataset/mid_data/user_similarity_withTag_%s_%s.json'%(set_level,train_prob)
    userLDA_sim_file = './song_dataset/mid_data/user_sim_with_lda_%s_%s_%s.json'%(set_level,train_prob,topic_num)
    train_file = file_template%(set_level,'train',train_prob)
    test_file = file_template%(set_level,'test',train_prob)

    #Build dataset
    dataset = BaseDataSet()
    dataset.build_data(train_file,test_file)
    logging.info("Build dataset cost:%s"%(dataset.cost_time))

    #Data Preparation
    items_tag_dict = {}
    users_tag_dict = {}
    if recommend_job in ('mix_result_reorder','mix_sim_reorder'):
        items_tag_dict = load_tag_distribution('./song_dataset/mid_data/song_tag_distribution.json') #Load item tag distribution
        user_tag_file = './song_dataset/mid_data/user_tag_distribution_%s_%s.json'%(set_level,train_prob)
        users_tag_dict = load_tag_distribution(user_tag_file)

    #Initiate hybrid model
    recommender = HybirdModel()
    if recommend_job in ('mix_sim','mix_sim_reorder'):
        recommender.hybird_user_sim(dataset.train_data,userTag_sim_file,userLDA_sim_file,theta=0.45)
    elif recommend_job in ('mix_result','mix_result_reorder'):
        recommender.userTag.load_user_similarity(userTag_sim_file,norm=1)
        recommender.userLda.load_user_similarity(userLDA_sim_file,norm=1)

    if recommend_job == 'mix_sim':
        recommender.recommend(dataset.train_data,users_tag_dict,items_tag_dict,user_k,top_n,reorder=0)
    elif recommend_job == 'mix_sim_reorder':
        recommender.recommend(dataset.train_data,users_tag_dict,items_tag_dict,user_k,top_n,reorder=1)
    elif recommend_job == 'mix_result':
        recommender.hybird_recommend_result(dataset.train_data,user_k,top_n)
    elif recommend_job == 'mix_result_reorder':
        recommender.hybird_result_withReorder(dataset.train_data,users_tag_dict,items_tag_dict,user_k,top_n)
    logging.info("Train_prob:%s User_k:%s Top_n:%s cost:%s"%(train_prob,user_k,top_n,recommender.cost_time))
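#Usage sketch (the script name is hypothetical; the argument order and the recommend_job values come
#from the argv parsing and branching in main() above):
#    python hybirdModel_run.py <set_level> <train_prob> <topic_num> <recommend_job> <user_k> [top_n]
#where recommend_job is one of mix_sim, mix_sim_reorder, mix_result, mix_result_reorder, and top_n
#defaults to 500 when omitted.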
def main():
    args = sys.argv
    set_level = args[1]
    train_prob = args[2]

    dataset = BaseDataSet()
    file_template = './song_dataset/user_dataset_%s_%s_%s' #set_level,type,train_prob
    train_file = file_template%(set_level,'train',train_prob)
    test_file = file_template%(set_level,'test',train_prob)
    dataset.build_data(train_file,test_file)
    logging.info("Build dataset cost:%s",dataset.cost_time)

    recommender = RandomSelect()
    recommender.recommend(dataset.train_data,list(dataset.all_songs),500)
    logging.info("Train_prob:%s cost:%s"%(train_prob,recommender.get_time()))
def main():
    args = sys.argv
    set_level = args[1]
    train_prob = args[2]

    dataset = BaseDataSet()
    file_template = './song_dataset/user_dataset_%s_%s_%s' #set_level,type,train_prob
    train_file = file_template % (set_level, 'train', train_prob)
    test_file = file_template % (set_level, 'test', train_prob)
    dataset.build_data(train_file, test_file)
    logging.info("Build dataset cost:%s", dataset.cost_time)

    #Initiate Recommender
    recommender = Popularity()
    recommender.recommend(dataset.train_data, 500)
    logging.info("Train_prob:%s cost:%s" % (train_prob, recommender.cost_time))
def main():
    args = sys.argv
    set_level = args[1]
    train_prob = args[2]
    e_type = args[3] #Experiment type: song or playlist

    dataset = BaseDataSet()
    file_template = './song_dataset/user_dataset_%s_%s_%s' #set_level,type,train_prob
    if e_type == 'playlist':
        file_template = './pl_dataset/user_playlist_%s_%s_%s' #set_level,type,train_prob
    train_file = file_template%(set_level,'train',train_prob)
    test_file = file_template%(set_level,'test',train_prob)
    dataset.build_data(train_file,test_file)
    logging.info("Build dataset cost:%s",dataset.cost_time)
    print "DataForTrain: %s"%(train_file)
    print "DataForTest: %s"%(test_file)
    print "Dataset train_set info: %s"%(dataset.get_train_info())
    print "Dataset test_set info: %s"%(dataset.get_test_info())

    #Record best scores
    best_f_score = {'f_score':0}
    best_precision = {'precision':0}
    best_recall = {'recall':0}

    #Initiate Recommender
    recommender = Popularity()
    for i in [1,50,100,150,200]:
        recommender.recommend(dataset.train_data,i)
        logging.info("Train_prob:%s Recommend Top_n:%s cost:%s"%(train_prob,i,recommender.cost_time))
        #logging.info("Top_10_song:%s"%(recommender.get_poplist(10)))
        scores = recommender.score(dataset.test_data)
        print "Top_n:%s\tScores:%s"%(i,scores)

        #Find best scores
        if scores['f_score'] > best_f_score['f_score']:
            best_f_score = scores
            best_f_score['top_n'] = i
        if scores['precision'] > best_precision['precision']:
            best_precision = scores
            best_precision['top_n'] = i
        if scores['recall'] > best_recall['recall']:
            best_recall = scores
            best_recall['top_n'] = i

    print "Best_F_Score: %s"%(best_f_score)
    print "Best_Precision: %s"%(best_precision)
    print "Best_Recall: %s"%(best_recall)
def get_lda_topics(args):
    set_level = args[0]
    train_prob = args[1]
    topic_num = int(args[2])

    file_template = './song_dataset/user_dataset_%s_%s_%s' #set_level, type, train_prob
    train_file = file_template%(set_level,'train',train_prob)
    test_file = file_template%(set_level,'test',train_prob)

    dataset = BaseDataSet()
    dataset.build_data(train_file,test_file)

    recommender = UserLDA()
    recommender.build_model(dataset.train_data,topic_num)
    for idx,distrib in enumerate(recommender.model.print_topics(1000)):
        dist0 = distrib.split()[0].split('*')[0]
        if float(dist0) > 0:
            print "Topic#%s\t%s"%(idx,distrib)
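#Note (inferred from the indexing above, not stated in the source): unlike the main() entry points,
#get_lda_topics() reads set_level from args[0], so it is presumably invoked with the program name
#already stripped, e.g. get_lda_topics(sys.argv[1:]).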
def main():
    args = sys.argv
    set_level = args[1]
    train_prob = args[2]
    topic_num = int(args[3])
    user_k = int(args[4])
    try:
        top_n = int(args[5])
    except (IndexError, ValueError):
        top_n = 500

    #Log-Config
    logfile = './log/userLDA_%s_%s_%s.log' % (set_level, train_prob, topic_num)
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(funcName)s %(lineno)d %(message)s', filename=logfile)
    #logging.basicConfig(level=logging.INFO,format='%(asctime)s %(levelname)s %(funcName)s %(lineno)d %(message)s')

    #File path config
    user_sim_file = './song_dataset/mid_data/user_sim_with_lda_%s_%s_%s.json' % (set_level, train_prob, topic_num)
    file_template = './song_dataset/user_dataset_%s_%s_%s' #set_level, type, train_prob
    train_file = file_template % (set_level, 'train', train_prob)
    test_file = file_template % (set_level, 'test', train_prob)

    #Build dataset
    dataset = BaseDataSet()
    dataset.build_data(train_file, test_file)
    logging.info("Build dataset cost:%s" % (dataset.cost_time))

    #Initiate Recommender
    recommender = UserLDA()
    recommender.build_user_similarity(user_sim_file, dataset.train_data, topic_num=topic_num, top_user_k=300)

    #Recommendation
    recommender.recommend(dataset.train_data, user_k=user_k, top_n=top_n)
    logging.info("Train_prob:%s User_k:%s Top_n:%s cost:%s" % (train_prob, user_k, top_n, recommender.cost_time))
def main():
    args = sys.argv
    set_level = args[1]
    train_prob = args[2]
    kn = int(args[3])

    #File path config
    file_template = './song_dataset/user_dataset_%s_%s_%s' #set_num,type,train_prob
    train_file = file_template % (set_level, 'train', train_prob)
    test_file = file_template % (set_level, 'test', train_prob)

    #Build dataset
    dataset = BaseDataSet()
    dataset.build_data(train_file, test_file)
    logging.info("Build dataset cost:%s" % (dataset.cost_time))

    #Initiate Recommender
    recommender = SVDModel()
    recommender.build_model(dataset.train_data, kn)
    recommender.recommend(dataset.train_data)
def main():
    args = sys.argv
    set_level = args[1]
    train_prob = args[2]
    top_n = int(args[3])

    #File path config
    file_template = './song_dataset/user_dataset_%s_%s_%s' #set_num,type,train_prob
    user_sim_file = './song_dataset/mid_data/user_sim_%s_%s.json'%(set_level,train_prob) #user-user similarity matrix
    train_file = file_template%(set_level,'train',train_prob)
    test_file = file_template%(set_level,'test',train_prob)

    #Build dataset
    dataset = BaseDataSet()
    dataset.build_data(train_file,test_file)
    logging.info("Build dataset cost:%s"%(dataset.cost_time))
    print "DataForTrain: %s"%(train_file)
    print "DataForTest: %s"%(test_file)
    print "Dataset train_set info: %s"%(dataset.get_train_info())
    print "Dataset test_set info: %s"%(dataset.get_test_info())

    #Record best scores
    best_f_score = {'f_score':0}
    best_precision = {'precision':0}
    best_recall = {'recall':0}

    #Initiate Recommender
    recommender = UserCF()
    recommender.build_user_similarity(dataset.train_data,user_sim_file,top_user_k=1000) #top_user_k: keep each user's top-k most similar users in the file

    #Recommendation
    for user_k in [5]+range(10,101,10):
        recommender.recommend(dataset.train_data,user_k=user_k,top_n=top_n)
        logging.info("Train_prob:%s User_k:%s Top_n:%s cost:%s"%(train_prob,user_k,top_n,recommender.cost_time))
        scores = recommender.score(dataset.test_data,len(dataset.all_songs))
        print "User_k:%s\tTop_n:%s\tScores:%s"%(user_k,top_n,scores)

        #Find Best Score
        if scores['f_score'] > best_f_score['f_score']:
            best_f_score = scores
            best_f_score['user_k'] = user_k
            best_f_score['top_n'] = top_n
        if scores['precision'] > best_precision['precision']:
            best_precision = scores
            best_precision['user_k'] = user_k
            best_precision['top_n'] = top_n
        if scores['recall'] > best_recall['recall']:
            best_recall = scores
            best_recall['user_k'] = user_k
            best_recall['top_n'] = top_n

    print "Best_F_Score: %s"%(best_f_score)
    print "Best_Precision: %s"%(best_precision)
    print "Best_Recall: %s"%(best_recall)
def __init__(self):
    BaseDataSet.__init__(self)
    self.playlists = []
def main():
    args = sys.argv
    set_level = args[1]
    train_prob = args[2]
    topic_num = int(args[3])
    top_n = int(args[4])
    e_type = args[5] #e_type: song or playlist

    #Log-Config
    logfile = './log/userLDA_%s_%s_%s.log'%(set_level,train_prob,topic_num)
    logging.basicConfig(level=logging.INFO,format='%(asctime)s %(levelname)s %(funcName)s %(lineno)d %(message)s',filename=logfile,filemode='w')
    #logging.basicConfig(level=logging.INFO,format='%(asctime)s %(levelname)s %(funcName)s %(lineno)d %(message)s')

    #File path config
    user_sim_file = './song_dataset/mid_data/user_sim_with_lda_%s_%s_%s_new.json'%(set_level,train_prob,topic_num)
    file_template = './song_dataset/user_dataset_%s_%s_%s' #set_level, type, train_prob
    if e_type == 'playlist':
        user_sim_file = './pl_dataset/mid_data/user_sim_with_lda_%s_%s_%s.json'%(set_level,train_prob,topic_num)
        file_template = './pl_dataset/user_playlist_%s_%s_%s' #set_level, type, train_prob
    train_file = file_template%(set_level,'train',train_prob)
    test_file = file_template%(set_level,'test',train_prob)

    #Build dataset
    dataset = BaseDataSet()
    dataset.build_data(train_file,test_file)
    logging.info("Build dataset cost:%s"%(dataset.cost_time))
    print "DataForTrain: %s"%(train_file)
    print "DataForTest: %s"%(test_file)
    print "Dataset train_set info: %s"%(dataset.get_train_info())
    print "Dataset test_set info: %s"%(dataset.get_test_info())

    #Record best scores
    best_f_score = {'f_score':0}
    best_precision = {'precision':0}
    best_recall = {'recall':0}

    #Initiate Recommender
    recommender = UserLDA()
    recommender.build_user_similarity(user_sim_file,dataset.train_data,topic_num=topic_num,top_user_k=1000)

    #Recommendation
    for user_k in [5]+range(10,101,10):
        recommender.recommend(dataset.train_data,user_k=user_k,top_n=top_n)
        logging.info("Train_prob:%s User_k:%s Top_n:%s cost:%s"%(train_prob,user_k,top_n,recommender.cost_time))
        scores = recommender.score(dataset.test_data,len(dataset.all_songs))
        print "User_k:%s\tTop_n:%s\tScores:%s"%(user_k,top_n,scores)

        #Find Best Score
        if scores['f_score'] > best_f_score['f_score']:
            best_f_score = scores
            best_f_score['user_k'] = user_k
            best_f_score['top_n'] = top_n
        if scores['precision'] > best_precision['precision']:
            best_precision = scores
            best_precision['user_k'] = user_k
            best_precision['top_n'] = top_n
        if scores['recall'] > best_recall['recall']:
            best_recall = scores
            best_recall['user_k'] = user_k
            best_recall['top_n'] = top_n

    print "Best_F_Score: %s"%(best_f_score)
    print "Best_Precision: %s"%(best_precision)
    print "Best_Recall: %s"%(best_recall)
def main():
    args = sys.argv
    set_level = args[1]
    train_prob = args[2]
    top_n = int(args[3])
    e_type = args[4] #Experiment type: song or playlist

    #Filepath config
    file_template = './song_dataset/user_dataset_%s_%s_%s' #set_num,type,train_prob
    item_sim_file = './song_dataset/mid_data/item_similarity_%s_%s.json' % (set_level, train_prob)
    if e_type == 'playlist':
        file_template = './pl_dataset/user_playlist_%s_%s_%s' #set_num,type,train_prob
        item_sim_file = './pl_dataset/mid_data/item_similarity_%s_%s.json' % (set_level, train_prob)
    train_file = file_template % (set_level, 'train', train_prob)
    test_file = file_template % (set_level, 'test', train_prob)

    #Build dataset
    dataset = BaseDataSet()
    dataset.build_data(train_file, test_file)
    logging.info("Build dataset cost:%s" % (dataset.cost_time))
    print "DataForTrain: %s" % (train_file)
    print "DataForTest: %s" % (test_file)
    print "Dataset train_set info: %s" % (dataset.get_train_info())
    print "Dataset test_set info: %s" % (dataset.get_test_info())

    #Record best scores
    best_f_score = {'f_score': 0}
    best_precision = {'precision': 0}
    best_recall = {'recall': 0}

    #Initiate recommender
    itemCF_recommender = ItemCF()
    if os.path.exists(item_sim_file):
        logging.info("File %s exists, loading item similarity matrix" % (item_sim_file))
        itemCF_recommender.load_item_similarity(item_sim_file)
        logging.info("Load item_similarity cost: %s" % (itemCF_recommender.cost_time))
    else:
        logging.info("File %s doesn't exist, building item similarity matrix" % (item_sim_file))
        itemCF_recommender.build_item_similarity(dataset.train_data, item_sim_file)
        logging.info("Build item_similarity cost: %s" % (itemCF_recommender.cost_time))

    #Recommendation
    for item_k in range(20, 100):
        itemCF_recommender.recommend(dataset.train_data, item_k=item_k, top_n=top_n)
        logging.info("Train_prob:%s Item_k:%s Top_n:%s Cost:%s" % (train_prob, item_k, top_n, itemCF_recommender.cost_time))
        scores = itemCF_recommender.score(dataset.test_data)
        print "Item_k:%s\tTop_n:%s\tScores:%s" % (item_k, top_n, scores)

        #Find Best Score
        if scores['f_score'] > best_f_score['f_score']:
            best_f_score = scores
            best_f_score['item_k'] = item_k
            best_f_score['top_n'] = top_n
        if scores['precision'] > best_precision['precision']:
            best_precision = scores
            best_precision['item_k'] = item_k
            best_precision['top_n'] = top_n
        if scores['recall'] > best_recall['recall']:
            best_recall = scores
            best_recall['item_k'] = item_k
            best_recall['top_n'] = top_n

    print "Best_F_Score: %s" % (best_f_score)
    print "Best_Precision: %s" % (best_precision)
    print "Best_Recall: %s" % (best_recall)
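#Usage sketch (script name hypothetical; argument order follows the argv parsing above):
#    python itemCF_run.py <set_level> <train_prob> <top_n> <e_type>
#with e_type in {song, playlist}. The item-item similarity matrix is loaded from item_sim_file when it
#already exists and is otherwise rebuilt from the training data (and presumably cached at that path),
#so the first run is the slow one.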
def main():
    args = sys.argv
    set_level = args[1]
    train_prob = args[2]
    hybird_type = args[3]
    recommend_job = args[4]
    user_k = int(args[5])
    top_n = int(args[6])
    if hybird_type == 'lda':
        topic_num = int(args[7])

    #Log config
    log_file = './log/ubase_hybirdModel_%s_%s_%s_%s.log' % (set_level, train_prob, recommend_job, top_n)
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(funcName)s %(lineno)d %(message)s', filename=log_file, filemode='w')
    #logging.basicConfig(level=logging.INFO,format='%(asctime)s %(levelname)s %(funcName)s %(lineno)d %(message)s')

    #Filepath config
    file_template = './song_dataset/user_dataset_%s_%s_%s' #set_level, type, train_prob
    user_sim_file = './song_dataset/mid_data/user_sim_%s_%s.json' % (set_level, train_prob) #Similarity file of userCF
    if hybird_type == 'tag':
        userTag_sim_file = './song_dataset/mid_data/user_similarity_withTag_%s_%s.json' % (set_level, train_prob)
    if hybird_type == 'lda':
        userLDA_sim_file = './song_dataset/mid_data/user_sim_with_lda_%s_%s_%s.json' % (set_level, train_prob, topic_num)
    train_file = file_template % (set_level, 'train', train_prob)
    test_file = file_template % (set_level, 'test', train_prob)

    #Build dataset
    dataset = BaseDataSet()
    dataset.build_data(train_file, test_file)
    logging.info("Build dataset cost:%s" % (dataset.cost_time))

    #Data Preparation
    items_tag_dict = {}
    users_tag_dict = {}
    if recommend_job in ('mix_result_reorder', 'mix_sim_reorder'):
        items_tag_dict = load_tag_distribution('./song_dataset/mid_data/song_tag_distribution.json') #Load item tag distribution
        user_tag_file = './song_dataset/mid_data/user_tag_distribution_%s_%s.json' % (set_level, train_prob)
        users_tag_dict = load_tag_distribution(user_tag_file)

    #Initiate hybrid model
    recommender = HybirdModel_UB()
    if recommend_job in ('mix_sim', 'mix_sim_reorder'):
        if hybird_type == 'tag':
            recommender.hybird_user_sim(dataset.train_data, user_sim_file, userTag_sim_file, hybird_type='tag', theta=0.8, mix_type=0)
        elif hybird_type == 'lda':
            recommender.hybird_user_sim(dataset.train_data, user_sim_file, userLDA_sim_file, hybird_type='lda', theta=0.9, mix_type=0)
    elif recommend_job in ('mix_result', 'mix_result_reorder'):
        if hybird_type == 'tag':
            recommender.userTag.load_user_similarity(userTag_sim_file, norm=1)
        elif hybird_type == 'lda':
            recommender.userLda.load_user_similarity(userLDA_sim_file, norm=1)

    if recommend_job == 'mix_sim':
        recommender.recommend(dataset.train_data, users_tag_dict, items_tag_dict, user_k, top_n, reorder=0)
    elif recommend_job == 'mix_sim_reorder':
        recommender.recommend(dataset.train_data, users_tag_dict, items_tag_dict, user_k, top_n, reorder=1)
    elif recommend_job == 'mix_result':
        recommender.hybird_recommend_result(dataset.train_data, user_k, top_n)
    elif recommend_job == 'mix_result_reorder':
        recommender.hybird_result_withReorder(dataset.train_data, users_tag_dict, items_tag_dict, user_k, top_n)
    logging.info("Train_prob:%s User_k:%s Top_n:%s cost:%s" % (train_prob, user_k, top_n, recommender.cost_time))
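#Usage sketch (script name hypothetical; argument order mirrors the argv parsing above):
#    python ubase_hybirdModel_run.py <set_level> <train_prob> <hybird_type> <recommend_job> <user_k> <top_n> [topic_num]
#hybird_type is 'tag' or 'lda' (topic_num is required only for 'lda'); recommend_job is one of
#mix_sim, mix_sim_reorder, mix_result, mix_result_reorder.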
def main():
    args = sys.argv
    set_level = args[1]
    train_prob = args[2]
    topic_num = int(args[3])
    top_n = int(args[4])
    recommend_job = args[5]

    #Log config
    log_file = './log/hybirdModel_%s_%s_%s_%s_%s.log'%(set_level,train_prob,topic_num,recommend_job,top_n)
    logging.basicConfig(level=logging.INFO,format='%(asctime)s %(levelname)s %(funcName)s %(lineno)d %(message)s',filename=log_file,filemode='w')

    #Filepath config
    file_template = './song_dataset/user_dataset_%s_%s_%s' #set_level, type, train_prob
    user_sim_file = './song_dataset/mid_data/user_sim_%s_%s.json'%(set_level,train_prob)
    userTag_sim_file = './song_dataset/mid_data/user_similarity_withTag_%s_%s.json'%(set_level,train_prob)
    userLDA_sim_file = './song_dataset/mid_data/user_sim_with_lda_%s_%s_%s.json'%(set_level,train_prob,topic_num)
    train_file = file_template%(set_level,'train',train_prob)
    test_file = file_template%(set_level,'test',train_prob)

    #Build dataset
    dataset = BaseDataSet()
    dataset.build_data(train_file,test_file)
    logging.info("Build dataset cost:%s"%(dataset.cost_time))
    print "DataForTrain: %s"%(train_file)
    print "DataForTest: %s"%(test_file)
    print "Dataset train_set info: %s"%(dataset.get_train_info())
    print "Dataset test_set info: %s"%(dataset.get_test_info())

    #Record best scores
    best_f_score = {'f_score':0}
    best_precision = {'precision':0}
    best_recall = {'recall':0}

    #Data Preparation
    items_tag_dict = {}
    users_tag_dict = {}
    if recommend_job in ('mix_result_reorder','mix_sim_reorder'):
        items_tag_dict = load_tag_distribution('./song_dataset/mid_data/song_tag_distribution.json') #Load item tag distribution
        user_tag_file = './song_dataset/mid_data/user_tag_distribution_%s_%s.json'%(set_level,train_prob)
        users_tag_dict = load_tag_distribution(user_tag_file)

    #Initiate hybrid model
    recommender = HybirdModel()
    if recommend_job in ('mix_sim','mix_sim_reorder'):
        recommender.hybird_user_sim(dataset.train_data,userTag_sim_file,userLDA_sim_file,theta=0.45)
    elif recommend_job in ('mix_result','mix_result_reorder'):
        recommender.userTag.load_user_similarity(userTag_sim_file,norm=1)
        recommender.userLda.load_user_similarity(userLDA_sim_file,norm=1)

    for user_k in [5]+range(10,101,10):
        if recommend_job == 'mix_sim':
            recommender.recommend(dataset.train_data,users_tag_dict,items_tag_dict,user_k,top_n,reorder=0)
        elif recommend_job == 'mix_sim_reorder':
            recommender.recommend(dataset.train_data,users_tag_dict,items_tag_dict,user_k,top_n,reorder=1)
        elif recommend_job == 'mix_result':
            recommender.hybird_recommend_result(dataset.train_data,user_k,top_n)
        elif recommend_job == 'mix_result_reorder':
            recommender.hybird_result_withReorder(dataset.train_data,users_tag_dict,items_tag_dict,user_k,top_n)
        logging.info("Train_prob:%s User_k:%s Top_n:%s cost:%s"%(train_prob,user_k,top_n,recommender.cost_time))
        scores = recommender.score(dataset.test_data,len(dataset.all_songs))
        print "User_k:%s\tTop_n:%s\tScores:%s"%(user_k,top_n,scores)

        #Find Best Score
        if scores['f_score'] > best_f_score['f_score']:
            best_f_score = scores
            best_f_score['user_k'] = user_k
            best_f_score['top_n'] = top_n
        if scores['precision'] > best_precision['precision']:
            best_precision = scores
            best_precision['user_k'] = user_k
            best_precision['top_n'] = top_n
        if scores['recall'] > best_recall['recall']:
            best_recall = scores
            best_recall['user_k'] = user_k
            best_recall['top_n'] = top_n

    print "Best_F_Score: %s"%(best_f_score)
    print "Best_Precision: %s"%(best_precision)
    print "Best_Recall: %s"%(best_recall)