def __init__(self, threadMaxNums=1):
        """Set up thread bookkeeping and fill the user/song seed queues.

        Four fixed user ids are queued first, followed by three user ids
        picked at random from the ``Follow`` table of ``pj_data.db``. Song
        seeds are read from ``./candidate_songs.txt``, one integer per line.

        Args:
            threadMaxNums: Stored as ``THREAD_MAX`` and used as the initial
                value of ``currentAvailThreads``.

        Raises:
            IndexError: If the ``Follow`` table returns no rows
                (``random.choice`` on an empty sequence).
            ValueError: If a line of the candidate file is not an integer.
        """
        self.THREAD_MAX = threadMaxNums
        self.currentAvailThreads = self.THREAD_MAX
        self.availThreadCondi = threading.Condition()
        self.databaseWriteInCondi = threading.Condition()
        self.lock_availableThreads = threading.Lock()
        self.dataSpace = ThreadSafeData()

        # Initialize the user seed list with the fixed seeds.
        for fixed_seed in (340056317, 350714427, 20888663, 588707084):
            self.dataSpace.userSeedsList.put(fixed_seed)

        # Add three random seeds taken from column 1 of the Follow rows.
        # try/finally guarantees the temporary handle is closed even when
        # the query or the random pick raises.
        db_temp = db_cls("pj_data.db")
        try:
            user_tables = db_temp.read_Data("Select * from Follow")
            for _ in range(3):
                self.dataSpace.userSeedsList.put(
                    random.choice(user_tables)[1])
        finally:
            db_temp.close_db()

        # The context manager closes the file, which the previous version
        # left open for the lifetime of the process.
        with open("./candidate_songs.txt") as candidate_songs_file:
            for line in candidate_songs_file:
                self.dataSpace.songSeedsList.put(int(line.strip()))

        self.__first_db_initialize_flag = False
Ejemplo n.º 2
0
def _group_songs_by_user(rows, user_list):
    """Map each user id to the list of song ids found in ``rows``.

    Args:
        rows: Iterable of records whose item 0 is a user id and whose item 1
            is a song id.
        user_list: User ids that must be present as keys; users without any
            row are mapped to an empty list.

    Returns:
        A plain ``dict`` of user id -> list of song ids, in row order.
    """
    grouped = dict()
    for row in rows:
        # Append in place instead of rebuilding the list for every row.
        grouped.setdefault(row[0], []).append(row[1])
    for user_id in user_list:
        grouped.setdefault(user_id, [])
    return grouped


def main():
    """Run the user-based collaborative-filtering pipeline on ``pj_data.db``.

    Loads users, songs, listening records and songlist contents from the
    database, passes them through the ``cf`` helpers (rating matrix,
    user similarity, recommender matrix) and finally calls ``cf.recommend``
    for one hard-coded user id.
    """
    # Read everything needed from the database up front; the connection is
    # closed in ``finally`` so it is released even if a query fails.
    db = db_cls(db_filename="pj_data.db")
    try:
        user_rows = db.read_Data(sql_query="Select id FROM User_Table")
        item_list1 = db.read_Data(sql_query="Select song_id FROM User2Song")
        item_list2 = db.read_Data(
            sql_query="Select song_id FROM SongList2Song")
        item_list_dict = dict(
            db.read_Data(sql_query="Select id,song_name FROM Song"))
        recent_listen = db.read_Data(sql_query="Select * FROM User2Song")
        list_music = db.read_Data(sql_query="Select a.user_id,b.song_id \
                                         FROM User2SongList a INNER JOIN SongList2Song b\
                                         ON a.songlist_id = b.songlist_id")
    finally:
        db.close_db()

    # Sorted, de-duplicated id lists for users and for every song that shows
    # up in either a listening record or a songlist.
    user_list = sorted(row[0] for row in user_rows)
    item_list = sorted({row[0] for row in item_list1 + item_list2})

    # Songs referenced somewhere but missing from the Song table get a
    # placeholder name.
    for item in item_list:
        item_list_dict.setdefault(item, "歌名未知")

    # "Recently listened" dict: key is user_id, value is the list of song ids
    # that user recently listened to.
    recent_listen_dict = _group_songs_by_user(recent_listen, user_list)

    # "Songlist music" dict: key is user_id, value is the list of song ids
    # contained in the songlists that user has collected.
    list_music_dict = _group_songs_by_user(list_music, user_list)

    # Build the user-song rating matrix.
    user_item_matrix = cf.get_matrix(user_list, item_list, recent_listen_dict,
                                     list_music_dict)

    # Build the user similarity matrix.
    user_similarity_matrix = cf.get_similarity(user_item_matrix)

    # Build the recommendation matrix derived from user similarity.
    Recommender = cf.get_recommender(user_item_matrix, user_similarity_matrix,
                                     5)

    # Recommend songs for user 2141581764 (the trailing 10 is presumably the
    # number of songs to recommend -- confirm against cf.recommend).
    cf.recommend(2141581764, user_list, item_list_dict, Recommender, 10)
    def create_db(self, db_filename="pj_data.db"):
        """Open the database and record already-stored song and user ids.

        Stores the opened handle on ``self.db``, then appends the first
        column of every ``Song`` row to the global ``visited_song_list`` and
        the first column of every ``User_Table`` row to the global
        ``visited_user_list``, skipping ids that are already present.

        Args:
            db_filename: Database file passed to ``db_cls``.
        """
        self.db = db_cls(db_filename=db_filename)
        global visited_song_list
        global visited_user_list

        targets = (
            ("Select * from Song", visited_song_list),
            ("Select * from User_Table", visited_user_list),
        )
        for query, visited in targets:
            # A set gives O(1) duplicate checks; the list keeps its order and
            # remains the object the rest of the program reads.
            known_ids = set(visited)
            for row in self.db.read_Data(query):
                row_id = row[0]
                if row_id not in known_ids:
                    known_ids.add(row_id)
                    visited.append(row_id)
Ejemplo n.º 4
0
def user_song_relation(db_filename):
    """Load songs, users and their listening relation as adjacency lists.

    Args:
        db_filename: Database file passed to ``db_cls``.

    Returns:
        An 8-tuple ``(song_pos2id, song_id2pos, song_num, user_pos2id,
        user_id2pos, user_num, R_song, R_user)``. ``R_song[p]`` lists the
        user positions attached to the song at position ``p``; ``R_user[q]``
        lists the song positions attached to the user at position ``q``.
    """
    # Open the database.
    # On first use, db.create_index() can be called here to build indexes and
    # speed up the queries.
    database = db_cls(db_filename)

    # Load the song data and the user data separately.
    song_pos2id, song_id2pos, song_num = song2user_load_songs(database)
    user_pos2id, user_id2pos, user_num = song2user_load_users(database)

    # Build the song-user relation as two adjacency lists.
    R_song = [[] for _ in range(song_num)]
    R_user = [[] for _ in range(user_num)]
    relation_rows = database.read_Data(sql_query="Select * from User2Song")
    for uid, sid in relation_rows:
        u_pos = user_id2pos[uid]
        s_pos = song_id2pos[sid]
        R_song[s_pos].append(u_pos)
        R_user[u_pos].append(s_pos)

    database.close_db()
    return (song_pos2id, song_id2pos, song_num, user_pos2id, user_id2pos,
            user_num, R_song, R_user)
Ejemplo n.º 5
0
    start_time = time.time()
    [song_infos_list, song2singer_list] = ScrapeAndSaveSong(seedUrl, maxNum)

    # db.write_Song_infos(song_infos_list, song2singer_list)
    global_song_infos_list.extend(song_infos_list)
    global_song2singer_list.extend(song2singer_list)
    end_time = time.time()
    print('process finished with execution time ', end_time - start_time)
    global global_count
    global_count += 1
    # db.create_index()


# Script entry point: open the database and create scraper threads, one per
# seed song URL.
if __name__ == "__main__":

    # NOTE(review): `db` is not referenced again in the lines visible here.
    db = db_cls(db_filename="pj_data.db")

    # Each thread runs Thread_Scrapy with a seed song URL and the value 25
    # (presumably the maximum number of songs to scrape per seed -- confirm
    # against Thread_Scrapy's signature).
    thread1 = threading.Thread(
        target=Thread_Scrapy,
        args=('https://music.163.com/#/song?id=31877628', 25))
    thread2 = threading.Thread(target=Thread_Scrapy,
                               args=('https://music.163.com/#/song?id=186453',
                                     25))
    thread3 = threading.Thread(
        target=Thread_Scrapy,
        args=('https://music.163.com/#/song?id=399410693', 25))
    thread4 = threading.Thread(target=Thread_Scrapy,
                               args=('https://music.163.com/#/song?id=1989355',
                                     25))
    # NOTE(review): only thread1 and thread2 are started in the visible code;
    # thread3 and thread4 are created but never started and nothing is
    # joined -- confirm whether the remainder of the script is missing.
    thread1.start()
    thread2.start()
# NOTE(review): presumably the SimRank decay factor -- confirm where `c` is
# used.
c = 0.8

# Number of iterations
iter_num = 20

# Sample songs, used to inspect the similar-song results obtained after the
# iterations
sample_songs = [
    29947420, 461347998, 31356499, 1313354324, 436514312, 483671599
]  # for the data we crawled ourselves
#sample_songs = [139764, 144619, 408055928, 180280]    # for the data provided by the teacher

if __name__ == "__main__":
    # 从数据库中载入数据
    song_pos2id, song_id2pos, song_num, user_pos2id, user_id2pos, user_num, R_song, R_user = user_song_relation(
        db_filename)
    db = db_cls(db_filename)

    # 分别构建歌曲及用户相似度矩阵,初始为单位阵
    S_song = np.eye(song_num)
    S_user = np.eye(user_num)

    # Simrank 迭代
    for iter in range(iter_num):
        print("Iteration: " + str(iter + 1))

        # 遍历歌曲相似矩阵,通过用户相似度来计算歌曲相似度
        for x in tqdm(range(song_num), unit='rows'):
            for y in range(song_num):
                if x < y:
                    S_val = 0
                    # 分别连接歌曲x和y的用户列表