def save_singer_all_hot_comments_to_file(self):
    '''
    Gather the hot comments of every hot song of the singer and save them.

    Reads ``self.singer_url``, ``self.singer_name`` and
    ``self.singer_all_hot_comments_file_path``; takes no other arguments.
    If no song ids are obtained from the singer page, an error is logged and
    the method returns before fetching any comments.
    :return: None
    '''
    output_file = self.singer_all_hot_comments_file_path
    Helper.check_file_exits_and_overwrite(output_file)

    # Ids of all hot songs found on the singer page.
    hot_song_ids = Helper.get_singer_hot_songs_ids(self.singer_url)
    if len(hot_song_ids) == 0:
        self.logger.error(
            "crawl from %s to get %s all hot songs ids failed!" %
            (self.singer_url, self.singer_name))
        return

    comments_url_template = (
        "http://music.163.com/weapi/v1/resource/comments/"
        "R_SO_4_{song_id}/?csrf_token=")

    # Phase 1: fetch the hot comments of each song, keeping song order.
    collected_comments = [
        comment
        for hot_song_id in hot_song_ids
        for comment in self.get_hot_comments(
            comments_url_template.format(song_id=hot_song_id))
    ]

    # Phase 2: turn every comment into one JSON string (one output line each).
    json_lines = []
    for comment in collected_comments:
        json_lines.append(self.extract_comment_info_as_json_str(comment))

    Helper.save_lines_to_file(json_lines, output_file)
    success_message = (
        "Write {singer_name}'s {num} hot songs hot comments successfully!".
        format(singer_name=self.singer_name, num=len(hot_song_ids)))
    self.logger.info(success_message)
def download_singer_hot_songs_by_name_with_multi_threading(
        self, singer_name, threads=20):
    '''
    Download all hot songs of a singer, looked up by name, with worker threads.

    The songs are split into contiguous slices whose sizes differ by at most
    one, and each slice is handed to one thread running
    ``self._download_list_songs_to_file``. No more threads are started than
    there are songs, and a ``threads`` value below 1 is treated as 1.
    :param singer_name: name of the singer
    :param threads: requested number of worker threads
    :return: None
    '''
    start_time = time.time()
    # Directory that receives the downloaded hot songs.
    save_path = os.path.join(Constants.SINGER_SAVE_DIR, singer_name,
                             Constants.HOT_SONGS_SAVE_NAME)
    # Resolve the singer id from the name.
    uid = self.get_singer_id_by_name(singer_name)
    # Singer home page.
    singer_url = "http://music.163.com/artist?id=%d" % uid
    # Ids of all hot songs of the singer.
    hot_songs_ids = Helper.get_singer_hot_songs_ids(singer_url)
    # Download urls for those ids.
    urls_list = self.get_download_urls_by_ids(hot_songs_ids)
    # "song name / singer name" strings for those ids, used as file names.
    songs_name_and_singer_name_str_list = self.get_songs_name_and_singer_name_str_list_by_ids_list(
        hot_songs_ids)
    # Number of hot songs to download.
    total = len(urls_list)
    Helper.mkdir(save_path)
    self.logger.info("%s has total %d hot songs!" % (singer_name, total))
    self.logger.info(
        "(multi threads,thread_num = %d)Now start download hot musics of %s(save path is:%s):"
        % (threads, singer_name, save_path))
    # Reset the shared counter before the workers start.
    self.no_counter = 0

    # Never start more workers than songs, and always at least one when there
    # is work. The previous `total // threads` split sent everything to the
    # last thread when total < threads and divided by zero for threads=0.
    worker_count = min(max(threads, 1), total)
    threads_list = []
    if worker_count > 0:
        # The first `extra` workers take one additional song each.
        base, extra = divmod(total, worker_count)
        begin_index = 0
        for i in range(worker_count):
            end_index = begin_index + base + (1 if i < extra else 0)
            urls = urls_list[begin_index:end_index]
            save_list = [
                os.path.join(save_path, "%s.mp3" % name) for name in
                songs_name_and_singer_name_str_list[begin_index:end_index]
            ]
            t = Thread(target=self._download_list_songs_to_file,
                       args=(urls, save_list, total))
            threads_list.append(t)
            begin_index = end_index

    for thread in threads_list:
        thread.start()
    for thread in threads_list:
        thread.join()
    end_time = time.time()
    self.logger.info("Download %s's %d hot songs to %s succeed!"
                     "Costs %.2f seconds!" %
                     (singer_name, total, save_path,
                      (end_time - start_time)))
def download_singer_hot_songs_by_name(self, singer_name):
    '''
    Download all hot songs of a singer, looked up by name, in a single thread.

    Songs are downloaded one after another. A failed download is logged with
    its cause and the loop moves on to the next song.
    :param singer_name: name of the singer
    :return: None
    '''
    start_time = time.time()
    # Directory that receives the downloaded hot songs.
    save_path = os.path.join(Constants.SINGER_SAVE_DIR, singer_name,
                             Constants.HOT_SONGS_SAVE_NAME)
    # Resolve the singer id from the name.
    uid = self.get_singer_id_by_name(singer_name)
    # Singer home page.
    singer_url = "http://music.163.com/artist?id=%d" % uid
    # Ids of all hot songs of the singer.
    hot_songs_ids = Helper.get_singer_hot_songs_ids(singer_url)
    # Download urls for those ids.
    urls_list = self.get_download_urls_by_ids(hot_songs_ids)
    # "song name / singer name" strings for those ids, used as file names.
    songs_name_and_singer_name_str_list = self.get_songs_name_and_singer_name_str_list_by_ids_list(
        hot_songs_ids)
    # Number of hot songs to download.
    total = len(urls_list)
    Helper.mkdir(save_path)
    self.logger.info("%s has total %d hot songs!" % (singer_name, total))
    self.logger.info(
        "(single thread)Now start download hot musics of %s(save path is:%s):"
        % (singer_name, save_path))
    for index, url in enumerate(urls_list, 1):
        # Looked up once per song; a missing name still raises IndexError.
        song_label = songs_name_and_singer_name_str_list[index - 1]
        try:
            Helper.download_network_resource(
                url, os.path.join(save_path, "%s.mp3" % song_label))
        except Exception as err:
            # Best effort: keep going, but surface why this song failed
            # instead of dropping the cause.
            self.logger.error("Fail download %d/%d(%s)! Reason: %r" %
                              (index, total, song_label, err))
            continue
        self.logger.info("Successfully download %d/%d(%s)!" %
                         (index, total, song_label))
    end_time = time.time()
    self.logger.info(
        "It costs %.2f seconds to download singer %s's %d hot songs to %s "
        "using single thread!" % ((end_time - start_time), singer_name,
                                  total, save_path))
def test_get_singer_hot_songs_ids(self):
    '''Fetch the hot-song ids for ``self.singer_url`` and log them.'''
    hot_song_ids = Helper.get_singer_hot_songs_ids(self.singer_url)
    self.logger.info(hot_song_ids)
def test_get_songs_name_list_by_ids_list(self):
    '''Look up song names for the hot songs of artist 7214 and log them.'''
    artist_page = "http://music.163.com/artist?id=7214"
    hot_song_ids = Helper.get_singer_hot_songs_ids(artist_page)
    song_names = self.netcloud_login.get_songs_name_list_by_ids_list(
        hot_song_ids)
    self.logger.info(song_names)
def test_get_download_urls_by_ids(self):
    '''Resolve download urls for the hot songs of artist 9621 and log them.'''
    artist_page = "http://music.163.com/artist?id=9621"
    hot_song_ids = Helper.get_singer_hot_songs_ids(artist_page)
    download_urls = self.netcloud_login.get_download_urls_by_ids(
        hot_song_ids)
    self.logger.info(download_urls)