예제 #1
0
    def save_singer_all_hot_comments_to_file(self):
        """Collect the hot comments of every hot song of the singer and save them.

        Reads ``self.singer_url``, ``self.singer_name`` and
        ``self.singer_all_hot_comments_file_path``. Takes no arguments and
        returns ``None``; when no song ids are obtained an error is logged and
        nothing is written.
        """
        target_file = self.singer_all_hot_comments_file_path
        Helper.check_file_exits_and_overwrite(target_file)

        # Ids of all hot songs listed on the singer's page.
        song_ids = Helper.get_singer_hot_songs_ids(self.singer_url)
        if not len(song_ids):
            self.logger.error(
                "crawl from %s to get %s all hot songs ids failed!" %
                (self.singer_url, self.singer_name))
            return

        comments_url_template = "http://music.163.com/weapi/v1/resource/comments/R_SO_4_{song_id}/?csrf_token="
        # Phase 1: fetch the hot comments of every song, in song order.
        collected_comments = [
            comment
            for song_id in song_ids
            for comment in self.get_hot_comments(
                comments_url_template.format(song_id=song_id))
        ]

        # Phase 2: turn each comment into its JSON string form, one per line.
        json_lines = []
        for comment in collected_comments:
            json_lines.append(self.extract_comment_info_as_json_str(comment))
        Helper.save_lines_to_file(json_lines, target_file)

        success_message = "Write {singer_name}'s {num} hot songs hot comments successfully!".format(
            singer_name=self.singer_name, num=len(song_ids))
        self.logger.info(success_message)
예제 #2
0
    def download_singer_hot_songs_by_name_with_multi_threading(
            self, singer_name, threads=20):
        """Download all hot songs of a singer using several worker threads.

        The download URLs are split into contiguous batches whose sizes
        differ by at most one, and each batch is handed to one thread running
        ``self._download_list_songs_to_file``. The call blocks until every
        worker has finished.

        :param singer_name: name of the singer to look up
        :param threads: requested number of worker threads; the number
            actually started is clamped to ``1..total`` so no thread is
            created for an empty batch and a value below 1 still downloads
        :return: None
        """
        start_time = time.time()
        # Directory the hot songs are saved into.
        save_path = os.path.join(Constants.SINGER_SAVE_DIR, singer_name,
                                 Constants.HOT_SONGS_SAVE_NAME)
        # Resolve the singer name to an id, then build the singer page URL.
        uid = self.get_singer_id_by_name(singer_name)
        singer_url = "http://music.163.com/artist?id=%d" % uid
        # Ids of all hot songs of the singer.
        hot_songs_ids = Helper.get_singer_hot_songs_ids(singer_url)
        # Download URLs and "<song> - <singer>"-style names for those ids
        # (exact name format is decided by the helper; not visible here).
        urls_list = self.get_download_urls_by_ids(hot_songs_ids)
        songs_name_and_singer_name_str_list = self.get_songs_name_and_singer_name_str_list_by_ids_list(
            hot_songs_ids)
        total = len(urls_list)
        Helper.mkdir(save_path)
        self.logger.info("%s has total %d hot songs!" % (singer_name, total))
        self.logger.info(
            "(multi threads,thread_num = %d)Now start download hot musics of %s(save path is:%s):"
            % (threads, singer_name, save_path))
        # Reset the counter before any worker starts (presumably the shared
        # progress counter used by _download_list_songs_to_file - confirm).
        self.no_counter = 0

        threads_list = []
        if total > 0:
            # Never start more workers than there are songs, and always at
            # least one, so a small song list is still spread evenly and a
            # non-positive ``threads`` value cannot divide by zero.
            worker_count = max(1, min(threads, total))
            batch_size, remainder = divmod(total, worker_count)
            begin_index = 0
            for i in range(worker_count):
                # The first ``remainder`` workers take one extra song each.
                end_index = begin_index + batch_size + (1 if i < remainder else 0)
                urls = urls_list[begin_index:end_index]
                save_list = [
                    os.path.join(save_path, "%s.mp3" % name) for name in
                    songs_name_and_singer_name_str_list[begin_index:end_index]
                ]
                threads_list.append(
                    Thread(target=self._download_list_songs_to_file,
                           args=(urls, save_list, total)))
                begin_index = end_index

        for thread in threads_list:
            thread.start()
        for thread in threads_list:
            thread.join()
        end_time = time.time()
        self.logger.info("Download %s's %d hot songs to %s succeed!"
                         "Costs %.2f seconds!" %
                         (singer_name, total, save_path,
                          (end_time - start_time)))
예제 #3
0
    def download_singer_hot_songs_by_name(self, singer_name):
        """Download all hot songs of a singer sequentially (single thread).

        Each song is downloaded in turn; a failure on one song is logged
        together with its cause and the loop moves on to the next song.

        :param singer_name: name of the singer to look up
        :return: None
        """
        start_time = time.time()
        # Directory the hot songs are saved into.
        save_path = os.path.join(Constants.SINGER_SAVE_DIR, singer_name,
                                 Constants.HOT_SONGS_SAVE_NAME)
        # Resolve the singer name to an id, then build the singer page URL.
        uid = self.get_singer_id_by_name(singer_name)
        singer_url = "http://music.163.com/artist?id=%d" % uid
        # Ids of all hot songs of the singer.
        hot_songs_ids = Helper.get_singer_hot_songs_ids(singer_url)
        # Download URLs and display names for those ids.
        urls_list = self.get_download_urls_by_ids(hot_songs_ids)
        songs_name_and_singer_name_str_list = self.get_songs_name_and_singer_name_str_list_by_ids_list(
            hot_songs_ids)
        total = len(urls_list)
        Helper.mkdir(save_path)
        self.logger.info("%s has total %d hot songs!" % (singer_name, total))
        self.logger.info(
            "(single thread)Now start download hot musics of %s(save path is:%s):"
            % (singer_name, save_path))
        for index, url in enumerate(urls_list, 1):
            song_name = songs_name_and_singer_name_str_list[index - 1]
            try:
                # Only the download itself is guarded, so a problem in the
                # logging calls is never misreported as a download failure.
                Helper.download_network_resource(
                    url, os.path.join(save_path, "%s.mp3" % song_name))
            except Exception as exc:
                # Deliberately broad: one bad song must not abort the rest.
                # The cause is included so the failure can be diagnosed.
                self.logger.error(
                    "Fail download %d/%d(%s)! Reason: %r" %
                    (index, total, song_name, exc))
                continue
            self.logger.info(
                "Successfully download %d/%d(%s)!" %
                (index, total, song_name))
        end_time = time.time()
        self.logger.info(
            "It costs %.2f seconds to download singer %s's %d hot songs to %s "
            "using single thread!" %
            ((end_time - start_time), singer_name, total, save_path))
예제 #4
0
 def test_get_singer_hot_songs_ids(self):
     """Fetch the hot-song ids for ``self.singer_url`` and log them."""
     fetched_ids = Helper.get_singer_hot_songs_ids(self.singer_url)
     self.logger.info(fetched_ids)
예제 #5
0
 def test_get_songs_name_list_by_ids_list(self):
     """Resolve the hot-song ids of artist 7214 to song names and log them."""
     ids_list = Helper.get_singer_hot_songs_ids(
         "http://music.163.com/artist?id=7214")
     song_names = self.netcloud_login.get_songs_name_list_by_ids_list(
         ids_list)
     self.logger.info(song_names)
예제 #6
0
 def test_get_download_urls_by_ids(self):
     """Resolve the hot-song ids of artist 9621 to download URLs and log them."""
     ids_list = Helper.get_singer_hot_songs_ids(
         "http://music.163.com/artist?id=9621")
     download_urls = self.netcloud_login.get_download_urls_by_ids(ids_list)
     self.logger.info(download_urls)