Example #1
0
    def save_singer_all_hot_comments_to_file(self):
        '''
        Save the hot comments of all of the singer's hot songs to disk.

        Takes no arguments: the singer is identified by ``self.singer_url``
        and ``self.singer_name``, and the output is written to
        ``self.singer_all_hot_comments_file_path``.

        If no hot-song ids can be obtained, an error is logged and the
        method returns without writing any comments.
        '''
        save_path = self.singer_all_hot_comments_file_path
        Helper.check_file_exits_and_overwrite(save_path)
        # List of ids of the singer's hot songs.
        song_ids = Helper.get_singer_hot_songs_ids(self.singer_url)
        # Truthiness check covers an empty list as well as a None result.
        if not song_ids:
            self.logger.error(
                "crawl from %s to get %s all hot songs ids failed!" %
                (self.singer_url, self.singer_name))
            return
        # Collect the hot comments of every song before serializing them.
        all_hot_comments_list = []
        for song_id in song_ids:
            url = "http://music.163.com/weapi/v1/resource/comments/R_SO_4_{song_id}/?csrf_token=".format(
                song_id=song_id)
            all_hot_comments_list.extend(self.get_hot_comments(url))
        # One JSON string per comment; each becomes one line in the file.
        all_hot_comments_json_str_list = [
            self.extract_comment_info_as_json_str(comment)
            for comment in all_hot_comments_list
        ]
        Helper.save_lines_to_file(all_hot_comments_json_str_list, save_path)

        self.logger.info(
            "Write {singer_name}'s {num} hot songs hot comments successfully!".
            format(singer_name=self.singer_name, num=len(song_ids)))
Example #2
0
 def save_all_users_info_to_file(self):
     '''
     Write the info of all users gathered by ``get_users_info_list`` to
     ``self.users_info_file_path``.

     The target file is first passed through
     ``Helper.check_file_exits_and_overwrite``, then the collected lines
     are saved in a single call.
     :return: None
     '''
     Helper.check_file_exits_and_overwrite(self.users_info_file_path)
     # The info list is gathered before the path is read again, exactly as
     # in a two-step "collect, then save" sequence.
     Helper.save_lines_to_file(
         self.get_users_info_list(),
         self.users_info_file_path,
     )
Example #3
0
 def save_users_info(self, users_url, total_urls_num):
     '''
     Append the info of the given users to ``self.users_info_file_path``.

     Intended to be called from several threads by
     save_users_info_to_file_by_multi_threading, so the file write is
     serialized through ``self.lock``.
     :param users_url: list of user urls to process
     :param total_urls_num: total number of urls across all workers
     '''
     users_info_list = self.get_users_info_list(users_url, total_urls_num)
     # Append under the lock. The context manager guarantees the lock is
     # released even if the write raises, so other workers cannot be left
     # blocked forever.
     with self.lock:
         Helper.save_lines_to_file(users_info_list,
                                   self.users_info_file_path, "a")
Example #4
0
 def save_all_comments_to_file(self):
     '''
     Fetch all comments sequentially and save them to
     ``self.comments_file_path``, then print the elapsed time.
     :return: None
     '''
     Helper.check_file_exits_and_overwrite(self.comments_file_path)
     started_at = time.time()
     # Serialize each comment dict to a JSON string, one per output line.
     serialized_comments = []
     for raw_comment in self.get_all_comments():
         serialized_comments.append(
             self.extract_comment_info_as_json_str(raw_comment))
     Helper.save_lines_to_file(serialized_comments, self.comments_file_path)
     elapsed = time.time() - started_at
     print("It costs %.2f seconds to crawler <%s>." %
           (elapsed, self.song_name))
Example #5
0
 def save_pages_comments(self, begin_page, end_page, total_comments_num):
     '''
     Fetch a range of comment pages and append them to
     ``self.comments_file_path`` (called by the multi-threading code).

     Loop indices run over ``range(begin_page, end_page)`` and page
     ``i + 1`` is requested for each index ``i``. A page that fails is
     logged and skipped; comments already extracted from it are kept.
     :param begin_page: first loop index (page ``begin_page + 1`` is the
         first page requested)
     :param end_page: end of the range, exclusive as a loop index (page
         ``end_page`` is the last page requested)
     :param total_comments_num: total number of comments, used only in
         the progress log message
     '''
     # JSON strings of every comment gathered by this call.
     comments_info_list = []
     for i in range(begin_page, end_page):
         page_no = i + 1
         json_dict = self.get_page_comments_format_dict(
             self.comments_url, page_no)
         try:
             for item in json_dict[Constants.COMMENTS_KEY]:
                 json_str = self.extract_comment_info_as_json_str(item)
                 # The counter is shared between threads. The context
                 # manager releases the lock even if logging raises.
                 with self.lock:
                     self.no_counter += 1
                     self.logger.info("get %d/%d music comment succeed!" %
                                      (self.no_counter, total_comments_num))
                 comments_info_list.append(json_str)
         except KeyError as key_error:
             # The response has no comments key.
             self.logger.error(
                 "Fail to get page {page}.".format(page=page_no))
             self.logger.error(
                 "Server parse error:{error}".format(error=key_error))
         except Exception as e:
             # Best effort: log the failure and move on to the next page.
             self.logger.error(
                 "Fail to get page {page}.".format(page=page_no))
             self.logger.error(e)
         else:
             self.logger.info(
                 "Successfully to save page {page}.".format(page=page_no))
     # Append under the lock; it is released even if the write fails, so
     # other worker threads cannot deadlock.
     with self.lock:
         Helper.save_lines_to_file(comments_info_list,
                                   self.comments_file_path, "a")
     self.logger.info(
         "Write page {begin_page} to {end_page} successfully!".format(
             begin_page=begin_page, end_page=end_page))