def req_to_dict(raw_req_data):
    """
    Parse one request record captured by anyproxy into the dict used to
    rebuild that request.

    :param raw_req_data: a single captured request record, i.e.
        ``_type['req_data']`` (one of the 5 kinds of request data). The keys
        read here are ``'url'``, ``'requestOptions'`` (with ``'method'`` and
        ``'headers'``) and ``'requestData'``.
    :return: dict with the keys ``'url'`` (everything before the first ``?``
        plus a trailing ``?``), ``'method'``, ``'headers'``, ``'body_dict'``,
        ``'url_param_dict'`` and ``'url_param_str'``.
    """
    req_data = {}
    # partition() instead of split('?')[1]: a URL without any '?' no longer
    # raises IndexError, and a literal '?' inside the query string no longer
    # truncates the parameters.
    base_url, separator, url_param_str = raw_req_data['url'].partition('?')
    req_data['url'] = base_url + '?'
    req_data['method'] = raw_req_data['requestOptions']['method']
    req_data['headers'] = raw_req_data['requestOptions']['headers']
    body_dict = str_to_dict(raw_req_data['requestData'], "&", "=")
    if separator:
        url_param_dict = str_to_dict(url_param_str, "&", "=")
    else:
        # No query string was captured, so there are no URL parameters.
        url_param_dict = {}
    req_data['body_dict'] = body_dict
    req_data['url_param_dict'] = url_param_dict
    # Extra field (added for testing): the raw, unparsed query string.
    req_data['url_param_str'] = url_param_str
    return req_data
 def process_request(self, request, spider):
     """
     Rewrite ``request`` in place using one of the captured
     ``getappmsgext`` request templates.

     The template is chosen from ``self.req_data_list`` by
     ``self.counter % self.wx_num``; ``self.counter`` is incremented once
     the request has been rewritten.

     :param request: request object whose URL, method, headers and body are
         overwritten
     :param spider: not used by this method
     :return: always None
     """
     slot = self.counter % self.wx_num
     captured = self.req_data_list[slot]
     template = TidyReqData.req_to_dict(captured['getappmsgext']['req_data'])

     # The query string of the request's current URL supplies body fields.
     current_query = request._get_url().split('?')[-1]
     current_params = str_to_dict(current_query, '&', '=')

     payload = template['body_dict']
     payload.update(current_params)
     payload['comment_id'] = request.get_ext_data['comment_id']
     payload['is_need_reward'] = 1

     # Point the request at the captured endpoint and copy over its settings.
     target_url = template['url'] + template['url_param_str']
     request._set_url(target_url)
     request.set_method(template['method'])
     request.set_headers(template['headers'])
     encoded_payload = dict_to_str(payload)
     request._set_body(encoded_payload)

     self.counter += 1
     return None
    def prepare_req_data(self, current_req_data, request, _type):
        """
        Build the URL, headers and body for a follow-up request from a
        captured request template.

        :param current_req_data: request parameters to use for this round,
            indexed by request type
        :param request: Request object; its URL and ``meta['comment_id']``
            are read
        :param _type: request type; only ``'getappmsgext'`` and
            ``'appmsg_comment'`` are handled
        :return: dict with the keys ``'url_str'``, ``'header_dict'`` and
            ``'body_dict'``, or an empty dict for any other ``_type``
        """
        if _type not in ('getappmsgext', 'appmsg_comment'):
            return {}

        template = TidyReqData.req_to_dict(current_req_data[_type]['req_data'])

        # Build the body parameters from the URL of the original article.
        article_query = request._get_url().split('?')[-1]
        article_params = str_to_dict(article_query, '&', '=')
        payload = copy(template['body_dict'])
        from tools.utils import update_dict_by_dict
        update_dict_by_dict(
            payload, article_params, ['mid', 'sn', 'idx', 'scene'])
        payload['comment_id'] = request.meta['comment_id']
        payload['is_need_reward'] = 1

        if "comment_id" in template['url_param_dict']:
            # Comment request: the query string carries its own comment_id
            # and idx, so it is rebuilt from the parsed parameters.
            query_params = copy(template['url_param_dict'])
            query_params['comment_id'] = request.meta['comment_id']
            query_params['idx'] = article_params['idx']
            from tools.utils import dict_to_str
            target_url = template['url'] + dict_to_str(query_params)
        else:
            # Read-count request: the captured query string is reused as is.
            target_url = template['url'] + template['url_param_str']

        return {
            'url_str': target_url,
            'header_dict': template['headers'],
            'body_dict': payload,
        }
Exemple #4
0
def on_phone_crawler_add(data):
    """
    Register a crawler described by ``data`` and emit the crawler report.

    :param data: string of ``key=value`` pairs joined by ``&``
    """
    crawler_fields = str_to_dict(data, '&', '=')
    gc.add_crawler(crawler_fields)
    crawler_report = gc.report_crawler()
    # Emit the report taken after the add on 'phone_crawler_data'.
    socketio.emit('phone_crawler_data', crawler_report)
Exemple #5
0
def on_gzhs_todolist_add(data):
    """
    Add the entry described by ``data`` via ``gc.add_gzh`` and emit the
    result of ``gc.report_gzh_doing()``.

    :param data: string of ``key=value`` pairs joined by ``&``
    """
    gzh_fields = str_to_dict(data, '&', '=')
    gc.add_gzh(gzh_fields)
    doing_report = gc.report_gzh_doing()
    # Emit the report taken after the add on 'gzhs_todolist_data'.
    socketio.emit('gzhs_todolist_data', doing_report)