Example #1
    def craw(self, request=aispider.request):

        for key, value in url_parse_dict.items():
            if value.get('status') == 'active':
                # website name
                website_name = key
                # website URLs
                website_urls = value.get('url')
                # request method
                method = value.get('request_method')
                # payload to submit with the request
                post_datas = value.get('submit_data')
                # parse function ('system' selects the built-in parser)
                parse_func = value.get('parse_func', 'system')
                if parse_func == 'system':
                    parser = self.parse_to_get_ip
                else:
                    parser = parse_func

                # custom headers
                diy_header = value.get('header')

                for url in website_urls:
                    # build the queue item with format_put_data
                    if post_datas:
                        for post_data in post_datas:
                            put_data = format_put_data(
                                args={
                                    "url": url,
                                    "method": method,
                                    'submit_data': post_data,
                                    'diy_header': diy_header
                                },
                                work_func=request,
                                follow_func=self.get_and_check,
                                meta={
                                    'value': value,
                                    'website_name': website_name,
                                    'parser': parser
                                })
                            # enqueue
                            work_queue.put(put_data)

                    else:
                        # no payload in this branch
                        put_data = format_put_data(
                            args={
                                "url": url,
                                "method": method,
                                'submit_data': post_datas
                            },
                            work_func=request,
                            follow_func=self.get_and_check,
                            meta={
                                'value': value,
                                'website_name': website_name,
                                'parser': parser
                            })
                        # enqueue
                        work_queue.put(put_data)
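The examples on this page only show call sites; format_put_data itself is not listed. Judging from the keyword arguments used above, it most likely just bundles its inputs into a single task object for the work queue. A minimal sketch of that idea, assuming a plain dict layout (the field names come from the call sites, everything else is a guess, not the project's actual implementation):

    # Hypothetical reconstruction of format_put_data, inferred from the
    # call sites above -- not the project's real code.
    def format_put_data(args=None, work_func=None, follow_func=None,
                        need_save=False, save_func=None, meta=None):
        # Bundle everything a worker needs into one queue item:
        # 'args' are passed to work_func, 'follow_func' handles the
        # response, and 'save_func' persists results when need_save is set.
        return {
            'args': args or {},
            'work_func': work_func,
            'follow_func': follow_func,
            'need_save': need_save,
            'save_func': save_func,
            'meta': meta or {},
        }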
Example #2
    def start_check(self, ips, website_name):
        if ips:
            # run the checks
            for _ip in ips:
                for target_url in self.target_urls:
                    url = target_url
                    # build the queue item with format_put_data
                    put_data = format_put_data(
                        args={
                            "url": url,
                            'ip': _ip,
                            'time_out': 5
                        },
                        work_func=valid,
                        need_save=True,
                        save_func=self.save_ip,
                        meta={
                            'website_name': website_name,
                            'target_url': target_url
                        })
                    # enqueue
                    work_queue.put(put_data)
        else:
            msg = 'No usable IPs from [{website_name}] to check, please check!'.format(
                website_name=website_name)
            logger.error(msg)
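Every snippet here ends with work_queue.put(put_data), so somewhere a pool of workers must drain that queue. The consumer side is not shown in these examples; assuming the dict layout sketched after Example #1, a plausible (entirely hypothetical) worker loop would look like this:

    # Hypothetical worker draining work_queue; the project's real consumer
    # is not shown on this page.
    def worker():
        while True:
            task = work_queue.get()
            try:
                # run the request (or proxy check) with the stored arguments
                response = task['work_func'](**task['args'])
                if task.get('need_save') and task.get('save_func'):
                    # persist the result; how meta is threaded through
                    # is an assumption
                    task['save_func'](response, task.get('meta'))
                elif task.get('follow_func'):
                    # hand the response to the follow-up callback
                    task['follow_func'](response)
            finally:
                work_queue.task_done()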
Example #3

    def craw(self, singer_mids, begin=0, num=1):
        for singer_mid in singer_mids:
            url = root_url_format.format(singermid=singer_mid, begin=begin, num=num)
            # build the queue item with format_put_data
            put_data = format_put_data(args={'url': url, 'method': 'get', 'submit_data': None},
                                       work_func=self.downloader.request,
                                       follow_func=self.get_total_num)
            work_queue.put(put_data)
Example #5
    def craw(self, singer_mids, begin=0, num=1):
        for singer_mid in singer_mids:
            url = root_url_format.format(singermid=singer_mid,
                                         begin=begin,
                                         num=num)
            # build the queue item with format_put_data
            put_data = format_put_data(args={"url": url},
                                       work_func=self.downloader.request,
                                       follow_func=self.get_total_num)
            # enqueue
            work_queue.put(put_data)
Example #7
File: test.py  Project: njxshr/codes
    def get_total_num(self, response):
        # get the body of the response
        html_content = response.get('content')
        # parse the JSON payload
        datas = parser.get_data_by_json(html_content)
        total_num = datas.get('data').get('total', '')

        if total_num:
            singer_mid = datas.get('data').get('singer_mid')
            url = root_url_format.format(singermid=singer_mid,
                                         begin=0,
                                         num=total_num)
            # call format_put_data again to build the follow-up queue item,
            # this time requesting all total_num records
            put_data = format_put_data(args={"url": url},
                                       work_func=self.downloader.request,
                                       need_save=True,
                                       save_func=self.save)
            work_queue.put(put_data)
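Examples #3, #5 and #7 together form a two-phase crawl: craw enqueues a one-record probe (num=1), get_total_num reads the real total from that response, then enqueues a second, full-size request whose result is persisted via save_func. A hypothetical driver tying the phases together; the spider class name, thread count, and singer mid are all placeholders, not from the source:

    import threading

    # Hypothetical driver for the two-phase crawl shown above.
    if __name__ == '__main__':
        spider = QQMusicSpider()  # assumed class name
        # phase 1: probe with num=1 to learn the total count;
        # get_total_num (Example #7) then enqueues the full request
        spider.craw(['some_singer_mid'], begin=0, num=1)
        # start workers to drain work_queue (see the worker sketch above)
        for _ in range(4):
            threading.Thread(target=worker, daemon=True).start()
        work_queue.join()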