Example #1
 def judgeUrlCount(self):
     if self.max_url_num == 0:
         return
     if self.urlcount < self.max_url_num:
         self.urlcount += 1
     else:
         self.stop = True
         DEBUG("now urlcount:%s,kill pool start" % self.urlcount)
         self.pool.kill()
         DEBUG("kill pool end")
Example #2
 def run(self):
     DEBUG("ScanEngine start")
     joinall([
         #spawn(self.scheduleDomain),
         spawn(self.scheduleUrl),
         spawn(self.scheduleDomain)
     ])
     self.pool.join()
     self.update_progress('END')
     DEBUG("ScanEngine end")
Example #3
 def start(cls):
     url = conf.url
     basepath = conf.base
     concurrency = 10
     depth = conf.depth
     urlcount = conf.count
     duplicates = 1
     assert url
     urls = list(url) if isinstance(url, (list, tuple)) else [url]
     schedule = Schedule(urls, concurrency, depth, urlcount, duplicates,
                         basepath)
     DEBUG('CrawlEngine start')
     schedule.doSchedule()
     DEBUG('CrawlEngine end')
Example #4
    def doSchedule(self):
        DEBUG("Schedule start")
        self.task.update_spider_flag('start')
        while not self.stop and (len(self.pool) > 0
                                 or not self.pendings.empty()):
            try:
                request = self.pendings.get(block=False)
            except queue.Empty:
                gevent.sleep(0)
            else:
                self.pool.spawn(Spider.start, request, self)

        self.task.update_spider_flag('finish')
        code = (self.stop, self.urlcount, len(self.pool),
                self.pendings.qsize())
        DEBUG(
            "Schedule end,stop:%s,now urlcount:%s,:pool size:%s,pendings size:%s"
            % code)
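doSchedule is a small producer/consumer loop: keep pulling requests from the pending queue while either workers are still running or requests are still queued, and yield with gevent.sleep(0) whenever the queue is momentarily empty. A self-contained sketch of that loop, with a plain function standing in for Spider.start (assumes only gevent):

import gevent
from gevent.pool import Pool
from gevent.queue import Queue, Empty

pool = Pool(5)
pendings = Queue()

def spider(url):
    # stand-in for Spider.start: fetch the URL and possibly queue follow-ups
    gevent.sleep(0)
    print("crawled %s" % url)

for i in range(8):
    pendings.put("http://example.com/%d" % i)

# Same loop shape as doSchedule: run while workers are active or work is queued.
while len(pool) > 0 or not pendings.empty():
    try:
        url = pendings.get(block=False)
    except Empty:
        gevent.sleep(0)   # nothing pending right now; let the workers run
    else:
        pool.spawn(spider, url)

pool.join()
print("schedule finished")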
Example #5
def run_url(req, rule):
    def _contains(content, chars):
        content = re.sub(r"\\[%s]" % "".join(chars), "", content,
                         flags=re.S) if chars else content
        return all(char in content for char in chars)

    details = []
    response = None
    params = req.params
    for match in PARAMS_PATTERN.finditer(params):
        found = False
        prefix, suffix = [
            "".join(random.sample(string.ascii_lowercase,
                                  PREFIX_SUFFIX_LENGTH)) for i in xrange(2)
        ]
        for pool in (LARGER_CHAR_POOL, SMALLER_CHAR_POOL):
            if not found:
                tampered = params.replace(
                    match.group('value'),
                    "%s%s%s%s" % (match.group('value'), prefix, "".join(
                        random.sample(pool, len(pool))), suffix))
                res = requestUrl(req, tampered)
                if not res:
                    continue
                content = res.text
                for sample in re.finditer("%s(.+?)%s" % (prefix, suffix),
                                          content, re.I | re.S):
                    for regex, condition, info in XSS_PATTERNS:
                        context = re.search(
                            regex % dict((("chars",
                                           reduce(
                                               lambda filtered, char: filtered.
                                               replace(char, "\\%s" % char),
                                               REGEX_SPECIAL_CHARS,
                                               sample.group(0))), )), content,
                            re.I | re.S)
                        if context and not found and sample.group(1).strip():
                            #print sample.group(1),condition
                            if _contains(sample.group(1), condition):
                                msg = info % dict((("filtering", "no" if all(
                                    char in sample.group(1)
                                    for char in LARGER_CHAR_POOL) else
                                                    "some"), ))
                                DEBUG(msg)
                                found = True
                                if response is None:
                                    response = res
                                details.append(u"漏洞参数:%s" % match.group('key'))
                                break
                #end for
        #end for
    #end for
    if response is not None:
        return Result(response, details)
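The core trick in run_url is to wrap the injected characters in a randomly generated prefix/suffix marker pair, then scan the response for that pair: whatever survives between the markers shows which characters the application reflects unfiltered. A stripped-down sketch of just that marker step, with a canned response string instead of a real HTTP request (PREFIX_SUFFIX_LENGTH and the character pool are assumed values):

import random
import re
import string

PREFIX_SUFFIX_LENGTH = 5
CHAR_POOL = ('<', '>', '"', "'")  # characters we want to see reflected unescaped

prefix = "".join(random.sample(string.ascii_lowercase, PREFIX_SUFFIX_LENGTH))
suffix = "".join(random.sample(string.ascii_lowercase, PREFIX_SUFFIX_LENGTH))
payload = prefix + "".join(CHAR_POOL) + suffix

# Pretend the application echoed the payload back with the quotes HTML-escaped.
echoed = payload.replace('"', "&quot;").replace("'", "&#39;")
response_body = '<div id="x">%s</div>' % echoed

for reflected in re.finditer("%s(.+?)%s" % (prefix, suffix), response_body, re.I | re.S):
    kept = [c for c in CHAR_POOL if c in reflected.group(1)]
    print("reflected between markers: %r, unfiltered chars: %s" % (reflected.group(1), kept))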
Example #6
    def scheduleUrl(self):
        """
        When run_type is 1, the rule script must define a run_url method.
        """
        DEBUG("scheduleUrl start")
        sql = "SELECT `rule_id`,`risk`,`file_name` FROM `%s` WHERE `run_type` = 1 ORDER BY  `priority`" % RULE_TABLE
        # rules = []
        # for rule in db.iter(sql):
        #     rules.append((str(rule.rule_id), rule.file_name, rule.risk))
        rules = [(str(rule.rule_id), rule.file_name, rule.risk)
                 for rule in db.iter(sql)
                 if str(rule.rule_id) not in self.finished_progress]

        if not conf.spider_finish:  #spider not finished, start crawler
            CrawlEngine.start()

        sql = "SELECT `url`,`method`,`params`,`referer` FROM %s WHERE `task_id`=%s" % (
            URL_TABLE, self.task_id)
        # reqs = []
        # for url in db.iter(sql):
        #     reqs.append(Url(url.url, url.method, url.params, url.referer))
        reqs = [
            Url(url.url, url.method, url.params, url.referer)
            for url in db.iter(sql)
        ]

        for rule_id, filename, risk in rules:
            run_url = attr_from_script(filename, RUN_URL_DEFAULT_FUN)
            if run_url:
                DEBUG("rule_id:%s filename:%s run_url start" %
                      (rule_id, filename))
                for req in reqs:
                    self.pool.spawn(self.runUrl, rule_id, run_url, req,
                                    filename, risk)
                    gevent.sleep(0)
                DEBUG("rule_id:%s filename:%s run_url end" %
                      (rule_id, filename))
        DEBUG("scheduleUrl end")
Example #7
 def scheduleDomain(self):
     """
     When run_type is 2, the rule script must define a run_domain method.
     """
     DEBUG("scheduleDomain start")
     sql = "SELECT `rule_id`,`risk`,`file_name` FROM `%s` WHERE `run_type` = 2 ORDER BY  `priority`" % RULE_TABLE
     # domainRule = []
     # for rule in db.iter(sql):
     #     domainRule.append((str(rule.rule_id), rule.file_name, rule.risk))
     domainRule = [(str(rule.rule_id), rule.file_name, rule.risk)
                   for rule in db.iter(sql)
                   if str(rule.rule_id) not in self.finished_progress]
     for rule_id, filename, risk in domainRule:
         run_domain = attr_from_script(filename, RUN_DOMAIN_DEFAULT_FUN)
         if run_domain:
             DEBUG("rule_id:%s filename:%s run_domain start" %
                   (rule_id, filename))
             self.pool.spawn(self.runDomain, rule_id, run_domain, filename,
                             risk)
             gevent.sleep(0)
             DEBUG("rule_id:%s filename:%s run_domain end" %
                   (rule_id, filename))
     DEBUG("scheduleDomain end")
Example #8
 def init_request(self):
     urls = self.task.get_exist_url()
     self.urlcount += len(urls)
     for url in urls:
         request = Request(url.url, url.method, url.params, url.referer)
         if self.visited[request] < self.duplicates:
             if not discard(request.url) and not url.end_time:
                 request.id = url.id
                 self.pendings.put(request)
                 DEBUG("-----request:%s not crawler,add queue" % request)
             self.visited[request] += 1
         else:
             #DEBUG("duplicates url:%s" %request)
             pass
     return self.urlcount
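The duplicate filter in init_request is just a per-request counter capped at self.duplicates occurrences; only the first few sightings of a request are queued. A minimal illustration of that bookkeeping with plain URL strings (Request, discard, and the database fields are left out):

from collections import defaultdict

duplicates = 2                    # allow each URL to be queued at most twice
visited = defaultdict(int)
pendings = []

urls = ["http://example.com/a", "http://example.com/b",
        "http://example.com/a", "http://example.com/a"]

for url in urls:
    if visited[url] < duplicates:
        pendings.append(url)      # stands in for self.pendings.put(request)
        visited[url] += 1
    else:
        pass                      # dropped as a duplicate

print(pendings)  # ['http://example.com/a', 'http://example.com/b', 'http://example.com/a']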
Example #9
 def __call__(self, parser, namespace, values, option_string=None):
     # If the option uses this Action, argparse automatically calls __call__, which in turn calls DEBUG
     setattr(namespace, self.dest, True)
     DEBUG()
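Example #9 relies on argparse's custom Action hook: when the option appears on the command line, argparse invokes the Action's __call__, which is what triggers the DEBUG call. A self-contained version of the same pattern, with print standing in for the project's DEBUG():

import argparse

class EnableDebugAction(argparse.Action):
    def __call__(self, parser, namespace, values, option_string=None):
        # Invoked by argparse as soon as the option is parsed.
        setattr(namespace, self.dest, True)
        print("debug logging enabled via %s" % option_string)

parser = argparse.ArgumentParser()
parser.add_argument("--debug", action=EnableDebugAction, nargs=0, default=False)

args = parser.parse_args(["--debug"])
print(args.debug)  # True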