def update_spider_flag(self, action):
    """Store the spider flag matching *action* on this task's row.

    ``'start'`` maps to 1 and ``'finish'`` maps to 3; any other action
    falls back to 1. A failing database call is reported through
    ``ERROR`` and is not re-raised.
    """
    flags_by_action = {'start': 1, 'finish': 3}
    spider_flag = flags_by_action.get(action, 1)
    statement = "UPDATE task SET `spider_flag`=%s where id=%s"
    try:
        db.execute(statement, spider_flag, self.task_id)
    except Exception:
        ERROR('update_spider_flag exception,task_id:%s' % self.task_id)
def update_sitemap_parsed(self, action):
    """Store the ``sitemap_parsed`` value matching *action* on this task's row.

    ``'start'`` maps to 0 and ``'finish'`` maps to 1; any other action
    falls back to 1. A failing database call is reported through
    ``ERROR`` and is not re-raised.
    """
    parsed_by_action = {'start': 0, 'finish': 1}
    parsed_state = parsed_by_action.get(action, 1)
    statement = "UPDATE task SET `sitemap_parsed`=%s where id=%s"
    try:
        db.execute(statement, parsed_state, self.task_id)
    except Exception:
        ERROR('update_sitemap_parsed exception,task_id:%s' % self.task_id)
def update_sitemap_parsed(self, action):
    """Record on the task row whether sitemap parsing has started or finished.

    The stored value is 0 for ``'start'`` and 1 for ``'finish'``; an
    unrecognised action is stored as 1. Database errors are passed to
    ``ERROR`` and swallowed.
    """
    # Unknown actions deliberately share the 'finish' value via the default.
    state = dict(start=0, finish=1).get(action, 1)
    query = "UPDATE task SET `sitemap_parsed`=%s where id=%s"
    try:
        db.execute(query, state, self.task_id)
    except Exception:
        message = 'update_sitemap_parsed exception,task_id:%s' % self.task_id
        ERROR(message)
def update_url_end_time(self, request):
    """Stamp the current time into ``end_time`` of the url row for *request*.

    Nothing is written when ``request.id`` is ``None``. A failing database
    call is reported through ``ERROR`` (with this object's ``task_id``)
    and is not re-raised.
    """
    # A request without an id has no url row to update.
    if request.id is None:
        return

    statement = "UPDATE url SET `end_time`=%s WHERE id=%s"
    try:
        db.execute(statement, datetime.now(), request.id)
    except Exception:
        ERROR('update_url_end_time exception,task_id:%s' % self.task_id)
def update_spider_flag(self, action):
    """Record on the task row whether the spider has started or finished.

    The stored flag is 1 for ``'start'`` and 3 for ``'finish'``; an
    unrecognised action is stored as 1. Database errors are passed to
    ``ERROR`` and swallowed.
    """
    # Unknown actions deliberately share the 'start' value via the default.
    flag = dict(start=1, finish=3).get(action, 1)
    query = "UPDATE task SET `spider_flag`=%s where id=%s"
    try:
        db.execute(query, flag, self.task_id)
    except Exception:
        message = 'update_spider_flag exception,task_id:%s' % self.task_id
        ERROR(message)
def update_progress(self, rule_id):
    """Append *rule_id* to this task's ``|``-separated ``progress`` column.

    The current ``progress`` value is read from the task row; if the
    rule id is not already one of its ``|``-separated entries it is
    appended and the row is written back. Any exception (including a
    missing row) is reported through ``ERROR`` and not re-raised.

    Both statements pass their values as query parameters rather than
    interpolating them into the SQL text, so a quote or ``%`` in
    ``progress`` cannot break or alter the statement.
    """
    try:
        row = db.get("SELECT `progress` FROM task WHERE id=%s", self.task_id)
        progress = row.progress
        # Compare using the exact text that gets stored: a non-string
        # rule_id (e.g. an int) would never equal any of the split strings
        # and would otherwise be appended again on every call.
        entry = '%s' % rule_id
        if entry not in progress.split('|'):
            progress += '|' + entry
            # Only write when the value actually changed.
            db.execute(
                "UPDATE task SET `progress`=%s WHERE id=%s",
                progress,
                self.task_id,
            )
    except Exception:
        ERROR("update_progress exception")
def update_progress(self, rule_id):
    """Record *rule_id* in the ``|``-separated ``progress`` column of this task.

    Reads the task's current ``progress`` text, and when the rule id is
    not yet among its ``|``-separated entries, appends it and stores the
    new text. Failures of any kind (including a missing row) are handed
    to ``ERROR`` and suppressed.

    Values are bound as query parameters instead of being formatted into
    the SQL string, matching the other task-update helpers and keeping
    quotes or ``%`` characters in ``progress`` from corrupting the query.
    """
    select_sql = "SELECT `progress` FROM task WHERE id=%s"
    update_sql = "UPDATE task SET `progress`=%s WHERE id=%s"
    try:
        progress = db.get(select_sql, self.task_id).progress
        # Normalise to the stored text form first; comparing a raw int
        # against the split strings would never match and would append a
        # duplicate entry on every call.
        rule_text = '%s' % rule_id
        if rule_text in progress.split('|'):
            return
        progress = progress + '|' + rule_text
        db.execute(update_sql, progress, self.task_id)
    except Exception:
        ERROR("update_progress exception")
def analyse_result(self, rule_id, risk, result, url):
    """Insert one finding for *rule_id* into ``RESULT_TABLE``.

    *risk* is translated to a number (``'low'`` 1, ``'middle'`` 2,
    ``'high'`` 3, anything else 1). ``result.details`` is joined with
    CRLF when it is a list or tuple. The request and response texts come
    from ``self.generateRequest`` and ``self.generateResponse``. Every
    stored value is UTF-8 encoded before being bound to the INSERT.

    The *url* argument is not used; the stored url is taken from
    ``result.response.request.url``. Exceptions are not caught here.
    """
    raw_response = result.response
    detail_text = result.details
    risk_level = {'low': 1, 'middle': 2, 'high': 3}.get(risk, 1)
    if isinstance(detail_text, (list, tuple)):
        detail_text = "\r\n".join(detail_text)

    requrl = raw_response.request.url
    request_text = self.generateRequest(raw_response.request, requrl)
    response_text = self.generateResponse(raw_response)

    statement = "INSERT INTO %s" % RESULT_TABLE
    row_values = (
        self.task_id,
        rule_id,
        str(risk_level),
        requrl,
        detail_text,
        request_text,
        response_text,
    )
    encoded_values = []
    for value in row_values:
        encoded_values.append(value.encode('utf-8'))
    statement += "(`task_id`,`rule_id`,`risk`,`url`,`detail`,`request`,`response`) VALUES(%s,%s,%s,%s,%s,%s,%s)"
    db.execute(statement, *encoded_values)
def analyse_result(self, rule_id, risk, result, url):
    """Store a single rule finding as a row of ``RESULT_TABLE``.

    The row holds this object's ``task_id``, *rule_id*, a numeric risk
    (1 for ``'low'`` or unknown, 2 for ``'middle'``, 3 for ``'high'``),
    the url of the request behind ``result.response``, the details text
    (list/tuple details are CRLF-joined), and the request/response texts
    built by ``self.generateRequest`` / ``self.generateResponse``. All
    values are encoded as UTF-8 before being bound.

    *url* is accepted but not read. No exception handling is done here.
    """
    risk_levels = {'low': 1, 'middle': 2, 'high': 3}

    http_response = result.response
    details = result.details
    level = risk_levels.get(risk, 1)
    details = "\r\n".join(details) if isinstance(details, (list, tuple)) else details

    requrl = http_response.request.url
    request_dump = self.generateRequest(http_response.request, requrl)
    response_dump = self.generateResponse(http_response)

    insert_sql = "INSERT INTO %s" % RESULT_TABLE
    columns = (self.task_id, rule_id, str(level), requrl, details, request_dump, response_dump)
    bound = [column.encode('utf-8') for column in columns]
    insert_sql += "(`task_id`,`rule_id`,`risk`,`url`,`detail`,`request`,`response`) VALUES(%s,%s,%s,%s,%s,%s,%s)"
    db.execute(insert_sql, *bound)
def pipeline(request):
    """Insert *request* into ``URL_TABLE`` unless an equal row already exists.

    A row counts as existing when one with the same ``conf.taskid``, url,
    method and params is found. Returns whatever ``db.execute`` returns
    for the INSERT, or ``None`` when the row already exists or when any
    exception occurs (the exception is reported through ``ERROR``).
    """
    try:
        encoded = []
        for field in ('url', 'method', 'params', 'referer'):
            encoded.append(getattr(request, field).encode('utf-8'))
        url, method, params, referer = encoded

        # Values are bound as parameters, not formatted into the SQL text.
        count_sql = "SELECT COUNT(1) as `c` FROM %s" % (URL_TABLE)
        count_sql += " WHERE `task_id`=%s and `url`=%s and `method`=%s and `params`=%s"
        already_stored = db.get(count_sql, conf.taskid, url, method, params).c > 0
        if already_stored:
            return

        insert_sql = "INSERT INTO %s" % (URL_TABLE)
        insert_sql += "(`task_id`,`url`,`method`,`params`,`referer`,`start_time`) VALUES(%s,%s,%s,%s,%s,%s)"
        started_at = datetime.now()
        return db.execute(insert_sql, conf.taskid, url, method, params, referer, started_at)
    except Exception:
        ERROR("Crawler.pipeline Exception")
def pipeline(request):
    """Store *request* in ``URL_TABLE`` once per (task, url, method, params).

    The url, method, params and referer of *request* are UTF-8 encoded.
    If a row with the same ``conf.taskid``, url, method and params is
    already present nothing is inserted and ``None`` is returned.
    Otherwise a row stamped with the current time is inserted and the
    result of ``db.execute`` is returned. Any exception is reported
    through ``ERROR`` and results in ``None``.
    """
    field_names = ('url', 'method', 'params', 'referer')
    try:
        values = [getattr(request, name).encode('utf-8') for name in field_names]

        # Duplicate check binds its values as query parameters.
        lookup = "SELECT COUNT(1) as `c` FROM %s" % (URL_TABLE)
        lookup += " WHERE `task_id`=%s and `url`=%s and `method`=%s and `params`=%s"
        matches = db.get(lookup, conf.taskid, values[0], values[1], values[2]).c
        if matches > 0:
            return

        insert = "INSERT INTO %s" % (URL_TABLE)
        insert += "(`task_id`,`url`,`method`,`params`,`referer`,`start_time`) VALUES(%s,%s,%s,%s,%s,%s)"
        values.append(datetime.now())
        return db.execute(insert, conf.taskid, *values)
    except Exception:
        ERROR("Crawler.pipeline Exception")
def set_unreachable_flag(task_id):
    """Set ``reachable`` to 0 on the task row whose id is *task_id*.

    The id is bound as a query parameter rather than formatted into the
    SQL text, matching the other task-update helpers in this file and
    preventing a crafted or malformed id from altering the statement.
    A failing database call is reported through ``ERROR`` and is not
    re-raised.
    """
    sql = "UPDATE task SET `reachable`=0 WHERE id=%s"
    try:
        db.execute(sql, task_id)
    except Exception:
        ERROR('set_unreachable failed,task_id:%s,please check' % task_id)
def update_end_time(task_id):
    """Stamp the current time into ``end_time`` of the task row *task_id*.

    A failing database call is reported through ``ERROR`` and is not
    re-raised.
    """
    statement = "UPDATE task SET `end_time`=%s WHERE id=%s"
    try:
        finished_at = datetime.now()
        db.execute(statement, finished_at, task_id)
    except Exception:
        message = 'update_end_time failed,task_id:%s,please check' % task_id
        ERROR(message)
def update_task_status(task_id):
    """Set ``status`` to 3 on the task row whose id is *task_id*.

    The id is bound as a query parameter rather than formatted into the
    SQL text, matching the other task-update helpers in this file and
    preventing a crafted or malformed id from altering the statement.
    A failing database call is reported through ``ERROR`` and is not
    re-raised.
    """
    sql = "UPDATE task SET `status`=3 WHERE id=%s"
    try:
        db.execute(sql, task_id)
    except Exception:
        ERROR('update_task_status failed,task_id:%s,please check' % task_id)