def GET(self):
    """Return the key count (dbsize) of the match redis DB.

    Returns None implicitly when the redis handle is unavailable or any
    error occurs; errors are logged and swallowed so the HTTP handler
    itself never raises.
    """
    try:
        redis_obj = gputils.get_redis_obj()
        if redis_obj is not None:
            return redis_obj.dbsize()
    # `except Exception as e` replaces the legacy `except Exception, e`
    # comma form (py2-only syntax, removed in Python 3; `as` works on 2.6+).
    except Exception as e:
        logging.error(str(e))
        logging.error(traceback.format_exc())
def GET(self):
    """Look up the cached classification hash for the ``url`` query param.

    Returns 'conn fail' when redis is unreachable, 'error pars' when the
    ``url`` parameter is missing or empty, otherwise the hash stored under
    the lower-cased URL (an empty dict when the key is absent).
    """
    redis_conn = gputils.get_redis_obj()
    if redis_conn is None:
        return 'conn fail'
    target = web.input().get('url')
    if not target:
        return 'error pars'
    return redis_conn.hgetall(target.lower())
def init(self):
    """Read publisher settings from the 'boot' config section and open redis.

    Stores the pub channel name and a redis handle (selected DB from config)
    on the instance, then runs side-effectful warm-up initialisation.
    """
    config = basedef.GCS.get_config_obj()
    pub_db_num = config.getint('boot', 'url_checker_pub_redis_num')
    self.url_checker_pub_channel = config.get('boot', 'url_checker_pub_channel')
    self.redobj = redis.Redis(db=pub_db_num)
    logging.info('url_checker_pub_redis_num:%s,%s', pub_db_num, self.redobj)
    logging.info('url_checker_pub_channel:%s', self.url_checker_pub_channel)
    # NOTE(review): these locals are created only for their side effects and
    # then discarded — presumably HttpQuery.init() and get_redis_obj() prime
    # module-level state; confirm before removing.
    http_checker = http_query.HttpQuery()
    http_checker.init()
    redis_match_obj = gputils.get_redis_obj()
def do_update(name):
    """Endless worker loop: pop unknown URLs from redis, classify them via
    the online checker, and write the results back to the match redis DB.

    ``name`` tags the global ``gstart_update`` status string (also mirrored
    into redis under 'update_info'). Runs forever; on error it logs, fires a
    best-effort system warning, sleeps 10s and keeps going.
    """
    global gstart_update
    gstart_update = name + " " + time.strftime("%Y-%m-%d %H:%M:%S",
                                               time.localtime())
    global redis_obj
    redis_obj = get_unknow_redis_db()
    if redis_obj is None:
        return
    redis_match_obj = gputils.get_redis_obj()
    redis_match_obj.set('update_info', gstart_update)
    queryobj = http_query.HttpQuery()
    queryobj.init()
    lasttime = 0
    while True:
        # Once a day (hour == "00", at most every 24h) run the maintenance
        # task; deliberate best-effort, failures are ignored.
        try:
            if (time.strftime("%H", time.localtime()) == "00"
                    and time.time() - lasttime > 3600 * 24):
                lasttime = time.time()
                zwonderwoman()
        except Exception:
            pass
        try:
            if redis_match_obj is None:
                raise NameError('redis not init')
            unknow_urls = pop_all_unknow_urls(redis_obj)
            if not unknow_urls:
                logging.warning("no unknow urls to check")
                time.sleep(2)
                continue
            updating_url_infos = {}
            for it in unknow_urls:
                if it is None:
                    continue
                # SECURITY: eval() of a redis-sourced string executes
                # arbitrary code if an attacker can write to this queue —
                # switch to ast.literal_eval / json once the producer format
                # is confirmed to be plain literals.
                checking_url_info = eval(it)
                url = checking_url_info['url']
                # Retry up to 10 times; the checker returns 1 as a transient
                # "try again" marker.
                url_info = None
                trytimes = 10
                while trytimes > 0:
                    url_info = queryobj.http_check_url_type(url)
                    if url_info != 1:
                        break
                    trytimes -= 1
                if trytimes == 0:
                    logging.warning('exceed try times!!')
                if url_info is None or url_info == 1:
                    continue
                urlinfo = {
                    'urltype': url_info[0],
                    'eviltype': url_info[1],
                    'evilclass': url_info[2],
                    'redirect_type': 'file',
                    'redirect_target': 'safe_navigate.html',
                    'urlclass': url_info[3],
                    'urlsubclass': url_info[4],
                    'update_time': int(time.time()),
                    'info_src': 'tx_online_query',
                }
                # Only relevant when redirect_type is 'url' (dead branch with
                # the hard-coded 'file' above, kept for parity with callers
                # that may tweak redirect_type).
                if urlinfo['redirect_type'] == 'url':
                    target = urlinfo['redirect_target']
                    if target and not target.startswith('http://'):
                        urlinfo['redirect_target'] = 'http://' + target
                if (checking_url_info['need_save_log_redis'] == 1
                        and basedef.GSaveLogRedisPub):
                    sip = checking_url_info['sip']
                    sport = checking_url_info['sport']
                    visit_time = checking_url_info['visit_time']
                    useragent = checking_url_info['useragent']
                    referer = checking_url_info['referer']
                    basedef.GSaveLogRedisPub.save_url_info_with_src(
                        sip, sport, url, urlinfo['urltype'],
                        urlinfo['evilclass'], urlinfo['urlclass'],
                        visit_time, referer, useragent)
                updating_url_infos[url] = urlinfo
            if updating_url_infos:
                pip = gputils.get_redis_obj().pipeline()
                for url, update_info in updating_url_infos.items():
                    host1 = gputils.make_real_host(url.lower())
                    pip.hmset(host1, update_info)
                # BUG FIX: execute() previously lived only inside a
                # commented-out logging line, so the buffered hmset commands
                # were never actually sent to redis.
                pip.execute()
        except Exception as e:
            logging.error(str(e))
            logging.error(traceback.format_exc())
            try:
                basedef.GWARNING.sys_warning()
            except Exception:
                pass
            time.sleep(10)
def GET(self):
    """Report the updater's last-start status from the match redis DB."""
    # NOTE(review): the trailing comma makes this return a 2-tuple
    # ("start already ", <value>) rather than a concatenated string — web.py
    # will emit iterable pieces in sequence, so this may be intentional, but
    # confirm it wasn't meant to be `+`. Also raises if get_redis_obj()
    # returns None.
    return "start already ", gputils.get_redis_obj().get('update_info')