def parseforgeurl(src, urlsinfo):
    """Insert the URLs of one collected feed into the ``forgeurls`` table.

    ``urlsinfo`` is a sequence whose first element is the iterable of entry
    dicts.  Entries from the ``'evelsearcheng_txwz'`` source are read through
    their ``'engine'`` / ``'url'`` keys and stored as
    ``URL_TYPE_SEARCH_ENG_EVIL``; entries from any other source are read
    through ``'bn'`` / ``'n'`` and stored as ``URL_TYPE_FORGE``.

    Every URL is lower-cased, UTF-8 encoded and has ``%2f`` / ``%3a``
    un-escaped; it is inserted only when ``isurlin`` does not report it.
    """
    entries = urlsinfo[0]
    conn = db.getDbOrCreate()

    # ``src`` is fixed for the whole call, so choose the field names and the
    # stored URL type once instead of on every entry.
    if src == 'evelsearcheng_txwz':
        who_key, url_key, urltype = 'engine', 'url', URL_TYPE_SEARCH_ENG_EVIL
    else:
        who_key, url_key, urltype = 'bn', 'n', URL_TYPE_FORGE

    for entry in entries:
        forged_who = entry[who_key]
        address = entry[url_key].lower().encode('utf-8')
        forged_who = forged_who.encode('utf-8')
        address = address.replace("%2f", "/").replace("%3a", ":")
        if isurlin(conn, address):
            continue
        conn.execute(
            "insert into forgeurls (urlsrc,forgewho,url,urltype) values(%s,%s,%s,%s)",
            (src, forged_who, address, urltype))
def dumpsql2xml(fname=FILE_URL_RULES_NAME):
    """Export every row of ``redirecturlrules`` to an XML file.

    If ``fname`` already exists the user is prompted and must answer with a
    number equal to 1, otherwise nothing is written.  When the query reports
    zero rows a message is printed and the function returns.

    The document root is ``<doc version="1.0">``.  Each row becomes one rule
    element whose name attribute is column 1 and whose child elements carry
    columns 2..7.  The file is written as UTF-8 and then handed to
    ``pretty_xmlfile``.
    """
    if os.path.isfile(fname):
        answer = raw_input(
            'file {0} existed, ok to overwrite ? press 1 to continue\n'.format(fname))
        if not (answer.isdigit() and int(answer) == 1):
            return

    conn = db.getDbOrCreate()
    if conn.execute('select * from redirecturlrules') == 0:
        print('there is no records in database')
        return

    # (child element tag, row column) pairs, in the order the children are emitted.
    child_columns = (
        (RULE_ATTR_NAME_host, 2),
        (RULE_ATTR_NAME_redirect_type, 3),
        (RULE_ATTR_NAME_req, 4),
        (RULE_ATTR_NAME_redirect_target, 5),
        (RULE_ATTR_NAME_req_match_method, 6),
        (RULE_ATTR_NAME_full_url, 7),
    )

    root = ET.Element('doc')
    root.attrib['version'] = '1.0'
    for row in conn.fetchall():
        rule_node = ET.SubElement(root, RULE_ATTR_NAME_rule)
        rule_node.attrib[RULE_ATTR_NAME_name] = row[1]
        for tag, column in child_columns:
            ET.SubElement(rule_node, tag).text = row[column]

    ET.ElementTree(root).write(fname, encoding="UTF-8")
    pretty_xmlfile(fname)
    print('\n\n')
def update_url_check_stat(ginfo):
    """Add the in-memory URL counters to today's ``url_check_stat`` row.

    Args:
        ginfo: mutable mapping holding ``'url_visit_count'`` and
            ``'url_block_count'``, the counts accumulated since the last
            flush.

    Returns:
        Whatever the database object's ``execute`` returns for the UPDATE.

    Both counters are reset to 0 only after the UPDATE has been issued
    without raising, so a failing statement leaves them intact for the next
    attempt.
    """
    visit_count = ginfo['url_visit_count']
    block_count = ginfo['url_block_count']
    # Look the date up once and reuse it for the WHERE clause.
    today = db.get_today_visit_count_date()
    conn = db.getDbOrCreate()
    update = '''update url_check_stat set url_visit_count=url_visit_count+%s, url_block_count=url_block_count +%s where date=%s'''
    result = conn.execute(update, (visit_count, block_count, today))
    # Resetting before the UPDATE would lose the visit count (but not the
    # block count) whenever execute() raises.
    ginfo['url_visit_count'] = 0
    ginfo['url_block_count'] = 0
    return result
def CollectThread(name):
    """Collector loop: poll every feed in ``urlsrcs``, then sleep one hour.

    For each ``(src, url)`` pair the URL is fetched and parsed as JSON.  A
    response whose ``reCode`` is 0 and which has a ``data`` entry is handed to
    ``parseforgeurl``.  After each round the row count of ``forgeurls`` is
    written to an ``info.txt`` file whose location depends on the platform.

    A feed that cannot be fetched or does not return valid JSON is logged and
    skipped; it does not stop the loop.

    Args:
        name: not used by the body.

    This function never returns.
    """
    pl = platform.platform()
    iround = 1
    while 1:
        for src, url in urlsrcs.items():
            logging.info('collecting..from:{0}. url:{1}..'.format(src, url))
            try:
                response = urllib2.urlopen(url)
            except urllib2.HTTPError as e:
                logging.warning(e.code)
                # Without this, the code below would use an unbound (or a
                # previous feed's) response object.
                continue
            except urllib2.URLError as e:
                # Non-HTTP failures (DNS, refused connection, ...).
                logging.warning(e.reason)
                continue

            try:
                body = response.read()
            finally:
                response.close()

            try:
                res = json.loads(body)
            except ValueError as e:
                logging.warning('invalid json from:{0}. {1}'.format(src, e))
                continue

            if res['reCode'] != 0:
                logging.warning('error code:' + str(res['reCode']))
                continue
            if 'data' in res:
                # parseforgeurl reads its entries from element 0 of the
                # sequence it receives; pass the payload explicitly rather
                # than relying on the ordering of res.values().
                parseforgeurl(src, [res['data']])

        logging.info('round:{0} done!!!'.format(iround))
        iround = iround + 1

        dbobj = db.getDbOrCreate()
        if pl.startswith("Window"):
            path = 'g:/info.txt'
        else:
            path = '/usr/share/nginx/html/info.txt'

        dbobj.execute('''select count(*) from forgeurls''')
        result = dbobj.fetchall()
        print(result)
        if result:
            logging.info('write count')
            with open(path, "w") as fp:
                fp.write(str(result[0][0]))

        time.sleep(60 * 60)
def load_url_rules(self):
    """Rebuild ``self.dictRules`` from the ``redirecturlrules`` table.

    ``self.dictRules`` maps a lookup key to a list of
    ``basedef.InterceptRule`` objects.  For every row:

    * Columns 1..7 are read as name, host, redirect type, request rule,
      redirect data, match method and full URL.  A NULL host, request rule
      or full URL becomes ``""``; every value is then converted with
      ``str()``.
    * When a full URL is present it overrides host and request: a leading
      ``http://`` is dropped and the remainder is split at the first ``/``
      (the request defaults to ``"/"``).  Otherwise only a leading
      ``http://`` on the host is dropped.
    * The rule is stored under the full URL, or under the host when there
      is no full URL.  A key without ``/`` gets a trailing ``/``.
    * When a full URL is present, a deep copy of the rule is also stored
      under the alias key with the ``www.`` prefix toggled, and the copy's
      host is toggled the same way.

    For rules of the "file" redirect type whose redirect data names an
    existing file, the file content is loaded into ``strRedirectDataIfFile``.
    """
    self.dictRules = {}
    dbobj = db.getDbOrCreate()
    dbobj.execute('''select * from redirecturlrules''')

    for item in dbobj.fetchall():
        rulename = item[1]
        if len(rulename) == 0:
            # Default rule name; roughly "generic protection redirect".
            rulename = "通用保护重定向"

        v = {
            'name': rulename,
            'host': item[2],
            'RedirectType': item[3],
            'reqrule': item[4],
            'newurldata': item[5],
            'urlMatchMethod': item[6],
            'full_url': item[7],
        }
        for key in ('host', 'full_url', 'reqrule'):
            if v[key] is None:
                v[key] = ""
        v = dict((key, str(value)) for key, value in v.items())

        ruleitem = basedef.InterceptRule()
        ruleitem.strRedirectData = v['newurldata']
        ruleitem.strRedirectType = v['RedirectType']
        ruleitem.mstrUrlHost = v['host'].lower()
        ruleitem.strUrlReq = v['reqrule'].lower()
        ruleitem.strMatchMethod = v['urlMatchMethod']
        ruleitem.strRuleName = v['name']
        ruleitem.strfullUrl = v['full_url'].lower()

        if (ruleitem.strRedirectType == basedef.RULE_ATTR_NAME_redirect_type_file
                and os.path.isfile(ruleitem.strRedirectData)):
            # Close the handle deterministically instead of leaking it.
            with open(ruleitem.strRedirectData, 'r') as fp:
                ruleitem.strRedirectDataIfFile = fp.read()

        full_url = v['full_url']
        if len(full_url) > 0:
            full_url = full_url.lower()
            if full_url.startswith('http://'):
                full_url = full_url[7:]
            pos1 = full_url.find('/')
            if pos1 != -1:
                ruleitem.mstrUrlHost = full_url[:pos1]
                ruleitem.strUrlReq = full_url[pos1:]
            else:
                ruleitem.mstrUrlHost = full_url
                ruleitem.strUrlReq = "/"
        elif ruleitem.mstrUrlHost.startswith('http://'):
            ruleitem.mstrUrlHost = ruleitem.mstrUrlHost[7:]

        if len(ruleitem.mstrUrlHost) == 0 and len(ruleitem.strUrlReq) == 0:
            logging.error('wrong rule!!!!!!:' + rulename)

        # Primary key: the full URL when there is one, else the host.
        newkey = None
        had_www = full_url.startswith('www.')
        dickey = full_url
        if len(dickey) == 0:
            dickey = ruleitem.mstrUrlHost
        elif had_www:
            newkey = full_url[4:]
        else:
            newkey = "www." + full_url

        if dickey.find("/") == -1:
            dickey = dickey + "/"
        # Direct dict membership; ``.keys()`` would build a list per rule.
        self.dictRules.setdefault(dickey, []).append(ruleitem)

        if newkey is None:
            continue

        # Alias entry with the "www." prefix toggled.
        if newkey.find("/") == -1:
            newkey = newkey + "/"
        ruleitem2 = copy.deepcopy(ruleitem)
        # Decide from the original URL, not from the alias: for a URL such
        # as "www.www.x" the alias itself still starts with "www.".
        if had_www:
            ruleitem2.mstrUrlHost = ruleitem2.mstrUrlHost[4:]
        else:
            ruleitem2.mstrUrlHost = "www." + ruleitem2.mstrUrlHost
        self.dictRules.setdefault(newkey, []).append(ruleitem2)