Example #1
0
def parseforgeurl(src, urlsinfo):
    """Store newly seen URLs from one feed payload in the ``forgeurls`` table.

    :param src: feed identifier; it is written to the ``urlsrc`` column and
        selects which keys are read from every entry.
    :param urlsinfo: indexable container whose first element is the iterable
        of entry dicts (only ``urlsinfo[0]`` is used).

    For the ``'evelsearcheng_txwz'`` feed each entry is read through the
    ``'engine'`` / ``'url'`` keys and tagged ``URL_TYPE_SEARCH_ENG_EVIL``;
    every other feed uses the ``'bn'`` / ``'n'`` keys and ``URL_TYPE_FORGE``.
    """
    entries = urlsinfo[0]
    dbobj = db.getDbOrCreate()

    # The feed name is fixed for the whole call, so choose keys and type once.
    from_evil_search = src == 'evelsearcheng_txwz'
    who_key = 'engine' if from_evil_search else 'bn'
    url_key = 'url' if from_evil_search else 'n'
    urltype = URL_TYPE_SEARCH_ENG_EVIL if from_evil_search else URL_TYPE_FORGE

    insert_sql = "insert into forgeurls (urlsrc,forgewho,url,urltype) values(%s,%s,%s,%s)"

    for entry in entries:
        who = entry[who_key]
        link = entry[url_key]

        # Lower-case, encode to UTF-8, then undo the two percent-escapes
        # ("/" and ":") that this importer normalises.
        link = link.lower().encode('utf-8')
        who = who.encode('utf-8')
        link = link.replace("%2f", "/").replace("%3a", ":")

        # Skip URLs that isurlin() reports as already stored.
        if isurlin(dbobj, link):
            continue
        dbobj.execute(insert_sql, (src, who, link, urltype))
Example #2
0
def dumpsql2xml(fname=FILE_URL_RULES_NAME):
    """Export every row of ``redirecturlrules`` to an XML file.

    :param fname: destination path. When the file already exists the user is
        prompted on stdin and the export only proceeds if the answer is a
        digit string equal to 1.

    The document root is ``<doc version="1.0">``. Each row becomes one rule
    element whose name attribute is column 1 and whose child elements carry
    columns 2-7 as text. After writing, the file is passed to
    ``pretty_xmlfile``.
    """
    if os.path.isfile(fname):
        prompt = 'file {0} existed, ok to overwrite ? press 1 to continue\n'.format(fname)
        answer = raw_input(prompt)
        # Anything other than a numeric answer equal to 1 aborts the export.
        if not (answer.isdigit() and int(answer) == 1):
            return

    dbobj = db.getDbOrCreate()
    if dbobj.execute('select * from redirecturlrules') == 0:
        print('there is no records in database')
        return
    rows = dbobj.fetchall()

    # Child element tag -> row column, in the order the children are emitted.
    child_columns = (
        (RULE_ATTR_NAME_host, 2),
        (RULE_ATTR_NAME_redirect_type, 3),
        (RULE_ATTR_NAME_req, 4),
        (RULE_ATTR_NAME_redirect_target, 5),
        (RULE_ATTR_NAME_req_match_method, 6),
        (RULE_ATTR_NAME_full_url, 7),
    )

    doc = ET.Element('doc')
    doc.attrib['version'] = '1.0'

    for row in rows:
        rulenode = ET.SubElement(doc, RULE_ATTR_NAME_rule)
        rulenode.attrib[RULE_ATTR_NAME_name] = row[1]
        for tag, column in child_columns:
            child = ET.SubElement(rulenode, tag)
            child.text = row[column]

    ET.ElementTree(doc).write(fname, encoding="UTF-8")
    pretty_xmlfile(fname)
    print('\n\n')
Example #3
0
def update_url_check_stat(ginfo):
    """Add the in-memory URL counters to today's ``url_check_stat`` row.

    :param ginfo: mutable mapping holding the ``'url_visit_count'`` and
        ``'url_block_count'`` counters accumulated since the last flush.
    :returns: whatever the database wrapper's ``execute`` returns for the
        UPDATE statement.

    Both counters are reset to 0 only after the UPDATE has been issued. If
    ``execute`` raises, neither counter is touched, so the pending counts
    stay consistent and can be flushed by a later call.
    """
    url_visit_count = ginfo['url_visit_count']
    url_block_count = ginfo['url_block_count']

    # Resolve the date key once and reuse it for the statement.
    today = db.get_today_visit_count_date()

    obj = db.getDbOrCreate()
    update = '''update url_check_stat set url_visit_count=url_visit_count+%s, url_block_count=url_block_count +%s where date=%s'''
    result = obj.execute(update, (url_visit_count, url_block_count, today))

    # Counters were handed to the database; start accumulating from zero.
    ginfo['url_visit_count'] = 0
    ginfo['url_block_count'] = 0
    return result
Example #4
0
def CollectThread(name):
    """Poll every configured feed forever and publish the stored URL count.

    :param name: not used by the body; kept so the callable's signature
        stays unchanged for whoever starts it.

    Each round:

    1. Every ``(src, url)`` pair in ``urlsrcs`` is fetched and decoded as
       JSON. A feed whose request fails, or whose ``reCode`` is non-zero, is
       logged and skipped for this round.
    2. When the payload has a ``'data'`` key, that value is handed to
       ``parseforgeurl``.
    3. ``select count(*) from forgeurls`` is run and the count is written to
       the info file.
    4. The loop sleeps for one hour.
    """
    # The target file depends only on the host platform, so resolve it once.
    if platform.platform().startswith("Window"):
        path = 'g:/info.txt'
    else:
        path = '/usr/share/nginx/html/info.txt'

    iround = 1
    while 1:
        for src, url in urlsrcs.items():

            logging.info('collecting..from:{0}. url:{1}..'.format(src, url))
            try:
                response = urllib2.urlopen(url)
            except urllib2.HTTPError as e:
                # Without skipping here, `response` would be unbound (or
                # left over from the previous feed) further down.
                logging.warning(e.code)
                continue
            except urllib2.URLError as e:
                # Connection-level failure: same treatment, try next feed.
                logging.warning(e.reason)
                continue

            try:
                res = json.load(response)
            finally:
                response.close()

            if res['reCode'] != 0:
                logging.warning('error code:' + str(res['reCode']))
                continue

            if 'data' in res:
                # parseforgeurl reads element [0] of its second argument.
                # Pass the data explicitly rather than res.values(), whose
                # ordering is not guaranteed to put 'data' first.
                parseforgeurl(src, [res['data']])

        logging.info('round:{0} done!!!'.format(iround))
        iround = iround + 1

        dbobj = db.getDbOrCreate()
        dbobj.execute('''select count(*) from forgeurls''')
        result = dbobj.fetchall()
        print(result)
        if result:
            logging.info('write count')
            count = result[0][0]
            with open(path, "w") as fp:
                fp.write(str(count))
        time.sleep(60 * 60)
Example #5
0
    def load_url_rules(self):
        """Rebuild ``self.dictRules`` from the ``redirecturlrules`` table.

        ``self.dictRules`` maps a lookup key (a lower-cased URL or host that
        always contains at least one ``/``) to the list of
        ``basedef.InterceptRule`` objects registered under that key.

        Row columns are read by position: 1 rule name, 2 host, 3 redirect
        type, 4 request rule, 5 redirect data, 6 match method, 7 full URL.

        When a row has a full URL, the rule's host and request path are
        derived from it, and a deep-copied alias rule is registered under
        the ``www.``-toggled form of that URL (``www.`` stripped when
        present, added when absent).
        """
        self.dictRules = {}
        dbobj = db.getDbOrCreate()
        dbobj.execute('''select * from redirecturlrules''')

        for item in dbobj.fetchall():
            # An empty (or NULL) name falls back to the generic rule name.
            rulename = item[1]
            if not rulename:
                rulename = "通用保护重定向"

            # Normalise every column to str; the three optional columns
            # become "" instead of the string "None" when NULL.
            name = str(rulename)
            host = str(item[2]) if item[2] is not None else ""
            redirect_type = str(item[3])
            req_rule = str(item[4]) if item[4] is not None else ""
            redirect_data = str(item[5])
            match_method = str(item[6])
            full_url = str(item[7]) if item[7] is not None else ""

            ruleitem = basedef.InterceptRule()
            ruleitem.strRedirectData = redirect_data
            ruleitem.strRedirectType = redirect_type
            ruleitem.mstrUrlHost = host.lower()
            ruleitem.strUrlReq = req_rule.lower()
            ruleitem.strMatchMethod = match_method
            ruleitem.strRuleName = name
            ruleitem.strfullUrl = full_url.lower()

            # For file-type rules, cache the file content when it exists.
            if (ruleitem.strRedirectType == basedef.RULE_ATTR_NAME_redirect_type_file
                    and os.path.isfile(ruleitem.strRedirectData)):
                with open(ruleitem.strRedirectData, 'r') as fp:
                    ruleitem.strRedirectDataIfFile = fp.read()

            if full_url:
                # A full URL overrides host/request: split it at the first
                # "/" after dropping the scheme.
                full_url = full_url.lower()
                if full_url.startswith('http://'):
                    full_url = full_url[7:]
                host_part, slash, tail = full_url.partition('/')
                ruleitem.mstrUrlHost = host_part
                ruleitem.strUrlReq = (slash + tail) or "/"
            elif ruleitem.mstrUrlHost.startswith('http://'):
                ruleitem.mstrUrlHost = ruleitem.mstrUrlHost[7:]

            if not ruleitem.mstrUrlHost and not ruleitem.strUrlReq:
                # Logged only; the rule is still registered below.
                logging.error('wrong rule!!!!!!:' + name)

            # Primary key: the full URL when there is one, else the host.
            # Only full-URL rules get a "www."-toggled alias.
            dickey = full_url
            alias_key = None
            alias_host = None
            if not dickey:
                dickey = ruleitem.mstrUrlHost
            elif full_url.startswith('www.'):
                alias_key = full_url[4:]
                alias_host = ruleitem.mstrUrlHost[4:]
            else:
                alias_key = "www." + full_url
                alias_host = "www." + ruleitem.mstrUrlHost

            if "/" not in dickey:
                dickey = dickey + "/"
            self.dictRules.setdefault(dickey, []).append(ruleitem)

            if alias_key is not None:
                if "/" not in alias_key:
                    alias_key = alias_key + "/"
                # Independent copy so the alias can carry its own host.
                alias_rule = copy.deepcopy(ruleitem)
                alias_rule.mstrUrlHost = alias_host
                self.dictRules.setdefault(alias_key, []).append(alias_rule)