#encoding=utf-8
# Benchmark: keyword matching with one big `re` alternation versus
# smallgfw's GFW matcher, both fed the same keyword list and the same text.

# psyco is only an optional JIT accelerator; when it is not installed the
# benchmark should still run (just without the speed-up).
try:
    import psyco
except ImportError:
    pass
else:
    psyco.full()

import re
from time import time

from smallgfw import GFW

# One keyword per line; trailing whitespace/newlines are stripped.
# `with` guarantees the handles are closed instead of leaking until GC.
with open("keyword_base_s.txt") as keyword_file:
    kwd = [line.rstrip() for line in keyword_file]

# The text every benchmark round scans.
with open("text.txt") as text_file:
    text = text_file.read()

# GFW is handed the keywords as plain strings, so the regex must treat them
# literally as well: escape metacharacters, and drop empty entries because an
# empty alternative matches at every position and hides all later keywords.
regfilter = re.compile("|".join(re.escape(word) for word in kwd if word))

gfw = GFW()
gfw.set(kwd)


def test_check(ct):
    """Time `ct` scans of `text` with the regex and with GFW.

    ct -- number of repetitions run for each engine.

    Prints the elapsed wall-clock seconds of both runs; returns None.
    """
    # Single-argument print(...) prints exactly what the print statement did.
    print("check " + str(ct) + " times")

    start = time()
    for _ in xrange(ct):
        regfilter.findall(text)
    print("re cost: %s" % (time() - start,))

    start = time()
    for _ in xrange(ct):
        gfw.check(text)
    print("smallgfw cost: %s" % (time() - start,))

    print("===================================")


def test_replace(ct):
    """Print the banner line for a replace benchmark of `ct` repetitions."""
    print("replace " + str(ct) + " times")
import time
from BeautifulSoup import BeautifulSoup
import os
import traceback
import datetime
import gridfs
from kds import get_tieba_reply
from smallgfw import GFW
import os  # NOTE(review): repeats the `import os` above; harmless but redundant

# Turn a datetime into a float timestamp by feeding its UTC time tuple to
# time.mktime.
# NOTE(review): time.mktime interprets the tuple as *local* time, so this is
# a true epoch value only when the process runs in UTC -- confirm intent.
mktime = lambda dt: time.mktime(dt.utctimetuple())

######################db.init######################
# Connection to the MongoDB server on localhost:27017; `kds` is the `kds`
# attribute of that connection.
# NOTE(review): `pymongo` is not imported in the visible part of this file.
con = pymongo.Connection("localhost", 27017)
kds = con.kds
######################db.init######################

# Keyword filter fed with the newline-separated contents of keyword.txt.
# The path is relative, so it resolves against the current working directory.
# NOTE(review): the file object is never closed explicitly.
gfw = GFW()
gfw.set(open(os.path.join("keyword.txt")).read().split("\n"))


def get_html(url):
    """Print `url` and begin configuring a pycurl handle for fetching it.

    url -- address passed to the handle via pycurl.URL.

    NOTE(review): only the setup part is visible in this chunk; the rest of
    the body (including the handler that closes the `try`) lies outside it,
    so the return value and error behaviour cannot be stated from here.
    """
    print "url:", url
    html = ""
    try:
        crl = pycurl.Curl()
        crl.setopt(pycurl.VERBOSE, 1)  # libcurl debug output on
        crl.setopt(pycurl.FOLLOWLOCATION, 1)  # follow HTTP redirects...
        crl.setopt(pycurl.MAXREDIRS, 5)  # ...but at most 5 of them
        crl.setopt(pycurl.CONNECTTIMEOUT, 5)  # seconds allowed for connecting
        crl.setopt(pycurl.TIMEOUT, 30)  # seconds allowed for the whole transfer
        # In-memory buffer stored on the handle as `fp`; presumably the
        # response is written into it further down -- not visible here.
        crl.fp = StringIO.StringIO()
        crl.setopt(pycurl.URL, url)
# encoding=utf-8
# Benchmark: keyword matching with one big `re` alternation versus
# smallgfw's GFW matcher, both fed the same keyword list and the same text.
import re
from time import time

from smallgfw import GFW

# One keyword per line; trailing whitespace/newlines are stripped.
# `with` guarantees the handles are closed instead of leaking until GC.
with open("keyword_base_s.txt") as keyword_file:
    kwd = [line.rstrip() for line in keyword_file]

# The text every benchmark round scans.
with open("text.txt") as text_file:
    text = text_file.read()

# GFW is handed the keywords as plain strings, so the regex must treat them
# literally as well: escape metacharacters, and drop empty entries because an
# empty alternative matches at every position and hides all later keywords.
regfilter = re.compile("|".join(re.escape(word) for word in kwd if word))

gfw = GFW()
gfw.set(kwd)


def test_check(ct):
    """Time `ct` scans of `text` with the regex and with GFW.

    ct -- number of repetitions run for each engine.

    Prints the elapsed wall-clock seconds of both runs; returns None.
    """
    # Single-argument print(...) prints exactly what the print statement did.
    print("check " + str(ct) + " times")

    start = time()
    for _ in xrange(ct):
        regfilter.findall(text)
    print("re cost: %s" % (time() - start,))

    start = time()
    for _ in xrange(ct):
        gfw.check(text)
    print("smallgfw cost: %s" % (time() - start,))

    print("===================================")


def test_replace(ct):
    """Print the banner line for a replace benchmark of `ct` repetitions."""
    print("replace " + str(ct) + " times")
def mktime(dt):
    """Return a float timestamp for the datetime `dt`.

    The value is time.mktime() applied to dt.utctimetuple().
    NOTE(review): time.mktime interprets the tuple as *local* time, so the
    result is a true epoch value only when the process runs in UTC. Left
    unchanged because existing callers may depend on the current values.
    """
    return time.mktime(dt.utctimetuple())


######################db.init######################
# One MongoDB connection shared by both databases used below.
connection = pymongo.Connection('localhost', 27017)

# `kds` database and its `post` / `user` collections.
kds = connection.kds
post = kds.post
kdsuser = kds.user
#fs=gridfs.GridFS(kds,'postfile')

# `tieba` database and its `post` / `user` collections.
tieba = connection.tieba
tieba_post = tieba.post
tieba_user = tieba.user

# Shared `requests` session object.
browser = requests.session()

######################gfw.init######################
# Keyword filter loaded from keyword.txt located next to this module.
# The file is read inside `with` so the handle is closed right away instead
# of lingering until garbage collection.
gfw = GFW()
with open(os.path.join(os.path.dirname(__file__), 'keyword.txt')) as keyword_file:
    gfw.set(keyword_file.read().split('\n'))

# Second filter holding only the download-link prefixes.
lgfw = GFW()
lgfw.set(['thunder://', 'magnet:', 'ed2k://'])

# HTML snippet embedding the 51.la statistics script (with <noscript> fallback).
tongji = """ <center> <script language="javascript" type="text/javascript" src="http://js.users.51.la/5988086.js"></script> <noscript><a href="http://www.51.la/?5988086" target="_blank"> <img alt="我要啦免费统计" src="http://img.users.51.la/5988086.asp" style="border:none" /></a> </noscript> </center> """