"""Scrape configuration models from www.epicc.com.cn.

For every row of the ``4shoudong`` table whose ``status`` is 0, fill the
``peizhixinghao`` POST template with the row's ``groupId`` and
``engineDesc`` values and post it to the ``getCarModelFromJYDB`` endpoint.
"""
import json
import os

import kl_db
import kl_http
import kl_log
from postdata import postdata

addnum = 0
http = kl_http.kl_http()
log = kl_log.kl_log('brand')
# NOTE(review): database credentials are hard-coded in source; consider
# loading them from configuration or the environment instead.
db = kl_db.mysql({
    'host': 'localhost',
    'user': '******',
    'passwd': 'adminrootkl',
    'db': 'qiche',
    'prefix': 'kl_',
    'charset': 'utf8',
})
http.autoUserAgent = True
# One header per line. The original text ran "X-Requested-With" straight into
# "Content-Type" ("XMLHttpRequestContent-Type: ..."), so Content-Type was not
# a header of its own. Presumably setheaders() splits on newlines -- verify
# against kl_http.
http.setheaders('''\
Host:www.epicc.com.cn
Origin:http://www.epicc.com.cn
Referer:http://www.epicc.com.cn/ecar/proposal/normalProposal
User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36
X-Requested-With:XMLHttpRequest
Content-Type: application/x-www-form-urlencoded\
''')

try:
    url = 'http://www.epicc.com.cn/ecar/car/carModel/getCarModelFromJYDB'
    brandlist = db.table('4shoudong').where({'status': 0}).order('id asc').getarr()
    for row in brandlist:
        # Substitute both placeholders of the POST template for this row.
        tjdata = (
            postdata['peizhixinghao']
            .replace('[GROUPID]', row['groupId'])
            .replace('[ENGINEDESC]', row['engineDesc'])
        )
        # The response is not consumed in the visible part of the script.
        r = http.posturl(url, tjdata)
except Exception:
    # The original handler is not part of the visible fragment; re-raise
    # instead of guessing so that failures are never silently swallowed.
    raise
"""Crawl douban.com movie tag pages and extract the movie entries.

Requests ``/tag/<m>/movie?start=<n>`` for every tag ``m`` in 1989..1999 and
every offset ``n`` in 0, 15, ..., 540, then pulls the link target and link
text out of each ``<dl>`` entry in the page's movie-list section.
"""
import sys
import re
import random

# The project libraries live outside the package path, so extend sys.path
# before importing them.
sys.path.append('../../lib/')
import kl_http
import kl_db

http = kl_http.kl_http()
http.setproxy('', '', '127.0.0.1:8087')
# NOTE(review): database credentials are hard-coded in source; consider
# loading them from configuration or the environment instead.
db = kl_db.mysql({
    'host': 'localhost',
    'user': '******',
    'passwd': 'adminrootkl',
    'db': 'douban',
    'prefix': 'kl_',
    'charset': 'utf8',
})
http.autoUserAgent = True

# Both patterns are loop-invariant, so they are compiled once up front.
# Raw strings keep the regex backslashes (\-, \s, \S) from being read as
# (invalid) string escape sequences; the pattern text itself is unchanged.
movie_list_re = re.compile(r'movie\-list[\s\S]*?paginator', re.S | re.I)
movie_item_re = re.compile(
    'dl[ALL]<a.*?href="(.*?)"[ALL]</a>[ALL]<a.*?>([ALL])</a>[ALL]</dl'
    .replace('[ALL]', r'[\s\S]*?')
)

try:
    for m in range(1989, 2000):
        for n in range(0, 550, 15):
            reurl = 'http://www.douban.com/tag/%s/movie?start=%s' % (m, n)
            print(reurl)
            r = http.geturl(reurl).read().decode()
            http.resetsession()
            # Find the movie-list section of the page.
            data = movie_list_re.findall(r)
            # Find the individual movies inside that section.
            # NOTE(review): data[0] raises IndexError when a page has no
            # movie-list section, which aborts both loops -- confirm that
            # this is the intended way to stop.
            item = movie_item_re.findall(data[0])
except Exception:
    # The original handler is not part of the visible fragment; re-raise
    # instead of guessing so that failures are never silently swallowed.
    raise
"""Query the ``proxy`` table for entries matching user-supplied filters.

Prompts for an area keyword and an anonymity level, then selects the rows
with ``status`` "1" whose ``area`` and ``niming`` columns match those inputs,
ordered by ascending ``response_time``.
"""
import sys
import time
import msvcrt  # Windows-only standard-library module

# The project libraries live outside the package path, so extend sys.path
# before importing them.
sys.path.append("../../lib/")
import kl_db

# NOTE(review): database credentials are hard-coded in source; consider
# loading them from configuration or the environment instead.
db = kl_db.mysql(
    {
        "host": "localhost",
        "user": "******",
        "passwd": "adminrootkl",
        "db": "spiders_db",
        "prefix": "kl_",
        "charset": "utf8",
    }
)

# The prompts ask (in Chinese) for the area keyword and the anonymity level
# to export.
keywords = input("导出区域关键字:")
niming = input("导出匿名等级:")

# Both text inputs are wrapped in SQL wildcards for a substring match.
# Presumably ["lt", "5"] means "response_time less than 5" -- verify against
# kl_db.
proxy_filter = {
    "status": "1",
    "area": ["like", "%" + keywords + "%"],
    "niming": ["like", "%" + niming + "%"],
    "response_time": ["lt", "5"],
}
# The visible fragment ends at .select(); the query is written without the
# original wrapping parenthesis so the statement is complete on its own.
proxylist = (
    db.table("proxy").where(proxy_filter).order("response_time asc").select()
)