import re
from urllib.parse import quote
import requests
from datetime import datetime

# China Customs government procurement
# hgcg_list = [5, 20, 'hgcg_list_url', 'hgcg_customs_gov_cn']
# http://hgcg.customs.gov.cn
# NOTE(review): `config`, `Redis` and `Mongo` are used below, but no import for
# them is visible in this fragment -- presumably imported earlier; confirm.
pagenum = config.hgcg_list[0]  # page depth
threads = config.hgcg_list[1]  # number of threads
dbname = config.hgcg_list[2]  # list URL queue

# Instantiate the data stores
save = Redis.save()
Save = Mongo.save(config.hgcg_list[3])

# One shared HTTP session plus the request headers sent to the site.
session = requests.session()
header = {
    'Host': 'hgcg.customs.gov.cn',
    'Origin': 'http://hgcg.customs.gov.cn',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,image/apng,*/*;q=0.8'
    ),
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/67.0.3396.99 Safari/537.36'
    ),
}
from lxml import etree, html from Storage import Redis, Mongo import config import re from datetime import datetime # 湖北公共资源交易中心 # hbggzy_list = [5, 20, 'hbggzy_list_url', 'hbggzy_gov_cn'] # http://www.hbggzy.cn pagenum = config.hbggzy_list[0] # 翻页深度 threads = config.hbggzy_list[1] # 线程数 dbname = config.hbggzy_list[2] # 列表url队列 # 数据存储实例化 save = Redis.save() Save = Mongo.save(config.hbggzy_list[3]) # 列表链接构造 def list_url(): l1 = [ 'http://www.hbggzy.cn/jydt/003001/003001001/{}.html'.format(i) for i in range(1, pagenum) ] l2 = [ 'http://www.hbggzy.cn/jydt/003001/003001002/{}.html'.format(i) for i in range(1, pagenum) ] l3 = [ 'http://www.hbggzy.cn/jydt/003001/003001003/{}.html'.format(i) for i in range(1, pagenum)
from lxml import etree, html from Storage import Redis, Mongo import config import re from datetime import datetime # 广西壮族自治区政府采购 # gxgp_list = [5, 20, 'gxgp_list_url', 'gxgp_gov_cn'] # http://www.gxgp.gov.cn pagenum = config.gxgp_list[0] # 翻页深度 threads = config.gxgp_list[1] # 线程数 dbname = config.gxgp_list[2] # 列表url队列 # 数据存储实例化 save = Redis.save() Save = Mongo.save(config.gxgp_list[3]) # 列表链接构造 def list_url(): l1 = [ 'http://www.gxgp.gov.cn/ygswz/index_{}.htm'.format(i) for i in range(1, pagenum) ] l2 = [ 'http://www.gxgp.gov.cn/cggkzb/index_{}.htm'.format(i) for i in range(1, pagenum) ] l3 = [ 'http://www.gxgp.gov.cn/cgjz/index_{}.htm'.format(i) for i in range(1, pagenum)
from lxml import etree, html from Storage import Redis, Mongo import config import re from datetime import datetime # 南宁市政府集中采购中心 # purchase_list = [5, 20, 'purchase_list_url', 'purchase_gov_cn'] # http://www.purchase.gov.cn pagenum = config.purchase_list[0] # 翻页深度 threads = config.purchase_list[1] # 线程数 dbname = config.purchase_list[2] # 列表url队列 # 数据存储实例化 save = Redis.save() Save = Mongo.save(config.purchase_list[3]) # 列表链接构造 def list_url(): l1 = [ 'http://www.purchase.gov.cn//cxqgsgg/index_{}.htm'.format(i) for i in range(1, pagenum) ] l2 = [ 'http://www.purchase.gov.cn//xqgshfgg/index_{}.htm'.format(i) for i in range(1, pagenum) ] l3 = [ 'http://www.purchase.gov.cn//sjcggg/index_{}.htm'.format(i) for i in range(1, pagenum)
from lxml import etree, html from Storage import Redis, Mongo import config import re from datetime import datetime # 内蒙古政府采购中心 # nmgzfcg_list = [5, 20, 'nmgzfcg_list_url', 'nmgzfcg_gov_cn'] # http://www.nmgzfcg.gov.cn pagenum = config.nmgzfcg_list[0] # 翻页深度 threads = config.nmgzfcg_list[1] # 线程数 dbname = config.nmgzfcg_list[2] # 列表url队列 # 数据存储实例化 save = Redis.save() Save = Mongo.save(config.nmgzfcg_list[3]) # 列表链接构造 def list_url(): l1 = ['http://www.nmgzfcg.gov.cn/nmzc/cgxmygg/A0918web_{}.htm'.format(i) for i in range(1, pagenum)] l2 = ['http://www.nmgzfcg.gov.cn/nmzc/jygg/zbgkyqgg/A094401web_{}.htm'.format(i) for i in range(1, pagenum)] l3 = ['http://www.nmgzfcg.gov.cn/nmzc/jygg/zbbggg/A094402web_{}.htm'.format(i) for i in range(1, pagenum)] l4 = ['http://www.nmgzfcg.gov.cn/nmzc/jygg/jzxtpgg/A094403web_{}.htm'.format(i) for i in range(1, pagenum)] l5 = ['http://www.nmgzfcg.gov.cn/nmzc/jygg/jtbggg/A094404web_{}.htm'.format(i) for i in range(1, pagenum)] l6 = ['http://www.nmgzfcg.gov.cn/nmzc/jygg/xjcggg/A094405web_{}.htm'.format(i) for i in
from lxml import etree, html
from Storage import Redis, Mongo
from datetime import datetime
import config

# Website: http://www.cqzb.gov.cn/
pagenum = config.cqzb_list[0]  # page depth
threads = config.cqzb_list[1]  # number of threads
dbname = config.cqzb_list[2]  # list URL queue

# Instantiate the data stores
save = Redis.save()
Save = Mongo.save(config.cqzb_list[3])


# List-link construction
def list_url():
    """Build the list-page URLs for the two cqzb.gov.cn sections.

    Returns:
        A list of URL strings: pages 1 through ``pagenum - 1`` of the
        ``class-5-1`` section, followed by the same pages of the
        ``class-5-45`` section (``range(1, pagenum)`` excludes ``pagenum``).
    """
    templates = (
        'http://www.cqzb.gov.cn/class-5-1({}).aspx',
        'http://www.cqzb.gov.cn/class-5-45({}).aspx',
    )
    # Outer loop over sections keeps each section's pages contiguous.
    return [
        template.format(page)
        for template in templates
        for page in range(1, pagenum)
    ]
from lxml import etree, html
from Storage import Redis, Mongo
import config
import re
from datetime import datetime

# Taiyuan municipal government procurement
# tyzfcg_list = [5, 20, 'tyzfcg_list_url', 'tyzfcg_gov_cn']
# http://www.tyzfcg.gov.cn
pagenum = config.tyzfcg_list[0]  # page depth
threads = config.tyzfcg_list[1]  # number of threads
dbname = config.tyzfcg_list[2]  # list URL queue

# Instantiate the data stores
save = Redis.save()
Save = Mongo.save(config.tyzfcg_list[3])


# List-link construction
def list_url():
    """Build the list-page URLs for bid types A, B and D.

    Returns:
        A list of URL strings: pages 1 through ``pagenum - 1`` for
        ``type=A``, then ``type=B``, then ``type=D``.
    """
    templates = (
        'http://www.tyzfcg.gov.cn/view.php?app=bid&type=A&id_name=&page={}',
        'http://www.tyzfcg.gov.cn/view.php?app=bid&type=B&id_name=&page={}',
        'http://www.tyzfcg.gov.cn/view.php?app=bid&type=D&id_name=&page={}',
    )
    urls = []
    for template in templates:
        urls.extend(template.format(page) for page in range(1, pagenum))
    return urls


# List parsing (detail URLs)
monkey.patch_all() import time from Parse import request from lxml import etree, html from Storage import Redis, Mongo import config from datetime import datetime # 网址 http://zzcg.ccgp.gov.cn pagenum = config.ccgp_list[0] # 翻页深度 threads = config.ccgp_list[1] # 线程数 dbname = config.ccgp_list[2] # 列表url队列 # 数据存储实例化 save = Redis.save() Save = Mongo.save(config.ccgp_list[3]) # 列表链接构造 def list_url(): zbgg = [ 'http://zzcg.ccgp.gov.cn/zbgg/index_{}.jhtml'.format(i) for i in range(1, pagenum) ] bggg = [ 'http://zzcg.ccgp.gov.cn/bggg/index_{}.jhtml'.format(i) for i in range(1, pagenum) ] jggg = [ 'http://zzcg.ccgp.gov.cn/jggg/index_{}.jhtml'.format(i) for i in range(1, pagenum)
from Parse import request
from lxml import etree, html
from Storage import Redis, Mongo
from datetime import datetime
import config

# Website: http://www.hebpr.cn/
pagenum = config.hebpr_list[0]  # page depth
threads = config.hebpr_list[1]  # number of threads
dbname = config.hebpr_list[2]  # list URL queue

# Instantiate the data stores
save = Redis.save()
Save = Mongo.save(config.hebpr_list[3])


# List-link construction
def list_url():
    """Build the list-page URLs for the single hebpr.cn section.

    Returns:
        A list of URL strings for pages 1 through ``pagenum - 1``.
    """
    template = 'http://www.hebpr.cn/hbjyzx/002/002009/002009001/{}.html'
    return list(map(template.format, range(1, pagenum)))


# List parsing (detail URLs)
# NOTE(review): the visible source is cut off partway through list_parse().
# The fragment is preserved verbatim below as a comment rather than guessed at:
#   def list_parse():
#       while True:
#           list_url = save.pop(name=dbname)
from lxml import etree, html from Storage import Redis, Mongo import config import re from datetime import datetime # 国家税务总局 # chinatax_list = [5, 20, 'chinatax_list_url', 'chinatax_gov_cn'] # http://www.chinatax.gov.cn pagenum = config.chinatax_list[0] # 翻页深度 threads = config.chinatax_list[1] # 线程数 dbname = config.chinatax_list[2] # 列表url队列 # 数据存储实例化 save = Redis.save() Save = Mongo.save(config.chinatax_list[3]) # 列表链接构造 def list_url(): l1 = [ 'http://www.chinatax.gov.cn/n810214/n810621/n810668/index.html', 'http://www.chinatax.gov.cn/n810214/n810621/n810673/index.html', 'http://www.chinatax.gov.cn/n810214/n810621/n2217729/index.html', 'http://www.chinatax.gov.cn/n810214/n810621/n3014142/index.html', ] l2 = [ 'http://www.chinatax.gov.cn/n810214/n810621/n810668/index_831221_{}.html' .format(i) for i in range(1, pagenum) ] l3 = [
import time, re from Parse import request from lxml import etree, html from Storage import Redis, Mongo import config from datetime import datetime # 网址 http://ggzy.sz.gov.cn pagenum = config.szggzy_list[0] # 翻页深度 threads = config.szggzy_list[1] # 线程数 dbname = config.szggzy_list[2] # 列表url队列 # 数据存储实例化 save = Redis.save() Save = Mongo.save(config.szggzy_list[3]) # 列表链接构造 def list_url(): gg = [ 'http://ggzy.sz.gov.cn/jyxx/zfcgxm/zbgg_zf/', 'http://ggzy.sz.gov.cn/jyxx/zfcgxm/zbcjgg_zf/' ] zbgg = [ 'http://ggzy.sz.gov.cn/jyxx/zfcgxm/zbcjgg_zf/index_{}.htm'.format(i) for i in range(1, pagenum) ] zbcjgg = [ 'http://ggzy.sz.gov.cn/jyxx/zfcgxm/zbgg_zf/index_{}.htm'.format(i) for i in range(1, pagenum)
from lxml import etree, html
from Storage import Redis, Mongo
import config
import re
from datetime import datetime

# National bidding information network
# zbtb_list = [5, 20, 'zbtb_list_url', 'zbtb_com_cn']
# http://zbtb.com.cn
pagenum = config.zbtb_list[0]  # page depth
threads = config.zbtb_list[1]  # number of threads
dbname = config.zbtb_list[2]  # list URL queue

# Instantiate the data stores
save = Redis.save()
Save = Mongo.save(config.zbtb_list[3])


# List-link construction
def list_url():
    """Build the list-page URLs for the two sections that are crawled.

    Returns:
        A list of URL strings: pages 1 through ``pagenum - 1`` of the
        ``gonggao-38046`` section followed by the same pages of the
        ``gongshi-38047`` section.
    """
    # The previous version also formatted URL lists for four more sections on
    # every call but never returned them. That dead work is dropped; the
    # templates are kept here for reference:
    #   http://zbtb.com.cn/caigou/yugao-38048--{}.html
    #   http://zbtb.com.cn/caigou/mianfei-38049--{}.html
    #   http://zbtb.com.cn/caigou/huiyuan-38093--{}.html
    #   http://zbtb.com.cn/caigou/xixun-38094--{}.html
    templates = (
        'http://zbtb.com.cn/caigou/gonggao-38046--{}.html',  # 2857
        'http://zbtb.com.cn/caigou/gongshi-38047--{}.html',
    )
    return [
        template.format(page)
        for template in templates
        for page in range(1, pagenum)
    ]


# List parsing (detail URLs)
# NOTE(review): the visible source is cut off right after the header of
# list_parse(). The fragment is preserved verbatim below as a comment:
#   def list_parse():
from lxml import etree, html from Storage import Redis, Mongo import config import re from datetime import datetime # 吉林省政府采购中心 # jlszfcg_list = [5, 20, 'jlszfcg_list_url', 'jlszfcg_gov_cn'] # http://www.jlszfcg.gov.cn pagenum = config.jlszfcg_list[0] # 翻页深度 threads = config.jlszfcg_list[1] # 线程数 dbname = config.jlszfcg_list[2] # 列表url队列 # 数据存储实例化 save = Redis.save() Save = Mongo.save(config.jlszfcg_list[3]) # 列表链接构造 def list_url(): api = 'http://www.jlszfcg.gov.cn/jilin/zbxxController.form?bidWay=' l1 = [ api + 'GKZB&declarationType=ZHAOBGG&declarationType=GSGG&pageNo={}'.format(i) for i in range(0, pagenum) ] l2 = [ api + 'YQZB&declarationType=ZHAOBGG&declarationType=GSGG&pageNo={}'.format(i) for i in range(0, pagenum) ]
from lxml import etree, html from Storage import Redis, Mongo import config import re from datetime import datetime # 湖北政府采购中心 # hubeigp_list = [5, 20, 'hubeigp_list_url', 'hubeigp_gov_cn'] # http://www.hubeigp.gov.cn pagenum = config.hubeigp_list[0] # 翻页深度 threads = config.hubeigp_list[1] # 线程数 dbname = config.hubeigp_list[2] # 列表url队列 # 数据存储实例化 save = Redis.save() Save = Mongo.save(config.hubeigp_list[3]) # 列表链接构造 def list_url(): l1 = [ 'http://www.hubeigp.gov.cn/hbscgzx/139295/139299/d6d12652-{}.html'. format(i) for i in range(1, pagenum) ] l2 = [ 'http://www.hubeigp.gov.cn/hbscgzx/139295/139315/a6618415-{}.html'. format(i) for i in range(1, pagenum) ] l3 = [ 'http://www.hubeigp.gov.cn/hbscgzx/139295/139303/7889ebc8-{}.html'. format(i) for i in range(1, pagenum)
import config
import json
import re
import time
from Parse import request
from Storage import Redis, Mongo
from lxml import etree

# Website: http://www.gzzbw.cn
from datetime import datetime

pagenum = config.gzzbw_list[0]  # page depth
threads = config.gzzbw_list[1]  # number of threads
dbname = config.gzzbw_list[2]  # list URL queue

# Instantiate the data stores
save = Redis.save()
Save = Mongo.save(config.gzzbw_list[3])


# List-link construction
def list_url():
    """Build the search-API URLs, one per result page.

    Returns:
        A list of URL strings with ``pageIndex`` set to 1 through
        ``pagenum - 1``.
    """
    # The query string used to read "...all®ion=5200...all¬iceType=all
    # ¬iceClassify=all...": "&reg" and "&not" had been decoded as HTML
    # entities, which removed the "&" separators and mangled three parameter
    # names. The intended "&region", "&noticeType" and "&noticeClassify"
    # are restored here.
    api = (
        'http://www.gzzbw.cn/api/trade/search?pubDate=all&region=5200'
        '&industry=all&prjType=all&noticeType=all&noticeClassify=all'
        '&pageIndex={}&args='
    )
    return [api.format(page) for page in range(1, pagenum)]


# List parsing (detail URLs)
# NOTE(review): the visible source is cut off partway through list_parse().
# The fragment is preserved verbatim below as a comment rather than guessed at:
#   def list_parse():
#       while True:
#           list_url = save.pop(name=dbname)
#           if list_url is not None:
#               response = request.get(list_url, header={
import config
import re
from datetime import datetime

# Changsha public resource trading supervision network
# csggzy_list = [5, 20, 'csggzy_list_url', 'csggzy_gov_cn']
# https://csggzy.gov.cn
# NOTE(review): `Redis`, `Mongo` and `request` are referenced in this fragment,
# but no import for them is visible -- presumably imported earlier; confirm.
pagenum = config.csggzy_list[0]  # page depth
threads = config.csggzy_list[1]  # number of threads
dbname = config.csggzy_list[2]  # list URL queue

# Instantiate the data stores
save = Redis.save()
Save = Mongo.save(config.csggzy_list[3])


# List-link construction
def list_url():
    """Build the list-page URLs for the notice index.

    Returns:
        A list of URL strings for pages 1 through ``pagenum - 1``. The
        query-string values are left as raw (non-percent-encoded) text,
        exactly as in the template.
    """
    template = 'https://csggzy.gov.cn/NoticeFile.aspx/Index/{}?type=undefined&Ptype=政府采购&Sm2=全部&Sm=政府采购'
    page_numbers = range(1, pagenum)
    return [template.format(page) for page in page_numbers]


# List parsing (detail URLs)
# NOTE(review): the visible source is cut off partway through list_parse().
# The fragment is preserved verbatim below as a comment rather than guessed at:
#   def list_parse():
#       while True:
#           list_url = save.pop(name=dbname)
#           if list_url is not None:
#               response = request.get(list_url)
from lxml import etree, html
from Storage import Redis, Mongo
import config
import re
from datetime import datetime

# Hainan Province public resource trading platform
# hainan_list = [5, 20, 'hainan_list_url', 'hainan_gov_cn']
# http://zw.hainan.gov.cn
pagenum = config.hainan_list[0]  # page depth
threads = config.hainan_list[1]  # number of threads
dbname = config.hainan_list[2]  # list URL queue

# Instantiate the data stores
save = Redis.save()
Save = Mongo.save(config.hainan_list[3])


# List-link construction
def list_url():
    """Build the list-page URLs for the two hainan.gov.cn sections.

    Returns:
        A list of URL strings: pages 1 through ``pagenum - 1`` of the
        ``cgzbgg`` section followed by the same pages of the ``cggg``
        section.
    """
    templates = (
        'http://zw.hainan.gov.cn/ggzy/ggzy/cgzbgg/index_{}.jhtml',
        'http://zw.hainan.gov.cn/ggzy/ggzy/cggg/index_{}.jhtml',
    )
    urls = []
    for template in templates:
        urls.extend(template.format(page) for page in range(1, pagenum))
    return urls


# List parsing (detail URLs)
# NOTE(review): the visible source is cut off partway through list_parse().
# The fragment is preserved verbatim below as a comment rather than guessed at:
#   def list_parse():
#       while True:
#           list_url = save.pop(name=dbname)
#           if list_url is None:
monkey.patch_all() import time, re from Parse import request from lxml import etree, html from Storage import Redis, Mongo import config from datetime import datetime # 网址 http://ggzyjy.dl.gov.cn/ pagenum = config.ggzyjy_list[0] # 翻页深度 threads = config.ggzyjy_list[1] # 线程数 dbname = config.ggzyjy_list[2] # 列表url队列 # 数据存储实例化 save = Redis.save() Save = Mongo.save(config.ggzyjy_list[3]) # 列表链接构造 def list_url(): cggg = [ 'http://ggzyjy.dl.gov.cn/TPFront/jyxx/071002/071002001/?pageing={}'. format(i) for i in range(1, pagenum) ] cgwjgs = [ 'http://ggzyjy.dl.gov.cn/TPFront/jyxx/071002/071002002/?pageing={}'. format(i) for i in range(1, pagenum) ] zbtz = [ 'http://ggzyjy.dl.gov.cn/TPFront/jyxx/071002/071002003/?pageing={}'. format(i) for i in range(1, pagenum)
monkey.patch_all() import time, re from Parse import request from lxml import etree, html from Storage import Redis, Mongo import config from datetime import datetime # 网址 http://www.sjzsxzspj.gov.cn pagenum = config.sjzsxzspj_list[0] # 翻页深度 threads = config.sjzsxzspj_list[1] # 线程数 dbname = config.sjzsxzspj_list[2] # 列表url队列 # 数据存储实例化 save = Redis.save() Save = Mongo.save(config.sjzsxzspj_list[3]) # 列表链接构造 def list_url(): zbgg = [ 'http://www.sjzsxzspj.gov.cn/zfzbgg/index_{}.jhtml'.format(i) for i in range(1, pagenum) ] zbggs = [ 'http://www.sjzsxzspj.gov.cn/zfzbgga/index_{}.jhtml'.format(i) for i in range(1, pagenum) ] bcgg = [ 'http://www.sjzsxzspj.gov.cn/gzgg/index_{}.jhtml'.format(i) for i in range(1, pagenum)