def __init__(self):
    """Configure the jszbtb.com crawler and prepare its output folders.

    Sets the crawl limits and the URL templates for tender and
    prequalification announcements, reads ``HEADERS``, ``BAT_FOLDER``,
    ``ZIP_FOLDER`` and ``SAVE_DIGEST_FOLDER`` from the project settings,
    and makes sure the three folders exist.
    """
    # Collect data published within this many days; 1 means today only.
    self.limit_days = 2
    # Upper bound on the number of list pages to walk.
    self.max_page = 50

    # Tender announcement list page (page number is formatted in).
    self.bidding_index_url = 'http://api.jszbtb.com/DataSyncApi/HomeTenderBulletin?PageSize=20&CurrentPage={}'
    # Prequalification announcement list page.
    self.check_index_url = 'http://api.jszbtb.com/DataSyncApi/HomeQulifyBulletin?PageSize=20&CurrentPage={}'
    # Tender announcement detail URL (API).
    self.bidding_detail_url = 'http://api.jszbtb.com/DataSyncApi/TenderBulletin/id/{}'
    # Tender announcement "real" detail URL (web site page).
    self.bidding_true_url = 'http://www.jszbtb.com/#/bulletindetail/TenderBulletin/{}'
    # Prequalification announcement detail URL (API).
    self.check_detail_url = 'http://api.jszbtb.com/DataSyncApi/QulifyBulletin/id/{}'
    # Prequalification announcement "real" detail URL (web site page).
    self.check_true_url = 'http://www.jszbtb.com/#/bulletindetail/QulifyBulletin/{}'

    self.wait_time = 2

    # Load the project settings once and reuse them for every lookup.
    settings = get_project_settings()
    self.headers = settings.get('HEADERS')
    self.tml_logger = init_tml_logger("bidding")
    self.bat_folder = settings.get('BAT_FOLDER')
    self.zip_folder = settings.get('ZIP_FOLDER')

    # Create the output folders. exist_ok=True replaces the racy
    # "check os.path.exists, then makedirs" sequence.
    for folder in (settings.get('SAVE_DIGEST_FOLDER'),
                   self.bat_folder,
                   self.zip_folder):
        os.makedirs(folder, exist_ok=True)
def __init__(self):
    """Configure the deal.ggzy.gov.cn crawler and prepare its output folders.

    Sets the crawl limits, the POST endpoint, the query date window and
    the request headers, reads ``BAT_FOLDER``, ``ZIP_FOLDER`` and
    ``SAVE_DIGEST_FOLDER`` from the project settings, and makes sure the
    three folders exist.
    """
    # Collect data published within this many days; 1 means today only.
    self.limit_days = 2
    # Upper bound on the number of list pages to walk.
    self.max_page = 50

    # Tender announcement list endpoint.
    self.bidding_post_url = 'http://deal.ggzy.gov.cn/ds/deal/dealList_find.jsp'

    # Take one timestamp so both ends of the date window are derived from
    # the same instant (two separate now() calls could straddle midnight).
    now = datetime.datetime.now()
    delta_day = 3
    self.end_day = now.strftime('%Y-%m-%d')
    self.begin_day = (now - datetime.timedelta(days=delta_day)).strftime('%Y-%m-%d')

    self.wait_time = 2
    self.headers = {
        'Origin': 'http://deal.ggzy.gov.cn',
        'Referer': 'http://deal.ggzy.gov.cn/ds/deal/dealList.jsp',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
    }
    self.tml_logger = init_tml_logger("bidding_qgggzy")

    # Load the project settings once and reuse them for every lookup.
    settings = get_project_settings()
    self.bat_folder = settings.get('BAT_FOLDER')
    self.zip_folder = settings.get('ZIP_FOLDER')

    # Create the output folders. exist_ok=True replaces the racy
    # "check os.path.exists, then makedirs" sequence.
    for folder in (settings.get('SAVE_DIGEST_FOLDER'),
                   self.bat_folder,
                   self.zip_folder):
        os.makedirs(folder, exist_ok=True)
def __init__(self):
    """Configure the zbtb.gxi.gov.cn crawler and prepare its output folders.

    Sets the crawl limits and the list-page URL template, reads
    ``HEADERS``, ``BAT_FOLDER``, ``ZIP_FOLDER`` and ``SAVE_DIGEST_FOLDER``
    from the project settings, and makes sure the three folders exist.
    """
    # Collect data published within this many days; 1 means today only.
    self.limit_days = 2
    # Upper bound on the number of list pages to walk.
    self.max_page = 50

    # Tender announcement list page (page number is formatted in).
    self.bidding_index_url = 'http://zbtb.gxi.gov.cn:9000/xxfbcms/category/bulletinList.html?searchDate=1995-07-25&dates=300&word=&categoryId=88&industryName=&area=&status=&publishMedia=&sourceInfo=&showStatus=1&page={}'

    self.wait_time = 2

    # Load the project settings once and reuse them for every lookup.
    settings = get_project_settings()
    self.headers = settings.get('HEADERS')
    self.tml_logger = init_tml_logger("bidding")
    self.bat_folder = settings.get('BAT_FOLDER')
    self.zip_folder = settings.get('ZIP_FOLDER')

    # Create the output folders. exist_ok=True replaces the racy
    # "check os.path.exists, then makedirs" sequence.
    for folder in (settings.get('SAVE_DIGEST_FOLDER'),
                   self.bat_folder,
                   self.zip_folder):
        os.makedirs(folder, exist_ok=True)
def __init__(self):
    """Configure the ccgp-jiangsu.gov.cn crawler and prepare its output folders.

    Sets the crawl limits, one list-page URL template per announcement
    category, the query date window and the request headers, reads
    ``BAT_FOLDER``, ``ZIP_FOLDER`` and ``SAVE_DIGEST_FOLDER`` from the
    project settings, and makes sure the three folders exist.
    """
    # Collect data published within this many days; 1 means today only.
    self.limit_days = 2
    # Upper bound on the number of list pages to walk.
    self.max_page = 50

    # Open tender announcements.
    self.gkzbgg_url = 'http://www.ccgp-jiangsu.gov.cn/ggxx/gkzbgg/index_{}.html'
    # Qualification review announcements.
    self.zgysgg_url = 'http://www.ccgp-jiangsu.gov.cn/ggxx/zgysgg/index_{}.html'
    # Invited tender announcements.
    self.yqzbgg_url = 'http://www.ccgp-jiangsu.gov.cn/ggxx/yqzbgg/index_{}.html'
    # Competitive negotiation announcements.
    self.jztbgg_url = 'http://www.ccgp-jiangsu.gov.cn/ggxx/jztbgg/index_{}.html'
    # Competitive consultation announcements.
    self.jzqsgg_url = 'http://www.ccgp-jiangsu.gov.cn/ggxx/jzqsgg/index_{}.html'
    # Single-source procurement announcements.
    self.dylygg_url = 'http://www.ccgp-jiangsu.gov.cn/ggxx/dylygg/index_{}.html'
    # Price inquiry announcements.
    self.xjgg_url = 'http://www.ccgp-jiangsu.gov.cn/ggxx/xjgg/index_{}.html'
    # Bid-winning announcements.
    self.zbgg_url = 'http://www.ccgp-jiangsu.gov.cn/ggxx/zbgg/index_{}.html'
    # Transaction (deal closed) announcements.
    self.cgcjgg_url = 'http://www.ccgp-jiangsu.gov.cn/ggxx/cgcjgg/index_{}.html'

    # Take one timestamp so both ends of the date window are derived from
    # the same instant (two separate now() calls could straddle midnight).
    now = datetime.datetime.now()
    delta_day = 3
    self.end_day = now.strftime('%Y-%m-%d')
    self.begin_day = (now - datetime.timedelta(days=delta_day)).strftime('%Y-%m-%d')

    self.wait_time = 2
    # NOTE(review): Origin/Referer point at deal.ggzy.gov.cn although every
    # URL above targets www.ccgp-jiangsu.gov.cn - possibly copied from
    # another crawler; confirm before changing.
    self.headers = {
        'Origin': 'http://deal.ggzy.gov.cn',
        'Referer': 'http://deal.ggzy.gov.cn/ds/deal/dealList.jsp',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
    }
    self.tml_logger = init_tml_logger("bidding_jszfcg")

    # Load the project settings once and reuse them for every lookup.
    settings = get_project_settings()
    self.bat_folder = settings.get('BAT_FOLDER')
    self.zip_folder = settings.get('ZIP_FOLDER')

    # Create the output folders. exist_ok=True replaces the racy
    # "check os.path.exists, then makedirs" sequence.
    for folder in (settings.get('SAVE_DIGEST_FOLDER'),
                   self.bat_folder,
                   self.zip_folder):
        os.makedirs(folder, exist_ok=True)
def __init__(self):
    """Configure the hndzzbtb.hndrc.gov.cn crawler and prepare its output folders.

    Sets the count/list URL templates, the category numbers, the crawl
    limits, the query date window and the request headers, reads
    ``BAT_FOLDER``, ``ZIP_FOLDER`` and ``SAVE_DIGEST_FOLDER`` from the
    project settings, and makes sure the three folders exist.
    """
    # The query strings previously contained "title=×tart=": the
    # "&timestart=" parameter had been mangled by HTML-entity decoding
    # ("&times" -> "×"). The intended parameter name is restored here.
    self.Count_url = 'http://hndzzbtb.hndrc.gov.cn/services/hl/getCount?response=application/json&day=&sheng=x1&qu=&xian=&title=&timestart=&timeend=&categorynum={}'
    self.List_url = 'http://hndzzbtb.hndrc.gov.cn/services/hl/getSelect?response=application/json&pageIndex={}&pageSize=22&day=&sheng=x1&qu=&xian=&title=&timestart=&timeend=&categorynum={}'

    # Tender announcements.
    self.bidding_categorynum = '002001001'
    # Procurement announcements.
    self.buy_categorynum = '002002001'
    # Result announcements.
    self.end_categorynum = '002002003'

    # Collect data published within this many days; 1 means today only.
    self.limit_days = 2
    # Upper bound on the number of list pages to walk.
    self.max_page = 50

    # Take one timestamp so both ends of the date window are derived from
    # the same instant (two separate now() calls could straddle midnight).
    now = datetime.datetime.now()
    self.delta_day = 90
    self.end_day = now.strftime('%Y:%m:%d')
    self.begin_day = (now - datetime.timedelta(days=self.delta_day)).strftime('%Y:%m:%d')

    self.headers = {
        'accept': "application/json, text/javascript, */*; q=0.01",
        'referer': "http://hndzzbtb.hndrc.gov.cn/002/tradePublic.html",
        'x-requested-with': "XMLHttpRequest",
        'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36",
        'cache-control': "no-cache",
    }
    self.wait_time = 2
    self.tml_logger = init_tml_logger("bidding")

    # Load the project settings once and reuse them for every lookup.
    settings = get_project_settings()
    self.bat_folder = settings.get('BAT_FOLDER')
    self.zip_folder = settings.get('ZIP_FOLDER')

    # Create the output folders. exist_ok=True replaces the racy
    # "check os.path.exists, then makedirs" sequence.
    for folder in (settings.get('SAVE_DIGEST_FOLDER'),
                   self.bat_folder,
                   self.zip_folder):
        os.makedirs(folder, exist_ok=True)
def __init__(self):
    """Configure the lnwlzb.com crawler and prepare its output folders.

    Sets the list-page URL template, the crawl limits and the request
    headers, reads ``BAT_FOLDER``, ``ZIP_FOLDER`` and
    ``SAVE_DIGEST_FOLDER`` from the project settings, and makes sure the
    three folders exist.
    """
    # Government procurement list page (page index is formatted in).
    self.gov_url = 'http://www.lnwlzb.com/EpointWebBuilder_lngc/jyxxInfoAction.action?cmd=getInfolist&fbdate=&jyfrom=&ywtype=&xxtype=&title=&pageSize=10&pageIndex={}'

    # Collect data published within this many days; 1 means today only.
    self.limit_days = 2
    # Upper bound on the number of list pages to walk.
    self.max_page = 50

    # NOTE(review): the cookie carries a hard-coded JSESSIONID; presumably
    # it was captured from a browser session - confirm it is still needed.
    self.headers = {
        'pragma': "no-cache",
        'accept-encoding': "gzip, deflate",
        'accept-language': "zh-CN,zh;q=0.9",
        'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36",
        'content-type': "application/json;charset=utf-8",
        'accept': "application/json, text/javascript, */*; q=0.01",
        'cache-control': "no-cache",
        'x-requested-with': "XMLHttpRequest",
        'cookie': "JSESSIONID=CC1DC68DD50AB48580A71BBFCC7360D5",
        'connection': "keep-alive",
        'referer': "http://www.lnwlzb.com/tradeinfo.html",
    }
    self.wait_time = 2
    self.tml_logger = init_tml_logger("bidding")

    # Load the project settings once and reuse them for every lookup.
    settings = get_project_settings()
    self.bat_folder = settings.get('BAT_FOLDER')
    self.zip_folder = settings.get('ZIP_FOLDER')

    # Create the output folders. exist_ok=True replaces the racy
    # "check os.path.exists, then makedirs" sequence.
    for folder in (settings.get('SAVE_DIGEST_FOLDER'),
                   self.bat_folder,
                   self.zip_folder):
        os.makedirs(folder, exist_ok=True)
def __init__(self):
    """Configure the ztb.guizhou.gov.cn crawler and prepare its output folders.

    Sets the count/list URL templates, the notice-type identifiers, the
    crawl limits and the request headers, reads ``BAT_FOLDER``,
    ``ZIP_FOLDER`` and ``SAVE_DIGEST_FOLDER`` from the project settings,
    and makes sure the three folders exist.
    """
    # The query strings previously contained characters produced by
    # HTML-entity decoding of parameter names ("&times" -> "×",
    # "&reg" -> "®", "&not" -> "¬"). The intended "&timestart",
    # "&region", "&noticeType" and "&noticeClassify" are restored here.
    # NOTE(review): Count_url targets hndzzbtb.hndrc.gov.cn while List_url
    # targets ztb.guizhou.gov.cn - possibly a leftover from another
    # crawler; confirm whether Count_url is used at all.
    self.Count_url = 'http://hndzzbtb.hndrc.gov.cn/services/hl/getCount?response=application/json&day=&sheng=x1&qu=&xian=&title=&timestart=&timeend=&categorynum={}'
    self.List_url = 'http://ztb.guizhou.gov.cn/api/trade/search?pubDate=all&region=5200&industry=all&prjType=all&noticeType={}&noticeClassify=all&pageIndex={}&args='

    # Tender announcements.
    self.bidding_categorynum = 'affiche'
    # Bid-winning announcements.
    self.win_categorynum = 'publicity'

    # Collect data published within this many days; 1 means today only.
    self.limit_days = 2
    # Upper bound on the number of list pages to walk.
    self.max_page = 50

    # NOTE(review): the referer also points at hndzzbtb.hndrc.gov.cn
    # rather than the Guizhou site - confirm.
    self.headers = {
        'accept': "application/json, text/javascript, */*; q=0.01",
        'referer': "http://hndzzbtb.hndrc.gov.cn/002/tradePublic.html",
        'x-requested-with': "XMLHttpRequest",
        'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36",
        'cache-control': "no-cache",
    }
    self.wait_time = 2
    self.tml_logger = init_tml_logger("bidding_gzzb")

    # Load the project settings once and reuse them for every lookup.
    settings = get_project_settings()
    self.bat_folder = settings.get('BAT_FOLDER')
    self.zip_folder = settings.get('ZIP_FOLDER')

    # Create the output folders. exist_ok=True replaces the racy
    # "check os.path.exists, then makedirs" sequence.
    for folder in (settings.get('SAVE_DIGEST_FOLDER'),
                   self.bat_folder,
                   self.zip_folder):
        os.makedirs(folder, exist_ok=True)