def extract_kbdd(province, html, text, lines, table_line, table):
    """Collect candidates for the '开标地点' (bid-opening location) field.

    Only ``lines`` and ``html`` are read; the remaining parameters are
    accepted so the signature matches the other ``extract_*`` functions.

    Returns:
        Whatever ``get_finally_result`` produces from the collected
        candidates.

    NOTE(review): a second ``extract_kbdd`` is defined later in this file;
    if both live at module level, the later definition replaces this one.
    """
    label = '开标地点'
    candidates = []
    # A regex-based extraction over ``text`` used to run here but had been
    # commented out, so only the two helper lookups remain.
    # Each helper receives the shared list as its third argument; presumably
    # it appends what it finds (helpers are defined elsewhere -- confirm).
    get_value_by_split_maohao(lines, label, candidates)
    get_value_by_next_td(html, label, candidates)
    return get_finally_result(candidates)
def extract_jsgm(province, html, text, lines, table_line, table):
    """Collect candidates for the '规模' (scale) field and pick one.

    Only ``lines`` and ``html`` are read; the remaining parameters are
    accepted so the signature matches the other ``extract_*`` functions.

    Returns:
        Whatever ``get_finally_result`` produces from the collected
        candidates.
    """
    label = '规模'
    candidates = []
    # A regex-based extraction over ``text`` used to run here but had been
    # commented out, so only the two helper lookups remain.
    # Each helper receives the shared list as its third argument; presumably
    # it appends what it finds (helpers are defined elsewhere -- confirm).
    get_value_by_split_maohao(lines, label, candidates)
    get_value_by_next_td(html, label, candidates)
    return get_finally_result(candidates)
def extract_xmbh(province, html, text, lines, table_line, table):
    """Collect candidate project / bid-section numbers and pick one.

    Sources, in the order they are consulted:

    1. For ``province == 'bt'`` only: the first entry returned by
       ``get_data_from_table(table, '标段编号')``.
    2. A regex over ``text`` that captures the ASCII token following
       '标段' / '项目' / '招标' ... '编号'.
    3. ``get_value_by_next_td`` for '工程编号', ``bt_get_value_by_next_td``
       for '项目编号', and ``get_value_by_split_maohao`` for '项目编号'.

    Returns:
        Whatever ``get_finally_result`` produces from the collected
        candidates.
    """
    result = []

    if province == 'bt':
        # Best-effort lookup: an empty or missing table must not abort the
        # extraction.  ``Exception`` (rather than a bare ``except``) keeps
        # KeyboardInterrupt / SystemExit propagating.
        try:
            first_entry = get_data_from_table(table, '标段编号')[0]
        except Exception:
            first_entry = None
        if first_entry:
            result.append(first_entry)

    pattern = r'(标段|项目|招标).*?编号:?([0-9a-zA-Z_-]+)'
    matches = re.findall(pattern, text)
    if matches:
        # Group 2 of the first match is the number itself.
        result.append(matches[0][1])

    # The helpers receive the shared list as third argument; presumably they
    # append what they find (defined elsewhere -- confirm).
    get_value_by_next_td(html, '工程编号', result)
    bt_get_value_by_next_td(html, '项目编号', result)
    get_value_by_split_maohao(lines, '项目编号', result)

    return get_finally_result(result)
def ln_jyw_zbjggs(province, html, text, lines, table_line, table):
    """Build a field dict from ``html`` via ``get_value_by_next_td``.

    Each key is filled from the lookup of one label; only ``html`` is read.
    'kbsj' is additionally passed through ``deal_date_str``.  'zbr' and
    'xmyz' are both looked up under the same label ('建设单位').

    Returns:
        dict with keys 'xmbh', 'type', 'zbdl', 'zbr', 'kbsj', 'gsmc',
        'xmyz', 'htjg', 'name', 'jb' (in that insertion order).
    """
    # The 'xmmc' (project name) and 'jhgq' ('有效工期') lookups were disabled
    # in the original code and are intentionally not populated here.
    return {
        'xmbh': get_value_by_next_td(html, '标段编号'),
        'type': get_value_by_next_td(html, '工程类别'),
        'zbdl': get_value_by_next_td(html, '代理机构'),
        'zbr': get_value_by_next_td(html, '建设单位'),
        'kbsj': deal_date_str(get_value_by_next_td(html, '开标日期')),
        'gsmc': get_value_by_next_td(html, '中标单位'),
        'xmyz': get_value_by_next_td(html, '建设单位'),
        'htjg': get_value_by_next_td(html, '中标价'),
        'name': get_value_by_next_td(html, '项目负责人姓名'),
        'jb': get_value_by_next_td(html, '中标负责人级别'),
    }
def extract_xmmc(province, html, text, lines, table_line, table):
    """Collect candidate project / bid-section names and pick one.

    Sources, in the order they are consulted:

    1. ``get_value_by_split_maohao2`` over ``lines`` for '项目名称' /
       '标段名称'; the value is kept only if it is non-empty and contains
       no ':'.
    2. ``get_value_by_table_line2`` over ``table_line`` for the same labels.
    3. ``get_value_by_next_td`` over ``html`` for '项目名称' and '工程名称'.
    4. A regex over ``text``; the captured name is kept only when it is
       shorter than 50 characters.

    Returns:
        Whatever ``get_finally_result`` produces from the collected
        candidates.
    """
    result = []

    # Project and bid-section name labels.
    texts = ['项目名称', '标段名称']
    r1 = get_value_by_split_maohao2(lines, texts)
    # Guard against a falsy helper result: ``':' not in None`` would raise
    # TypeError and abort the whole extraction.
    if r1 and ':' not in r1:
        result.append(r1)

    # The helpers receive the shared list as last argument; presumably they
    # append what they find (defined elsewhere -- confirm).
    get_value_by_table_line2(table_line, texts, result)
    get_value_by_next_td(html, '项目名称', result)
    get_value_by_next_td(html, '工程名称', result)

    # Regex fallback over the plain text.  A non-string ``text`` is treated
    # as "no match" instead of being hidden behind a bare ``except``.
    try:
        matches = re.findall(r'(项目名称:|标段名称:)(.*?)(\s|,|。)', text, re.S)
    except TypeError:
        matches = []
    if matches:
        name = matches[0][1].strip()
        if len(name) < 50:
            name = re.sub(' ', '', name).replace('为', ' ').replace(':', ' ').strip()
            result.append(name)

    return get_finally_result(result)
def bt_jyw_zbjggg(province, html, text, lines, table_line, table):
    """Build a field dict from ``html`` and ``table``.

    * 'xmbh', 'type', 'zbr' come from ``bt_get_value_by_next_td`` over
      ``html``.
    * 'company' and 'gsmc' both hold the '中标单位' table lookup; a
      single-element result is unwrapped to that element.
    * 'name' is the '项目经理' table lookup.
    * 'gsq' is ``"<start>-<end>"`` built from '公示开始时间' /
      '公示结束时间', or ``None`` when no start value is found.

    Returns:
        dict with keys 'xmbh', 'type', 'zbr', 'company', 'gsmc', 'name',
        'gsq'.
    """
    res = {}
    res['xmbh'] = bt_get_value_by_next_td(html, '项目编号')
    res['type'] = bt_get_value_by_next_td(html, '项目类别')
    res['zbr'] = bt_get_value_by_next_td(html, '招标人')
    # The 'xmmc' ('项目名称') and 'jhgq' ('工期') lookups were disabled in
    # the original code and are intentionally not populated here.

    company = get_data_from_table(table, '中标单位')
    if len(company) == 1:
        # Exactly one winner: store the bare value rather than a 1-item list.
        company = company[0]
    res['company'] = company
    res['gsmc'] = company

    res['name'] = get_data_from_table(table, '项目经理')

    gsq_start = get_value_by_next_td(html, '公示开始时间')
    gsq_end = get_value_by_next_td(html, '公示结束时间')
    # A missing end value must not raise TypeError on concatenation; the
    # start is still reported with an empty end part.
    res['gsq'] = gsq_start + '-' + (gsq_end or '') if gsq_start else None
    return res
def extract_gsq(province, html, text, lines, table_line, table):
    """Collect candidate publicity-period ('公示期') values and pick one.

    Sources, in the order they are consulted:

    1. A regex over ``text`` capturing what follows '公示期为' up to the
       next ',' or '。'.
    2. ``"<start>-<end>"`` built from the '公示开始时间' and
       '公示截止时间' (falling back to '公示结束时间') lookups in ``html``.
    3. ``get_value_by_split_maohao2`` over ``lines`` for '公示时间' /
       '公示日期'.
    4. ``get_value_by_next_td`` over ``html`` for '公示时间'.

    Returns:
        Whatever ``get_finally_result`` produces from the collected
        candidates.
    """
    result = []

    # A non-string ``text`` is treated as "no match".
    try:
        matches = re.findall('公示期为(.*?)(,|,|。)', text)
    except TypeError:
        matches = []
    if matches and matches[0][0]:
        result.append(matches[0][0])

    # Best-effort: a helper failure, or a start value without an end value
    # (``str + None`` raises TypeError), simply contributes no candidate.
    # ``Exception`` keeps KeyboardInterrupt / SystemExit propagating.
    try:
        gsq_start = get_value_by_next_td(html, '公示开始时间')
        gsq_end = get_value_by_next_td(html, '公示截止时间')
        if not gsq_end:
            gsq_end = get_value_by_next_td(html, '公示结束时间')
        if gsq_start:
            result.append(gsq_start + '-' + gsq_end)
    except Exception:
        pass

    get_value_by_split_maohao2(lines, ['公示时间', '公示日期'], result)
    # Fixed: this call used to pass ``lines`` and a list of labels, unlike
    # every other ``get_value_by_next_td`` call in this file, which passes
    # the HTML and a single label string.
    get_value_by_next_td(html, '公示时间', result)

    return get_finally_result(result)
def extract_kbdd(province, html, text, lines, table_line, table):
    """Return the '开标地点' (bid-opening location) value.

    Tries ``get_value_by_split_maohao`` over ``lines`` first and falls back
    to ``get_value_by_next_td`` over ``html``; the second lookup only runs
    when the first result is falsy.

    Returns:
        The first truthy lookup result, or ``''`` when both are falsy.

    NOTE(review): an earlier ``extract_kbdd`` is defined above in this
    file; if both live at module level, this later definition is the one
    that stays bound.
    """
    # A regex-based extraction over ``text`` had been commented out in the
    # original; its placeholder result was always '' and is the final
    # fallback below.
    return (
        get_value_by_split_maohao(lines, '开标地点')
        or get_value_by_next_td(html, '开标地点')
        or ''
    )
def extract_kbsj(province, html, text, lines, table_line, table):
    """Return the bid-opening time ('开标时间'), passed through ``deal_date_str``.

    Priority of sources:

    1. ``get_value_by_next_td`` over ``html`` for '开标时间'.
    2. ``get_value_by_split_maohao`` over ``lines`` for '开标时间'.
    3. A regex over ``text``: first the value following '开标时间' /
       '时间为', then, if that yields nothing, the span between '于' and
       '进行开标'.

    Returns:
        ``deal_date_str`` applied to the selected value; ``None`` is passed
        to it when no source matched.
    """
    text_guess = None
    # A non-string ``text`` cannot be searched; treat it as "no match".
    if isinstance(text, str):
        # Fixed: the terminator group previously contained a bare ')' which
        # made the pattern invalid (unbalanced parenthesis), so re.findall
        # always raised and this branch never produced a value.  Both the
        # ASCII and the full-width closing parenthesis are accepted.
        primary = re.findall(
            r'(开标时间:?|时间为:?)(.*?)([))]|\d+\.|。|,|\s)', text, re.S)
        if primary:
            text_guess = primary[0][1].strip().replace(':', '') or None
        if not text_guess:
            fallback = re.findall('于(.*?)进行开标', text)
            if fallback:
                text_guess = fallback[0]

    structured = get_value_by_next_td(html, '开标时间')
    if not structured:
        structured = get_value_by_split_maohao(lines, '开标时间')

    # Structured lookups win over the free-text regex guess.
    chosen = structured if structured else text_guess
    return deal_date_str(chosen)
def ln_jyw_zbhxr(province, html, text, lines, table_line, table):
    """Build a field dict from ``html`` and ``table``.

    * 'type', 'xmbh', 'zbr' come from ``get_value_by_next_td`` over
      ``html``.
    * 'gsmc', 'tbbj', 'name', 'zsbh' come from ``get_data_from_table``;
      'zsmc' from ``get_data_from_table2`` with the labels
      '注册资格' / '证书'.
    * 'gsq' is ``"<start>-<end>"`` built from '公示开始时间' and
      '公示截止时间' (falling back to '公示结束时间'), or ``None`` when no
      start value is found.

    Returns:
        dict with keys 'type', 'xmbh', 'zbr', 'gsmc', 'tbbj', 'name',
        'zsmc', 'zsbh', 'gsq'.
    """
    res = {}
    # The 'xmmc' ('工程名称') and 'jhgq' ('工期') lookups were disabled in
    # the original code and are intentionally not populated here.
    res['type'] = get_value_by_next_td(html, '工程类别')
    res['xmbh'] = get_value_by_next_td(html, '编号')
    res['zbr'] = get_value_by_next_td(html, '建设单位')

    res['gsmc'] = get_data_from_table(table, '单位名称')
    res['tbbj'] = get_data_from_table(table, '报价')
    res['name'] = get_data_from_table(table, '项目负责人')
    res['zsmc'] = get_data_from_table2(table, ['注册资格', '证书'])
    res['zsbh'] = get_data_from_table(table, '证书编号')

    gsq_start = get_value_by_next_td(html, '公示开始时间')
    gsq_end = get_value_by_next_td(html, '公示截止时间')
    if not gsq_end:
        gsq_end = get_value_by_next_td(html, '公示结束时间')
    # Fixed: the concatenation used to run unconditionally and raised
    # TypeError when either lookup came back as None.  A missing start now
    # yields None; a missing end leaves the end part empty.
    res['gsq'] = gsq_start + '-' + (gsq_end or '') if gsq_start else None
    return res
def extract_ssd(province, html, text, lines, table_line, table):
    """Return the project location value.

    Tries ``get_value_by_split_maohao2`` over ``lines`` with the labels
    '招标项目所在地区' / '建设地点'; when that result is falsy, returns the
    ``get_value_by_next_td`` lookup of '建设地点' in ``html`` instead.
    """
    location_labels = ['招标项目所在地区', '建设地点']
    # ``or`` evaluates the HTML lookup only when the line-based one is falsy.
    return (
        get_value_by_split_maohao2(lines, location_labels)
        or get_value_by_next_td(html, '建设地点')
    )
def extract_tb_jzrq(province, html, text, lines, table_line, table):
    """Return the '投标截止' (bid deadline) value via ``deal_date_str``.

    Tries ``get_value_by_split_maohao`` over ``lines`` first; when that
    result is falsy, uses the ``get_value_by_next_td`` lookup in ``html``.
    The chosen value (possibly still falsy) is passed to ``deal_date_str``.
    """
    label = '投标截止'
    # ``or`` evaluates the HTML lookup only when the line-based one is falsy.
    raw_deadline = (
        get_value_by_split_maohao(lines, label)
        or get_value_by_next_td(html, label)
    )
    return deal_date_str(raw_deadline)
def extract_type(province, html, text, lines, table_line, table):
    """Return the project type value found in ``html``.

    Tries ``get_value_by_next_td`` with the label '工程类型'; when that
    result is falsy, returns the ``bt_get_value_by_next_td`` lookup of
    '项目类别' instead.
    """
    # ``or`` evaluates the second lookup only when the first one is falsy.
    return (
        get_value_by_next_td(html, '工程类型')
        or bt_get_value_by_next_td(html, '项目类别')
    )