def extract_zbr(province, html, text, lines, table_line, table):
    """Extract the tenderer field (labels 招标人 / 招标单位 / 建设单位).

    A shared candidate list is handed first to ``get_value_by_split_maohao2``
    (run over ``lines``) and then to ``get_value_by_next_td2`` (run over
    ``html``).  The helpers are presumably expected to append their matches
    to that list -- confirm against their definitions.  The list is finally
    reduced to the return value by ``get_finally_result``.

    ``province``, ``text``, ``table_line`` and ``table`` are not used here.
    """
    candidates = []
    line_labels = ['招标人', '招标单位']
    cell_labels = ['建设单位', '招标单位']

    # Order matters if the helpers append: line-based hits come first.
    get_value_by_split_maohao2(lines, line_labels, candidates)
    get_value_by_next_td2(html, cell_labels, candidates)

    return get_finally_result(candidates)
def extract_jsgm(province, html, text, lines, table_line, table):
    """Extract the "规模" (scale) field.

    The label is looked up with ``get_value_by_split_maohao`` over ``lines``
    and with ``get_value_by_next_td`` over ``html``; both receive the same
    candidate list, which ``get_finally_result`` then reduces to the return
    value.

    ``province``, ``text``, ``table_line`` and ``table`` are not used here.
    """
    label = '规模'
    candidates = []

    # NOTE: a regex-based extraction over ``text`` used to sit here; it was
    # already commented out and is intentionally not executed.
    get_value_by_split_maohao(lines, label, candidates)
    get_value_by_next_td(html, label, candidates)

    return get_finally_result(candidates)
def extract_kbdd(province, html, text, lines, table_line, table):
    """Extract the "开标地点" (bid-opening location) field.

    The label is looked up with ``get_value_by_split_maohao`` over ``lines``
    and with ``get_value_by_next_td`` over ``html``; both receive the same
    candidate list, which ``get_finally_result`` then reduces to the return
    value.

    ``province``, ``text``, ``table_line`` and ``table`` are not used here.
    """
    label = '开标地点'
    candidates = []

    # NOTE: a regex-based extraction over ``text`` used to sit here; it was
    # already commented out and is intentionally not executed.
    get_value_by_split_maohao(lines, label, candidates)
    get_value_by_next_td(html, label, candidates)

    return get_finally_result(candidates)
def extract_jhgq(province, html, text, lines, table_line, table):
    """Extract the "工期" (construction period) field, as an int if possible.

    Candidates come from ``get_value_by_split_maohao`` over ``lines`` and
    from the first element returned by ``get_data_from_table`` for ``table``.
    ``get_finally_result`` reduces them to one value.

    Returns:
        The first run of digits in the chosen value converted to ``int``.
        If the value is falsy, is not a string, or contains no digits, the
        value from ``get_finally_result`` is returned unchanged.

    ``province``, ``html``, ``text`` and ``table_line`` are not used here.
    """
    result = []
    get_value_by_split_maohao(lines, '工期', result)

    table_hits = get_data_from_table(table, '工期')
    if table_hits:
        result.append(table_hits[0])

    res = get_finally_result(result)
    if not res:
        return res

    try:
        # Raw string: '\d' in a normal literal is an invalid escape sequence.
        return int(re.findall(r'\d+', res)[0])
    except (IndexError, TypeError, ValueError):
        # No digits / non-string value: keep the unconverted result,
        # exactly as the previous best-effort conversion did.
        return res
def extract_xmbh(province, html, text, lines, table_line, table):
    """Extract the project / section number.

    Candidates are collected, in this order, from:

    1. for ``province == 'bt'`` only: the first element returned by
       ``get_data_from_table(table, '标段编号')``;
    2. the first regex match in ``text`` of an identifier
       (``[0-9a-zA-Z_-]+``) following "...编号";
    3. ``get_value_by_next_td`` for the label 工程编号 over ``html``;
    4. ``bt_get_value_by_next_td`` for the label 项目编号 over ``html``;
    5. ``get_value_by_split_maohao`` for the label 项目编号 over ``lines``.

    The candidate list is reduced by ``get_finally_result``.

    ``table_line`` is not used here.
    """
    result = []

    if province == 'bt':
        # Best-effort: the table lookup may return nothing indexable.
        # ``except Exception`` keeps that tolerance without also swallowing
        # KeyboardInterrupt / SystemExit as a bare ``except`` would.
        try:
            table_value = get_data_from_table(table, '标段编号')[0]
            if table_value:
                result.append(table_value)
        except Exception:
            pass

    pattern = r'(标段|项目|招标).*?编号:?([0-9a-zA-Z_-]+)'
    matches = re.findall(pattern, text)
    if matches:
        # Group 2 of the first match is the identifier itself.
        result.append(matches[0][1])

    get_value_by_next_td(html, '工程编号', result)
    bt_get_value_by_next_td(html, '项目编号', result)
    get_value_by_split_maohao(lines, '项目编号', result)

    return get_finally_result(result)
def extract_xmmc(province, html, text, lines, table_line, table):
    """Extract the project / section name.

    Candidates are collected, in this order, from:

    1. the return value of ``get_value_by_split_maohao2`` over ``lines`` for
       the labels 项目名称 / 标段名称, kept only when it is non-empty and
       contains no ':';
    2. ``get_value_by_table_line2`` over ``table_line`` for the same labels;
    3. ``get_value_by_next_td`` over ``html`` for 项目名称 and 工程名称;
    4. the first regex match in ``text`` after "项目名称:" / "标段名称:",
       kept only when shorter than 50 characters.

    The candidate list is reduced by ``get_finally_result``.

    ``province`` and ``table`` are not used here.
    """
    result = []

    # Project name and section (lot) name.
    texts = ['项目名称', '标段名称']
    line_value = get_value_by_split_maohao2(lines, texts)
    # Guard against an empty/None return before the membership test, the
    # same way extract_zbdl guards this helper's return value; otherwise a
    # None return raises TypeError and an empty string gets appended.
    if line_value and ':' not in line_value:
        result.append(line_value)

    get_value_by_table_line2(table_line, texts, result)
    get_value_by_next_td(html, '项目名称', result)
    get_value_by_next_td(html, '工程名称', result)

    try:
        regex_value = re.findall(
            r'(项目名称:|标段名称:)(.*?)(\s|,|。)', text, re.S)[0][1].strip()
    except (IndexError, TypeError):
        # No match, or ``text`` is not a string: nothing to add.
        regex_value = None

    if regex_value is not None and len(regex_value) < 50:
        regex_value = (re.sub(' ', '', regex_value)
                       .replace('为', ' ')
                       .replace(':', ' ')
                       .strip())
        result.append(regex_value)

    return get_finally_result(result)
def extract_zbdl(province, html, text, lines, table_line, table):
    """Extract the tendering-agency field (招标代理 and variants).

    Candidates are collected, in this order, from:

    1. the first regex match in ``text`` after an agency label, kept only
       when shorter than 50 characters and non-empty after clean-up;
    2. the return value of ``get_value_by_split_maohao2`` over ``lines``,
       kept only when it is non-empty and contains no ':';
    3. ``get_value_by_next_td2`` over ``html`` for the same labels.

    The candidate list is reduced by ``get_finally_result``.

    ``province``, ``table_line`` and ``table`` are not used here.
    """
    result = []

    try:
        # The last alternative used to read ``地.{0.4}址``; ``{0.4}`` is not
        # a quantifier and was matched literally.  It is now ``{0,4}``.  The
        # earlier ``地`` alternative still wins at the same position, so the
        # captured value (group 2) is unaffected.
        regex_value = re.findall(
            r'(招标代理机构是:?|招?标?代理机?构?单?位?:|招标代理:|招标代理为)'
            r'(.*?)(\s|,|。|地|统一|\s$|地.{0,4}址)',
            text, re.S)[0][1].strip()
    except (IndexError, TypeError):
        # No match, or ``text`` is not a string: nothing to add.
        regex_value = None

    if regex_value is not None and len(regex_value) < 50:
        regex_value = (re.sub(' ', '', regex_value)
                       .replace('为', ' ')
                       .replace(':', ' ')
                       .strip())
        if regex_value:
            result.append(regex_value)

    texts = ['招标代理单位', '代理单位', '代理机构', '招标代理']
    line_value = get_value_by_split_maohao2(lines, texts)
    if line_value and ':' not in line_value:
        result.append(line_value)

    # Pass ``html`` (not ``lines``): this matches how extract_zbr calls the
    # same helper.  NOTE(review): confirm against get_value_by_next_td2.
    get_value_by_next_td2(html, texts, result)

    return get_finally_result(result)
def extract_gsq(province, html, text, lines, table_line, table):
    """Extract the "公示期" (public-notice period) field.

    Candidates are collected, in this order, from:

    1. the first regex match in ``text`` following "公示期为";
    2. "<start>-<end>" built from ``get_value_by_next_td`` lookups of
       公示开始时间 and 公示截止时间 (falling back to 公示结束时间) in
       ``html``, added only when a start value was found;
    3. ``get_value_by_split_maohao2`` over ``lines`` for 公示时间 / 公示日期;
    4. ``get_value_by_next_td`` over ``html`` for 公示时间.

    The candidate list is reduced by ``get_finally_result``.

    ``province``, ``table_line`` and ``table`` are not used here.
    """
    result = []

    try:
        period = re.findall('公示期为(.*?)(,|,|。)', text)[0][0]
    except (IndexError, TypeError):
        # No match, or ``text`` is not a string: nothing to add.
        period = None
    if period:
        result.append(period)

    # Best-effort: the helper is opaque here and the concatenation fails
    # when the end value is missing; in either case just skip the candidate.
    try:
        gsq_start = get_value_by_next_td(html, '公示开始时间')
        gsq_end = get_value_by_next_td(html, '公示截止时间')
        if not gsq_end:
            gsq_end = get_value_by_next_td(html, '公示结束时间')
        if gsq_start:
            result.append(gsq_start + '-' + gsq_end)
    except Exception:
        pass

    get_value_by_split_maohao2(lines, ['公示时间', '公示日期'], result)

    # This call used to pass ``lines`` and a one-element list.  Every other
    # call of get_value_by_next_td in this file passes ``html`` and a single
    # label string.  NOTE(review): confirm against the helper's definition.
    get_value_by_next_td(html, '公示时间', result)

    return get_finally_result(result)