def get_detail_info(self, detail_url, region_name, city_name, province_name, id_, html_type, auction_type):
    """Scrape one auction detail page into an ``Auction`` record and save it.

    The page is fetched with ``s.get`` using ``self.headers``, parsed with
    lxml, and its fields are copied onto a new ``Auction`` which is then
    written with ``insert_db()``.  Lots whose status banner marks them as
    withdrawn, settled in kind, suspended, postponed, cancelled or awaiting
    confirmation are skipped without being saved.  Any exception raised
    while fetching or parsing is logged and swallowed.

    Args:
        detail_url: URL of the detail page.
        region_name: Stored as ``region``.
        city_name: Stored as ``city``.
        province_name: Stored as ``province``.
        id_: Stored as ``auction_id``.
        html_type: Stored unchanged as ``html_type``.
        auction_type: Passed to the ``Auction`` constructor.
    """
    aution = Auction(source, auction_type)
    try:
        page = s.get(detail_url, headers=self.headers)
        html = page.text
        doc = etree.HTML(html)

        aution.region = region_name
        aution.auction_id = id_
        aution.city = city_name
        aution.html_type = html_type
        aution.source_html = html
        aution.province = province_name

        title_nodes = doc.xpath('//div[contains(@class,"pm-main clearfix")]/h1/text()')
        aution.auction_name = title_nodes[0].strip()

        # Prices are rendered with thousands separators (and sometimes
        # spaces) that float() cannot digest.
        raw_start = doc.xpath('//*[@id="J_HoverShow"]/tr[1]/td[1]/span[2]/span/text()')[0]
        aution.start_auction_price = float(raw_start.replace(',', '').replace(' ', ''))

        raw_deposit = doc.xpath('//*[@id="J_HoverShow"]/tr[2]/td[1]/span[2]/span/text()')[0]
        aution.earnest_money = float(raw_deposit.replace(',', '').replace(' ', ''))

        # The appraisal value is optional: a missing or unparsable cell
        # yields None instead of aborting the whole record.
        try:
            raw_assess = doc.xpath('//*[@id="J_HoverShow"]/tr[3]/td[1]/span[2]/span/text()')[0]
            aution.assess_value = float(raw_assess.replace(',', ''))
        except Exception:
            aution.assess_value = None

        aution.court = doc.xpath('//p[@class="subscribe-unit"]/span/a/text()')[0]
        aution.contacts = doc.xpath('//p[@class="subscribe-unit"]/span/em/text()')[0]
        aution.phone_number = doc.xpath('//p[@class="subscribe-unit"][2]/span[2]/text()')[1]

        aution.info = [
            doc.xpath('string(//*[@id="J_DetailTabMain"]/div[4])'),
            doc.xpath('string(//*[@id="J_DetailTabMain"]/div[5])'),
        ]

        # An absent banner is treated like a banner without any keyword.
        banner = doc.xpath('//h1[@class="bid-fail"]/text()')
        status = banner[0] if banner else ''

        skip_words = ('撤回', '以物抵债', '中止', '暂缓', '撤拍', '待确认')
        if any(word in status for word in skip_words):
            return

        if '已结束' in status:
            # Ended auctions expose the time as a formatted string.
            time_text = doc.xpath('//span[@class="countdown J_TimeLeft"]/text()')[0]
            aution.auction_time = datetime.datetime.strptime(time_text, "%Y/%m/%d %H:%M:%S")
        else:
            # Otherwise the countdown element carries a numeric timestamp;
            # it is divided by 1000 before conversion (presumably
            # milliseconds - confirm against the page).
            stamp = doc.xpath('//li[@id="sf-countdown"]/@data-start')[0]
            aution.auction_time = datetime.datetime.fromtimestamp(int(stamp) / 1000)

        aution.insert_db()
    except Exception as e:
        log.error('解析错误,url="{}",e="{}"'.format(detail_url, e))
def get_detail(self, id_, auction_time, html_type, auction_type, province, city, region):
    """Fetch the detail page for ``id_``, parse it into an ``Auction`` and store it.

    Three page layouts are handled, selected by markers in the HTML:

    * pages containing ``GetRecord()``: tabbed layout, dotted dates
      (``%Y.%m.%d``), two info sections, unparsable appraisal tolerated;
    * pages containing ``bmnumber()``: same tabbed layout, dashed dates
      (``%Y-%m-%d``), one info section, appraisal must parse;
    * anything else: older list layout addressed by absolute XPath.

    A request/decoding failure and a parsing failure are logged with
    different messages; neither is re-raised, and nothing is stored when
    parsing fails.

    Args:
        id_: Page identifier; used to build the URL and stored as
            ``auction_id``.
        auction_time: Raw time value, converted with ``self.get_date``.
        html_type: Stored unchanged as ``html_type``.
        auction_type: Passed to the ``Auction`` constructor and stored.
        province: Stored as ``province``.
        city: Stored as ``city``.
        region: Stored as ``region``.
    """
    auction = Auction(source=source, auction_type=auction_type)
    auction.html_type = html_type
    auction.auction_type = auction_type
    auction.province = province
    auction.city = city
    auction.region = region
    detail_url = 'http://www1.rmfysszc.gov.cn/Handle/' + id_ + '.shtml'

    def first_text(tree, path):
        """Return the first result of *path*; IndexError if there is none."""
        return tree.xpath(path)[0]

    def labelled_value(tree, path):
        """Return the text following ': ' in the first result of *path*."""
        return re.search(': (.*?)$', first_text(tree, path), re.S | re.M).group(1)

    def section_text(tree, path):
        """Evaluate a ``string(...)`` XPath and return its text."""
        # The encode/decode round trip is kept from the original code;
        # presumably it turns lxml's str-subclass result into a plain str.
        return tree.xpath(path).encode().decode()

    def parse_tabbed_page(tree, date_format, info_paths, tolerate_bad_assess):
        """Fill ``auction`` from the tabbed layout shared by two page kinds."""
        auction.auction_name = first_text(tree, '//div[@id="Title"]/h1/text()')
        auction.start_auction_price = self.get_float(
            first_text(tree, '//*[@id="price"]/div[1]/span/text()'))

        assess_text = first_text(
            tree, '//*[@id="bg1"]/div[1]/table/tr[1]/td/span[2]/text()')
        if tolerate_bad_assess:
            # get_float is defined elsewhere, so the exceptions it may
            # raise are unknown here; keep the broad catch.
            try:
                auction.assess_value = self.get_float(assess_text)
            except Exception:
                auction.assess_value = None
        else:
            auction.assess_value = self.get_float(assess_text)

        auction.earnest_money = self.get_float(
            first_text(tree, '//*[@id="bg1"]/div[1]/table/tr[2]/td/span[2]/text()'))
        auction.announcement_date = datetime.datetime.strptime(
            labelled_value(tree, '//*[@id="bg1"]/div[1]/table/tr[3]/td/span/text()'),
            date_format)
        auction.auction_level = labelled_value(
            tree, '//*[@id="bg1"]/div[1]/table/tr[4]/td/span/text()')
        auction.court = labelled_value(
            tree, '//*[@id="bg1"]/div[2]/table/tr[1]/td/span/text()')

        sections = [section_text(tree, path) for path in info_paths]

        auction.contacts = labelled_value(
            tree, '//*[@id="bg1"]/div[2]/table/tr[2]/td/span/text()')
        auction.phone_number = labelled_value(
            tree, '//*[@id="bg1"]/div[2]/table/tr[3]/td/span/text()')
        auction.info = sections

        # NOTE(review): both tabbed page kinds read the build type from
        # "bdjs11", although the bmnumber() pages take their info text from
        # "bdjs" - confirm this id is right for those pages.
        try:
            auction.build_type = first_text(
                tree, '//*[@id="bdjs11"]/table[1]/tr[2]/td[4]/text()')
        except IndexError:
            auction.build_type = None

    def parse_legacy_page(tree):
        """Fill ``auction`` from the older list-based layout."""
        field = '/html/body/div[6]/table/tr/td/ul/li[{}]/span/text()'

        auction.auction_name = first_text(tree, '//*[@id="xmgg"]/div/div[1]/text()')
        auction.assess_value = self.get_float(first_text(tree, field.format(3)))

        date_text = first_text(tree, field.format(2))
        try:
            auction.announcement_date = datetime.datetime.strptime(
                date_text, "%Y-%m-%d")
        except ValueError:
            # Some pages use slashes instead of dashes.
            auction.announcement_date = datetime.datetime.strptime(
                date_text, "%Y/%m/%d")

        auction.court = first_text(tree, field.format(1))

        sections = [
            section_text(tree, 'string(//*[@id="bdxx"]/div)'),
            section_text(tree, 'string(//*[@id="tjzl"]/div/div[2])'),
        ]

        auction.contacts = first_text(tree, field.format(4))
        auction.phone_number = first_text(tree, field.format(5))
        auction.info = sections

        try:
            auction.build_type = first_text(
                tree, '//*[@id="bdxx"]/div/div[2]/table/tr[2]/td[3]/text()')
        except IndexError:
            auction.build_type = None

    try:
        response = requests.get(detail_url, headers=self.headers)
        html = response.content.decode()
        auction.source_html = html
        try:
            tree = etree.HTML(html)
            if 'GetRecord()' in html:
                parse_tabbed_page(
                    tree,
                    date_format="%Y.%m.%d",
                    info_paths=('string(//*[@id="bdjs11"])',
                                'string(//*[@id="jjjl"])'),
                    tolerate_bad_assess=True,
                )
            elif 'bmnumber()' in html:
                parse_tabbed_page(
                    tree,
                    date_format="%Y-%m-%d",
                    info_paths=('string(//*[@id="bdjs"])',),
                    tolerate_bad_assess=False,
                )
            else:
                parse_legacy_page(tree)

            auction.auction_id = id_
            auction.auction_time = self.get_date(date=auction_time)
            auction.insert_db()
        except Exception as e:
            log.error('解析错误,url="{}",e="{}"'.format(detail_url, e))
    except Exception as e:
        log.error('详情页请求错误,url="{}",e="{}"'.format(detail_url, e))
def get_detail(self, aution_url, aution_id, aution_time, region_name, city_name, html_type, auction_type):
    """Fetch an auction detail page, parse it into an ``Auction`` and store it.

    Two page layouts are handled: URLs containing ``item2`` use markup
    addressed by CSS class, every other URL uses a list layout addressed
    by absolute XPath (which additionally provides the area).  Parse
    failures are logged and swallowed; nothing is stored in that case.

    Args:
        aution_url: URL of the detail page; also selects the layout.
        aution_id: Stored as ``auction_id``.
        aution_time: Start time formatted ``%Y-%m-%d %H:%M:%S``; left unset
            when falsy.
        region_name: Stored as ``region``.
        city_name: Stored as ``city``.
        html_type: Stored unchanged as ``html_type``.
        auction_type: Passed to the ``Auction`` constructor.

    Raises:
        Whatever ``requests.get`` raises: the request is issued outside the
        ``try`` block, so network errors reach the caller.
    """
    # Digits with optional thousands separators and an optional fraction.
    number_re = re.compile(r'\d+(?:,\d+)*(?:\.\d+)?')

    def first_number(text):
        """Return the first number found in *text* as a float."""
        # Use the whole match: taking only the first capture group would cut
        # the number short (dropping the last digit, everything after the
        # comma, or the decimals).
        return float(number_re.search(text).group().replace(',', ''))

    def labelled(pattern, text):
        """Return capture group 1 of *pattern* searched in *text*."""
        return re.search(pattern, text, re.S | re.M).group(1)

    def first_text(path):
        """Return the first XPath result for *path* on the parsed page."""
        # ``tree`` is bound below before any helper is called.
        return tree.xpath(path)[0]

    info = []
    aution = Auction(source, auction_type)
    response = requests.get(aution_url, headers=self.headers)
    try:
        html = response.text
        tree = etree.HTML(html)
        aution.auction_id = aution_id
        aution.region = region_name
        aution.city = city_name
        aution.source_html = html
        aution.html_type = html_type

        # The starting price is optional on both layouts.
        try:
            aution.start_auction_price = float(
                first_text('//*[@id="Price_Start"]/text()').replace(',', ''))
        except (IndexError, ValueError):
            aution.start_auction_price = None

        if 'item2' in aution_url:
            aution.auction_name = first_text('//div[@class="d-m-title"]/b/text()')
            aution.auction_level = first_text(
                '//div[@class="d-m-tb"]/table[1]/tr[1]/td[2]/text()')
            try:
                aution.assess_value = first_number(first_text(
                    '//div[@class="d-m-tb"]/table[1]/tr[4]/td[1]/text()'))
            except (IndexError, AttributeError, ValueError):
                aution.assess_value = None
            aution.earnest_money = first_number(first_text(
                '//div[@class="d-m-tb"]/table[1]/tr[3]/td[2]/text()'))
            aution.court = labelled(
                '法院:(.*?)$', first_text('//td[@class="pr7"]/text()'))
            aution.contacts = first_text('//td[@valign="top"]/text()')

            phone_text = first_text('//td[@colspan="2"]/text()')
            try:
                aution.phone_number = labelled('联系电话:(.*?)$', phone_text)
            except AttributeError:
                # No "联系电话:" label in the cell.
                aution.phone_number = None

            info.append(tree.xpath(
                'string(//div[@class="panel-con"]/div[@class="d-block"][2])'))
            info.append(tree.xpath(
                'string(//div[@class="panel-con"]/div[@class="d-article d-article2"][3])'))
        else:
            # All summary fields live in one list; only the <li> index varies.
            item = '/html/body/div[1]/div[7]/div[2]/div[1]/div[2]/div[4]/li[{}]/text()'

            aution.auction_name = first_text('//div[@class="DivItemName"]/text()')
            aution.auction_level = first_text(item.format(4))
            try:
                aution.assess_value = first_number(first_text(item.format(5)))
            except (IndexError, AttributeError, ValueError):
                aution.assess_value = None
            aution.earnest_money = first_number(first_text(item.format(6)))
            aution.court = labelled('法院:(.*?)$', first_text(item.format(8)))
            aution.area = first_number(first_text(item.format(2)))

            info.append(tree.xpath('string(//div[@id="Tab1"])'))
            info.append(tree.xpath('string(//div[@class="bootstrap-table"])'))

        aution.info = info
        if aution_time:
            aution.auction_time = datetime.datetime.strptime(
                aution_time, "%Y-%m-%d %H:%M:%S")
        aution.insert_db()
    except Exception as e:
        log.error('解析错误,url="{}",e="{}"'.format(aution_url, e))