Ejemplo n.º 1
0
    def get_detail(self, id_, auction_time, html_type, auction_type, province,
                   city, region, timeout=30):
        """Fetch one auction detail page, parse it and store the result.

        The page is downloaded from ``www1.rmfysszc.gov.cn``, its fields are
        copied onto a new ``Auction`` and ``auction.insert_db()`` is called.
        Three page layouts are handled, selected by marker text in the HTML:
        ``GetRecord()``, ``bmnumber()``, or neither of the two.

        Args:
            id_: Page identifier. It is concatenated into the detail URL (so
                it must be a ``str``) and stored as ``auction_id``.
            auction_time: Raw value handed to ``self.get_date(date=...)``.
            html_type: Stored unchanged on the auction.
            auction_type: Passed to ``Auction`` and stored on the auction.
            province: Stored unchanged on the auction.
            city: Stored unchanged on the auction.
            region: Stored unchanged on the auction.
            timeout: Seconds passed to ``requests.get`` so an unresponsive
                server cannot block the crawl indefinitely.

        Returns:
            None. Download and parse failures are reported through
            ``log.error`` and the auction is then not inserted.
        """
        auction = Auction(source=source, auction_type=auction_type)
        auction.html_type = html_type
        auction.auction_type = auction_type
        auction.province = province
        auction.city = city
        auction.region = region
        detail_url = 'http://www1.rmfysszc.gov.cn/Handle/' + id_ + '.shtml'

        try:
            response = requests.get(detail_url, headers=self.headers,
                                    timeout=timeout)
            # bytes.decode() defaults to UTF-8; a page in another encoding
            # ends up in the handler below.
            html = response.content.decode()
        except Exception as e:
            log.error('详情页请求错误,url="{}",e="{}"'.format(detail_url, e))
            return
        auction.source_html = html

        try:
            tree = etree.HTML(html)
            if 'GetRecord()' in html:
                self._parse_marked_layout(
                    tree, auction,
                    date_format="%Y.%m.%d",
                    info_exprs=('string(//*[@id="bdjs11"])',
                                'string(//*[@id="jjjl"])'),
                    assess_optional=True)
            elif 'bmnumber()' in html:
                self._parse_marked_layout(
                    tree, auction,
                    date_format="%Y-%m-%d",
                    info_exprs=('string(//*[@id="bdjs"])',),
                    assess_optional=False)
            else:
                self._parse_unmarked_layout(tree, auction)
            auction.auction_id = id_
            auction.auction_time = self.get_date(date=auction_time)
            auction.insert_db()
        except Exception as e:
            log.error('解析错误,url="{}",e="{}"'.format(detail_url, e))

    def _parse_marked_layout(self, tree, auction, date_format, info_exprs,
                             assess_optional):
        """Fill *auction* from the layout used by both marker pages.

        Args:
            tree: Parsed HTML document.
            auction: Object whose attributes are populated.
            date_format: ``strptime`` format of the announcement date.
            info_exprs: ``string(...)`` XPath expressions whose values become
                ``auction.info``, in order.
            assess_optional: When true, a failure of ``self.get_float`` on the
                assessed value stores ``None`` instead of aborting the parse.

        Raises:
            IndexError: A mandatory XPath matched nothing.
            AttributeError: A labelled cell did not contain ``': '``.
            ValueError: The announcement date did not match *date_format*.
        """
        first = self._first_match
        labelled = self._value_after_label
        summary = '//*[@id="bg1"]/div[1]/table/'
        contact = '//*[@id="bg1"]/div[2]/table/'

        auction.auction_name = first(tree, '//div[@id="Title"]/h1/text()')
        auction.start_auction_price = self.get_float(
            first(tree, '//*[@id="price"]/div[1]/span/text()'))

        assess_value = first(tree, summary + 'tr[1]/td/span[2]/text()')
        if assess_optional:
            # get_float is defined elsewhere, so the exceptions it may raise
            # are unknown here; keep the broad catch.
            try:
                auction.assess_value = self.get_float(assess_value)
            except Exception:
                auction.assess_value = None
        else:
            auction.assess_value = self.get_float(assess_value)

        auction.earnest_money = self.get_float(
            first(tree, summary + 'tr[2]/td/span[2]/text()'))
        auction.announcement_date = datetime.datetime.strptime(
            labelled(first(tree, summary + 'tr[3]/td/span/text()')),
            date_format)
        auction.auction_level = labelled(
            first(tree, summary + 'tr[4]/td/span/text()'))
        auction.court = labelled(
            first(tree, contact + 'tr[1]/td/span/text()'))
        auction.contacts = labelled(
            first(tree, contact + 'tr[2]/td/span/text()'))
        auction.phone_number = labelled(
            first(tree, contact + 'tr[3]/td/span/text()'))
        auction.info = [self._string_value(tree, expr) for expr in info_exprs]
        # NOTE(review): the 'bmnumber()' page takes its info text from #bdjs
        # but the build type is still looked up under #bdjs11 -- confirm that
        # this is intended.
        auction.build_type = self._optional_first(
            tree, '//*[@id="bdjs11"]/table[1]/tr[2]/td[4]/text()')

    def _parse_unmarked_layout(self, tree, auction):
        """Fill *auction* from the layout that carries neither marker.

        Raises:
            IndexError: A mandatory XPath matched nothing.
            ValueError: The announcement date matched neither ``%Y-%m-%d``
                nor ``%Y/%m/%d``.
        """
        first = self._first_match
        item = '/html/body/div[6]/table/tr/td/ul/li[{}]/span/text()'

        auction.auction_name = first(tree, '//*[@id="xmgg"]/div/div[1]/text()')
        auction.assess_value = self.get_float(first(tree, item.format(3)))

        announcement_date = first(tree, item.format(2))
        try:
            auction.announcement_date = datetime.datetime.strptime(
                announcement_date, "%Y-%m-%d")
        except ValueError:
            # Fall back to the slash-separated form of the date.
            auction.announcement_date = datetime.datetime.strptime(
                announcement_date, "%Y/%m/%d")

        auction.court = first(tree, item.format(1))
        auction.contacts = first(tree, item.format(4))
        auction.phone_number = first(tree, item.format(5))
        auction.info = [
            self._string_value(tree, 'string(//*[@id="bdxx"]/div)'),
            self._string_value(tree, 'string(//*[@id="tjzl"]/div/div[2])'),
        ]
        auction.build_type = self._optional_first(
            tree, '//*[@id="bdxx"]/div/div[2]/table/tr[2]/td[3]/text()')

    @staticmethod
    def _first_match(tree, expr):
        """Return the first result of XPath *expr*; IndexError if none."""
        return tree.xpath(expr)[0]

    @staticmethod
    def _optional_first(tree, expr):
        """Return the first result of XPath *expr*, or None if none."""
        matches = tree.xpath(expr)
        return matches[0] if matches else None

    @staticmethod
    def _value_after_label(text):
        """Return what follows the first ``': '`` up to the end of that line.

        Raises AttributeError when *text* contains no ``': '``.
        """
        return re.search(': (.*?)$', text, re.S | re.M).group(1)

    @staticmethod
    def _string_value(tree, expr):
        """Evaluate a ``string(...)`` XPath and return its text.

        The encode()/decode() round trip is kept from the original code;
        presumably it turns lxml's string result into a plain ``str`` --
        TODO confirm.
        """
        return tree.xpath(expr).encode().decode()
Ejemplo n.º 2
0
 def get_detail(self, aution_url, aution_id, aution_time, region_name,
                city_name, html_type, auction_type, timeout=30):
     """Download an auction page, parse it and store the result.

     Two page layouts are supported; the one whose URL contains ``item2``
     is parsed differently from every other URL. Parsed fields are copied
     onto a new ``Auction`` and ``insert_db()`` is called on it.

     Args:
         aution_url: URL of the page to download.
         aution_id: Stored as ``auction_id``.
         aution_time: ``"%Y-%m-%d %H:%M:%S"`` string, or a falsy value to
             leave ``auction_time`` unset.
         region_name: Stored as ``region``.
         city_name: Stored as ``city``.
         html_type: Stored unchanged.
         auction_type: Passed to ``Auction``.
         timeout: Seconds passed to ``requests.get`` so an unresponsive
             server cannot block the crawl indefinitely.

     Returns:
         None. Parse failures are reported through ``log.error`` and the
         auction is then not inserted.

     Raises:
         Whatever ``requests.get`` raises; the request is intentionally not
         covered by the parse error handler.
     """
     auction = Auction(source, auction_type)
     response = requests.get(aution_url, headers=self.headers,
                             timeout=timeout)
     try:
         html = response.text
         tree = etree.HTML(html)
         auction.auction_id = aution_id
         auction.region = region_name
         auction.city = city_name
         auction.source_html = html
         auction.html_type = html_type
         try:
             start_price = tree.xpath('//*[@id="Price_Start"]/text()')[0]
             auction.start_auction_price = float(
                 start_price.replace(',', ''))
         except (IndexError, ValueError):
             # The start price is optional: missing or non-numeric -> None.
             auction.start_auction_price = None

         if 'item2' in aution_url:
             self._parse_item2_page(tree, auction)
         else:
             self._parse_other_page(tree, auction)

         if aution_time:
             auction.auction_time = datetime.datetime.strptime(
                 aution_time, "%Y-%m-%d %H:%M:%S")
         auction.insert_db()
     except Exception as e:
         log.error('解析错误,url="{}",e="{}"'.format(aution_url, e))

 def _parse_item2_page(self, tree, auction):
     """Fill *auction* from the layout served under ``item2`` URLs.

     Raises:
         IndexError: A mandatory XPath matched nothing.
         ValueError: The earnest money cell contains no number.
         AttributeError: The court cell lacks the expected label.
     """
     table = '//div[@class="d-m-tb"]/table[1]/'

     auction.auction_name = tree.xpath(
         '//div[@class="d-m-title"]/b/text()')[0]
     auction.auction_level = tree.xpath(table + 'tr[1]/td[2]/text()')[0]
     try:
         auction.assess_value = self._first_number(
             tree.xpath(table + 'tr[4]/td[1]/text()')[0])
     except (IndexError, ValueError):
         # The assessed value is optional on this page.
         auction.assess_value = None
     auction.earnest_money = self._first_number(
         tree.xpath(table + 'tr[3]/td[2]/text()')[0])

     court = tree.xpath('//td[@class="pr7"]/text()')[0]
     auction.court = re.search('法院:(.*?)$', court,
                               re.S | re.M).group(1)
     auction.contacts = tree.xpath('//td[@valign="top"]/text()')[0]

     phone_number = tree.xpath('//td[@colspan="2"]/text()')[0]
     phone_match = re.search('联系电话:(.*?)$', phone_number, re.S | re.M)
     # The cell itself is mandatory, the labelled number inside it is not.
     auction.phone_number = phone_match.group(1) if phone_match else None

     auction.info = [
         tree.xpath(
             'string(//div[@class="panel-con"]/div[@class="d-block"][2])'),
         tree.xpath(
             'string(//div[@class="panel-con"]'
             '/div[@class="d-article d-article2"][3])'),
     ]

 def _parse_other_page(self, tree, auction):
     """Fill *auction* from the layout used by every non-``item2`` URL.

     Raises:
         IndexError: A mandatory XPath matched nothing.
         ValueError: The earnest money cell contains no number, or the
             area cell contains no decimal number.
         AttributeError: The court cell lacks the expected label.
     """
     item = ('/html/body/div[1]/div[7]/div[2]/div[1]/div[2]/div[4]'
             '/li[{}]/text()')

     auction.auction_name = tree.xpath(
         '//div[@class="DivItemName"]/text()')[0]
     auction.auction_level = tree.xpath(item.format(4))[0]
     try:
         auction.assess_value = self._first_number(
             tree.xpath(item.format(5))[0])
     except (IndexError, ValueError):
         # The assessed value is optional on this page.
         auction.assess_value = None
     auction.earnest_money = self._first_number(
         tree.xpath(item.format(6))[0])

     court = tree.xpath(item.format(8))[0]
     auction.court = re.search('法院:(.*?)$', court,
                               re.S | re.M).group(1)

     # As before, the area must contain a decimal point; the fractional
     # digits are now kept instead of being dropped.
     auction.area = self._first_number(
         tree.xpath(item.format(2))[0], r'\d+\.\d+')

     auction.info = [
         tree.xpath('string(//div[@id="Tab1"])'),
         tree.xpath('string(//div[@class="bootstrap-table"])'),
     ]

 @staticmethod
 def _first_number(text, pattern=r'\d[\d,]*(?:\.\d+)?'):
     """Return the first number found in *text* as a float.

     The whole match is used and thousands separators are removed, so
     ``"1,234,567"`` yields ``1234567.0``. The previous code read only the
     first capture group and therefore returned a truncated value.

     Args:
         text: String to search.
         pattern: Regular expression describing an acceptable number.

     Raises:
         ValueError: *text* contains nothing matching *pattern*.
     """
     match = re.search(pattern, text)
     if match is None:
         raise ValueError('no number found in {!r}'.format(text))
     return float(match.group().replace(',', ''))