Ejemplo n.º 1
0
    def crawler_detail_page(self, auction_id, province_name, city_name,
                            type_name, auction_type):
        """Download one auction detail page, parse it and store the record.

        The page for ``auction_id`` is fetched from chinesesfpm.com. Title,
        start time, starting price, auction agency and two description
        blocks are read via XPath/regex, and the populated ``Auction`` is
        saved with ``insert_db()``.

        Nothing is caught here: a missing node (IndexError), a pattern that
        does not match (AttributeError) or an unparsable value (ValueError)
        propagates to the caller.
        """
        page = requests.get(
            'http://www.chinesesfpm.com/index/index/info/biao_id/' + auction_id)
        dom = etree.HTML(page.text)

        # Every structured field hangs off the same positional containers.
        header = '/html/body/div/div[6]/div/div[2]/div[1]'
        summary = header + '/div[2]/div[2]'
        flags = re.S | re.M

        record = Auction(source=source, auction_type=auction_type)
        record.auction_id = auction_id
        record.auction_name = dom.xpath(header + '/div[1]/text()')[0]
        record.html_type = type_name

        time_text = dom.xpath(summary + '/div[2]/text()')[0]
        start_text = re.search('开始时间: (.*?)$', time_text, flags).group(1)
        record.auction_time = datetime.datetime.strptime(
            start_text, "%Y年%m月%d日  %H时%M分%S秒")

        record.province = province_name
        record.city = city_name
        record.info = [
            dom.xpath('string(//*[@id="{}"])'.format(block_id))
            for block_id in ('f4', 'f6')
        ]

        price_text = dom.xpath(
            summary + '/div[5]/div[1]/em[3]/text()')[0].encode('utf-8').decode()
        price_match = re.search('起拍价: ¥(.*)', price_text, flags)
        record.start_auction_price = float(price_match.group(1))

        agency_text = dom.xpath(summary + '/div[5]/div[2]/em[1]/text()')[0]
        record.court = re.search('拍卖机构:(.*)', agency_text, flags).group(1)

        record.source_html = page.text
        record.insert_db()
Ejemplo n.º 2
0
 def parse(self, html):
     """Store every auction linked from a listing page that is not yet known.

     ``html`` is a parsed lxml tree of the listing page. Each
     ``//dl/dd/a/@href`` link is resolved against www.shjiapai.cn, its
     numeric id is taken from the link, and new auctions are fetched,
     parsed with regular expressions and saved through ``insert_db()``.

     The duplicate check runs *before* the detail page is downloaded so
     that known auctions cost no HTTP request. Failures on one link are
     logged (with the exception) and do not stop the remaining links.
     """
     flags = re.S | re.M
     for auction_url in html.xpath("//dl/dd/a/@href"):
         try:
             auction_id = re.search(r'id/(\d+)\.html', auction_url).group(1)
             if check_auction(source=source, auction_id=auction_id):
                 log.info("数据已存在")
                 continue

             url = 'http://www.shjiapai.cn' + auction_url
             con = requests.get(url, headers=self.headers).text

             def field(pattern):
                 # First capture group of `pattern` in the detail page;
                 # raises AttributeError when the page lacks the field.
                 return re.search(pattern, con, flags).group(1)

             auction = Auction(source=source, auction_type=auction_type)
             auction.source_html = con
             auction.auction_id = auction_id
             auction.auction_name = field('楼盘名称.*?">(.*?)</td')
             auction.city = '上海'
             auction.html_type = '房产'
             auction.start_auction_price = field('预计售价.*?">(.*?)</td')
             auction.floor = field('层.*?">(.*?)楼</td')
             auction.area = field('户型面积.*?">(.*?)</td')
             auction.build_type = field('物业类型.*?">(.*?)</td')
             auction.info = field('其它.*?>(.*?)</div')
             auction.insert_db()
         except Exception as e:
             # Best-effort crawl: record why this link failed and move on.
             log.error("{}解析失败,e={}".format(auction_url, e))
Ejemplo n.º 3
0
 def get_list_info(self, url_page, html_type, auction_type):
     """Scrape one listing page and store every auction not yet in ``coll``.

     Each ``div.sflistdiv`` on the page is one auction. Its id is the last
     path segment of the title link; ids already present in ``coll`` for
     this ``source`` are skipped. The remaining fields (region, address,
     auction date, area, floor, price) are read from the item itself.

     Fixes over the previous version:
       * the price is looked up relative to the item (``.//``); an XPath
         starting with ``//`` is evaluated against the whole document, so
         every item used to receive the first price on the page;
       * the whole number is parsed for the price instead of the first
         regex group, which truncated values ("350" -> "35");
       * the collected ``info`` text is actually attached to the record.

     A missing mandatory node or pattern raises and aborts the page, as
     before.
     """
     flags = re.S | re.M
     response = requests.get(url_page, headers=self.headers)
     html = response.text
     tree = etree.HTML(html)
     for item in tree.xpath('//div[@class="sflistdiv"]'):
         auction_id = item.xpath(
             'div[@class="sflistdivn2"]/div[@class="f20hei"]/a/@href'
         )[0].split('/')[-1]
         if coll.find_one({'auction_id': str(auction_id), 'source': source}):
             log.info('id已存在,id="{}"'.format(str(auction_id)))
             continue

         auction = Auction(source, auction_type)
         auction.province = '上海'
         auction.city = '上海'
         auction.html_type = html_type
         auction.source_html = html
         auction.auction_id = auction_id

         # The title is optional; the address is always appended to it.
         titles = item.xpath(
             'div[@class="sflistdivn2"]/div[@class="f20hei"]/a/text()')
         auction_name_ = titles[0] if titles else ''

         region = item.xpath(
             'div[@class="sflistdivn2"]/div[@class="sflistban"]/text()')[0]
         auction.region = re.search(' - (.*?)$', region, flags).group(1)

         auction_time_ = item.xpath(
             'div[@class="sflistdivn2"]/div[@class="sflisttime"]/text()')[0]
         address = item.xpath(
             'div[@class="sflistdivn2"]/div[@class="sflistcan"]/text()')[3]
         auction.auction_name = auction_name_ + address

         try:
             auction_time = re.search('拍卖时间:(.*?)$', auction_time_,
                                      flags).group(1)
             auction.auction_time = datetime.datetime.strptime(
                 auction_time, "%y.%m.%d")
         except (AttributeError, ValueError):
             # No date on the item, or not in yy.mm.dd form.
             auction.auction_time = None

         auction.info = [item.xpath('string(div[@class="sflistdivn2"])')]

         area_ = item.xpath(
             'div[@class="sflistdivn2"]/div[@class="sflistcan"]/span[1]/text()'
         )[0]
         auction.area = re.search('面积:(.*?)$', area_, flags).group(1)
         floor = item.xpath(
             'div[@class="sflistdivn2"]/div[@class="sflistcan"]/span[3]/text()'
         )[0]
         auction.floor = re.search('楼层:(.*?)$', floor, flags).group(1)

         # Relative lookup: the price of *this* item, not the page's first.
         price_text = item.xpath('.//div[@class="f34hong"]/text()')[0]
         price_match = re.search(r'\d+(?:,\d+)*(?:\.\d+)?', price_text)
         # NOTE(review): the x10000 factor presumably converts a price
         # quoted in units of ten thousand -- confirm against the site.
         auction.start_auction_price = float(
             price_match.group(0).replace(',', '')) * 10000
         auction.insert_db()
Ejemplo n.º 4
0
 def detail_parse(auction_res, auction_type, html_type, auction_id):
     """Convert a JSON detail response into an ``Auction`` and save it.

     ``auction_res`` must expose ``.json()``. The decoded payload is stored
     as ``source_html`` and supplies title, prices, court, start time and
     location. ``location`` has to consist of exactly three space-separated
     parts (province, city, region), otherwise ``ValueError`` is raised.
     Floor/area details are copied for '房产', land area for '土地'.
     """
     payload = auction_res.json()

     record = Auction(source=source, auction_type=auction_type)
     record.source_html = payload
     record.html_type = html_type
     record.auction_id = auction_id

     # Straight one-to-one copies from the payload onto the record.
     direct_fields = (
         ('auction_name', 'object_title'),
         ('start_auction_price', 'start_price'),
         ('assess_value', 'appraise_price'),
         ('earnest_money', 'bond_price'),
         ('court', 'court_name'),
     )
     for attribute, key in direct_fields:
         setattr(record, attribute, payload[key])

     start_text = payload['start_time']
     place = payload['location']
     record.auction_time = datetime.datetime.strptime(
         start_text, "%Y-%m-%d %H:%M:%S")
     record.province, record.city, record.region = place.split(' ')

     if html_type == '房产':
         detail = payload['detail']
         record.floor = detail['house_floor']
         record.area = detail['gross_floor_area']
     elif html_type == '土地':
         record.area = payload['detail']['l_land_area']

     record.insert_db()
Ejemplo n.º 5
0
 def start_crawler(self):
     """Walk every category and listing page and hand new items to get_detail.

     For each entry of ``type_list`` the page count comes from
     ``self.get_page``; every listing page is requested as JSON and each
     element of its ``'ls'`` array becomes a pre-filled ``Auction`` that is
     passed to ``self.get_detail``. Items whose id already exists in
     ``coll`` for this ``source`` are skipped before any parsing happens.

     Errors are logged with the exception text and isolated as narrowly as
     possible: a failed request skips that page, a malformed item skips
     only that item instead of the rest of the page.
     """
     for type_num in type_list:
         page_num = self.get_page(type_num.code)
         for page in range(1, int(page_num) + 1):
             url = 'http://auction.jd.com/getJudicatureList.html?page=' + str(
                 page) + '&limit=40&childrenCateId=' + type_num.code
             try:
                 html = s.get(url, headers=self.headers).json()
             except Exception as e:
                 log.error('请求错误,url="{}",e="{}"'.format(url, e))
                 continue

             try:
                 items = list(html['ls'])
             except Exception as e:
                 log.error('解析错误,url="{}",e="{}"'.format(url, e))
                 continue

             for info in items:
                 try:
                     item_id = str(info['id'])  # item id
                     if coll.find_one({'auction_id': item_id,
                                       'source': source}):
                         log.info('id已存在,id="{}"'.format(item_id))
                         continue

                     auction = Auction(source=source,
                                       auction_type=type_num.auction_type)
                     auction.html_type = type_num.html_type
                     auction.auction_name = info['title']  # item title
                     auction.assess_value = info['assessmentPrice']  # appraised value
                     # Province and city are optional; keep both or neither.
                     try:
                         province, city = info['province'], info['city']
                     except Exception:
                         province = city = None
                     auction.province = province
                     auction.city = city
                     # startTime is in milliseconds since the epoch.
                     auction.auction_time = datetime.datetime.fromtimestamp(
                         int(info['startTime']) / 1000)
                     # Stored as the deposit, following the original code.
                     auction.earnest_money = info['currentPrice']
                     auction.auction_id = item_id
                     self.get_detail(item_id, auction)
                 except Exception as e:
                     log.error('解析错误,url="{}",e="{}"'.format(url, e))
Ejemplo n.º 6
0
    def get_detail_info(self, detail_url, region_name, city_name, province_name, id_, html_type, auction_type):
        """Parse one auction detail page and store it.

        Title, starting price, deposit, optional appraised value, court,
        contact, phone and two description tabs are read from the page.
        The auction time comes from the countdown text when the status
        banner says the auction has ended, otherwise from the millisecond
        ``data-start`` attribute. Pages whose banner marks the auction as
        withdrawn/suspended/etc. are dropped without being stored.

        Any failure is logged with the URL and the exception; nothing is
        raised to the caller.
        """
        record = Auction(source, auction_type)
        try:
            page = s.get(detail_url, headers=self.headers).text
            dom = etree.HTML(page)

            def price_cell(row):
                # Raw text of the amount shown in the given row of the price table.
                return dom.xpath(
                    '//*[@id="J_HoverShow"]/tr[{}]/td[1]/span[2]/span/text()'.format(row))[0]

            def start_timestamp():
                # Start time taken from the countdown's millisecond attribute.
                millis = dom.xpath('//li[@id="sf-countdown"]/@data-start')[0]
                return datetime.datetime.fromtimestamp(int(millis) / 1000)

            record.region = region_name
            record.auction_id = id_
            record.city = city_name
            record.html_type = html_type
            record.source_html = page
            record.province = province_name
            record.auction_name = dom.xpath(
                '//div[contains(@class,"pm-main clearfix")]/h1/text()')[0].strip()

            record.start_auction_price = float(
                price_cell(1).replace(',', '').replace(' ', ''))
            record.earnest_money = float(
                price_cell(2).replace(',', '').replace(' ', ''))
            # The appraised value row is optional.
            try:
                record.assess_value = float(price_cell(3).replace(',', ''))
            except Exception:
                record.assess_value = None

            record.court = dom.xpath('//p[@class="subscribe-unit"]/span/a/text()')[0]
            record.contacts = dom.xpath('//p[@class="subscribe-unit"]/span/em/text()')[0]
            record.phone_number = dom.xpath('//p[@class="subscribe-unit"][2]/span[2]/text()')[1]
            record.info = [
                dom.xpath('string(//*[@id="J_DetailTabMain"]/div[4])'),
                dom.xpath('string(//*[@id="J_DetailTabMain"]/div[5])'),
            ]

            banner = dom.xpath('//h1[@class="bid-fail"]/text()')
            status = banner[0] if banner else ''
            dropped_states = ('撤回', '以物抵债', '中止', '暂缓', '撤拍', '待确认')
            if any(state in status for state in dropped_states):
                return

            if '已结束' in status:
                # Finished auctions show the time as a formatted string.
                ended_text = dom.xpath('//span[@class="countdown J_TimeLeft"]/text()')[0]
                record.auction_time = datetime.datetime.strptime(ended_text, "%Y/%m/%d %H:%M:%S")
            else:
                record.auction_time = start_timestamp()
            record.insert_db()
        except Exception as e:
            log.error('解析错误,url="{}",e="{}"'.format(detail_url, e))
Ejemplo n.º 7
0
 def get_detail(self, url_real, city_name, auction_type, html_type, auction_id, province, region):
     """Fetch a detail page and dispatch it to the matching parser.

     Pages containing a server error marker are logged and skipped.
     Otherwise an ``Auction`` is pre-filled with the location and type
     data and passed to ``house_detailparse`` when ``auction_type`` is
     '住宅', or to ``other_parse`` for everything else.
     """
     page = requests.get(url_real, headers=self.headers).text
     error_markers = ('Status 500', 'Error report')
     if any(marker in page for marker in error_markers):
         log.info('请求错误,url="{}"'.format(url_real))
         return

     dom = etree.HTML(page)
     record = Auction(source, auction_type)
     record.html_type = html_type
     record.province = province
     record.city = city_name
     record.region = region
     record.source_html = page

     is_house = auction_type == '住宅'
     parser = self.house_detailparse if is_house else self.other_parse
     parser(url_real, record, dom, auction_id)
 def get_info(self, url):
     """Fetch one qdauction item page, parse it and store the auction.

     Pages that carry the site's "something went wrong" dialog are
     ignored. Otherwise title, starting price, appraised value and deposit
     are read from the attribute table, the id is taken from ``url`` and
     the record is saved with ``insert_db()``.

     The deal time is optional: when the row is missing or not in
     ``%Y-%m-%d %H:%M:%S`` form the record is stored without it. The time
     is set on the *instance*; the previous code assigned it to the
     ``Auction`` class, so it leaked into every record and never reached
     this one.
     """
     response = requests.get(url=url, headers=self.headers)
     html = etree.HTML(response.text)
     print(url)

     # Only the first dialog heading is compared, exactly as before, but a
     # missing dialog (the normal case) no longer prints an IndexError.
     dialog = html.xpath("//div[@class='dialog']/h1/text()")
     if dialog and dialog[0] == "We're sorry, but something went wrong.":
         return

     title = html.xpath("//div[@class='title']/text()")[0]
     start_price = html.xpath(
         "//table[@class='item-attrs']//tr[1]/td[2]/text()")[0]
     assess_price = html.xpath(
         "//table[@class='item-attrs']//tr[1]/td[4]/text()")[0]
     ensure_price = html.xpath(
         "//table[@class='item-attrs']//tr[1]/td[6]/text()")[0]
     auction_id = re.search(
         r"http://auction\.qdauction\.com/items/(\d+)", url).group(1)

     auction = Auction(source=source, auction_type=auction_type)
     auction.auction_name = title
     auction.start_auction_price = start_price
     auction.assess_value = assess_price
     auction.earnest_money = ensure_price
     auction.auction_id = auction_id
     try:
         deal_time = html.xpath("//tr[@class='deal']/td[4]/text()")[0]
         auction.auction_time = datetime.datetime.strptime(
             deal_time, "%Y-%m-%d %H:%M:%S")
     except (IndexError, ValueError) as e:
         print(e)
     auction.source_html = response.text
     auction.city = '青岛'
     auction.html_type = '其他'
     auction.insert_db()
Ejemplo n.º 9
0
    def get_detail(self, id_, auction_time, html_type, auction_type, province,
                   city, region):
        """Fetch the detail page for ``id_``, parse it and store the auction.

        The page is requested from
        ``http://www1.rmfysszc.gov.cn/Handle/<id_>.shtml``. Three page
        layouts are handled, selected by a marker found in the raw HTML:

        * contains ``GetRecord()``  -- dates in ``%Y.%m.%d`` form, info taken
          from the ``bdjs11`` and ``jjjl`` elements, appraised value optional;
        * contains ``bmnumber()``   -- dates in ``%Y-%m-%d`` form, info taken
          from the ``bdjs`` element;
        * anything else             -- fields read from positional ``li``
          items, date accepted as ``%Y-%m-%d`` or ``%Y/%m/%d``; this layout
          sets neither starting price, deposit nor auction level.

        ``auction_time`` is passed through ``self.get_date`` and amounts
        through ``self.get_float`` (both defined elsewhere; presumably they
        normalise the raw text -- confirm against their implementations).

        Nothing is raised: parse errors and request errors are logged with
        the URL and the exception, and the record is not stored.
        """
        auction = Auction(source=source, auction_type=auction_type)
        auction.html_type = html_type
        auction.auction_type = auction_type
        auction.province = province
        auction.city = city
        auction.region = region
        detail_url = 'http://www1.rmfysszc.gov.cn/Handle/' + id_ + '.shtml'
        try:
            response = requests.get(detail_url, headers=self.headers)
            html = response.content.decode()
            auction.source_html = html
            info_list = []
            try:
                # Layout 1: pages whose script calls GetRecord().
                if 'GetRecord()' in html:
                    tree = etree.HTML(html)
                    auction.auction_name = tree.xpath(
                        '//div[@id="Title"]/h1/text()')[0]
                    start_auction_price = tree.xpath(
                        '//*[@id="price"]/div[1]/span/text()')[0]
                    auction.start_auction_price = self.get_float(
                        start_auction_price)
                    assess_value = tree.xpath(
                        '//*[@id="bg1"]/div[1]/table/tr[1]/td/span[2]/text()'
                    )[0]
                    # Only in this layout is an unparsable appraised value
                    # tolerated and stored as None.
                    try:
                        auction.assess_value = self.get_float(assess_value)
                    except Exception as e:
                        auction.assess_value = None
                    earnest_money = tree.xpath(
                        '//*[@id="bg1"]/div[1]/table/tr[2]/td/span[2]/text()'
                    )[0]
                    auction.earnest_money = self.get_float(earnest_money)
                    announcement_date = tree.xpath(
                        '//*[@id="bg1"]/div[1]/table/tr[3]/td/span/text()')[0]
                    # Labelled cells look like "<label>: <value>"; keep the
                    # part after ": " up to the end of the line.
                    announcement_date_ = re.search(': (.*?)$',
                                                   announcement_date,
                                                   re.S | re.M).group(1)
                    auction.announcement_date = datetime.datetime.strptime(
                        announcement_date_, "%Y.%m.%d")
                    auction_level = tree.xpath(
                        '//*[@id="bg1"]/div[1]/table/tr[4]/td/span/text()')[0]
                    auction.auction_level = re.search(': (.*?)$',
                                                      auction_level,
                                                      re.S | re.M).group(1)
                    court = tree.xpath(
                        '//*[@id="bg1"]/div[2]/table/tr[1]/td/span/text()')[0]
                    auction.court = re.search(': (.*?)$', court,
                                              re.S | re.M).group(1)
                    info_list.append(
                        tree.xpath(
                            'string(//*[@id="bdjs11"])').encode().decode())
                    info_list.append(
                        tree.xpath(
                            'string(//*[@id="jjjl"])').encode().decode())
                    contacts = tree.xpath(
                        '//*[@id="bg1"]/div[2]/table/tr[2]/td/span/text()')[0]
                    auction.contacts = re.search(': (.*?)$', contacts,
                                                 re.S | re.M).group(1)
                    phone_number = tree.xpath(
                        '//*[@id="bg1"]/div[2]/table/tr[3]/td/span/text()')[0]
                    auction.phone_number = re.search(': (.*?)$', phone_number,
                                                     re.S | re.M).group(1)
                    auction.info = info_list
                    # The building-type cell is optional.
                    try:
                        auction.build_type = tree.xpath(
                            '//*[@id="bdjs11"]/table[1]/tr[2]/td[4]/text()')[0]
                    except Exception as e:
                        auction.build_type = None
                    auction.auction_id = id_
                    auction.auction_time = self.get_date(date=auction_time)
                    auction.insert_db()
                # Layout 2: pages whose script calls bmnumber(). Same table
                # structure as layout 1, but a different date format and a
                # single info element.
                elif 'bmnumber()' in html:
                    tree = etree.HTML(html)
                    auction.auction_name = tree.xpath(
                        '//div[@id="Title"]/h1/text()')[0]
                    start_auction_price = tree.xpath(
                        '//*[@id="price"]/div[1]/span/text()')[0]
                    auction.start_auction_price = self.get_float(
                        start_auction_price)
                    assess_value = tree.xpath(
                        '//*[@id="bg1"]/div[1]/table/tr[1]/td/span[2]/text()'
                    )[0]
                    auction.assess_value = self.get_float(assess_value)
                    earnest_money = tree.xpath(
                        '//*[@id="bg1"]/div[1]/table/tr[2]/td/span[2]/text()'
                    )[0]
                    auction.earnest_money = self.get_float(earnest_money)
                    announcement_date = tree.xpath(
                        '//*[@id="bg1"]/div[1]/table/tr[3]/td/span/text()')[0]
                    announcement_date_ = re.search(': (.*?)$',
                                                   announcement_date,
                                                   re.S | re.M).group(1)
                    auction.announcement_date = datetime.datetime.strptime(
                        announcement_date_, "%Y-%m-%d")
                    auction_level = tree.xpath(
                        '//*[@id="bg1"]/div[1]/table/tr[4]/td/span/text()')[0]
                    auction.auction_level = re.search(': (.*?)$',
                                                      auction_level,
                                                      re.S | re.M).group(1)
                    court = tree.xpath(
                        '//*[@id="bg1"]/div[2]/table/tr[1]/td/span/text()')[0]
                    auction.court = re.search(': (.*?)$', court,
                                              re.S | re.M).group(1)
                    info_list.append(
                        tree.xpath(
                            'string(//*[@id="bdjs"])').encode().decode())
                    contacts = tree.xpath(
                        '//*[@id="bg1"]/div[2]/table/tr[2]/td/span/text()')[0]
                    auction.contacts = re.search(': (.*?)$', contacts,
                                                 re.S | re.M).group(1)
                    phone_number = tree.xpath(
                        '//*[@id="bg1"]/div[2]/table/tr[3]/td/span/text()')[0]
                    auction.phone_number = re.search(': (.*?)$', phone_number,
                                                     re.S | re.M).group(1)
                    auction.info = info_list
                    # NOTE(review): info is read from "bdjs" above but the
                    # building type from "bdjs11" -- confirm this is intended.
                    try:
                        auction.build_type = tree.xpath(
                            '//*[@id="bdjs11"]/table[1]/tr[2]/td[4]/text()')[0]
                    except Exception as e:
                        auction.build_type = None
                    auction.auction_id = id_
                    auction.auction_time = self.get_date(date=auction_time)
                    auction.insert_db()
                # Layout 3: every other page; values are taken verbatim from
                # positional list items, without the ": " label stripping.
                else:
                    tree = etree.HTML(html)
                    auction.auction_name = tree.xpath(
                        '//*[@id="xmgg"]/div/div[1]/text()')[0]
                    assess_value = tree.xpath(
                        '/html/body/div[6]/table/tr/td/ul/li[3]/span/text()'
                    )[0]
                    auction.assess_value = self.get_float(assess_value)
                    announcement_date = tree.xpath(
                        '/html/body/div[6]/table/tr/td/ul/li[2]/span/text()'
                    )[0]
                    # Try the dashed date form first, then the slashed one;
                    # a failure of the second attempt propagates to the
                    # parse-error handler below.
                    try:
                        auction.announcement_date = datetime.datetime.strptime(
                            announcement_date, "%Y-%m-%d")
                    except Exception as e:
                        auction.announcement_date = datetime.datetime.strptime(
                            announcement_date, "%Y/%m/%d")
                    auction.court = tree.xpath(
                        '/html/body/div[6]/table/tr/td/ul/li[1]/span/text()'
                    )[0]
                    info_list.append(
                        tree.xpath(
                            'string(//*[@id="bdxx"]/div)').encode().decode())
                    info_list.append(
                        tree.xpath('string(//*[@id="tjzl"]/div/div[2])').
                        encode().decode())
                    auction.contacts = tree.xpath(
                        '/html/body/div[6]/table/tr/td/ul/li[4]/span/text()'
                    )[0]
                    auction.phone_number = tree.xpath(
                        '/html/body/div[6]/table/tr/td/ul/li[5]/span/text()'
                    )[0]
                    auction.info = info_list
                    try:
                        auction.build_type = tree.xpath(
                            '//*[@id="bdxx"]/div/div[2]/table/tr[2]/td[3]/text()'
                        )[0]
                    except Exception as e:
                        auction.build_type = None
                    auction.auction_id = id_
                    auction.auction_time = self.get_date(date=auction_time)
                    auction.insert_db()
            except Exception as e:
                # Any parsing or storage failure in the branches above.
                log.error('解析错误,url="{}",e="{}"'.format(detail_url, e))

        except Exception as e:
            # Failures while requesting or decoding the page.
            log.error('详情页请求错误,url="{}",e="{}"'.format(detail_url, e))
Ejemplo n.º 10
0
 def get_detail(self, aution_url, aution_id, aution_time, region_name,
                city_name, html_type, auction_type):
     """Fetch one auction detail page, parse it and store the record.

     Two page layouts exist. URLs containing ``item2`` use the
     ``d-m-tb`` table layout; all others use the positional ``li`` layout,
     which additionally provides the area. ``aution_time``, when given,
     must be in ``%Y-%m-%d %H:%M:%S`` form.

     Starting price and appraised value are optional (``None`` when
     absent or unparsable). Every other parse failure is logged with the
     URL and the exception, and the record is not stored. Request errors
     are not caught here.

     Numeric fields are now parsed as the complete number. The previous
     code took only the first regex group, which truncated values
     ("50,000" -> 50, "500000" -> 50000) and cut the area down to its
     integer part.
     """

     def to_number(text):
         # First number in `text`, with thousands separators and an
         # optional decimal part, as a float.
         match = re.search(r'\d+(?:,\d+)*(?:\.\d+)?', text)
         if match is None:
             raise ValueError('no number found in {!r}'.format(text))
         return float(match.group(0).replace(',', ''))

     info = []
     aution = Auction(source, auction_type)
     response = requests.get(aution_url, headers=self.headers)
     try:
         html = response.text
         tree = etree.HTML(html)
         aution.auction_id = aution_id
         aution.region = region_name
         aution.city = city_name
         aution.source_html = html
         aution.html_type = html_type
         try:
             aution.start_auction_price = float(
                 tree.xpath('//*[@id="Price_Start"]/text()')[0].replace(
                     ',', ''))
         except Exception:
             aution.start_auction_price = None

         if 'item2' in aution_url:
             aution.auction_name = tree.xpath(
                 '//div[@class="d-m-title"]/b/text()')[0]
             aution.auction_level = tree.xpath(
                 '//div[@class="d-m-tb"]/table[1]/tr[1]/td[2]/text()')[0]
             try:
                 aution.assess_value = to_number(tree.xpath(
                     '//div[@class="d-m-tb"]/table[1]/tr[4]/td[1]/text()'
                 )[0])
             except Exception:
                 aution.assess_value = None
             aution.earnest_money = to_number(tree.xpath(
                 '//div[@class="d-m-tb"]/table[1]/tr[3]/td[2]/text()')[0])
             court = tree.xpath('//td[@class="pr7"]/text()')[0]
             aution.court = re.search('法院:(.*?)$', court,
                                      re.S | re.M).group(1)
             aution.contacts = tree.xpath('//td[@valign="top"]/text()')[0]
             phone_number = tree.xpath('//td[@colspan="2"]/text()')[0]
             # The phone label is optional on this layout.
             try:
                 aution.phone_number = re.search('联系电话:(.*?)$',
                                                 phone_number,
                                                 re.S | re.M).group(1)
             except Exception:
                 aution.phone_number = None
             info.append(
                 tree.xpath(
                     'string(//div[@class="panel-con"]/div[@class="d-block"][2])'
                 ))
             info.append(
                 tree.xpath(
                     'string(//div[@class="panel-con"]/div[@class="d-article d-article2"][3])'
                 ))
         else:
             # All summary fields are items of the same positional list.
             item = '/html/body/div[1]/div[7]/div[2]/div[1]/div[2]/div[4]/li[{}]/text()'
             aution.auction_name = tree.xpath(
                 '//div[@class="DivItemName"]/text()')[0]
             aution.auction_level = tree.xpath(item.format(4))[0]
             try:
                 aution.assess_value = to_number(
                     tree.xpath(item.format(5))[0])
             except Exception:
                 aution.assess_value = None
             aution.earnest_money = to_number(tree.xpath(item.format(6))[0])
             court = tree.xpath(item.format(8))[0]
             aution.court = re.search('法院:(.*?)$', court,
                                      re.S | re.M).group(1)
             aution.area = to_number(tree.xpath(item.format(2))[0])
             info.append(tree.xpath('string(//div[@id="Tab1"])'))
             info.append(
                 tree.xpath('string(//div[@class="bootstrap-table"])'))

         aution.info = info
         if aution_time:
             aution.auction_time = datetime.datetime.strptime(
                 aution_time, "%Y-%m-%d %H:%M:%S")
         aution.insert_db()
     except Exception as e:
         log.error('解析错误,url="{}",e="{}"'.format(aution_url, e))
Ejemplo n.º 11
0
 def get_detail(self, source, auction_id, auction_status):
     """Parse one auction detail page and upsert it via ``Auction.update()``.

     The page is obtained through ``self.request_url(auction_id)``. Prices
     are divided by 10000 before being stored. Stage, name, address,
     start/end time and image URLs come from the page; current/deal price
     and participant/visit counts come from helper methods on ``self``.
     The item name is additionally run through
     ``CutMatch.to_match('上海', name)`` and its result keys are copied
     onto the record.

     Any failure after the request is logged together with the auction id
     and the exception; nothing is raised from the parsing stage.

     Changes over the previous version: the error log now says which
     auction failed and why, the deposit tolerates thousands separators
     like the other two prices, and the title is read from the page once.
     """
     response, url = self.request_url(auction_id)
     try:
         html = response.text
         tree = etree.HTML(html)
         auction = Auction(source=source)
         auction.url = url

         # Starting price, divided by 10000.
         startPrice = self.get_startPrice(html=html)
         auction.startPrice = float(
             startPrice.replace(' ', '').replace(',', '')) / 10000
         # Appraised price, divided by 10000.
         evalPrice = self.get_evalPrice(tree=tree)
         auction.evalPrice = float(
             evalPrice.replace(' ', '').replace('¥', '').replace(',', '')) / 10000
         # Deposit, divided by 10000; the fractional part is dropped first.
         bond = tree.xpath('//div[@id="content"]/div/div[2]/div[1]/div/div[2]/div[10]/ul[3]/li/span[2]/text()')[0]
         bond = bond.replace(' ', '').replace('¥', '').replace(',', '').split('.')[0]
         auction.bond = float(bond) / 10000

         # The heading reads "【stage】name"; strip layout whitespace first.
         title = tree.xpath('//div[@id="content"]/div[1]/div[2]/div[1]/div[1]/div[2]/h1/text()')[0]
         title = title.replace(' ', '').replace('\n', '').replace('\t', '')
         # Auction stage: the text between the brackets.
         auctionStage = title.split('】')[0].split('【')[1]
         auction.auctionStage = auctionStage
         # Number of auction rounds, derived from the stage.
         auction.auctionCount = self.get_auctionCount(auctionStage)
         # Item name: the text after the closing bracket.
         auction_name = title.split('】')[1]
         auction.auction_name = auction_name

         # Split the name into city / region / community / address parts
         # and matched library entries via the project's CutMatch helper.
         cut_info = CutMatch.to_match('上海', auction_name)
         auction.matchCity = cut_info['matchCity']        # matched city
         auction.matchRegion = cut_info['matchRegion']    # matched region
         auction.matchName = cut_info['matchName']        # matched community name
         auction.matchAddress = cut_info['matchAddress']  # matched address
         auction.roomNum = cut_info['cutRoomnum']         # room number after splitting
         auction.houseNum = cut_info['cutHousenum']       # building number after splitting
         auction.cutCity = cut_info['cutCity']            # city after splitting
         auction.cutRegion = cut_info['cutRegion']        # region after splitting
         auction.cutName = cut_info['cutName']            # community name after splitting
         auction.cutAddress = cut_info['cutAddress']      # address after splitting
         # Coordinates; per the original note they come from the AMap API.
         auction.lat = cut_info['mapLat']
         auction.lng = cut_info['mapLng']

         # Address: "<city> <region> ..." separated by single spaces.
         address = tree.xpath('//em[@id="paimaiAddress"]/text()')[0]
         auction.address = address
         address_parts = address.split(' ')
         auction.city = address_parts[0]
         auction.region = address_parts[1]

         skulid = re.search('id="skuId" value="(.*?)"', html, re.S | re.M).group(1)
         # Bidding state, current price and deal price.
         curPrice, dealPrice = self.get_curPrice_and_dealPrice(skulid, auction_id)
         auction.biddingState = auction_status
         auction.curPrice = curPrice
         auction.dealPrice = dealPrice

         # Start and end time of the auction.
         time_format = '%Y-%m-%d %H:%M:%S.%f'
         startShootingDate = tree.xpath('//input[@id="startTime"]/@value')[0]
         auction.startShootingDate = datetime.datetime.strptime(
             startShootingDate, time_format)
         endShootingDate = tree.xpath('//input[@id="endTime"]/@value')[0]
         auction.endShootingDate = datetime.datetime.strptime(
             endShootingDate, time_format)

         # Images: request the 1000x750 variant and pass each one through
         # qiniufetch, storing the URL it returns.
         houseImgUrls = []
         for houseImgUrl in tree.xpath('//div[@id="spec-list"]/div/ul/li/img/@src'):
             big_img = ('http:' + houseImgUrl).replace('jfs', 's1000x750_jfs')
             houseImgUrls.append(qiniufetch(big_img, big_img))
         auction.houseImgUrls = houseImgUrls

         # Number of participants and page views.
         participantsNumber, visitCount = self.get_participantsNumber_and_visitCount(auction_id)
         auction.participantsNumber = participantsNumber
         auction.visitCount = visitCount
         # Auction item id and originating site.
         auction.auctionId = auction_id
         auction.source = source
         auction.update()
     except Exception as e:
         log.error('解析错误,auction_id="{}",e="{}"'.format(auction_id, e))