Ejemplo n.º 1
0
 def detail_parse(auction_res, auction_type, html_type, auction_id):
     """Build an ``Auction`` from a JSON detail response and store it.

     Args:
         auction_res: Response object whose ``json()`` returns the detail
             payload.
         auction_type: Passed through to the ``Auction`` constructor.
         html_type: Listing category. '房产' and '土地' copy additional
             fields from the payload's ``detail`` entry; any other value
             copies none.
         auction_id: Identifier recorded on the auction.

     Raises:
         KeyError: If an expected key is missing (assuming the payload is
             a dict).
         ValueError: If ``start_time`` does not match
             ``%Y-%m-%d %H:%M:%S`` or ``location`` does not split into
             exactly three space-separated parts.

     ``source`` and ``Auction`` are resolved from the surrounding module,
     which is not visible from this function.
     """
     payload = auction_res.json()
     record = Auction(source=source, auction_type=auction_type)
     record.source_html = payload
     record.html_type = html_type
     record.auction_id = auction_id

     # (attribute on the record, key in the payload), copied verbatim.
     direct_fields = (
         ('auction_name', 'object_title'),
         ('start_auction_price', 'start_price'),
         ('assess_value', 'appraise_price'),
         ('earnest_money', 'bond_price'),
         ('court', 'court_name'),
     )
     for attribute, key in direct_fields:
         setattr(record, attribute, payload[key])

     start_text = payload['start_time']
     place = payload['location']
     record.auction_time = datetime.datetime.strptime(
         start_text, "%Y-%m-%d %H:%M:%S")
     # The unpacking fails before any attribute is set when the location
     # does not consist of exactly three parts.
     record.province, record.city, record.region = place.split(' ')

     if html_type == '房产':
         extra = payload['detail']
         record.floor = extra['house_floor']
         record.area = extra['gross_floor_area']
     elif html_type == '土地':
         record.area = payload['detail']['l_land_area']
     record.insert_db()
Ejemplo n.º 2
0
 def parse(self, html):
     """Fetch every auction linked from a listing page and store new ones.

     Args:
         html: Parsed listing page; ``html.xpath`` must yield the relative
             hrefs of the detail pages.

     Each link is handled independently: any failure while fetching or
     parsing one auction is logged together with the exception, and the
     loop moves on to the next link.  ``source``, ``auction_type``,
     ``check_auction``, ``Auction`` and ``log`` are resolved from the
     surrounding module, which is not visible from this function.
     """

     def field(pattern, text):
         """Return group 1 of ``pattern`` in ``text``; raises if absent."""
         return re.search(pattern, text, re.S | re.M).group(1)

     for auction_url in html.xpath("//dl/dd/a/@href"):
         try:
             # Raw string with an escaped dot: only a literal ".html"
             # suffix is accepted and "\d" is no invalid string escape.
             auction_id = re.search(r'id/(\d+)\.html',
                                    auction_url).group(1)
             # The id comes from the link itself, so the duplicate check
             # runs before the download and known auctions cost no
             # HTTP request.
             if check_auction(source=source, auction_id=auction_id):
                 log.info("数据已存在")
                 continue
             url = 'http://www.shjiapai.cn' + auction_url
             con = requests.get(url, headers=self.headers).text
             auction = Auction(source=source, auction_type=auction_type)
             auction.source_html = con
             auction.auction_id = auction_id
             auction.auction_name = field('楼盘名称.*?">(.*?)</td', con)
             auction.city = '上海'
             auction.html_type = '房产'
             auction.start_auction_price = field('预计售价.*?">(.*?)</td',
                                                 con)
             auction.floor = field('层.*?">(.*?)楼</td', con)
             auction.area = field('户型面积.*?">(.*?)</td', con)
             auction.build_type = field('物业类型.*?">(.*?)</td', con)
             auction.info = field('其它.*?>(.*?)</div', con)
             auction.insert_db()
         except Exception as e:
             # Best-effort per link; keep the cause so failures can be
             # diagnosed from the log.
             log.error("{}解析失败, e={}".format(auction_url, e))
Ejemplo n.º 3
0
 def get_list_info(self, url_page, html_type, auction_type):
     """Scrape one listing page and store every auction not yet known.

     Args:
         url_page: URL of the listing page to download.
         html_type: Category recorded on each auction.
         auction_type: Passed through to the ``Auction`` constructor.

     Items whose id is already present in ``coll`` for this ``source`` are
     skipped.  The title and the auction time are optional (empty string /
     ``None`` when missing); any other missing element raises
     ``IndexError`` or ``AttributeError`` to the caller, as do network
     errors.  ``source``, ``coll``, ``Auction`` and ``log`` are resolved
     from the surrounding module, which is not visible from this function.
     """

     def capture(pattern, text):
         """Return group 1 of ``pattern`` in ``text``; raises if absent."""
         return re.search(pattern, text, re.S | re.M).group(1)

     response = requests.get(url_page, headers=self.headers)
     html = response.text
     tree = etree.HTML(html)
     for item in tree.xpath('//div[@class="sflistdiv"]'):
         auction_id = item.xpath(
             'div[@class="sflistdivn2"]/div[@class="f20hei"]/a/@href'
         )[0].split('/')[-1]
         is_exist = coll.find_one({
             'auction_id': str(auction_id),
             'source': source
         })
         if is_exist:
             log.info('id已存在,id="{}"'.format(str(auction_id)))
             continue

         auction = Auction(source, auction_type)
         auction.province = '上海'
         auction.city = '上海'
         auction.html_type = html_type
         auction.source_html = html
         auction.auction_id = auction_id

         try:
             title = item.xpath(
                 'div[@class="sflistdivn2"]/div[@class="f20hei"]/a/text()'
             )[0]
         except IndexError:
             title = ''
         region = item.xpath(
             'div[@class="sflistdivn2"]/div[@class="sflistban"]/text()')[0]
         auction.region = capture(' - (.*?)$', region)
         time_text = item.xpath(
             'div[@class="sflistdivn2"]/div[@class="sflisttime"]/text()')[0]
         address = item.xpath(
             'div[@class="sflistdivn2"]/div[@class="sflistcan"]/text()'
         )[3]
         auction.auction_name = title + address

         try:
             auction.auction_time = datetime.datetime.strptime(
                 capture('拍卖时间:(.*?)$', time_text), "%y.%m.%d")
         except (AttributeError, ValueError):
             # No time label, or a date in an unexpected format.
             auction.auction_time = None

         # Keep the item's full text; previously it was collected into a
         # local list that was never stored on the record.
         auction.info = [item.xpath('string(div[@class="sflistdivn2"])')]

         area_text = item.xpath(
             'div[@class="sflistdivn2"]/div[@class="sflistcan"]/span[1]/text()'
         )[0]
         auction.area = capture('面积:(.*?)$', area_text)
         floor_text = item.xpath(
             'div[@class="sflistdivn2"]/div[@class="sflistcan"]/span[3]/text()'
         )[0]
         auction.floor = capture('楼层:(.*?)$', floor_text)

         # ".//" keeps the lookup inside this item; a leading "//" searches
         # the whole document and returns the first price on the page for
         # every item.
         price_text = item.xpath('.//div[@class="f34hong"]/text()')[0]
         # Take the whole number (thousands commas and decimals included);
         # the former "(\d+),?(\d+)" + group(1) dropped trailing digits.
         price_match = re.search(r'\d[\d,]*(?:\.\d+)?', price_text)
         auction.start_auction_price = float(
             price_match.group(0).replace(',', '')) * 10000
         auction.insert_db()
Ejemplo n.º 4
0
 def get_detail(self, aution_url, aution_id, aution_time, region_name,
                city_name, html_type, auction_type):
     """Download one auction detail page, parse it and store the record.

     Args:
         aution_url: Detail page URL; URLs containing 'item2' use a
             different page layout from the rest.
         aution_id: Identifier recorded on the auction.
         aution_time: Start time as ``%Y-%m-%d %H:%M:%S``, or a falsy
             value to leave the auction time unset.
         region_name: Region recorded on the auction.
         city_name: City recorded on the auction.
         html_type: Category recorded on the auction.
         auction_type: Passed through to the ``Auction`` constructor.

     The starting price, the appraisal value and (for 'item2' pages) the
     phone number are optional and become ``None`` when they cannot be
     extracted.  Any other parsing failure is logged and the record is not
     stored.  Errors raised by the HTTP request itself propagate to the
     caller.
     """

     def first(tree, expr):
         """Return the first XPath result; IndexError when there is none."""
         return tree.xpath(expr)[0]

     def to_number(text):
         """Parse the first number in ``text``, ignoring thousands commas.

         Matches the whole number including decimals; the former
         "(\d+),?(\d+)" + group(1) kept only a truncated leading part.
         """
         found = re.search(r'\d[\d,]*(?:\.\d+)?', text)
         return float(found.group(0).replace(',', ''))

     optional_errors = (IndexError, AttributeError, ValueError)
     info = []
     aution = Auction(source, auction_type)
     response = requests.get(aution_url, headers=self.headers)
     try:
         html = response.text
         tree = etree.HTML(html)
         aution.auction_id = aution_id
         aution.region = region_name
         aution.city = city_name
         aution.source_html = html
         aution.html_type = html_type
         try:
             aution.start_auction_price = float(
                 first(tree, '//*[@id="Price_Start"]/text()').replace(
                     ',', ''))
         except (IndexError, ValueError):
             aution.start_auction_price = None

         if 'item2' in aution_url:
             aution.auction_name = first(
                 tree, '//div[@class="d-m-title"]/b/text()')
             aution.auction_level = first(
                 tree, '//div[@class="d-m-tb"]/table[1]/tr[1]/td[2]/text()')
             try:
                 aution.assess_value = to_number(first(
                     tree,
                     '//div[@class="d-m-tb"]/table[1]/tr[4]/td[1]/text()'))
             except optional_errors:
                 aution.assess_value = None
             aution.earnest_money = to_number(first(
                 tree, '//div[@class="d-m-tb"]/table[1]/tr[3]/td[2]/text()'))
             court = first(tree, '//td[@class="pr7"]/text()')
             aution.court = re.search('法院:(.*?)$', court,
                                      re.S | re.M).group(1)
             aution.contacts = first(tree, '//td[@valign="top"]/text()')
             phone_number = first(tree, '//td[@colspan="2"]/text()')
             try:
                 aution.phone_number = re.search('联系电话:(.*?)$',
                                                 phone_number,
                                                 re.S | re.M).group(1)
             except AttributeError:
                 aution.phone_number = None
             info.append(
                 tree.xpath(
                     'string(//div[@class="panel-con"]/div[@class="d-block"][2])'
                 ))
             info.append(
                 tree.xpath(
                     'string(//div[@class="panel-con"]/div[@class="d-article d-article2"][3])'
                 ))
         else:
             # All summary fields are <li> entries of the same list.
             li_text = ('/html/body/div[1]/div[7]/div[2]/div[1]/div[2]'
                        '/div[4]/li[{}]/text()')
             aution.auction_name = first(
                 tree, '//div[@class="DivItemName"]/text()')
             aution.auction_level = first(tree, li_text.format(4))
             try:
                 aution.assess_value = to_number(
                     first(tree, li_text.format(5)))
             except optional_errors:
                 aution.assess_value = None
             aution.earnest_money = to_number(
                 first(tree, li_text.format(6)))
             court = first(tree, li_text.format(8))
             aution.court = re.search('法院:(.*?)$', court,
                                      re.S | re.M).group(1)
             # Keeps the decimals and also accepts integer areas, which
             # the former "(\d+)\.(\d+)" pattern rejected.
             aution.area = to_number(first(tree, li_text.format(2)))
             info.append(tree.xpath('string(//div[@id="Tab1"])'))
             info.append(
                 tree.xpath('string(//div[@class="bootstrap-table"])'))

         aution.info = info
         if aution_time:
             aution.auction_time = datetime.datetime.strptime(
                 aution_time, "%Y-%m-%d %H:%M:%S")
         aution.insert_db()
     except Exception as e:
         log.error('解析错误,url="{}",e="{}"'.format(aution_url, e))
Ejemplo n.º 5
0
 def detail_parse(self, **kwargs):
     """Parse one auction detail page and persist it via ``Auction.update``.

     Required keyword arguments: ``auction_name``, ``auction_id``,
     ``status``, ``current_price``, ``evalprice``, ``start_time``,
     ``end_time``, ``partnumber``, ``visitCount`` and ``url``.

     The method logs and returns without saving when address matching
     fails, when the detail page cannot be fetched or decoded as GBK, or
     when the title, auction stage, starting price or deposit cannot be
     extracted.  The floor area is optional: a failure to fetch or parse
     it is logged and the record is still saved.  The starting price and
     the deposit are stored divided by 10000.
     """
     auction = Auction(source='ali')
     auction.auction_name = kwargs['auction_name']
     auction.auctionId = kwargs['auction_id']
     auction.biddingState = kwargs['status']
     auction.curPrice = kwargs['current_price']
     auction.evalPrice = kwargs['evalprice']
     auction.startShootingDate = kwargs['start_time']
     auction.endShootingDate = kwargs['end_time']
     auction.participantsNumber = kwargs['partnumber']
     auction.visitCount = kwargs['visitCount']
     detail_url = kwargs['url']
     auction.url = detail_url

     # (attribute on the record, key in the matcher result).
     cut_fields = (
         ('matchCity', 'matchCity'),        # city matched in the library
         ('matchRegion', 'matchRegion'),    # region matched in the library
         ('matchName', 'matchName'),        # community name matched
         ('matchAddress', 'matchAddress'),  # address matched in the library
         ('roomNum', 'cutRoomnum'),         # room number after splitting
         ('houseNum', 'cutHousenum'),       # building number after splitting
         ('cutCity', 'cutCity'),            # city after splitting
         ('cutRegion', 'cutRegion'),        # region after splitting
         ('cutName', 'cutName'),            # community name after splitting
         ('cutAddress', 'cutAddress'),      # address after splitting
         ('lat', 'mapLat'),                 # latitude from the Amap API
         ('lng', 'mapLng'),                 # longitude from the Amap API
     )
     try:
         # TODO: call Huang Cunliang's method to split the name and obtain
         # the city, region, community name, coordinates, etc.
         cut_info = CutMatch.to_match('上海', kwargs['auction_name'])
         for attribute, key in cut_fields:
             setattr(auction, attribute, cut_info[key])
     except Exception as e:
         log.error(e)
         return

     try:
         detail_res = requests.get(url=detail_url, headers=self.headers)
         # Decode once and reuse; the page is needed for the tree and for
         # two regex searches.
         page = detail_res.content.decode('gbk')
     except Exception as e:
         log.error('url={}, e={}'.format(detail_url, e))
         return
     html = etree.HTML(page)
     try:
         title = html.xpath(
             '//div[contains(@class,"pm-main clearfix")]/h1/text()'
         )[0].strip()
     except (IndexError, AttributeError):
         log.error('没有标题 url={}'.format(detail_url))
         return

     stage_match = re.search('【(.*?)】', title)
     price_match = re.search('起拍价¥(.*?) ,', page)
     bond_match = re.search('保 证 金.*?J_Price">(.*?)</span', page,
                            re.S | re.M)
     if not (stage_match and price_match and bond_match):
         # Same log-and-skip policy as for a missing title, instead of an
         # AttributeError from calling .group() on None.
         log.error('缺少拍卖阶段、起拍价或保证金 url={}'.format(detail_url))
         return
     try:
         # Convert before any image is uploaded so that a malformed price
         # does not leave orphaned uploads behind.
         start_price = float(price_match.group(1).replace(',', '')) / 10000
         bond = float(bond_match.group(1).replace(',', '').strip()) / 10000
     except ValueError as e:
         log.error('价格解析失败 url={}, e={}'.format(detail_url, e))
         return

     auctionStage = stage_match.group(1)
     auction.auctionStage = auctionStage
     auction.auctionCount = self.get_auctionCount(auctionStage)
     auction.startPrice = start_price
     auction.bond = bond

     comm_url = 'http://sf.taobao.com/json/getGovItemSummary.htm?itemId={}'.format(
         kwargs['auction_id'])
     try:
         # The area is optional, so the request is part of the best-effort
         # section too.
         res = requests.get(comm_url, headers=self.headers)
         auction.area = int(res.json()['props']['area']['value']) / 100.0
     except Exception as e:
         log.error('面积获取失败 url={}, e={}'.format(comm_url, e))

     images = html.xpath("//div[@class='pm-pic pm-s80 ']/a/img/@src")
     image_list = []
     for image_url in images:
         # Dropping the thumbnail suffix yields the full-size image URL.
         big_img = image_url.replace('_80x80.jpg', '')
         image_list.append(qiniufetch(big_img, big_img))
     auction.houseImgUrls = image_list

     if kwargs['status'] == '已成交':
         # A "sold" listing whose failure banner mentions a failed or
         # passed-in auction is recorded as '流拍'.  A missing banner is
         # treated as "no failure" rather than raising IndexError.
         banner = html.xpath('//h1[@class="bid-fail"]/text()')
         if re.search('失败|流拍', banner[0] if banner else '') is not None:
             auction.biddingState = '流拍'
     auction.update()