コード例 #1
0
 def detail_parse(self, **kwargs):
     """Build an ``Auction`` (source ``'ali'``) from one listing and persist it.

     Reads these keys from ``kwargs``: ``auction_name``, ``auction_id``,
     ``status``, ``current_price``, ``evalprice``, ``start_time``,
     ``end_time``, ``partnumber``, ``visitCount`` and ``url``.

     Steps:
       1. Copy the listing fields onto a new ``Auction``.
       2. Run ``CutMatch.to_match`` on the auction name to fill the
          matched/cut location fields.
       3. Download the detail page (GBK encoded) and extract the auction
          stage, starting price, deposit and image URLs.
       4. Query the item-summary JSON endpoint for the area (best effort).
       5. Call ``auction.update()``.

     Any failure in a mandatory step is logged and the method returns
     without calling ``auction.update()``.

     Returns:
         None.
     """
     auction = Auction(source='ali')
     auction.auction_name = kwargs['auction_name']
     auction.auctionId = kwargs['auction_id']
     auction.biddingState = kwargs['status']
     auction.curPrice = kwargs['current_price']
     auction.evalPrice = kwargs['evalprice']
     auction.startShootingDate = kwargs['start_time']
     auction.endShootingDate = kwargs['end_time']
     auction.participantsNumber = kwargs['partnumber']
     auction.visitCount = kwargs['visitCount']
     detail_url = kwargs['url']
     auction.url = detail_url

     try:
         # TODO: call Huang Cunliang's splitting method to obtain the city,
         # region, community name, latitude/longitude, etc.
         cut_info = CutMatch.to_match('上海', kwargs['auction_name'])
         # City matched in the library after splitting
         auction.matchCity = cut_info['matchCity']
         # Region matched in the library after splitting
         auction.matchRegion = cut_info['matchRegion']
         # Community name matched in the library after splitting
         auction.matchName = cut_info['matchName']
         # Address matched in the library after splitting
         auction.matchAddress = cut_info['matchAddress']
         # Room number after splitting
         auction.roomNum = cut_info['cutRoomnum']
         # Building number after splitting
         auction.houseNum = cut_info['cutHousenum']
         # City after splitting
         auction.cutCity = cut_info['cutCity']
         # Region after splitting
         auction.cutRegion = cut_info['cutRegion']
         # Community name after splitting
         auction.cutName = cut_info['cutName']
         # Address after splitting
         auction.cutAddress = cut_info['cutAddress']
         # Latitude/longitude obtained from the Gaode (AMap) API after splitting
         auction.lat = cut_info['mapLat']
         auction.lng = cut_info['mapLng']
     except Exception as e:
         log.error(e)
         return

     try:
         detail_res = requests.get(url=detail_url, headers=self.headers)
     except Exception as e:
         log.error('url={}, e={}'.format(detail_url, e))
         return

     # Decode once and reuse the text for both the DOM and the regex scans.
     try:
         page_text = detail_res.content.decode('gbk')
     except UnicodeDecodeError as e:
         log.error('url={}, e={}'.format(detail_url, e))
         return

     html = etree.HTML(page_text)
     # Treat an unparsable page (html is None) the same as a missing title.
     title_nodes = []
     if html is not None:
         title_nodes = html.xpath(
             '//div[contains(@class,"pm-main clearfix")]/h1/text()'
         )
     if not title_nodes:
         log.error('没有标题 url={}'.format(detail_url))
         return
     title = title_nodes[0].strip()

     # The stage is the text between the full-width brackets in the title.
     stage_match = re.search('【(.*?)】', title)
     if stage_match is None:
         log.error('no auction stage in title, url={}, title={}'.format(
             detail_url, title))
         return
     auction_stage = stage_match.group(1)
     auction.auctionStage = auction_stage
     auction.auctionCount = self.get_auctionCount(auction_stage)

     start_match = re.search('起拍价¥(.*?) ,', page_text)
     bond_match = re.search('保 证 金.*?J_Price">(.*?)</span',
                            page_text,
                            re.S | re.M)
     if start_match is None or bond_match is None:
         log.error('start price or deposit not found, url={}'.format(
             detail_url))
         return
     # Convert the prices before any image is uploaded so that a malformed
     # number aborts early instead of after the side effects below.
     # Both values are scaled down by 10000 (presumably yuan -> 万; confirm).
     try:
         start_price = float(start_match.group(1).replace(',', '')) / 10000
         bond = float(bond_match.group(1).replace(',', '').strip()) / 10000
     except ValueError as e:
         log.error('url={}, e={}'.format(detail_url, e))
         return

     # The area is optional: a failed request or an unexpected payload must
     # not prevent the record from being saved, but it is logged rather
     # than silently ignored.
     comm_url = 'http://sf.taobao.com/json/getGovItemSummary.htm?itemId={}'.format(
         kwargs['auction_id'])
     try:
         res = requests.get(comm_url, headers=self.headers)
         # The endpoint value is divided by 100 (unit not shown here; confirm).
         auction.area = int(res.json()['props']['area']['value']) / 100.0
     except (requests.RequestException, KeyError, TypeError, ValueError) as e:
         log.error('url={}, e={}'.format(comm_url, e))

     # Thumbnails end in "_80x80.jpg"; dropping the suffix gives the URL that
     # is handed to qiniufetch.
     image_list = []
     for thumb_url in html.xpath("//div[@class='pm-pic pm-s80 ']/a/img/@src"):
         big_img = thumb_url.replace('_80x80.jpg', '')
         image_list.append(qiniufetch(big_img, big_img))
     auction.houseImgUrls = image_list

     auction.startPrice = start_price
     auction.bond = bond

     if kwargs['status'] == '已成交':
         # A listing reported as sold may still show a failure banner; only
         # then is the state overridden. A missing banner means the sale
         # stands (previously this raised IndexError).
         fail_texts = html.xpath('//h1[@class="bid-fail"]/text()')
         if fail_texts and re.search('失败|流拍', fail_texts[0]) is not None:
             auction.biddingState = '流拍'
     auction.update()
コード例 #2
0
 def get_detail(self, source, auction_id, auction_status):
     """Scrape one auction detail page into an ``Auction`` and persist it.

     Args:
         source: Value stored as the record's ``source``.
         auction_id: Identifier passed to ``self.request_url`` and the other
             helper lookups; stored as ``auctionId``.
         auction_status: Stored unchanged as ``biddingState``.

     Returns:
         None. Any exception raised while parsing is logged together with
         the page URL and the record is not saved.

     NOTE(review): ``self.request_url`` is called outside the ``try``; confirm
     it always returns a ``(response, url)`` pair.
     """
     response, url = self.request_url(auction_id)
     try:
         html = response.text
         tree = etree.HTML(html)
         auction = Auction(source=source)
         auction.url = url
         # Starting price (in 万, i.e. divided by 10000)
         startPrice = self.get_startPrice(html=html)
         startPrice = float(startPrice.replace(' ', '').replace(',', '')) / 10000
         auction.startPrice = startPrice
         # Appraised price (in 万)
         evalPrice = self.get_evalPrice(tree=tree)
         evalPrice = float(evalPrice.replace(' ', '').replace('¥', '').replace(',', '')) / 10000
         auction.evalPrice = evalPrice
         # Deposit (in 万). Thousands separators are stripped like for the
         # other prices; the fractional part is dropped before conversion.
         bond = tree.xpath('//div[@id="content"]/div/div[2]/div[1]/div/div[2]/div[10]/ul[3]/li/span[2]/text()')[0]
         bond = bond.replace(' ', '').replace('¥', '').replace(',', '').split('.')[0]
         bond = float(bond) / 10000
         auction.bond = bond
         # The page title carries both the stage (inside the full-width
         # brackets) and the item name (after the closing bracket), so it is
         # fetched and normalised once.
         title = tree.xpath('//div[@id="content"]/div[1]/div[2]/div[1]/div[1]/div[2]/h1/text()')[0]
         title = title.replace(' ', '').replace('\n', '').replace('\t', '')
         # Auction stage
         auctionStage = title.split('】')[0].split('【')[1]
         auction.auctionStage = auctionStage
         # Auction count
         auctionCount = self.get_auctionCount(auctionStage)
         auction.auctionCount = auctionCount
         # Auction item name
         auction_name = title.split('】')[1]
         auction.auction_name = auction_name
         # TODO: call Huang Cunliang's splitting method to obtain the city,
         # region, community name, latitude/longitude, etc.
         cut_info = CutMatch.to_match('上海', auction_name)
         # City matched in the library after splitting
         auction.matchCity = cut_info['matchCity']
         # Region matched in the library after splitting
         auction.matchRegion = cut_info['matchRegion']
         # Community name matched in the library after splitting
         auction.matchName = cut_info['matchName']
         # Address matched in the library after splitting
         auction.matchAddress = cut_info['matchAddress']
         # Room number after splitting
         auction.roomNum = cut_info['cutRoomnum']
         # Building number after splitting
         auction.houseNum = cut_info['cutHousenum']
         # City after splitting
         auction.cutCity = cut_info['cutCity']
         # Region after splitting
         auction.cutRegion = cut_info['cutRegion']
         # Community name after splitting
         auction.cutName = cut_info['cutName']
         # Address after splitting
         auction.cutAddress = cut_info['cutAddress']
         # Latitude/longitude obtained from the Gaode (AMap) API after splitting
         auction.lat = cut_info['mapLat']
         auction.lng = cut_info['mapLng']
         # Address; the first two space-separated parts are taken as the
         # city and the region.
         address = tree.xpath('//em[@id="paimaiAddress"]/text()')[0]
         auction.address = address
         address_parts = address.split(' ')
         # City
         auction.city = address_parts[0]
         # Region
         auction.region = address_parts[1]
         skulid = re.search('id="skuId" value="(.*?)"', html, re.S | re.M).group(1)
         # Bidding state, current price, deal price
         curPrice, dealPrice = self.get_curPrice_and_dealPrice(skulid, auction_id)
         auction.biddingState = auction_status
         auction.curPrice = curPrice
         auction.dealPrice = dealPrice
         # Start time
         startShootingDate = tree.xpath('//input[@id="startTime"]/@value')[0]
         auction.startShootingDate = datetime.datetime.strptime(startShootingDate, '%Y-%m-%d %H:%M:%S.%f')
         # End time
         endShootingDate = tree.xpath('//input[@id="endTime"]/@value')[0]
         auction.endShootingDate = datetime.datetime.strptime(endShootingDate, '%Y-%m-%d %H:%M:%S.%f')
         # Images: the src values get an 'http:' prefix, and 'jfs' is
         # rewritten to 's1000x750_jfs' (presumably a larger rendition;
         # confirm) before being passed to qiniufetch.
         houseImgUrls = []
         for houseImgUrl in tree.xpath('//div[@id="spec-list"]/div/ul/li/img/@src'):
             big_img = ('http:' + houseImgUrl).replace('jfs', 's1000x750_jfs')
             houseImgUrls.append(qiniufetch(big_img, big_img))
         auction.houseImgUrls = houseImgUrls
         # Number of participants, number of views
         participantsNumber, visitCount = self.get_participantsNumber_and_visitCount(auction_id)
         auction.participantsNumber = participantsNumber
         auction.visitCount = visitCount
         # Auction item id
         auction.auctionId = auction_id
         # Source site (jingdong)
         auction.source = source
         auction.update()
     except Exception as e:
         # Include the URL and the exception so a failed parse can be traced.
         log.error('解析错误 url={}, e={}'.format(url, e))