def detail_parse(self, **kwargs):
    """Build an ``Auction`` record for one 'ali' listing and persist it.

    Values already known from the listing are copied from ``kwargs``; the
    detail page at ``kwargs['url']`` is then fetched to read the auction
    stage, start price, deposit, floor area and pictures, and finally
    ``auction.update()`` is called.

    Keys read from ``kwargs``: ``auction_name``, ``auction_id``, ``status``,
    ``current_price``, ``evalprice``, ``start_time``, ``end_time``,
    ``partnumber``, ``visitCount`` and ``url``. A missing key raises
    ``KeyError``.

    Returns ``None``. When the name cannot be matched, the page cannot be
    fetched or decoded, or the title / stage / start price / deposit cannot
    be extracted, the problem is logged and the record is NOT saved. A
    failure while fetching the floor area is logged and otherwise ignored.
    """
    auction = Auction(source='ali')
    auction.auction_name = kwargs['auction_name']
    auction.auctionId = kwargs['auction_id']
    auction.biddingState = kwargs['status']
    auction.curPrice = kwargs['current_price']
    auction.evalPrice = kwargs['evalprice']
    auction.startShootingDate = kwargs['start_time']
    auction.endShootingDate = kwargs['end_time']
    auction.participantsNumber = kwargs['partnumber']
    auction.visitCount = kwargs['visitCount']
    detail_url = kwargs['url']
    auction.url = detail_url

    try:
        # TODO (carried over from the original): call Huang Cunliang's
        # splitter to obtain city, region, community name, lat/lng, etc.
        cut_info = CutMatch.to_match('上海', kwargs['auction_name'])
        # Values matched against the library after splitting.
        auction.matchCity = cut_info['matchCity']
        auction.matchRegion = cut_info['matchRegion']
        auction.matchName = cut_info['matchName']
        auction.matchAddress = cut_info['matchAddress']
        # Room number and building number produced by the split.
        auction.roomNum = cut_info['cutRoomnum']
        auction.houseNum = cut_info['cutHousenum']
        # Raw split results: city, region, community name, address.
        auction.cutCity = cut_info['cutCity']
        auction.cutRegion = cut_info['cutRegion']
        auction.cutName = cut_info['cutName']
        auction.cutAddress = cut_info['cutAddress']
        # Coordinates obtained from the Amap (Gaode) API after splitting.
        auction.lat = cut_info['mapLat']
        auction.lng = cut_info['mapLng']
    except Exception as e:
        log.error(e)
        return

    try:
        detail_res = requests.get(url=detail_url, headers=self.headers)
    except Exception as e:
        log.error('url={}, e={}'.format(detail_url, e))
        return

    # Decode once and reuse; a page that is not valid GBK is skipped instead
    # of crashing the caller.
    try:
        page_text = detail_res.content.decode('gbk')
    except UnicodeDecodeError as e:
        log.error('url={}, e={}'.format(detail_url, e))
        return

    html = etree.HTML(page_text)
    try:
        title = html.xpath(
            '//div[contains(@class,"pm-main clearfix")]/h1/text()'
        )[0].strip()
    except Exception:
        log.error('没有标题 url={}'.format(detail_url))
        return

    # re.search returns None when nothing matches; check before .group().
    stage_match = re.search('【(.*?)】', title)
    # NOTE(review): the pattern expects exactly one space before the comma;
    # confirm against a live page.
    price_match = re.search('起拍价¥(.*?) ,', page_text)
    bond_match = re.search(
        '保 证 金.*?J_Price">(.*?)</span', page_text, re.S | re.M)
    if stage_match is None or price_match is None or bond_match is None:
        log.error(
            'stage/start price/bond not found url={}'.format(detail_url))
        return

    try:
        # Both amounts are stored divided by 10000.
        start_price = float(price_match.group(1).replace(',', '')) / 10000
        bond = float(bond_match.group(1).replace(',', '').strip()) / 10000
    except ValueError as e:
        log.error('url={}, e={}'.format(detail_url, e))
        return

    auction_stage = stage_match.group(1)
    auction.auctionStage = auction_stage
    auction.auctionCount = self.get_auctionCount(auction_stage)
    auction.startPrice = start_price
    auction.bond = bond

    # Floor area is best-effort: both the request and the JSON lookup are
    # guarded so a failure here never prevents the record being saved.
    comm_url = 'http://sf.taobao.com/json/getGovItemSummary.htm?itemId={}'.format(
        kwargs['auction_id'])
    try:
        res = requests.get(comm_url, headers=self.headers)
        # NOTE(review): the reported value is divided by 100 - presumably it
        # is expressed in hundredths; confirm against the API.
        auction.area = int(res.json()['props']['area']['value']) / 100
    except Exception as e:
        log.error('area unavailable url={}, e={}'.format(comm_url, e))

    # Strip the thumbnail suffix to get the full-size picture URL, then pass
    # it through qiniufetch and keep whatever it returns.
    image_list = []
    for thumb_url in html.xpath("//div[@class='pm-pic pm-s80 ']/a/img/@src"):
        big_img = thumb_url.replace('_80x80.jpg', '')
        image_list.append(qiniufetch(big_img, big_img))
    auction.houseImgUrls = image_list

    # A listing reported as sold may still show a failure banner; in that
    # case record it as unsold. No banner at all means the sale stands.
    if kwargs['status'] == '已成交':
        fail_texts = html.xpath('//h1[@class="bid-fail"]/text()')
        if fail_texts and re.search('失败|流拍', fail_texts[0]) is not None:
            auction.biddingState = '流拍'
    auction.update()
def get_detail(self, source, auction_id, auction_status):
    """Scrape one auction detail page into an ``Auction`` and persist it.

    Args:
        source: Site identifier stored on the record (the original comment
            mentions "jingdong").
        auction_id: Id of the auction item; passed to ``self.request_url``
            and to the price / participant helpers.
        auction_status: Bidding state stored on the record unchanged.

    Returns ``None``. ``self.request_url`` is called outside the guarded
    section, so its exceptions propagate. Any error raised while parsing is
    logged together with the URL, the auction id and the exception, and the
    record is not saved.
    """

    def to_wan(text):
        # Drop spaces, the currency sign and thousands separators, then
        # convert to units of 10,000 (wan).
        cleaned = text.replace(' ', '').replace('¥', '').replace(',', '')
        return float(cleaned) / 10000

    response, url = self.request_url(auction_id)
    try:
        html = response.text
        tree = etree.HTML(html)
        auction = Auction(source=source)
        auction.url = url

        # Start price (wan).
        auction.startPrice = to_wan(self.get_startPrice(html=html))

        # Appraisal price (wan).
        auction.evalPrice = to_wan(self.get_evalPrice(tree=tree))

        # Deposit (wan). Anything after the first '.' is discarded, as in
        # the original; separators are stripped like the other prices so a
        # value such as "100,000" no longer fails the float conversion.
        bond_text = tree.xpath(
            '//div[@id="content"]/div/div[2]/div[1]/div/div[2]/div[10]/ul[3]/li/span[2]/text()')[0]
        auction.bond = to_wan(bond_text.split('.')[0])

        # The page heading carries both the stage (inside the brackets) and
        # the item name (after the closing bracket); read it once.
        heading = tree.xpath(
            '//div[@id="content"]/div[1]/div[2]/div[1]/div[1]/div[2]/h1/text()')[0]
        heading = heading.replace(' ', '').replace('\n', '').replace('\t', '')

        # Auction stage.
        auctionStage = heading.split('】')[0].split('【')[1]
        auction.auctionStage = auctionStage

        # Number of auction rounds.
        auction.auctionCount = self.get_auctionCount(auctionStage)

        # Name of the auctioned item.
        auction_name = heading.split('】')[1]
        auction.auction_name = auction_name

        # TODO (carried over from the original): call Huang Cunliang's
        # splitter to obtain city, region, community name, lat/lng, etc.
        cut_info = CutMatch.to_match('上海', auction_name)
        # Values matched against the library after splitting.
        auction.matchCity = cut_info['matchCity']
        auction.matchRegion = cut_info['matchRegion']
        auction.matchName = cut_info['matchName']
        auction.matchAddress = cut_info['matchAddress']
        # Room number and building number produced by the split.
        auction.roomNum = cut_info['cutRoomnum']
        auction.houseNum = cut_info['cutHousenum']
        # Raw split results: city, region, community name, address.
        auction.cutCity = cut_info['cutCity']
        auction.cutRegion = cut_info['cutRegion']
        auction.cutName = cut_info['cutName']
        auction.cutAddress = cut_info['cutAddress']
        # Coordinates obtained from the Amap (Gaode) API after splitting.
        auction.lat = cut_info['mapLat']
        auction.lng = cut_info['mapLng']

        # Address; city and region are its first two space-separated parts.
        address = tree.xpath('//em[@id="paimaiAddress"]/text()')[0]
        auction.address = address
        address_parts = address.split(' ')
        auction.city = address_parts[0]
        auction.region = address_parts[1]

        skulid = re.search(
            'id="skuId" value="(.*?)"', html, re.S | re.M).group(1)

        # Bidding state, current price and deal price.
        curPrice, dealPrice = self.get_curPrice_and_dealPrice(
            skulid, auction_id)
        auction.biddingState = auction_status
        auction.curPrice = curPrice
        auction.dealPrice = dealPrice

        # Start and end time of the auction.
        time_format = '%Y-%m-%d %H:%M:%S.%f'
        start_text = tree.xpath('//input[@id="startTime"]/@value')[0]
        auction.startShootingDate = datetime.datetime.strptime(
            start_text, time_format)
        end_text = tree.xpath('//input[@id="endTime"]/@value')[0]
        auction.endShootingDate = datetime.datetime.strptime(
            end_text, time_format)

        # Pictures: prefix the scheme, rewrite the path to the 1000x750
        # variant, then pass it through qiniufetch and keep what it returns.
        houseImgUrls = []
        for src in tree.xpath('//div[@id="spec-list"]/div/ul/li/img/@src'):
            big_img = ('http:' + src).replace('jfs', 's1000x750_jfs')
            houseImgUrls.append(qiniufetch(big_img, big_img))
        auction.houseImgUrls = houseImgUrls

        # Number of participants and number of views.
        participantsNumber, visitCount = (
            self.get_participantsNumber_and_visitCount(auction_id))
        auction.participantsNumber = participantsNumber
        auction.visitCount = visitCount

        # Id of the auctioned item.
        auction.auctionId = auction_id
        # Source site.
        auction.source = source
        auction.update()
    except Exception as e:
        # Keep the original message prefix but include enough context to
        # diagnose which page failed and why.
        log.error('解析错误 url={}, auction_id={}, e={}'.format(
            url, auction_id, e))