def get_real_pay(html):
    """Pull the paid amount text out of a page's markup.

    The first lookup targets the ``<span class="txt count">`` element and
    strips the currency sign. If that comes back empty, the text after the
    '应付总额' label is used instead.

    NOTE(review): ``db_common.fj_function(text, start, end)`` is assumed to
    return a sequence whose item ``[1]`` is the text between the two markers
    and is falsy when nothing is found -- confirm against the helper.

    Args:
        html: page markup to search.

    Returns:
        The extracted amount text.
    """
    primary = db_common.fj_function(html, '<span class="txt count">', '</span>')[1]
    # Both replacements are reproduced exactly as originally written.
    amount = primary.replace('¥', '').replace('¥', '')
    if amount:
        return amount

    # Fallback: take the text after the label, then the part between ';' and
    # the closing span.
    total_block = db_common.fj_function(html, '应付总额', '</div>')[1]
    fallback = db_common.fj_function(total_block, ';', '</span>')[1]
    return fallback.replace('\n', '').strip()
def parse_orders_item(self, obj_date, item):
    """Extract the detail link and deal time from one order-list fragment.

    Sets ``self.end_tag`` to True when the first ten characters of the deal
    time compare (as strings) lower than ``obj_date``.

    Args:
        obj_date: cut-off value compared against the deal time prefix.
            Presumably a 'YYYY-MM-DD' string -- verify against the caller.
        item: markup fragment for a single order.

    Returns:
        ``[url, orders_date]`` as extracted from the fragment.
    """
    extract = db_common.fj_function
    link = extract(item, "href='//", "'")[1]
    deal_time = extract(item, '<span class="dealtime" title="', '">')[1]

    deal_day = deal_time[:10]
    if deal_day < obj_date:
        # Flag set for code outside this block to read.
        self.end_tag = True

    return [link, deal_time]
def money(html):
    """Collect the label/amount pairs found in the 'goods-total' block.

    Args:
        html: page markup to search.

    Returns:
        A list of ``[name, txt]`` pairs, one per label span, in the order
        they appear in the markup.
    """
    total_block = db_common.fj_function(html, '<div class="goods-total">', '</div>')[1]
    # The split token stops one character short of the full class name; the
    # leftover 'l">' is used as the start marker for the label lookup below.
    pieces = total_block.split('<span class="labe')

    def _pair(piece):
        # Label with spaces removed, amount with newlines and spaces removed.
        label = db_common.fj_function(piece, 'l">', ':</span>')[1]
        value = db_common.fj_function(piece, '¥', '</span>')[1]
        return [label.replace(' ', ''), value.replace('\n', '').replace(' ', '')]

    # The first piece precedes the first label span, so it is skipped.
    return [_pair(piece) for piece in pieces[1:]]
def parse_orders_item(self, item):
    """Extract the detail link and deal time from one order-list fragment.

    The link is looked up with a single-quoted ``href`` first and with a
    double-quoted ``href`` if that yields nothing. Sets ``self.end_tag`` to
    True when the first ten characters of the deal time compare (as strings)
    lower than ``self.param['spider_start_time']``.

    Args:
        item: markup fragment for a single order.

    Returns:
        ``[url, orders_date]`` as extracted from the fragment.
    """
    extract = db_common.fj_function

    # ``or`` keeps the original laziness: the double-quoted lookup only runs
    # when the single-quoted one produced a falsy value.
    link = extract(item, "href='//", "'")[1] or extract(item, 'href="//', '"')[1]
    deal_time = extract(item, '<span class="dealtime" title="', '">')[1]

    cutoff = self.param['spider_start_time']
    if deal_time[:10] < cutoff:
        self.end_tag = True

    return [link, deal_time]
def parse_detail(self, html, orders_date):
    """Parse one order detail page into formatted goods rows.

    When no order id follows the '订单号:' label and the page contains
    '我的京东国际订单', the id is read from the active ``<li>`` element and
    goods come from ``self.global_goods_info``; otherwise goods come from
    ``self.goods_info``.

    Args:
        html: detail page markup.
        orders_date: deal time passed through to ``self.format_ret``.

    Returns:
        The first of the two values returned by ``self.format_ret``.
    """
    order_id = db_common.fj_function(html, '订单号:', '</div>')[1]

    if order_id or '我的京东国际订单' not in html:
        goods_list = self.goods_info(html)
    else:
        order_id = db_common.fj_function(html, '<li class="active">订单', '</li>')[1]
        goods_list = self.global_goods_info(html)

    amounts = self.money(html)
    # format_ret yields two values; only the goods part is handed back.
    goods_ret, _amounts_ret = self.format_ret(order_id, orders_date, goods_list, amounts)
    return goods_ret
def get_shop_info(self, html):
    """Return the shop name and shop URL for an order page.

    The name is read from the page's 'goods-head' block. When it is missing,
    the vender id found in the page is stored in ``self.param['vender_id']``
    and both name and URL are read from the response fetched via
    ``self.get_html(self.construct_url('shop'), 'post')``.

    Args:
        html: order page markup.

    Returns:
        ``(shop_name, shop_url)``; ``shop_url`` is ``''`` when the name was
        found directly in the page.
    """
    extract = db_common.fj_function

    head_block = extract(html, '<div class="mt goods-head">', '</div>')[1]
    name_in_page = extract(head_block, '<span class="shop-name">', '</span>')[1]
    if name_in_page:
        return name_in_page, ''

    # The vender id must be stored before construct_url is called, matching
    # the original statement order.
    self.param['vender_id'] = extract(html, 'id="venderIdListStr" value="', '"')[1]
    shop_response = self.get_html(self.construct_url('shop'), 'post')

    fetched_name = extract(shop_response, '"venderName":"', '"')[1]
    fetched_url = extract(shop_response, '"venderUrl":"', '"')[1]
    return fetched_name, fetched_url
def global_goods_info(self, html):
    """Build the goods rows for a page handled by the 'global' branch.

    Page-level fields (shop name, paid amount, status) are extracted once and
    repeated on every row. Brand id and category come from
    ``self.get_global_brand(url)``, called once per row.

    Args:
        html: detail page markup.

    Returns:
        A list of rows, each
        ``[shop_name, shop_url, title, url, status, count, price,
        pay_money, brand_id, cate]``. ``shop_url`` is always ``''`` here.
    """
    extract = db_common.fj_function

    # Values shared by every row.
    shop_block = extract(html, '店铺名称:', '联系卖家')[1]
    shop_name = extract(shop_block, '<span>', '</span>')[1]
    shop_url = ''
    pay_money = extract(html, '<b class="red">¥', '</b>')[1]
    status = extract(html, '当前状态:', '</div>')[1]

    table = extract(html, '<td class="itemName">',
                    '<div class="price-info presale-price-info">')[1]
    # The chunk before the first row marker is not a row, so it is dropped.
    rows = table.split('<tr class="tr-td" skuid')[1:]

    goods_list = []
    for row in rows:
        # Start and end markers are intentionally the same string here,
        # exactly as in the original call.
        msg_block = extract(row, '<div class="p-msg">', '<div class="p-msg">')[1]
        title = extract(msg_block, 'target="_blank">', '</a>')[1]
        url = extract(msg_block, '<a href="//', '"')[1]
        count = extract(row, '<td class="num">', '</td>')[1]
        raw_price = extract(row, '<td class="jdPrice">', '</td>')[1]
        price = raw_price.replace('¥', '').replace('\n', '').strip()

        brand_id, cate = self.get_global_brand(url)
        goods_list.append([shop_name, shop_url, title, url, status,
                           count, price, pay_money, brand_id, cate])
    return goods_list
def goods_info(self, html):
    """Build the goods rows for a regular order detail page.

    Page-level fields (shop, status, paid amount) are extracted once and
    repeated on every row. Per item, brand and category come from
    ``self.get_brand`` and the price from ``self.get_goods_price``.

    Args:
        html: detail page markup.

    Returns:
        A list of rows, each
        ``[shop_name, shop_url, title, url, status, count, price,
        pay_money, brand_id, cate]``.
    """
    extract = db_common.fj_function
    strip_tags = re.compile('<.*?>')

    shop_name, shop_url = self.get_shop_info(html)

    order_table = extract(html, '<table class="tb-void tb-order">',
                          '<tr class="J-yunfeixian"')[1]
    item_chunks = order_table.split('<div class="p-item"')

    # The heading is looked up with double-quoted attributes first, then with
    # single-quoted ones if that yields nothing.
    status = (extract(html, '<h3 class="state-txt ftx-02">', '</h3>')[1]
              or extract(html, "<h3 class='state-txt ftx-02'>", '</h3>')[1])
    pay_money = self.get_real_pay(html)
    ware_info = extract(html, "['fwjBuyInWareInfo']='", "';")[1]

    goods_list = []
    # The chunk before the first item marker is not an item.
    for chunk in item_chunks[1:]:
        url = extract(chunk, '<a href="//', '"')[1]

        name_markup = extract(chunk, '<div class="p-name">', '</div>')[1]
        name_markup = name_markup.replace('\t', '').replace('\n', '')
        title = strip_tags.sub('', name_markup).strip()

        pid = extract(chunk, 'id="coupon_', '"')[1]
        brand_id, cate = self.get_brand(ware_info, pid)

        f_price = extract(chunk, '<span class="f-price', '</span>')[1]
        price = self.get_goods_price(title, html, f_price)

        # The count is the first <td> between the price span and the
        # 'jingdou' cell.
        after_price = extract(chunk, '<span class="f-price', '<td id="jingdou')[1]
        count = extract(after_price, '<td>', '</td>')[1]

        goods_list.append([shop_name, shop_url, title, url, status,
                           count, price, pay_money, brand_id, cate])
    return goods_list
def get_goods_price(self, title, html, f_price):
    """Work out the price text to record for one item.

    Checks are applied in this order:

    1. title contains '赠品', '非卖品' or '请勿'  -> ``"赠品"``
    2. ``f_price`` contains a decimal number      -> that number
    3. the page carries pre-sale JSON             -> ``yPrice`` from the
       page fetched via ``self.construct_url('presale')``
    4. ``f_price`` contains '赠品'                 -> ``"赠品"``
    5. otherwise                                  -> ``"0"``

    Args:
        title: item title text.
        html: full detail page markup.
        f_price: markup fragment of the item's price span.

    Returns:
        The price as a string.
    """
    # Regular price: first number with a decimal point.
    price_match = re.search(r'(\d+\.\d+)', f_price)

    # Pre-sale marker, looked up before the checks just as the original did.
    presale = db_common.fj_function(
        html, '<span id="yuShouOrderItemJson" style="display:none;">', '</span>')[1]

    if any(word in title for word in ('赠品', '非卖品', '请勿')):
        return "赠品"

    if price_match:
        return price_match.group()

    if presale:
        order_id = db_common.GetJsonValue(presale, 'orderid')
        yn = db_common.GetJsonValue(presale, 'yn')
        passkey = db_common.GetJsonValue(presale, 'passkey')
        presale_url = self.construct_url('presale').format(order_id, yn, passkey)
        presale_page = self.get_html(presale_url)
        return db_common.fj_function(presale_page, '"yPrice":"', '"')[1]

    if '赠品' in f_price:
        return "赠品"

    return "0"
def check_jd(jd_list):
    """Fetch each URL in ``jd_list`` and save its body under ``./jd_error``.

    A ``jd_orders.Spider`` is created and given the cookie returned by
    ``get_cookie.run('jd')`` with newlines removed. Each response is written
    to ``./jd_error/order_<orderid>.txt``, where ``<orderid>`` is the text
    between ``orderid=`` and ``&`` in the URL.

    Args:
        jd_list: iterable of URLs to fetch.

    Returns:
        None.
    """
    # Set the cookie.
    cookie = get_cookie.run('jd').replace('\n', '')
    spider = jd_orders.Spider()
    spider.set_cookie(cookie)

    dump_dir = './jd_error'
    if not os.path.exists(dump_dir):
        os.makedirs(dump_dir)

    for url in jd_list:
        page = spider.get_html(url)
        order_id = db_common.fj_function(url, 'orderid=', '&')[1]
        target = dump_dir + '/order_{}.txt'.format(order_id)
        with open(target, 'w') as out:
            out.write(page)
def get_global_brand(self, url):
    """Fetch a product page and read its brand id and category list.

    Args:
        url: product address without scheme; ``'https://'`` is prepended
            before it is passed to ``self.get_html``.

    Returns:
        ``(brand_id, cate)``. ``brand_id`` is the text between ``'brand: '``
        and the next comma. ``cate`` is the content of ``cat: [...]`` with
        commas replaced by semicolons.
    """
    page = self.get_html('https://' + url)
    extract = db_common.fj_function

    brand_id = extract(page, 'brand: ', ',')[1]
    raw_categories = extract(page, 'cat: [', ']')[1]
    return brand_id, raw_categories.replace(',', ';')