def parse_html(self, html, part):
    """Extract the title and price of a part from a product HTML page.

    The title is read from ``div.product-title > h1``. Prices are collected
    from every ``div.price`` inside ``div.product-others`` plus the main
    ``span.price``; after sorting, the second-lowest price is reported, or
    the only price when there is just one.

    Args:
        html: Markup of the product page.
        part: The requested part; its ``number`` and ``model`` are copied
            into the result.

    Returns:
        A ``Part`` with the parsed title and price, or a ``Part`` whose title
        and price are both 'Нет в наличии' when anything fails while parsing
        (the traceback is printed in that case).
    """
    try:
        soup = BeautifulSoup(html, 'html.parser')
        min_title = soup.select_one('div.product-title > h1').get_text()
        prices_block = soup.select_one('div.product-others')
        # NOTE(review): `in` on a bs4 Tag appears to test its direct children
        # rather than its text, so this probably never matches nested text --
        # confirm whether `prices_block.get_text()` was intended.
        if 'Аналоги' in prices_block:
            raise Exception
        prices = [
            float(price.contents[0].replace(' ', ''))
            for price in prices_block.select('div.price')
        ]
        main_price = soup.select_one('span.price').contents[0].replace(' ', '')
        # The main price is always appended, so `prices` is never empty here.
        prices.append(float(main_price))
        prices.sort()
        min_price = prices[0] if len(prices) == 1 else prices[1]
        ready_part = Part(part.number, part.model, min_title, min_price)
    except Exception:
        # Missing elements or unparsable numbers all fall back to the
        # placeholder; KeyboardInterrupt/SystemExit are allowed to propagate.
        traceback.print_exc()
        ready_part = Part(part.number, part.model, 'Нет в наличии',
                          'Нет в наличии')
    return ready_part
def parse_html(self, html, part):
    """Build a Part from the first entry of the JSON response's ``data`` field.

    Args:
        html: JSON text whose top-level object has a ``data`` key.
        part: The requested part, used for the placeholder result and passed
            on to ``parse_part_from_block_model``.

    Returns:
        The ``Part`` produced by ``parse_part_from_block_model`` for the first
        value of ``data``, or a ``Part`` whose title and price are both
        'Нет в наличии' when ``data`` is empty.
    """
    payload = json.loads(html)['data']
    inner_blocks = list(payload.values()) if payload else []
    if inner_blocks:
        # Only the first value is ever consulted.
        return self.parse_part_from_block_model(inner_blocks[0], part)
    return Part(part.number, part.model, 'Нет в наличии', 'Нет в наличии')
def parse_html(self, html, part):
    """Extract the title and price of a part from an HTML results table.

    The title comes from the ``title`` attribute of the first ``td.fn`` cell
    and the price from the text of the second ``td.price`` cell, with every
    character other than digits and dots stripped.

    Args:
        html: Markup of the results page.
        part: The requested part; its ``number`` and ``model`` are copied
            into the result.

    Returns:
        A ``Part`` with the parsed title and the price as a string, or a
        ``Part`` whose title and price are both 'Нет в наличии' when the
        expected cells are missing or parsing fails.
    """
    try:
        soup = BeautifulSoup(html, 'html.parser')
        min_title = soup.select_one('td.fn')['title']
        # Index 1: the second price cell is the one reported.
        min_price_str = soup.select('td.price')[1].get_text()
        # Raw string avoids the invalid "\." escape in a normal literal.
        min_price = re.sub(r'[^\.0-9]', '', min_price_str)
        ready_part = Part(part.number, part.model, min_title, min_price)
    except Exception:
        ready_part = Part(part.number, part.model, 'Нет в наличии',
                          'Нет в наличии')
    return ready_part
def parse_part_from_block_model(self, block_model, part):
    """Turn one block of the JSON response into a Part.

    The first value of ``block_model`` is treated as a sequence of offers.
    With exactly one offer that offer is used; with more, the second one is.
    Positions 3, 4 and 16 of the chosen offer supply the number, title and
    price (commas in the price are replaced by dots).

    Args:
        block_model: Mapping whose first value is the sequence of offers.
        part: The requested part; supplies ``model`` and the placeholder data.

    Returns:
        The resulting ``Part``, or a ``Part`` whose title and price are both
        'Нет в наличии' when there are no offers.
    """
    stores = [*block_model.values()][0]
    store_count = len(stores)
    if store_count == 0:
        return Part(part.number, part.model, 'Нет в наличии', 'Нет в наличии')

    inner = stores[0] if store_count == 1 else stores[1]
    if len(inner) < 4:
        # Diagnostic output only: the indexing below still runs and will
        # raise for an offer this short.
        print(block_model)

    article, name = inner[3], inner[4]
    cost = str(inner[16]).replace(',', '.')
    return Part(article, part.model, name, cost)
def read_parts_from_xlsx(xlsx_file, max_amount=int(1e+6)):
    """Read parts from the first worksheet of an xlsx workbook.

    Column 0 holds the part number (only the text before the first '#' is
    kept) and column 1 the model. Row 1 is skipped when its first cell is
    empty or ``has_cyrillic`` reports true for it. Rows where either cell
    is empty are ignored.

    Args:
        xlsx_file: Workbook to open with ``load_workbook``.
        max_amount: Highest row index that will be read.

    Returns:
        A list of ``Part`` objects built from the accepted rows.
    """
    sheet = load_workbook(xlsx_file).worksheets[0]

    header_value = sheet[1][0].value
    if not header_value or has_cyrillic(header_value):
        first_row = 2
    else:
        first_row = 1
    stop_row = min(sheet.max_row + 1, max_amount + 1)

    parts = []
    for row in range(first_row, stop_row):
        cells = sheet[row]
        number_value = cells[0].value
        if not number_value:
            continue
        # The model cell is read only after the number cell proved non-empty.
        model_value = cells[1].value
        if not model_value:
            continue
        parts.append(Part(number_value.split('#')[0], model_value))
    return parts
def parse_html(self, html, part):
    """Find the catalog entry matching the part's model in a JSON response.

    Entries of ``data.catalogs`` are compared by ``catalogName`` against
    ``self.prepare_model`` applied to the lower-cased model; the first match
    wins.

    Args:
        html: JSON text containing ``data`` -> ``catalogs``.
        part: The requested part.

    Returns:
        A ``(Part, article_id)`` tuple. On a match the ``Part`` carries the
        entry's ``name`` as title and a price of 0, and ``article_id`` is the
        entry's ``id``. Without a match the ``Part`` has the model as title,
        'Нет в наличии' as price, and ``article_id`` is -1.
    """
    catalogs = json.loads(html)['data']['catalogs']

    matching = []
    for catalog in catalogs:
        if catalog['catalogName'] == self.prepare_model(part.model.lower()):
            matching.append(catalog)

    if not matching:
        return Part(part.number, part.model, part.model, 'Нет в наличии'), -1

    chosen = matching[0]
    article_id, title = chosen['id'], chosen['name']
    return Part(part.number, part.model, title, 0), article_id
def find_one_part(self, part):
    """Look up one part: fetch its page, resolve the article and its cost.

    Progress counters are updated and the request is throttled to at least
    ``self.DELAY`` seconds per call regardless of the outcome.

    Args:
        part: The part to look up.

    Returns:
        The ``Part`` from ``parse_html`` with ``price`` set to the parsed
        cost; the unpriced ``Part`` from ``parse_html`` when it reports an
        article id of -1; or a ``Part`` with the model as title and
        'Нет в наличии' as price when any exception occurs.
    """
    start_time = time.time()
    try:
        if settings.DEBUG:
            print('start finding')
            print(part)
        html = self.get_part_html(part)
        ready_part, article_id = self.parse_html(html, part)
        if article_id == -1:
            # No matching article: nothing to price.
            return ready_part
        cost_html = self.get_cost_html(article_id)
        ready_part.price = self.parse_cost(cost_html, article_id)
        if settings.DEBUG:
            print(ready_part)
            print('end finding')
        else:
            print(f"{self.__class__.__name__}: {self.done}\\{self.amount}\n",
                  end='')
        return ready_part
    except Exception:
        # traceback.print_exc() returns None, so it must not be passed to
        # print() as an argument -- that used to print a literal "None".
        print('Произошла ошибка: ')
        traceback.print_exc()
        print('Деталь: ', part)
        return Part(part.number, part.model, part.model, 'Нет в наличии')
    finally:
        self.done += 1
        settings.progress_list[self.id] = self.done / self.amount
        # Sleep for whatever is left of the per-request delay.
        time.sleep(max(self.DELAY - (time.time() - start_time), 0))
        if self.proxy_index == 0:
            # NOTE(review): the flag is raised and lowered back to back (the
            # sleep that used to sit between the two assignments is disabled),
            # so other workers will rarely observe it set -- confirm intent.
            AutoPiter.ALL_SLEEP = True
            AutoPiter.ALL_SLEEP = False
        while AutoPiter.ALL_SLEEP:
            time.sleep(1)
def find_one_part(self, part):
    """Look up one part: fetch its page and parse it into a Part.

    The progress counters (``self.done`` and the entry in
    ``settings.progress_list``) are updated regardless of the outcome.

    Args:
        part: The part to look up.

    Returns:
        The ``Part`` produced by ``parse_html``, or a ``Part`` whose title
        and price are both 'Нет в наличии' when any exception occurs.
    """
    # Kept although unused here; presumably mirrors the throttled variant
    # of this method -- TODO confirm and drop if not needed.
    start_time = time.time()
    try:
        if settings.DEBUG:
            print('start finding')
            print(part)
        html = self.get_part_html(part)
        ready_part = self.parse_html(html, part)
        if settings.DEBUG:
            print(ready_part)
            print('end finding')
        else:
            print(f"{self.__class__.__name__}: {self.done}\\{self.amount}\n",
                  end='')
        return ready_part
    except Exception:
        # traceback.print_exc() returns None, so it must not be passed to
        # print() as an argument -- that used to print a literal "None".
        print('Произошла ошибка: ')
        traceback.print_exc()
        print('Деталь: ', part)
        return Part(part.number, part.model, 'Нет в наличии', 'Нет в наличии')
    finally:
        self.done += 1
        settings.progress_list[self.id] = self.done / self.amount
def parse_html(self, html, part):
    """Pick the second-lowest offer from two JSON responses.

    ``html`` holds two JSON documents separated by '!^^!'. The first lists
    offers for the part itself; the second lists offers for its clones.
    Entries 0 and 1 of each offer list are used as the lowest and
    second-lowest price (presumably the lists arrive sorted by price --
    TODO confirm).

    Args:
        html: The two raw responses joined by '!^^!'.
        part: The requested part; supplies ``number``, ``model`` and the
            fallback title.

    Returns:
        A ``Part`` carrying the title and price of the second-lowest offer
        found (or of the lowest one when no second offer exists), or a
        ``Part`` whose title and price are both 'Нет в наличии' when the first
        response cannot be parsed or has no offers.
    """
    response1, response2 = html.split('!^^!')
    try:
        json_response = json.loads(response1)
    except Exception:
        traceback.print_exc()
        print(response1)
        return Part(part.number, part.model, 'Нет в наличии', 'Нет в наличии')

    prices = json_response['data']
    if isinstance(prices, dict):
        prices = list(prices.values())
    if not prices:
        return Part(part.number, part.model, 'Нет в наличии', 'Нет в наличии')

    try:
        min_price = prices[0]['price']
        min_title = prices[0].get('item_name', part.number)
    except Exception:
        traceback.print_exc()
        print(response1)
        return Part(part.number, part.model, 'Нет в наличии', 'Нет в наличии')

    if len(prices) > 1:
        min_price2 = prices[1]['price']
        min_title2 = prices[1].get('item_name', part.number)
    else:
        # Sentinel meaning "no second price yet"; resolved after the loop.
        min_price2 = int(1e+6)
        min_title2 = min_title

    # Clone offers are best effort: any failure here keeps the values
    # gathered so far.
    try:
        json_response = json.loads(response2)
        clones = json_response['data']
        if not clones:
            raise Exception
        articles = clones[0]
        if isinstance(articles, dict):
            articles = list(articles.values())
        for prices in articles:
            if isinstance(prices, dict):
                prices = list(prices.values())
            current_min_price = prices[0]['price']
            current_min_title = prices[0].get('item_name', part.number)
            current_min_price2 = prices[1]['price']
            current_min_title2 = prices[1].get('item_name', part.number)
            if current_min_price > min_price2:
                continue
            if current_min_price < min_price:
                # New overall minimum: the old minimum becomes the runner-up.
                min_price2 = min_price
                min_title2 = min_title
                min_price = current_min_price
                min_title = current_min_title
                # NOTE(review): this check is nested under the new-minimum
                # branch; confirm the nesting matches the intended logic.
                if current_min_price2 < min_price2:
                    min_price2 = current_min_price2
                    min_title2 = current_min_title2
                continue
            if min_price < current_min_price < min_price2:
                min_price2 = current_min_price
                min_title2 = current_min_title
                continue
    except Exception:
        # Deliberately ignored, but KeyboardInterrupt/SystemExit now
        # propagate instead of being swallowed.
        pass

    if min_price2 > int(1e+6) - 1:
        # Sentinel still in place: fall back to the lowest price.
        min_price2 = min_price
    ready_part = Part(part.number, part.model, min_title2, min_price2)
    return ready_part