def get_items_barcodes(self, items):
    """
    Fetches the page of every item and returns the processed items
    :param items: List of item dicts; the 'url' key of each one is requested
    :return: Result of add_store_ids applied to the processed item pages
             (items with barcode and image_url added, per the original docs)
    """
    # Pair each url with its item so the response can be matched back to it
    targets = []
    for entry in items:
        targets.append((entry['url'], entry))
    total = len(targets)

    # Draw the empty progress state before any request is sent
    self.print_func(0, total, 'Get Item barcodes')

    task_options = {
        "printer": (self.print_func, total, 'Get item barcodes'),
        "iteration": [0],
    }
    pages = asyncio.run(
        req.create_async_tasks(targets, task_options, self._async_get)
    )

    return self.add_store_ids(process_item_pages(pages))
def _get_henry_items(self):
    """
    Collects the products of every configured category from the henrys api.
    Barcodes are not part of the result.
    :return: List of product entries, first page first, followed by the
             products of every further page response
    """
    # NOTE(review): the list contains repeated ids; they are sent as-is.
    category_ids = (
        16639, 16642, 16643, 16644, 16648, 16649, 23625, 23626,
        23627, 23628, 23634, 23630, 23633, 23635, 23629, 13651,
        13650, 13652, 13652, 13651, 13650, 13653, 13654, 13655,
        13656, 13579, 13658, 13659, 13660, 13661, 13662, 13664,
        13583, 13657, 13667, 13580, 13663, 13670, 13672, 13577,
        24350, 13587, 24680, 24675, 24676, 24677, 24349, 24351,
        24352, 24350, 24351, 24352, 13587, 24680, 24675, 24676,
        24677, 24349, 24589, 24673, 24674, 24679,
    )
    base_url = (
        "https://www.henrys.co.nz/api/products?categories="
        + ",".join(map(str, category_ids))
    )

    # The first page is fetched synchronously because it reports the page count
    first_page = requests.get(base_url + "&page=0").json()
    total_pages = int(first_page['totalPages'])

    # NOTE(review): pages 1..total_pages inclusive are requested while the
    # progress total is total_pages - 1 -- confirm whether the api is 0-based.
    remaining = []
    for page_no in range(1, total_pages + 1):
        remaining.append((f"{base_url}&page={page_no}", None))

    self.print_func(0, len(remaining) - 1, 'Get Items')
    task_options = {
        "printer": (self.print_func, total_pages - 1, 'Get Items'),
        "iteration": [0],
    }
    responses = asyncio.run(
        req.create_async_tasks(remaining, task_options, self._async_get)
    )

    # Append every further page to the first page's product list
    items = first_page['products']
    for _request_data, page_response in responses:
        items.extend(page_response.json()['products'])
    return items
def upload_new_images(self, items, print_func):
    """
    Uploads images for items that don't have one
    :param items: List of item dict objects. {"sku": int, "image_url": string}
    :param print_func: Progress callback, called as print_func(done, total[, title]).
                       May be None, in which case progress is not reported.
    :return: None
    """
    # update_item_prices forwards its print_func=None default, so accept None
    # here instead of failing on the first progress call.
    if print_func is not None:
        report = print_func
    else:
        def report(*_args, **_kwargs):
            return None

    all_items = req.get(self.api.url + "/allitems", headers=self.api.headers).json()
    print('start')

    # Index the api items once instead of scanning the whole list per item.
    # setdefault keeps the first entry for a sku, like the original scan.
    first_by_sku = {}
    for cur_item in all_items:
        first_by_sku.setdefault(cur_item["sku"], cur_item)

    # Collect one image request per distinct sku that is known to the api
    # and flagged as having no image. Unknown skus are left alone.
    req_list = []
    checked_skus = set()
    for item in items:
        sku = item["sku"]
        if sku in checked_skus:
            continue
        checked_skus.add(sku)
        known = first_by_sku.get(sku)
        if known is not None and known["hasImage"] == 0:
            req_list.append([item["image_url"], item])

    # Nothing to download: skip the empty request batch and the zero-total
    # progress output.
    if not req_list:
        return

    # Get images
    report(0, len(req_list), "get images")
    responses = asyncio.run(
        req.create_async_tasks(
            req_list,
            {
                "headers": self.api.headers,
                "printer": (report, len(req_list), "get images"),
                "iteration": [0],
            },
            self._async_get))

    # Create put image request list
    image_list = []
    report(0, len(responses), 'processing images')
    for i, (item, res) in enumerate(responses):
        try:
            content = res.read()
            image_bytes = images.process_response_content(content)
            image_list.append((item["sku"], image_bytes))
        except Exception as err:
            # Best effort: one broken image must not stop the other uploads
            tools.log_error(err)
        finally:
            report(i + 1, len(responses))

    # Every image failed to process: nothing to upload
    if not image_list:
        return

    report(0, len(image_list), "put images")
    req.post_images(image_list,
                    self.api.url + "/items",
                    headers=self.api.headers,
                    printer=(report, len(image_list), "put images"))
def update_item_prices(self, items, brand_id, brand_name, print_func=None):
    """
    Posts items to pisspricer api that are new, uploads missing images and
    puts the price of every item for its store
    :param items: List of dict items. Keys read here: "barcode" (optional),
                  "internalSku" (used when there is no barcode) and "storeId".
                  "image_url" is read by upload_new_images. The whole dict is
                  sent as the request payload. A "sku" key is added to every
                  item whose sku could be resolved.
    :param brand_id: Store brand id
    :param brand_name: Not used by this method
    :param print_func: Function for printing progress, or None
    :return: Responses of the price put requests
    """
    barcodes = req.get(self.api.url + "/barcodes", headers=self.api.headers)
    skus = req.get(self.api.url + "/internalids",
                   headers=self.api.headers,
                   params={"brandId": brand_id})
    # Both lookups are indexed as {key: [sku, ...]} below
    barcodes = barcodes.json()
    skus = skus.json()

    # Named so that it does not shadow the `requests` module name
    items_url = self.api.url + "/items"
    create_requests = []
    for item in items:
        barcode = item.get("barcode")
        if barcode is not None:
            if barcode not in barcodes:
                create_requests.append([items_url, item])
                # Empty placeholder: a repeated barcode in this batch is
                # only created once
                barcodes[barcode] = []
        elif item.get("internalSku") is None or item["internalSku"] not in skus:
            create_requests.append([items_url, item])
            skus[item["internalSku"]] = []

    # Post all items
    if print_func is not None and create_requests:
        print_func(0, len(create_requests), "create new products")
    responses = asyncio.run(
        req.create_async_tasks(
            create_requests,
            {
                "headers": self.api.headers,
                "printer": (print_func, len(create_requests), "create new products"),
                "iteration": [0],
            },
            self._async_post_json))

    # Replace the placeholders with the skus of the created items
    for res in responses:
        if res.status in (200, 201):
            new_sku = res.json()["sku"]
            created = res.content
            barcode = created.get("barcode")
            if barcode is not None:
                barcodes[barcode] = [new_sku]
            else:
                skus[created["internalSku"]] = [new_sku]

    # Create request list for prices
    price_requests = []
    priced_items = []
    for item in items:
        if item.get("barcode") is not None:
            known_skus = barcodes[item["barcode"]]
        else:
            known_skus = skus[item["internalSku"]]
        if not known_skus:
            # The create request failed and left the placeholder in place.
            # Skip this item instead of aborting the whole run on [0].
            tools.log_error(LookupError(
                f"no sku for item (barcode={item.get('barcode')!r}, "
                f"internalSku={item.get('internalSku')!r}); price not uploaded"))
            continue
        sku = known_skus[0]
        item["sku"] = sku
        priced_items.append(item)
        price_requests.append(
            [f"{self.api.url}/items/{sku}/stores/{item['storeId']}", item])

    # Upload images. upload_new_images calls its printer unconditionally,
    # so hand it a no-op when no print function was supplied.
    if print_func is not None:
        image_printer = print_func
    else:
        def image_printer(*_args, **_kwargs):
            return None
    self.upload_new_images(priced_items, image_printer)

    # Put prices
    if print_func is not None:
        print_func(0, len(price_requests), "post new prices")
    return asyncio.run(
        req.create_async_tasks(
            price_requests,
            {
                "headers": self.api.headers,
                "printer": (print_func, len(price_requests), "post new prices"),
                "iteration": [0],
            },
            self._async_put_json))
def upload_new_stores(self, locations, brand_id, printer=None):
    """
    Posts stores to pisspricer api that are new (based on internal id).
    Gets data from google maps api if location data is incomplete.
    Uploads region if it doesn't already exist.
    :param printer: (print_function, total, title) for printing, or None
                    to run without progress output
    :param brand_id: Brand id of store locations
    :param locations: List of dict objects
        {
            name: "required|string",
            url: "required|string",
            region: "required|string",
            region_lat: "numeric",
            region_lng: "numeric",
            address: "required|string",
            postcode: "numeric",
            lattitude: "numeric",
            longitude: "numeric"
            internalId: "string"
        }
    :return: None
    :raises custom_exceptions.AiohttpException: if the current stores
        cannot be fetched
    """
    # Get current locations
    cur_locations_res = req.get(self.api.url + "/stores",
                                headers=self.api.headers,
                                params={"brandId": brand_id})
    if cur_locations_res.status != 200:
        raise custom_exceptions.AiohttpException(cur_locations_res, "get stores", "pisspricer")

    # Internal ids of the stores the api already knows
    cur_locs_set = {loc["internalId"] for loc in cur_locations_res.json()}

    # Get a list of regions
    regions = self.get_regions()

    # Print first iteration
    if printer is not None:
        print_func, total, task = printer
        print_func(0, total, task)
    else:
        # printer defaults to None: keep the names defined for the code below
        print_func, total, task = None, 0, "post stores"

    # Create a list of new stores
    new_locations = []
    for i, loc in enumerate(locations):
        try:
            # Check if the store is new
            if loc["internalId"] not in cur_locs_set:
                region = loc["region"]
                lat = loc["lattitude"]
                lng = loc["longitude"]
                postcode = loc["postcode"]
                address = loc["address"]

                # Get location if data not supplied
                # NOTE(review): when address itself is None the query
                # contains the literal text "None" -- confirm this is intended.
                if any(value is None for value in (region, lat, lng, postcode, address)):
                    lat, lng, address, postcode, region = tools.geocode_address(
                        f"{loc['name']}, {address}")

                # Create new location dict
                new_loc = {
                    "name": loc["name"],
                    "url": loc["url"],
                    "brandId": brand_id,
                    "regionId": self._get_region_id(regions, region,
                                                    lat=loc["region_lat"],
                                                    lng=loc["region_lng"]),
                    "lattitude": lat,
                    "longitude": lng,
                    "postcode": postcode,
                    "address": address,
                    "internalId": loc["internalId"]
                }

                # Add new store to task list
                new_locations.append([self.api.url + "/stores", new_loc])
        except custom_exceptions.GoogleApiException as err:
            tools.log_error(err)
        except custom_exceptions.AiohttpException as err:
            tools.log_error(err)
        finally:
            if printer is not None:
                print_func(i + 1, total, task)

    # Post all stores
    # TODO Change post function
    # Unpacking `printer` here used to raise TypeError for printer=None;
    # use a no-op print function for the request batch in that case.
    if print_func is not None:
        post_print = print_func
    else:
        def post_print(*_args, **_kwargs):
            return None
    kwargs = {"headers": self.api.headers,
              "printer": (post_print, len(new_locations), task),
              "iteration": [0]}
    responses = asyncio.run(
        req.create_async_tasks(new_locations, kwargs, self._async_post_json))
    for res in responses:
        if res.status != 201:
            tools.log_error(custom_exceptions.AiohttpException(res, "post stores", "pisspricer"))
def get_items(self, stores, should_print=True):
    """
    Gets the items of every category/subcategory page for the given stores.
    The first page of each store/subcategory is fetched first; its item
    count decides how many follow-up pages are requested in a second batch.
    :param stores: List of store dicts; "internalId" and "storeId" are read
    :param should_print: When True, progress of the response processing is
                         reported through self.print_func
    :return: List of items built by self._create_item
    """
    task = "get items from liquorland"
    items = []

    def collect_items(meta, res):
        # Parse one response, append its items to `items`, return the soup
        soup = BeautifulSoup(res.text(), features="html.parser")
        for item_div in soup.find_all("div", {"class": "productItemDisplay"}):
            items.append(self._create_item(item_div, meta))
        return soup

    # Iterate through stores and categories and make items for HTTP get
    first_page_items = []
    for store in stores:
        internal_id = store["internalId"]
        for cat in self.categories:
            for _, endpoint, subcatId in cat["subcats"]:
                cookies = copy.deepcopy(self.cookie)
                cookies["selectedStore"] = internal_id
                url = f"{self.base_url}{endpoint}"
                first_page_items.append([
                    url,
                    {
                        "categoryId": cat["id"],
                        "subcategoryId": subcatId,
                        "storeId": store["storeId"],
                        "internalId": store["internalId"],
                        "url": url,
                    },
                    cookies,
                    self.params
                ])

    # Batch get first pages
    iteration = [0]
    responses = asyncio.run(
        req.create_async_tasks(
            first_page_items,
            {
                "printer": (self.print_func, len(first_page_items) - 1, task + " 1", iteration)
            },
            self._async_get_item_page))

    # Named so that it does not shadow the `requests` module name
    next_page_requests = []
    if should_print:
        self.print_func(0, len(responses), "process responses liquorland 1")
    for i, (item, res) in enumerate(responses):
        soup = collect_items(item, res)

        # work out anymore requests that need to be made for subsequent pages, if any
        item_count = self._get_page_item_count(soup)
        n_pages = math.ceil(item_count / self.page_count)
        cookies = copy.deepcopy(self.cookie)
        cookies["selectedStore"] = item["internalId"]
        for p in range(1, n_pages):
            params = copy.deepcopy(self.params)
            params["p"] = p
            next_page_requests.append([
                res.url,
                {
                    "categoryId": item["categoryId"],
                    "subcategoryId": item["subcategoryId"],
                    "storeId": item["storeId"],
                    "internalId": item["internalId"],
                    "url": res.url,
                },
                cookies,
                params
            ])

        # Print Progress. i + 1 responses are done at this point; reporting
        # i repeated 0 and never reached the total.
        if should_print:
            self.print_func(i + 1, len(responses), "process responses liquorland 1")

    # Batch get the follow-up pages
    iteration = [0]
    responses = asyncio.run(
        req.create_async_tasks(
            next_page_requests,
            {
                "printer": (self.print_func, len(next_page_requests) - 1, task + " 2", iteration)
            },
            self._async_get_item_page))

    if should_print:
        self.print_func(0, len(responses), "process responses liquorland 2")
    for i, (item, res) in enumerate(responses):
        collect_items(item, res)

        # Print Progress
        if should_print:
            self.print_func(i + 1, len(responses), "process responses liquorland 2")

    return items