def create_child_products(self, main_id, ids, sizes, prices, images_grouped):
    """Write the colour-level and size-level child products of a master product.

    For every colour key in ``ids`` one colour product is written whose id is
    ``main_id + "_" + letter``.  Under it, one product per entry of
    ``ids[colour]`` is written whose id is the colour product id plus
    ``"_" + letter``.  Letters are assigned by position, starting at 'a'.

    Arguments:
    main_id -- product id of the master product (a string; it is concatenated).
    ids -- dict with colour names as keys and lists of size option ids as
        values, e.g. {'black': ['32854', '32855']}.
    sizes -- dict with the same colour keys, values indexed in parallel with
        ``ids[colour]``.  When empty, no 'size' field is written.
    prices -- dict with the same colour keys, values indexed in parallel with
        ``ids[colour]``.
    images_grouped -- dict of images grouped by colour name.  When empty, no
        'normal_image_url' field is written.
    """
    for colour_pos, colour in enumerate(ids):
        # chr(97) is 'a'; suffixes stay alphabetic only for the first 26 entries.
        colour_id = main_id + "_" + chr(colour_pos + 97)

        # A fresh item per product: reusing one item let the size, option id
        # and price of the previous colour's last variant leak into the next
        # colour-level product.
        colour_item = ExpressItem()
        colour_item['product_id'] = [colour_id]
        colour_item['master_product_id'] = [main_id]
        colour_item['color'] = [colour]
        if images_grouped:
            images = self.get_absolute_url(images_grouped[colour])
            colour_item['normal_image_url'] = basic.cdata_field(
                self.map_url_to_server(images, main_id))
        self.xml.create_xml(colour_item)

        for size_pos, option_id in enumerate(ids[colour]):
            size_item = ExpressItem()
            size_item['product_id'] = [colour_id + "_" + chr(size_pos + 97)]
            size_item['master_product_id'] = [colour_id]
            if sizes:
                size_item['size'] = [sizes[colour][size_pos]]
            size_item['size_option_id'] = [option_id]
            size_item['price'] = [prices[colour][size_pos]]
            self.xml.create_xml(size_item)
def _create_shop_lines(self, ids, names):
    """Write one shop-line item for every id.

    Arguments:
    ids -- sequence of shop line ids; each becomes the item's 'product_id'.
    names -- sequence indexed in parallel with ``ids``; each name is passed
        through basic.cdata before being stored.
    """
    # A single item is reused; every field is overwritten on each pass.
    line_item = ExpressItem()
    for position, line_id in enumerate(ids):
        line_item['product_id'] = [line_id]
        line_item['name'] = [basic.cdata(names[position])]
        # Marker fields identifying the record as a shop line.
        line_item['shop_look'] = ['False']
        line_item['normal'] = ['False']
        line_item['shop_line'] = ['True']
        line_item['in_stock'] = ['IN_STOCK']
        self.xml.create_xml(line_item)
def parse_shop_look(self, hxs):
    """Write a shop-look master item and one child item per product block.

    The master item carries the ensemble id read from the page source, the
    name from the page heading and the image returned by shl_get_image.
    Each ``div#cat-ens-prod-item`` node then yields a child item whose id is
    "DUMMIE1_" plus its 1-based position.

    Arguments:
    hxs -- selector for the shop-look page.
    """
    products = hxs.select('//div[@id="cat-ens-prod-item"]')

    whole_page = "".join(hxs.extract())
    ensemble_id = basic.get_middle_text(whole_page, "ensembleId: '", "',")
    name = hxs.select('//div[@id="cat-ens-prod-con"]/h1/text()').extract()
    name = basic.clean_string_field(name)

    look_item = ExpressItem()
    # NOTE(review): stored exactly as returned by basic.get_middle_text,
    # unlike the other fields, which are wrapped in lists here -- confirm the
    # helper's return type.
    look_item['ensemble_id'] = ensemble_id
    look_item['normal_image_url'] = self.shl_get_image(hxs)
    # TODO: replace the placeholder id with the actual shop look id.
    look_item['product_id'] = ["DUMMIE1"]
    look_item['shop_look'] = ['True']
    look_item['normal'] = ['False']
    look_item['shop_line'] = ['False']
    look_item['in_stock'] = ['IN_STOCK']
    look_item['name'] = name
    # Write through the instance's xml writer, as every other method does;
    # the bare name `xml` is not the writer used elsewhere.
    self.xml.create_xml(look_item)

    for position, product in enumerate(products, 1):
        child = ExpressItem()
        child['master_product_id'] = ['DUMMIE1']
        child['product_id'] = ["DUMMIE1_" + str(position)]
        child['name'], child['price'], child['style'] = self.shl_basic_info(product)
        page = product.extract()
        child['variants'] = basic.cdata_field(
            [self.shl_create_variants(self.get_variants(page))])
        child['colors'] = basic.cdata_field(self.shl_get_swatches(product))
        self.xml.create_xml(child)
def parse_for_shop_look(self, hxs, id, product_id, page, images_grouped, product_url, index):
    """Write a product as a child of a shop look or shop line.

    The same product information as for a normal product is collected, but
    colours and variants are stored as JSON strings and the item points to
    ``id`` (the shop look/line id) as its master product.

    Arguments:
    hxs -- selector for the product page.
    id -- shop look or shop line id, used as the master product id.
    product_id -- id of the product itself; also stored as 'style'.
    page -- page source handed to get_variants.
    images_grouped -- dict of images keyed by colour name.
    product_url -- url stored in 'product_page'.
    index -- position used to look up self.order_list when self.ordered is set.

    TODO: see if the "not available" case needs special handling.
    """
    look_item = ExpressItem()
    look_item['master_product_id'] = [id]
    look_item['product_id'] = [id + "_" + product_id]
    if self.ordered:
        look_item['order_index'] = [self.order_list[index]]
    look_item['style'] = [product_id]
    look_item['product_page'] = [product_url]
    look_item['category_id'], look_item['subcategory_id'] = self.get_categories(hxs)
    look_item['add_to_cart_id'] = self.get_add_to_cart_id(hxs)

    # Colours JSON: one serialized dict per colour with its swatch and images.
    color_names, urls, swatch_image_names, jsons = self.get_swatch_images(hxs)
    colour_jsons = [
        simplejson.dumps({'name': colour,
                          'swatch_url': urls[position],
                          'image_url': self.get_absolute_url(images_grouped[colour])})
        for position, colour in enumerate(color_names)
    ]
    look_item['colors'] = basic.cdata_field(colour_jsons)

    look_item['price'], look_item['discount_price'] = self.get_product_prices(hxs)
    look_item['description'], look_item['promo_text'] = self.get_basic_info(hxs)
    look_item['name'] = self.get_name(hxs)

    # Variants JSON: one serialized dict per colour with prices, ids and,
    # when the lookup succeeds, sizes.
    ids, sizes, prices = self.get_variants(page)
    variant_jsons = []
    for colour in ids:
        variant = {'color': colour, 'prices': prices[colour], 'ids': ids[colour]}
        try:
            variant['sizes'] = sizes[colour]
        except StandardError:
            print("This product has no sizes")
        variant_jsons.append(simplejson.dumps(variant))
    look_item['variants'] = basic.cdata_field(variant_jsons)

    self.xml.create_xml(look_item)
def parse(self, response):
    """Parse one product page and write the resulting items to the xml output.

    Outcomes, by the code reported through self.exc.code_handler:
    101 -- the url is the "http://www.zmags.com/" placeholder; a
           "NOT_AVAILABLE" item with the sheet id is written.
    104 -- the page contains the 404 image; nothing is written.
    102 -- the page says the item is no longer available; the master item
           is written with in_stock "NOT_IN_STOCK".
    100 -- any StandardError raised while handling the page.
    Otherwise the master item is written as "IN_STOCK", followed by its child
    products and, when configured for this index, its shop look / shop line
    entries.

    Arguments:
    response -- the response being handled; its url must be in self.url_list.

    Returns the master ExpressItem, which may be only partly filled when an
    error was handled.
    """
    self.counter += 1
    basic.print_status(self.counter, self.total)
    hxs = HtmlXPathSelector(response)
    item = ExpressItem()
    index = self.url_list.index(response.url)
    # The handled url is overwritten with the running counter
    # (presumably so a repeated url resolves to its next occurrence -- confirm).
    self.url_list[index] = self.counter

    # main try that catches all unhandled errors
    try:
        if response.url == "http://www.zmags.com/":
            # Placeholder url: no product page was provided for this row.
            basic.not_provided()
            self.exc.code_handler(101, response.url)
            item['product_id'] = [self.id_list[index]]
            item['in_stock'] = ["NOT_AVAILABLE"]
            item['name'] = ["not available"]
            self.xml.create_xml(item)
        elif hxs.select('//img[@alt="404 Error Page Not Found"]').extract():
            # NOTE(review): no item is written for a 404 page -- confirm that
            # this is intended.
            self.exc.code_handler(104, response.url)
        else:
            available = hxs.select('//span[@class="glo-tex-error"]/text()').extract()
            page = " ".join(hxs.select('//html').extract())

            # part for creating main product in xml
            product_id = self.get_product_id(hxs)[0]
            if product_id != self.id_list[index]:
                self.temp_msg += "\nNot equal, id in sheet {0}, on site {1}".format(
                    self.id_list[index], product_id)
            item['product_id'] = [product_id]
            item['name'] = self.get_name(hxs)
            item['description'], item['promo_text'] = self.get_basic_info(hxs)
            item['master_price'], item['discount_price'] = self.get_product_prices(hxs)
            item['shop_look'] = ['False']
            item['normal'] = ['True']
            item['shop_line'] = ['False']
            item['in_stock'] = ["NOT_IN_STOCK"]

            # NOTE(review): an empty `available` list raises IndexError here
            # and ends up in the generic 100 handler -- confirm the span is
            # always present on product pages.
            if available[0] != "This item is no longer available for purchase.":
                item['category_id'], item['subcategory_id'] = self.get_categories(hxs)
                item['add_to_cart_id'] = self.get_add_to_cart_id(hxs)
                color_names, urls, swatch_image_names, jsons = self.get_swatch_images(hxs)
                item['color_image_url'] = self.create_color_json(urls, color_names)
                item['in_stock'] = ["IN_STOCK"]
                item['product_page'] = [response.url]
                self.xml.create_xml(item)

                product_images, images_grouped = self.parse_jsons(jsons, color_names)
                ids, sizes, prices = self.get_variants(page)
                # calling function that will handle creating all child products
                self.create_child_products(product_id, ids, sizes, prices, images_grouped)
                # Set after the xml write, so it only affects the returned item.
                item['image_urls'] = urls + product_images

                if self.shop_look_list[index]:
                    self.parse_for_shop_look(hxs, self.shop_look_list[index], product_id,
                                             page, images_grouped, response.url, index)
                if self.shop_line_list[index]:
                    self.parse_for_shop_look(hxs, self.shop_line_list[index], product_id,
                                             page, images_grouped, response.url, index)
            else:
                self.xml.create_xml(item)
                self.exc.code_handler(102, response.url)
    except StandardError:
        self.exc.code_handler(100, response.url)
    return item