コード例 #1
0
    def parse(self, response):
        """Scrape a response into a ``GuitarCenterItem``.

        The product's position in ``self.products`` is found by URL; when the
        request meta contains ``redirect_urls`` the first of those is used
        instead of ``response.url``.  That lookup runs outside the ``try``
        block, so a failed lookup propagates to the caller.

        On success the item is handed to ``self.xml.create_xml``, receives
        ``image_urls`` and the product status becomes ``"ran"``.  On a
        ``StandardError`` the status becomes ``"error"``, code 100 is reported
        through ``self.exc.code_handler`` and the partially filled item
        (without ``image_urls``) is still returned.
        """
        self.counter += 1
        basic.print_status(self.counter, self.total)
        hxs = HtmlXPathSelector(response)
        item = GuitarCenterItem()

        # After a redirect the product list still holds the originally
        # requested URL, which is the first entry of ``redirect_urls``.
        meta = response.request.meta
        if "redirect_urls" in meta:
            cur_url = meta["redirect_urls"][0]
        else:
            cur_url = response.url
        index = self.products["urls"].index(cur_url)

        try:
            item["product_id"] = [self.products["product_ids"][index]]
            item["name"], item["brand"] = self.get_basic_info(hxs)
            (
                item["heading"],
                item["details"],
                item["specs"],
                item["call_to_action"],
            ) = self.get_description(hxs)
            (
                item["brand_image"],
                item["brand_image_promo"],
                brand_images,
            ) = self.get_description_images(hxs)
            item["old_price"], item["discount"], item["price"] = self.get_prices(hxs)
            item["image_json"], img = self.get_images(hxs)
            item["serial"] = self.get_serials(hxs)
            item["warranty"] = self.gold_coverage(hxs)
            item["in_stock"] = self.get_available(hxs)
            item["product_ref"], item["add_to_cart_id"] = self.get_add_to_cart(hxs)
            # Without an add-to-cart id the stock value is overridden.
            if not item["add_to_cart_id"]:
                item["in_stock"] = ["NOT_AVAILABLE"]
            item["shipping"] = self.get_shipping(hxs)
            item["colors"] = self.get_colors(hxs)
            self.products["status"][index] = "ran"
        except StandardError:
            self.products["status"][index] = "error"
            self.exc.code_handler(100, response.url)
        else:
            # image_urls is attached only after create_xml() has received the item.
            self.xml.create_xml(item)
            item["image_urls"] = img + brand_images
        return item
コード例 #2
0
 def parse(self, response):
     """Scrape a response into a ``GuitarCenterItem``.

     The product's position in ``self.products`` is found by URL; when the
     request meta contains ``redirect_urls`` the first of those is used
     instead of ``response.url``.  That lookup runs outside the ``try``
     block, so a failed lookup propagates to the caller.

     On success the item is handed to ``self.xml.create_xml``, receives
     ``image_urls`` and the product status becomes ``"ran"``.  On a
     ``StandardError`` the status becomes ``"error"``, code 100 is reported
     through ``self.exc.code_handler`` and the partially filled item
     (without ``image_urls``) is still returned.
     """
     self.counter += 1
     basic.print_status(self.counter, self.total)
     hxs = HtmlXPathSelector(response)
     item = GuitarCenterItem()

     # After a redirect the product list still holds the originally
     # requested URL, which is the first entry of ``redirect_urls``.
     meta = response.request.meta
     if 'redirect_urls' in meta:
         cur_url = meta['redirect_urls'][0]
     else:
         cur_url = response.url
     index = self.products['urls'].index(cur_url)

     try:
         item['product_id'] = [self.products['product_ids'][index]]
         item['name'], item['brand'] = self.get_basic_info(hxs)
         (item['heading'], item['details'],
          item['specs'], item['call_to_action']) = self.get_description(hxs)
         (item['brand_image'], item['brand_image_promo'],
          brand_images) = self.get_description_images(hxs)
         item['old_price'], item['discount'], item['price'] = self.get_prices(hxs)
         item['image_json'], img = self.get_images(hxs)
         item['serial'] = self.get_serials(hxs)
         item['warranty'] = self.gold_coverage(hxs)
         item['in_stock'] = self.get_available(hxs)
         item['product_ref'], item['add_to_cart_id'] = self.get_add_to_cart(hxs)
         # Without an add-to-cart id the stock value is overridden.
         if not item['add_to_cart_id']:
             item['in_stock'] = ["NOT_AVAILABLE"]
         item['shipping'] = self.get_shipping(hxs)
         item['colors'] = self.get_colors(hxs)
         self.products['status'][index] = "ran"
     except StandardError:
         self.products['status'][index] = "error"
         self.exc.code_handler(100, response.url)
     else:
         # image_urls is attached only after create_xml() has received the item.
         self.xml.create_xml(item)
         item['image_urls'] = img + brand_images
     return item
コード例 #3
0
    def parse(self, response):
        """Scrape a response into a ``SportmanItem``.

        The product's position in ``self.products`` is found by URL (the
        first ``redirect_urls`` entry when the request was redirected); that
        lookup runs outside the ``try`` block.

        A redirected request is recorded as not available: the item gets the
        id and name from ``self.products``, code 102 is reported and the
        status becomes ``"no_avail"``.  Otherwise the page is scraped, the
        full item is passed to ``self.xml.create_xml`` and the status
        becomes ``"ran"``.

        Returns the item, or ``None`` (implicitly) when an exception was
        caught; in that case code 100 is reported and the status becomes
        ``"error"``.
        """
        self.counter += 1
        basic.print_status(self.counter, self.total)
        hxs = HtmlXPathSelector(response)
        item = SportmanItem()

        meta = response.request.meta
        redirected = "redirect_urls" in meta
        cur_url = meta["redirect_urls"][0] if redirected else response.url
        index = self.products["urls"].index(cur_url)

        try:
            if redirected:
                item["product_id"] = [self.products["product_ids"][index]]
                item["name"] = [self.products["names"][index]]
                item["in_stock"] = ["NOT_AVAILABLE"]
                self.exc.code_handler(102, response.url)
                self.xml.create_xml(item)
                self.products["status"][index] = "no_avail"
            else:
                (
                    item["name"],
                    item["short_desc"],
                    item["description"],
                    item["old_price"],
                    item["custom_price"],
                    item["product_id"],
                    item["sku"],
                ) = self.get_basic_info(hxs)
                item["in_stock"] = ["IN_STOCK"]

                # get_vars() yields eight values; only the first two are used
                # here, the rest are unpacked to match the result's length.
                viewstate, eventval, _, _, _, _, _, _ = self.get_vars(response, hxs)

                # Store the view state as five 2000-element slices plus a
                # sixth holding everything from offset 10000 onwards.
                for part in range(5):
                    start = part * 2000
                    item["viewstate%d" % (part + 1)] = [basic.cdata(viewstate[start:start + 2000])]
                item["viewstate6"] = [basic.cdata(viewstate[10000:])]

                item["eventval"] = [basic.cdata(eventval)]
                item["size_options"] = self.get_variants(hxs, response)

                images_url = self.get_images(hxs)
                item["normal_image_url"] = self.get_server_path(images_url)

                self.xml.create_xml(item)
                # create_xml() has received the full item; the returned item
                # carries nothing but the image URLs.
                item.clear()
                # NOTE(review): get_images() is called a second time rather
                # than reusing images_url -- kept in case get_server_path()
                # modifies its argument; confirm and deduplicate.
                item["image_urls"] = self.get_images(hxs)
                self.products["status"][index] = "ran"
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed.
            self.exc.code_handler(100, response.url)
            self.products["status"][index] = "error"
        else:
            return item
コード例 #4
0
 def parse_can(self, response):
     """Parse callback for the Canadian sites.

     When the request meta carries a ``language`` entry, the product id is
     suffixed with ``"_can_" + language``.

     A redirected request (``redirect_urls`` present in the request meta) is
     recorded as not available: code 102 is reported, the status becomes
     ``'no_avail'`` when the id is found in ``self.products`` and the item is
     passed to ``self.xml.create_xml``.  Otherwise the page is scraped; the
     status becomes ``"ran"``, or ``"error"`` (with code 100 reported) when a
     ``StandardError`` is caught.

     Returns the ``PartyliteItem``; ``image_urls`` is set on it only when
     additional images were found.
     """
     self.counter += 1
     basic.print_status(self.counter, self.total)
     item = PartyliteItem()
     hxs = HtmlXPathSelector(response)
     image_urls = []
     if 'redirect_urls' in response.request.meta:
         redirect_urls = response.request.meta['redirect_urls']
         # The id comes from the originally requested (pre-redirect) URL.
         base_id = self.get_id(redirect_urls[0])[0]
         item['product_id'] = [base_id]
         self.exc.code_handler(102, redirect_urls)
         if 'language' in response.request.meta:
             item['product_id'] = [base_id + "_can" + "_" + response.meta['language']]
         try:
             index = self.products['product_ids'].index(base_id)
             item['name'] = [basic.cdata(item['product_id'][0]
                             + self.products['names'][index])]
             self.products['status'][index] = 'no_avail'
         except (KeyError, ValueError):
             # .index() signals a missing id with ValueError, so catching
             # KeyError alone never reached this message.
             print("This %s id is not in list" % (item['product_id'][0]))
         item['in_stock'] = ['NOT_AVAILABLE']
         item['product_id'] = self.remove_spaces(item['product_id'])
         self.xml.create_xml(item)
     else:
         # This lookup is outside the try block: an unknown id propagates.
         index = self.products['product_ids'].index(self.get_id(response.url)[0])
         try:
             item['product_id'] = self.get_id(response.url)
             item['name'], item['shown_with'] = self.get_basic_info(hxs)
             item['description'] = self.get_description(hxs)
             if 'language' in response.meta:
                 item['product_id'] = [item['product_id'][0] + "_can" + "_" + response.meta['language']]
             response.meta['item'] = item
             page = " ".join(hxs.select('//html').extract())
             image_urls = self.get_more_images(page)
             item['normal_image_url'] = self.get_server_path_field(image_urls)
             item['in_stock'] = self.get_in_stock(hxs)
             color_products = self.create_subproducts(page)
             if color_products:
                 # NOTE(review): ``xml`` is not defined in this method, so it
                 # has to be a module-level name; if it is not, the NameError
                 # is caught below and the product is marked "error".
                 # Possibly ``self.xml`` was intended -- confirm.
                 self.write_subproducts(item['product_id'], color_products, xml)
             else:
                 item['add_to_cart_id'] = self.get_add_to_cart_id(page)
                 item['custom_price'], item['custom_discount'] = self.get_price(hxs)
             self.products['status'][index] = "ran"
         except StandardError:
             basic.print_error()
             self.products['status'][index] = "error"
             self.exc.code_handler(100, response.url)
         else:
             item['product_id'] = self.remove_spaces(item['product_id'])
             self.xml.create_xml(item)
     if image_urls:
         item['image_urls'] = image_urls
     return item
コード例 #5
0
 def parse(self, response):
     """Scrape a response into a ``BootsItem`` and export it as XML.

     The main item is always passed to ``self.xml.create_xml``.  When
     variant ids are found, the variants are written through
     ``create_color_variants`` or ``create_size_variants``; otherwise the
     price, points price and collect points are read from the page and
     stored on the item itself.

     Returns the item with ``image_urls`` set.  Nothing is caught here, so
     any exception raised while scraping propagates to the caller.
     """
     self.counter += 1
     basic.print_status(self.counter, self.total)
     hxs = HtmlXPathSelector(response)
     item = BootsItem()
     (item['product_id'], item['store_id'],
      item['lang_id'], item['catalog_id']) = self.get_ids(hxs)
     item['name'] = self.get_name(hxs)
     (item['short_description'], sponsored, description, in_stock,
      item['ingredients'], patient_information_url,
      item['offer'], item['promotion']) = self.get_description(hxs)
     item['rating'] = self.get_rating(hxs)
     size, price_per_size = self.get_size(hxs)
     item['normal_image_url'], image_urls = self.get_images(hxs)
     brand, brand_image_url = self.get_brand(hxs)
     item['save_money'], item['old_price'] = self.get_oldies(hxs)

     # Each description part goes into its own numbered field, starting at 1.
     for position, text in enumerate(description, 1):
         item['description_%d' % position] = [basic.cdata(text)]

     if sponsored is not None:
         item['sponsored'] = sponsored

     item['in_stock'] = ["NOT_IN_STOCK"]
     if in_stock == "In stock":
         item['in_stock'] = ["IN_STOCK"]
         item['order_id'] = hxs.select('//input[@name="orderId"]/@value').extract()
         item['cat_entry_id'] = hxs.select('//input[@name="catEntryId"]/@value').extract()
         item['calculation_usage_id'] = hxs.select('//input[@name="calculationUsageId"]/@value').extract()

     if brand_image_url is not None:
         item['brand'] = brand
         item['brand_image_url'] = ["43662980-f344-11e1-a21f-0800200c9a66/full/" + self.get_image_sha1(brand_image_url)]
         image_urls.append(brand_image_url)

     if patient_information_url is not None:
         item['patient_information_url'] = [basic.cdata(patient_information_url)]

     prices, point_prices, collect_points, colors, color_image_urls, variant_ids = self.get_color_variants(hxs)
     if size is not None:
         item['size'] = size
         item['price_per_size'] = price_per_size
     elif variant_ids is None:
         # No fixed size and no colour variants: look for size variants.
         prices, point_prices, collect_points, sizes, variant_ids = self.get_size_variants(hxs)

     if color_image_urls is not None:
         image_urls.extend(color_image_urls)

     if variant_ids is not None:
         self.xml.create_xml(item)
         if colors is not None:
             self.create_color_variants(prices, point_prices, colors, color_image_urls, variant_ids, collect_points, item['product_id'])
         else:
             # NOTE(review): ``sizes`` is bound only when get_size_variants()
             # ran above; if get_color_variants() ever returns variant ids
             # without colours this line fails with an unbound local.
             self.create_size_variants(prices, point_prices, sizes, variant_ids, collect_points, item['product_id'])
     else:
         # No variants: the prices live directly on the product page.
         prices = hxs.select('//p[@class="price"]/text()').extract()[0]
         point_prices = hxs.select('//span[@class="pointsPrice"]/text()').extract()[0]
         collect_points = [basic.get_price(hxs.select('//p[@class="collectPoints"]/text()').extract()[0])]
         item['price'] = [basic.get_price(prices)]
         item['points_price'] = [basic.get_price(point_prices)]
         item['collect_points'] = collect_points
         self.xml.create_xml(item)

     item['image_urls'] = image_urls
     return item
コード例 #6
0
    def parse(self, response):
        """Scrape a response into a ``SportmanItem``.

        The product's position in ``self.products`` is found by URL (the
        first ``redirect_urls`` entry when the request was redirected); that
        lookup runs outside the ``try`` block.

        A redirected request is recorded as not available: the item gets the
        id and name from ``self.products``, code 102 is reported and the
        status becomes ``"no_avail"``.  Otherwise the page is scraped, the
        full item is passed to ``self.xml.create_xml`` and the status
        becomes ``"ran"``.

        Returns the item, or ``None`` (implicitly) when an exception was
        caught; in that case code 100 is reported and the status becomes
        ``"error"``.
        """
        self.counter += 1
        basic.print_status(self.counter, self.total)
        hxs = HtmlXPathSelector(response)
        item = SportmanItem()

        meta = response.request.meta
        redirected = 'redirect_urls' in meta
        cur_url = meta['redirect_urls'][0] if redirected else response.url
        index = self.products['urls'].index(cur_url)

        try:
            if redirected:
                item['product_id'] = [self.products['product_ids'][index]]
                item['name'] = [self.products['names'][index]]
                item['in_stock'] = ["NOT_AVAILABLE"]
                self.exc.code_handler(102, response.url)
                self.xml.create_xml(item)
                self.products["status"][index] = "no_avail"
            else:
                (item["name"], item["short_desc"], item["description"],
                 item["old_price"], item["custom_price"],
                 item["product_id"], item["sku"]) = self.get_basic_info(hxs)
                item['in_stock'] = ['IN_STOCK']

                # get_vars() yields eight values; only the first two are used
                # here, the rest are unpacked to match the result's length.
                viewstate, eventval, _, _, _, _, _, _ = self.get_vars(response, hxs)

                # Store the view state as five 2000-element slices plus a
                # sixth holding everything from offset 10000 onwards.
                for part in range(5):
                    start = part * 2000
                    item["viewstate%d" % (part + 1)] = [basic.cdata(viewstate[start:start + 2000])]
                item["viewstate6"] = [basic.cdata(viewstate[10000:])]

                item["eventval"] = [basic.cdata(eventval)]
                item["size_options"] = self.get_variants(hxs, response)

                images_url = self.get_images(hxs)
                item["normal_image_url"] = self.get_server_path(images_url)

                self.xml.create_xml(item)
                # create_xml() has received the full item; the returned item
                # carries nothing but the image URLs.
                item.clear()
                # NOTE(review): get_images() is called a second time rather
                # than reusing images_url -- kept in case get_server_path()
                # modifies its argument; confirm and deduplicate.
                item['image_urls'] = self.get_images(hxs)
                self.products["status"][index] = "ran"
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed.
            self.exc.code_handler(100, response.url)
            self.products["status"][index] = "error"
        else:
            return item
コード例 #7
0
 def parse(self, response):
     """Scrape a response into a ``LydiasItem`` and export it as XML.

     The product's position in ``self.products`` is found by URL (the first
     ``redirect_urls`` entry when the request was redirected); that lookup
     runs outside the ``try`` block.

     A page is treated as not available when it contains text inside
     ``div#searchfor``: code 102 is reported and the status becomes
     ``"not_avail"``.  Otherwise the product is scraped, sub-products are
     created through ``create_subproducts`` and the status becomes
     ``"ran"``.  Any exception sets the status to ``"error"`` and reports
     code 100.

     Returns the item in every case, including after an error.
     """
     self.counter += 1
     basic.print_status(self.counter, self.total)
     hxs = HtmlXPathSelector(response)
     item = LydiasItem()

     meta = response.request.meta
     if 'redirect_urls' in meta:
         cur_url = meta['redirect_urls'][0]
     else:
         cur_url = response.url
     index = self.products['urls'].index(cur_url)
     product_id = self.products['product_ids'][index]

     try:
         # Text in div#searchfor marks a page without the product.
         search_marker = hxs.select('//div[@id="searchfor"]/text()').extract()
         if search_marker:
             self.exc.code_handler(102, response.url)
             item['product_id'] = [product_id]
             item['in_stock'] = ["NOT_AVAILABLE"]
             self.products['status'][index] = "not_avail"
             self.xml.create_xml(item)
         else:
             item['product_id'] = [product_id]
             (item['name'], item['price'],
              item['old_price'], item['description']) = self.get_basic_info(hxs)
             item['rating'], item['custom_rating'] = self.get_rating(hxs)
             chart = self.absolute_path(self.get_size_image(hxs))
             item['sizes_chart_image_url'] = self.get_server_path(chart)
             (color_urls, color_names,
              product_image, color_codes) = self.get_image_swatches(hxs)
             color_urls = self.absolute_path(color_urls)
             item['color_image_url'] = self.make_colors_json(
                 color_urls, color_names, color_codes)
             item['in_stock'] = ["IN_STOCK"]
             item['embroidery'] = self.get_embroidery(hxs)
             default_images = self.absolute_path(self.get_extra_images(hxs))
             item['default_image_url'] = self.get_server_path(default_images)
             self.xml.create_xml(item)
             product_image = self.absolute_path(product_image)
             self.create_subproducts(product_id, color_names, product_image,
                                     color_codes, hxs)
             # image_urls is attached only after create_xml() has run.
             item['image_urls'] = product_image + color_urls + chart + default_images
             self.products['status'][index] = "ran"
     except Exception:
         # Exception rather than a bare except, so KeyboardInterrupt and
         # SystemExit are not swallowed.
         self.products['status'][index] = "error"
         self.exc.code_handler(100, response.url)
     return item
コード例 #8
0
ファイル: lydias_spider.py プロジェクト: marjevtic/testMarko
 def parse(self, response):
     """Scrape a response into a ``LydiasItem`` and export it as XML.

     The product's position in ``self.products`` is found by URL (the first
     ``redirect_urls`` entry when the request was redirected); that lookup
     runs outside the ``try`` block.

     A page is treated as not available when it contains text inside
     ``div#searchfor``: code 102 is reported and the status becomes
     ``"not_avail"``.  Otherwise the product is scraped, sub-products are
     created through ``create_subproducts`` and the status becomes
     ``"ran"``.  Any exception sets the status to ``"error"`` and reports
     code 100.

     Returns the item in every case, including after an error.
     """
     self.counter += 1
     basic.print_status(self.counter, self.total)
     hxs = HtmlXPathSelector(response)
     item = LydiasItem()

     meta = response.request.meta
     if 'redirect_urls' in meta:
         cur_url = meta['redirect_urls'][0]
     else:
         cur_url = response.url
     index = self.products['urls'].index(cur_url)
     product_id = self.products['product_ids'][index]

     try:
         # Text in div#searchfor marks a page without the product.
         search_marker = hxs.select('//div[@id="searchfor"]/text()').extract()
         if search_marker:
             self.exc.code_handler(102, response.url)
             item['product_id'] = [product_id]
             item['in_stock'] = ["NOT_AVAILABLE"]
             self.products['status'][index] = "not_avail"
             self.xml.create_xml(item)
         else:
             item['product_id'] = [product_id]
             item['name'], item['price'], item['old_price'], item['description'] = self.get_basic_info(hxs)
             item['rating'], item['custom_rating'] = self.get_rating(hxs)
             chart = self.absolute_path(self.get_size_image(hxs))
             item['sizes_chart_image_url'] = self.get_server_path(chart)
             color_urls, color_names, product_image, color_codes = self.get_image_swatches(hxs)
             color_urls = self.absolute_path(color_urls)
             item['color_image_url'] = self.make_colors_json(color_urls, color_names, color_codes)
             item['in_stock'] = ["IN_STOCK"]
             item['embroidery'] = self.get_embroidery(hxs)
             default_images = self.absolute_path(self.get_extra_images(hxs))
             item['default_image_url'] = self.get_server_path(default_images)
             self.xml.create_xml(item)
             product_image = self.absolute_path(product_image)
             self.create_subproducts(product_id, color_names, product_image, color_codes, hxs)
             # image_urls is attached only after create_xml() has run.
             item['image_urls'] = product_image + color_urls + chart + default_images
             self.products['status'][index] = "ran"
     except Exception:
         # Exception rather than a bare except, so KeyboardInterrupt and
         # SystemExit are not swallowed.
         self.products['status'][index] = "error"
         self.exc.code_handler(100, response.url)
     return item
コード例 #9
0
ファイル: burton_spider.py プロジェクト: marjevtic/testMarko
 def parse(self, response):
     """Scrape a response into a ``BurtonItem`` and export it as XML.

     The product's position in ``self.products`` is found by URL (the first
     ``redirect_urls`` entry when the request was redirected); that lookup
     and ``hxs.extract()`` run outside the ``try`` block.

     A redirected request is recorded as not available: the item gets the
     id and name from ``self.products``, code 102 is reported and the status
     becomes ``"no_avail"``.  Otherwise the page is scraped, the item is
     passed to ``self.xml.create_xml`` and the status becomes ``"ran"``.

     Returns the item, or ``None`` (implicitly) when an exception was
     caught; in that case code 100 is reported and the status becomes
     ``"error"``.
     """
     self.counter += 1
     basic.print_status(self.counter, self.total)
     hxs = HtmlXPathSelector(response)
     item = BurtonItem()
     page = hxs.extract()

     meta = response.request.meta
     redirected = 'redirect_urls' in meta
     cur_url = meta['redirect_urls'][0] if redirected else response.url
     index = self.products['urls'].index(cur_url)

     try:
         if redirected:
             item['product_id'] = [self.products['product_ids'][index]]
             item['name'] = [self.products['names'][index]]
             item['in_stock'] = ["NOT_AVAILABLE"]
             self.exc.code_handler(102, response.url)
             self.xml.create_xml(item)
             self.products["status"][index] = "no_avail"
         else:
             item['product_id'], item['name'] = self.get_basic_info(hxs)
             item['description'], item['features'] = self.get_description(hxs)
             item['variants'], thumb_urls, color_names = self.get_variants(page)
             item['all_sizes'] = self.get_all_sizes(page)
             item['color_json'], image_urls = self.get_colors(page, color_names)
             item['price'], item['old_price'] = self.get_prices(hxs)
             item['in_stock'] = ['IN_STOCK']
             item['product_link'] = [basic.cdata(response.url)]
             self.xml.create_xml(item)
             # image_urls is attached only after create_xml() has run.
             item['image_urls'] = image_urls + thumb_urls
             self.products["status"][index] = "ran"
     except Exception:
         # Exception rather than a bare except, so KeyboardInterrupt and
         # SystemExit are not swallowed.
         self.exc.code_handler(100, response.url)
         self.products["status"][index] = "error"
     else:
         return item
コード例 #10
0
ファイル: burton_spider.py プロジェクト: marjevtic/testMarko
 def parse(self, response):
     """Scrape a response into a ``BurtonItem`` and export it as XML.

     The product's position in ``self.products`` is found by URL (the first
     ``redirect_urls`` entry when the request was redirected); that lookup
     and ``hxs.extract()`` run outside the ``try`` block.

     A redirected request is recorded as not available: the item gets the
     id and name from ``self.products``, code 102 is reported and the status
     becomes ``"no_avail"``.  Otherwise the page is scraped, the item is
     passed to ``self.xml.create_xml`` and the status becomes ``"ran"``.

     Returns the item, or ``None`` (implicitly) when an exception was
     caught; in that case code 100 is reported and the status becomes
     ``"error"``.
     """
     self.counter += 1
     basic.print_status(self.counter, self.total)
     hxs = HtmlXPathSelector(response)
     item = BurtonItem()
     page = hxs.extract()

     meta = response.request.meta
     redirected = 'redirect_urls' in meta
     cur_url = meta['redirect_urls'][0] if redirected else response.url
     index = self.products['urls'].index(cur_url)

     try:
         if redirected:
             item['product_id'] = [self.products['product_ids'][index]]
             item['name'] = [self.products['names'][index]]
             item['in_stock'] = ["NOT_AVAILABLE"]
             self.exc.code_handler(102, response.url)
             self.xml.create_xml(item)
             self.products["status"][index] = "no_avail"
         else:
             item['product_id'], item['name'] = self.get_basic_info(hxs)
             item['description'], item['features'] = self.get_description(hxs)
             item['variants'], thumb_urls, color_names = self.get_variants(page)
             item['all_sizes'] = self.get_all_sizes(page)
             item['color_json'], image_urls = self.get_colors(page, color_names)
             item['price'], item['old_price'] = self.get_prices(hxs)
             item['in_stock'] = ['IN_STOCK']
             item['product_link'] = [basic.cdata(response.url)]
             self.xml.create_xml(item)
             # image_urls is attached only after create_xml() has run.
             item['image_urls'] = image_urls + thumb_urls
             self.products["status"][index] = "ran"
     except Exception:
         # Exception rather than a bare except, so KeyboardInterrupt and
         # SystemExit are not swallowed.
         self.exc.code_handler(100, response.url)
         self.products["status"][index] = "error"
     else:
         return item
コード例 #11
0
    def parse(self, response):
        """Scrape a response into an ``ExpressItem`` and export it as XML.

        The response URL is looked up in ``self.url_list`` (outside the
        ``try`` block) and that slot is overwritten with the current counter.

        Outcomes handled inside the ``try`` block:

        * URL equal to ``http://www.zmags.com/``: code 101 is reported and a
          placeholder "not available" item is written.
        * 404 image present on the page: code 104 is reported.
        * product page: the main product is written; when the page does not
          carry the "no longer available" message, child products and any
          shop-look / shop-line entries are processed too, otherwise code
          102 is reported.

        A ``StandardError`` raised along the way is reported with code 100.
        Returns the item in every case.
        """
        self.counter += 1
        basic.print_status(self.counter, self.total)
        hxs = HtmlXPathSelector(response)
        item = ExpressItem()
        index = self.url_list.index(response.url)
        self.url_list[index] = self.counter
        flag = 0
        # main try that catches all unhandled errors
        try:
            if response.url != "http://www.zmags.com/":
                error_404 = hxs.select('//img[@alt="404 Error Page Not Found"]').extract()
                # NOTE(review): the flag is raised before the 404 check, so
                # the placeholder item below is written only for the zmags
                # URL and never for a 404 page -- confirm this is intended.
                flag = 1
                if not error_404:
                    available = hxs.select('//span[@class="glo-tex-error"]/text()').extract()
                    page = " ".join(hxs.select('//html').extract())
                    # part for creating main product in xml
                    product_id = self.get_product_id(hxs)[0]
                    if product_id != self.id_list[index]:
                        msg = "\nNot equal, id in sheet {0}, on site {1}".format(self.id_list[index], product_id)
                        self.temp_msg += msg
                    item['product_id'] = [product_id]
                    item['name'] = self.get_name(hxs)
                    item['description'], item['promo_text'] = self.get_basic_info(hxs)
                    item['master_price'], item['discount_price'] = self.get_product_prices(hxs)
                    item['shop_look'] = ['False']
                    item['normal'] = ['True']
                    item['shop_line'] = ['False']
                    item['in_stock'] = ["NOT_IN_STOCK"]
                    # An empty ``available`` list raises IndexError here, which
                    # the handler below reports as code 100.
                    if available[0] != "This item is no longer available for purchase.":
                        item['category_id'], item['subcategory_id'] = self.get_categories(hxs)
                        item['add_to_cart_id'] = self.get_add_to_cart_id(hxs)
                        color_names, urls, swatch_image_names, jsons = self.get_swatch_images(hxs)
                        item['color_image_url'] = self.create_color_json(urls, color_names)
                        item['in_stock'] = ["IN_STOCK"]
                        item['product_page'] = [response.url]
                        self.xml.create_xml(item)
                        product_images, images_grouped = self.parse_jsons(jsons, color_names)
                        ids, sizes, prices = self.get_variants(page)
                        # calling function that will handle creating all child products
                        self.create_child_products(product_id, ids, sizes, prices, images_grouped)
                        item['image_urls'] = urls + product_images
                        if self.shop_look_list[index]:
                            self.parse_for_shop_look(hxs, self.shop_look_list[index],
                                                     product_id, page, images_grouped, response.url, index)
                        if self.shop_line_list[index]:
                            self.parse_for_shop_look(hxs, self.shop_line_list[index],
                                                     product_id, page, images_grouped, response.url, index)
                    else:
                        self.xml.create_xml(item)
                        self.exc.code_handler(102, response.url)
                else:
                    self.exc.code_handler(104, response.url)
            else:
                basic.not_provided()
                self.exc.code_handler(101, response.url)
            if not flag:
                item['product_id'] = [self.id_list[index]]
                item['in_stock'] = ["NOT_AVAILABLE"]
                item['name'] = ["not available"]
                self.xml.create_xml(item)
        except StandardError:
            self.exc.code_handler(100, response.url)
        return item
コード例 #12
0
ファイル: kenneth_spider.py プロジェクト: marjevtic/testMarko
    def parse(self, response):
        """Scrape a response into a ``KennethItem`` and export it as XML.

        A page containing ``div#noResultsContent`` is treated as a product
        that no longer exists: it is looked up through a rebuilt
        ``index.jsp?productId=...`` URL, its status becomes ``"no_avail"``
        and code 102 is reported.  Any other page is scraped as an in-stock
        product and its status becomes ``"ran"``.

        Any exception is reported with code 100; the status becomes
        ``"error"`` when the URL can be found in ``self.products['urls']``.
        Returns the item in every case.
        """
        self.counter += 1
        basic.print_status(self.counter, self.total)
        hxs = HtmlXPathSelector(response)
        item = KennethItem()
        # Bound before the try block so the error handler can always use it.
        cur_url = response.url
        # main try for script, run general except if error happens in code
        # (send url on mail where it happened)
        try:
            # search for noResultsContent div on the page; if it exists the
            # product doesn't exist on their page, otherwise continue
            # scraping the page
            no_results = hxs.select('//div[@id="noResultsContent"]').extract()

            if not no_results:
                index = self.products['urls'].index(cur_url)
                cur_id = self.get_product_id(cur_url)
                product_id = self.products['product_ids'][index]
                page = " ".join(hxs.select('//div[@id="mainContent"]').extract())
                item['name'], item['description'] = self.get_basic_info(hxs)
                price, new_p, old_p = self.get_prices(hxs)
                if new_p:
                    item['new_price'] = new_p
                    item['old_price'] = old_p
                else:
                    item['price'] = price
                item['description'] = [basic.clean_string(item['description'][0])]
                urls = self.get_color_image(hxs)
                item['color_image_urls'] = self.get_image_server_path(urls, product_id)
                self.export(item['color_image_urls'], [product_id], "swatchImage")
                jsons, images = self.we_also_recommend(cur_id, product_id)
                item['product_page'] = [cur_url]
                item['product_id'] = [product_id]
                item['add_to_cart_id'] = [cur_id]
                item['recommended_product'] = jsons
                item['in_stock'] = ["IN_STOCK"]
                self.products['status'][index] = "ran"
                # get_colors() hands back either image URLs or the number 404.
                images_or_404 = self.get_colors(hxs, page, product_id)
                if images_or_404 == 404:
                    item['in_stock'] = ["NOT_AVAILABLE"]
                self.xml.create_xml(item)
                # image_urls is attached only after create_xml() has run.
                item['image_urls'] = []
                if images_or_404 != 404:
                    item['image_urls'] += images_or_404
                item['image_urls'] += urls
                item['image_urls'] += images
            else:
                # part for handling products that are not available
                cur_id = self.get_product_id(cur_url)
                cur_url = "http://www.kennethcole.com/product/index.jsp?"
                cur_url += "productId=" + str(cur_id)
                index = self.products['urls'].index(cur_url)
                self.products['status'][index] = "no_avail"
                item['product_id'] = [self.products['product_ids'][index]]
                if self.products['product_ids'][index]:
                    item['name'] = [self.products['names'][index]]
                else:
                    item['name'] = ["not available"]
                item['in_stock'] = ["NOT_AVAILABLE"]
                self.xml.create_xml(item)
                self.exc.code_handler(102, cur_url)
        except Exception:
            # part for catching errors and keeping track of numbers of
            # it and urls where it happened
            print("Error occured scraping this product")
            try:
                index = self.products['urls'].index(cur_url)
            except ValueError:
                # The URL is not in the product list (possibly the very
                # reason for the failure), so there is no status to update;
                # the error is still reported below.
                pass
            else:
                self.products['status'][index] = "error"
            self.exc.code_handler(100, cur_url)
        return item