예제 #1
0
 def get_image_names(self, page):
     """Build a lookup of names for color swatches.

     The page is cut on every "new DropDownInfo" marker and the text in
     front of the first marker is ignored.  For each remaining piece,
     element 0 of the ``('`` .. ``'`` lookup becomes the key and element 2
     of the ``'`` .. ``')`` lookup becomes the value.
     """
     pieces = page.split("new DropDownInfo")
     names = {}
     for piece in pieces[1:]:
         # The value is resolved before the key, as in a plain
         # ``names[key] = value`` assignment.
         label = basic.get_middle_text(piece, "'", "')")[2]
         key = basic.get_middle_text(piece, "('", "'")[0]
         names[key] = label
     return names
예제 #2
0
 def we_also_recommend(self, id, main_id):
     """Fetch and parse the "we also recommend" products for a product.

     Requests the res-x.com endpoint built from ``id``, splits the answer
     on "certonaRecBoxes" and reads id, image, name and price out of every
     piece after the first one.

     Returns a tuple ``(jsons, images)``: whatever ``self.make_json``
     produces and the list of image values taken from the response.
     """
     import urllib2
     endpoint = (
         "http://www.res-x.com/ws/r2/Resonance.aspx?appid=kennethcole01&t"
         + "k=154212870918247&ss=525178103419747&sg=1&pg=897706724574618&b"
         + "x=true&vr=2.67&sc=product_rr&ev=product&ei=" + id + "&cu=&ct=k"
         + "ennethcolec01&no=3&cb=r1eh&clk=&cv1=" + id + "&cv23=63&ur=http%"
         + "3A//www.kennethcole.com/product/index.jsp%3FproductId%3D3" + id
         + "&plk=&rf="
     )
     response_text = urllib2.urlopen(endpoint).read()
     boxes = response_text.split("certonaRecBoxes")
     images, ids, names, prices, urls = [], [], [], [], []
     # parsing data got from the upper url about we also recommend products
     for box in boxes[1:]:
         # each id is stored as a one-element list
         rec_id = [basic.get_middle_text(box, "d=", '\\"')[0]]
         image = basic.get_middle_text(box, 'src=\\"', '\\"')[0]
         name = basic.get_middle_text(box, 'alt=\\"', '\\"')
         price = basic.get_middle_text(box, '<br>', '</a>')
         urls.append("http://www.kennethcole.com/product/index.jsp?productId=" + rec_id[0])
         ids.append(rec_id)
         names.append(name)
         prices.append(price)
         images.append(image)
     server_paths = self.get_image_server_path(images, main_id)
     jsons = self.make_json(ids, names, prices, server_paths, urls)
     return jsons, images
예제 #3
0
 def get_reviews(self, page):
     """Gets average product rating from the bazaarvoice reviews feed.

     Returns a one-element list holding the ``alt`` text found inside the
     "BVRRRatingNormalImage" block, or an empty list when the lookup of
     that block comes back empty.
     """
     review_id = self.get_review_id(page)
     feed_url = "http://partylite.ugc.bazaarvoice.com/8504-en_us/" + review_id + "/reviews.djs?format=embeddedhtml"
     feed_url = feed_url.replace(" ", "")
     feed = urllib2.urlopen(feed_url).read()
     rating_blocks = basic.get_middle_text(feed, '<div class=\\"BVRRRatingNormalImage\\">', '<\/div>')
     if not rating_blocks:
         return []
     return [basic.get_middle_text(rating_blocks[0], 'alt=\\"', '\\"')[0]]
예제 #4
0
 def parse_jsons(self, jsons, color_names):
     """Download every json url and collect the images it lists.

     Returns the result of ``self.get_absolute_url`` over all collected
     images, plus the same images grouped under ``color_names[i]`` for the
     i-th url, so the groups can be used later when creating child
     products in xml.
     """
     all_images = []
     by_color = {}
     for index, json_url in enumerate(jsons):
         body = urllib2.urlopen(json_url).read()
         leading = basic.get_middle_text(body, '"expressfashion/', ";")
         following = basic.get_middle_text(body, ',expressfashion/', ";")
         found = leading + following
         by_color = basic.add_to_dict(by_color, color_names[index], found)
         all_images += found
     return self.get_absolute_url(all_images), by_color
예제 #5
0
 def get_swatch_image_name(self, image_sites):
     """Return, for each swatch image url, the first piece of text found
     between "fashion/" and "_s"."""
     return [
         basic.get_middle_text(site, "fashion/", "_s")[0]
         for site in image_sites
     ]
예제 #6
0
 def create_subproducts(self, page):
     """Gets information about colors from javascript.
     Returns field of dicts with information about colors.
     Those are really color variants for product."""
     try:
         tmp = page.split("var largeImages = new Array();")[1]
     except IndexError:
         print "This product has no images"
     else:
         tmp = tmp.split("colorDropdownArray")[0]
         images = basic.get_middle_text(tmp, "ProductGroupProduct(", ");")
         image_names = self.get_image_names(page)
         color_products = []
         for im in images:
             product = {}
             attributes = im.split("',")
             product['normal_image_url'] = "http://qa.partylite.biz/imaging/resize?fileName=/productcatalog/production"
             product['normal_image_url'] += self.custom_clean_string(attributes[26], True)
             product['description'] = basic.cdata(self.custom_clean_string(attributes[27]))
             product['color_id'] = self.custom_clean_string(attributes[7], True)
             product['swatch_color'] = basic.cdata(self.custom_clean_string(attributes[9]).replace(" ", ""))
             product['name'] = basic.cdata(image_names[product['color_id']])
             product['add_to_cart_id'] = self.custom_clean_string(attributes[0], True).replace(" ", "")
             product['price'] = self.custom_clean_string(attributes[10], True)
             color_products.append(product)
         return color_products
     return []
예제 #7
0
 def parse_shop_look(self, hxs):
     """Write a "shop the look" ensemble and its products through
     ``xml.create_xml``.

     One item is written for the ensemble itself (product id "DUMMIE1"),
     then one item per ``cat-ens-prod-item`` div, numbered from 1 and
     pointing back at the ensemble through ``master_product_id``.
     """
     products = hxs.select('//div[@id="cat-ens-prod-item"]')
     # do this with actual id
     look = ExpressItem()
     whole_page = "".join(hxs.extract())
     ensemble_id = basic.get_middle_text(whole_page, "ensembleId: '", "',")
     name = basic.clean_string_field(
         hxs.select('//div[@id="cat-ens-prod-con"]/h1/text()').extract())
     look['ensemble_id'] = ensemble_id
     look['normal_image_url'] = self.shl_get_image(hxs)
     look['product_id'] = ["DUMMIE1"]
     look['shop_look'] = ['True']
     look['normal'] = ['False']
     look['shop_line'] = ['False']
     look['in_stock'] = ['IN_STOCK']
     look['name'] = name
     xml.create_xml(look)
     look.clear()
     for position, p in enumerate(products, 1):
         item = ExpressItem()
         item['master_product_id'] = ['DUMMIE1']
         item['product_id'] = ["DUMMIE1_" + str(position)]
         item['name'], item['price'], item['style'] = self.shl_basic_info(p)
         variants = self.shl_create_variants(self.get_variants(p.extract()))
         item['variants'] = basic.cdata_field([variants])
         item['colors'] = basic.cdata_field(self.shl_get_swatches(p))
         xml.create_xml(item)
예제 #8
0
 def get_rating(self, hxs):
     """Read the rating from the second paragraph of the "Customerssay" div.

     Returns ``(rating, texts)``: ``texts`` is the extracted paragraph text
     list, ``rating`` is what lies between "Rating:" and "out" in the first
     text with spaces removed, or an empty list when nothing was extracted.
     """
     texts = hxs.select('//div[@id="Customerssay"]/p[2]/text()').extract()
     if not texts:
         return [], texts
     compact = texts[0].replace(" ", "")
     return basic.get_middle_text(compact, "Rating:", "out"), texts
예제 #9
0
 def get_rating(self, hxs):
     """Return ``(rating, texts)`` for the "Customerssay" block.

     ``texts`` is the text extracted from the block's second paragraph.
     ``rating`` is the lookup between "Rating:" and "out" on the first
     text once spaces are stripped; it is ``[]`` if no text was found.
     """
     texts = hxs.select('//div[@id="Customerssay"]/p[2]/text()').extract()
     rating = []
     if texts:
         squeezed = texts[0].replace(" ", "")
         rating = basic.get_middle_text(squeezed, "Rating:", "out")
     return rating, texts
예제 #10
0
 def get_colors(self, page, color_names):
     """Gets color information with images from javascript on the page.

     Returns a list of json strings (each passed through ``basic.cdata``)
     carrying the color name and the server path of the image for that
     color, and a list of the original image urls that can be used for
     download later.
     """
     script = basic.get_middle_text(page, 'var imageMap_0 = new Array();', '</script>')[0]
     raw_colors = basic.get_middle_text(script, '] = ', ';')
     image_urls = []
     colors_json = []
     for index, cname in enumerate(color_names):
         color = simplejson.loads(burton.replace_color_json(raw_colors[index]))
         color['cname'] = cname
         color.pop('reg')
         original_url = color['enh']
         image_urls.append(original_url)
         color['enh'] = self.get_server_path(original_url)
         colors_json.append(basic.cdata(simplejson.dumps(color)))
     return colors_json, image_urls
예제 #11
0
 def get_extra_images(self, hxs):
     """Return the comma separated entries of the first double-quoted
     string in the "AddImg" script, or [] when no such script exists."""
     scripts = hxs.select('//div[@id="AddImg"]/script/text()').extract()
     if not scripts:
         return []
     quoted = basic.get_middle_text(scripts[0], '"', '"')
     return quoted[0].split(",")
예제 #12
0
 def get_more_images(self, page):
     """Gets field of images."""
     try:
         script = basic.get_middle_text(page, "var moreImages", "var numberOfImages")[0]
     except IndexError:
         print "This product has no images."
     else:
         r = basic.get_middle_text(script, "moreImages[", "';")
         images = []
         # return cdata here if needed to go with absolute links
         for i in range(0, len(r)):
             if self.production:
                 images.append("http://www.partylite.biz" + r[i].split("= '")[1])
             else:
                 images.append("http://qa.partylite.biz" + r[i].split("= '")[1])
         return images
     return []
예제 #13
0
    def get_vars(self, response, hxs):
        """Collect ASP.NET form state values for a sportmann.no page.

        The values are read twice: from the html held by ``hxs`` and from a
        fresh ``requests.get`` of ``response.url`` sent with hard-coded
        headers.  The script url located between the __VIEWSTATE and
        __PREVIOUSPAGE fields of the fresh page is then fetched to read the
        ScriptManager hidden field value.

        Returns an 8-tuple: viewstate, eventvalidation and two empty-string
        placeholders for the ``hxs`` page, followed by viewstate,
        eventvalidation, previouspage and hidden field of the fetched
        pages.  Each value is element 0 of its lookup, so an empty lookup
        fails on indexing.
        """
        headers1 = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 5.1; rv:13.0) Gecko/20100101 Firefox/13.0.1',
            'Host': 'www.sportmann.no',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-us,en;q=0.5',
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
            'Connection': 'keep-alive',
            'Referer': '/product.aspx?productid=613232',
            'Cookie': 'ASP.NET_SessionId=lurvsvrn3jxsfd45cedmsv45; Besok=922884e3-e9cb-4b69-b8c8-215f3cc988a9; __utma=184084580.1353376623.1312483243.1312483243.1312483243.1; __utmb=184084580.9.10.1312483243; __utmc=184084580; __utmz=184084580.1312483243.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)',
        }

        page = " ".join(hxs.select('//html').extract())

        viewst = basic.get_middle_text(page, 'id="__VIEWSTATE" value="', '"')
        eventval = basic.get_middle_text(page, 'id="__EVENTVALIDATION" value="', '"')
        prevpage = [""]
        hidden_field = [""]

        page_one = requests.get(response.url, headers=headers1).content

        viewst_page = basic.get_middle_text(page_one, 'id="__VIEWSTATE" value="', '"')
        eventval_page = basic.get_middle_text(page_one, 'id="__EVENTVALIDATION" value="', '"')
        prevpage_page = basic.get_middle_text(page_one, 'id="__PREVIOUSPAGE" value="', '"')

        # Narrow the page down to the last '<script sr' chunk sitting
        # between the __VIEWSTATE and __PREVIOUSPAGE fields.
        after_viewstate = page_one.split('id="__VIEWSTATE"')[1]
        before_prevpage = after_viewstate.split('id="__PREVIOUSPAGE"')[0]
        last_script = before_prevpage.split('<script sr')[-1]

        script_sources = basic.get_middle_text(last_script, 'c="', '"')
        hidden_url = "http://www.sportmann.no" + script_sources[0].replace('amp;', '')

        response_hidden = urllib2.urlopen(urllib2.Request(hidden_url))
        hidden_field_page = basic.get_middle_text(
            response_hidden.read(),
            "ctl00_ScriptManager1_HiddenField').value += '", "';")

        return (
            viewst[0],
            eventval[0],
            prevpage[0],
            hidden_field[0],
            viewst_page[0],
            eventval_page[0],
            prevpage_page[0],
            hidden_field_page[0],
        )
예제 #14
0
    def get_colors(self, hxs, page, main_id):
        item = KennethItem()
        try:
            tmp = page.split('displays[0]')[1]
        except IndexError:
            print "This product is not available"
            return 404
        script = tmp.split('</script>')[0]
        displays = script.split("};")
        global counter
        ids = []
        images = []
        color_ids = []
        sizes_script = self.get_sizes_part_page(page)
        color_internal_code = {}

        for x in range(0, len(displays) - 1):
            id = basic.get_middle_text(displays[x], 'colorId: "', '"')
            ids.append(id[0])
            reg = displays[x].count("Reg")
            images_in = []
            for i in range(1, reg + 1):
                image = basic.get_middle_text(displays[x], "vw" + str(i) + 'Reg: "', '"')
                if len(image) == 0:
                    image = basic.get_middle_text(displays[x], "vw" + str(i) + 'Reg:"', '"')
                if (len(image) > 0):
                    if (image[0] != "null"):
                        images_in.append(image[0])

            if not images_in:
                images_in = hxs.select('//input[@name="productImage"]/@value').extract()
            color_ids.append(str(main_id) + "_" + str(x))
            item['product_id'] = [str(main_id) + "_" + str(x)]
            item['color_option_id'] = id
            item['master_product_id'] = [main_id]
            item['normal_image_url'] = self.get_image_server_path(images_in, main_id)
            item['thumb_image_url'] = self.get_image_server_path_thumb(images_in, main_id)
            item['in_stock'] = ["NOT_IN_STOCK"]
            item['color'] = self.get_color_name(sizes_script, id[0])
            color_internal_code[id[0]] = str(x)
            self.xml.create_xml(item)
            images += images_in
            self.export(item['normal_image_url'], item['product_id'], "productImage")
        self.get_sizes(sizes_script, ids, main_id, color_internal_code)
        return images
예제 #15
0
 def get_extra_images(self, hxs):
     """Split the first double-quoted string of the "AddImg" script on
     commas and return the parts; return [] if the script is missing."""
     script_texts = hxs.select('//div[@id="AddImg"]/script/text()').extract()
     thumb_images = []
     if script_texts:
         first_quoted = basic.get_middle_text(script_texts[0], '"', '"')[0]
         thumb_images = first_quoted.split(",")
     return thumb_images
예제 #16
0
 def get_colors(self, page, color_names):
     """Gets color information with images from javascript on the page.

     Returns two lists: json strings (each passed through ``basic.cdata``)
     holding the color name and the server path of that color's image, and
     the original image urls that can be used for download later.
     """
     script = basic.get_middle_text(
         page, 'var imageMap_0 = new Array();', '</script>')[0]
     entries = basic.get_middle_text(script, '] = ', ';')
     image_urls, colors_json = [], []
     for position, color_name in enumerate(color_names):
         prepared = burton.replace_color_json(entries[position])
         color = simplejson.loads(prepared)
         color['cname'] = color_name
         color.pop('reg')
         image_urls.append(color['enh'])
         color['enh'] = self.get_server_path(color['enh'])
         colors_json.append(basic.cdata(simplejson.dumps(color)))
     return colors_json, image_urls
예제 #17
0
 def get_imagesets(self, hxs):
     """Function for getting image set in case where there is no color for product.
     Gets image set info from the javascript on the page and selects only first one,
     if there is more because there is only one color to associate with (no_color)"""
     page = hxs.extract()
     print len(page)
     iset = basic.get_middle_text(page, 'imagesets = "', '"; //Change')
     iset = iset[0].split(',')
     return [iset[0]]
예제 #18
0
 def get_variants(self, page):
     """Gets jsons for colors with all available sizes.

     Every ``skuSizeColorObj`` assignment in the page script is loaded as
     json.  Returns three parallel lists: the re-dumped json strings
     (passed through ``basic.cdata``), the original swatch urls and the
     color descriptions.
     """
     script = basic.get_middle_text(page, 'var skuSizeColorObj = new Array();', '</script>')[0]
     sizes, image_urls, color_names = [], [], []
     for chunk in script.split('skuSizeColorObj')[1:]:
         assigned = basic.get_middle_text(chunk, '= ', ';')
         variant = simplejson.loads(burton.replace_for_json(assigned[0]))
         image_urls.append(variant['swatchURL'])
         color_names.append(variant['ColorDesc'])
         # the swatch url in the stored json is swapped for the server path
         variant['swatchURL'] = self.get_server_path(variant['swatchURL'])
         sizes.append(basic.cdata(simplejson.dumps(variant)))
     return sizes, image_urls, color_names
예제 #19
0
 def get_variants(self, page):
     """Gets jsons for colors with all available sizes.

     Returns ``(sizes, image_urls, color_names)``: one entry per
     ``skuSizeColorObj`` assignment found in the page script.  ``sizes``
     holds the json (through ``basic.cdata``) with its swatch url replaced
     by the server path; the other two lists hold the original swatch url
     and the ``ColorDesc`` value.
     """
     script = basic.get_middle_text(
         page, 'var skuSizeColorObj = new Array();', '</script>')[0]
     sizes = []
     image_urls = []
     color_names = []
     # the text before the first 'skuSizeColorObj' occurrence is skipped
     for part in script.split('skuSizeColorObj')[1:]:
         raw_json = basic.get_middle_text(part, '= ', ';')[0]
         info = simplejson.loads(burton.replace_for_json(raw_json))
         swatch_url = info['swatchURL']
         image_urls.append(swatch_url)
         color_names.append(info['ColorDesc'])
         info['swatchURL'] = self.get_server_path(swatch_url)
         sizes.append(basic.cdata(simplejson.dumps(info)))
     return sizes, image_urls, color_names
예제 #20
0
 def get_variants(self, page):
     """Getting variants from javascript on the page.

     Returns three dicts ``(ids, sizes, prices)`` built with
     ``basic.add_to_dict`` and keyed by color; a variant without a color
     is filed under "no_color".  A size is recorded only for variants
     whose script contains ``Size','``.
     """
     section = page.split("// Load the product variants")[1]
     section = section.split("// Set the field to update with the product variant")[0]
     sizes, ids, prices = {}, {}, {}
     for variant in section.split("// Create the variant")[1:]:
         color_hits = basic.get_middle_text(variant, "Color','", "')")
         color = color_hits[0] if color_hits else "no_color"
         variant_id = basic.get_middle_text(variant, "setId('", "')")[0]
         ids = basic.add_to_dict(ids, color, variant_id)
         if "Size','" in variant:
             size = basic.get_middle_text(variant, "Size','", "')")[0]
             sizes = basic.add_to_dict(sizes, color, size)
         price = basic.get_middle_text(variant, 'numericPrice="', '"')[0]
         prices = basic.add_to_dict(prices, color, price)
     return ids, sizes, prices
예제 #21
0
    def get_vars(self, response, hxs):
        """Gather the hidden ASP.NET form values used by sportmann.no.

        Values are looked up in the html behind ``hxs`` and again in a copy
        of ``response.url`` fetched through ``requests`` with fixed request
        headers.  A script url found in that copy (between the __VIEWSTATE
        and __PREVIOUSPAGE fields) is fetched as well to obtain the
        ScriptManager hidden field value.

        Returns eight values: viewstate, eventvalidation, "" and "" for the
        ``hxs`` page, then viewstate, eventvalidation, previouspage and
        hidden field for the fetched pages.  Every value is taken with
        ``[0]`` from its lookup result.
        """
        cookie = "ASP.NET_SessionId=lurvsvrn3jxsfd45cedmsv45; Besok=922884e3-e9cb-4b69-b8c8-215f3cc988a9; __utma=184084580.1353376623.1312483243.1312483243.1312483243.1; __utmb=184084580.9.10.1312483243; __utmc=184084580; __utmz=184084580.1312483243.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)"
        headers1 = {
            "User-Agent": "Mozilla/5.0 (Windows NT 5.1; rv:13.0) Gecko/20100101 Firefox/13.0.1",
            "Host": "www.sportmann.no",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-us,en;q=0.5",
            "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.7",
            "Connection": "keep-alive",
            "Referer": "/product.aspx?productid=613232",
            "Cookie": cookie,
        }

        html_parts = hxs.select("//html").extract()
        page = " ".join(html_parts)

        viewst = basic.get_middle_text(page, 'id="__VIEWSTATE" value="', '"')
        eventval = basic.get_middle_text(page, 'id="__EVENTVALIDATION" value="', '"')

        fetched = requests.get(response.url, headers=headers1)
        page_one = fetched.content

        viewst_page = basic.get_middle_text(page_one, 'id="__VIEWSTATE" value="', '"')
        eventval_page = basic.get_middle_text(page_one, 'id="__EVENTVALIDATION" value="', '"')
        prevpage_page = basic.get_middle_text(page_one, 'id="__PREVIOUSPAGE" value="', '"')

        # Keep only what sits between the two hidden fields, then take the
        # text following the last "<script sr" occurrence in it.
        between_fields = page_one.split('id="__VIEWSTATE"')[1].split('id="__PREVIOUSPAGE"')[0]
        script_chunks = between_fields.split("<script sr")
        final_chunk = script_chunks[len(script_chunks) - 1]

        src_hits = basic.get_middle_text(final_chunk, 'c="', '"')
        hidden_path = src_hits[0].replace("amp;", "")
        hidden_url = "http://www.sportmann.no" + hidden_path

        request_hidden = urllib2.Request(hidden_url)
        hidden_body = urllib2.urlopen(request_hidden).read()
        hidden_field_page = basic.get_middle_text(
            hidden_body, "ctl00_ScriptManager1_HiddenField').value += '", "';"
        )

        # The previous-page and hidden-field slots of the hxs page are
        # always empty strings.
        return (
            viewst[0],
            eventval[0],
            "",
            "",
            viewst_page[0],
            eventval_page[0],
            prevpage_page[0],
            hidden_field_page[0],
        )
예제 #22
0
 def get_sizes(self, page, ids, main_id, color_internal_code):
     """Collect sku, size, price and availability per color id.

     ``page`` is split on "};" (the piece after the last "};" is ignored).
     Every piece whose ``cId`` equals one of ``ids`` contributes its values
     to dicts keyed by that id through ``self.add_to_dict``.  The collected
     dicts are handed to ``self.create_subproducts_xml`` and returned as
     ``(main_id, colors_name, sizes, skus, inStocks, prices)``;
     ``colors_name`` is never filled here and stays empty.
     """
     skus, colors_name, inStocks, sizes, prices = {}, {}, {}, {}, {}
     for option in page.split("};")[:-1]:
         option_color = basic.get_middle_text(option, 'cId: "', '"')
         for known_id in ids:
             if option_color[0] == known_id:
                 raw_sku = basic.get_middle_text(option, 'sku: ', ',s')
                 # keep digits only
                 skus = self.add_to_dict(skus, known_id, re.sub("[^0-9]", "", raw_sku[0]))
                 size = basic.get_middle_text(option, 'sDesc: "', '"')
                 sizes = self.add_to_dict(sizes, known_id, size[0])
                 raw_price = basic.get_middle_text(option, 'price: "', '"')
                 cleaned_price = self.clean_price(raw_price[0])
                 prices = self.add_to_dict(prices, known_id, cleaned_price[0])
                 available = basic.get_middle_text(option, 'avail: "', '"')
                 inStocks = self.add_to_dict(inStocks, known_id, available[0])
     self.create_subproducts_xml(main_id, color_internal_code, colors_name, sizes, skus, inStocks, prices)
     return main_id, colors_name, sizes, skus, inStocks, prices
예제 #23
0
    def get_images(self, hxs):
        """Return absolute sportmann.no urls for every ``src`` value found
        between the "gallery_demo_unstyled" class attribute and the
        "right_container" div."""
        page = " ".join(hxs.select('//html').extract())

        gallery = page.split('class="gallery_demo_unstyled"')[1]
        gallery = gallery.split('<div class="right_container">')[0]
        sources = basic.get_middle_text(gallery, 'src="', '"')

        return ["http://www.sportmann.no" + source for source in sources]
예제 #24
0
    def get_images(self, hxs):
        """Collect gallery image urls.

        Looks at the html after ``class="gallery_demo_unstyled"`` and before
        ``<div class="right_container">`` and prefixes each ``src`` value
        found there with "http://www.sportmann.no".
        """
        html = hxs.select("//html").extract()
        html = " ".join(html)

        after_gallery = html.split('class="gallery_demo_unstyled"')[1]
        gallery_html = after_gallery.split('<div class="right_container">')[0]

        images = []
        for relative in basic.get_middle_text(gallery_html, 'src="', '"'):
            images.append("http://www.sportmann.no" + relative)
        return images
예제 #25
0
    def get_basic_info(self, hxs):
        """Read name, descriptions, prices and article number of a product.

        Returns ``(name, short_desc, description, old_price, price, id,
        sku)``.  Prices are the text after the first ':' with 'Kr' and
        spaces removed, wrapped in a list; ``old_price`` stays an empty
        list when the page has no "oldprice" span, and ``price`` falls back
        to the "normalprice" span when there is no "nowprice" span.  ``sku``
        is the full article number lookup and ``id`` a list with its first
        element.
        """
        name = hxs.select('//div[@id="fragment-1"]/h2/text()').extract()

        short_desc = hxs.select('//div[@class="description2"]/text()').extract()

        description_html = hxs.select(
            '//div[@id="fragment-1"]/div[@class="description"]').extract()
        description = [basic.cdata(sportman.delete_tags(re, description_html[0]))]

        old_price = hxs.select('//span[@class="oldprice"]/text()').extract()
        if old_price != []:
            amount = " ".join(old_price).split(':')[1]
            old_price = [amount.replace('Kr', '').replace(" ", "")]

        price = hxs.select('//span[@class="nowprice"]/text()').extract()
        if price == []:
            # no discounted price on the page; use the regular one
            price = hxs.select('//span[@class="normalprice"]/text()').extract()
        amount = " ".join(price).split(':')[1]
        price = [amount.replace('Kr', '').replace(" ", "")]

        article = " ".join(hxs.select('//div[@class="articlenumber"]').extract())
        article = article.replace(u"\xa0", "")
        sku = basic.get_middle_text(article, 'Art.nr.', '</div>')
        id = [sku[0]]

        return name, short_desc, description, old_price, price, id, sku
예제 #26
0
    def get_basic_info(self, hxs):
        """Extract the basic product fields from the page.

        Returns a 7-tuple ``(name, short_desc, description, old_price,
        price, id, sku)``.  A price is taken from the text after the first
        ":" of the joined span texts, with "Kr" and spaces stripped, and is
        returned as a one-element list.  Without an "oldprice" span
        ``old_price`` is the empty extraction result; without a "nowprice"
        span the "normalprice" span is used for ``price``.
        """

        def amount_of(texts):
            # "<label>: <amount> Kr" -> "<amount>" without spaces
            after_colon = " ".join(texts).split(":")[1]
            return after_colon.replace("Kr", "").replace(" ", "")

        name = hxs.select('//div[@id="fragment-1"]/h2/text()').extract()
        short_desc = hxs.select('//div[@class="description2"]/text()').extract()

        raw_description = hxs.select('//div[@id="fragment-1"]/div[@class="description"]').extract()
        stripped = sportman.delete_tags(re, raw_description[0])
        description = [basic.cdata(stripped)]

        old_price = hxs.select('//span[@class="oldprice"]/text()').extract()
        if old_price != []:
            old_price = [amount_of(old_price)]

        price_texts = hxs.select('//span[@class="nowprice"]/text()').extract()
        if price_texts != []:
            price = [amount_of(price_texts)]
        else:
            price_texts = hxs.select('//span[@class="normalprice"]/text()').extract()
            price = [amount_of(price_texts)]

        article_html = hxs.select('//div[@class="articlenumber"]').extract()
        article_html = " ".join(article_html).replace(u"\xa0", "")
        sku = basic.get_middle_text(article_html, "Art.nr.", "</div>")
        id = [sku[0]]

        return name, short_desc, description, old_price, price, id, sku
예제 #27
0
    def get_variants(self, hxs, response):
        """Build json descriptions of the color/size variants of a product.

        The html of the page is split around the ``<div class="color">`` and
        ``<div class="size">`` blocks.  When a block contains no
        ``<select name`` the single value is read from the div text and a
        hidden input; otherwise the options of the select are read.  When
        the page shows no size text, ``self.get_data`` is called once per
        color value to obtain the sizes for that color.

        Returns a list of json strings, each passed through ``basic.cdata``,
        with the keys "color", "color_value", "size" and "size_value".
        """
        page = hxs.select("//html").extract()
        page = " ".join(page)
        dict_one = {}
        test_one = []

        # Isolate the color block and check whether it holds a <select>.
        temp = page.split('<div class="color">')
        temp = temp[1].split("</div>")
        temp = temp[0].split("<select name")

        # Only view_page, even_page, pre_page and hidd_page are used below.
        viewstate, eventvalidation, previouspage, hiddenfield, view_page, even_page, pre_page, hidd_page = self.get_vars(
            response, hxs
        )

        if len(temp) == 1:
            # No select: a single color given as plain text plus hidden input.
            color = hxs.select('//div[@class="color"]/text()').extract()
            value = hxs.select('//input[@id="ctl00_ContentPlaceHolder1_Variant1Hidden"]/@value').extract()
            color[0] = color[0].replace("  ", "")
            color = basic.clean_string(color[0])
            # NOTE(review): value becomes element 0 of the extracted list here,
            # but is indexed per item (value[i]) in the empty-size branch
            # below - confirm that is intended.
            value = value[0]

        #            color = basic.clean_string(color[0])
        #            color = color.replace("  ","")
        #
        #            dict['color'] = color
        #            dict['color_value'] = value[0]

        else:
            # Select present: read all option labels and values.
            test_color = basic.get_middle_text(temp[1], "farge</option>", "</select>")
            color = basic.get_middle_text(test_color[0], '">', "</option>")
            value = basic.get_middle_text(test_color[0], 'value="', '">')

            for i in range(0, len(color)):
                color[i] = color[i].replace("  ", "")
            #
            #                dict['color'] = color
            #                dict['color_value'] = value

        # Same isolation for the size block.
        size_temp = page.split('<div class="size">')
        size_temp = size_temp[1].split("</div>")
        size_temp = size_temp[0].split("<select name")

        if len(size_temp) == 1:
            size = hxs.select('//div[@class="size"]/text()').extract()
            size = basic.clean_string(size[0])
            size = [size.replace("   ", "")]

            size_val = hxs.select('//input[@id="ctl00_ContentPlaceHolder1_Variant2Hidden"]/@value').extract()

            if size[0] == "":
                # No size text on the page: ask self.get_data for each color
                # value and read the sizes from what it returns.
                for i in range(len(value)):
                    resp_page = self.get_data(response, hidd_page, view_page, pre_page, even_page, value[i])

                    a_page = resp_page.split('<div class="siz')
                    a_page = a_page[1].split("</select>")

                    if len(a_page) == 1:

                        size = basic.get_middle_text(a_page[0], 'e">', '<input type="hidden"')
                        size_val = basic.get_middle_text(a_page[0], 'value="', '"')
                        size_val = size_val[0]
                        size_val = [size_val]

                    else:
                        a_page = basic.get_middle_text(a_page[0], "se</option>", "</select>")
                        size = basic.get_middle_text(a_page[0], '">', "</option>")
                        size_val = basic.get_middle_text(a_page[0], 'value="', '">')

                    # dict_one is reused for every color; it is serialized
                    # before the next iteration overwrites it.
                    dict_one["color"] = color[i]
                    dict_one["color_value"] = value[i]
                    dict_one["size_value"] = size_val

                    for x in range(0, len(size)):
                        size[x] = basic.clean_string(size[x])
                        size[x] = size[x].replace("   ", "")

                        # set inside the loop, so "size" is not updated when
                        # size is empty
                        dict_one["size"] = size

                    # NOTE(review): this branch uses json.dumps while the one
                    # below uses simplejson.dumps.
                    test_one.append(basic.cdata(json.dumps(dict_one)))

            else:
                dict_one["color"] = color

                dict_one["color_value"] = value
                dict_one["size"] = size
                dict_one["size_value"] = size_val
                test_one.append(basic.cdata(simplejson.dumps(dict_one)))

        else:
            # Size select present on the page: one entry with all sizes.
            test_size = basic.get_middle_text(size_temp[1], "se</option>", "</select>")
            size = basic.get_middle_text(test_size[0], '">', "</option>")
            size_val = basic.get_middle_text(test_size[0], 'value="', '">')

            for x in range(0, len(size)):
                size[x] = basic.clean_string(size[x])
                size[x] = size[x].replace("   ", "")

            dict_one["color"] = color
            dict_one["color_value"] = value
            dict_one["size"] = size
            dict_one["size_value"] = size_val

            test_one.append(basic.cdata(json.dumps(dict_one)))

        return test_one
예제 #28
0
    def get_variants(self, hxs, response):
        page = hxs.select('//html').extract()
        page = " ".join(page)
        dict_one = {}
        test_one = []

        temp = page.split('<div class="color">')
        temp = temp[1].split('</div>')
        temp = temp[0].split('<select name')

        viewstate, eventvalidation, previouspage, hiddenfield, view_page, even_page, pre_page, hidd_page = self.get_vars(
            response, hxs)

        if (len(temp) == 1):
            color = hxs.select('//div[@class="color"]/text()').extract()
            value = hxs.select(
                '//input[@id="ctl00_ContentPlaceHolder1_Variant1Hidden"]/@value'
            ).extract()
            color[0] = color[0].replace("  ", "")
            color = basic.clean_string(color[0])
            value = value[0]

        #            color = basic.clean_string(color[0])
        #            color = color.replace("  ","")
        #
        #            dict['color'] = color
        #            dict['color_value'] = value[0]

        else:
            test_color = basic.get_middle_text(temp[1], 'farge</option>',
                                               '</select>')
            color = basic.get_middle_text(test_color[0], '">', '</option>')
            value = basic.get_middle_text(test_color[0], 'value="', '">')

            for i in range(0, len(color)):
                color[i] = color[i].replace("  ", "")
            #
            #                dict['color'] = color
            #                dict['color_value'] = value

        size_temp = page.split('<div class="size">')
        size_temp = size_temp[1].split('</div>')
        size_temp = size_temp[0].split('<select name')

        if (len(size_temp) == 1):
            size = hxs.select('//div[@class="size"]/text()').extract()
            size = basic.clean_string(size[0])
            size = [size.replace("   ", "")]

            size_val = hxs.select(
                '//input[@id="ctl00_ContentPlaceHolder1_Variant2Hidden"]/@value'
            ).extract()

            if size[0] == "":
                for i in range(len(value)):
                    resp_page = self.get_data(response, hidd_page, view_page,
                                              pre_page, even_page, value[i])

                    a_page = resp_page.split('<div class="siz')
                    a_page = a_page[1].split('</select>')

                    if len(a_page) == 1:

                        size = basic.get_middle_text(a_page[0], 'e">',
                                                     '<input type="hidden"')
                        size_val = basic.get_middle_text(
                            a_page[0], 'value="', '"')
                        size_val = size_val[0]
                        size_val = [size_val]

                    else:
                        a_page = basic.get_middle_text(a_page[0],
                                                       'se</option>',
                                                       '</select>')
                        size = basic.get_middle_text(a_page[0], '">',
                                                     '</option>')
                        size_val = basic.get_middle_text(
                            a_page[0], 'value="', '">')

                    dict_one["color"] = color[i]
                    dict_one["color_value"] = value[i]
                    dict_one["size_value"] = size_val

                    for x in range(0, len(size)):
                        size[x] = basic.clean_string(size[x])
                        size[x] = size[x].replace("   ", "")

                        dict_one["size"] = size

                    test_one.append(basic.cdata(json.dumps(dict_one)))

            else:
                dict_one["color"] = color

                dict_one["color_value"] = value
                dict_one['size'] = size
                dict_one['size_value'] = size_val
                test_one.append(basic.cdata(simplejson.dumps(dict_one)))

        else:
            test_size = basic.get_middle_text(size_temp[1], 'se</option>',
                                              '</select>')
            size = basic.get_middle_text(test_size[0], '">', '</option>')
            size_val = basic.get_middle_text(test_size[0], 'value="', '">')

            for x in range(0, len(size)):
                size[x] = basic.clean_string(size[x])
                size[x] = size[x].replace("   ", "")

            dict_one["color"] = color
            dict_one["color_value"] = value
            dict_one['size'] = size
            dict_one['size_value'] = size_val

            test_one.append(basic.cdata(json.dumps(dict_one)))

        return test_one
예제 #29
0
 def get_product_id(self, hxs):
     """Return the product id embedded in the page's inline script.

     Takes the text of the ``<script>`` elements directly under
     ``<div id="wrap">``, looks in the first one for the text between
     ``productid","`` and the next ``"``, and returns the first match.

     NOTE(review): ``basic.get_middle_text`` is presumably returning a list
     of matched substrings -- confirm against the helper. An IndexError is
     raised if no script is selected or no match is found.
     """
     script_texts = hxs.select('//div[@id="wrap"]/script/text()').extract()
     matches = basic.get_middle_text(script_texts[0], 'productid","', '"')
     return matches[0]
예제 #30
0
 def get_all_sizes(self, page):
     """Return the size entries from the page's javascript, JSON-encoded.

     The javascript fragment between ``var distsizeobj=new Array();`` and
     ``var indexcolor=0;`` is isolated first; every value found between
     ``]="`` and ``";`` inside it is then collected, dumped with
     ``simplejson`` and passed through ``basic.cdata``.

     Returns a one-element list holding that wrapped JSON string.
     """
     size_script = basic.get_middle_text(
         page, 'var distsizeobj=new Array();', 'var indexcolor=0;'
     )[0]
     size_entries = basic.get_middle_text(size_script, ']="', '";')
     encoded = simplejson.dumps(size_entries)
     return [basic.cdata(encoded)]
예제 #31
0
 def get_add_to_cart_id(self, page):
     """Extract the add-to-cart id from the javascript on the page.

     Only the first fragment between ``if(isOrderStarted){`` and ``}else``
     is searched; inside it, the text between ``addItemToCart(`` and the
     next ``,`` is looked up.

     Returns the raw result of ``basic.get_middle_text`` for that inner
     search (not indexed here -- presumably a list of matches; confirm
     against the helper).
     """
     order_branch = basic.get_middle_text(
         page, "if(isOrderStarted){", "}else"
     )[0]
     cart_ids = basic.get_middle_text(order_branch, "addItemToCart(", ",")
     return cart_ids
예제 #32
0
 def get_review_id(self, page):
     """Return the review id that the page's javascript uses for reviews.

     This is the first piece of text found between ``productId: "`` and
     the following ``"`` in ``page``.
     """
     candidates = basic.get_middle_text(page, 'productId: "', '"')
     return candidates[0]
예제 #33
0
 def shl_get_image(self, hxs):
     """Build the image URL from the ``imagesets`` value on the page.

     The selector is extracted to text, the first value found between
     ``imagesets = "`` and ``";`` is taken, and it is substituted into the
     fixed image URL template.

     Returns a one-element list containing the resulting URL.
     """
     url_template = "http://t.express.com/com/scene7/s7d5/=/is/image/expressfashion/%s/i81"
     markup = hxs.extract()
     image_sets = basic.get_middle_text(markup, 'imagesets = "', '";')
     first_set = image_sets[0]
     return [url_template % first_set]
예제 #34
0
 def get_all_sizes(self, page):
     """Collect every size value from the page script as wrapped JSON.

     Works in two steps: take the first fragment of ``page`` lying between
     ``var distsizeobj=new Array();`` and ``var indexcolor=0;``, then gather
     each value found between ``]="`` and ``";`` within that fragment.

     Returns a single-item list: the gathered values serialised with
     ``simplejson.dumps`` and wrapped by ``basic.cdata``.
     """
     start_marker = 'var distsizeobj=new Array();'
     end_marker = 'var indexcolor=0;'
     fragment = basic.get_middle_text(page, start_marker, end_marker)[0]
     sizes = basic.get_middle_text(fragment, ']="', '";')
     return [basic.cdata(simplejson.dumps(sizes))]
예제 #35
0
 def get_product_id(self, hxs):
     """Read the product id out of the script inside ``<div id="wrap">``.

     The first script text selected from that div is searched for the text
     between ``productid","`` and the next ``"``; the first hit is returned.

     Raises IndexError when the selection or the search yields nothing.
     """
     wrap_scripts = hxs.select('//div[@id="wrap"]/script/text()').extract()
     first_script = wrap_scripts[0]
     found = basic.get_middle_text(first_script, 'productid","', '"')
     return found[0]