def Variant_Lookup(item_id):
    """Look up offer prices for an item through a signed Amazon ItemLookup call.

    Builds the request query, signs it with HMAC-SHA256, performs the GET
    request and extracts one price per ``<ASIN>...</Item>`` section of the
    response text.

    Args:
        item_id: Value sent as the ``ItemId`` parameter; commas in it are
            percent-encoded before it is placed in the query.

    Returns:
        dict mapping each ASIN found in the response to the result of
        ``converter.calculate_price`` applied to the first dollar-prefixed
        ``FormattedPrice`` in that item's section. Items without such a
        price, or whose price cannot be converted, are left out.
    """
    operation = 'ItemLookup'
    response_group = 'Offers'
    service = 'AWSECommerceService'
    # Colons in the timestamp must be percent-encoded in the query string.
    timestamp = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.000Z").replace(":", "%3A")

    # Build the query once so that the signed text and the requested URL can
    # never disagree (parameters are already in byte order as signing needs).
    query = (
        'AWSAccessKeyId=%s&AssociateTag=%s&ItemId=%s&Operation=%s'
        '&ResponseGroup=%s&Service=%s&Timestamp=%s'
    ) % (
        AWS_AccessKeyId,
        AWS_AssociateTag,
        item_id.replace(',', '%2C'),
        operation,
        response_group.replace(',', '%2C'),
        service,
        timestamp,
    )

    # Signature Version 2 signs verb, host, path and query separated by
    # newline characters.
    string_to_sign = "\n".join(["GET", "webservices.amazon.com", "/onca/xml", query])

    # NOTE(review): the secret access key is hard-coded in source; it should
    # be loaded from configuration/environment and rotated.
    secret_key = "lQoQOPJ+aqny5BtehWFTFjm9Lbdu1OcVlJC7ncx4"
    digest = hmac.new(secret_key, msg=string_to_sign, digestmod=hashlib.sha256).digest()
    # '=' and '+' in the base64 text have to be percent-encoded for the URL.
    signature = base64.b64encode(digest).replace("=", "%3D").replace("+", "%2B")

    url = 'http://webservices.amazon.com/onca/xml?' + query + '&Signature=' + signature
    response = requests.request('GET', url)

    variant_dict = {}
    # NOTE(review): no re.DOTALL here, so an item section is only matched
    # when it does not span a line break in the response text.
    for variation in re.findall('<ASIN>(.*?)</Item>', response.text):
        prices = re.findall(r'\$(.*?)</FormattedPrice>', variation)
        if not prices:
            # No dollar price listed for this item: skip it.
            continue
        asin = variation.split('</ASIN')[0]
        try:
            variant_dict[asin] = converter.calculate_price(prices[0].strip('$'))
        except Exception:
            # Best effort: an unparseable price drops only this item.
            continue
    return variant_dict
def parse_product(self, response):
    """Scrape one product page and write its row(s) through ``mycsv``.

    Collects the product attributes from the page into a row, merges them
    over ``mycsv.default_values`` and then either:

    * writes a parent row followed by one row per variant, when the page
      carries the variant ("Twister") script, or
    * writes a single row with image URLs and price read from the page.

    Rebinds the module globals ``output_file``, ``count_output`` and
    ``mywriter`` when the current output file is rolled over.

    Args:
        response: Page response; wrapped in ``Selector`` for XPath queries.

    Raises:
        IndexError: If a required element (title, parent ASIN, image,
            price, ...) is missing from the page.
    """
    global output_file, count_output, mywriter

    print("Parsing Product")
    sel = Selector(response)

    # Limiting each output file to 22MB: start a new numbered file and
    # writer once the current one grows past that size.
    if mycsv.calculate_filesize(output_file) > 22:
        output_file = "%s-%s" % (output_file.split("-")[0], str(count_output))
        count_output += 1
        mywriter = mycsv.initialize_csv(output_file, category_name)

    product = {}

    # Browse nodes: the value after the last '=' of each breadcrumb link.
    breadcrumb_links = sel.xpath(
        "//div[@id='wayfinding-breadcrumbs_feature_div']/ul//a/@href"
    ).extract()
    browse_nodes = [href.strip().split("=")[-1] for href in breadcrumb_links]
    product.update(converter.clean_browsenodes(browse_nodes, category_name))

    product["item_name"] = sel.xpath("//span[@id='productTitle']/text()").extract()[0]
    # The parent ASIN is read from the "parentASIN=" query parameter.
    product["external_product_id"] = (
        sel.xpath("//div[@id='tell-a-friend']/@data-dest")
        .extract()[0]
        .split("parentASIN=")[-1]
        .split("&")[0]
    )
    product["item_sku"] = product["part_number"] = "LYS" + product["external_product_id"]

    try:
        product["brand_name"] = sel.xpath("//a[@id='brand']/text()").extract()[0]
    except IndexError:
        # The brand link has no text node: fall back to its href path.
        product["brand_name"] = sel.xpath("//a[@id='brand']/@href").extract()[0].split("/")[1]
    product["manufacturer"] = product["brand_name"]

    (
        product["item_length"],
        product["item_height"],
        product["item_width"],
        product["item_dimensions_unit_of_measure"],
    ) = converter.clean_dimensions(
        sel.xpath("//li[contains(text(),'inches')][contains(text(),'x')]/text()").extract()
    )

    product["product_description"] = " ".join(
        sel.xpath("//div[@id='productDescription']/p/text()").extract()
    )
    product.update(
        converter.clean_bullet_points(
            sel.xpath("//ul[@class='a-vertical a-spacing-none']//span/text()").extract()
        )
    )

    product["parent_child"] = "Parent"
    product["department_name1"] = product["target_gender"] = converter.clean_department_name(
        product["item_name"]
    )
    product["generic_keywords1"] = product["generic_keywords"] = product["item_name"]

    # Scraped values override the per-category defaults.
    row = mycsv.default_values(product["item_name"], category_name)
    row.update(product)

    variant_scripts = sel.xpath(
        "//script[@language='JavaScript'][contains(text(),'window.isTwisterAUI = 1')]"
    ).extract()

    if variant_scripts:
        print("Variants")
        # Scripts from which the variant values and images are derived.
        variant_script = variant_scripts[0]
        image_script = sel.xpath(
            "//script[@type='text/javascript'][contains(text(),'customerImages')]"
        ).extract()[0]
        variant_dict, row["variation_theme"] = variants.clean_variants(
            variant_script, image_script, product, mywriter, category_name
        )

        # Parent row first, then one row per variant.
        mycsv.write__csv(row, mywriter)
        for child_row in variant_dict.values():
            mycsv.write__csv(child_row, mywriter)
    else:
        image_data = sel.xpath("//img[@id='landingImage']/@data-a-dynamic-image").extract()[0]
        # The dot is escaped so a URL is only cut at a real ".jpg" suffix.
        images = re.findall(r"(http.*?\.jpg)", image_data)
        row["main_image_url"] = images[0]
        # At most three additional images are recorded.
        for index, image in enumerate(images[1:4], 1):
            row["other_image_url" + str(index)] = image

        row["variation_theme"] = ""
        # For a "low - high" price range the upper bound is used.
        row["standard_price"] = converter.calculate_price(
            sel.xpath("//span[@id='priceblock_ourprice']/text()| //span[@id='priceblock_saleprice']/text()")
            .extract()[0]
            .split("-")[-1]
            .strip("$")
        )
        mycsv.write__csv(row, mywriter)