def __check_file(self, file_name):
    """Ensure the selected spreadsheet exists; otherwise list choices and exit.

    Looks for ``xls/<self.name>/<file_name>.xls``. When that file is missing,
    a warning is shown together with the names (extension stripped) of the
    ``.xls`` files found in the same directory, and the process is terminated
    with exit status 3.

    Args:
        file_name: Spreadsheet name without the ``.xls`` extension.
    """
    directory = "xls/{0}/".format(self.name)
    path = "{0}{1}.xls".format(directory, file_name)
    # Parenthesised form prints the same text on Python 2 and parses on 3.
    print(path)
    if self.os.path.exists(path):
        return

    basic.warning("File you selected does not exist")
    basic.green("Files available for this script:")
    try:
        candidates = self.os.listdir(directory)
    except OSError:
        # No spreadsheet directory for this script: nothing to list, but the
        # run must still end with the documented exit status below.
        candidates = []
    suffix = ".xls"
    for candidate in candidates:
        # Test the real extension instead of a substring: a substring test
        # also accepted names such as "report.xls.bak", and replace() removed
        # every ".xls" occurrence rather than just the trailing extension.
        if candidate.endswith(suffix) and not candidate.startswith("."):
            basic.green("\t{0}".format(candidate[:-len(suffix)]))
    self.os._exit(3)
def spider_closed(self, spider):
    """Handle the spider_closed signal sent at the end of scraping.

    Performs the usual end-of-run operations: reports how many ids were not
    found in the feed, updates the database when the 'database' option is
    set, writes the xml, mails the report and, for database runs, stores the
    report under ``logs/<self.name>/``.

    Args:
        spider: The spider that was closed (not used by this handler).
    """
    missing = self.total - self.number
    msg = "Ran: {0}\n".format(datetime.now())
    if missing:
        msg += "{0} id(s) from id list weren't found in feed".format(missing)
        basic.warning(msg)
    else:
        msg += "All ids found in feed."
        basic.green(msg)

    # filename for writing xml
    if self.d['database']:
        # Fallback used when the database step fails before get_name()
        # returns; without it `filename` stayed unbound and the xml/mail/log
        # steps below crashed with NameError instead of reporting the failure.
        filename = self.name
        try:
            self.database.connect()
            filename = self.database.get_name(self.d['catalog_id'])
            self.database.update_db(self.no_urls)
            self.database.disconnect()
            msg += "\nRan from interface.\n"
        except Exception:
            # Exception (not a bare except) so that SystemExit and
            # KeyboardInterrupt are no longer swallowed.
            msg += "\nUpdating database failed, please report."
    else:
        msg += "\nRan from console.\n"
        filename = self.d['file']

    self.xml.write_xml(self.name, filename)
    msg += self.exc.create_message(self.counter)

    # Disabled upload step, kept for reference:
    #if self.d['upload']:
        #exp = CommonExport()
        #try:
            #exp.xml_to_db(self.name, self.d['file'], "40b029c9-dff7-4bc1-b8bc-ef062960b24d")
            #msg += "\n\nExport to database successful"
        #except StandardError:
            #msg += "\n\nExport to database failed"
    #else:
        #msg += "\n\nUpload to database not selected"

    from modules.mail import Mail
    mail = Mail()
    subject = "CelebratingHome: {0}".format(filename)
    try:
        mail.send_mail(msg, subject)
        if self.d['email']:
            mail.send_mail(msg, subject, self.d['email'])
    except Exception:
        # Mailing is best-effort; the failure is recorded in the report.
        msg += "\nSending mail failed."

    if self.d['database']:
        path = "logs/{0}".format(self.name)
        if not os.path.exists(path):
            os.makedirs(path)
        with open("{0}/{1}".format(path, filename), 'w') as f:
            f.write(msg)
def spider_closed(self, spider):
    """Handle the spider_closed signal sent at the end of scraping.

    Performs the usual end-of-run operations: reports how many ids were not
    found in the feed, updates the database when the 'database' option is
    set, writes the xml, mails the report and, for database runs, stores the
    report under ``logs/<self.name>/``.

    Args:
        spider: The spider that was closed (not used by this handler).
    """
    missing = self.total - self.number
    msg = "Ran: {0}\n".format(datetime.now())
    if missing:
        msg += "{0} id(s) from id list weren't found in feed".format(missing)
        basic.warning(msg)
    else:
        msg += "All ids found in feed."
        basic.green(msg)

    # filename for writing xml
    if self.d['database']:
        # Fallback used when the database step fails before get_name()
        # returns; without it `filename` stayed unbound and the xml/mail/log
        # steps below crashed with NameError instead of reporting the failure.
        filename = self.name
        try:
            self.database.connect()
            filename = self.database.get_name(self.d['catalog_id'])
            self.database.update_db(self.no_urls)
            self.database.disconnect()
            msg += "\nRan from interface.\n"
        except Exception:
            # Exception (not a bare except) so that SystemExit and
            # KeyboardInterrupt are no longer swallowed.
            msg += "\nUpdating database failed, please report."
    else:
        msg += "\nRan from console.\n"
        filename = self.d['file']

    self.xml.write_xml(self.name, filename)
    msg += self.exc.create_message(self.counter)

    # Disabled upload step, kept for reference:
    #if self.d['upload']:
        #exp = CommonExport()
        #try:
            #exp.xml_to_db(self.name, self.d['file'], "40b029c9-dff7-4bc1-b8bc-ef062960b24d")
            #msg += "\n\nExport to database successful"
        #except StandardError:
            #msg += "\n\nExport to database failed"
    #else:
        #msg += "\n\nUpload to database not selected"

    from modules.mail import Mail
    mail = Mail()
    subject = "CelebratingHome: {0}".format(filename)
    try:
        mail.send_mail(msg, subject)
        if self.d['email']:
            mail.send_mail(msg, subject, self.d['email'])
    except Exception:
        # Mailing is best-effort; the failure is recorded in the report.
        msg += "\nSending mail failed."

    if self.d['database']:
        path = "logs/{0}".format(self.name)
        if not os.path.exists(path):
            os.makedirs(path)
        with open("{0}/{1}".format(path, filename), 'w') as f:
            f.write(msg)
def _check_mandatory(self):
    """Stop the run if a mandatory option is absent from the options.

    Walks ``self.mandatory`` in order; for an option that is not present in
    ``self.d`` it issues a warning, prints the arguments and terminates the
    process with exit status 3.
    """
    for option in self.mandatory:
        if option in self.d:
            continue
        basic.warning("Option '{0}' is mandatory.".format(option))
        self.print_arguments()
        self.os._exit(3)
def _check_valid(self):
    """Require that at least one of the 'database' / 'file' options is set.

    When both are falsy a warning is issued, the arguments are printed and
    the process is terminated with exit status 3.
    """
    # 'database' is looked at first; 'file' is only read when it is falsy.
    if self.d['database'] or self.d['file']:
        return
    basic.warning("Either 'file' option or 'database' option must be set")
    self.print_arguments()
    self.os._exit(3)
def parse_whole_xml(self):
    """Parse the client feed and create xml for the requested products.

    The feed is downloaded first when the 'download' option is set;
    otherwise ``xml/<self.name>/client_feed.xml`` must already exist (the
    process exits with status 2 if it does not). Every feed entry whose Id is
    listed in ``self.no_urls['product_ids']`` gets its status set to 'ran',
    is converted to a ``ChomeItem`` and handed to ``self.xml.create_xml``.
    Statuses that were not set to 'ran' are set to 'not_found' at the end.
    ``self.number`` counts the matched feed entries.

    Returns:
        list: The ``self.get_absolute`` result for every image path of the
        matched products.
    """
    xml_dir = "xml/{0}".format(self.name)
    feed_path = "{0}/client_feed.xml".format(xml_dir)
    file_url = "https://svc.celebratinghome.com/ZMags.svc/ProductInfo1"
    downloader = Downloader()
    if self.d['download']:
        downloader.get_file(xml_dir, file_url, "client_feed")
    elif not os.path.exists(feed_path):
        basic.warning("Feed file doesn't exist please de-select no download option")
        os._exit(2)

    properties_tag = "{http://schemas.microsoft.com/ado/2007/08/dataservices/metadata}properties"
    p = "{http://schemas.microsoft.com/ado/2007/08/dataservices}"
    # Fields copied verbatim (even when the element is empty).
    plain_fields = {
        p + "Id": 'product_id',
        p + "PartNumber": 'add_to_cart_id',
        p + "MaxQty": 'max_qty',
        p + "TimeType": 'time_type',
        p + "EngName": 'name_english',
    }
    alternate_tags = tuple(
        p + "Alternate%sImagePath_Large" % i for i in range(1, 4))

    statuses = self.no_urls['status']
    # id -> first position in the id list, built once so each feed element is
    # checked with a dict lookup instead of two linear scans of the list.
    first_index = {}
    for position, product_id in enumerate(self.no_urls['product_ids']):
        first_index.setdefault(product_id, position)

    self.number = 0
    urls_all = []
    for event, elem in iterparse(feed_path):
        if elem.tag != properties_tag:
            continue
        for r in elem:
            if r.tag != p + "Id" or r.text not in first_index:
                continue
            statuses[first_index[r.text]] = 'ran'
            self.number += 1
            # A fresh item per product: reusing one item let fields that are
            # missing in this entry leak in from the previous product.
            xml_item = ChomeItem()
            urls = []
            images = []
            for x in elem:
                tag, text = x.tag, x.text
                if tag in plain_fields:
                    xml_item[plain_fields[tag]] = [text]
                elif tag == p + "IsActive":
                    # Element text is a string (or None), so the former
                    # comparison with the integer 0 never matched and every
                    # product was reported as in stock.
                    # NOTE(review): assumes inactive is encoded as "0" or
                    # "false" in the feed -- confirm.
                    inactive = (text or "").strip().lower() in ("0", "false")
                    xml_item['in_stock'] = (
                        ["NOT_IN_STOCK"] if inactive else ['IN_STOCK'])
                elif text is None:
                    # Every remaining field is skipped for empty elements
                    # (this also guards the price slice below).
                    continue
                elif tag == p + "EngLongDesc":
                    xml_item['description_english'] = [self.escape(basic.cdata(text))]
                elif tag == p + "SpnLongDesc":
                    xml_item['description_spanish'] = [self.escape(basic.cdata(text))]
                elif tag == p + "RetailPrice":
                    # Drops the last two characters of the feed value
                    # (presumably surplus decimal places -- confirm).
                    xml_item['custom_price'] = [text[:-2]]
                elif tag == p + "SpnName":
                    xml_item['name_spanish'] = [text]
                elif tag == p + "ImagePath_Large":
                    absolute = self.get_absolute(text)
                    urls.append(absolute)
                    # The main image starts the list, as before.
                    images = [self.get_server_path(absolute)]
                elif tag in alternate_tags:
                    absolute = self.get_absolute(text)
                    urls.append(absolute)
                    images.append(self.get_server_path(absolute))
            if images:
                # Collected locally so an alternate image without a main
                # image no longer fails on a missing 'normal_image_url'.
                xml_item['normal_image_url'] = images
            self.xml.create_xml(xml_item)
            urls_all += urls
        # The entry is fully processed; release its children so memory does
        # not grow with the size of the feed.
        elem.clear()

    for position, status in enumerate(statuses):
        if status != 'ran':
            statuses[position] = 'not_found'
    return urls_all
def parse_whole_xml(self):
    """Parse the client feed and create xml for the requested products.

    The feed is downloaded first when the 'download' option is set;
    otherwise ``xml/<self.name>/client_feed.xml`` must already exist (the
    process exits with status 2 if it does not). Every feed entry whose Id is
    listed in ``self.no_urls['product_ids']`` gets its status set to 'ran',
    is converted to a ``ChomeItem`` and handed to ``self.xml.create_xml``.
    Statuses that were not set to 'ran' are set to 'not_found' at the end.
    ``self.number`` counts the matched feed entries.

    Returns:
        list: The ``self.get_absolute`` result for every image path of the
        matched products.
    """
    xml_dir = "xml/{0}".format(self.name)
    feed_path = "{0}/client_feed.xml".format(xml_dir)
    file_url = "https://svc.celebratinghome.com/ZMags.svc/ProductInfo1"
    downloader = Downloader()
    if self.d['download']:
        downloader.get_file(xml_dir, file_url, "client_feed")
    elif not os.path.exists(feed_path):
        basic.warning(
            "Feed file doesn't exist please de-select no download option"
        )
        os._exit(2)

    properties_tag = "{http://schemas.microsoft.com/ado/2007/08/dataservices/metadata}properties"
    p = "{http://schemas.microsoft.com/ado/2007/08/dataservices}"
    # Fields copied verbatim (even when the element is empty).
    plain_fields = {
        p + "Id": 'product_id',
        p + "PartNumber": 'add_to_cart_id',
        p + "MaxQty": 'max_qty',
        p + "TimeType": 'time_type',
        p + "EngName": 'name_english',
    }
    alternate_tags = tuple(
        p + "Alternate%sImagePath_Large" % i for i in range(1, 4))

    statuses = self.no_urls['status']
    # id -> first position in the id list, built once so each feed element is
    # checked with a dict lookup instead of two linear scans of the list.
    first_index = {}
    for position, product_id in enumerate(self.no_urls['product_ids']):
        first_index.setdefault(product_id, position)

    self.number = 0
    urls_all = []
    for event, elem in iterparse(feed_path):
        if elem.tag != properties_tag:
            continue
        for r in elem:
            if r.tag != p + "Id" or r.text not in first_index:
                continue
            statuses[first_index[r.text]] = 'ran'
            self.number += 1
            # A fresh item per product: reusing one item let fields that are
            # missing in this entry leak in from the previous product.
            xml_item = ChomeItem()
            urls = []
            images = []
            for x in elem:
                tag, text = x.tag, x.text
                if tag in plain_fields:
                    xml_item[plain_fields[tag]] = [text]
                elif tag == p + "IsActive":
                    # Element text is a string (or None), so the former
                    # comparison with the integer 0 never matched and every
                    # product was reported as in stock.
                    # NOTE(review): assumes inactive is encoded as "0" or
                    # "false" in the feed -- confirm.
                    inactive = (text or "").strip().lower() in ("0", "false")
                    xml_item['in_stock'] = (
                        ["NOT_IN_STOCK"] if inactive else ['IN_STOCK'])
                elif text is None:
                    # Every remaining field is skipped for empty elements
                    # (this also guards the price slice below).
                    continue
                elif tag == p + "EngLongDesc":
                    xml_item['description_english'] = [
                        self.escape(basic.cdata(text))
                    ]
                elif tag == p + "SpnLongDesc":
                    xml_item['description_spanish'] = [
                        self.escape(basic.cdata(text))
                    ]
                elif tag == p + "RetailPrice":
                    # Drops the last two characters of the feed value
                    # (presumably surplus decimal places -- confirm).
                    xml_item['custom_price'] = [text[:-2]]
                elif tag == p + "SpnName":
                    xml_item['name_spanish'] = [text]
                elif tag == p + "ImagePath_Large":
                    absolute = self.get_absolute(text)
                    urls.append(absolute)
                    # The main image starts the list, as before.
                    images = [self.get_server_path(absolute)]
                elif tag in alternate_tags:
                    absolute = self.get_absolute(text)
                    urls.append(absolute)
                    images.append(self.get_server_path(absolute))
            if images:
                # Collected locally so an alternate image without a main
                # image no longer fails on a missing 'normal_image_url'.
                xml_item['normal_image_url'] = images
            self.xml.create_xml(xml_item)
            urls_all += urls
        # The entry is fully processed; release its children so memory does
        # not grow with the size of the feed.
        elem.clear()

    for position, status in enumerate(statuses):
        if status != 'ran':
            statuses[position] = 'not_found'
    return urls_all