Example #1
0
 def __check_file(self, file_name):
     """Abort the run unless the spreadsheet selected for this script exists.

     Looks for ``xls/<self.name>/<file_name>.xls``.  When it is missing, a
     warning and the names of the ``.xls`` files found in that directory
     are printed and the process is terminated with exit status 3.

     :param file_name: spreadsheet name without the ``.xls`` extension.
     """
     path = "xls/{0}/{1}.xls".format(self.name, file_name)
     # Parenthesised form prints the same text on Python 2 and Python 3.
     print(path)
     if self.os.path.exists(path):
         return
     basic.warning("File you selected does not exist")
     basic.green("Files available for this script:")
     for f in self.os.listdir("xls/{0}/".format(self.name)):
         # Hidden files (editor/lock files) are never selectable.
         if f.startswith('.'):
             continue
         # Compare the real extension instead of searching for ".xls"
         # anywhere in the name: "a.xls.bak" must not be offered, and
         # "my.xlsfile.xls" must be shown as "my.xlsfile", not "myfile".
         stem, ext = self.os.path.splitext(f)
         if ext == '.xls':
             basic.green("\t{0}".format(stem))
     self.os._exit(3)
Example #2
0
 def __check_file(self, file_name):
     """Verify that the chosen ``.xls`` input file is present.

     The file is expected at ``xls/<self.name>/<file_name>.xls``.  If it
     does not exist, the available ``.xls`` files of that directory are
     listed (without extension) and the process exits with status 3.

     :param file_name: name of the spreadsheet, extension omitted.
     """
     path = "xls/{0}/{1}.xls".format(self.name, file_name)
     # Works as a statement on Python 2 and as a call on Python 3.
     print(path)
     if self.os.path.exists(path):
         return
     basic.warning("File you selected does not exist")
     basic.green("Files available for this script:")
     files = self.os.listdir("xls/{0}/".format(self.name))
     for f in files:
         if f.startswith('.'):
             # skip hidden files
             continue
         # Match on the actual extension; substring checks wrongly
         # accepted names such as "a.xls.bak" and str.replace() removed
         # every ".xls" occurrence from the displayed name.
         stem, ext = self.os.path.splitext(f)
         if ext != '.xls':
             continue
         basic.green("\t{0}".format(stem))
     self.os._exit(3)
Example #3
0
 def spider_closed(self, spider):
     """Handles spider_closed signal from end of scraping.

     Handles usual end operations for scraper like writing xml, exporting
     to database and sending appropriate mail message.  When the run was
     started with the 'database' option the final message is also written
     to ``logs/<self.name>/<filename>``.

     :param spider: spider passed along with the signal (not used here).
     """
     msg = "Ran: {0}\n".format(datetime.now())
     missing = self.total - self.number
     if missing:
         msg += "{0} id(s) from id list weren't found in feed".format(
             missing)
         basic.warning(msg)
     else:
         msg += "All ids found in feed."
         basic.green(msg)
     # filename for writing xml
     filename = None
     if self.d['database']:
         try:
             self.database.connect()
             filename = self.database.get_name(self.d['catalog_id'])
             self.database.update_db(self.no_urls)
             self.database.disconnect()
             msg += "\nRan from interface.\n"
         except Exception:
             # Exception (not a bare except) so that KeyboardInterrupt
             # and SystemExit are not swallowed.
             msg += "\nUpdating database failed, please report."
             if filename is None:
                 # get_name() never returned, so there is no name yet;
                 # without a fallback the code below would die with a
                 # NameError.  The catalog id keeps the xml, the mail
                 # subject and the log file identifiable.
                 filename = str(self.d['catalog_id'])
     else:
         msg += "\nRan from console.\n"
         filename = self.d['file']
     self.xml.write_xml(self.name, filename)
     msg += self.exc.create_message(self.counter)
     #if self.d['upload']:
     #exp = CommonExport()
     #try:
     #exp.xml_to_db(self.name, self.d['file'], "40b029c9-dff7-4bc1-b8bc-ef062960b24d")
     #msg += "\n\nExport to database successful"
     #except StandardError:
     #msg += "\n\nExport to database failed"
     #else:
     #msg += "\n\nUpload to database not selected"
     from modules.mail import Mail
     mail = Mail()
     try:
         subject = "CelebratingHome: {0}".format(filename)
         mail.send_mail(msg, subject)
         if self.d['email']:
             # additional copy for the address given by the user
             mail.send_mail(msg, subject, self.d['email'])
     except Exception:
         # Mailing is best effort; the failure is only noted in the log.
         msg += "\nSending mail failed."
     if self.d['database']:
         path = "logs/{0}".format(self.name)
         if not os.path.exists(path):
             os.makedirs(path)
         with open("{0}/{1}".format(path, filename), 'w') as f:
             f.write(msg)
Example #4
0
 def spider_closed(self, spider):
     """Handles spider_closed signal from end of scraping.

     Handles usual end operations for scraper like writing xml, exporting
     to database and sending appropriate mail message.  Runs started with
     the 'database' option additionally get the message stored under
     ``logs/<self.name>/<filename>``.

     :param spider: spider passed along with the signal (not used here).
     """
     msg = "Ran: {0}\n".format(datetime.now())
     missing = self.total - self.number
     if missing:
         msg += "{0} id(s) from id list weren't found in feed".format(missing)
         basic.warning(msg)
     else:
         msg += "All ids found in feed."
         basic.green(msg)
     # filename for writing xml
     filename = None
     if self.d['database']:
         try:
             self.database.connect()
             filename = self.database.get_name(self.d['catalog_id'])
             self.database.update_db(self.no_urls)
             self.database.disconnect()
             msg += "\nRan from interface.\n"
         except Exception:
             # Narrowed from a bare except so interpreter-exit exceptions
             # (KeyboardInterrupt, SystemExit) still propagate.
             msg += "\nUpdating database failed, please report."
             if filename is None:
                 # The failure happened before get_name() returned; fall
                 # back to the catalog id instead of crashing below with a
                 # NameError on the unbound filename.
                 filename = str(self.d['catalog_id'])
     else:
         msg += "\nRan from console.\n"
         filename = self.d['file']
     self.xml.write_xml(self.name, filename)
     msg += self.exc.create_message(self.counter)
     #if self.d['upload']:
         #exp = CommonExport()
         #try:
             #exp.xml_to_db(self.name, self.d['file'], "40b029c9-dff7-4bc1-b8bc-ef062960b24d")
             #msg += "\n\nExport to database successful"
         #except StandardError:
             #msg += "\n\nExport to database failed"
     #else:
         #msg += "\n\nUpload to database not selected"
     from modules.mail import Mail
     mail = Mail()
     try:
         subject = "CelebratingHome: {0}".format(filename)
         mail.send_mail(msg, subject)
         if self.d['email']:
             # second copy goes to the user-supplied address
             mail.send_mail(msg, subject, self.d['email'])
     except Exception:
         # Sending mail is best effort; record the failure and carry on.
         msg += "\nSending mail failed."
     if self.d['database']:
         path = "logs/{0}".format(self.name)
         if not os.path.exists(path):
             os.makedirs(path)
         with open("{0}/{1}".format(path, filename), 'w') as f:
             f.write(msg)
Example #5
0
 def _check_mandatory(self):
     """Stop the run when a mandatory option is absent from ``self.d``.

     For a key of ``self.mandatory`` that is not in ``self.d`` a warning
     and the argument overview are printed, then the process is ended
     with exit status 3.
     """
     for option in self.mandatory:
         if option in self.d:
             continue
         basic.warning("Option '{0}' is mandatory.".format(option))
         self.print_arguments()
         self.os._exit(3)
Example #6
0
 def _check_valid(self):
     """Require that the 'database' or the 'file' option is set.

     If both are falsy a warning and the argument overview are printed
     and the process is ended with exit status 3.
     """
     if self.d['database'] or self.d['file']:
         # at least one input source was given, nothing to complain about
         return
     basic.warning("Either 'file' option or 'database' option must be set")
     self.print_arguments()
     self.os._exit(3)
Example #7
0
 def parse_whole_xml(self):
     """Parse the client feed and create an xml item per requested product.

     The feed is downloaded first when ``self.d['download']`` is set;
     otherwise ``xml/<self.name>/client_feed.xml`` has to exist already or
     the process exits with status 2.  Every product whose ``Id`` is listed
     in ``self.no_urls['product_ids']`` gets its status set to ``'ran'``,
     is counted in ``self.number`` and is handed to
     ``self.xml.create_xml``.  Ids that never showed up in the feed end up
     with status ``'not_found'``.

     :returns: list of absolute image urls of all matched products.
     """
     xml_dir = "xml/{0}".format(self.name)
     feed_path = "{0}/client_feed.xml".format(xml_dir)
     file_url = "https://svc.celebratinghome.com/ZMags.svc/ProductInfo1"
     downloader = Downloader()
     if self.d['download']:
         downloader.get_file(xml_dir, file_url, "client_feed")
     elif not os.path.exists(feed_path):
         basic.warning("Feed file doesn't exist please de-select no download option")
         os._exit(2)
     self.number = 0
     urls_all = []
     properties_tag = "{http://schemas.microsoft.com/ado/2007/08/dataservices/metadata}properties"
     p = "{http://schemas.microsoft.com/ado/2007/08/dataservices}"
     # Feed fields that are copied into the item unchanged (even if empty).
     copied = {
         p + "Id": 'product_id',
         p + "PartNumber": 'add_to_cart_id',
         p + "MaxQty": 'max_qty',
         p + "TimeType": 'time_type',
         p + "EngName": 'name_english',
     }
     alternate_tags = [p + "Alternate%sImagePath_Large" % i
                       for i in range(1, 4)]
     for event, elem in iterparse(feed_path):
         if elem.tag != properties_tag:
             continue
         for r in elem:
             if r.tag == p + "Id" and r.text in self.no_urls['product_ids']:
                 index = self.no_urls['product_ids'].index(r.text)
                 self.no_urls['status'][index] = 'ran'
                 self.number += 1
                 # A fresh item per product: reusing a single item let
                 # optional fields of the previous product leak into
                 # products that lack them.
                 xml_item = ChomeItem()
                 urls = []
                 image_paths = []
                 for x in elem:
                     if x.tag in copied:
                         xml_item[copied[x.tag]] = [x.text]
                     elif x.tag == p + "EngLongDesc" and x.text is not None:
                         xml_item['description_english'] = [self.escape(basic.cdata(x.text))]
                     elif x.tag == p + "RetailPrice" and x.text is not None:
                         # drop the last two characters of the feed price;
                         # an empty element used to raise TypeError here
                         xml_item['custom_price'] = [x.text[:-2]]
                     elif x.tag == p + "SpnLongDesc" and x.text is not None:
                         xml_item['description_spanish'] = [self.escape(basic.cdata(x.text))]
                     elif x.tag == p + "SpnName" and x.text is not None:
                         xml_item['name_spanish'] = [x.text]
                     elif x.tag == p + "ImagePath_Large" and x.text is not None:
                         absolute = self.get_absolute(x.text)
                         urls.append(absolute)
                         # main image always comes first in the item
                         image_paths.insert(0, self.get_server_path(absolute))
                     elif x.tag == p + "IsActive":
                         # Element text is a string, so the former
                         # `x.text == 0` could never be true.
                         # NOTE(review): assumes the feed marks inactive
                         # products as "0" or "false" - confirm.
                         inactive = (x.text or "").strip().lower() in ("0", "false")
                         if inactive:
                             xml_item['in_stock'] = ["NOT_IN_STOCK"]
                         else:
                             xml_item['in_stock'] = ['IN_STOCK']
                     elif x.tag in alternate_tags and x.text is not None:
                         absolute = self.get_absolute(x.text)
                         urls.append(absolute)
                         image_paths.append(self.get_server_path(absolute))
                 # change image paths for normal_image_url and return urls
                 if image_paths:
                     xml_item['normal_image_url'] = image_paths
                 self.xml.create_xml(xml_item)
                 urls_all += urls
         # The product is fully processed; release its children so memory
         # does not grow with the size of the feed.
         elem.clear()
     statuses = self.no_urls['status']
     for i, status in enumerate(statuses):
         if status != 'ran':
             statuses[i] = 'not_found'
     return urls_all
Example #8
0
 def parse_whole_xml(self):
     """Parse the client feed and create an xml item per requested product.

     With ``self.d['download']`` set the feed is fetched first; without it
     ``xml/<self.name>/client_feed.xml`` must already be on disk, otherwise
     the process exits with status 2.  Products whose ``Id`` occurs in
     ``self.no_urls['product_ids']`` are marked ``'ran'``, counted in
     ``self.number`` and passed to ``self.xml.create_xml``; all remaining
     ids are marked ``'not_found'`` at the end.

     :returns: list of absolute image urls of all matched products.
     """
     xml_dir = "xml/{0}".format(self.name)
     feed_path = "{0}/client_feed.xml".format(xml_dir)
     file_url = "https://svc.celebratinghome.com/ZMags.svc/ProductInfo1"
     downloader = Downloader()
     if self.d['download']:
         downloader.get_file(xml_dir, file_url, "client_feed")
     elif not os.path.exists(feed_path):
         basic.warning(
             "Feed file doesn't exist please de-select no download option"
         )
         os._exit(2)
     self.number = 0
     urls_all = []
     properties_tag = "{http://schemas.microsoft.com/ado/2007/08/dataservices/metadata}properties"
     p = "{http://schemas.microsoft.com/ado/2007/08/dataservices}"
     # Feed fields that are copied into the item unchanged (even if empty).
     copied = {
         p + "Id": 'product_id',
         p + "PartNumber": 'add_to_cart_id',
         p + "MaxQty": 'max_qty',
         p + "TimeType": 'time_type',
         p + "EngName": 'name_english',
     }
     alternate_tags = [p + "Alternate%sImagePath_Large" % i
                       for i in range(1, 4)]
     for event, elem in iterparse(feed_path):
         if elem.tag != properties_tag:
             continue
         for r in elem:
             if r.tag == p + "Id" and r.text in self.no_urls[
                     'product_ids']:
                 index = self.no_urls['product_ids'].index(r.text)
                 self.no_urls['status'][index] = 'ran'
                 self.number += 1
                 # One item per product; a shared item carried optional
                 # fields of the previous product over to the next one.
                 xml_item = ChomeItem()
                 urls = []
                 image_paths = []
                 for x in elem:
                     if x.tag in copied:
                         xml_item[copied[x.tag]] = [x.text]
                     elif x.tag == p + "EngLongDesc" and x.text is not None:
                         xml_item['description_english'] = [
                             self.escape(basic.cdata(x.text))
                         ]
                     elif x.tag == p + "RetailPrice" and x.text is not None:
                         # strip the last two characters of the feed
                         # price; empty elements used to raise TypeError
                         xml_item['custom_price'] = [x.text[:-2]]
                     elif x.tag == p + "SpnLongDesc" and x.text is not None:
                         xml_item['description_spanish'] = [
                             self.escape(basic.cdata(x.text))
                         ]
                     elif x.tag == p + "SpnName" and x.text is not None:
                         xml_item['name_spanish'] = [x.text]
                     elif x.tag == p + "ImagePath_Large" and x.text is not None:
                         absolute = self.get_absolute(x.text)
                         urls.append(absolute)
                         # keep the main image in front of the alternates
                         image_paths.insert(
                             0, self.get_server_path(absolute))
                     elif x.tag == p + "IsActive":
                         # Element text is a string; comparing it with the
                         # integer 0 was always False.
                         # NOTE(review): assumes inactive products carry
                         # "0" or "false" in the feed - confirm.
                         flag_text = (x.text or "").strip().lower()
                         if flag_text in ("0", "false"):
                             xml_item['in_stock'] = ["NOT_IN_STOCK"]
                         else:
                             xml_item['in_stock'] = ['IN_STOCK']
                     elif x.tag in alternate_tags and x.text is not None:
                         absolute = self.get_absolute(x.text)
                         urls.append(absolute)
                         image_paths.append(
                             self.get_server_path(absolute))
                 # change image paths for normal_image_url and return urls
                 if image_paths:
                     xml_item['normal_image_url'] = image_paths
                 self.xml.create_xml(xml_item)
                 urls_all += urls
         # Done with this product: drop its children to keep memory flat
         # while streaming through a large feed.
         elem.clear()
     statuses = self.no_urls['status']
     for i, status in enumerate(statuses):
         if status != 'ran':
             statuses[i] = 'not_found'
     return urls_all
Example #9
0
 def _check_mandatory(self):
     """Make sure each option named in ``self.mandatory`` is in ``self.d``.

     A missing option triggers a warning, prints the argument overview
     and terminates the process with exit status 3.
     """
     for name in self.mandatory:
         supplied = name in self.d
         if not supplied:
             text = "Option '{0}' is mandatory.".format(name)
             basic.warning(text)
             self.print_arguments()
             self.os._exit(3)
Example #10
0
 def _check_valid(self):
     """Check that an input source ('database' or 'file') was selected.

     When neither option is truthy, a warning and the argument overview
     are printed and the process terminates with exit status 3.
     """
     has_source = self.d['database'] or self.d['file']
     if not has_source:
         basic.warning(
             "Either 'file' option or 'database' option must be set")
         self.print_arguments()
         self.os._exit(3)