def run_emailer(cls):
    """Email the latest extracted job records to every subscribed user.

    Opens an SMTP session against ``config.SMTP_HOST``/``config.SMTP_PORT``,
    upgrades it with STARTTLS, logs in with ``config.SMTP_USER`` and
    ``config.SMTP_PASSWORD``, and sends one multipart message whose
    attachment is the result of ``JobItem.extract_records_as_bytes('xlsx')``.
    Recipients are all users returned by ``User.findall()`` whose
    ``subscription_status`` equals ``'subscribed'``.

    Any exception raised while preparing or sending the message is logged
    and swallowed; the method returns ``None`` in every case.
    """
    from email.mime.base import MIMEBase
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText
    # ``email.encoders`` is the module name available on both Python 2.5+
    # and Python 3; the capitalised ``Encoders`` alias exists on Python 2 only.
    from email import encoders
    import smtplib

    logger.info('start sending email to subscribers...')
    smtp = smtplib.SMTP(host=config.SMTP_HOST, port=config.SMTP_PORT)
    try:
        # NOTE(review): smtplib writes the debug trace to stderr; this
        # presumably includes the login exchange - confirm this level is
        # wanted outside of development.
        smtp.set_debuglevel(4)
        smtp.ehlo()
        smtp.starttls()
        smtp.ehlo()
        smtp.login(user=config.SMTP_USER, password=config.SMTP_PASSWORD)
        logger.info('established secure connection to smtp server...')

        toaddrs = [
            user.email
            for user in User.findall()
            if user.subscription_status == 'subscribed'
        ]
        logger.debug('subscribed recipients: %s', toaddrs)
        if not toaddrs:
            # sendmail() raises when given no recipients, so stop early
            # instead of building the attachment for nobody.
            logger.info('no subscribed users found, nothing to send...')
            return

        fromaddr = config.FROM_ADDR
        current_date_string = datetime.datetime.now().strftime('%Y-%m-%d')
        message_subject = "%s:%s" % (config.APP_NAME, current_date_string)
        message_text = "Thank you for subscribing %s. Please find the newly posted jobs as of %s" % (
            config.APP_NAME, current_date_string)

        msg = MIMEMultipart()
        msg['From'] = fromaddr
        msg['To'] = ''
        # NOTE(review): listing every subscriber in Cc makes all addresses
        # visible to all recipients - confirm this is intended.
        msg['Cc'] = ','.join(toaddrs)
        msg['Subject'] = message_subject
        msg.attach(MIMEText(message_text))

        file_format = 'xlsx'
        part = MIMEBase('application', "octet-stream")
        part.set_payload(JobItem.extract_records_as_bytes(file_format))
        logger.info('attached extracted files to the mail...waiting to be sent..')
        encoders.encode_base64(part)
        part.add_header(
            'Content-Disposition',
            'attachment; filename="extracted_jobs_%s.%s"' % (current_date_string, file_format))
        msg.attach(part)

        smtp.sendmail(fromaddr, toaddrs, msg.as_string())
        logger.info('done sending email to subscribers...')
    except Exception as e:
        logger.error(e)
    finally:
        try:
            smtp.quit()
        except smtplib.SMTPException as quit_error:
            # The session may already be gone after a failure above; keep
            # the log-and-continue contract rather than raising from here.
            logger.error(quit_error)
def remove_old_records(cls, retention_days=14):
    """Delete rows whose ``publish_date`` is older than ``retention_days`` days.

    :param retention_days: age threshold in whole days; the value is coerced
        with ``int()`` before it is placed in the SQL text, so fractional
        values are truncated and non-numeric values are rejected.
    :return: None. Failures (invalid ``retention_days`` or a database error)
        are logged and swallowed; a database error also triggers a rollback.
    """
    # The interval is embedded in the statement text, so make sure only a
    # plain integer can ever reach it.
    try:
        days = int(retention_days)
    except (TypeError, ValueError) as e:
        logger.error('Unable to remove the old records')
        logger.error(e)
        return

    conn = cls.connect_db()
    try:
        c = conn.cursor()
        c.execute("DELETE FROM " + cls.table_name +
                  " WHERE publish_date < NOW() - INTERVAL '" + str(days) + " days'")
        conn.commit()
    except Exception as e:
        conn.rollback()
        logger.error('Unable to remove the old records')
        logger.error(e)
    finally:
        conn.close()
def run_emailer(cls):
    """Email the latest extracted job records to every subscribed user.

    Opens an SMTP session against ``config.SMTP_HOST``/``config.SMTP_PORT``,
    upgrades it with STARTTLS, logs in with ``config.SMTP_USER`` and
    ``config.SMTP_PASSWORD``, and sends one multipart message whose
    attachment is the result of ``JobItem.extract_records_as_bytes('xlsx')``.
    Recipients are all users returned by ``User.findall()`` whose
    ``subscription_status`` equals ``'subscribed'``.

    Any exception raised while preparing or sending the message is logged
    and swallowed; the method returns ``None`` in every case.
    """
    from email.mime.base import MIMEBase
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText
    # ``email.encoders`` is the module name available on both Python 2.5+
    # and Python 3; the capitalised ``Encoders`` alias exists on Python 2 only.
    from email import encoders
    import smtplib

    logger.info('start sending email to subscribers...')
    smtp = smtplib.SMTP(host=config.SMTP_HOST, port=config.SMTP_PORT)
    try:
        # NOTE(review): smtplib writes the debug trace to stderr; this
        # presumably includes the login exchange - confirm this level is
        # wanted outside of development.
        smtp.set_debuglevel(4)
        smtp.ehlo()
        smtp.starttls()
        smtp.ehlo()
        smtp.login(user=config.SMTP_USER, password=config.SMTP_PASSWORD)
        logger.info('established secure connection to smtp server...')

        toaddrs = [
            user.email
            for user in User.findall()
            if user.subscription_status == 'subscribed'
        ]
        logger.debug('subscribed recipients: %s', toaddrs)
        if not toaddrs:
            # sendmail() raises when given no recipients, so stop early
            # instead of building the attachment for nobody.
            logger.info('no subscribed users found, nothing to send...')
            return

        fromaddr = config.FROM_ADDR
        current_date_string = datetime.datetime.now().strftime('%Y-%m-%d')
        message_subject = "%s:%s" % (config.APP_NAME, current_date_string)
        message_text = "Thank you for subscribing %s. Please find the newly posted jobs as of %s" % (
            config.APP_NAME, current_date_string)

        msg = MIMEMultipart()
        msg['From'] = fromaddr
        msg['To'] = ''
        # NOTE(review): listing every subscriber in Cc makes all addresses
        # visible to all recipients - confirm this is intended.
        msg['Cc'] = ','.join(toaddrs)
        msg['Subject'] = message_subject
        msg.attach(MIMEText(message_text))

        file_format = 'xlsx'
        part = MIMEBase('application', "octet-stream")
        part.set_payload(JobItem.extract_records_as_bytes(file_format))
        logger.info('attached extracted files to the mail...waiting to be sent..')
        encoders.encode_base64(part)
        part.add_header(
            'Content-Disposition',
            'attachment; filename="extracted_jobs_%s.%s"' % (current_date_string, file_format))
        msg.attach(part)

        smtp.sendmail(fromaddr, toaddrs, msg.as_string())
        logger.info('done sending email to subscribers...')
    except Exception as e:
        logger.error(e)
    finally:
        try:
            smtp.quit()
        except smtplib.SMTPException as quit_error:
            # The session may already be gone after a failure above; keep
            # the log-and-continue contract rather than raising from here.
            logger.error(quit_error)
def is_contact_blocked(cls, contact=''):
    """Tell whether ``contact`` has at least one row in this model's table.

    :param contact: value compared against the ``contact`` column.
    :return: True when a matching row exists; False when ``contact`` is
        ``None`` or empty, when no row matches, or when the lookup raises
        (the exception is logged, not propagated).
    """
    if contact is None or contact == '':
        logger.debug('returning False as contact is None or Empty in is_contact_blocked()')
        return False

    connection = cls.connect_db()
    try:
        cursor = connection.cursor()
        query = 'SELECT COUNT(*) FROM ' + cls.table_name + ' WHERE contact=?'
        cursor.execute(query, (contact,))
        row = cursor.fetchone()
        matches = int(row[0])
        return matches > 0
    except Exception as error:
        logger.error(error)
        logger.error('returning False as exception occurs in is_contact_blocked()')
        return False
    finally:
        # Runs on every exit path above, including the early returns.
        connection.close()
def remove(self):
    """Delete the row identified by this object's key properties.

    The WHERE clause is built from ``self.key_properties`` and bound to the
    current attribute values of those properties.

    :raises DatabaseError: when the delete fails; the transaction is rolled
        back and the original error is logged first.
    """
    connection = self.connect_db()
    try:
        cursor = connection.cursor()
        key_filter = ' AND '.join(['%s=?' % key for key in self.key_properties])
        key_values = tuple([getattr(self, key) for key in self.key_properties])
        statement = 'DELETE FROM ' + self.table_name + ' WHERE ' + key_filter
        cursor.execute(statement, key_values)
        connection.commit()
        logger.info('Removed: %s' % self)
    except Exception as error:
        logger.error(error)
        logger.info('Unable to remove: %s' % self)
        connection.rollback()
        raise DatabaseError(str(error))
    finally:
        connection.close()
def update(self):
    """Write every property of this object back to its existing row.

    All names in ``self.property_names`` are assigned in the SET clause and
    the row is selected through ``self.key_properties``.

    :raises DatabaseError: when the update fails; the transaction is rolled
        back and the original error is logged first.
    """
    connection = self.connect_db()
    try:
        cursor = connection.cursor()
        assignments = ', '.join(['%s=?' % name for name in self.property_names])
        key_filter = ' AND '.join(['%s=?' % key for key in self.key_properties])
        statement = ' UPDATE ' + self.table_name + ' SET ' + assignments + ' WHERE ' + key_filter
        # Bound values follow placeholder order: SET values, then key values.
        new_values = [getattr(self, name) for name in self.property_names]
        key_values = [getattr(self, key) for key in self.key_properties]
        cursor.execute(statement, tuple(new_values + key_values))
        connection.commit()
        logger.info('Updated: %s' % self)
    except Exception as error:
        logger.error(error)
        logger.info('Unable to update: %s' % self)
        connection.rollback()
        raise DatabaseError(str(error))
    finally:
        connection.close()
def migrate_db(cls):
    """
    Placeholder for the database migration scripts; it is meant to be
    revised before every release.

    Currently it calls ``create_db()`` and then saves a single admin user.
    Errors raised while saving are logged and not propagated.
    :return: None
    """
    cls.create_db()
    connection = cls.datasource.get_connection()
    try:
        logger.info('start migrating database')
        seed_user = User('meng', 'meng123', '*****@*****.**', 'admin')
        seed_user.save()
        logger.info('done migrating database')
    except Exception as error:
        logger.error('Unable to run migrate_db')
        logger.error(error)
    finally:
        # The connection is not used above; it is only obtained and closed.
        connection.close()
def should_be_rejected(cls, input_text=''):
    """Check ``input_text`` against every stored rejection pattern.

    Each record returned by ``cls.findall()`` supplies a ``reject_pattern``
    that is applied with ``re.search``.

    :param input_text: text to test.
    :return: True as soon as one pattern matches; False when ``input_text``
        is ``None`` or empty, when nothing matches, or when an unexpected
        error occurs (the error is logged, not propagated).
    """
    if input_text is None or input_text == '':
        logger.debug('returning False as input_text is None or Empty in should_be_rejected()')
        return False

    try:
        for record in cls.findall():
            try:
                match = re.search(record.reject_pattern, input_text)
            except re.error as e:
                # One malformed rule must not disable all the other rules.
                logger.error('skipping invalid reject pattern %r in should_be_rejected()'
                             % (record.reject_pattern,))
                logger.error(e)
                continue
            if match:
                logger.debug('returning True as input_text matches %s in should_be_rejected()'
                             % record.reject_pattern)
                return True
        logger.debug('returning False as input_text does not match any patterns should_be_rejected()')
        return False
    except Exception as e:
        logger.error(e)
        logger.error('returning False as exception occurs in should_be_rejected()')
        return False
def validate(cls, user=None):
    """Check whether a row exists with the user's username and password.

    The supplied ``user.password`` value is compared directly with the
    ``password`` column.

    :param user: object exposing ``username`` and ``password``.
    :return: True when at least one matching row exists; False when ``user``
        is ``None``, when either credential is empty, when nothing matches,
        or when the query raises (the error is logged).
    """
    if user is None:
        return False

    has_credentials = user.username and user.username != '' and user.password and user.password != ''
    if not has_credentials:
        logger.debug('username or password is empty.. hence returning false in validate()')
        return False

    connection = cls.connect_db()
    try:
        cursor = connection.cursor()
        cursor.execute("SELECT COUNT(*) FROM " + cls.table_name + " WHERE username=? and password=?",
                       (user.username, user.password))
        matches = int(cursor.fetchone()[0])
        return matches > 0
    except Exception as error:
        logger.error('failed to retrieve the item count')
        logger.error(error)
        return False
    finally:
        connection.close()
def is_exists(cls, item=None):
    """Tell whether a row with the item's ``job_title`` is already stored.

    :param item: object exposing ``job_title``.
    :return: True when a row with that title exists, and also True when
        ``item`` or its title is falsy; False when no row matches or when
        the query raises (the error is logged).
    """
    if not item:
        logger.debug('item is None.. hence returning true in is_exist()')
        return True

    title = item.job_title
    if not title:
        logger.debug('item title is None.. hence returning true in is_exist()')
        return True

    connection = cls.connect_db()
    try:
        cursor = connection.cursor()
        cursor.execute("SELECT COUNT(*) FROM " + cls.table_name + " WHERE job_title=?", (title,))
        stored_count = int(cursor.fetchone()[0])
        return stored_count > 0
    except Exception as error:
        logger.error('failed to retrieve the item count')
        logger.error(error)
        return False
    finally:
        connection.close()
def save(self):
    """Insert this object as a new row, or update the row if it already exists.

    ``self.find(self)`` decides which path is taken: ``None`` means insert,
    anything else delegates to ``self.update()``. A failed insert is rolled
    back and logged without raising. Nothing happens for a falsy ``self``.
    """
    if not self:
        return

    if self.find(self) is not None:
        self.update()
        return

    connection = self.connect_db()
    try:
        cursor = connection.cursor()
        column_list = ', '.join(self.property_names)
        placeholders = ', '.join(['?'] * len(self.property_names))
        statement = ('INSERT INTO ' + self.table_name + '(' + column_list + ') ' +
                     'VALUES (' + placeholders + ')')
        values = tuple([getattr(self, name) for name in self.property_names])
        cursor.execute(statement, values)
        connection.commit()
        logger.info('Inserted item: %s' % self)
    except Exception as error:
        connection.rollback()
        logger.error('Unable to insert the item: %s' % self)
        logger.error(error)
    finally:
        connection.close()
def extract_records_as_bytes(cls, format='txt'):
    """Export every record from ``cls.findall()`` and return the file bytes.

    The export is written to a temporary file, read back in binary mode and
    the temporary file is always removed afterwards.

    :param format: ``'xlsx'``, ``'csv'`` or ``'txt'`` (case-insensitive).
        The first row holds the upper-cased ``cls.property_names``; each
        following row holds one record's values in the same order. In the
        ``txt`` format values are tab-separated ``repr()`` strings and
        ``None`` becomes an empty field.
    :return: the content of the generated file as a byte string.
    :raises Exception: for an unsupported format, or any error raised while
        exporting; the error is logged and re-raised unchanged.
    """
    import os
    import tempfile

    # Only the name is needed; close the handle right away so the file can
    # be reopened by the writers below and removed in ``finally``.
    handle = tempfile.NamedTemporaryFile(prefix='zjobs.%s.' % cls.__name__,
                                         suffix=('.%s' % format), delete=False)
    tmp_file = handle.name
    handle.close()

    try:
        records = cls.findall()
        property_names = cls.property_names
        header = [property_name.upper() for property_name in property_names]
        file_format = format.lower()

        if file_format == 'xlsx':
            # Imported here so the other formats do not require the package.
            import xlsxwriter
            workbook = xlsxwriter.Workbook(tmp_file, {'default_date_format': 'yyyy-mm-dd'})
            worksheet = workbook.add_worksheet('crawled_jobs')
            worksheet.set_column('A:M', 40)
            worksheet.write_row(0, 0, header)
            for row_index, record in enumerate(records):
                worksheet.write_row(row_index + 1, 0,
                                    [getattr(record, property_name) for property_name in property_names])
            workbook.close()
        elif file_format == 'csv':
            import unicodecsv
            # unicodecsv emits encoded bytes, so the file is opened in binary mode.
            with open(tmp_file, 'wb') as f:
                writer = unicodecsv.writer(f, encoding='utf-8')
                writer.writerow(header)
                for record in records:
                    writer.writerow([getattr(record, property_name) for property_name in property_names])
        elif file_format == 'txt':
            with open(tmp_file, 'w') as f:
                f.write('\t'.join(header) + '\n')
                for record in records:
                    values = [getattr(record, property_name) for property_name in property_names]
                    f.write('\t'.join([repr(value) if value is not None else '' for value in values]) + '\n')
        else:
            raise Exception("'%s' format is not supported" % format)

        with open(tmp_file, 'rb') as f:
            return f.read()
    except Exception as e:
        logger.error(e)
        logger.error('Unable to extract all records as bytes')
        # Bare raise keeps the original traceback.
        raise
    finally:
        os.remove(tmp_file)
def create_db(cls):
    """Drop and recreate every application table together with its unique index.

    For each of CRAWLED_JOBS, JOB_REJECTION_RULES, BLOCKED_CONTACTS, USERS
    and DOCS, in that order, the table and its index are dropped if present,
    then created again. Everything is committed once at the end; any error
    is logged, the transaction is rolled back and nothing is raised.
    """
    # One entry per table: (table name, index name, CREATE TABLE, CREATE INDEX).
    schema = (
        (
            'CRAWLED_JOBS',
            'job_title_idx',
            ''' CREATE TABLE IF NOT EXISTS CRAWLED_JOBS( source text, crawled_date date, publish_date date, job_title text, job_desc text, job_details_link text, job_location text, job_country text, salary text, employer_name text, contact text ) ''',
            ''' CREATE UNIQUE INDEX job_title_idx ON CRAWLED_JOBS(job_title) ''',
        ),
        (
            'JOB_REJECTION_RULES',
            'reject_pattern_idx',
            ''' CREATE TABLE IF NOT EXISTS JOB_REJECTION_RULES( reject_pattern text, reject_reason text ) ''',
            ''' CREATE UNIQUE INDEX reject_pattern_idx ON JOB_REJECTION_RULES(reject_pattern) ''',
        ),
        (
            'BLOCKED_CONTACTS',
            'blocked_contacts_idx',
            ''' CREATE TABLE IF NOT EXISTS BLOCKED_CONTACTS( contact text, block_reason text ) ''',
            ''' CREATE UNIQUE INDEX blocked_contacts_idx ON BLOCKED_CONTACTS(contact) ''',
        ),
        (
            'USERS',
            'users_idx',
            ''' CREATE TABLE IF NOT EXISTS USERS( username text, password text, email text, subscription_status text, role text, last_login_date date, register_date date ) ''',
            ''' CREATE UNIQUE INDEX users_idx ON USERS(username) ''',
        ),
        (
            'DOCS',
            'docs_idx',
            ''' CREATE TABLE IF NOT EXISTS DOCS( filename text, content_type text, content bytea, uploaded_by text, uploaded_date date ) ''',
            ''' CREATE UNIQUE INDEX docs_idx ON DOCS(filename) ''',
        ),
    )

    connection = cls.datasource.get_connection()
    try:
        cursor = connection.cursor()
        for table, index, create_table_sql, create_index_sql in schema:
            cursor.execute('DROP TABLE IF EXISTS ' + table)
            cursor.execute('DROP INDEX IF EXISTS ' + index)
            cursor.execute(create_table_sql)
            cursor.execute(create_index_sql)
            logger.info("created table and indexes for " + table)
        connection.commit()
        logger.info('done create database')
    except Exception as error:
        logger.error('Unable to run create_db')
        logger.error(error)
        connection.rollback()
    finally:
        connection.close()