Ejemplo n.º 1
0
    def run_emailer(cls):
        """Email the current job extract to every subscribed user.

        Opens an SMTP connection from the ``config.SMTP_*`` settings, upgrades
        it with STARTTLS, logs in and sends a single multipart message: a
        short text body plus the ``xlsx`` output of
        ``JobItem.extract_records_as_bytes`` as an attachment.  Recipients
        are the users whose ``subscription_status`` is ``'subscribed'``; when
        there are none, nothing is built or sent.

        Errors raised after the connection has been opened are logged and
        swallowed.  An error while opening the connection itself is not
        caught here.
        """
        from email.mime.base import MIMEBase
        from email.mime.multipart import MIMEMultipart
        from email.mime.text import MIMEText
        # Lower-case module name; ``email.Encoders`` is the legacy alias.
        from email import encoders
        import smtplib

        logger.info('start sending email to subscribers...')
        smtp = smtplib.SMTP(host=config.SMTP_HOST, port=config.SMTP_PORT)

        try:
            # NOTE(review): a non-zero debug level makes smtplib echo the
            # SMTP conversation -- confirm this is wanted outside development.
            smtp.set_debuglevel(4)
            smtp.ehlo()
            smtp.starttls()
            # EHLO is repeated after STARTTLS, as in the original sequence.
            smtp.ehlo()
            smtp.login(user=config.SMTP_USER, password=config.SMTP_PASSWORD)

            logger.info('established secure connection to smtp server...')

            toaddrs = [
                user.email for user in User.findall()
                if user.subscription_status == 'subscribed'
            ]
            if not toaddrs:
                # Nothing to deliver: skip building the attachment and avoid
                # calling sendmail() with an empty recipient list.
                logger.info('no subscribed users found, skipping email...')
                return
            # Log the count only; the addresses themselves are personal data.
            logger.debug('sending email to %d subscribers' % len(toaddrs))
            fromaddr = config.FROM_ADDR

            current_date_string = datetime.datetime.now().strftime('%Y-%m-%d')
            message_subject = "%s:%s" % (config.APP_NAME, current_date_string)
            message_text = "Thank you for subscribing %s. Please find the newly posted jobs as of %s" % (
                config.APP_NAME, current_date_string)

            msg = MIMEMultipart()
            msg['From'] = fromaddr
            msg['To'] = ''
            # NOTE(review): every subscriber address is written into the
            # visible Cc header, so recipients can see each other -- confirm
            # this is intended.
            msg['Cc'] = ','.join(toaddrs)
            msg['Subject'] = message_subject
            msg.attach(MIMEText(message_text))

            part = MIMEBase('application', "octet-stream")
            file_format = 'xlsx'
            part.set_payload(JobItem.extract_records_as_bytes(file_format))
            logger.info(
                'attached extracted files to the mail...waiting to be sent..')
            encoders.encode_base64(part)
            part.add_header(
                'Content-Disposition',
                'attachment; filename="extracted_jobs_%s.%s"' %
                (current_date_string, file_format))
            msg.attach(part)

            smtp.sendmail(fromaddr, toaddrs, msg.as_string())
            logger.info('done sending email to subscribers...')
        except Exception as e:
            logger.error(e)
        finally:
            try:
                smtp.quit()
            except (smtplib.SMTPException, IOError) as e:
                # quit() can fail when the connection is already gone; that
                # must not escape from the cleanup step.
                logger.debug('ignoring error while closing smtp connection: %s' % e)
Ejemplo n.º 2
0
 def remove_old_records(cls, retention_days=14):
     """Delete rows whose ``publish_date`` is older than the retention window.

     :param retention_days: age threshold in days.  It is converted with
         ``int()`` (fractional values are truncated) and must not be
         negative.

     Any failure, including an invalid ``retention_days``, is logged and the
     transaction is rolled back instead of being re-raised by the ``except``
     branch.
     """
     conn = cls.connect_db()
     try:
         # Coerce to int before building the statement so that only digits
         # can ever be spliced into the SQL text.
         days = int(retention_days)
         if days < 0:
             # A negative interval would move the cut-off into the future.
             raise ValueError('retention_days must not be negative: %r' % (retention_days,))
         c = conn.cursor()
         c.execute("DELETE FROM " + cls.table_name + " WHERE publish_date < NOW() - INTERVAL '%d days'" % days)
         conn.commit()
     except Exception as e:
         conn.rollback()
         logger.error('Unable to remove the old records')
         logger.error(e)
     finally:
         conn.close()
Ejemplo n.º 3
0
    def run_emailer(cls):
        """Email the current job extract to every subscribed user.

        Opens an SMTP connection from the ``config.SMTP_*`` settings, upgrades
        it with STARTTLS, logs in and sends a single multipart message: a
        short text body plus the ``xlsx`` output of
        ``JobItem.extract_records_as_bytes`` as an attachment.  Recipients
        are the users whose ``subscription_status`` is ``'subscribed'``; when
        there are none, nothing is built or sent.

        Errors raised after the connection has been opened are logged and
        swallowed.  An error while opening the connection itself is not
        caught here.
        """
        from email.mime.base import MIMEBase
        from email.mime.multipart import MIMEMultipart
        from email.mime.text import MIMEText
        # Lower-case module name; ``email.Encoders`` is the legacy alias.
        from email import encoders
        import smtplib

        logger.info('start sending email to subscribers...')
        smtp = smtplib.SMTP(host=config.SMTP_HOST, port=config.SMTP_PORT)

        try:
            # NOTE(review): a non-zero debug level makes smtplib echo the
            # SMTP conversation -- confirm this is wanted outside development.
            smtp.set_debuglevel(4)
            smtp.ehlo()
            smtp.starttls()
            # EHLO is repeated after STARTTLS, as in the original sequence.
            smtp.ehlo()
            smtp.login(user=config.SMTP_USER, password=config.SMTP_PASSWORD)

            logger.info('established secure connection to smtp server...')

            toaddrs = [user.email for user in User.findall() if user.subscription_status == 'subscribed']
            if not toaddrs:
                # Nothing to deliver: skip building the attachment and avoid
                # calling sendmail() with an empty recipient list.
                logger.info('no subscribed users found, skipping email...')
                return
            # Log the count only; the addresses themselves are personal data.
            logger.debug('sending email to %d subscribers' % len(toaddrs))
            fromaddr = config.FROM_ADDR

            current_date_string = datetime.datetime.now().strftime('%Y-%m-%d')
            message_subject = "%s:%s" % (config.APP_NAME, current_date_string)
            message_text = "Thank you for subscribing %s. Please find the newly posted jobs as of %s" % (
                config.APP_NAME, current_date_string)

            msg = MIMEMultipart()
            msg['From'] = fromaddr
            msg['To'] = ''
            # NOTE(review): every subscriber address is written into the
            # visible Cc header, so recipients can see each other -- confirm
            # this is intended.
            msg['Cc'] = ','.join(toaddrs)
            msg['Subject'] = message_subject
            msg.attach(MIMEText(message_text))

            part = MIMEBase('application', "octet-stream")
            file_format = 'xlsx'
            part.set_payload(JobItem.extract_records_as_bytes(file_format))
            logger.info('attached extracted files to the mail...waiting to be sent..')
            encoders.encode_base64(part)
            part.add_header('Content-Disposition',
                            'attachment; filename="extracted_jobs_%s.%s"' % (current_date_string, file_format))
            msg.attach(part)

            smtp.sendmail(fromaddr, toaddrs, msg.as_string())
            logger.info('done sending email to subscribers...')
        except Exception as e:
            logger.error(e)
        finally:
            try:
                smtp.quit()
            except (smtplib.SMTPException, IOError) as e:
                # quit() can fail when the connection is already gone; that
                # must not escape from the cleanup step.
                logger.debug('ignoring error while closing smtp connection: %s' % e)
Ejemplo n.º 4
0
 def is_contact_blocked(cls, contact=''):
     """Tell whether ``contact`` has at least one row in this class's table.

     :param contact: value compared against the ``contact`` column.
     :return: ``True`` when the count of matching rows is positive.
         ``False`` when ``contact`` is ``None`` or empty (no query is run)
         and also when the lookup raises.
     """
     if contact is None or contact == '':
         logger.debug('returning False as contact is None or Empty in is_contact_blocked()')
         return False

     connection = cls.connect_db()
     try:
         cursor = connection.cursor()
         query = 'SELECT COUNT(*) FROM ' + cls.table_name + ' WHERE contact=?'
         cursor.execute(query, (contact, ))
         first_row = cursor.fetchone()
         match_count = int(first_row[0])
         return match_count > 0
     except Exception as error:
         logger.error(error)
         logger.error('returning False as exception occurs in is_contact_blocked()')
         return False
     finally:
         # Runs on every path out of the try block, including the returns.
         connection.close()
Ejemplo n.º 5
0
 def remove(self):
     """Delete the row(s) whose key columns match this object's key values.

     The WHERE clause is built from ``self.key_properties`` with one ``?``
     placeholder per key; the values are read from the same-named
     attributes of ``self``.

     :raises DatabaseError: when anything in the delete fails; the error is
         logged and the transaction rolled back first.
     """
     connection = self.connect_db()
     try:
         cursor = connection.cursor()
         statement_head = 'DELETE FROM ' + self.table_name + ' WHERE '
         key_filter = ' AND '.join('%s=?' % key_name for key_name in self.key_properties)
         key_values = tuple(getattr(self, key_name) for key_name in self.key_properties)
         cursor.execute(statement_head + key_filter, key_values)
         connection.commit()
         logger.info('Removed: %s' % self)
     except Exception as error:
         logger.error(error)
         logger.info('Unable to remove: %s' % self)
         connection.rollback()
         raise DatabaseError(str(error))
     finally:
         connection.close()
Ejemplo n.º 6
0
 def update(self):
     """Write this object's attribute values to its existing row(s).

     Every name in ``self.property_names`` becomes a ``column=?`` assignment
     and every name in ``self.key_properties`` becomes a ``column=?`` filter;
     the bound values are read from the same-named attributes of ``self``.

     :raises DatabaseError: when anything in the update fails; the error is
         logged and the transaction rolled back first.
     """
     connection = self.connect_db()
     try:
         cursor = connection.cursor()
         statement = ' UPDATE ' + self.table_name
         statement += ' SET ' + ', '.join('%s=?' % column for column in self.property_names)
         statement += ' WHERE ' + ' AND '.join('%s=?' % key_name for key_name in self.key_properties)
         # Placeholder order: SET values first, then the key values.
         new_values = [getattr(self, column) for column in self.property_names]
         key_values = [getattr(self, key_name) for key_name in self.key_properties]
         cursor.execute(statement, tuple(new_values + key_values))
         connection.commit()
         logger.info('Updated: %s' % self)
     except Exception as error:
         logger.error(error)
         logger.info('Unable to update: %s' % self)
         connection.rollback()
         raise DatabaseError(str(error))
     finally:
         connection.close()
Ejemplo n.º 7
0
    def migrate_db(cls):
        """
        Run ``create_db`` and then save one hard-coded seed user.

        Placeholder for the migration scripts; it needs to be updated before
        every release.  Errors raised while saving the user are logged, not
        re-raised.
        :return: None
        """
        cls.create_db()

        # NOTE(review): this connection is only opened and closed here; the
        # statements below do not use it -- confirm whether it is needed.
        connection = cls.datasource.get_connection()
        try:
            logger.info('start migrating database')
            # NOTE(review): credentials are hard-coded in source.
            seed_user = User('meng', 'meng123', '*****@*****.**', 'admin')
            seed_user.save()
            logger.info('done migrating database')
        except Exception as error:
            logger.error('Unable to run migrate_db')
            logger.error(error)
        finally:
            connection.close()
Ejemplo n.º 8
0
    def migrate_db(cls):
        """
        Run ``create_db`` and then save one hard-coded seed user.

        Placeholder for the migration scripts; it needs to be updated before
        every release.  Errors raised while saving the user are logged, not
        re-raised.
        :return: None
        """
        cls.create_db()

        # NOTE(review): this connection is only opened and closed here; the
        # statements below do not use it -- confirm whether it is needed.
        connection = cls.datasource.get_connection()
        try:
            logger.info('start migrating database')
            # NOTE(review): credentials are hard-coded in source.
            seed_user = User('meng', 'meng123', '*****@*****.**', 'admin')
            seed_user.save()
            logger.info('done migrating database')
        except Exception as error:
            logger.error('Unable to run migrate_db')
            logger.error(error)
        finally:
            connection.close()
Ejemplo n.º 9
0
 def should_be_rejected(cls, input_text=''):
     """Tell whether ``input_text`` matches any stored rejection pattern.

     Each record returned by ``cls.findall()`` supplies a regular expression
     in ``reject_pattern``; the text is rejected as soon as ``re.search``
     finds a match for one of them.

     :param input_text: text to test.
     :return: ``True`` on the first matching pattern.  ``False`` when
         ``input_text`` is ``None`` or empty, when no pattern matches, or
         when an unexpected error occurs (the error is logged).
     """
     if input_text is None or input_text == '':
         logger.debug('returning False as input_text is None or Empty in should_be_rejected()')
         return False
     try:
         for record in cls.findall():
             try:
                 match = re.search(record.reject_pattern, input_text)
             except re.error as e:
                 # One malformed rule must not switch off every other rule,
                 # so skip it and keep checking the remaining patterns.
                 logger.error('skipping invalid reject pattern %r in should_be_rejected(): %s' % (record.reject_pattern, e))
                 continue
             if match:
                 logger.debug('returning True as input_text matches %s in should_be_rejected()' % record.reject_pattern)
                 return True
         logger.debug('returning False as input_text does not match any patterns should_be_rejected()')
         return False
     except Exception as e:
         logger.error(e)
         logger.error('returning False as exception occurs in should_be_rejected()')
         return False
Ejemplo n.º 10
0
 def validate(cls, user=None):
     """Check whether a row exists with the user's username and password.

     :param user: object exposing ``username`` and ``password`` attributes.
     :return: ``True`` when at least one row matches both values.  ``False``
         when ``user`` is ``None``, when either value is empty, or when the
         lookup raises (the error is logged).
     """
     if user is None:
         return False

     credentials_present = (user.username and user.username != ''
                            and user.password and user.password != '')
     if not credentials_present:
         logger.debug('username or password is empty.. hence returning false in validate()')
         return False

     connection = cls.connect_db()
     try:
         cursor = connection.cursor()
         # NOTE(review): the supplied password is compared directly with the
         # stored column -- presumably plaintext storage; confirm.
         query = "SELECT COUNT(*) FROM " + cls.table_name + " WHERE username=? and password=?"
         cursor.execute(query, (user.username, user.password))
         first_row = cursor.fetchone()
         return int(first_row[0]) > 0
     except Exception as error:
         logger.error('failed to retrieve the item count')
         logger.error(error)
         return False
     finally:
         connection.close()
Ejemplo n.º 11
0
    def is_exists(cls, item=None):
        """Tell whether a row with the item's ``job_title`` is already stored.

        :param item: object exposing a ``job_title`` attribute.
        :return: ``True`` when a matching row exists, and also when ``item``
            or its title is falsy (both cases are logged).  ``False`` when no
            row matches or when the lookup raises.
        """
        if not item:
            logger.debug('item is None.. hence returning true in is_exist()')
            return True

        job_title = item.job_title
        if not job_title:
            logger.debug('item title is None.. hence returning true in is_exist()')
            return True

        connection = cls.connect_db()
        try:
            cursor = connection.cursor()
            query = "SELECT COUNT(*) FROM " + cls.table_name + " WHERE job_title=?"
            cursor.execute(query, (job_title,))
            first_row = cursor.fetchone()
            return int(first_row[0]) > 0
        except Exception as error:
            logger.error('failed to retrieve the item count')
            logger.error(error)
            return False
        finally:
            connection.close()
Ejemplo n.º 12
0
 def save(self):
     """Insert this object as a new row, or update it when it already exists.

     ``self.find(self)`` decides which path is taken: ``None`` means insert,
     anything else delegates to ``self.update()``.  Nothing happens when the
     object itself is falsy.

     Insert failures are logged and rolled back but not re-raised.
     NOTE(review): ``update()`` does raise on failure, so the two paths
     report errors differently -- confirm this is intended.
     """
     if not self:
         return
     if self.find(self) is not None:
         self.update()
         return

     connection = self.connect_db()
     try:
         cursor = connection.cursor()
         statement = 'INSERT INTO ' + self.table_name
         statement += '(' + ', '.join(self.property_names) + ') '
         # One '?' placeholder per column, in property_names order.
         placeholders = ['?'] * len(self.property_names)
         statement += 'VALUES (' + ', '.join(placeholders) + ')'
         values = tuple(getattr(self, column) for column in self.property_names)
         cursor.execute(statement, values)
         connection.commit()
         logger.info('Inserted item: %s' % self)
     except Exception as error:
         connection.rollback()
         logger.error('Unable to insert the item: %s' % self)
         logger.error(error)
     finally:
         connection.close()
Ejemplo n.º 13
0
 def extract_records_as_bytes(cls, format='txt'):
     """Write every record to a temporary file and return the file's bytes.

     The first row holds the upper-cased names from ``cls.property_names``;
     each following row holds one record from ``cls.findall()``.

     :param format: ``'xlsx'``, ``'csv'`` or ``'txt'`` (case-insensitive).
     :return: the raw content of the generated file.
     :raises Exception: for an unsupported ``format``; any other error from
         fetching or writing is logged and re-raised unchanged.
     """
     import xlsxwriter
     import unicodecsv
     import tempfile
     import os
     # Only the path is needed.  Close the handle immediately so nothing is
     # left open; delete=False keeps the file on disk until it is removed
     # in the finally block.
     tmp_handle = tempfile.NamedTemporaryFile(prefix='zjobs.%s.' % cls.__name__, suffix=('.%s' % format), delete=False)
     tmp_file = tmp_handle.name
     tmp_handle.close()
     try:
         records = cls.findall()
         header = [property_name.upper() for property_name in cls.property_names]
         file_format = format.lower()
         if file_format == 'xlsx':
             workbook = xlsxwriter.Workbook(tmp_file, {'default_date_format': 'yyyy-mm-dd'})
             worksheet = workbook.add_worksheet('crawled_jobs')
             worksheet.set_column('A:M', 40)
             worksheet.write_row(0, 0, header)
             for rowIdx, record in enumerate(records):
                 worksheet.write_row(rowIdx + 1, 0, [getattr(record, property_name) for property_name in cls.property_names])
             workbook.close()
         elif file_format == 'csv':
             # unicodecsv writes encoded bytes, so the file must be binary.
             with open(tmp_file, 'wb') as f:
                 writer = unicodecsv.writer(f, encoding='utf-8')
                 writer.writerow(header)
                 for record in records:
                     writer.writerow([getattr(record, property_name) for property_name in cls.property_names])
         elif file_format == 'txt':
             with open(tmp_file, 'w') as f:
                 f.write('\t'.join(header) + '\n')
                 for record in records:
                     values = [getattr(record, property_name) for property_name in cls.property_names]
                     # None becomes an empty cell; everything else is repr()'d.
                     f.write('\t'.join(['' if value is None else repr(value) for value in values]) + '\n')
         else:
             raise Exception("'%s' format is not supported" % format)
         with open(tmp_file, 'rb') as f:
             return f.read()
     except Exception as e:
         logger.error(e)
         logger.error('Unable to extract all records as bytes')
         # Bare raise keeps the original traceback.
         raise
     finally:
         os.remove(tmp_file)
Ejemplo n.º 14
0
    def create_db(cls):
        """Drop and recreate every application table with its unique index.

        For each of CRAWLED_JOBS, JOB_REJECTION_RULES, BLOCKED_CONTACTS, USERS
        and DOCS, in that order: drop the table, drop the index, create the
        table, create the unique index.  Everything is committed once at the
        end.  On any error the failure is logged and the transaction is
        rolled back; nothing is re-raised by the ``except`` branch.

        Existing data in these tables is discarded.
        """
        # One entry per table:
        # (table name, index name, CREATE TABLE statement, CREATE INDEX statement)
        schema = [
            ('CRAWLED_JOBS', 'job_title_idx', '''
                CREATE TABLE IF NOT EXISTS CRAWLED_JOBS(
                    source            text,
                    crawled_date      date,
                    publish_date      date,
                    job_title         text,
                    job_desc          text,
                    job_details_link  text,
                    job_location      text,
                    job_country       text,
                    salary            text,
                    employer_name     text,
                    contact           text
                )
                ''', '''
                CREATE UNIQUE INDEX job_title_idx ON CRAWLED_JOBS(job_title)
            '''),
            ('JOB_REJECTION_RULES', 'reject_pattern_idx', '''
                CREATE TABLE IF NOT EXISTS JOB_REJECTION_RULES(
                    reject_pattern    text,
                    reject_reason     text
                )
                ''', '''
                CREATE UNIQUE INDEX reject_pattern_idx ON JOB_REJECTION_RULES(reject_pattern)
            '''),
            ('BLOCKED_CONTACTS', 'blocked_contacts_idx', '''
                CREATE TABLE IF NOT EXISTS BLOCKED_CONTACTS(
                    contact      text,
                    block_reason text
                )
                ''', '''
                CREATE UNIQUE INDEX blocked_contacts_idx ON BLOCKED_CONTACTS(contact)
            '''),
            ('USERS', 'users_idx', '''
                CREATE TABLE IF NOT EXISTS USERS(
                    username            text,
                    password            text,
                    email               text,
                    subscription_status text,
                    role           text,
                    last_login_date     date,
                    register_date       date
                )
                ''', '''
                CREATE UNIQUE INDEX users_idx ON USERS(username)
            '''),
            ('DOCS', 'docs_idx', '''
                CREATE TABLE IF NOT EXISTS DOCS(
                    filename              text,
                    content_type          text,
                    content               bytea,
                    uploaded_by           text,
                    uploaded_date         date
                )
                ''', '''
                CREATE UNIQUE INDEX docs_idx ON DOCS(filename)
            '''),
        ]

        connection = cls.datasource.get_connection()
        try:
            cursor = connection.cursor()

            for table_name, index_name, create_table_sql, create_index_sql in schema:
                cursor.execute('DROP TABLE IF EXISTS ' + table_name)
                cursor.execute('DROP INDEX IF EXISTS ' + index_name)
                cursor.execute(create_table_sql)
                cursor.execute(create_index_sql)
                logger.info("created table and indexes for " + table_name)

            connection.commit()
            logger.info('done create database')
        except Exception as error:
            logger.error('Unable to run create_db')
            logger.error(error)
            connection.rollback()
        finally:
            connection.close()
Ejemplo n.º 15
0
    def create_db(cls):
        """Drop and recreate every application table with its unique index.

        For each of CRAWLED_JOBS, JOB_REJECTION_RULES, BLOCKED_CONTACTS, USERS
        and DOCS, in that order: drop the table, drop the index, create the
        table, create the unique index.  Everything is committed once at the
        end.  On any error the failure is logged and the transaction is
        rolled back; nothing is re-raised by the ``except`` branch.

        Existing data in these tables is discarded.
        """
        # One entry per table:
        # (table name, index name, CREATE TABLE statement, CREATE INDEX statement)
        schema = [
            ('CRAWLED_JOBS', 'job_title_idx', '''
                CREATE TABLE IF NOT EXISTS CRAWLED_JOBS(
                    source            text,
                    crawled_date      date,
                    publish_date      date,
                    job_title         text,
                    job_desc          text,
                    job_details_link  text,
                    job_location      text,
                    job_country       text,
                    salary            text,
                    employer_name     text,
                    contact           text
                )
                ''', '''
                CREATE UNIQUE INDEX job_title_idx ON CRAWLED_JOBS(job_title)
            '''),
            ('JOB_REJECTION_RULES', 'reject_pattern_idx', '''
                CREATE TABLE IF NOT EXISTS JOB_REJECTION_RULES(
                    reject_pattern    text,
                    reject_reason     text
                )
                ''', '''
                CREATE UNIQUE INDEX reject_pattern_idx ON JOB_REJECTION_RULES(reject_pattern)
            '''),
            ('BLOCKED_CONTACTS', 'blocked_contacts_idx', '''
                CREATE TABLE IF NOT EXISTS BLOCKED_CONTACTS(
                    contact      text,
                    block_reason text
                )
                ''', '''
                CREATE UNIQUE INDEX blocked_contacts_idx ON BLOCKED_CONTACTS(contact)
            '''),
            ('USERS', 'users_idx', '''
                CREATE TABLE IF NOT EXISTS USERS(
                    username            text,
                    password            text,
                    email               text,
                    subscription_status text,
                    role           text,
                    last_login_date     date,
                    register_date       date
                )
                ''', '''
                CREATE UNIQUE INDEX users_idx ON USERS(username)
            '''),
            ('DOCS', 'docs_idx', '''
                CREATE TABLE IF NOT EXISTS DOCS(
                    filename              text,
                    content_type          text,
                    content               bytea,
                    uploaded_by           text,
                    uploaded_date         date
                )
                ''', '''
                CREATE UNIQUE INDEX docs_idx ON DOCS(filename)
            '''),
        ]

        connection = cls.datasource.get_connection()
        try:
            cursor = connection.cursor()

            for table_name, index_name, create_table_sql, create_index_sql in schema:
                cursor.execute('DROP TABLE IF EXISTS ' + table_name)
                cursor.execute('DROP INDEX IF EXISTS ' + index_name)
                cursor.execute(create_table_sql)
                cursor.execute(create_index_sql)
                logger.info("created table and indexes for " + table_name)

            connection.commit()
            logger.info('done create database')
        except Exception as error:
            logger.error('Unable to run create_db')
            logger.error(error)
            connection.rollback()
        finally:
            connection.close()