def setUpClass(cls):
    """Prepare the fixtures shared by every test of the class.

    Sets up logging, verifies that the ``DROPBOX_API_KEY`` environment
    variable exists, loads the all-arguments template configuration and
    deletes the remote tests folder path through the cloudstore.

    :raises Exception: when ``DROPBOX_API_KEY`` is missing from the environment.
    """
    cls._setup_log()

    # Abort early when the API key variable is absent.
    api_key_present = "DROPBOX_API_KEY" in os.environ
    if not api_key_present:
        missing_key_message = 'DROPBOX_API_KEY env variable is not set!'
        logger.error(missing_key_message)
        raise Exception(missing_key_message)

    logger.info('Loading Configuration..')
    all_args_conf_path = os.path.join(cls.test_data_path, 'template_conf_all_args.yml')
    cls.configuration = Configuration(config_src=all_args_conf_path)
    cls.remote_tests_folder = '/job_bot_tests'

    # Delete the remote tests folder (presumably leftovers from an earlier run).
    first_cloudstore_config = cls.configuration.get_cloudstores()[0]
    JobBotDropboxCloudstore(config=first_cloudstore_config).delete_file(cls.remote_tests_folder)
def test_update_get_email_data(self):
    """Round-trip the subject/body files of every email type through the cloudstore.

    For each email type the backup subject and body files are copied to the
    type-specific file names, uploaded, checked against the remote listing,
    renamed locally, downloaded again and compared byte-for-byte with the
    renamed originals, which are removed afterwards.
    """
    store = JobBotDropboxCloudstore(config=self.configuration.get_cloudstores()[0],
                                    remote_files_folder=self.remote_tests_folder)
    # One entry per email type: (type name, download method, upload method)
    scenarios = (
        ('application_to_send',
         store.get_application_to_send_email_data,
         store.update_application_to_send_email_data),
        ('inform_should_call',
         store.get_inform_should_call_email_data,
         store.update_inform_should_call_email_data),
        ('inform_success',
         store.get_inform_success_email_data,
         store.update_inform_success_email_data),
    )
    local_dir = store.local_files_folder

    for kind, download, upload in scenarios:
        subject_name = '{type}_subject.txt'.format(type=kind)
        body_name = '{type}_body.html'.format(type=kind)

        # Seed the type-specific files from the shared backups
        copyfile(os.path.join(local_dir, 'bck_subject.txt'), os.path.join(local_dir, subject_name))
        copyfile(os.path.join(local_dir, 'bck_body.html'), os.path.join(local_dir, body_name))

        # Upload the email data of this type
        logger.info('Uploading %s email data..' % kind)
        upload()

        # Both files have to appear in the remote listing
        for uploaded_name in (subject_name, body_name):
            self.assertIn(uploaded_name, store.ls(self.remote_tests_folder).keys())

        # Move the local originals aside so the download cannot be confused with them
        logger.info('Renaming the old file before downloading it..')
        kept_subject = os.path.join(self.test_data_path,
                                    self.file_name + '_{type}_subject.txt'.format(type=kind))
        kept_body = os.path.join(self.test_data_path,
                                 self.file_name + '_{type}_body.html'.format(type=kind))
        os.rename(os.path.join(self.test_data_path, subject_name), kept_subject)
        os.rename(os.path.join(self.test_data_path, body_name), kept_body)

        # Fetch the data back from the cloudstore
        logger.info('Downloading {type} email data..'.format(type=kind))
        got_subject, got_body = download()
        logger.debug("Received: %s and %s" % (got_subject, got_body))

        # The downloaded text must match the bytes of the renamed originals
        for kept_path, downloaded_text in ((kept_subject, got_subject), (kept_body, got_body)):
            with open(kept_path, 'rb') as kept_file:
                self.assertEqual(kept_file.read(), bytes(downloaded_text, encoding='utf-8'))

        # Remove the renamed originals
        for kept_path in (kept_subject, kept_body):
            logger.info("Clearing file: %s" % kept_path)
            os.remove(kept_path)
def test_upload_download_attachment(self):
    """Upload the first configured attachment and verify the downloaded copy.

    The backup of the attachment is copied to its real name, uploaded and
    looked up in the remote listing. The local file is then renamed, the
    attachments are downloaded again and the downloaded bytes are compared
    with the renamed original. The downloaded attachment is removed at the end.
    """
    cloud_store = JobBotDropboxCloudstore(config=self.configuration.get_cloudstores()[0],
                                          remote_files_folder=self.remote_tests_folder)
    attachment_name = cloud_store.attachments_names[0]

    # Copy bck to actual file
    attachment_path = os.path.join(cloud_store.local_files_folder, attachment_name)
    bck_attachment_path = os.path.join(cloud_store.local_files_folder, 'bck_' + attachment_name)
    copyfile(bck_attachment_path, attachment_path)

    # Upload attachments
    logger.info('Uploading attachment..')
    cloud_store.upload_attachments()

    # Check if it was uploaded
    self.assertIn(attachment_name, cloud_store.ls(self.remote_tests_folder).keys())

    # Rename the old file before downloading it
    logger.info('Renaming the old file before downloading it..')
    renamed_path = os.path.join(self.test_data_path, self.file_name)
    os.rename(attachment_path, renamed_path)

    # Download it
    logger.info('Downloading attachment..')
    cloud_store.download_attachments()

    # Compare contents of downloaded file with the original.
    # Context managers close both handles even when the assertion fails.
    with open(renamed_path, 'rb') as original_file, open(attachment_path, 'rb') as downloaded_file:
        self.assertEqual(original_file.read(), downloaded_file.read())

    # Delete the attachment
    os.remove(attachment_path)
def main():
    """Initialise the application and dispatch on the requested run mode.

    Supported run modes: ``list_emails``, ``remove_email``, ``upload_files``,
    ``create_table`` and ``crawl_and_send``.

    :Example:
    python main.py [-m crawl_and_send] -c confs/template_conf.yml -l logs/output.log

    :raises argparse.ArgumentTypeError: when the run mode is not one of the above.
    """
    args, configuration = init_main()
    run_mode = args.run_mode

    # The stores are built lazily so each mode only creates what it uses.
    def new_data_store():
        return JobBotMySqlDatastore(config=configuration.get_datastores()[0])

    def new_cloud_store():
        return JobBotDropboxCloudstore(config=configuration.get_cloudstores()[0])

    if run_mode == 'list_emails':
        show_ads_checked(ads=new_data_store().get_applications_sent())
        return

    if run_mode == 'remove_email':
        new_data_store().remove_ad(email_id=args.email_id)
        return

    if run_mode == 'upload_files':
        upload_files_to_cloudstore(cloud_store=new_cloud_store())
        return

    if run_mode == 'create_table':
        new_data_store().create_applications_sent_table()
        return

    if run_mode == 'crawl_and_send':
        crawl_and_send_loop(lookup_url=configuration.lookup_url,
                            check_interval=configuration.check_interval,
                            crawl_interval=configuration.crawl_interval,
                            anchor_class_name=configuration.anchor_class_name,
                            data_store=new_data_store(),
                            cloud_store=new_cloud_store(),
                            email_app=GmailEmailApp(config=configuration.get_email_apps()[0],
                                                    test_mode=configuration.test_mode))
        return

    # None of the known modes matched
    logger.error('Incorrect run_mode specified!')
    raise argparse.ArgumentTypeError('Incorrect run_mode specified!')
def test_init(self):
    """Check the flags a cloudstore derives from its configuration.

    A cloudstore built from the all-arguments configuration is expected to
    have at least one attachment name and every ``_update_*`` flag truthy.
    One built from the required-arguments-only configuration is expected to
    have no attachment names and every ``_update_*`` flag falsy.
    """
    req_only_conf = Configuration(
        config_src=os.path.join(self.test_data_path, 'template_conf_required_args_only.yml'))

    cloud_store = JobBotDropboxCloudstore(config=self.configuration.get_cloudstores()[0],
                                          remote_files_folder=self.remote_tests_folder)
    boolean_attributes = [len(cloud_store.attachments_names) > 0,
                          cloud_store._update_stop_words,
                          cloud_store._update_application_to_send_email,
                          cloud_store._update_inform_success_email,
                          cloud_store._update_inform_should_call_email]
    # The expression under test must be the FIRST argument: the second
    # positional argument of assertTrue is only the failure message.
    self.assertTrue(all(boolean_attributes))

    req_only_cloud_store = JobBotDropboxCloudstore(config=req_only_conf.get_cloudstores()[0],
                                                   remote_files_folder=self.remote_tests_folder)
    req_only_boolean_attributes = [len(req_only_cloud_store.attachments_names) == 0,
                                   not req_only_cloud_store._update_stop_words,
                                   not req_only_cloud_store._update_application_to_send_email,
                                   not req_only_cloud_store._update_inform_success_email,
                                   not req_only_cloud_store._update_inform_should_call_email]
    self.assertTrue(all(req_only_boolean_attributes))
def test_update_get_stop_words_data(self):
    """Upload the stop words file and verify the data read back from the cloudstore.

    The backup stop words file is copied to ``stop_words.txt``, uploaded and
    looked up in the remote listing. The local file is then renamed, the stop
    words are fetched from the cloudstore, rendered as a list literal string
    and compared with the bytes of the renamed original.
    """
    cloud_store = JobBotDropboxCloudstore(config=self.configuration.get_cloudstores()[0],
                                          remote_files_folder=self.remote_tests_folder)

    # Copy bck to actual file
    bck_stop_words_path = os.path.join(cloud_store.local_files_folder, 'bck_stop_words.txt')
    stop_words_path = os.path.join(cloud_store.local_files_folder, 'stop_words.txt')
    copyfile(bck_stop_words_path, stop_words_path)

    # Upload stop_words
    logger.info('Uploading stop_words..')
    cloud_store.update_stop_words_data()

    # Check if it was uploaded
    self.assertIn('stop_words.txt', cloud_store.ls(self.remote_tests_folder).keys())

    # Rename the old file before downloading it
    # NOTE(review): the rename reads from test_data_path while the copy above wrote to
    # cloud_store.local_files_folder - presumably the same directory; confirm.
    logger.info('Renaming the old file before downloading it..')
    renamed_path = os.path.join(self.test_data_path, self.file_name)
    os.rename(os.path.join(self.test_data_path, 'stop_words.txt'), renamed_path)

    # Download it
    logger.info('Downloading stop_words..')
    stop_words_downloaded = cloud_store.get_stop_words_data()
    # Rebuild the list-literal text that the original file is compared against
    stop_words_downloaded = "['" + "', '".join(stop_words_downloaded) + "']"

    # Compare contents of downloaded file with the original.
    # The context manager closes the handle even when the assertion fails.
    with open(renamed_path, 'rb') as original_file:
        self.assertEqual(original_file.read(), bytes(stop_words_downloaded, encoding='utf8'))
def crawl_and_send_loop(lookup_url: str, check_interval: int, crawl_interval: int,
                        anchor_class_name: str, data_store: JobBotMySqlDatastore,
                        cloud_store: JobBotDropboxCloudstore,
                        email_app: GmailEmailApp) -> None:
    """The main loop.

    Crawls the ad site for new ads, sends an application to every new ad that
    exposes an email address, informs the applicant about each outcome and
    records every handled ad in the datastore. Runs until interrupted.

    :param lookup_url: url handed to the crawler when looking for new ads
    :param check_interval: seconds to sleep between two crawl rounds
    :param crawl_interval: interval forwarded to the crawler's ``get_new_ads``
    :param anchor_class_name: anchor class name forwarded to the crawler
    :param data_store: datastore holding the applications already sent
    :param cloud_store: cloudstore providing stop words, email data and attachments
    :param email_app: email application used to send every email
    """
    ad_site_crawler = XeGrAdSiteCrawler(stop_words=cloud_store.get_stop_words_data(),
                                        anchor_class_name=anchor_class_name)
    attachments_local_paths = [os.path.join(cloud_store.local_files_folder, attachment_name)
                               for attachment_name in cloud_store.attachments_names]

    # Get the attachments and the email data from the cloudstore
    cloud_store.download_attachments()
    application_to_send_subject, application_to_send_html = \
        cloud_store.get_application_to_send_email_data()
    inform_should_call_subject, inform_should_call_html = \
        cloud_store.get_inform_should_call_email_data()
    inform_success_subject, inform_success_html = \
        cloud_store.get_inform_success_email_data()

    # Kept as a list because it is also handed to the crawler as ``ads_checked``
    links_checked = [row[0] for row in data_store.get_applications_sent(columns='link')]

    logger.info("Waiting for new ads..")
    while True:
        new_ads = list(ad_site_crawler.get_new_ads(lookup_url=lookup_url,
                                                   ads_checked=links_checked,
                                                   crawl_interval=crawl_interval))
        if new_ads:
            # Refresh what has already been handled from the datastore
            links_checked = [row[0] for row in data_store.get_applications_sent(columns='link')]
            emails_checked = {row[0] for row in data_store.get_applications_sent(columns='email')}

            for link, email in new_ads:
                if link in links_checked:
                    continue
                if email is not None and email in emails_checked:
                    continue

                if email is None:
                    # Email applicant to inform him that he should call manually
                    logger.info("Link ({}) has no email. Inform the applicant.".format(link))
                    email_app.send_email(subject=inform_should_call_subject,
                                         html=inform_should_call_html.format(link=link),
                                         to=[email_app.get_self_email()])
                else:
                    # Wait a minute before sending the application
                    time.sleep(60)
                    logger.info("Sending email to: {}. Ad Link: {}".format(email, link))
                    # The link is supplied both positionally and by name so that a
                    # template using "{}"/"{0}" or "{link}" (like the sibling
                    # templates) renders instead of raising KeyError.
                    email_app.send_email(subject=application_to_send_subject,
                                         html=application_to_send_html.format(link, link=link),
                                         to=[email],
                                         attachments=attachments_local_paths)
                    # Inform applicant that an application has been sent successfully
                    email_app.send_email(subject=inform_success_subject,
                                         html=inform_success_html.format(email=email, link=link),
                                         to=[email_app.get_self_email()])

                email_info = {
                    "link": link,
                    "email": email,
                    # Naive UTC timestamp, stored in ISO-8601 form
                    "sent_on": datetime.datetime.utcnow().isoformat()
                }
                data_store.save_sent_application(email_info)

                # Remember what was just handled so a later ad of the same batch
                # with the same link or email does not trigger a second email.
                links_checked.append(link)
                if email is not None:
                    emails_checked.add(email)

            logger.info("Waiting for new ads..")

        # Look for new ads again after check_interval seconds
        logger.debug("Sleeping for {check_interval} seconds..".format(
            check_interval=check_interval))
        time.sleep(check_interval)
def upload_files_to_cloudstore(cloud_store: JobBotDropboxCloudstore):
    """Push the stop words, the three email data sets and the attachments to the cloudstore.

    :param cloud_store: cloudstore whose update/upload methods are invoked, in order
    """
    # Order matches the sequence in which the uploads are performed.
    upload_steps = (
        cloud_store.update_stop_words_data,
        cloud_store.update_application_to_send_email_data,
        cloud_store.update_inform_should_call_email_data,
        cloud_store.update_inform_success_email_data,
        cloud_store.upload_attachments,
    )
    for upload_step in upload_steps:
        upload_step()
def tearDownClass(cls):
    """Delete the remote tests folder through a cloudstore built from the first cloudstore config."""
    first_cloudstore_config = cls.configuration.get_cloudstores()[0]
    JobBotDropboxCloudstore(config=first_cloudstore_config).delete_file(cls.remote_tests_folder)