def play_selected_bot_thread(self):
    """Run the bot selected from the dropdown menu.

    Ensures a JobSiteAccount row exists for the selected site and flags it
    running, verifies login creds are present, then starts (or reuses) the
    applier QThread for that site.
    """
    job_site_bot_name = self.ui.jobsite_select.currentText() + '_Bot'
    CommonFuncs.log(self, 'attempting to play selected: %s' % job_site_bot_name)
    with CommonFuncs.get_db() as db:
        try:
            bot = CommonFuncs.get_bot(job_site_bot_name)
            bot.is_running = True
        except Exception:
            # no account row for this site yet -- create one in the running state
            bot = JobSiteAccount()
            bot.site_bot_name = job_site_bot_name
            bot.is_running = True
        db.add(bot)
        db.commit()  # persist is_running=True for both the existing-row and new-row paths
    jobsiteaccount = CommonFuncs.get_bot(job_site_bot_name)
    if jobsiteaccount.username is None or jobsiteaccount.password is None:
        CommonFuncs.log(self, 'no valid login creds available')
        CommonFuncs.log(self, 'playing of bot canceled')
        # BUGFIX: revert the is_running flag set above -- no thread was started,
        # so the db must not claim the bot is running.
        with CommonFuncs.get_db() as db:
            jobsiteaccount.is_running = False
            db.add(jobsiteaccount)
            db.commit()
        return
    applier = bot_threads[job_site_bot_name]['applier']
    if applier is None or not applier.isRunning():
        # only build thread, if it doesn't exist
        applier = BotThread(job_site_bot_name)
        bot_threads[job_site_bot_name]['applier'] = applier
        applier.started.connect(self.bot_thread_started)
        applier.finished.connect(self.bot_thread_finished)
        applier.start()
        CommonFuncs.log(self, 'playing of %s successful!' % job_site_bot_name)
    else:
        CommonFuncs.log(self, 'playing of %s unsuccessful!' % job_site_bot_name)
def job_site_account_select(self):
    """Load the user's account creds and job stats for the selected site.

    Updates the todo/applied counters, fills (or clears) the username and
    password boxes, toggles the running indicator, and resets the field
    styling to the unverified (white) state.
    """
    job_site_bot_name = self.ui.jobsite_select.currentText() + '_Bot'
    CommonFuncs.log(self, 'starting to find the account creds and stats for the user after job site account select')
    todo_count = 0
    applied_count = 0
    try:
        with CommonFuncs.get_db() as db:
            # .count() lets the database do the counting instead of
            # materializing every row just to take len() of the list
            todo_count = db.query(UnprocessedJob).filter(
                UnprocessedJob.bot_type == job_site_bot_name).count()
            applied_count = db.query(Job).filter(and_(
                Job.job_site == JOB_SITE_LINKS[self.ui.jobsite_select.currentText()]['job_site'],
                Job.applied == True)).count()
    except Exception:
        # best-effort: fall back to zero counts if the db is unavailable
        pass
    self.ui.todoforsite_btn.setText(str(todo_count))
    self.ui.appliedforsite_btn.setText(str(applied_count))
    jobsiteaccount = CommonFuncs.get_bot(job_site_bot_name)
    if not jobsiteaccount:
        self.ui.jobsiteusername_box.setText('')
        self.ui.jobsitepassword_box.setText('')
    else:
        self.ui.jobsiteusername_box.setText(jobsiteaccount.username)
        self.ui.jobsitepassword_box.setText(jobsiteaccount.password)
        if jobsiteaccount.is_running:
            self.ui.playload_lbl.show()
        else:
            self.ui.playload_lbl.hide()
    self.ui.jobsiteusername_box.setStyleSheet('background-color: white')
    self.ui.jobsitepassword_box.setStyleSheet('background-color: white')
    self.ui.verify_btn.setIcon(QtGui.QIcon(STATIC_FILES['checked']))
    CommonFuncs.log(self, 'finished finding the account creds and stats for the user after job site account select')
def delete_selected_job_site(self):
    """Ask the user to confirm, then delete the selected site's stored
    account creds and all of its queued (unprocessed) jobs."""
    confirm = QMessageBox()  # show error message
    confirm.setIcon(QMessageBox.Critical)
    confirm.setText("Your login creds and unprocessed jobs will be deleted for this site.")
    confirm.setInformativeText("Are you sure you want to continue?")
    confirm.setWindowTitle("Warning About Deletion: Irreversible")
    confirm.setStandardButtons(QMessageBox.Ok | QMessageBox.Cancel)
    if confirm.exec() != QMessageBox.Ok:
        return
    self.ui.deleteload_lbl.show()
    job_site = self.ui.jobsite_select.currentText() + '_Bot'
    with CommonFuncs.get_db() as db:
        # DELETE ACCOUNT
        account = CommonFuncs.get_bot(job_site)
        if account is not None:
            db.delete(account)
            db.commit()
            CommonFuncs.log(self, 'successfully deleted account for: ' + job_site)
        # DELETE ANY UNPROCESSED JOBS
        db.query(UnprocessedJob).filter(
            UnprocessedJob.bot_type == job_site).delete(synchronize_session=False)
        db.commit()
        CommonFuncs.log(self, 'successfully deleted all unprocessed jobs for account: ' + job_site)
    self.job_site_account_select()  # refresh job site account section of gui
    self.ui.deleteload_lbl.hide()
def pause_selected_bot_thread(self):
    """Flag the selected site's bot as not running in the db.

    The applier thread polls this flag and stops on its own; this method
    only sends the signal, it does not join the thread.
    """
    job_site_bot_name = self.ui.jobsite_select.currentText() + '_Bot'
    CommonFuncs.log(self, 'attempting to send pause signal to bot: %s' % job_site_bot_name)
    with CommonFuncs.get_db() as db:
        try:
            bot = CommonFuncs.get_bot(job_site_bot_name)
            if bot.is_running:
                self.ui.pauseload_lbl.show()  # only show loading gif if there is bot to pause
            bot.is_running = False  # idempotent: pausing an already-paused bot is a no-op
            db.add(bot)
            db.commit()
        except Exception:
            # no account row (or db failure): nothing to pause, log and move on
            CommonFuncs.log(self, 'problem sending pause signal for bot: %s' % job_site_bot_name, level='debug')
    CommonFuncs.log(self, 'pause signal for %s successfully sent' % job_site_bot_name)
def verify_job_site_account_thread_finished(self):
    """Handle completion of the credential-verification thread.

    Re-enables the account form, then either shows a failure dialog and
    marks the fields red (verification failed), or persists the verified
    creds to the db and marks the fields green.
    """
    CommonFuncs.log(self, 'completed verification process of account creds')
    self.ui.jobsiteusername_box.setEnabled(True)
    self.ui.jobsitepassword_box.setEnabled(True)
    self.ui.jobsite_select.setEnabled(True)
    self.ui.jobsiteaccountcancel_btn.setEnabled(True)
    self.ui.verifyload_lbl.hide()
    self.ui.jobsiteaccountcancel_btn.setIcon(QIcon(STATIC_FILES['revert']))
    if self.threads['verify_job_site_account'].error:  # truthiness test; '== True' was redundant
        self.ui.jobsiteusername_box.setStyleSheet('background-color: rgb(247, 126, 74)')
        self.ui.jobsitepassword_box.setStyleSheet('background-color: rgb(247, 126, 74)')
        self.ui.verify_btn.setIcon(QtGui.QIcon(STATIC_FILES['submit']))  # show the site creds need to be verified
        msg = QMessageBox()  # show error message
        msg.setIcon(QMessageBox.Critical)
        msg.setText("Job Site Account Verification Failed")
        msg.setInformativeText("Please correct your username and password and try again.")
        msg.setWindowTitle("Job Site Login Failed")
        msg.setStandardButtons(QMessageBox.Ok)
        msg.exec()
    else:
        # COMMIT THE JOB SITE ACCOUNT CREDS
        jobsitestring = str(self.ui.jobsite_select.currentText()) + '_Bot'
        try:
            jobsiteaccount = CommonFuncs.get_bot(jobsitestring)
        except Exception:
            jobsiteaccount = None
        if not jobsiteaccount:
            # first verification for this site: create the account row
            jobsiteaccount = JobSiteAccount()
            jobsiteaccount.site_bot_name = jobsitestring
        jobsiteaccount.username = self.ui.jobsiteusername_box.text()
        jobsiteaccount.password = self.ui.jobsitepassword_box.text()
        with CommonFuncs.get_db() as db:
            db.add(jobsiteaccount)
            db.commit()
        CommonFuncs.log(self, 'successfully stored valid account creds')
        self.ui.verify_btn.setIcon(QtGui.QIcon(STATIC_FILES['checked']))  # show the site creds have been verified
        self.ui.jobsiteusername_box.setStyleSheet('background-color: rgb(70, 188, 128)')
        self.ui.jobsitepassword_box.setStyleSheet('background-color: rgb(70, 188, 128)')
def __init__(self): self.init_process = True # some processes in functions disabled during initialization if os.path.isfile(LOG_FILE_PATH): os.remove(LOG_FILE_PATH) # delete the log from the last session CommonFuncs.log(self, 'Jobbybot session started') self.user_settings = None # store login creds, job profile, etc self.threads = THREADS_DICT # RESET ALL BOTS TO NOT IS_RUNNING for j_site in JOB_SITE_LINKS: site_bot_name = j_site + '_Bot' with CommonFuncs.get_db() as db: try: bot = CommonFuncs.get_bot(site_bot_name) bot.is_running = False except: bot = JobSiteAccount() bot.is_running = False bot.site_bot_name = site_bot_name db.add(bot) db.commit() CommonFuncs.log(self,'reset %s to not running in db' % site_bot_name) # CHECK FOR SETTINGS OBJECT - create if it does not exist settings = None with CommonFuncs.get_db() as db: try: settings = db.query(JobbybotSettings).one() except: pass if not settings: new_settings = JobbybotSettings() new_settings.connect_to_gsheets = False new_settings.delete_ujobs_on_jprofile_edit = True db.add(new_settings) db.commit() # add settings object to database # START GUI SETUP app = QApplication(sys.argv) self.MainWindow = QtWidgets.QMainWindow() self.ui = Ui_MainWindow() self.ui.setupUi(self.MainWindow) QApplication.setStyle(QStyleFactory.create('Fusion')) self.MainWindow.setWindowIcon(QIcon(STATIC_FILES['logo'])) self.MainWindow.setGeometry(0,60,778,629) self.initialize_gui() CommonFuncs.log(self,'finished initializing gui') CommonFuncs.log(self,'Launching Jobbybot!') self.threads['stats'].start() self.threads['database_tables'].start() # OPEN AND RUN THE GUI self.init_process = False self.MainWindow.show() self.job_profile_table_edited() # initial population of the results for the job profile sys.exit(app.exec_())
def run(self):
    """Applier thread main loop.

    Logs in to the job site with the stored creds, then repeatedly pops an
    unprocessed job link from the db and applies to it, kicking off the
    site's webcrawler whenever the queue runs dry. Loops until the bot's
    is_running flag in the db is cleared.
    """
    self.isRunning()
    self.set_error(False)  # reset error
    # NOTE(review): eval() resolves the bot class from its name; acceptable
    # only because site_bot_name is app-controlled, never user input.
    Bot_Class = eval(self.site_bot_name)
    site_name = self.site_bot_name.split('_Bot')[0]
    spider_name = '_' + site_name.lower() + '_' + 'webcrawler.py'
    cached_username = ''
    cached_password = ''
    logged_in = False
    # APPLY LOOP
    bot = CommonFuncs.get_bot(self.site_bot_name)
    new_links = ['']
    with CommonFuncs.get_driver(visible=WEB_DRIVERS_VISIBLE, headless=WEB_DRIVERS_HEADLESS) as driver:
        bot_inst = Bot_Class(driver)
        while bot.is_running and len(new_links) > 0:
            if cached_username != bot.username or cached_password != bot.password:
                # if the username or password changed, attempt new login
                cached_username = bot.username
                cached_password = bot.password
                logged_in = bot_inst.login(bot)
            if logged_in:
                # if logged in and bot is running, apply to a job
                new_link = None
                with CommonFuncs.get_db() as db:
                    try:
                        # pop queued links until one is found that is not
                        # already in the Job table
                        new_to_db = False
                        while not new_to_db:
                            unprocessed_job = db.query(UnprocessedJob).filter(
                                UnprocessedJob.bot_type == self.site_bot_name).all()
                            # IndexError below (empty queue) ends the search
                            new_link = unprocessed_job[0].job
                            db.delete(unprocessed_job[0])
                            db.commit()
                            db_results = db.query(Job).filter(Job.link_to_job == new_link).all()
                            if db_results is None or db_results == []:
                                new_to_db = True
                    except Exception:
                        new_link = None
                if new_link is not None:
                    CommonFuncs.log(self, 'attempting to apply to: ' + new_link)
                    new_job = bot_inst.apply(new_link)  # goto page and apply
                    if new_job != False and isinstance(new_job, Job):
                        # only add the job to database, if it is an instance
                        with CommonFuncs.get_db() as db:  # save job object to db
                            try:
                                db.add(new_job)
                                db.commit()
                            except Exception as e:
                                print(e)
                else:
                    # BUGFIX: original called CommonFuncs.log(msg) without the
                    # `self` argument every other call site passes.
                    CommonFuncs.log(self, 'applier taking a timeout as it waits for more job links')
                    Jobbybot.run_bot_job_link_webcrawler(spider_name=spider_name)  # start the webcrawler for this bot
                    sleep_count = 5
                    for i in range(sleep_count):
                        # wait for more results, check to make sure the bot is still running
                        if CommonFuncs.is_bot_running(self.site_bot_name):
                            sleep(1)
                        else:
                            break
            # refresh the db row so a pause from the GUI is noticed
            bot = CommonFuncs.get_bot(self.site_bot_name)
            sleep(0.1)
    self.isFinished()
def start_requests(self):
    '''return iterable of job links'''
    with CommonFuncs.get_db() as db:
        # skip crawling entirely while a backlog of 100+ unprocessed jobs
        # exists; count() avoids loading every row just to measure the queue
        backlog = db.query(UnprocessedJob).filter(
            UnprocessedJob.bot_type == 'Ziprecruiter_Bot').count()
    if backlog >= 100:
        return
    start_time = datetime.now()
    job_profile = CommonFuncs.get_job_profile()
    locations = CommonFuncs.get_locations_list(job_profile)
    query_list = CommonFuncs.build_query_string(job_profile=job_profile, or_delim='', bracket1='', bracket2='', adv_supp=False)
    if len(query_list) == 0:
        return
    ##########
    # URL ENCODE EACH QUERY
    ##########
    start_urls = []
    for location in locations:
        for query_string in query_list:
            bot = CommonFuncs.get_bot('Ziprecruiter_Bot')
            if not bot.is_running:
                # verify that the bot is running before continuing to the next page
                return
            query_dict = {'search': query_string, 'location': location}
            encoded_query = urllib.parse.urlencode(query_dict, safe='')
            job_url = JOB_SITE_LINKS['Ziprecruiter']['query'] + '&' + encoded_query
            start_urls.append(job_url)
            # NOTE(review): requests.get has no timeout -- a hung request
            # stalls the whole crawl; consider timeout= here.
            response = html.fromstring(requests.get(job_url).content)
            title_links = response.xpath(
                "//menu[@class='select-menu-submenu t_filter_dropdown_titles']/a/@href")
            # append all of the links from filtering by job title
            start_urls += [JOB_SITE_LINKS['Ziprecruiter']['job_site_base'] + i for i in title_links]
            company_links = response.xpath(
                "//menu[@class='select-menu-submenu t_filter_dropdown_companies']/a/@href")
            # append all of the links from filtering by company
            start_urls += [JOB_SITE_LINKS['Ziprecruiter']['job_site_base'] + i for i in company_links]
    msg = 'time spent building start_urls for Ziprecruiter: ' + str(datetime.now() - start_time)
    # CommonFuncs.log( msg )
    print(msg)
    ##########
    # GET URL RESPONSES AND CALL PARSE FUNCTION TO ITERATE OVER PAGES
    ##########
    print('TOTAL START URLs: ' + str(len(start_urls)))
    i = 1
    for url in start_urls:
        print('LINK#: ' + str(i) + ' WORKING ON NEW START URL: ' + url)
        yield scrapy.Request(url=url, callback=self.parse)
        i += 1
def parse(self, response):
    """Extract quick-apply job links from a Ziprecruiter results page,
    queue the new ones as UnprocessedJob rows, then follow pagination
    while the bot's is_running flag in the db is still set.
    """
    with CommonFuncs.get_db() as db:
        # stop once a backlog of 100+ unprocessed jobs has accumulated
        todoforsite = db.query(UnprocessedJob).filter(
            UnprocessedJob.bot_type == 'Ziprecruiter_Bot').all()
        if len(todoforsite) >= 100:
            return
    # EXTRACT JOB LINKS ON THE PAGE AND COMMIT TO DB
    this_url = response._url
    try:
        searching_by = dict(parse_qsl(urlsplit(this_url).query))
        print('searching by: ' + str(searching_by))
    except Exception:
        pass
    # CommonFuncs.log('starting parsing job page for ZiprecruiterWebcrawler: ' + response.url)
    new_jobs = None
    try:
        # @data-tracking='quick_apply'
        new_jobs = response.xpath(
            "//div[@class='job_results']/article/div[@class='job_tools']/"
            + "button[@data-tracking='quick_apply']"
            + "/ancestor::article"
            + "/div[@class='job_content']/a/@href").extract()
    except Exception:
        # CommonFuncs.log('could not find jobs on the page: ' + this_url)
        pass
    new_count = 0
    if new_jobs is not None:  # if no results found return
        for job_link in new_jobs:
            # dump the job links to the db
            with CommonFuncs.get_db() as db:
                db_results = db.query(Job).filter(Job.link_to_job == job_link).all()
            if db_results is None or db_results == []:
                # link not seen before -- queue it for the applier
                try:
                    with CommonFuncs.get_db() as db:
                        u_job = UnprocessedJob()
                        u_job.bot_type = 'Ziprecruiter_Bot'
                        u_job.job = job_link
                        db.add(u_job)
                        db.commit()
                    new_count += 1
                except Exception:
                    # CommonFuncs.log('something went wrong in ZiprecruiterWebcrawler trying to commit job link: %s' % job_link, level='debug')
                    pass
    # CommonFuncs.log('%s new jobs found on page %s' % (new_count, response._url) )
    if new_count > 0:
        print('%s new jobs found on page' % new_count)
    ##########
    # JUMP TO NEXT PAGE WHILE THE BOT IS STILL RUNNING
    ##########
    data_next_url = ''
    try:
        data_next_url = response.xpath("//div[@class='job_results']")
        data_next_url = data_next_url[0].root.attrib['data-next-url']
        if len(data_next_url) > 0:
            url = JOB_SITE_LINKS['Ziprecruiter']['job_site_base'] + data_next_url
            bot = CommonFuncs.get_bot('Ziprecruiter_Bot')
            # CommonFuncs.log('finished parsing job page for ZiprecruiterWebcrawler: ' + this_url)
            if bot.is_running:
                # verify that the bot is running before continuing to the next page
                yield scrapy.Request(url=url, callback=self.parse)
            else:
                return
    except Exception:
        # no results container / no data-next-url attribute: end of pagination
        pass

# if __name__ == '__main__':
#     runner = CrawlerRunner()
#     runner.crawl(ZiprecruiterLoginWebcrawler(username='******', password='******'))
#     d = runner.join()
#     d.addBoth(lambda _: reactor.stop())
#     reactor.run()
def parse(self, response):
    """Extract 'Easily apply' job links from an Indeed results page, queue
    the new ones as UnprocessedJob rows, then follow the 'Next' pagination
    link while the bot's is_running flag in the db is still set.
    """
    with CommonFuncs.get_db() as db:
        # stop once a backlog of 100+ unprocessed jobs has accumulated
        todoforsite = db.query(UnprocessedJob).filter(
            UnprocessedJob.bot_type == 'Indeed_Bot').all()
        if len(todoforsite) >= 100:
            return
    this_url = response._url
    try:
        searching_by = dict(parse_qsl(urlsplit(this_url).query))
        print('searching by: ' + str(searching_by))
    except Exception:
        pass
    # CommonFuncs.log('starting parsing job page for IndeedWebcrawler: ' + response.url)
    # COLLECT NEW JOB LINKS FROM SITE
    jobs = response.xpath("//div[@data-tn-component='organicJob']")
    new_count = 0
    for job in jobs:
        bot = CommonFuncs.get_bot('Indeed_Bot')
        if not bot.is_running:
            return  # exit if the bot is not running
        extracted_job = job.extract()
        if 'Easily apply' in extracted_job:
            job_link = JOB_SITE_LINKS['Indeed']['job_site_base'] + job.xpath('h2/a/@href').extract()[0]
            with CommonFuncs.get_db() as db:
                db_results = db.query(Job).filter(Job.link_to_job == job_link).all()
            if db_results is None or db_results == []:
                # link not seen before -- queue it for the applier
                new_count += 1
                try:
                    with CommonFuncs.get_db() as db:
                        u_job = UnprocessedJob()
                        u_job.bot_type = 'Indeed_Bot'
                        u_job.job = job_link
                        db.add(u_job)
                        db.commit()
                except Exception:
                    pass
    # CommonFuncs.log('%s new jobs found on page %s' % (new_count, response.url))
    if new_count > 0:
        print('%s new jobs found on page' % new_count)
    ##########
    # JUMP TO NEXT PAGE WHILE THE BOT IS STILL RUNNING
    ##########
    pagination_links = response.xpath("//div[@class='pagination']/a").extract()
    for link in pagination_links:
        if 'Next' in link:
            bot = CommonFuncs.get_bot('Indeed_Bot')
            if bot.is_running:
                # verify that the bot is running before continuing to the next page
                # CommonFuncs.log('finished parsing job page for IndeedWebcrawler: ' + this_url)
                next_link = bs(link, 'lxml').body.find('a').get('href')
                full_link = JOB_SITE_LINKS['Indeed']['job_site_base'] + next_link
                yield scrapy.Request(url=full_link, callback=self.parse)
            else:
                return

# if __name__ == '__main__':
#     runner = CrawlerRunner()
#     runner.crawl(IndeedWebcrawler)
#     d = runner.join()
#     d.addBoth(lambda _: reactor.stop())
#     reactor.run()