job_title = sys.argv[1] job_location = sys.argv[2] radius = sys.argv[3] except IndexError: raise Exception( 'Program needs a job title, job location, and radius inputted!') base_URL = 'http://jobs.monster.com/search/?' query_parameters = [ 'q={}'.format('-'.join(job_title.split())), '&where={}'.format('-'.join(job_location.split())), '&sort=dt.rv.di', '&rad={}'.format(radius) ] query_URL = format_query(base_URL, query_parameters) driver = issue_driver_query(query_URL) try: num_jobs_txt = get_num_jobs_txt(driver) num_jobs = int(parse_num(num_jobs_txt, 0)) except: print('No jobs for search {} in {}'.format(job_title, job_location)) sys.exit(0) current_date = str(datetime.datetime.now(pytz.timezone('US/Mountain'))) storage_dct = { 'job_site': 'monster', 'num_jobs': num_jobs, 'date': current_date, 'title': job_title, 'location': job_location
elif elem2_text: return int(elem2_text) == num_pages if __name__ == '__main__': try: job_title = sys.argv[1] job_location = sys.argv[2] except IndexError: raise Exception('Program needs a job title and job location inputted!') # Issue the job query. base_URL = 'https://www.glassdoor.com/index.htm' query_params = (('KeywordSearch', job_title), ('LocationSearch', job_location)) driver = issue_driver_query(base_URL, query_params) # Find the text holding the number of jobs, and parse it. time.sleep(random.randint(7, 15)) num_jobs_txt = driver.find_elements_by_xpath('//header')[1].text num_jobs = int(parse_num(num_jobs_txt, 0)) current_date = str(datetime.datetime.now(pytz.timezone('US/Mountain'))) storage_dct = { 'job_site': 'glassdoor', 'num_jobs': num_jobs, 'date': current_date, 'title': job_title, 'location': job_location } store_in_mongo([storage_dct], 'job_numbers', 'glassdoor')
# will be passed in, so I'll attempt to get both of those within
# a try except and throw an error otherwise.
try:
    job_title = sys.argv[1]
    job_location = sys.argv[2]
    radius = sys.argv[3]
except IndexError:
    raise Exception('Program needs a job title, job location, and radius inputted!')

# Build the Monster search URL: multi-word terms are joined with '-',
# results sorted by date posted ('dt.rv.di'), limited to `radius` miles.
base_URL = 'http://jobs.monster.com/search/?'
query_parameters = ['q={}'.format('-'.join(job_title.split())),
                    '&where={}'.format('-'.join(job_location.split())),
                    '&sort=dt.rv.di',
                    '&rad={}'.format(radius)]

query_URL = format_query(base_URL, query_parameters)
driver = issue_driver_query(query_URL)

try:
    num_jobs_txt = get_num_jobs_txt(driver)
    num_jobs = int(parse_num(num_jobs_txt, 0))
# FIX: was `except:` with a Python 2 `print 'No jobs ...'` statement. Use the
# print *function* (consistent with the rest of this file) and catch Exception
# rather than everything, so SystemExit/KeyboardInterrupt still propagate.
except Exception:
    print('No jobs for search {} in {}'.format(job_title, job_location))
    sys.exit(0)

# Record the observation, timestamped in Mountain time.
current_date = str(datetime.datetime.now(pytz.timezone('US/Mountain')))
storage_dct = {'job_site': 'monster',
               'num_jobs': num_jobs,
               'date': current_date,
               'title': job_title,
               'location': job_location}
store_in_mongo([storage_dct], 'job_numbers', 'monster')

# This loop will be used to keep clicking the next button after
# scraping jobs on that page.
if __name__ == '__main__': # I expect that at the very least a job title and job location # will be passed in, so I'll attempt to get both of those within # a try except and throw an error otherwise. try: job_title = sys.argv[1] job_location = sys.argv[2] except IndexError: raise Exception('Program needs a job title and job location inputted!') # Issue the job query. base_URL = 'https://www.glassdoor.com/index.htm' query_params = (('KeywordSearch', job_title), ('LocationSearch', job_location)) driver = issue_driver_query(base_URL, query_params) # Find the text holding the number of jobs, and parse it. time.sleep(random.randint(7, 15)) num_jobs_txt = driver.find_elements_by_xpath('//header')[1].text num_jobs = int(parse_num(num_jobs_txt, 0)) current_date = str(datetime.datetime.now(pytz.timezone('US/Mountain'))) storage_dct = {'job_site': 'glassdoor', 'num_jobs': num_jobs, 'date': current_date, 'title': job_title, 'location': job_location} store_in_mongo([storage_dct], 'job_numbers', 'glassdoor') # Find the text holding the number of pages in the job search. time.sleep(random.randint(2, 6)) try: num_pages_txt = driver.find_element_by_id('ResultsFooter').text