Esempio n. 1
0
        job_title = sys.argv[1]
        job_location = sys.argv[2]
        radius = sys.argv[3]
    except IndexError:
        raise Exception(
            'Program needs a job title, job location, and radius inputted!')

    # Build the Monster search URL. Runs of whitespace inside the title and
    # the location are collapsed to single hyphens (e.g. 'data scientist'
    # becomes 'data-scientist') before being placed in the query string.
    # NOTE(review): the '&sort=dt.rv.di' value is passed through verbatim;
    # its meaning is not visible from this code -- confirm against the site.
    base_URL = 'http://jobs.monster.com/search/?'
    query_parameters = [
        'q={}'.format('-'.join(job_title.split())),
        '&where={}'.format('-'.join(job_location.split())),
        '&sort=dt.rv.di',
        '&rad={}'.format(radius),
    ]

    query_URL = format_query(base_URL, query_parameters)
    driver = issue_driver_query(query_URL)

    # Any failure to locate or parse the job count is treated as "no
    # results" and ends the run with a zero exit status. Catching
    # `Exception` (instead of using a bare `except`) keeps that best-effort
    # behaviour while letting KeyboardInterrupt and SystemExit propagate, so
    # an interrupted run is not misreported as an empty search.
    try:
        num_jobs_txt = get_num_jobs_txt(driver)
        num_jobs = int(parse_num(num_jobs_txt, 0))
    except Exception:
        print('No jobs for search {} in {}'.format(job_title, job_location))
        sys.exit(0)

    current_date = str(datetime.datetime.now(pytz.timezone('US/Mountain')))
    storage_dct = {
        'job_site': 'monster',
        'num_jobs': num_jobs,
        'date': current_date,
        'title': job_title,
        'location': job_location
Esempio n. 2
0
        elif elem2_text:
            return int(elem2_text) == num_pages


if __name__ == '__main__':
    # Two positional command-line arguments are mandatory: the job title
    # followed by the job location.
    try:
        job_title, job_location = sys.argv[1], sys.argv[2]
    except IndexError:
        raise Exception('Program needs a job title and job location inputted!')

    # Issue the job query against the Glassdoor index page.
    base_URL = 'https://www.glassdoor.com/index.htm'
    query_params = (
        ('KeywordSearch', job_title),
        ('LocationSearch', job_location),
    )
    driver = issue_driver_query(base_URL, query_params)

    # Pause for a random 7-15 seconds, then read the job-count text out of
    # the second <header> element on the page and parse it.
    time.sleep(random.randint(7, 15))
    header_elements = driver.find_elements_by_xpath('//header')
    num_jobs_txt = header_elements[1].text
    num_jobs = int(parse_num(num_jobs_txt, 0))

    # Store the number of jobs the search reported, stamped with the
    # current US/Mountain time.
    mountain_tz = pytz.timezone('US/Mountain')
    current_date = str(datetime.datetime.now(mountain_tz))
    storage_dct = {
        'job_site': 'glassdoor',
        'num_jobs': num_jobs,
        'date': current_date,
        'title': job_title,
        'location': job_location,
    }
    store_in_mongo([storage_dct], 'job_numbers', 'glassdoor')
Esempio n. 3
0
    # will be passed in, so I'll attempt to get both of those within
    # a try except and throw an error otherwise. 
    try:
        job_title = sys.argv[1]
        job_location = sys.argv[2]
        radius = sys.argv[3]
    except IndexError:
        raise Exception(
            'Program needs a job title, job location, and radius inputted!')

    # Build the Monster search URL. Runs of whitespace inside the title and
    # the location are collapsed to single hyphens before being placed in
    # the query string.
    # NOTE(review): the '&sort=dt.rv.di' value is passed through verbatim;
    # its meaning is not visible from this code -- confirm against the site.
    base_URL = 'http://jobs.monster.com/search/?'
    query_parameters = [
        'q={}'.format('-'.join(job_title.split())),
        '&where={}'.format('-'.join(job_location.split())),
        '&sort=dt.rv.di',
        '&rad={}'.format(radius),
    ]

    query_URL = format_query(base_URL, query_parameters)
    driver = issue_driver_query(query_URL)

    # Any failure to locate or parse the job count is treated as "no
    # results" and ends the run with a zero exit status. Catching
    # `Exception` (instead of using a bare `except`) keeps that best-effort
    # behaviour while letting KeyboardInterrupt and SystemExit propagate.
    try:
        num_jobs_txt = get_num_jobs_txt(driver)
        num_jobs = int(parse_num(num_jobs_txt, 0))
    except Exception:
        # The parenthesised single-argument form prints the same text under
        # Python 2 and is also valid syntax on Python 3.
        print('No jobs for search {} in {}'.format(job_title, job_location))
        sys.exit(0)

    # Store the number of jobs the search reported, stamped with the
    # current US/Mountain time.
    current_date = str(datetime.datetime.now(pytz.timezone('US/Mountain')))
    storage_dct = {
        'job_site': 'monster',
        'num_jobs': num_jobs,
        'date': current_date,
        'title': job_title,
        'location': job_location,
    }
    store_in_mongo([storage_dct], 'job_numbers', 'monster')
    
    # This loop will be used to keep clicking the next button after
    # scraping jobs on that page. 
Esempio n. 4
0
if __name__ == '__main__':
    # I expect that at the very least a job title and job location
    # will be passed in, so I'll attempt to get both of those within
    # a try except and throw an error otherwise. 
    try: 
        job_title = sys.argv[1]
        job_location = sys.argv[2]
    except IndexError: 
        raise Exception('Program needs a job title and job location inputted!')
    
    # Issue the job query. 
    base_URL = 'https://www.glassdoor.com/index.htm'
    query_params = (('KeywordSearch', job_title), 
            ('LocationSearch', job_location))
    driver = issue_driver_query(base_URL, query_params)

    # Find the text holding the number of jobs, and parse it. 
    time.sleep(random.randint(7, 15))
    num_jobs_txt = driver.find_elements_by_xpath('//header')[1].text
    num_jobs = int(parse_num(num_jobs_txt, 0)) 

    current_date = str(datetime.datetime.now(pytz.timezone('US/Mountain')))
    storage_dct = {'job_site': 'glassdoor', 'num_jobs': num_jobs, 
            'date': current_date, 'title': job_title, 'location': job_location}
    store_in_mongo([storage_dct], 'job_numbers', 'glassdoor')

    # Find the text holding the number of pages in the job search. 
    time.sleep(random.randint(2, 6))
    try: 
        num_pages_txt = driver.find_element_by_id('ResultsFooter').text