Python parse_num Examples

Programming Language: Python

Namespace/Package Name: utils.parsing_utilities

Method/Function: parse_num

Examples at hotexamples.com: 4

Python parse_num - 4 examples found. These are the top-rated real-world Python examples of utils.parsing_utilities.parse_num, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

def query_for_data(driver, json_dct, job, idx):
    """Collect the details of a single job posting into a dictionary.

    Reads the job title, posting company, job location and (when
    present) the posting date from the `job` element, separates a
    leading star rating from the company text when `parse_num` reports
    one, and finally stores the posting's text under 'posting_txt'.

    Args:
        driver: Selenium webdriver
            Passed through to `grab_posting_txt`.
        json_dct: dict
            Dictionary holding the current information that is being
            stored for that job posting. A copy of it is handed to
            `gen_output`.
        job: Selenium WebElement
            Element for the job posting being read.
        idx: int
            Holds the # of the job posting the program is on
            (0 indexed here). Passed through to `grab_posting_txt`.

    Return: dict
        The dictionary returned by `gen_output`, with the 'posting_txt'
        key added.
    """

    posting_title = job.find_element_by_class_name('title').text
    split_posting_company = job.find_element_by_class_name(
        'companyInfo').text.split()
    # The leading '.' anchors the XPath at `job`. An expression that starts
    # with '//' is evaluated from the document root even when it is called
    # on an element, which would return the first location on the page for
    # every posting.
    posting_location = job.find_element_by_xpath(
        ".//span[@itemprop='jobLocation']").text
    try:
        posting_date = job.find_element_by_class_name('minor').text
    except Exception:
        # The date is optional; fall back to an empty string when the lookup
        # fails. `Exception` (rather than a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        posting_date = ''

    # When a posting has stars, the rating arrives as part of the company
    # text. Rather than building a dedicated search-and-replace, reuse
    # `parse_num` to detect it; the first whitespace-separated token is then
    # taken as the rating and the remainder as the company name.
    if parse_num(' '.join(split_posting_company), 0):
        star_args = (split_posting_company[0],)
        posting_company = ' '.join(split_posting_company[1:])
    else:
        # No rating found: omit the argument so `gen_output` is called
        # without it, exactly as before.
        star_args = ()
        posting_company = ' '.join(split_posting_company)

    out_json_dct = gen_output(json_dct.copy(), posting_title,
                              posting_location, posting_date,
                              posting_company, *star_args)

    out_json_dct['posting_txt'] = grab_posting_txt(driver, job, idx)
    return out_json_dct

Example #2

Show file

        raise Exception(
            'Program needs a job title, job location, and radius inputted!')

    base_URL = 'http://www.simplyhired.com/search?'
    query_parameters = [
        'q={}'.format('+'.join(job_title.split())),
        '&l={}'.format('+'.join(job_location.split())),
        '&mi={}'.format(radius), '&fdb=5', '&clst=CTL'
    ]

    query_URL = format_query(base_URL, query_parameters)

    html = get_html(query_URL)
    try:
        num_jobs_txt = str(html.select('.result-headline')[0].text)
        num_jobs = int(parse_num(num_jobs_txt, 2))
    except:
        print('No jobs for search {} in {}'.format(job_title, job_location))
        sys.exit(0)

    current_date = str(datetime.datetime.now(pytz.timezone('US/Mountain')))
    storage_dct = {
        'job_site': 'simplyhired',
        'num_jobs': num_jobs,
        'date': current_date,
        'title': job_title,
        'location': job_location
    }
    store_in_mongo([storage_dct], 'job_numbers', 'simplyhired')

    # All of the jobs should be available through the '.js-job-link' CSS class.

Example #3

Show file

    try:
        job_title = sys.argv[1]
        job_location = sys.argv[2]
    except IndexError:
        raise Exception('Program needs a job title and job location inputted!')

    # Issue the job query.
    base_URL = 'https://www.glassdoor.com/index.htm'
    query_params = (('KeywordSearch', job_title), ('LocationSearch',
                                                   job_location))
    driver = issue_driver_query(base_URL, query_params)

    # Find the text holding the number of jobs, and parse it.
    time.sleep(random.randint(7, 15))
    num_jobs_txt = driver.find_elements_by_xpath('//header')[1].text
    num_jobs = int(parse_num(num_jobs_txt, 0))

    current_date = str(datetime.datetime.now(pytz.timezone('US/Mountain')))
    storage_dct = {
        'job_site': 'glassdoor',
        'num_jobs': num_jobs,
        'date': current_date,
        'title': job_title,
        'location': job_location
    }
    store_in_mongo([storage_dct], 'job_numbers', 'glassdoor')

    # Find the text holding the number of pages in the job search.
    time.sleep(random.randint(2, 6))
    try:
        num_pages_txt = driver.find_element_by_id('ResultsFooter').text

Example #4

Show file

File: job_scraper.py Project: xkortex/web-scrapers

    base_URL = 'http://jobs.monster.com/search/?'
    query_parameters = [
        'q={}'.format('-'.join(job_title.split())),
        '&where={}'.format('-'.join(job_location.split())), '&sort=dt.rv.di',
        '&rad={}'.format(radius)
    ]

    query_URL = format_query(base_URL, query_parameters)
    driver = issue_driver_query(query_URL, driver_path=driver_path)

    if verbose: print('<v> Successfully connected selenium')

    try:
        num_jobs = get_num_jobs_txt(driver)
        num_jobs = int(parse_num(num_jobs, 0))
        if verbose: print('<v> {} jobs found'.format(num_jobs))
    except:
        print('No jobs for search {} in {}'.format(job_title, job_location))
        sys.exit(0)

    assert 0, 'halt'

    current_date = str(datetime.datetime.now(pytz.timezone('US/Mountain')))
    storage_dct = {
        'job_site': 'monster',
        'num_jobs': num_jobs,
        'date': current_date,
        'title': job_title,
        'location': job_location
    }