def run(url):
    soup = get_javascript_soup(url)
    jobs_list = soup.select('div.wpb_wrapper > p > a')
    for job_entry in jobs_list:
        globals.job_title = job_entry.text.strip()
        globals.info_link = job_entry['href']
        update_db(organization)
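Every run(url) on this page fetches its page through a shared get_javascript_soup(url) helper that is not reproduced here. The sketch below is only an assumption about what such a helper might look like, using Selenium with headless Chrome to render JavaScript before handing the result to BeautifulSoup; the project's actual implementation may differ.

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

def get_javascript_soup(url):
    # Assumed helper: render the page in headless Chrome so JavaScript-generated
    # markup is present, then parse the rendered HTML with BeautifulSoup.
    options = Options()
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        return BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        driver.quit()

The examples only rely on it returning a BeautifulSoup object for the fully rendered page; the companion get_soup(url) used for detail pages is presumably a plain, non-rendering fetch.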
Code example #2
def run(url):
    soup = get_javascript_soup(url)

    job_listings = soup.find_all('div', {'class': 'job-listing-job-item'})
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for job_listing in job_listings:
        title_span = job_listing.find('span', {'class': 'job-item-title'})
        job_class.title = title_span.a.text.strip()
        job_class.info_link = 'https://recruiting.paylocity.com' + \
            title_span.a['href']
        details = get_soup(job_class.info_link)
        location = details.find('div', {'class': 'preview-location'})
        if location.a:
            job_class.location = location.a.text
            zipcode = location.a['href'].split('+')[-1]
            try:
                job_class.zip_code = int(zipcode)
            except ValueError:
                # generate a zip code if one is not available
                job_class.zip_code = city_to_zip(job_class.location)
        else:
            job_class.location = ''
            job_class.zip_code = ''
        job_class.post_date = string_to_date(
            job_listing.find('div', {
                'class': 'job-title-column'
            }).find_all('span')[1].text.split(' - ')[0])
        insert_count += job_insert(job_class)
    return insert_count
Code example #3
def run(url):
    soup = get_javascript_soup(url)
    jobs_list = soup.find(
        'table', {'class': 'srJobList'}).tbody.find_all('tr')[1:]
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for job_entry in jobs_list:
        job_class.title = job_entry.find(
            'td', {'class': 'srJobListJobTitle'}).text.strip()
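        # onclick holds something like window.open('...'); the slice strips that wrapper, leaving the URL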
        onClickLink = job_entry['onclick']
        job_class.info_link = onClickLink[13:len(onClickLink) - 3]
        job_class.full_or_part = job_entry.find(
            'td', {'class': 'srJobListTypeOfEmployment'}).text
        job_class.location = job_entry.find(
            'td', {'class': 'srJobListLocation'}).text
        location_parts = job_class.location.split(',')
        if len(location_parts) > 1 and len(
                location_parts[-1]) and location_parts[-1].strip().lower() != 'ca':
            # skip job if state is not CA
            print('Skip location: %s' % job_class.location)
            continue
        job_class.zip_code = city_to_zip(location_parts[0])
        insert_count += job_insert(job_class)
    return insert_count
Code example #4
def run(url):
    soup = get_javascript_soup(url)
    jobs_list = soup.select('div.entry-content div.small-12.columns > p > a')

    for job_entry in jobs_list:
        globals.job_title = job_entry.text
        globals.info_link = job_entry['href']
        update_db(organization)
Code example #5
def run(url):
    soup = get_javascript_soup(url)
    jobs_list = soup.select('div.wpb_wrapper > p > a')
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for job_entry in jobs_list:
        job_class.title = job_entry.text.strip()
        job_class.info_link = job_entry['href']
        insert_count += job_insert(job_class)
    return insert_count
Code example #6
def run(url):
    soup = get_javascript_soup(url)
    jobs_list = soup.select('div.entry-content div.small-12.columns > p > a')
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for job_entry in jobs_list:
        job_class.title = job_entry.text
        job_class.info_link = job_entry['href']
        insert_count += job_insert(job_class)
    return insert_count
Code example #7
def run(url):
    soup = get_javascript_soup(url)
    jobs_list = soup.find('table', {'class': 'srJobList'}).tbody.find_all('tr')[1:]

    for job_entry in jobs_list:
        globals.job_title = job_entry.find('td', {'class': 'srJobListJobTitle'}).text.strip()
        onClickLink = job_entry['onclick']
        globals.info_link = onClickLink[13:len(onClickLink) - 3]
        globals.full_or_part = job_entry.find('td', {'class': 'srJobListTypeOfEmployment'}).text
        globals.job_location = clean_location(job_entry.find('td', {'class': 'srJobListLocation'}).text)
        globals.job_zip_code = city_to_zip(globals.job_location)
        update_db(organization)
Code example #8
def run(url):
    soup = get_javascript_soup(url)
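    # The openings are listed in the element that immediately follows the "Job Opportunities" heading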

    jobs_list = soup.find('h3', text='Job Opportunities').next_sibling
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for job_entry in jobs_list.find_all('li'):
        job_class.title = job_entry.text
        job_class.info_link = job_entry.a['href']
        insert_count += job_insert(job_class)
    return insert_count
Code example #9
def run(url):
    soup = get_javascript_soup(url)
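    # Openings are marked with data-tn-element="jobLink[]" (Indeed-style markup); each card links to its own detail page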

    current_openings = soup.findAll(attrs={"data-tn-element": "jobLink[]"})
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for current_opening in current_openings:

        detail_page_link = current_opening.find('a')['href']
        detail_page_soup = get_soup(detail_page_link)
        detail_page_desc = detail_page_soup.find(
            'div', {"data-tn-component": "jobDescription"})

        job_class.title = detail_page_desc.find('h1').text.strip()

        job_summary_parts = detail_page_desc.findAll(['p', 'li'])
        job_class.summary = ' '.join(
            map(lambda a: a.getText(), job_summary_parts[1:-1])).strip()

        job_class.location = detail_page_desc.find(
            'dt', string="Location").findNext().get_text()

        location_parts = job_class.location.split(',')
        if len(location_parts) > 1 and len(
                location_parts[-1]
        ) and location_parts[-1].strip().lower() != 'ca':
            # skip job if state is not CA
            print('Skip location: %s' % job_class.location)
            continue
        job_class.zip_code = city_to_zip(location_parts[0])

        # Posting age text looks like "Posted 30+ days ago"; strip a trailing '+' before converting
        posted_ago = job_summary_parts[-1].get_text().split(' ')
        length = posted_ago[1]
        if length[-1:] == '+':
            length = length[:-1]
        length = int(length)
        unit = posted_ago[2]
        job_class.post_date = date_ago(length, unit)

        job_class.full_or_part = detail_page_desc.find(
            'dt', string="Job Type").findNext().get_text()

        salary_search = detail_page_desc.find('dt', string="Salary")
        if salary_search is not None:
            job_class.salary = salary_search.findNext().get_text()

        job_class.info_link = detail_page_link

        insert_count += job_insert(job_class)
    return insert_count
Code example #10
def run(url):
    soup = get_javascript_soup(url)
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    scraping = True
    while scraping:
        jobs_list = soup.find_all('article', {'class': 'et_pb_post'})
        for job_entry in jobs_list:
            job_title = job_entry.find('h2', {'class': 'entry-title'})
            job_class.title = job_title.text
            job_class.info_link = job_title.a['href']
            job_class.summary = job_entry.find('div', {
                'class': 'post-content'
            }).p.text
            insert_count += job_insert(job_class)
        # Check if more job entries on website to scrape
        if soup.find(text="« Older Entries"):
            soup = get_javascript_soup(
                soup.find(text="« Older Entries").parent['href'])
        else:
            scraping = False
    return insert_count
Code example #11
def run(url):
    soup = get_javascript_soup(url)
    job_listings = soup.find_all('div', {'class': 'job-listing-job-item'})
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for job_listing in job_listings:
        job_description = job_listing.find_all('span')
        # Get job title and link
        job_class.title = job_description[0].a.text
        job_class.info_link = 'https://recruiting.paylocity.com' + \
            job_description[0].a['href']
        # Get date as string
        date = job_description[1].text
        # Clean up date string by removing trailing -'s, then split and convert
        # to datetime object
        if date[len(date) - 2] == '-':
            date = date[0:len(date) - 3]
        date = date.strip().split('/')
        month = int(date[0])
        day = int(date[1])
        year = int(date[2])
        job_class.post_date = datetime(year, month, day)
        # Get Location
        job_class.location = job_listing.find('div', {
            'class': 'location-column'
        }).span.text
        # Get soup of job listing to scrape more info
        listing_soup = get_soup(job_class.info_link)
        listing_body = listing_soup.find('body').find_all('p')
        # Retrieve location, full/part-time, and salary info if available
        # Reset zip_code so a value from a previous listing is not carried over
        job_class.zip_code = ''
        if 'Location' in listing_body[0].text:
            location_string = listing_body[0].text.split(':')[1].lstrip()
            zip_code_result = re.search(r'(\d{5})', location_string)
            if zip_code_result is not None:
                job_class.zip_code = zip_code_result.group(1)
            # can't get city since there's no standard. It could be
            # "Hollywood", "Koreatown, Los angeles, California", or even
            # "Multiple Locations"
        if len(job_class.zip_code) == 0:
            job_class.zip_code = globals.city_to_zip(job_class.location)
        if 'Status' in listing_body[1].text:
            job_class.full_or_part = listing_body[1].text[8:]
        if 'Salary' in listing_body[2].text:
            job_class.salary = listing_body[2].text[14:]
        insert_count += job_insert(job_class)
    return insert_count
Code example #12
def run(url):
    soup = get_javascript_soup(url)

    jobs_table = soup.find('table', {
        'id': 'careers_table'
    }).tbody.find_all('tr')
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
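    # Row cells: 0 = title/link, 1 = location, 3 = employment type, 4 = post date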
    for job_row in jobs_table:
        job_entry = job_row.find_all('td')
        job_class.title = job_entry[0].a.text
        job_class.info_link = 'https://theapplicantmanager.com/' + \
            job_entry[0].a['href']
        job_class.location = job_entry[1].text
        job_class.full_or_part = job_entry[3].text
        job_class.post_date = job_entry[4].text
        insert_count += job_insert(job_class)
    return insert_count
Code example #13
def run(url):
    soup = get_javascript_soup(url)

    jobs_list = soup.find(
        'table', {'id': 'cws-search-results'}).find_all('tr')[1:]
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for job_entry in jobs_list:
        row_cells = job_entry.find_all('td')
        job_class.title = row_cells[1].a.text.strip()
        job_class.info_link = row_cells[1].a['href']
        job_class.location = clean_location(row_cells[2].text)
        job_class.zip_code = city_to_zip(job_class.location)
        job_soup = get_soup(job_class.info_link)
        job_class.full_or_part = job_soup.find(
            text="Employment Duration:").parent.parent.b.text.strip()
        insert_count += job_insert(job_class)
    return insert_count
Code example #14
def run(url):
    soup = get_javascript_soup(url)

    job_divs = soup.find_all('div', {'class': 'ui-accordion-content'})
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for job_div in job_divs:
        for job_listing in job_div.find_all('li'):
            job_class.title = job_listing.text.strip()
            job_class.info_link = 'https://lalgbtcenter.org' + \
                job_listing.find_all('a')[-1]['href']
            insert_count += job_insert(job_class)

    job_lists = soup.find_all('ul', {'class': 'ui-accordion-content'})

    for job_list in job_lists:
        for job_listing in job_list.find_all('li'):
            job_class.title = job_listing.text.strip()
            job_class.info_link = 'https://lalgbtcenter.org' + \
                job_listing.find_all('a')[-1]['href']
            insert_count += job_insert(job_class)
    return insert_count
Code example #15
def run(url):
    soup = get_javascript_soup(url)

    job_listings = soup.find_all('div', {'class': 'jobInfo'})
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for job_listing in job_listings:
        title_span = job_listing.find('span', {'class': 'jobTitle'})
        job_class.title = title_span.a.text.strip()
        job_class.info_link = 'https://www.paycomonline.net' + \
            title_span.a['href']
        location_text = job_listing.find('span', {'class': 'jobLocation'}).text
        if location_text:
            job_class.location = globals.clean_location(
                location_text.split(' - ')[1])
            job_class.zip_code = globals.city_to_zip(job_class.location)
        summary_text = job_listing.find('span', {'class': 'jobDescription'}).text
        if summary_text:
            job_class.summary = summary_text.strip()
        job_type_text = job_listing.find('span', {'class': 'jobType'}).text
        if job_type_text:
            if 'ft' in job_type_text.lower() or 'full' in job_type_text.lower():
                job_class.full_or_part = 'full'
            else:
                job_class.full_or_part = 'part'
        insert_count += job_insert(job_class)
    return insert_count